summaryrefslogtreecommitdiffstats
path: root/contrib/libs/icu/common
diff options
context:
space:
mode:
authorDaniil Cherednik <[email protected]>2022-06-24 13:08:23 +0300
committerDaniil Cherednik <[email protected]>2022-06-24 13:08:23 +0300
commit067fd14417000b3601483f660fe9e27c3b47f0b5 (patch)
treeeb4fc96bcae1331d15432f6555b003185bc75848 /contrib/libs/icu/common
parentece86e83e77dcf3d9e757517d3d16f707272a4c7 (diff)
REVERT: r9621717 (disable pg_wrapper for OSS) YQ-1154
ref:d888564254e64ea675383c26661ff5332bf406f5
Diffstat (limited to 'contrib/libs/icu/common')
-rw-r--r--contrib/libs/icu/common/appendable.cpp74
-rw-r--r--contrib/libs/icu/common/bmpset.cpp741
-rw-r--r--contrib/libs/icu/common/bmpset.h164
-rw-r--r--contrib/libs/icu/common/brkeng.cpp284
-rw-r--r--contrib/libs/icu/common/brkeng.h271
-rw-r--r--contrib/libs/icu/common/brkiter.cpp527
-rw-r--r--contrib/libs/icu/common/bytesinkutil.cpp161
-rw-r--r--contrib/libs/icu/common/bytesinkutil.h83
-rw-r--r--contrib/libs/icu/common/bytestream.cpp85
-rw-r--r--contrib/libs/icu/common/bytestrie.cpp441
-rw-r--r--contrib/libs/icu/common/bytestriebuilder.cpp504
-rw-r--r--contrib/libs/icu/common/bytestrieiterator.cpp214
-rw-r--r--contrib/libs/icu/common/caniter.cpp586
-rw-r--r--contrib/libs/icu/common/capi_helper.h97
-rw-r--r--contrib/libs/icu/common/characterproperties.cpp383
-rw-r--r--contrib/libs/icu/common/chariter.cpp100
-rw-r--r--contrib/libs/icu/common/charstr.cpp215
-rw-r--r--contrib/libs/icu/common/charstr.h168
-rw-r--r--contrib/libs/icu/common/cmemory.cpp138
-rw-r--r--contrib/libs/icu/common/cmemory.h820
-rw-r--r--contrib/libs/icu/common/cpputils.h97
-rw-r--r--contrib/libs/icu/common/cstr.cpp54
-rw-r--r--contrib/libs/icu/common/cstr.h60
-rw-r--r--contrib/libs/icu/common/cstring.cpp341
-rw-r--r--contrib/libs/icu/common/cstring.h126
-rw-r--r--contrib/libs/icu/common/cwchar.cpp55
-rw-r--r--contrib/libs/icu/common/cwchar.h58
-rw-r--r--contrib/libs/icu/common/dictbe.cpp1410
-rw-r--r--contrib/libs/icu/common/dictbe.h402
-rw-r--r--contrib/libs/icu/common/dictionarydata.cpp242
-rw-r--r--contrib/libs/icu/common/dictionarydata.h191
-rw-r--r--contrib/libs/icu/common/dtintrv.cpp63
-rw-r--r--contrib/libs/icu/common/edits.cpp803
-rw-r--r--contrib/libs/icu/common/errorcode.cpp42
-rw-r--r--contrib/libs/icu/common/filteredbrk.cpp710
-rw-r--r--contrib/libs/icu/common/filterednormalizer2.cpp363
-rw-r--r--contrib/libs/icu/common/hash.h248
-rw-r--r--contrib/libs/icu/common/icudataver.cpp31
-rw-r--r--contrib/libs/icu/common/icuplug.cpp884
-rw-r--r--contrib/libs/icu/common/icuplugimp.h93
-rw-r--r--contrib/libs/icu/common/loadednormalizer2impl.cpp418
-rw-r--r--contrib/libs/icu/common/localebuilder.cpp468
-rw-r--r--contrib/libs/icu/common/localematcher.cpp794
-rw-r--r--contrib/libs/icu/common/localeprioritylist.cpp239
-rw-r--r--contrib/libs/icu/common/localeprioritylist.h115
-rw-r--r--contrib/libs/icu/common/localsvc.h27
-rw-r--r--contrib/libs/icu/common/locavailable.cpp270
-rw-r--r--contrib/libs/icu/common/locbased.cpp55
-rw-r--r--contrib/libs/icu/common/locbased.h107
-rw-r--r--contrib/libs/icu/common/locdispnames.cpp885
-rw-r--r--contrib/libs/icu/common/locdistance.cpp415
-rw-r--r--contrib/libs/icu/common/locdistance.h151
-rw-r--r--contrib/libs/icu/common/locdspnm.cpp1110
-rw-r--r--contrib/libs/icu/common/locid.cpp1663
-rw-r--r--contrib/libs/icu/common/loclikely.cpp1358
-rw-r--r--contrib/libs/icu/common/loclikelysubtags.cpp746
-rw-r--r--contrib/libs/icu/common/loclikelysubtags.h157
-rw-r--r--contrib/libs/icu/common/locmap.cpp1309
-rw-r--r--contrib/libs/icu/common/locmap.h40
-rw-r--r--contrib/libs/icu/common/locresdata.cpp220
-rw-r--r--contrib/libs/icu/common/locutil.cpp275
-rw-r--r--contrib/libs/icu/common/locutil.h39
-rw-r--r--contrib/libs/icu/common/lsr.cpp114
-rw-r--r--contrib/libs/icu/common/lsr.h82
-rw-r--r--contrib/libs/icu/common/messageimpl.h65
-rw-r--r--contrib/libs/icu/common/messagepattern.cpp1233
-rw-r--r--contrib/libs/icu/common/msvcres.h25
-rw-r--r--contrib/libs/icu/common/mutex.h77
-rw-r--r--contrib/libs/icu/common/norm2_nfc_data.h1149
-rw-r--r--contrib/libs/icu/common/norm2allmodes.h369
-rw-r--r--contrib/libs/icu/common/normalizer2.cpp572
-rw-r--r--contrib/libs/icu/common/normalizer2impl.cpp2669
-rw-r--r--contrib/libs/icu/common/normalizer2impl.h978
-rw-r--r--contrib/libs/icu/common/normlzr.cpp529
-rw-r--r--contrib/libs/icu/common/parsepos.cpp23
-rw-r--r--contrib/libs/icu/common/patternprops.cpp230
-rw-r--r--contrib/libs/icu/common/patternprops.h98
-rw-r--r--contrib/libs/icu/common/pluralmap.cpp44
-rw-r--r--contrib/libs/icu/common/pluralmap.h292
-rw-r--r--contrib/libs/icu/common/propname.cpp328
-rw-r--r--contrib/libs/icu/common/propname.h212
-rw-r--r--contrib/libs/icu/common/propname_data.h1919
-rw-r--r--contrib/libs/icu/common/propsvec.cpp529
-rw-r--r--contrib/libs/icu/common/propsvec.h178
-rw-r--r--contrib/libs/icu/common/punycode.cpp589
-rw-r--r--contrib/libs/icu/common/punycode.h120
-rw-r--r--contrib/libs/icu/common/putil.cpp2430
-rw-r--r--contrib/libs/icu/common/putilimp.h615
-rw-r--r--contrib/libs/icu/common/rbbi.cpp1272
-rw-r--r--contrib/libs/icu/common/rbbi_cache.cpp653
-rw-r--r--contrib/libs/icu/common/rbbi_cache.h203
-rw-r--r--contrib/libs/icu/common/rbbidata.cpp425
-rw-r--r--contrib/libs/icu/common/rbbidata.h199
-rw-r--r--contrib/libs/icu/common/rbbinode.cpp372
-rw-r--r--contrib/libs/icu/common/rbbinode.h127
-rw-r--r--contrib/libs/icu/common/rbbirb.cpp341
-rw-r--r--contrib/libs/icu/common/rbbirb.h237
-rw-r--r--contrib/libs/icu/common/rbbirpt.h296
-rw-r--r--contrib/libs/icu/common/rbbiscan.cpp1283
-rw-r--r--contrib/libs/icu/common/rbbiscan.h167
-rw-r--r--contrib/libs/icu/common/rbbisetb.cpp684
-rw-r--r--contrib/libs/icu/common/rbbisetb.h147
-rw-r--r--contrib/libs/icu/common/rbbistbl.cpp270
-rw-r--r--contrib/libs/icu/common/rbbitblb.cpp1739
-rw-r--r--contrib/libs/icu/common/rbbitblb.h220
-rw-r--r--contrib/libs/icu/common/resbund.cpp399
-rw-r--r--contrib/libs/icu/common/resbund_cnv.cpp57
-rw-r--r--contrib/libs/icu/common/resource.cpp22
-rw-r--r--contrib/libs/icu/common/resource.h293
-rw-r--r--contrib/libs/icu/common/restrace.cpp130
-rw-r--r--contrib/libs/icu/common/restrace.h147
-rw-r--r--contrib/libs/icu/common/ruleiter.cpp162
-rw-r--r--contrib/libs/icu/common/ruleiter.h233
-rw-r--r--contrib/libs/icu/common/schriter.cpp119
-rw-r--r--contrib/libs/icu/common/serv.cpp982
-rw-r--r--contrib/libs/icu/common/serv.h996
-rw-r--r--contrib/libs/icu/common/servlk.cpp188
-rw-r--r--contrib/libs/icu/common/servlkf.cpp152
-rw-r--r--contrib/libs/icu/common/servloc.h551
-rw-r--r--contrib/libs/icu/common/servls.cpp295
-rw-r--r--contrib/libs/icu/common/servnotf.cpp120
-rw-r--r--contrib/libs/icu/common/servnotf.h125
-rw-r--r--contrib/libs/icu/common/servrbf.cpp96
-rw-r--r--contrib/libs/icu/common/servslkf.cpp123
-rw-r--r--contrib/libs/icu/common/sharedobject.cpp62
-rw-r--r--contrib/libs/icu/common/sharedobject.h184
-rw-r--r--contrib/libs/icu/common/simpleformatter.cpp323
-rw-r--r--contrib/libs/icu/common/sprpimpl.h130
-rw-r--r--contrib/libs/icu/common/static_unicode_sets.cpp245
-rw-r--r--contrib/libs/icu/common/static_unicode_sets.h140
-rw-r--r--contrib/libs/icu/common/stringpiece.cpp116
-rw-r--r--contrib/libs/icu/common/stringtriebuilder.cpp618
-rw-r--r--contrib/libs/icu/common/uarrsort.cpp274
-rw-r--r--contrib/libs/icu/common/uarrsort.h103
-rw-r--r--contrib/libs/icu/common/uassert.h51
-rw-r--r--contrib/libs/icu/common/ubidi.cpp3036
-rw-r--r--contrib/libs/icu/common/ubidi_props.cpp254
-rw-r--r--contrib/libs/icu/common/ubidi_props.h148
-rw-r--r--contrib/libs/icu/common/ubidi_props_data.h922
-rw-r--r--contrib/libs/icu/common/ubidiimp.h476
-rw-r--r--contrib/libs/icu/common/ubidiln.cpp1347
-rw-r--r--contrib/libs/icu/common/ubiditransform.cpp530
-rw-r--r--contrib/libs/icu/common/ubidiwrt.cpp650
-rw-r--r--contrib/libs/icu/common/ubrk.cpp357
-rw-r--r--contrib/libs/icu/common/ubrkimpl.h15
-rw-r--r--contrib/libs/icu/common/ucase.cpp1572
-rw-r--r--contrib/libs/icu/common/ucase.h444
-rw-r--r--contrib/libs/icu/common/ucase_props_data.h951
-rw-r--r--contrib/libs/icu/common/ucasemap.cpp953
-rw-r--r--contrib/libs/icu/common/ucasemap_imp.h282
-rw-r--r--contrib/libs/icu/common/ucasemap_titlecase_brkiter.cpp134
-rw-r--r--contrib/libs/icu/common/ucat.cpp78
-rw-r--r--contrib/libs/icu/common/uchar.cpp730
-rw-r--r--contrib/libs/icu/common/uchar_props_data.h3860
-rw-r--r--contrib/libs/icu/common/ucharstrie.cpp414
-rw-r--r--contrib/libs/icu/common/ucharstriebuilder.cpp443
-rw-r--r--contrib/libs/icu/common/ucharstrieiterator.cpp215
-rw-r--r--contrib/libs/icu/common/uchriter.cpp367
-rw-r--r--contrib/libs/icu/common/ucln.h91
-rw-r--r--contrib/libs/icu/common/ucln_cmn.cpp124
-rw-r--r--contrib/libs/icu/common/ucln_cmn.h75
-rw-r--r--contrib/libs/icu/common/ucln_imp.h182
-rw-r--r--contrib/libs/icu/common/ucmndata.cpp393
-rw-r--r--contrib/libs/icu/common/ucmndata.h117
-rw-r--r--contrib/libs/icu/common/ucnv.cpp2910
-rw-r--r--contrib/libs/icu/common/ucnv2022.cpp3973
-rw-r--r--contrib/libs/icu/common/ucnv_bld.cpp1689
-rw-r--r--contrib/libs/icu/common/ucnv_bld.h296
-rw-r--r--contrib/libs/icu/common/ucnv_cb.cpp261
-rw-r--r--contrib/libs/icu/common/ucnv_cnv.cpp182
-rw-r--r--contrib/libs/icu/common/ucnv_cnv.h323
-rw-r--r--contrib/libs/icu/common/ucnv_ct.cpp646
-rw-r--r--contrib/libs/icu/common/ucnv_err.cpp486
-rw-r--r--contrib/libs/icu/common/ucnv_ext.cpp1143
-rw-r--r--contrib/libs/icu/common/ucnv_ext.h481
-rw-r--r--contrib/libs/icu/common/ucnv_imp.h139
-rw-r--r--contrib/libs/icu/common/ucnv_io.cpp1360
-rw-r--r--contrib/libs/icu/common/ucnv_io.h127
-rw-r--r--contrib/libs/icu/common/ucnv_lmb.cpp1388
-rw-r--r--contrib/libs/icu/common/ucnv_set.cpp70
-rw-r--r--contrib/libs/icu/common/ucnv_u16.cpp1579
-rw-r--r--contrib/libs/icu/common/ucnv_u32.cpp1253
-rw-r--r--contrib/libs/icu/common/ucnv_u7.cpp1491
-rw-r--r--contrib/libs/icu/common/ucnv_u8.cpp944
-rw-r--r--contrib/libs/icu/common/ucnvbocu.cpp1413
-rw-r--r--contrib/libs/icu/common/ucnvdisp.cpp88
-rw-r--r--contrib/libs/icu/common/ucnvhz.cpp625
-rw-r--r--contrib/libs/icu/common/ucnvisci.cpp1635
-rw-r--r--contrib/libs/icu/common/ucnvlat1.cpp756
-rw-r--r--contrib/libs/icu/common/ucnvmbcs.cpp5723
-rw-r--r--contrib/libs/icu/common/ucnvmbcs.h605
-rw-r--r--contrib/libs/icu/common/ucnvscsu.cpp2045
-rw-r--r--contrib/libs/icu/common/ucnvsel.cpp823
-rw-r--r--contrib/libs/icu/common/ucol_data.h89
-rw-r--r--contrib/libs/icu/common/ucol_swp.cpp615
-rw-r--r--contrib/libs/icu/common/ucol_swp.h58
-rw-r--r--contrib/libs/icu/common/ucptrie.cpp601
-rw-r--r--contrib/libs/icu/common/ucptrie_impl.h289
-rw-r--r--contrib/libs/icu/common/ucurr.cpp2688
-rw-r--r--contrib/libs/icu/common/ucurrimp.h78
-rw-r--r--contrib/libs/icu/common/udata.cpp1460
-rw-r--r--contrib/libs/icu/common/udatamem.cpp161
-rw-r--r--contrib/libs/icu/common/udatamem.h61
-rw-r--r--contrib/libs/icu/common/udataswp.cpp473
-rw-r--r--contrib/libs/icu/common/udataswp.h404
-rw-r--r--contrib/libs/icu/common/uelement.h91
-rw-r--r--contrib/libs/icu/common/uenum.cpp189
-rw-r--r--contrib/libs/icu/common/uenumimp.h155
-rw-r--r--contrib/libs/icu/common/uhash.cpp991
-rw-r--r--contrib/libs/icu/common/uhash.h718
-rw-r--r--contrib/libs/icu/common/uhash_us.cpp26
-rw-r--r--contrib/libs/icu/common/uidna.cpp921
-rw-r--r--contrib/libs/icu/common/uinit.cpp74
-rw-r--r--contrib/libs/icu/common/uinvchar.cpp627
-rw-r--r--contrib/libs/icu/common/uinvchar.h219
-rw-r--r--contrib/libs/icu/common/uiter.cpp1108
-rw-r--r--contrib/libs/icu/common/ulayout_props.h46
-rw-r--r--contrib/libs/icu/common/ulist.cpp270
-rw-r--r--contrib/libs/icu/common/ulist.h50
-rw-r--r--contrib/libs/icu/common/uloc.cpp2239
-rw-r--r--contrib/libs/icu/common/uloc_keytype.cpp534
-rw-r--r--contrib/libs/icu/common/uloc_tag.cpp2877
-rw-r--r--contrib/libs/icu/common/ulocimp.h282
-rw-r--r--contrib/libs/icu/common/umapfile.cpp530
-rw-r--r--contrib/libs/icu/common/umapfile.h57
-rw-r--r--contrib/libs/icu/common/umath.cpp26
-rw-r--r--contrib/libs/icu/common/umutablecptrie.cpp1852
-rw-r--r--contrib/libs/icu/common/umutex.cpp204
-rw-r--r--contrib/libs/icu/common/umutex.h277
-rw-r--r--contrib/libs/icu/common/unames.cpp2108
-rw-r--r--contrib/libs/icu/common/unifiedcache.cpp522
-rw-r--r--contrib/libs/icu/common/unifiedcache.h556
-rw-r--r--contrib/libs/icu/common/unifilt.cpp71
-rw-r--r--contrib/libs/icu/common/unifunct.cpp28
-rw-r--r--contrib/libs/icu/common/uniset.cpp2356
-rw-r--r--contrib/libs/icu/common/uniset_closure.cpp250
-rw-r--r--contrib/libs/icu/common/uniset_props.cpp1174
-rw-r--r--contrib/libs/icu/common/unisetspan.cpp1509
-rw-r--r--contrib/libs/icu/common/unisetspan.h157
-rw-r--r--contrib/libs/icu/common/unistr.cpp1982
-rw-r--r--contrib/libs/icu/common/unistr_case.cpp250
-rw-r--r--contrib/libs/icu/common/unistr_case_locale.cpp56
-rw-r--r--contrib/libs/icu/common/unistr_cnv.cpp417
-rw-r--r--contrib/libs/icu/common/unistr_props.cpp77
-rw-r--r--contrib/libs/icu/common/unistr_titlecase_brkiter.cpp57
-rw-r--r--contrib/libs/icu/common/unistrappender.h90
-rw-r--r--contrib/libs/icu/common/unorm.cpp280
-rw-r--r--contrib/libs/icu/common/unormcmp.cpp640
-rw-r--r--contrib/libs/icu/common/unormimp.h488
-rw-r--r--contrib/libs/icu/common/uobject.cpp105
-rw-r--r--contrib/libs/icu/common/uposixdefs.h77
-rw-r--r--contrib/libs/icu/common/uprops.cpp797
-rw-r--r--contrib/libs/icu/common/uprops.h504
-rw-r--r--contrib/libs/icu/common/ures_cnv.cpp78
-rw-r--r--contrib/libs/icu/common/uresbund.cpp3090
-rw-r--r--contrib/libs/icu/common/uresdata.cpp1518
-rw-r--r--contrib/libs/icu/common/uresdata.h565
-rw-r--r--contrib/libs/icu/common/uresimp.h364
-rw-r--r--contrib/libs/icu/common/ureslocs.h27
-rw-r--r--contrib/libs/icu/common/usc_impl.cpp361
-rw-r--r--contrib/libs/icu/common/usc_impl.h139
-rw-r--r--contrib/libs/icu/common/uscript.cpp149
-rw-r--r--contrib/libs/icu/common/uscript_props.cpp302
-rw-r--r--contrib/libs/icu/common/uset.cpp641
-rw-r--r--contrib/libs/icu/common/uset_imp.h62
-rw-r--r--contrib/libs/icu/common/uset_props.cpp143
-rw-r--r--contrib/libs/icu/common/usetiter.cpp152
-rw-r--r--contrib/libs/icu/common/ushape.cpp1728
-rw-r--r--contrib/libs/icu/common/usprep.cpp871
-rw-r--r--contrib/libs/icu/common/ustack.cpp63
-rw-r--r--contrib/libs/icu/common/ustr_cnv.cpp256
-rw-r--r--contrib/libs/icu/common/ustr_cnv.h51
-rw-r--r--contrib/libs/icu/common/ustr_imp.h155
-rw-r--r--contrib/libs/icu/common/ustr_titlecase_brkiter.cpp237
-rw-r--r--contrib/libs/icu/common/ustr_wcs.cpp535
-rw-r--r--contrib/libs/icu/common/ustrcase.cpp1818
-rw-r--r--contrib/libs/icu/common/ustrcase_locale.cpp94
-rw-r--r--contrib/libs/icu/common/ustrenum.cpp398
-rw-r--r--contrib/libs/icu/common/ustrenum.h87
-rw-r--r--contrib/libs/icu/common/ustrfmt.cpp59
-rw-r--r--contrib/libs/icu/common/ustrfmt.h19
-rw-r--r--contrib/libs/icu/common/ustring.cpp1529
-rw-r--r--contrib/libs/icu/common/ustrtrns.cpp1451
-rw-r--r--contrib/libs/icu/common/utext.cpp2877
-rw-r--r--contrib/libs/icu/common/utf_impl.cpp329
-rw-r--r--contrib/libs/icu/common/util.cpp421
-rw-r--r--contrib/libs/icu/common/util.h257
-rw-r--r--contrib/libs/icu/common/util_props.cpp217
-rw-r--r--contrib/libs/icu/common/utrace.cpp504
-rw-r--r--contrib/libs/icu/common/utracimp.h391
-rw-r--r--contrib/libs/icu/common/utrie.cpp1234
-rw-r--r--contrib/libs/icu/common/utrie.h793
-rw-r--r--contrib/libs/icu/common/utrie2.cpp663
-rw-r--r--contrib/libs/icu/common/utrie2.h955
-rw-r--r--contrib/libs/icu/common/utrie2_builder.cpp1483
-rw-r--r--contrib/libs/icu/common/utrie2_impl.h175
-rw-r--r--contrib/libs/icu/common/utrie_swap.cpp344
-rw-r--r--contrib/libs/icu/common/uts46.cpp1484
-rw-r--r--contrib/libs/icu/common/utypeinfo.h32
-rw-r--r--contrib/libs/icu/common/utypes.cpp226
-rw-r--r--contrib/libs/icu/common/uvector.cpp567
-rw-r--r--contrib/libs/icu/common/uvector.h415
-rw-r--r--contrib/libs/icu/common/uvectr32.cpp335
-rw-r--r--contrib/libs/icu/common/uvectr32.h306
-rw-r--r--contrib/libs/icu/common/uvectr64.cpp214
-rw-r--r--contrib/libs/icu/common/uvectr64.h279
-rw-r--r--contrib/libs/icu/common/wintz.cpp124
-rw-r--r--contrib/libs/icu/common/wintz.h36
308 files changed, 0 insertions, 175838 deletions
diff --git a/contrib/libs/icu/common/appendable.cpp b/contrib/libs/icu/common/appendable.cpp
deleted file mode 100644
index fca3c1e4133..00000000000
--- a/contrib/libs/icu/common/appendable.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2011-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: appendable.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010dec07
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/appendable.h"
-#include "unicode/utf16.h"
-
-U_NAMESPACE_BEGIN
-
-Appendable::~Appendable() {}
-
-UBool
-Appendable::appendCodePoint(UChar32 c) {
- if(c<=0xffff) {
- return appendCodeUnit((UChar)c);
- } else {
- return appendCodeUnit(U16_LEAD(c)) && appendCodeUnit(U16_TRAIL(c));
- }
-}
-
-UBool
-Appendable::appendString(const UChar *s, int32_t length) {
- if(length<0) {
- UChar c;
- while((c=*s++)!=0) {
- if(!appendCodeUnit(c)) {
- return FALSE;
- }
- }
- } else if(length>0) {
- const UChar *limit=s+length;
- do {
- if(!appendCodeUnit(*s++)) {
- return FALSE;
- }
- } while(s<limit);
- }
- return TRUE;
-}
-
-UBool
-Appendable::reserveAppendCapacity(int32_t /*appendCapacity*/) {
- return TRUE;
-}
-
-UChar *
-Appendable::getAppendBuffer(int32_t minCapacity,
- int32_t /*desiredCapacityHint*/,
- UChar *scratch, int32_t scratchCapacity,
- int32_t *resultCapacity) {
- if(minCapacity<1 || scratchCapacity<minCapacity) {
- *resultCapacity=0;
- return NULL;
- }
- *resultCapacity=scratchCapacity;
- return scratch;
-}
-
-// UnicodeStringAppendable is implemented in unistr.cpp.
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/bmpset.cpp b/contrib/libs/icu/common/bmpset.cpp
deleted file mode 100644
index bc79f5e5a63..00000000000
--- a/contrib/libs/icu/common/bmpset.cpp
+++ /dev/null
@@ -1,741 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2007-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: bmpset.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2007jan29
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/uniset.h"
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-#include "cmemory.h"
-#include "bmpset.h"
-#include "uassert.h"
-
-U_NAMESPACE_BEGIN
-
-BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
- list(parentList), listLength(parentListLength) {
- uprv_memset(latin1Contains, 0, sizeof(latin1Contains));
- uprv_memset(table7FF, 0, sizeof(table7FF));
- uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));
-
- /*
- * Set the list indexes for binary searches for
- * U+0800, U+1000, U+2000, .., U+F000, U+10000.
- * U+0800 is the first 3-byte-UTF-8 code point. Lower code points are
- * looked up in the bit tables.
- * The last pair of indexes is for finding supplementary code points.
- */
- list4kStarts[0]=findCodePoint(0x800, 0, listLength-1);
- int32_t i;
- for(i=1; i<=0x10; ++i) {
- list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1);
- }
- list4kStarts[0x11]=listLength-1;
- containsFFFD=containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10]);
-
- initBits();
- overrideIllegal();
-}
-
-BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
- containsFFFD(otherBMPSet.containsFFFD),
- list(newParentList), listLength(newParentListLength) {
- uprv_memcpy(latin1Contains, otherBMPSet.latin1Contains, sizeof(latin1Contains));
- uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
- uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
- uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));
-}
-
-BMPSet::~BMPSet() {
-}
-
-/*
- * Set bits in a bit rectangle in "vertical" bit organization.
- * start<limit<=0x800
- */
-static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
- U_ASSERT(start<limit);
- U_ASSERT(limit<=0x800);
-
- int32_t lead=start>>6; // Named for UTF-8 2-byte lead byte with upper 5 bits.
- int32_t trail=start&0x3f; // Named for UTF-8 2-byte trail byte with lower 6 bits.
-
- // Set one bit indicating an all-one block.
- uint32_t bits=(uint32_t)1<<lead;
- if((start+1)==limit) { // Single-character shortcut.
- table[trail]|=bits;
- return;
- }
-
- int32_t limitLead=limit>>6;
- int32_t limitTrail=limit&0x3f;
-
- if(lead==limitLead) {
- // Partial vertical bit column.
- while(trail<limitTrail) {
- table[trail++]|=bits;
- }
- } else {
- // Partial vertical bit column,
- // followed by a bit rectangle,
- // followed by another partial vertical bit column.
- if(trail>0) {
- do {
- table[trail++]|=bits;
- } while(trail<64);
- ++lead;
- }
- if(lead<limitLead) {
- bits=~(((unsigned)1<<lead)-1);
- if(limitLead<0x20) {
- bits&=((unsigned)1<<limitLead)-1;
- }
- for(trail=0; trail<64; ++trail) {
- table[trail]|=bits;
- }
- }
- // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
- // In that case, bits=1<<limitLead is undefined but the bits value
- // is not used because trail<limitTrail is already false.
- bits=(uint32_t)1<<((limitLead == 0x20) ? (limitLead - 1) : limitLead);
- for(trail=0; trail<limitTrail; ++trail) {
- table[trail]|=bits;
- }
- }
-}
-
-void BMPSet::initBits() {
- UChar32 start, limit;
- int32_t listIndex=0;
-
- // Set latin1Contains[].
- do {
- start=list[listIndex++];
- if(listIndex<listLength) {
- limit=list[listIndex++];
- } else {
- limit=0x110000;
- }
- if(start>=0x100) {
- break;
- }
- do {
- latin1Contains[start++]=1;
- } while(start<limit && start<0x100);
- } while(limit<=0x100);
-
- // Find the first range overlapping with (or after) 80..FF again,
- // to include them in table7FF as well.
- for(listIndex=0;;) {
- start=list[listIndex++];
- if(listIndex<listLength) {
- limit=list[listIndex++];
- } else {
- limit=0x110000;
- }
- if(limit>0x80) {
- if(start<0x80) {
- start=0x80;
- }
- break;
- }
- }
-
- // Set table7FF[].
- while(start<0x800) {
- set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800);
- if(limit>0x800) {
- start=0x800;
- break;
- }
-
- start=list[listIndex++];
- if(listIndex<listLength) {
- limit=list[listIndex++];
- } else {
- limit=0x110000;
- }
- }
-
- // Set bmpBlockBits[].
- int32_t minStart=0x800;
- while(start<0x10000) {
- if(limit>0x10000) {
- limit=0x10000;
- }
-
- if(start<minStart) {
- start=minStart;
- }
- if(start<limit) { // Else: Another range entirely in a known mixed-value block.
- if(start&0x3f) {
- // Mixed-value block of 64 code points.
- start>>=6;
- bmpBlockBits[start&0x3f]|=0x10001<<(start>>6);
- start=(start+1)<<6; // Round up to the next block boundary.
- minStart=start; // Ignore further ranges in this block.
- }
- if(start<limit) {
- if(start<(limit&~0x3f)) {
- // Multiple all-ones blocks of 64 code points each.
- set32x64Bits(bmpBlockBits, start>>6, limit>>6);
- }
-
- if(limit&0x3f) {
- // Mixed-value block of 64 code points.
- limit>>=6;
- bmpBlockBits[limit&0x3f]|=0x10001<<(limit>>6);
- limit=(limit+1)<<6; // Round up to the next block boundary.
- minStart=limit; // Ignore further ranges in this block.
- }
- }
- }
-
- if(limit==0x10000) {
- break;
- }
-
- start=list[listIndex++];
- if(listIndex<listLength) {
- limit=list[listIndex++];
- } else {
- limit=0x110000;
- }
- }
-}
-
-/*
- * Override some bits and bytes to the result of contains(FFFD)
- * for faster validity checking at runtime.
- * No need to set 0 values where they were reset to 0 in the constructor
- * and not modified by initBits().
- * (table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
- * Need to set 0 values for surrogates D800..DFFF.
- */
-void BMPSet::overrideIllegal() {
- uint32_t bits, mask;
- int32_t i;
-
- if(containsFFFD) {
- bits=3; // Lead bytes 0xC0 and 0xC1.
- for(i=0; i<64; ++i) {
- table7FF[i]|=bits;
- }
-
- bits=1; // Lead byte 0xE0.
- for(i=0; i<32; ++i) { // First half of 4k block.
- bmpBlockBits[i]|=bits;
- }
-
- mask= static_cast<uint32_t>(~(0x10001<<0xd)); // Lead byte 0xED.
- bits=1<<0xd;
- for(i=32; i<64; ++i) { // Second half of 4k block.
- bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
- }
- } else {
- mask= static_cast<uint32_t>(~(0x10001<<0xd)); // Lead byte 0xED.
- for(i=32; i<64; ++i) { // Second half of 4k block.
- bmpBlockBits[i]&=mask;
- }
- }
-}
-
-int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
- /* Examples:
- findCodePoint(c)
- set list[] c=0 1 3 4 7 8
- === ============== ===========
- [] [110000] 0 0 0 0 0 0
- [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2
- [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2
- [:Any:] [0, 110000] 1 1 1 1 1 1
- */
-
- // Return the smallest i such that c < list[i]. Assume
- // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
- if (c < list[lo])
- return lo;
- // High runner test. c is often after the last range, so an
- // initial check for this condition pays off.
- if (lo >= hi || c >= list[hi-1])
- return hi;
- // invariant: c >= list[lo]
- // invariant: c < list[hi]
- for (;;) {
- int32_t i = (lo + hi) >> 1;
- if (i == lo) {
- break; // Found!
- } else if (c < list[i]) {
- hi = i;
- } else {
- lo = i;
- }
- }
- return hi;
-}
-
-UBool
-BMPSet::contains(UChar32 c) const {
- if((uint32_t)c<=0xff) {
- return (UBool)latin1Contains[c];
- } else if((uint32_t)c<=0x7ff) {
- return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
- } else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
- int lead=c>>12;
- uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
- if(twoBits<=1) {
- // All 64 code points with the same bits 15..6
- // are either in the set or not.
- return (UBool)twoBits;
- } else {
- // Look up the code point in its 4k block of code points.
- return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
- }
- } else if((uint32_t)c<=0x10ffff) {
- // surrogate or supplementary code point
- return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
- } else {
- // Out-of-range code points get FALSE, consistent with long-standing
- // behavior of UnicodeSet::contains(c).
- return FALSE;
- }
-}
-
-/*
- * Check for sufficient length for trail unit for each surrogate pair.
- * Handle single surrogates as surrogate code points as usual in ICU.
- */
-const UChar *
-BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
- UChar c, c2;
-
- if(spanCondition) {
- // span
- do {
- c=*s;
- if(c<=0xff) {
- if(!latin1Contains[c]) {
- break;
- }
- } else if(c<=0x7ff) {
- if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
- break;
- }
- } else if(c<0xd800 || c>=0xe000) {
- int lead=c>>12;
- uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
- if(twoBits<=1) {
- // All 64 code points with the same bits 15..6
- // are either in the set or not.
- if(twoBits==0) {
- break;
- }
- } else {
- // Look up the code point in its 4k block of code points.
- if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
- break;
- }
- }
- } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
- // surrogate code point
- if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
- break;
- }
- } else {
- // surrogate pair
- if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
- break;
- }
- ++s;
- }
- } while(++s<limit);
- } else {
- // span not
- do {
- c=*s;
- if(c<=0xff) {
- if(latin1Contains[c]) {
- break;
- }
- } else if(c<=0x7ff) {
- if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
- break;
- }
- } else if(c<0xd800 || c>=0xe000) {
- int lead=c>>12;
- uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
- if(twoBits<=1) {
- // All 64 code points with the same bits 15..6
- // are either in the set or not.
- if(twoBits!=0) {
- break;
- }
- } else {
- // Look up the code point in its 4k block of code points.
- if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
- break;
- }
- }
- } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
- // surrogate code point
- if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
- break;
- }
- } else {
- // surrogate pair
- if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
- break;
- }
- ++s;
- }
- } while(++s<limit);
- }
- return s;
-}
-
-/* Symmetrical with span(). */
-const UChar *
-BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
- UChar c, c2;
-
- if(spanCondition) {
- // span
- for(;;) {
- c=*(--limit);
- if(c<=0xff) {
- if(!latin1Contains[c]) {
- break;
- }
- } else if(c<=0x7ff) {
- if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
- break;
- }
- } else if(c<0xd800 || c>=0xe000) {
- int lead=c>>12;
- uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
- if(twoBits<=1) {
- // All 64 code points with the same bits 15..6
- // are either in the set or not.
- if(twoBits==0) {
- break;
- }
- } else {
- // Look up the code point in its 4k block of code points.
- if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
- break;
- }
- }
- } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
- // surrogate code point
- if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
- break;
- }
- } else {
- // surrogate pair
- if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
- break;
- }
- --limit;
- }
- if(s==limit) {
- return s;
- }
- }
- } else {
- // span not
- for(;;) {
- c=*(--limit);
- if(c<=0xff) {
- if(latin1Contains[c]) {
- break;
- }
- } else if(c<=0x7ff) {
- if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
- break;
- }
- } else if(c<0xd800 || c>=0xe000) {
- int lead=c>>12;
- uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
- if(twoBits<=1) {
- // All 64 code points with the same bits 15..6
- // are either in the set or not.
- if(twoBits!=0) {
- break;
- }
- } else {
- // Look up the code point in its 4k block of code points.
- if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
- break;
- }
- }
- } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
- // surrogate code point
- if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
- break;
- }
- } else {
- // surrogate pair
- if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
- break;
- }
- --limit;
- }
- if(s==limit) {
- return s;
- }
- }
- }
- return limit+1;
-}
-
-/*
- * Span the initial part of the UTF-8 string s[0..length[ in which every
- * character c has contains(c)==spanCondition, and return a pointer to the
- * byte that limits the span.
- * The first byte *s and the last byte *(limit-1) are read without a length
- * check, so the caller must pass length>0.
- *
- * Precheck for sufficient trail bytes at end of string only once per span.
- * Check validity.
- * Ill-formed byte sequences are given the same value as contains(U+FFFD).
- */
-const uint8_t *
-BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
- const uint8_t *limit=s+length;
- uint8_t b=*s;
- if(U8_IS_SINGLE(b)) {
- // Initial all-ASCII span.
- // spanCondition has not been pinned to 0/1 yet; any non-zero value
- // is treated as "contained" by this test.
- if(spanCondition) {
- do {
- if(!latin1Contains[b] || ++s==limit) {
- return s;
- }
- b=*s;
- } while(U8_IS_SINGLE(b));
- } else {
- do {
- if(latin1Contains[b] || ++s==limit) {
- return s;
- }
- b=*s;
- } while(U8_IS_SINGLE(b));
- }
- // Remaining length, used by the truncation precheck below.
- length=(int32_t)(limit-s);
- }
-
- if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
- spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
- }
-
- // limit0 is what gets returned when the loop below reaches limit.
- // limit itself may be moved back to exclude a truncated trailing sequence.
- const uint8_t *limit0=limit;
-
- /*
- * Make sure that the last 1/2/3/4-byte sequence before limit is complete
- * or runs into a lead byte.
- * In the span loop compare s with limit only once
- * per multi-byte character.
- *
- * Give a trailing illegal sequence the same value as the result of contains(FFFD),
- * including it if that is part of the span, otherwise set limit0 to before
- * the truncated sequence.
- */
- b=*(limit-1);
- if((int8_t)b<0) {
- // b>=0x80: lead or trail byte
- if(b<0xc0) {
- // single trail byte, check for preceding 3- or 4-byte lead byte
- if(length>=2 && (b=*(limit-2))>=0xe0) {
- limit-=2;
- if(containsFFFD!=spanCondition) {
- limit0=limit;
- }
- } else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
- // 4-byte lead byte with only two trail bytes
- limit-=3;
- if(containsFFFD!=spanCondition) {
- limit0=limit;
- }
- }
- } else {
- // lead byte with no trail bytes
- --limit;
- if(containsFFFD!=spanCondition) {
- limit0=limit;
- }
- }
- }
-
- uint8_t t1, t2, t3;
-
- while(s<limit) {
- b=*s;
- if(U8_IS_SINGLE(b)) {
- // ASCII
- if(spanCondition) {
- do {
- if(!latin1Contains[b]) {
- return s;
- } else if(++s==limit) {
- return limit0;
- }
- b=*s;
- } while(U8_IS_SINGLE(b));
- } else {
- do {
- if(latin1Contains[b]) {
- return s;
- } else if(++s==limit) {
- return limit0;
- }
- b=*s;
- } while(U8_IS_SINGLE(b));
- }
- }
- // From here on every "return s-1" points back at the lead byte,
- // i.e. at the start of the character that ended the span.
- ++s; // Advance past the lead byte.
- if(b>=0xe0) {
- if(b<0xf0) {
- // b is 0xe0..0xef: lead byte followed by two trail bytes.
- if( /* handle U+0000..U+FFFF inline */
- (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
- (t2=(uint8_t)(s[1]-0x80)) <= 0x3f
- ) {
- // Keep the low 4 bits of the lead byte; they become bits 15..12 of c below.
- b&=0xf;
- // Extract bit b and bit b+16 of the block word for this middle trail byte.
- uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
- if(twoBits<=1) {
- // All 64 code points with this lead byte and middle trail byte
- // are either in the set or not.
- if(twoBits!=(uint32_t)spanCondition) {
- return s-1;
- }
- } else {
- // Look up the code point in its 4k block of code points.
- UChar32 c=(b<<12)|(t1<<6)|t2;
- if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) {
- return s-1;
- }
- }
- s+=2;
- continue;
- }
- } else if( /* handle U+10000..U+10FFFF inline */
- (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
- (t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
- (t3=(uint8_t)(s[2]-0x80)) <= 0x3f
- ) {
- // Give an illegal sequence the same value as the result of contains(FFFD).
- UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
- if( ( (0x10000<=c && c<=0x10ffff) ?
- containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
- containsFFFD
- ) != spanCondition
- ) {
- return s-1;
- }
- s+=3;
- continue;
- }
- } else {
- // b<0xe0 here; the b>=0xc0 test rejects the remaining values below 0xc0.
- if( /* handle U+0000..U+07FF inline */
- b>=0xc0 &&
- (t1=(uint8_t)(*s-0x80)) <= 0x3f
- ) {
- // table7FF is indexed by the trail byte; the bit number is the low 5 bits of the lead byte.
- if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
- return s-1;
- }
- ++s;
- continue;
- }
- }
-
- // Give an illegal sequence the same value as the result of contains(FFFD).
- // Handle each byte of an illegal sequence separately to simplify the code;
- // no need to optimize error handling.
- if(containsFFFD!=spanCondition) {
- return s-1;
- }
- }
-
- return limit0;
-}
-
-/*
- * Span the trailing part of the UTF-8 string s[0..length[ in which every
- * character c has contains(c)==spanCondition, and return the index at which
- * that trailing span starts (0 if the whole string is spanned).
- * The loop reads s[--length] before testing length, so the caller must
- * pass length>0.
- *
- * While going backwards through UTF-8 optimize only for ASCII.
- * Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not
- * possible to tell from the last byte in a multi-byte sequence how many
- * preceding bytes there should be. Therefore, going backwards through UTF-8
- * is much harder than going forward.
- */
-int32_t
-BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
- if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
- spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
- }
-
- uint8_t b;
-
- do {
- b=s[--length];
- if(U8_IS_SINGLE(b)) {
- // ASCII sub-span
- // length is the index of b; a mismatch at b means the span starts at length+1.
- if(spanCondition) {
- do {
- if(!latin1Contains[b]) {
- return length+1;
- } else if(length==0) {
- return 0;
- }
- b=s[--length];
- } while(U8_IS_SINGLE(b));
- } else {
- do {
- if(latin1Contains[b]) {
- return length+1;
- } else if(length==0) {
- return 0;
- }
- b=s[--length];
- } while(U8_IS_SINGLE(b));
- }
- }
-
- // prev is the index of the last byte of the character about to be decoded;
- // prev+1 is returned when that character ends the span.
- int32_t prev=length;
- UChar32 c;
- // trail byte: collect a multi-byte character
- // (or lead byte in last-trail position)
- // NOTE(review): the helper receives &length and is expected to move it back to
- // the start of the character; the -3 argument presumably selects U+FFFD for
- // ill-formed input -- confirm against utf8_prevCharSafeBody.
- c=utf8_prevCharSafeBody(s, 0, &length, b, -3);
- // c is a valid code point, not ASCII, not a surrogate
- if(c<=0x7ff) {
- // Bit (c>>6) of table7FF[c&0x3f] is the membership bit for c.
- if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
- return prev+1;
- }
- } else if(c<=0xffff) {
- int lead=c>>12;
- // Extract bit lead and bit lead+16 of the block word for bits 11..6 of c.
- uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
- if(twoBits<=1) {
- // All 64 code points with the same bits 15..6
- // are either in the set or not.
- if(twoBits!=(uint32_t)spanCondition) {
- return prev+1;
- }
- } else {
- // Look up the code point in its 4k block of code points.
- if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) {
- return prev+1;
- }
- }
- } else {
- // Supplementary code point: search the last section of the inversion list.
- if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) {
- return prev+1;
- }
- }
- } while(length>0);
- return 0;
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/bmpset.h b/contrib/libs/icu/common/bmpset.h
deleted file mode 100644
index 018aeb7f95b..00000000000
--- a/contrib/libs/icu/common/bmpset.h
+++ /dev/null
@@ -1,164 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2007, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: bmpset.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2007jan29
-* created by: Markus W. Scherer
-*/
-
-#ifndef __BMPSET_H__
-#define __BMPSET_H__
-
-#include "unicode/utypes.h"
-#include "unicode/uniset.h"
-
-U_NAMESPACE_BEGIN
-
-/*
- * Helper class for frozen UnicodeSets, implements contains() and span()
- * optimized for BMP code points. Structured to be UTF-8-friendly.
- *
- * Latin-1: Look up bytes.
- * 2-byte characters: Bits organized vertically.
- * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF,
- * with mixed for illegal ranges.
- * Supplementary characters: Binary search over
- * the supplementary part of the parent set's inversion list.
- */
-class BMPSet : public UMemory {
-public:
- /*
- * NOTE(review): the constructors are defined outside this header. Judging by the
- * `list`/`listLength` members they presumably keep a pointer to the parent's
- * inversion list rather than copying it -- confirm in bmpset.cpp.
- */
- BMPSet(const int32_t *parentList, int32_t parentListLength);
- BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength);
- virtual ~BMPSet();
-
- /* Test whether the set contains the code point c. */
- virtual UBool contains(UChar32 c) const;
-
- /*
- * Span the initial substring for which each character c has spanCondition==contains(c).
- * It must be s<limit and spanCondition==0 or 1.
- * @return The string pointer which limits the span.
- */
- const UChar *span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
-
- /*
- * Span the trailing substring for which each character c has spanCondition==contains(c).
- * It must be s<limit and spanCondition==0 or 1.
- * @return The string pointer which starts the span.
- */
- const UChar *spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
-
- /*
- * Span the initial substring for which each character c has spanCondition==contains(c).
- * It must be length>0 and spanCondition==0 or 1.
- * @return The string pointer which limits the span.
- */
- const uint8_t *spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
-
- /*
- * Span the trailing substring for which each character c has spanCondition==contains(c).
- * It must be length>0 and spanCondition==0 or 1.
- * @return The start of the span.
- */
- int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
-
-private:
- /* Helpers defined in bmpset.cpp; by their names they fill the lookup tables below. */
- void initBits();
- void overrideIllegal();
-
- /**
- * Same as UnicodeSet::findCodePoint(UChar32 c) const except that the
- * binary search is restricted for finding code points in a certain range.
- *
- * For restricting the search for finding in the range start..end,
- * pass in
- * lo=findCodePoint(start) and
- * hi=findCodePoint(end)
- * with 0<=lo<=hi<len.
- * findCodePoint(c) defaults to lo=0 and hi=len-1.
- *
- * @param c a character in a subrange of MIN_VALUE..MAX_VALUE
- * @param lo The lowest index to be returned.
- * @param hi The highest index to be returned.
- * @return the smallest integer i in the range lo..hi,
- * inclusive, such that c < list[i]
- */
- int32_t findCodePoint(UChar32 c, int32_t lo, int32_t hi) const;
-
- /*
- * Membership test built on findCodePoint(c, lo, hi):
- * returns the low bit of the index found.
- */
- inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const;
-
- /*
- * One byte 0 or 1 per Latin-1 character.
- */
- UBool latin1Contains[0x100];
-
- /* TRUE if contains(U+FFFD). */
- UBool containsFFFD;
-
- /*
- * One bit per code point from U+0000..U+07FF.
- * The bits are organized vertically; consecutive code points
- * correspond to the same bit positions in consecutive table words.
- * With code point parts
- * lead=c{10..6}
- * trail=c{5..0}
- * it is set.contains(c)==(table7FF[trail] bit lead)
- *
- * Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD)
- * for faster validity checking at runtime.
- */
- uint32_t table7FF[64];
-
- /*
- * One bit per 64 BMP code points.
- * The bits are organized vertically; consecutive 64-code point blocks
- * correspond to the same bit position in consecutive table words.
- * With code point parts
- * lead=c{15..12}
- * t1=c{11..6}
- * test bits (lead+16) and lead in bmpBlockBits[t1].
- * If the upper bit is 0, then the lower bit indicates if contains(c)
- * for all code points in the 64-block.
- * If the upper bit is 1, then the block is mixed and set.contains(c)
- * must be called.
- *
- * Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to
- * the result of contains(FFFD) for faster validity checking at runtime.
- */
- uint32_t bmpBlockBits[64];
-
- /*
- * Inversion list indexes for restricted binary searches in
- * findCodePoint(), from
- * findCodePoint(U+0800, U+1000, U+2000, .., U+F000, U+10000).
- * U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are
- * always looked up in the bit tables.
- * The last pair of indexes is for finding supplementary code points.
- * Callers pass list4kStarts[n] and list4kStarts[n+1] as the lo/hi bounds,
- * with [0x10] and [0x11] used for supplementary code points.
- */
- int32_t list4kStarts[18];
-
- /*
- * The inversion list of the parent set, for the slower contains() implementation
- * for mixed BMP blocks and for supplementary code points.
- * The list is terminated with list[listLength-1]=0x110000.
- */
- const int32_t *list;
- int32_t listLength;
-};
-
-// Run the restricted binary search for c between list indexes lo and hi and
-// report TRUE exactly when the index it returns is odd.
-inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {
-    const int32_t listIndex = findCodePoint(c, lo, hi);
-    return (UBool)(listIndex & 1);
-}
-
-U_NAMESPACE_END
-
-#endif
diff --git a/contrib/libs/icu/common/brkeng.cpp b/contrib/libs/icu/common/brkeng.cpp
deleted file mode 100644
index 78492db6620..00000000000
--- a/contrib/libs/icu/common/brkeng.cpp
+++ /dev/null
@@ -1,284 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- ************************************************************************************
- * Copyright (C) 2006-2016, International Business Machines Corporation
- * and others. All Rights Reserved.
- ************************************************************************************
- */
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/uchar.h"
-#include "unicode/uniset.h"
-#include "unicode/chariter.h"
-#include "unicode/ures.h"
-#include "unicode/udata.h"
-#include "unicode/putil.h"
-#include "unicode/ustring.h"
-#include "unicode/uscript.h"
-#include "unicode/ucharstrie.h"
-#include "unicode/bytestrie.h"
-
-#include "brkeng.h"
-#include "cmemory.h"
-#include "dictbe.h"
-#include "charstr.h"
-#include "dictionarydata.h"
-#include "mutex.h"
-#include "uvector.h"
-#include "umutex.h"
-#include "uresimp.h"
-#include "ubrkimpl.h"
-
-U_NAMESPACE_BEGIN
-
-/*
- ******************************************************************
- */
-
-// Default constructor; the body is empty.
-LanguageBreakEngine::LanguageBreakEngine() {}
-
-// Out-of-line definition of the virtual destructor; the body is empty.
-LanguageBreakEngine::~LanguageBreakEngine() {}
-
-/*
- ******************************************************************
- */
-
-// Default constructor; the body is empty.
-LanguageBreakFactory::LanguageBreakFactory() {}
-
-// Out-of-line definition of the virtual destructor; the body is empty.
-LanguageBreakFactory::~LanguageBreakFactory() {}
-
-/*
- ******************************************************************
- */
-
-// Start without a set of handled characters (fHandled is null).
-// The status argument is accepted but never read or written.
-UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) : fHandled(nullptr) {
-}
-
-// Release the set of handled characters, if one was ever created.
-UnhandledEngine::~UnhandledEngine() {
-    UnicodeSet *handled = fHandled;
-    fHandled = nullptr;
-    delete handled;
-}
-
-// Report whether c is in the set of handled characters.
-// Nothing is handled while the set has not been created yet.
-UBool
-UnhandledEngine::handles(UChar32 c) const {
-    if (fHandled == nullptr) {
-        return (UBool)false;
-    }
-    return (UBool)(fHandled->contains(c) != 0);
-}
-
-/*
- * Advance the text iterator over the run of characters that this engine
- * handles, stopping at endPos or at the first character that is not in the
- * handled set. No breaks are reported: foundBreaks is left untouched and
- * the return value is always 0.
- *
- * fHandled stays null until handleCharacter() has been called, so it is
- * checked here in the same way as in handles(); with no set, no character is
- * handled and the iterator is not moved.
- */
-int32_t
-UnhandledEngine::findBreaks( UText *text,
-                             int32_t /* startPos */,
-                             int32_t endPos,
-                             UVector32 &/*foundBreaks*/ ) const {
-    UChar32 c = utext_current32(text);
-    while((int32_t)utext_getNativeIndex(text) < endPos &&
-            fHandled != nullptr && fHandled->contains(c)) {
-        utext_next32(text);            // TODO: recast loop to work with post-increment operations.
-        c = utext_current32(text);
-    }
-    return 0;
-}
-
-// Make this engine handle c: if c is not in the handled set yet, apply the
-// script property value of c to the set.
-void
-UnhandledEngine::handleCharacter(UChar32 c) {
-    // Create the set on first use.
-    if (fHandled == nullptr) {
-        fHandled = new UnicodeSet();
-    }
-    // Nothing to do if the allocation failed or c is covered already.
-    if (fHandled == nullptr || fHandled->contains(c)) {
-        return;
-    }
-    // Apply the entire script of the character.
-    // The error code is local and is not reported to the caller.
-    UErrorCode errorCode = U_ZERO_ERROR;
-    const int32_t scriptOfC = u_getIntPropertyValue(c, UCHAR_SCRIPT);
-    fHandled->applyIntPropertyValue(UCHAR_SCRIPT, scriptOfC, errorCode);
-}
-
-/*
- ******************************************************************
- */
-
-// Start with no engine stack; getEngineFor() creates it on first use.
-// The status argument is not used.
-ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
-    fEngines = nullptr;
-}
-
-// Delete the engine stack. Deleting a null pointer is a no-op, so no
-// explicit check is needed when the stack was never created.
-ICULanguageBreakFactory::~ICULanguageBreakFactory() {
-    delete fEngines;
-}
-
-U_NAMESPACE_END
-U_CDECL_BEGIN
-// Element deleter handed to the UStack of engines in getEngineFor():
-// interprets obj as a LanguageBreakEngine and deletes it.
-static void U_CALLCONV _deleteEngine(void *obj) {
-    const icu::LanguageBreakEngine *engine =
-        static_cast<const icu::LanguageBreakEngine *>(obj);
-    delete engine;
-}
-U_CDECL_END
-U_NAMESPACE_BEGIN
-
-/*
- * Return an engine that handles c, or NULL if there is none.
- * Engines created earlier are searched first, newest to oldest; if none
- * handles c, loadEngineFor(c) is asked to create one, which is then pushed
- * onto the stack. The whole function runs under a function-local static mutex.
- */
-const LanguageBreakEngine *
-ICULanguageBreakFactory::getEngineFor(UChar32 c) {
-    UErrorCode status = U_ZERO_ERROR;
-
-    static UMutex gBreakEngineMutex;
-    Mutex m(&gBreakEngineMutex);
-
-    if (fEngines != NULL) {
-        // Search the existing engines from the top of the stack down.
-        for (int32_t i = fEngines->size() - 1; i >= 0; --i) {
-            const LanguageBreakEngine *candidate =
-                (const LanguageBreakEngine *)(fEngines->elementAt(i));
-            if (candidate != NULL && candidate->handles(c)) {
-                return candidate;
-            }
-        }
-    } else {
-        // First call: create the stack, with _deleteEngine as element deleter.
-        UStack *engines = new UStack(_deleteEngine, NULL, status);
-        if (U_FAILURE(status) || engines == NULL) {
-            // Note: no way to return error code to caller.
-            delete engines;
-            return NULL;
-        }
-        fEngines = engines;
-    }
-
-    // We didn't find an engine. Create one.
-    const LanguageBreakEngine *created = loadEngineFor(c);
-    if (created != NULL) {
-        fEngines->push((void *)created, status);
-    }
-    return created;
-}
-
-/*
- * Create a dictionary-based break engine for the script of c.
- * Returns NULL if the script cannot be determined, if no dictionary matcher
- * can be loaded for it, if the script has no engine class in the switch
- * below, or if constructing the engine fails.
- */
-const LanguageBreakEngine *
-ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
-    UErrorCode status = U_ZERO_ERROR;
-    UScriptCode code = uscript_getScript(c, &status);
-    if (U_FAILURE(status)) {
-        return NULL;
-    }
-
-    DictionaryMatcher *m = loadDictionaryMatcherFor(code);
-    if (m == NULL) {
-        return NULL;
-    }
-
-    const LanguageBreakEngine *engine = NULL;
-    switch(code) {
-    case USCRIPT_THAI:
-        engine = new ThaiBreakEngine(m, status);
-        break;
-    case USCRIPT_LAO:
-        engine = new LaoBreakEngine(m, status);
-        break;
-    case USCRIPT_MYANMAR:
-        engine = new BurmeseBreakEngine(m, status);
-        break;
-    case USCRIPT_KHMER:
-        engine = new KhmerBreakEngine(m, status);
-        break;
-
-#if !UCONFIG_NO_NORMALIZATION
-    // CJK not available w/o normalization
-    case USCRIPT_HANGUL:
-        engine = new CjkBreakEngine(m, kKorean, status);
-        break;
-
-    // use same BreakEngine and dictionary for both Chinese and Japanese
-    case USCRIPT_HIRAGANA:
-    case USCRIPT_KATAKANA:
-    case USCRIPT_HAN:
-        engine = new CjkBreakEngine(m, kChineseJapanese, status);
-        break;
-#if 0
-    // TODO: Have to get some characters with script=common handled
-    // by CjkBreakEngine (e.g. U+309B). Simply subjecting
-    // them to CjkBreakEngine does not work. The engine has to
-    // special-case them.
-    case USCRIPT_COMMON:
-    {
-        UBlockCode block = ublock_getCode(code);
-        if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA)
-            engine = new CjkBreakEngine(dict, kChineseJapanese, status);
-        break;
-    }
-#endif
-#endif
-
-    default:
-        break;
-    }
-
-    if (engine == NULL) {
-        // No engine was created, so the matcher is still ours to delete.
-        delete m;
-        return NULL;
-    }
-    if (U_FAILURE(status)) {
-        // Only the engine is deleted here; it is presumed to own m once constructed.
-        delete engine;
-        return NULL;
-    }
-    return engine;
-}
-
-/*
- * Look up the dictionary file name for the script in the "dictionaries"
- * table of the break iterator resource bundle, open that data item and wrap
- * it in a matcher according to its trie type.
- * Returns NULL if the lookup or the data open fails, if the trie type is not
- * one of the two handled below, or if allocating the matcher fails.
- */
-DictionaryMatcher *
-ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
-    UErrorCode status = U_ZERO_ERROR;
-    // open root from brkitr tree.
-    UResourceBundle *bundle = ures_open(U_ICUDATA_BRKITR, "", &status);
-    bundle = ures_getByKeyWithFallback(bundle, "dictionaries", bundle, &status);
-    int32_t nameLength = 0;
-    const UChar *fileName =
-        ures_getStringByKeyWithFallback(bundle, uscript_getShortName(script), &nameLength, &status);
-    if (U_FAILURE(status)) {
-        ures_close(bundle);
-        return NULL;
-    }
-
-    // Split "name.ext" at the last dot into the item name and its type.
-    CharString itemName;
-    CharString itemType;
-    const UChar *lastDot = u_memrchr(fileName, 0x002e, nameLength);
-    if (lastDot != NULL) {
-        int32_t stemLength = (int32_t)(lastDot - fileName);
-        itemType.appendInvariantChars(
-            UnicodeString(FALSE, lastDot + 1, nameLength - stemLength - 1), status);
-        nameLength = stemLength;
-    }
-    itemName.appendInvariantChars(UnicodeString(FALSE, fileName, nameLength), status);
-    ures_close(bundle);
-
-    UDataMemory *file = udata_open(U_ICUDATA_BRKITR, itemType.data(), itemName.data(), &status);
-    if (U_FAILURE(status)) {
-        // we don't have a dictionary matcher.
-        // returning NULL here will cause us to fail to find a dictionary break engine, as expected
-        return NULL;
-    }
-
-    // build trie
-    const uint8_t *data = (const uint8_t *)udata_getMemory(file);
-    const int32_t *indexes = (const int32_t *)data;
-    const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
-    const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
-    DictionaryMatcher *matcher = NULL;
-    if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
-        const int32_t transform = indexes[DictionaryData::IX_TRANSFORM];
-        const char *characters = (const char *)(data + offset);
-        matcher = new BytesDictionaryMatcher(characters, transform, file);
-    } else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
-        const UChar *characters = (const UChar *)(data + offset);
-        matcher = new UCharsDictionaryMatcher(characters, file);
-    }
-    if (matcher == NULL) {
-        // no matcher exists to take ownership - either we are an invalid
-        // type or memory allocation failed
-        udata_close(file);
-    }
-    return matcher;
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/contrib/libs/icu/common/brkeng.h b/contrib/libs/icu/common/brkeng.h
deleted file mode 100644
index e40fce13f64..00000000000
--- a/contrib/libs/icu/common/brkeng.h
+++ /dev/null
@@ -1,271 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/**
- ************************************************************************************
- * Copyright (C) 2006-2012, International Business Machines Corporation and others. *
- * All Rights Reserved. *
- ************************************************************************************
- */
-
-#ifndef BRKENG_H
-#define BRKENG_H
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-#include "unicode/utext.h"
-#include "unicode/uscript.h"
-
-U_NAMESPACE_BEGIN
-
-class UnicodeSet;
-class UStack;
-class UVector32;
-class DictionaryMatcher;
-
-/*******************************************************************
- * LanguageBreakEngine
- */
-
-/**
- * <p>LanguageBreakEngines implement language-specific knowledge for
- * finding text boundaries within a run of characters belonging to a
- * specific set. The boundaries will be of a specific kind, e.g. word,
- * line, etc.</p>
- *
- * <p>LanguageBreakEngines should normally be implemented so as to
- * be shared between threads without locking.</p>
- */
-class LanguageBreakEngine : public UMemory {
- public:
-
- /**
- * <p>Default constructor.</p>
- *
- */
- LanguageBreakEngine();
-
- /**
- * <p>Virtual destructor.</p>
- */
- virtual ~LanguageBreakEngine();
-
- /**
- * <p>Indicate whether this engine handles a particular character for
- * a particular kind of break.</p>
- * <p>Pure virtual: every concrete engine must implement it.</p>
- *
- * @param c A character which begins a run that the engine might handle
- * @return TRUE if this engine handles the particular character and break
- * type.
- */
- virtual UBool handles(UChar32 c) const = 0;
-
- /**
- * <p>Find any breaks within a run in the supplied text.</p>
- * <p>Pure virtual: every concrete engine must implement it.</p>
- *
- * @param text A UText representing the text. The
- * iterator is left at the end of the run of characters which the engine
- * is capable of handling.
- * @param startPos The start of the run within the supplied text.
- * @param endPos The end of the run within the supplied text.
- * @param foundBreaks A UVector32 (vector of int32_t) to receive the breaks.
- * @return The number of breaks found.
- */
- virtual int32_t findBreaks( UText *text,
- int32_t startPos,
- int32_t endPos,
- UVector32 &foundBreaks ) const = 0;
-
-};
-
-/*******************************************************************
- * LanguageBreakFactory
- */
-
-/**
- * <p>LanguageBreakFactorys find and return a LanguageBreakEngine
- * that can determine breaks for characters in a specific set, if
- * such an object can be found.</p>
- *
- * <p>If a LanguageBreakFactory is to be shared between threads,
- * appropriate synchronization must be used; there is none internal
- * to the factory.</p>
- *
- * <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
- * normally be shared between threads without synchronization, unless
- * the specific subclass of LanguageBreakFactory indicates otherwise.</p>
- *
- * <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
- * it returns when it itself is deleted, unless the specific subclass of
- * LanguageBreakFactory indicates otherwise. Naturally, the factory should
- * not be deleted until the LanguageBreakEngines it has returned are no
- * longer needed.</p>
- */
-class LanguageBreakFactory : public UMemory {
- public:
-
- /**
- * <p>Default constructor.</p>
- *
- */
- LanguageBreakFactory();
-
- /**
- * <p>Virtual destructor.</p>
- */
- virtual ~LanguageBreakFactory();
-
- /**
- * <p>Find and return a LanguageBreakEngine that can find the desired
- * kind of break for the set of characters to which the supplied
- * character belongs. It is up to the set of available engines to
- * determine what the sets of characters are.</p>
- * <p>Pure virtual: every concrete factory must implement it.</p>
- *
- * @param c A character that begins a run for which a LanguageBreakEngine is
- * sought.
- * @return A LanguageBreakEngine with the desired characteristics, or 0
- * (a null pointer) if none is available.
- */
- virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0;
-
-};
-
-/*******************************************************************
- * UnhandledEngine
- */
-
-/**
- * <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
- * handles characters that no other LanguageBreakEngine is available to
- * handle. It is told the character and the type of break; at its
- * discretion it may handle more than the specified character (e.g.,
- * the entire script to which that character belongs).</p>
- *
- * <p>UnhandledEngines may not be shared between threads without
- * external synchronization.</p>
- */
-
-class UnhandledEngine : public LanguageBreakEngine {
- private:
-
- /**
- * The set of characters handled.
- * Null until handleCharacter() creates it; deleted by the destructor.
- * @internal
- */
-
- UnicodeSet *fHandled;
-
- public:
-
- /**
- * <p>Constructor.</p>
- *
- * @param status Accepted for interface uniformity; the implementation
- * does not use it.
- */
- UnhandledEngine(UErrorCode &status);
-
- /**
- * <p>Virtual destructor.</p>
- */
- virtual ~UnhandledEngine();
-
- /**
- * <p>Indicate whether this engine handles a particular character for
- * a particular kind of break.</p>
- *
- * @param c A character which begins a run that the engine might handle
- * @return TRUE if this engine handles the particular character and break
- * type. Always FALSE before the first handleCharacter() call.
- */
- virtual UBool handles(UChar32 c) const;
-
- /**
- * <p>Skip over a run of handled characters in the supplied text.
- * This engine does not report any breaks.</p>
- *
- * @param text A UText representing the text. The
- * iterator is left at the end of the run of characters which the engine
- * is capable of handling.
- * @param startPos The start of the run within the supplied text (unused).
- * @param endPos The end of the run within the supplied text.
- * @param foundBreaks Unused by this engine; nothing is added to it.
- * @return The number of breaks found, which is always 0.
- */
- virtual int32_t findBreaks( UText *text,
- int32_t startPos,
- int32_t endPos,
- UVector32 &foundBreaks ) const;
-
- /**
- * <p>Tell the engine to handle a particular character and break type.</p>
- * <p>Failures (allocation or property lookup) are not reported to the caller.</p>
- *
- * @param c A character which the engine should handle
- */
- virtual void handleCharacter(UChar32 c);
-
-};
-
-/*******************************************************************
- * ICULanguageBreakFactory
- */
-
-/**
- * <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
- * ICU. It creates dictionary-based LanguageBreakEngines from dictionary
- * data in the ICU data file.</p>
- */
-class ICULanguageBreakFactory : public LanguageBreakFactory {
- private:
-
- /**
- * The stack of break engines created by this factory.
- * Null until the first getEngineFor() call; deleted by the destructor.
- * @internal
- */
-
- UStack *fEngines;
-
- public:
-
- /**
- * <p>Standard constructor.</p>
- *
- * @param status Not used by the implementation.
- */
- ICULanguageBreakFactory(UErrorCode &status);
-
- /**
- * <p>Virtual destructor.</p>
- */
- virtual ~ICULanguageBreakFactory();
-
- /**
- * <p>Find and return a LanguageBreakEngine that can find the desired
- * kind of break for the set of characters to which the supplied
- * character belongs. It is up to the set of available engines to
- * determine what the sets of characters are.</p>
- *
- * <p>The implementation holds a static mutex for the duration of the call
- * and caches every engine it creates in the engine stack.</p>
- *
- * @param c A character that begins a run for which a LanguageBreakEngine is
- * sought.
- * @return A LanguageBreakEngine with the desired characteristics, or 0.
- */
- virtual const LanguageBreakEngine *getEngineFor(UChar32 c);
-
-protected:
- /**
- * <p>Create a LanguageBreakEngine for the set of characters to which
- * the supplied character belongs, for the specified break type.</p>
- *
- * @param c A character that begins a run for which a LanguageBreakEngine is
- * sought.
- * @return A LanguageBreakEngine with the desired characteristics, or 0.
- */
- virtual const LanguageBreakEngine *loadEngineFor(UChar32 c);
-
- /**
- * <p>Create a DictionaryMatcher for the specified script and break type.</p>
- * @param script An ISO 15924 script code that identifies the dictionary to be
- * created.
- * @return A DictionaryMatcher with the desired characteristics, or NULL.
- * A non-NULL result is owned by the caller.
- */
- virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);
-};
-
-U_NAMESPACE_END
-
- /* BRKENG_H */
-#endif
diff --git a/contrib/libs/icu/common/brkiter.cpp b/contrib/libs/icu/common/brkiter.cpp
deleted file mode 100644
index b9b6ca65cd4..00000000000
--- a/contrib/libs/icu/common/brkiter.cpp
+++ /dev/null
@@ -1,527 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 1997-2015, International Business Machines Corporation and
-* others. All Rights Reserved.
-*******************************************************************************
-*
-* File brkiter.cpp
-*
-* Modification History:
-*
-* Date Name Description
-* 02/18/97 aliu Converted from OpenClass. Added DONE.
-* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
-*****************************************************************************************
-*/
-
-// *****************************************************************************
-// This file was generated from the java source file BreakIterator.java
-// *****************************************************************************
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/rbbi.h"
-#include "unicode/brkiter.h"
-#include "unicode/udata.h"
-#include "unicode/ures.h"
-#include "unicode/ustring.h"
-#include "unicode/filteredbrk.h"
-#include "ucln_cmn.h"
-#include "cstring.h"
-#include "umutex.h"
-#include "servloc.h"
-#include "locbased.h"
-#include "uresimp.h"
-#include "uassert.h"
-#include "ubrkimpl.h"
-#include "utracimp.h"
-#include "charstr.h"
-
-// *****************************************************************************
-// class BreakIterator
-// This class implements methods for finding the location of boundaries in text.
-// Instances of BreakIterator maintain a current position and scan over text
-// returning the index of characters where boundaries occur.
-// *****************************************************************************
-
-U_NAMESPACE_BEGIN
-
-// -------------------------------------
-
-BreakIterator*
-BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status)
-{
- char fnbuff[256];
- char ext[4]={'\0'};
- CharString actualLocale;
- int32_t size;
- const UChar* brkfname = NULL;
- UResourceBundle brkRulesStack;
- UResourceBundle brkNameStack;
- UResourceBundle *brkRules = &brkRulesStack;
- UResourceBundle *brkName = &brkNameStack;
- RuleBasedBreakIterator *result = NULL;
-
- if (U_FAILURE(status))
- return NULL;
-
- ures_initStackObject(brkRules);
- ures_initStackObject(brkName);
-
- // Get the locale
- UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status);
-
- // Get the "boundaries" array.
- if (U_SUCCESS(status)) {
- brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
- // Get the string object naming the rules file
- brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
- // Get the actual string
- brkfname = ures_getString(brkName, &size, &status);
- U_ASSERT((size_t)size<sizeof(fnbuff));
- if ((size_t)size>=sizeof(fnbuff)) {
- size=0;
- if (U_SUCCESS(status)) {
- status = U_BUFFER_OVERFLOW_ERROR;
- }
- }
-
- // Use the string if we found it
- if (U_SUCCESS(status) && brkfname) {
- actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status);
-
- UChar* extStart=u_strchr(brkfname, 0x002e);
- int len = 0;
- if(extStart!=NULL){
- len = (int)(extStart-brkfname);
- u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
- u_UCharsToChars(brkfname, fnbuff, len);
- }
- fnbuff[len]=0; // nul terminate
- }
- }
-
- ures_close(brkRules);
- ures_close(brkName);
-
- UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
- if (U_FAILURE(status)) {
- ures_close(b);
- return NULL;
- }
-
- // Create a RuleBasedBreakIterator
- result = new RuleBasedBreakIterator(file, status);
-
- // If there is a result, set the valid locale and actual locale, and the kind
- if (U_SUCCESS(status) && result != NULL) {
- U_LOCALE_BASED(locBased, *(BreakIterator*)result);
- locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
- actualLocale.data());
- }
-
- ures_close(b);
-
- if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple
- delete result;
- return NULL;
- }
-
- if (result == NULL) {
- udata_close(file);
- if (U_SUCCESS(status)) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
- }
-
- return result;
-}
-
-// Creates a break iterator for word breaks.
-BreakIterator* U_EXPORT2
-BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
-{
- return createInstance(key, UBRK_WORD, status);
-}
-
-// -------------------------------------
-
-// Creates a break iterator for line breaks.
-BreakIterator* U_EXPORT2
-BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
-{
- return createInstance(key, UBRK_LINE, status);
-}
-
-// -------------------------------------
-
-// Creates a break iterator for character breaks.
-BreakIterator* U_EXPORT2
-BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status)
-{
- return createInstance(key, UBRK_CHARACTER, status);
-}
-
-// -------------------------------------
-
-// Creates a break iterator for sentence breaks.
-BreakIterator* U_EXPORT2
-BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status)
-{
- return createInstance(key, UBRK_SENTENCE, status);
-}
-
-// -------------------------------------
-
-// Creates a break iterator for title casing breaks.
-BreakIterator* U_EXPORT2
-BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status)
-{
- return createInstance(key, UBRK_TITLE, status);
-}
-
-// -------------------------------------
-
-// Gets all the available locales that has localized text boundary data.
-const Locale* U_EXPORT2
-BreakIterator::getAvailableLocales(int32_t& count)
-{
- return Locale::getAvailableLocales(count);
-}
-
-// ------------------------------------------
-//
-// Constructors, destructor and assignment operator
-//
-//-------------------------------------------
-
-BreakIterator::BreakIterator()
-{
- *validLocale = *actualLocale = 0;
-}
-
-BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) {
- uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
- uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
-}
-
-BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
- if (this != &other) {
- uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
- uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
- }
- return *this;
-}
-
-BreakIterator::~BreakIterator()
-{
-}
-
-// ------------------------------------------
-//
-// Registration
-//
-//-------------------------------------------
-#if !UCONFIG_NO_SERVICE
-
-// -------------------------------------
-
-class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
-public:
- virtual ~ICUBreakIteratorFactory();
-protected:
- virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const {
- return BreakIterator::makeInstance(loc, kind, status);
- }
-};
-
-ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {}
-
-// -------------------------------------
-
-class ICUBreakIteratorService : public ICULocaleService {
-public:
- ICUBreakIteratorService()
- : ICULocaleService(UNICODE_STRING("Break Iterator", 14))
- {
- UErrorCode status = U_ZERO_ERROR;
- registerFactory(new ICUBreakIteratorFactory(), status);
- }
-
- virtual ~ICUBreakIteratorService();
-
- virtual UObject* cloneInstance(UObject* instance) const {
- return ((BreakIterator*)instance)->clone();
- }
-
- virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const {
- LocaleKey& lkey = (LocaleKey&)key;
- int32_t kind = lkey.kind();
- Locale loc;
- lkey.currentLocale(loc);
- return BreakIterator::makeInstance(loc, kind, status);
- }
-
- virtual UBool isDefault() const {
- return countFactories() == 1;
- }
-};
-
-ICUBreakIteratorService::~ICUBreakIteratorService() {}
-
-// -------------------------------------
-
-// defined in ucln_cmn.h
-U_NAMESPACE_END
-
-static icu::UInitOnce gInitOnceBrkiter = U_INITONCE_INITIALIZER;
-static icu::ICULocaleService* gService = NULL;
-
-
-
-/**
- * Release all static memory held by breakiterator.
- */
-U_CDECL_BEGIN
-static UBool U_CALLCONV breakiterator_cleanup(void) {
-#if !UCONFIG_NO_SERVICE
- if (gService) {
- delete gService;
- gService = NULL;
- }
- gInitOnceBrkiter.reset();
-#endif
- return TRUE;
-}
-U_CDECL_END
-U_NAMESPACE_BEGIN
-
-static void U_CALLCONV
-initService(void) {
- gService = new ICUBreakIteratorService();
- ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup);
-}
-
-static ICULocaleService*
-getService(void)
-{
- umtx_initOnce(gInitOnceBrkiter, &initService);
- return gService;
-}
-
-
-// -------------------------------------
-
-static inline UBool
-hasService(void)
-{
- return !gInitOnceBrkiter.isReset() && getService() != NULL;
-}
-
-// -------------------------------------
-
-URegistryKey U_EXPORT2
-BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
-{
- ICULocaleService *service = getService();
- if (service == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- return service->registerInstance(toAdopt, locale, kind, status);
-}
-
-// -------------------------------------
-
-UBool U_EXPORT2
-BreakIterator::unregister(URegistryKey key, UErrorCode& status)
-{
- if (U_SUCCESS(status)) {
- if (hasService()) {
- return gService->unregister(key, status);
- }
- status = U_MEMORY_ALLOCATION_ERROR;
- }
- return FALSE;
-}
-
-// -------------------------------------
-
-StringEnumeration* U_EXPORT2
-BreakIterator::getAvailableLocales(void)
-{
- ICULocaleService *service = getService();
- if (service == NULL) {
- return NULL;
- }
- return service->getAvailableLocales();
-}
-#endif /* UCONFIG_NO_SERVICE */
-
-// -------------------------------------
-
-BreakIterator*
-BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status)
-{
- if (U_FAILURE(status)) {
- return NULL;
- }
-
-#if !UCONFIG_NO_SERVICE
- if (hasService()) {
- Locale actualLoc("");
- BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status);
- // TODO: The way the service code works in ICU 2.8 is that if
- // there is a real registered break iterator, the actualLoc
- // will be populated, but if the handleDefault path is taken
- // (because nothing is registered that can handle the
- // requested locale) then the actualLoc comes back empty. In
- // that case, the returned object already has its actual/valid
- // locale data populated (by makeInstance, which is what
- // handleDefault calls), so we don't touch it. YES, A COMMENT
- // THIS LONG is a sign of bad code -- so the action item is to
- // revisit this in ICU 3.0 and clean it up/fix it/remove it.
- if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) {
- U_LOCALE_BASED(locBased, *result);
- locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName());
- }
- return result;
- }
- else
-#endif
- {
- return makeInstance(loc, kind, status);
- }
-}
-
-// -------------------------------------
-enum { kKeyValueLenMax = 32 };
-
-BreakIterator*
-BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
-{
-
- if (U_FAILURE(status)) {
- return NULL;
- }
- char lbType[kKeyValueLenMax];
-
- BreakIterator *result = NULL;
- switch (kind) {
- case UBRK_CHARACTER:
- {
- UTRACE_ENTRY(UTRACE_UBRK_CREATE_CHARACTER);
- result = BreakIterator::buildInstance(loc, "grapheme", status);
- UTRACE_EXIT_STATUS(status);
- }
- break;
- case UBRK_WORD:
- {
- UTRACE_ENTRY(UTRACE_UBRK_CREATE_WORD);
- result = BreakIterator::buildInstance(loc, "word", status);
- UTRACE_EXIT_STATUS(status);
- }
- break;
- case UBRK_LINE:
- {
- UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
- uprv_strcpy(lbType, "line");
- char lbKeyValue[kKeyValueLenMax] = {0};
- UErrorCode kvStatus = U_ZERO_ERROR;
- int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus);
- if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) {
- uprv_strcat(lbType, "_");
- uprv_strcat(lbType, lbKeyValue);
- }
- result = BreakIterator::buildInstance(loc, lbType, status);
-
- UTRACE_DATA1(UTRACE_INFO, "lb=%s", lbKeyValue);
- UTRACE_EXIT_STATUS(status);
- }
- break;
- case UBRK_SENTENCE:
- {
- UTRACE_ENTRY(UTRACE_UBRK_CREATE_SENTENCE);
- result = BreakIterator::buildInstance(loc, "sentence", status);
-#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
- char ssKeyValue[kKeyValueLenMax] = {0};
- UErrorCode kvStatus = U_ZERO_ERROR;
- int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus);
- if (U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(ssKeyValue,"standard")==0) {
- FilteredBreakIteratorBuilder* fbiBuilder = FilteredBreakIteratorBuilder::createInstance(loc, kvStatus);
- if (U_SUCCESS(kvStatus)) {
- result = fbiBuilder->build(result, status);
- delete fbiBuilder;
- }
- }
-#endif
- UTRACE_EXIT_STATUS(status);
- }
- break;
- case UBRK_TITLE:
- {
- UTRACE_ENTRY(UTRACE_UBRK_CREATE_TITLE);
- result = BreakIterator::buildInstance(loc, "title", status);
- UTRACE_EXIT_STATUS(status);
- }
- break;
- default:
- status = U_ILLEGAL_ARGUMENT_ERROR;
- }
-
- if (U_FAILURE(status)) {
- return NULL;
- }
-
- return result;
-}
-
-Locale
-BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
- U_LOCALE_BASED(locBased, *this);
- return locBased.getLocale(type, status);
-}
-
-const char *
-BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
- U_LOCALE_BASED(locBased, *this);
- return locBased.getLocaleID(type, status);
-}
-
-
-// This implementation of getRuleStatus is a do-nothing stub, here to
-// provide a default implementation for any derived BreakIterator classes that
-// do not implement it themselves.
-int32_t BreakIterator::getRuleStatus() const {
- return 0;
-}
-
-// This implementation of getRuleStatusVec is a do-nothing stub, here to
-// provide a default implementation for any derived BreakIterator classes that
-// do not implement it themselves.
-int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) {
- if (U_FAILURE(status)) {
- return 0;
- }
- if (capacity < 1) {
- status = U_BUFFER_OVERFLOW_ERROR;
- return 1;
- }
- *fillInVec = 0;
- return 1;
-}
-
-BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) {
- U_LOCALE_BASED(locBased, (*this));
- locBased.setLocaleIDs(valid, actual);
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
-
-//eof
diff --git a/contrib/libs/icu/common/bytesinkutil.cpp b/contrib/libs/icu/common/bytesinkutil.cpp
deleted file mode 100644
index c64a845f875..00000000000
--- a/contrib/libs/icu/common/bytesinkutil.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-// bytesinkutil.cpp
-// created: 2017sep14 Markus W. Scherer
-
-#include "unicode/utypes.h"
-#include "unicode/bytestream.h"
-#include "unicode/edits.h"
-#include "unicode/stringoptions.h"
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-#include "bytesinkutil.h"
-#include "charstr.h"
-#include "cmemory.h"
-#include "uassert.h"
-
-U_NAMESPACE_BEGIN
-
-UBool
-ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
- ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return FALSE; }
- char scratch[200];
- int32_t s8Length = 0;
- for (int32_t i = 0; i < s16Length;) {
- int32_t capacity;
- int32_t desiredCapacity = s16Length - i;
- if (desiredCapacity < (INT32_MAX / 3)) {
- desiredCapacity *= 3; // max 3 UTF-8 bytes per UTF-16 code unit
- } else if (desiredCapacity < (INT32_MAX / 2)) {
- desiredCapacity *= 2;
- } else {
- desiredCapacity = INT32_MAX;
- }
- char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity,
- scratch, UPRV_LENGTHOF(scratch), &capacity);
- capacity -= U8_MAX_LENGTH - 1;
- int32_t j = 0;
- for (; i < s16Length && j < capacity;) {
- UChar32 c;
- U16_NEXT_UNSAFE(s16, i, c);
- U8_APPEND_UNSAFE(buffer, j, c);
- }
- if (j > (INT32_MAX - s8Length)) {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return FALSE;
- }
- sink.Append(buffer, j);
- s8Length += j;
- }
- if (edits != nullptr) {
- edits->addReplace(length, s8Length);
- }
- return TRUE;
-}
-
-UBool
-ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
- const char16_t *s16, int32_t s16Length,
- ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return FALSE; }
- if ((limit - s) > INT32_MAX) {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return FALSE;
- }
- return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode);
-}
-
-void
-ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) {
- char s8[U8_MAX_LENGTH];
- int32_t s8Length = 0;
- U8_APPEND_UNSAFE(s8, s8Length, c);
- if (edits != nullptr) {
- edits->addReplace(length, s8Length);
- }
- sink.Append(s8, s8Length);
-}
-
-namespace {
-
-// See unicode/utf8.h U8_APPEND_UNSAFE().
-inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
-inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
-
-} // namespace
-
-void
-ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
- U_ASSERT(0x80 <= c && c <= 0x7ff); // 2-byte UTF-8
- char s8[2] = { (char)getTwoByteLead(c), (char)getTwoByteTrail(c) };
- sink.Append(s8, 2);
-}
-
-void
-ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
- ByteSink &sink, uint32_t options, Edits *edits) {
- U_ASSERT(length > 0);
- if (edits != nullptr) {
- edits->addUnchanged(length);
- }
- if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
- sink.Append(reinterpret_cast<const char *>(s), length);
- }
-}
-
-UBool
-ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
- ByteSink &sink, uint32_t options, Edits *edits,
- UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return FALSE; }
- if ((limit - s) > INT32_MAX) {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return FALSE;
- }
- int32_t length = (int32_t)(limit - s);
- if (length > 0) {
- appendNonEmptyUnchanged(s, length, sink, options, edits);
- }
- return TRUE;
-}
-
-CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) {
-}
-
-CharStringByteSink::~CharStringByteSink() = default;
-
-void
-CharStringByteSink::Append(const char* bytes, int32_t n) {
- UErrorCode status = U_ZERO_ERROR;
- dest_.append(bytes, n, status);
- // Any errors are silently ignored.
-}
-
-char*
-CharStringByteSink::GetAppendBuffer(int32_t min_capacity,
- int32_t desired_capacity_hint,
- char* scratch,
- int32_t scratch_capacity,
- int32_t* result_capacity) {
- if (min_capacity < 1 || scratch_capacity < min_capacity) {
- *result_capacity = 0;
- return nullptr;
- }
-
- UErrorCode status = U_ZERO_ERROR;
- char* result = dest_.getAppendBuffer(
- min_capacity,
- desired_capacity_hint,
- *result_capacity,
- status);
- if (U_SUCCESS(status)) {
- return result;
- }
-
- *result_capacity = scratch_capacity;
- return scratch;
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/bytesinkutil.h b/contrib/libs/icu/common/bytesinkutil.h
deleted file mode 100644
index 6808fbe6777..00000000000
--- a/contrib/libs/icu/common/bytesinkutil.h
+++ /dev/null
@@ -1,83 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-// bytesinkutil.h
-// created: 2017sep14 Markus W. Scherer
-
-#include "unicode/utypes.h"
-#include "unicode/bytestream.h"
-#include "unicode/edits.h"
-#include "cmemory.h"
-#include "uassert.h"
-
-U_NAMESPACE_BEGIN
-
-class ByteSink;
-class CharString;
-class Edits;
-
-class U_COMMON_API ByteSinkUtil {
-public:
- ByteSinkUtil() = delete; // all static
-
- /** (length) bytes were mapped to valid (s16, s16Length). */
- static UBool appendChange(int32_t length,
- const char16_t *s16, int32_t s16Length,
- ByteSink &sink, Edits *edits, UErrorCode &errorCode);
-
- /** The bytes at [s, limit[ were mapped to valid (s16, s16Length). */
- static UBool appendChange(const uint8_t *s, const uint8_t *limit,
- const char16_t *s16, int32_t s16Length,
- ByteSink &sink, Edits *edits, UErrorCode &errorCode);
-
- /** (length) bytes were mapped/changed to valid code point c. */
- static void appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits = nullptr);
-
- /** The few bytes at [src, nextSrc[ were mapped/changed to valid code point c. */
- static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c,
- ByteSink &sink, Edits *edits = nullptr) {
- appendCodePoint((int32_t)(nextSrc - src), c, sink, edits);
- }
-
- /** Append the two-byte character (U+0080..U+07FF). */
- static void appendTwoBytes(UChar32 c, ByteSink &sink);
-
- static UBool appendUnchanged(const uint8_t *s, int32_t length,
- ByteSink &sink, uint32_t options, Edits *edits,
- UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return FALSE; }
- if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); }
- return TRUE;
- }
-
- static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
- ByteSink &sink, uint32_t options, Edits *edits,
- UErrorCode &errorCode);
-
-private:
- static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
- ByteSink &sink, uint32_t options, Edits *edits);
-};
-
-class U_COMMON_API CharStringByteSink : public ByteSink {
-public:
- CharStringByteSink(CharString* dest);
- ~CharStringByteSink() override;
-
- CharStringByteSink() = delete;
- CharStringByteSink(const CharStringByteSink&) = delete;
- CharStringByteSink& operator=(const CharStringByteSink&) = delete;
-
- void Append(const char* bytes, int32_t n) override;
-
- char* GetAppendBuffer(int32_t min_capacity,
- int32_t desired_capacity_hint,
- char* scratch,
- int32_t scratch_capacity,
- int32_t* result_capacity) override;
-
-private:
- CharString& dest_;
-};
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/bytestream.cpp b/contrib/libs/icu/common/bytestream.cpp
deleted file mode 100644
index 0d0e4dda39b..00000000000
--- a/contrib/libs/icu/common/bytestream.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-// Copyright (C) 2009-2011, International Business Machines
-// Corporation and others. All Rights Reserved.
-//
-// Copyright 2007 Google Inc. All Rights Reserved.
-// Author: [email protected] (Sanjay Ghemawat)
-
-#include "unicode/utypes.h"
-#include "unicode/bytestream.h"
-#include "cmemory.h"
-
-U_NAMESPACE_BEGIN
-
-ByteSink::~ByteSink() {}
-
-char* ByteSink::GetAppendBuffer(int32_t min_capacity,
- int32_t /*desired_capacity_hint*/,
- char* scratch, int32_t scratch_capacity,
- int32_t* result_capacity) {
- if (min_capacity < 1 || scratch_capacity < min_capacity) {
- *result_capacity = 0;
- return NULL;
- }
- *result_capacity = scratch_capacity;
- return scratch;
-}
-
-void ByteSink::Flush() {}
-
-CheckedArrayByteSink::CheckedArrayByteSink(char* outbuf, int32_t capacity)
- : outbuf_(outbuf), capacity_(capacity < 0 ? 0 : capacity),
- size_(0), appended_(0), overflowed_(FALSE) {
-}
-
-CheckedArrayByteSink::~CheckedArrayByteSink() {}
-
-CheckedArrayByteSink& CheckedArrayByteSink::Reset() {
- size_ = appended_ = 0;
- overflowed_ = FALSE;
- return *this;
-}
-
-void CheckedArrayByteSink::Append(const char* bytes, int32_t n) {
- if (n <= 0) {
- return;
- }
- if (n > (INT32_MAX - appended_)) {
- // TODO: Report as integer overflow, not merely buffer overflow.
- appended_ = INT32_MAX;
- overflowed_ = TRUE;
- return;
- }
- appended_ += n;
- int32_t available = capacity_ - size_;
- if (n > available) {
- n = available;
- overflowed_ = TRUE;
- }
- if (n > 0 && bytes != (outbuf_ + size_)) {
- uprv_memcpy(outbuf_ + size_, bytes, n);
- }
- size_ += n;
-}
-
-char* CheckedArrayByteSink::GetAppendBuffer(int32_t min_capacity,
- int32_t /*desired_capacity_hint*/,
- char* scratch,
- int32_t scratch_capacity,
- int32_t* result_capacity) {
- if (min_capacity < 1 || scratch_capacity < min_capacity) {
- *result_capacity = 0;
- return NULL;
- }
- int32_t available = capacity_ - size_;
- if (available >= min_capacity) {
- *result_capacity = available;
- return outbuf_ + size_;
- } else {
- *result_capacity = scratch_capacity;
- return scratch;
- }
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/bytestrie.cpp b/contrib/libs/icu/common/bytestrie.cpp
deleted file mode 100644
index c4d498c4bfa..00000000000
--- a/contrib/libs/icu/common/bytestrie.cpp
+++ /dev/null
@@ -1,441 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2010-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: bytestrie.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010sep25
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/bytestream.h"
-#include "unicode/bytestrie.h"
-#include "unicode/uobject.h"
-#include "cmemory.h"
-#include "uassert.h"
-
-U_NAMESPACE_BEGIN
-
-BytesTrie::~BytesTrie() {
- uprv_free(ownedArray_);
-}
-
-// lead byte already shifted right by 1.
-int32_t
-BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
- int32_t value;
- if(leadByte<kMinTwoByteValueLead) {
- value=leadByte-kMinOneByteValueLead;
- } else if(leadByte<kMinThreeByteValueLead) {
- value=((leadByte-kMinTwoByteValueLead)<<8)|*pos;
- } else if(leadByte<kFourByteValueLead) {
- value=((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
- } else if(leadByte==kFourByteValueLead) {
- value=(pos[0]<<16)|(pos[1]<<8)|pos[2];
- } else {
- value=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
- }
- return value;
-}
-
-const uint8_t *
-BytesTrie::jumpByDelta(const uint8_t *pos) {
- int32_t delta=*pos++;
- if(delta<kMinTwoByteDeltaLead) {
- // nothing to do
- } else if(delta<kMinThreeByteDeltaLead) {
- delta=((delta-kMinTwoByteDeltaLead)<<8)|*pos++;
- } else if(delta<kFourByteDeltaLead) {
- delta=((delta-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1];
- pos+=2;
- } else if(delta==kFourByteDeltaLead) {
- delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
- pos+=3;
- } else {
- delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
- pos+=4;
- }
- return pos+delta;
-}
-
-UStringTrieResult
-BytesTrie::current() const {
- const uint8_t *pos=pos_;
- if(pos==NULL) {
- return USTRINGTRIE_NO_MATCH;
- } else {
- int32_t node;
- return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
- valueResult(node) : USTRINGTRIE_NO_VALUE;
- }
-}
-
-UStringTrieResult
-BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
- // Branch according to the current byte.
- if(length==0) {
- length=*pos++;
- }
- ++length;
- // The length of the branch is the number of bytes to select from.
- // The data structure encodes a binary search.
- while(length>kMaxBranchLinearSubNodeLength) {
- if(inByte<*pos++) {
- length>>=1;
- pos=jumpByDelta(pos);
- } else {
- length=length-(length>>1);
- pos=skipDelta(pos);
- }
- }
- // Drop down to linear search for the last few bytes.
- // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
- // and divides length by 2.
- do {
- if(inByte==*pos++) {
- UStringTrieResult result;
- int32_t node=*pos;
- U_ASSERT(node>=kMinValueLead);
- if(node&kValueIsFinal) {
- // Leave the final value for getValue() to read.
- result=USTRINGTRIE_FINAL_VALUE;
- } else {
- // Use the non-final value as the jump delta.
- ++pos;
- // int32_t delta=readValue(pos, node>>1);
- node>>=1;
- int32_t delta;
- if(node<kMinTwoByteValueLead) {
- delta=node-kMinOneByteValueLead;
- } else if(node<kMinThreeByteValueLead) {
- delta=((node-kMinTwoByteValueLead)<<8)|*pos++;
- } else if(node<kFourByteValueLead) {
- delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
- pos+=2;
- } else if(node==kFourByteValueLead) {
- delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
- pos+=3;
- } else {
- delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
- pos+=4;
- }
- // end readValue()
- pos+=delta;
- node=*pos;
- result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
- }
- pos_=pos;
- return result;
- }
- --length;
- pos=skipValue(pos);
- } while(length>1);
- if(inByte==*pos++) {
- pos_=pos;
- int32_t node=*pos;
- return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
- } else {
- stop();
- return USTRINGTRIE_NO_MATCH;
- }
-}
-
-UStringTrieResult
-BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
- for(;;) {
- int32_t node=*pos++;
- if(node<kMinLinearMatch) {
- return branchNext(pos, node, inByte);
- } else if(node<kMinValueLead) {
- // Match the first of length+1 bytes.
- int32_t length=node-kMinLinearMatch; // Actual match length minus 1.
- if(inByte==*pos++) {
- remainingMatchLength_=--length;
- pos_=pos;
- return (length<0 && (node=*pos)>=kMinValueLead) ?
- valueResult(node) : USTRINGTRIE_NO_VALUE;
- } else {
- // No match.
- break;
- }
- } else if(node&kValueIsFinal) {
- // No further matching bytes.
- break;
- } else {
- // Skip intermediate value.
- pos=skipValue(pos, node);
- // The next node must not also be a value node.
- U_ASSERT(*pos<kMinValueLead);
- }
- }
- stop();
- return USTRINGTRIE_NO_MATCH;
-}
-
-UStringTrieResult
-BytesTrie::next(int32_t inByte) {
- const uint8_t *pos=pos_;
- if(pos==NULL) {
- return USTRINGTRIE_NO_MATCH;
- }
- if(inByte<0) {
- inByte+=0x100;
- }
- int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
- if(length>=0) {
- // Remaining part of a linear-match node.
- if(inByte==*pos++) {
- remainingMatchLength_=--length;
- pos_=pos;
- int32_t node;
- return (length<0 && (node=*pos)>=kMinValueLead) ?
- valueResult(node) : USTRINGTRIE_NO_VALUE;
- } else {
- stop();
- return USTRINGTRIE_NO_MATCH;
- }
- }
- return nextImpl(pos, inByte);
-}
-
-UStringTrieResult
-BytesTrie::next(const char *s, int32_t sLength) {
- if(sLength<0 ? *s==0 : sLength==0) {
- // Empty input.
- return current();
- }
- const uint8_t *pos=pos_;
- if(pos==NULL) {
- return USTRINGTRIE_NO_MATCH;
- }
- int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
- for(;;) {
- // Fetch the next input byte, if there is one.
- // Continue a linear-match node without rechecking sLength<0.
- int32_t inByte;
- if(sLength<0) {
- for(;;) {
- if((inByte=*s++)==0) {
- remainingMatchLength_=length;
- pos_=pos;
- int32_t node;
- return (length<0 && (node=*pos)>=kMinValueLead) ?
- valueResult(node) : USTRINGTRIE_NO_VALUE;
- }
- if(length<0) {
- remainingMatchLength_=length;
- break;
- }
- if(inByte!=*pos) {
- stop();
- return USTRINGTRIE_NO_MATCH;
- }
- ++pos;
- --length;
- }
- } else {
- for(;;) {
- if(sLength==0) {
- remainingMatchLength_=length;
- pos_=pos;
- int32_t node;
- return (length<0 && (node=*pos)>=kMinValueLead) ?
- valueResult(node) : USTRINGTRIE_NO_VALUE;
- }
- inByte=*s++;
- --sLength;
- if(length<0) {
- remainingMatchLength_=length;
- break;
- }
- if(inByte!=*pos) {
- stop();
- return USTRINGTRIE_NO_MATCH;
- }
- ++pos;
- --length;
- }
- }
- for(;;) {
- int32_t node=*pos++;
- if(node<kMinLinearMatch) {
- UStringTrieResult result=branchNext(pos, node, inByte);
- if(result==USTRINGTRIE_NO_MATCH) {
- return USTRINGTRIE_NO_MATCH;
- }
- // Fetch the next input byte, if there is one.
- if(sLength<0) {
- if((inByte=*s++)==0) {
- return result;
- }
- } else {
- if(sLength==0) {
- return result;
- }
- inByte=*s++;
- --sLength;
- }
- if(result==USTRINGTRIE_FINAL_VALUE) {
- // No further matching bytes.
- stop();
- return USTRINGTRIE_NO_MATCH;
- }
- pos=pos_; // branchNext() advanced pos and wrote it to pos_ .
- } else if(node<kMinValueLead) {
- // Match length+1 bytes.
- length=node-kMinLinearMatch; // Actual match length minus 1.
- if(inByte!=*pos) {
- stop();
- return USTRINGTRIE_NO_MATCH;
- }
- ++pos;
- --length;
- break;
- } else if(node&kValueIsFinal) {
- // No further matching bytes.
- stop();
- return USTRINGTRIE_NO_MATCH;
- } else {
- // Skip intermediate value.
- pos=skipValue(pos, node);
- // The next node must not also be a value node.
- U_ASSERT(*pos<kMinValueLead);
- }
- }
- }
-}
-
-const uint8_t *
-BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
- UBool haveUniqueValue, int32_t &uniqueValue) {
- while(length>kMaxBranchLinearSubNodeLength) {
- ++pos; // ignore the comparison byte
- if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
- return NULL;
- }
- length=length-(length>>1);
- pos=skipDelta(pos);
- }
- do {
- ++pos; // ignore a comparison byte
- // handle its value
- int32_t node=*pos++;
- UBool isFinal=(UBool)(node&kValueIsFinal);
- int32_t value=readValue(pos, node>>1);
- pos=skipValue(pos, node);
- if(isFinal) {
- if(haveUniqueValue) {
- if(value!=uniqueValue) {
- return NULL;
- }
- } else {
- uniqueValue=value;
- haveUniqueValue=TRUE;
- }
- } else {
- if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
- return NULL;
- }
- haveUniqueValue=TRUE;
- }
- } while(--length>1);
- return pos+1; // ignore the last comparison byte
-}
-
-UBool
-BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
- for(;;) {
- int32_t node=*pos++;
- if(node<kMinLinearMatch) {
- if(node==0) {
- node=*pos++;
- }
- pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
- if(pos==NULL) {
- return FALSE;
- }
- haveUniqueValue=TRUE;
- } else if(node<kMinValueLead) {
- // linear-match node
- pos+=node-kMinLinearMatch+1; // Ignore the match bytes.
- } else {
- UBool isFinal=(UBool)(node&kValueIsFinal);
- int32_t value=readValue(pos, node>>1);
- if(haveUniqueValue) {
- if(value!=uniqueValue) {
- return FALSE;
- }
- } else {
- uniqueValue=value;
- haveUniqueValue=TRUE;
- }
- if(isFinal) {
- return TRUE;
- }
- pos=skipValue(pos, node);
- }
- }
-}
-
-int32_t
-BytesTrie::getNextBytes(ByteSink &out) const {
- const uint8_t *pos=pos_;
- if(pos==NULL) {
- return 0;
- }
- if(remainingMatchLength_>=0) {
- append(out, *pos); // Next byte of a pending linear-match node.
- return 1;
- }
- int32_t node=*pos++;
- if(node>=kMinValueLead) {
- if(node&kValueIsFinal) {
- return 0;
- } else {
- pos=skipValue(pos, node);
- node=*pos++;
- U_ASSERT(node<kMinValueLead);
- }
- }
- if(node<kMinLinearMatch) {
- if(node==0) {
- node=*pos++;
- }
- getNextBranchBytes(pos, ++node, out);
- return node;
- } else {
- // First byte of the linear-match node.
- append(out, *pos);
- return 1;
- }
-}
-
-void
-BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
- while(length>kMaxBranchLinearSubNodeLength) {
- ++pos; // ignore the comparison byte
- getNextBranchBytes(jumpByDelta(pos), length>>1, out);
- length=length-(length>>1);
- pos=skipDelta(pos);
- }
- do {
- append(out, *pos++);
- pos=skipValue(pos);
- } while(--length>1);
- append(out, *pos);
-}
-
-void
-BytesTrie::append(ByteSink &out, int c) {
- char ch=(char)c;
- out.Append(&ch, 1);
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/bytestriebuilder.cpp b/contrib/libs/icu/common/bytestriebuilder.cpp
deleted file mode 100644
index ec1ab7d8f50..00000000000
--- a/contrib/libs/icu/common/bytestriebuilder.cpp
+++ /dev/null
@@ -1,504 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2010-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: bytestriebuilder.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010sep25
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/bytestrie.h"
-#include "unicode/bytestriebuilder.h"
-#include "unicode/stringpiece.h"
-#include "charstr.h"
-#include "cmemory.h"
-#include "uhash.h"
-#include "uarrsort.h"
-#include "uassert.h"
-#include "ustr_imp.h"
-
-U_NAMESPACE_BEGIN
-
-/*
- * Note: This builder implementation stores (bytes, value) pairs with full copies
- * of the byte sequences, until the BytesTrie is built.
- * It might(!) take less memory if we collected the data in a temporary, dynamic trie.
- */
-
-class BytesTrieElement : public UMemory {
-public:
- // Use compiler's default constructor, initializes nothing.
-
- void setTo(StringPiece s, int32_t val, CharString &strings, UErrorCode &errorCode);
-
- StringPiece getString(const CharString &strings) const {
- int32_t offset=stringOffset;
- int32_t length;
- if(offset>=0) {
- length=(uint8_t)strings[offset++];
- } else {
- offset=~offset;
- length=((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
- offset+=2;
- }
- return StringPiece(strings.data()+offset, length);
- }
- int32_t getStringLength(const CharString &strings) const {
- int32_t offset=stringOffset;
- if(offset>=0) {
- return (uint8_t)strings[offset];
- } else {
- offset=~offset;
- return ((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
- }
- }
-
- char charAt(int32_t index, const CharString &strings) const { return data(strings)[index]; }
-
- int32_t getValue() const { return value; }
-
- int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const;
-
-private:
- const char *data(const CharString &strings) const {
- int32_t offset=stringOffset;
- if(offset>=0) {
- ++offset;
- } else {
- offset=~offset+2;
- }
- return strings.data()+offset;
- }
-
- // If the stringOffset is non-negative, then the first strings byte contains
- // the string length.
- // If the stringOffset is negative, then the first two strings bytes contain
- // the string length (big-endian), and the offset needs to be bit-inverted.
- // (Compared with a stringLength field here, this saves 3 bytes per string for most strings.)
- int32_t stringOffset;
- int32_t value;
-};
-
-void
-BytesTrieElement::setTo(StringPiece s, int32_t val,
- CharString &strings, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return;
- }
- int32_t length=s.length();
- if(length>0xffff) {
- // Too long: We store the length in 1 or 2 bytes.
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return;
- }
- int32_t offset=strings.length();
- if(length>0xff) {
- offset=~offset;
- strings.append((char)(length>>8), errorCode);
- }
- strings.append((char)length, errorCode);
- stringOffset=offset;
- value=val;
- strings.append(s, errorCode);
-}
-
-int32_t
-BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const {
- // TODO: add StringPiece::compare(), see ticket #8187
- StringPiece thisString=getString(strings);
- StringPiece otherString=other.getString(strings);
- int32_t lengthDiff=thisString.length()-otherString.length();
- int32_t commonLength;
- if(lengthDiff<=0) {
- commonLength=thisString.length();
- } else {
- commonLength=otherString.length();
- }
- int32_t diff=uprv_memcmp(thisString.data(), otherString.data(), commonLength);
- return diff!=0 ? diff : lengthDiff;
-}
-
-BytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode)
- : strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0),
- bytes(NULL), bytesCapacity(0), bytesLength(0) {
- if(U_FAILURE(errorCode)) {
- return;
- }
- strings=new CharString();
- if(strings==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- }
-}
-
-BytesTrieBuilder::~BytesTrieBuilder() {
- delete strings;
- delete[] elements;
- uprv_free(bytes);
-}
-
-BytesTrieBuilder &
-BytesTrieBuilder::add(StringPiece s, int32_t value, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return *this;
- }
- if(bytesLength>0) {
- // Cannot add elements after building.
- errorCode=U_NO_WRITE_PERMISSION;
- return *this;
- }
- if(elementsLength==elementsCapacity) {
- int32_t newCapacity;
- if(elementsCapacity==0) {
- newCapacity=1024;
- } else {
- newCapacity=4*elementsCapacity;
- }
- BytesTrieElement *newElements=new BytesTrieElement[newCapacity];
- if(newElements==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return *this; // error instead of dereferencing null
- }
- if(elementsLength>0) {
- uprv_memcpy(newElements, elements, (size_t)elementsLength*sizeof(BytesTrieElement));
- }
- delete[] elements;
- elements=newElements;
- elementsCapacity=newCapacity;
- }
- elements[elementsLength++].setTo(s, value, *strings, errorCode);
- return *this;
-}
-
-U_CDECL_BEGIN
-
-static int32_t U_CALLCONV
-compareElementStrings(const void *context, const void *left, const void *right) {
- const CharString *strings=static_cast<const CharString *>(context);
- const BytesTrieElement *leftElement=static_cast<const BytesTrieElement *>(left);
- const BytesTrieElement *rightElement=static_cast<const BytesTrieElement *>(right);
- return leftElement->compareStringTo(*rightElement, *strings);
-}
-
-U_CDECL_END
-
-BytesTrie *
-BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
- buildBytes(buildOption, errorCode);
- BytesTrie *newTrie=NULL;
- if(U_SUCCESS(errorCode)) {
- newTrie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength));
- if(newTrie==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- } else {
- bytes=NULL; // The new trie now owns the array.
- bytesCapacity=0;
- }
- }
- return newTrie;
-}
-
-StringPiece
-BytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
- buildBytes(buildOption, errorCode);
- StringPiece result;
- if(U_SUCCESS(errorCode)) {
- result.set(bytes+(bytesCapacity-bytesLength), bytesLength);
- }
- return result;
-}
-
-void
-BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return;
- }
- if(bytes!=NULL && bytesLength>0) {
- // Already built.
- return;
- }
- if(bytesLength==0) {
- if(elementsLength==0) {
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return;
- }
- uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
- compareElementStrings, strings,
- FALSE, // need not be a stable sort
- &errorCode);
- if(U_FAILURE(errorCode)) {
- return;
- }
- // Duplicate strings are not allowed.
- StringPiece prev=elements[0].getString(*strings);
- for(int32_t i=1; i<elementsLength; ++i) {
- StringPiece current=elements[i].getString(*strings);
- if(prev==current) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- prev=current;
- }
- }
- // Create and byte-serialize the trie for the elements.
- bytesLength=0;
- int32_t capacity=strings->length();
- if(capacity<1024) {
- capacity=1024;
- }
- if(bytesCapacity<capacity) {
- uprv_free(bytes);
- bytes=static_cast<char *>(uprv_malloc(capacity));
- if(bytes==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- bytesCapacity=0;
- return;
- }
- bytesCapacity=capacity;
- }
- StringTrieBuilder::build(buildOption, elementsLength, errorCode);
- if(bytes==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- }
-}
-
-BytesTrieBuilder &
-BytesTrieBuilder::clear() {
- strings->clear();
- elementsLength=0;
- bytesLength=0;
- return *this;
-}
-
-int32_t
-BytesTrieBuilder::getElementStringLength(int32_t i) const {
- return elements[i].getStringLength(*strings);
-}
-
-UChar
-BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const {
- return (uint8_t)elements[i].charAt(byteIndex, *strings);
-}
-
-int32_t
-BytesTrieBuilder::getElementValue(int32_t i) const {
- return elements[i].getValue();
-}
-
-int32_t
-BytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const {
- const BytesTrieElement &firstElement=elements[first];
- const BytesTrieElement &lastElement=elements[last];
- int32_t minStringLength=firstElement.getStringLength(*strings);
- while(++byteIndex<minStringLength &&
- firstElement.charAt(byteIndex, *strings)==
- lastElement.charAt(byteIndex, *strings)) {}
- return byteIndex;
-}
-
-int32_t
-BytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const {
- int32_t length=0; // Number of different bytes at byteIndex.
- int32_t i=start;
- do {
- char byte=elements[i++].charAt(byteIndex, *strings);
- while(i<limit && byte==elements[i].charAt(byteIndex, *strings)) {
- ++i;
- }
- ++length;
- } while(i<limit);
- return length;
-}
-
-int32_t
-BytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const {
- do {
- char byte=elements[i++].charAt(byteIndex, *strings);
- while(byte==elements[i].charAt(byteIndex, *strings)) {
- ++i;
- }
- } while(--count>0);
- return i;
-}
-
-int32_t
-BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const {
- char b=(char)byte;
- while(b==elements[i].charAt(byteIndex, *strings)) {
- ++i;
- }
- return i;
-}
-
-BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
- : LinearMatchNode(len, nextNode), s(bytes) {
- hash=static_cast<int32_t>(
- static_cast<uint32_t>(hash)*37u + static_cast<uint32_t>(ustr_hashCharsN(bytes, len)));
-}
-
-UBool
-BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!LinearMatchNode::operator==(other)) {
- return FALSE;
- }
- const BTLinearMatchNode &o=(const BTLinearMatchNode &)other;
- return 0==uprv_memcmp(s, o.s, length);
-}
-
-void
-BytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) {
- BytesTrieBuilder &b=(BytesTrieBuilder &)builder;
- next->write(builder);
- b.write(s, length);
- offset=b.write(b.getMinLinearMatch()+length-1);
-}
-
-StringTrieBuilder::Node *
-BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
- Node *nextNode) const {
- return new BTLinearMatchNode(
- elements[i].getString(*strings).data()+byteIndex,
- length,
- nextNode);
-}
-
-UBool
-BytesTrieBuilder::ensureCapacity(int32_t length) {
- if(bytes==NULL) {
- return FALSE; // previous memory allocation had failed
- }
- if(length>bytesCapacity) {
- int32_t newCapacity=bytesCapacity;
- do {
- newCapacity*=2;
- } while(newCapacity<=length);
- char *newBytes=static_cast<char *>(uprv_malloc(newCapacity));
- if(newBytes==NULL) {
- // unable to allocate memory
- uprv_free(bytes);
- bytes=NULL;
- bytesCapacity=0;
- return FALSE;
- }
- uprv_memcpy(newBytes+(newCapacity-bytesLength),
- bytes+(bytesCapacity-bytesLength), bytesLength);
- uprv_free(bytes);
- bytes=newBytes;
- bytesCapacity=newCapacity;
- }
- return TRUE;
-}
-
-int32_t
-BytesTrieBuilder::write(int32_t byte) {
- int32_t newLength=bytesLength+1;
- if(ensureCapacity(newLength)) {
- bytesLength=newLength;
- bytes[bytesCapacity-bytesLength]=(char)byte;
- }
- return bytesLength;
-}
-
-int32_t
-BytesTrieBuilder::write(const char *b, int32_t length) {
- int32_t newLength=bytesLength+length;
- if(ensureCapacity(newLength)) {
- bytesLength=newLength;
- uprv_memcpy(bytes+(bytesCapacity-bytesLength), b, length);
- }
- return bytesLength;
-}
-
-int32_t
-BytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) {
- return write(elements[i].getString(*strings).data()+byteIndex, length);
-}
-
-int32_t
-BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) {
- if(0<=i && i<=BytesTrie::kMaxOneByteValue) {
- return write(((BytesTrie::kMinOneByteValueLead+i)<<1)|isFinal);
- }
- char intBytes[5];
- int32_t length=1;
- if(i<0 || i>0xffffff) {
- intBytes[0]=(char)BytesTrie::kFiveByteValueLead;
- intBytes[1]=(char)((uint32_t)i>>24);
- intBytes[2]=(char)((uint32_t)i>>16);
- intBytes[3]=(char)((uint32_t)i>>8);
- intBytes[4]=(char)i;
- length=5;
- // } else if(i<=BytesTrie::kMaxOneByteValue) {
- // intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i);
- } else {
- if(i<=BytesTrie::kMaxTwoByteValue) {
- intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8));
- } else {
- if(i<=BytesTrie::kMaxThreeByteValue) {
- intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16));
- } else {
- intBytes[0]=(char)BytesTrie::kFourByteValueLead;
- intBytes[1]=(char)(i>>16);
- length=2;
- }
- intBytes[length++]=(char)(i>>8);
- }
- intBytes[length++]=(char)i;
- }
- intBytes[0]=(char)((intBytes[0]<<1)|isFinal);
- return write(intBytes, length);
-}
-
-int32_t
-BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
- int32_t offset=write(node);
- if(hasValue) {
- offset=writeValueAndFinal(value, FALSE);
- }
- return offset;
-}
-
-int32_t
-BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
- int32_t i=bytesLength-jumpTarget;
- U_ASSERT(i>=0);
- if(i<=BytesTrie::kMaxOneByteDelta) {
- return write(i);
- }
- char intBytes[5];
- int32_t length;
- if(i<=BytesTrie::kMaxTwoByteDelta) {
- intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
- length=1;
- } else {
- if(i<=BytesTrie::kMaxThreeByteDelta) {
- intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
- length=2;
- } else {
- if(i<=0xffffff) {
- intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
- length=3;
- } else {
- intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
- intBytes[1]=(char)(i>>24);
- length=4;
- }
- intBytes[1]=(char)(i>>16);
- }
- intBytes[1]=(char)(i>>8);
- }
- intBytes[length++]=(char)i;
- return write(intBytes, length);
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/bytestrieiterator.cpp b/contrib/libs/icu/common/bytestrieiterator.cpp
deleted file mode 100644
index e64961a1f13..00000000000
--- a/contrib/libs/icu/common/bytestrieiterator.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2010-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: bytestrieiterator.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010nov03
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/bytestrie.h"
-#include "unicode/stringpiece.h"
-#include "charstr.h"
-#include "uvectr32.h"
-
-U_NAMESPACE_BEGIN
-
-BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
- UErrorCode &errorCode)
- : bytes_(static_cast<const uint8_t *>(trieBytes)),
- pos_(bytes_), initialPos_(bytes_),
- remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
- str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
- if(U_FAILURE(errorCode)) {
- return;
- }
- // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
- // a public API header for which we would want it to depend only on
- // other public headers.
- // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
- // via the CharString and UVector32 implementations, so this additional
- // cost is minimal.
- str_=new CharString();
- stack_=new UVector32(errorCode);
- if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- }
-}
-
-BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
- UErrorCode &errorCode)
- : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
- remainingMatchLength_(trie.remainingMatchLength_),
- initialRemainingMatchLength_(trie.remainingMatchLength_),
- str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
- if(U_FAILURE(errorCode)) {
- return;
- }
- str_=new CharString();
- stack_=new UVector32(errorCode);
- if(U_FAILURE(errorCode)) {
- return;
- }
- if(str_==NULL || stack_==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
- if(length>=0) {
- // Pending linear-match node, append remaining bytes to str_.
- ++length;
- if(maxLength_>0 && length>maxLength_) {
- length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
- }
- str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
- pos_+=length;
- remainingMatchLength_-=length;
- }
-}
-
-BytesTrie::Iterator::~Iterator() {
- delete str_;
- delete stack_;
-}
-
-BytesTrie::Iterator &
-BytesTrie::Iterator::reset() {
- pos_=initialPos_;
- remainingMatchLength_=initialRemainingMatchLength_;
- int32_t length=remainingMatchLength_+1; // Remaining match length.
- if(maxLength_>0 && length>maxLength_) {
- length=maxLength_;
- }
- str_->truncate(length);
- pos_+=length;
- remainingMatchLength_-=length;
- stack_->setSize(0);
- return *this;
-}
-
-UBool
-BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
-
-UBool
-BytesTrie::Iterator::next(UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- const uint8_t *pos=pos_;
- if(pos==NULL) {
- if(stack_->isEmpty()) {
- return FALSE;
- }
- // Pop the state off the stack and continue with the next outbound edge of
- // the branch node.
- int32_t stackSize=stack_->size();
- int32_t length=stack_->elementAti(stackSize-1);
- pos=bytes_+stack_->elementAti(stackSize-2);
- stack_->setSize(stackSize-2);
- str_->truncate(length&0xffff);
- length=(int32_t)((uint32_t)length>>16);
- if(length>1) {
- pos=branchNext(pos, length, errorCode);
- if(pos==NULL) {
- return TRUE; // Reached a final value.
- }
- } else {
- str_->append((char)*pos++, errorCode);
- }
- }
- if(remainingMatchLength_>=0) {
- // We only get here if we started in a pending linear-match node
- // with more than maxLength remaining bytes.
- return truncateAndStop();
- }
- for(;;) {
- int32_t node=*pos++;
- if(node>=kMinValueLead) {
- // Deliver value for the byte sequence so far.
- UBool isFinal=(UBool)(node&kValueIsFinal);
- value_=readValue(pos, node>>1);
- if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
- pos_=NULL;
- } else {
- pos_=skipValue(pos, node);
- }
- return TRUE;
- }
- if(maxLength_>0 && str_->length()==maxLength_) {
- return truncateAndStop();
- }
- if(node<kMinLinearMatch) {
- if(node==0) {
- node=*pos++;
- }
- pos=branchNext(pos, node+1, errorCode);
- if(pos==NULL) {
- return TRUE; // Reached a final value.
- }
- } else {
- // Linear-match node, append length bytes to str_.
- int32_t length=node-kMinLinearMatch+1;
- if(maxLength_>0 && str_->length()+length>maxLength_) {
- str_->append(reinterpret_cast<const char *>(pos),
- maxLength_-str_->length(), errorCode);
- return truncateAndStop();
- }
- str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
- pos+=length;
- }
- }
-}
-
-StringPiece
-BytesTrie::Iterator::getString() const {
- return str_ == NULL ? StringPiece() : str_->toStringPiece();
-}
-
-UBool
-BytesTrie::Iterator::truncateAndStop() {
- pos_=NULL;
- value_=-1; // no real value for str
- return TRUE;
-}
-
-// Branch node, needs to take the first outbound edge and push state for the rest.
-const uint8_t *
-BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
- while(length>kMaxBranchLinearSubNodeLength) {
- ++pos; // ignore the comparison byte
- // Push state for the greater-or-equal edge.
- stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
- stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
- // Follow the less-than edge.
- length>>=1;
- pos=jumpByDelta(pos);
- }
- // List of key-value pairs where values are either final values or jump deltas.
- // Read the first (key, value) pair.
- uint8_t trieByte=*pos++;
- int32_t node=*pos++;
- UBool isFinal=(UBool)(node&kValueIsFinal);
- int32_t value=readValue(pos, node>>1);
- pos=skipValue(pos, node);
- stack_->addElement((int32_t)(pos-bytes_), errorCode);
- stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
- str_->append((char)trieByte, errorCode);
- if(isFinal) {
- pos_=NULL;
- value_=value;
- return NULL;
- } else {
- return pos+value;
- }
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/caniter.cpp b/contrib/libs/icu/common/caniter.cpp
deleted file mode 100644
index b28acfc84ee..00000000000
--- a/contrib/libs/icu/common/caniter.cpp
+++ /dev/null
@@ -1,586 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- *****************************************************************************
- * Copyright (C) 1996-2015, International Business Machines Corporation and
- * others. All Rights Reserved.
- *****************************************************************************
- */
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_NORMALIZATION
-
-#include "unicode/caniter.h"
-#include "unicode/normalizer2.h"
-#include "unicode/uchar.h"
-#include "unicode/uniset.h"
-#include "unicode/usetiter.h"
-#include "unicode/ustring.h"
-#include "unicode/utf16.h"
-#include "cmemory.h"
-#include "hash.h"
-#include "normalizer2impl.h"
-
-/**
- * This class allows one to iterate through all the strings that are canonically equivalent to a given
- * string. For example, here are some sample results:
-Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
-1: \u0041\u030A\u0064\u0307\u0327
- = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
-2: \u0041\u030A\u0064\u0327\u0307
- = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
-3: \u0041\u030A\u1E0B\u0327
- = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
-4: \u0041\u030A\u1E11\u0307
- = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
-5: \u00C5\u0064\u0307\u0327
- = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
-6: \u00C5\u0064\u0327\u0307
- = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
-7: \u00C5\u1E0B\u0327
- = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
-8: \u00C5\u1E11\u0307
- = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
-9: \u212B\u0064\u0307\u0327
- = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
-10: \u212B\u0064\u0327\u0307
- = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
-11: \u212B\u1E0B\u0327
- = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
-12: \u212B\u1E11\u0307
- = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
- *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
- * since it has not been optimized for that situation.
- *@author M. Davis
- *@draft
- */
-
-// public
-
-U_NAMESPACE_BEGIN
-
-// TODO: add boilerplate methods.
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)
-
-/**
- *@param source string to get results for
- */
-CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode &status) :
- pieces(NULL),
- pieces_length(0),
- pieces_lengths(NULL),
- current(NULL),
- current_length(0),
- nfd(*Normalizer2::getNFDInstance(status)),
- nfcImpl(*Normalizer2Factory::getNFCImpl(status))
-{
- if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) {
- setSource(sourceStr, status);
- }
-}
-
-CanonicalIterator::~CanonicalIterator() {
- cleanPieces();
-}
-
-void CanonicalIterator::cleanPieces() {
- int32_t i = 0;
- if(pieces != NULL) {
- for(i = 0; i < pieces_length; i++) {
- if(pieces[i] != NULL) {
- delete[] pieces[i];
- }
- }
- uprv_free(pieces);
- pieces = NULL;
- pieces_length = 0;
- }
- if(pieces_lengths != NULL) {
- uprv_free(pieces_lengths);
- pieces_lengths = NULL;
- }
- if(current != NULL) {
- uprv_free(current);
- current = NULL;
- current_length = 0;
- }
-}
-
-/**
- *@return gets the source: NOTE: it is the NFD form of source
- */
-UnicodeString CanonicalIterator::getSource() {
- return source;
-}
-
-/**
- * Resets the iterator so that one can start again from the beginning.
- */
-void CanonicalIterator::reset() {
- done = FALSE;
- for (int i = 0; i < current_length; ++i) {
- current[i] = 0;
- }
-}
-
-/**
- *@return the next string that is canonically equivalent. The value null is returned when
- * the iteration is done.
- */
-UnicodeString CanonicalIterator::next() {
- int32_t i = 0;
-
- if (done) {
- buffer.setToBogus();
- return buffer;
- }
-
- // delete old contents
- buffer.remove();
-
- // construct return value
-
- for (i = 0; i < pieces_length; ++i) {
- buffer.append(pieces[i][current[i]]);
- }
- //String result = buffer.toString(); // not needed
-
- // find next value for next time
-
- for (i = current_length - 1; ; --i) {
- if (i < 0) {
- done = TRUE;
- break;
- }
- current[i]++;
- if (current[i] < pieces_lengths[i]) break; // got sequence
- current[i] = 0;
- }
- return buffer;
-}
-
-/**
- *@param set the source string to iterate against. This allows the same iterator to be used
- * while changing the source string, saving object creation.
- */
-void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &status) {
- int32_t list_length = 0;
- UChar32 cp = 0;
- int32_t start = 0;
- int32_t i = 0;
- UnicodeString *list = NULL;
-
- nfd.normalize(newSource, source, status);
- if(U_FAILURE(status)) {
- return;
- }
- done = FALSE;
-
- cleanPieces();
-
- // catch degenerate case
- if (newSource.length() == 0) {
- pieces = (UnicodeString **)uprv_malloc(sizeof(UnicodeString *));
- pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
- pieces_length = 1;
- current = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
- current_length = 1;
- if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- goto CleanPartialInitialization;
- }
- current[0] = 0;
- pieces[0] = new UnicodeString[1];
- pieces_lengths[0] = 1;
- if (pieces[0] == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- goto CleanPartialInitialization;
- }
- return;
- }
-
-
- list = new UnicodeString[source.length()];
- if (list == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- goto CleanPartialInitialization;
- }
-
- // i should initialy be the number of code units at the
- // start of the string
- i = U16_LENGTH(source.char32At(0));
- //int32_t i = 1;
- // find the segments
- // This code iterates through the source string and
- // extracts segments that end up on a codepoint that
- // doesn't start any decompositions. (Analysis is done
- // on the NFD form - see above).
- for (; i < source.length(); i += U16_LENGTH(cp)) {
- cp = source.char32At(i);
- if (nfcImpl.isCanonSegmentStarter(cp)) {
- source.extract(start, i-start, list[list_length++]); // add up to i
- start = i;
- }
- }
- source.extract(start, i-start, list[list_length++]); // add last one
-
-
- // allocate the arrays, and find the strings that are CE to each segment
- pieces = (UnicodeString **)uprv_malloc(list_length * sizeof(UnicodeString *));
- pieces_length = list_length;
- pieces_lengths = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
- current = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
- current_length = list_length;
- if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- goto CleanPartialInitialization;
- }
-
- for (i = 0; i < current_length; i++) {
- current[i] = 0;
- }
- // for each segment, get all the combinations that can produce
- // it after NFD normalization
- for (i = 0; i < pieces_length; ++i) {
- //if (PROGRESS) printf("SEGMENT\n");
- pieces[i] = getEquivalents(list[i], pieces_lengths[i], status);
- }
-
- delete[] list;
- return;
-// Common section to cleanup all local variables and reset object variables.
-CleanPartialInitialization:
- if (list != NULL) {
- delete[] list;
- }
- cleanPieces();
-}
-
-/**
- * Dumb recursive implementation of permutation.
- * TODO: optimize
- * @param source the string to find permutations for
- * @return the results in a set.
- */
-void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
- if(U_FAILURE(status)) {
- return;
- }
- //if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
- int32_t i = 0;
-
- // optimization:
- // if zero or one character, just return a set with it
- // we check for length < 2 to keep from counting code points all the time
- if (source.length() <= 2 && source.countChar32() <= 1) {
- UnicodeString *toPut = new UnicodeString(source);
- /* test for NULL */
- if (toPut == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- result->put(source, toPut, status);
- return;
- }
-
- // otherwise iterate through the string, and recursively permute all the other characters
- UChar32 cp;
- Hashtable subpermute(status);
- if(U_FAILURE(status)) {
- return;
- }
- subpermute.setValueDeleter(uprv_deleteUObject);
-
- for (i = 0; i < source.length(); i += U16_LENGTH(cp)) {
- cp = source.char32At(i);
- const UHashElement *ne = NULL;
- int32_t el = UHASH_FIRST;
- UnicodeString subPermuteString = source;
-
- // optimization:
- // if the character is canonical combining class zero,
- // don't permute it
- if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) {
- //System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i)));
- continue;
- }
-
- subpermute.removeAll();
-
- // see what the permutations of the characters before and after this one are
- //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
- permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status);
- /* Test for buffer overflows */
- if(U_FAILURE(status)) {
- return;
- }
- // The upper remove is destructive. The question is do we have to make a copy, or we don't care about the contents
- // of source at this point.
-
- // prefix this character to all of them
- ne = subpermute.nextElement(el);
- while (ne != NULL) {
- UnicodeString *permRes = (UnicodeString *)(ne->value.pointer);
- UnicodeString *chStr = new UnicodeString(cp);
- //test for NULL
- if (chStr == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- chStr->append(*permRes); //*((UnicodeString *)(ne->value.pointer));
- //if (PROGRESS) printf(" Piece: %s\n", UToS(*chStr));
- result->put(*chStr, chStr, status);
- ne = subpermute.nextElement(el);
- }
- }
- //return result;
-}
-
-// privates
-
-// we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
-UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status) {
- Hashtable result(status);
- Hashtable permutations(status);
- Hashtable basic(status);
- if (U_FAILURE(status)) {
- return 0;
- }
- result.setValueDeleter(uprv_deleteUObject);
- permutations.setValueDeleter(uprv_deleteUObject);
- basic.setValueDeleter(uprv_deleteUObject);
-
- UChar USeg[256];
- int32_t segLen = segment.extract(USeg, 256, status);
- getEquivalents2(&basic, USeg, segLen, status);
-
- // now get all the permutations
- // add only the ones that are canonically equivalent
- // TODO: optimize by not permuting any class zero.
-
- const UHashElement *ne = NULL;
- int32_t el = UHASH_FIRST;
- //Iterator it = basic.iterator();
- ne = basic.nextElement(el);
- //while (it.hasNext())
- while (ne != NULL) {
- //String item = (String) it.next();
- UnicodeString item = *((UnicodeString *)(ne->value.pointer));
-
- permutations.removeAll();
- permute(item, CANITER_SKIP_ZEROES, &permutations, status);
- const UHashElement *ne2 = NULL;
- int32_t el2 = UHASH_FIRST;
- //Iterator it2 = permutations.iterator();
- ne2 = permutations.nextElement(el2);
- //while (it2.hasNext())
- while (ne2 != NULL) {
- //String possible = (String) it2.next();
- //UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
- UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
- UnicodeString attempt;
- nfd.normalize(possible, attempt, status);
-
- // TODO: check if operator == is semanticaly the same as attempt.equals(segment)
- if (attempt==segment) {
- //if (PROGRESS) printf("Adding Permutation: %s\n", UToS(Tr(*possible)));
- // TODO: use the hashtable just to catch duplicates - store strings directly (somehow).
- result.put(possible, new UnicodeString(possible), status); //add(possible);
- } else {
- //if (PROGRESS) printf("-Skipping Permutation: %s\n", UToS(Tr(*possible)));
- }
-
- ne2 = permutations.nextElement(el2);
- }
- ne = basic.nextElement(el);
- }
-
- /* Test for buffer overflows */
- if(U_FAILURE(status)) {
- return 0;
- }
- // convert into a String[] to clean up storage
- //String[] finalResult = new String[result.size()];
- UnicodeString *finalResult = NULL;
- int32_t resultCount;
- if((resultCount = result.count()) != 0) {
- finalResult = new UnicodeString[resultCount];
- if (finalResult == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- }
- else {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- //result.toArray(finalResult);
- result_len = 0;
- el = UHASH_FIRST;
- ne = result.nextElement(el);
- while(ne != NULL) {
- finalResult[result_len++] = *((UnicodeString *)(ne->value.pointer));
- ne = result.nextElement(el);
- }
-
-
- return finalResult;
-}
-
-Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) {
-
- if (U_FAILURE(status)) {
- return NULL;
- }
-
- //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(segment)));
-
- UnicodeString toPut(segment, segLen);
-
- fillinResult->put(toPut, new UnicodeString(toPut), status);
-
- UnicodeSet starts;
-
- // cycle through all the characters
- UChar32 cp;
- for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
- // see if any character is at the start of some decomposition
- U16_GET(segment, 0, i, segLen, cp);
- if (!nfcImpl.getCanonStartSet(cp, starts)) {
- continue;
- }
- // if so, see which decompositions match
- UnicodeSetIterator iter(starts);
- while (iter.next()) {
- UChar32 cp2 = iter.getCodepoint();
- Hashtable remainder(status);
- remainder.setValueDeleter(uprv_deleteUObject);
- if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
- continue;
- }
-
- // there were some matches, so add all the possibilities to the set.
- UnicodeString prefix(segment, i);
- prefix += cp2;
-
- int32_t el = UHASH_FIRST;
- const UHashElement *ne = remainder.nextElement(el);
- while (ne != NULL) {
- UnicodeString item = *((UnicodeString *)(ne->value.pointer));
- UnicodeString *toAdd = new UnicodeString(prefix);
- /* test for NULL */
- if (toAdd == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- *toAdd += item;
- fillinResult->put(*toAdd, toAdd, status);
-
- //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd)));
-
- ne = remainder.nextElement(el);
- }
- }
- }
-
- /* Test for buffer overflows */
- if(U_FAILURE(status)) {
- return NULL;
- }
- return fillinResult;
-}
-
-/**
- * See if the decomposition of cp2 is at segment starting at segmentPos
- * (with canonical rearrangment!)
- * If so, take the remainder, and return the equivalents
- */
-Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
-//Hashtable *CanonicalIterator::extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
- //if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp))));
- //if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos);
-
- if (U_FAILURE(status)) {
- return NULL;
- }
-
- UnicodeString temp(comp);
- int32_t inputLen=temp.length();
- UnicodeString decompString;
- nfd.normalize(temp, decompString, status);
- if (U_FAILURE(status)) {
- return NULL;
- }
- if (decompString.isBogus()) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- const UChar *decomp=decompString.getBuffer();
- int32_t decompLen=decompString.length();
-
- // See if it matches the start of segment (at segmentPos)
- UBool ok = FALSE;
- UChar32 cp;
- int32_t decompPos = 0;
- UChar32 decompCp;
- U16_NEXT(decomp, decompPos, decompLen, decompCp);
-
- int32_t i = segmentPos;
- while(i < segLen) {
- U16_NEXT(segment, i, segLen, cp);
-
- if (cp == decompCp) { // if equal, eat another cp from decomp
-
- //if (PROGRESS) printf(" matches: %s\n", UToS(Tr(UnicodeString(cp))));
-
- if (decompPos == decompLen) { // done, have all decomp characters!
- temp.append(segment+i, segLen-i);
- ok = TRUE;
- break;
- }
- U16_NEXT(decomp, decompPos, decompLen, decompCp);
- } else {
- //if (PROGRESS) printf(" buffer: %s\n", UToS(Tr(UnicodeString(cp))));
-
- // brute force approach
- temp.append(cp);
-
- /* TODO: optimize
- // since we know that the classes are monotonically increasing, after zero
- // e.g. 0 5 7 9 0 3
- // we can do an optimization
- // there are only a few cases that work: zero, less, same, greater
- // if both classes are the same, we fail
- // if the decomp class < the segment class, we fail
-
- segClass = getClass(cp);
- if (decompClass <= segClass) return null;
- */
- }
- }
- if (!ok)
- return NULL; // we failed, characters left over
-
- //if (PROGRESS) printf("Matches\n");
-
- if (inputLen == temp.length()) {
- fillinResult->put(UnicodeString(), new UnicodeString(), status);
- return fillinResult; // succeed, but no remainder
- }
-
- // brute force approach
- // check to make sure result is canonically equivalent
- UnicodeString trial;
- nfd.normalize(temp, trial, status);
- if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
- return NULL;
- }
-
- return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status);
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_NORMALIZATION */
diff --git a/contrib/libs/icu/common/capi_helper.h b/contrib/libs/icu/common/capi_helper.h
deleted file mode 100644
index 54b1db9e331..00000000000
--- a/contrib/libs/icu/common/capi_helper.h
+++ /dev/null
@@ -1,97 +0,0 @@
-// © 2018 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-#ifndef __CAPI_HELPER_H__
-#define __CAPI_HELPER_H__
-
-#include "unicode/utypes.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * An internal helper class to help convert between C and C++ APIs.
- */
-template<typename CType, typename CPPType, int32_t kMagic>
-class IcuCApiHelper {
- public:
- /**
- * Convert from the C type to the C++ type (const version).
- */
- static const CPPType* validate(const CType* input, UErrorCode& status);
-
- /**
- * Convert from the C type to the C++ type (non-const version).
- */
- static CPPType* validate(CType* input, UErrorCode& status);
-
- /**
- * Convert from the C++ type to the C type (const version).
- */
- const CType* exportConstForC() const;
-
- /**
- * Convert from the C++ type to the C type (non-const version).
- */
- CType* exportForC();
-
- /**
- * Invalidates the object.
- */
- ~IcuCApiHelper();
-
- private:
- /**
- * While the object is valid, fMagic equals kMagic.
- */
- int32_t fMagic = kMagic;
-};
-
-
-template<typename CType, typename CPPType, int32_t kMagic>
-const CPPType*
-IcuCApiHelper<CType, CPPType, kMagic>::validate(const CType* input, UErrorCode& status) {
- if (U_FAILURE(status)) {
- return nullptr;
- }
- if (input == nullptr) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- auto* impl = reinterpret_cast<const CPPType*>(input);
- if (static_cast<const IcuCApiHelper<CType, CPPType, kMagic>*>(impl)->fMagic != kMagic) {
- status = U_INVALID_FORMAT_ERROR;
- return nullptr;
- }
- return impl;
-}
-
-template<typename CType, typename CPPType, int32_t kMagic>
-CPPType*
-IcuCApiHelper<CType, CPPType, kMagic>::validate(CType* input, UErrorCode& status) {
- auto* constInput = static_cast<const CType*>(input);
- auto* validated = validate(constInput, status);
- return const_cast<CPPType*>(validated);
-}
-
-template<typename CType, typename CPPType, int32_t kMagic>
-const CType*
-IcuCApiHelper<CType, CPPType, kMagic>::exportConstForC() const {
- return reinterpret_cast<const CType*>(static_cast<const CPPType*>(this));
-}
-
-template<typename CType, typename CPPType, int32_t kMagic>
-CType*
-IcuCApiHelper<CType, CPPType, kMagic>::exportForC() {
- return reinterpret_cast<CType*>(static_cast<CPPType*>(this));
-}
-
-template<typename CType, typename CPPType, int32_t kMagic>
-IcuCApiHelper<CType, CPPType, kMagic>::~IcuCApiHelper() {
- // head off application errors by preventing use of of deleted objects.
- fMagic = 0;
-}
-
-
-U_NAMESPACE_END
-
-#endif // __CAPI_HELPER_H__
diff --git a/contrib/libs/icu/common/characterproperties.cpp b/contrib/libs/icu/common/characterproperties.cpp
deleted file mode 100644
index 7b50a4e2051..00000000000
--- a/contrib/libs/icu/common/characterproperties.cpp
+++ /dev/null
@@ -1,383 +0,0 @@
-// © 2018 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-// characterproperties.cpp
-// created: 2018sep03 Markus W. Scherer
-
-#include "unicode/utypes.h"
-#include "unicode/localpointer.h"
-#include "unicode/uchar.h"
-#include "unicode/ucpmap.h"
-#include "unicode/ucptrie.h"
-#include "unicode/umutablecptrie.h"
-#include "unicode/uniset.h"
-#include "unicode/uscript.h"
-#include "unicode/uset.h"
-#include "cmemory.h"
-#include "mutex.h"
-#include "normalizer2impl.h"
-#include "uassert.h"
-#include "ubidi_props.h"
-#include "ucase.h"
-#include "ucln_cmn.h"
-#include "umutex.h"
-#include "uprops.h"
-
-using icu::LocalPointer;
-#if !UCONFIG_NO_NORMALIZATION
-using icu::Normalizer2Factory;
-using icu::Normalizer2Impl;
-#endif
-using icu::UInitOnce;
-using icu::UnicodeSet;
-
-namespace {
-
-UBool U_CALLCONV characterproperties_cleanup();
-
-constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START;
-
-struct Inclusion {
- UnicodeSet *fSet = nullptr;
- UInitOnce fInitOnce = U_INITONCE_INITIALIZER;
-};
-Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions()
-
-UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {};
-
-UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {};
-
-icu::UMutex cpMutex;
-
-//----------------------------------------------------------------
-// Inclusions list
-//----------------------------------------------------------------
-
-// USetAdder implementation
-// Does not use uset.h to reduce code dependencies
-void U_CALLCONV
-_set_add(USet *set, UChar32 c) {
- ((UnicodeSet *)set)->add(c);
-}
-
-void U_CALLCONV
-_set_addRange(USet *set, UChar32 start, UChar32 end) {
- ((UnicodeSet *)set)->add(start, end);
-}
-
-void U_CALLCONV
-_set_addString(USet *set, const UChar *str, int32_t length) {
- ((UnicodeSet *)set)->add(icu::UnicodeString((UBool)(length<0), str, length));
-}
-
-UBool U_CALLCONV characterproperties_cleanup() {
- for (Inclusion &in: gInclusions) {
- delete in.fSet;
- in.fSet = nullptr;
- in.fInitOnce.reset();
- }
- for (int32_t i = 0; i < UPRV_LENGTHOF(sets); ++i) {
- delete sets[i];
- sets[i] = nullptr;
- }
- for (int32_t i = 0; i < UPRV_LENGTHOF(maps); ++i) {
- ucptrie_close(reinterpret_cast<UCPTrie *>(maps[i]));
- maps[i] = nullptr;
- }
- return TRUE;
-}
-
-void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
- // This function is invoked only via umtx_initOnce().
- U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT);
- if (src == UPROPS_SRC_NONE) {
- errorCode = U_INTERNAL_PROGRAM_ERROR;
- return;
- }
- U_ASSERT(gInclusions[src].fSet == nullptr);
-
- LocalPointer<UnicodeSet> incl(new UnicodeSet());
- if (incl.isNull()) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- USetAdder sa = {
- (USet *)incl.getAlias(),
- _set_add,
- _set_addRange,
- _set_addString,
- nullptr, // don't need remove()
- nullptr // don't need removeRange()
- };
-
- switch(src) {
- case UPROPS_SRC_CHAR:
- uchar_addPropertyStarts(&sa, &errorCode);
- break;
- case UPROPS_SRC_PROPSVEC:
- upropsvec_addPropertyStarts(&sa, &errorCode);
- break;
- case UPROPS_SRC_CHAR_AND_PROPSVEC:
- uchar_addPropertyStarts(&sa, &errorCode);
- upropsvec_addPropertyStarts(&sa, &errorCode);
- break;
-#if !UCONFIG_NO_NORMALIZATION
- case UPROPS_SRC_CASE_AND_NORM: {
- const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
- if(U_SUCCESS(errorCode)) {
- impl->addPropertyStarts(&sa, errorCode);
- }
- ucase_addPropertyStarts(&sa, &errorCode);
- break;
- }
- case UPROPS_SRC_NFC: {
- const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
- if(U_SUCCESS(errorCode)) {
- impl->addPropertyStarts(&sa, errorCode);
- }
- break;
- }
- case UPROPS_SRC_NFKC: {
- const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(errorCode);
- if(U_SUCCESS(errorCode)) {
- impl->addPropertyStarts(&sa, errorCode);
- }
- break;
- }
- case UPROPS_SRC_NFKC_CF: {
- const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(errorCode);
- if(U_SUCCESS(errorCode)) {
- impl->addPropertyStarts(&sa, errorCode);
- }
- break;
- }
- case UPROPS_SRC_NFC_CANON_ITER: {
- const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
- if(U_SUCCESS(errorCode)) {
- impl->addCanonIterPropertyStarts(&sa, errorCode);
- }
- break;
- }
-#endif
- case UPROPS_SRC_CASE:
- ucase_addPropertyStarts(&sa, &errorCode);
- break;
- case UPROPS_SRC_BIDI:
- ubidi_addPropertyStarts(&sa, &errorCode);
- break;
- case UPROPS_SRC_INPC:
- case UPROPS_SRC_INSC:
- case UPROPS_SRC_VO:
- uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode);
- break;
- default:
- errorCode = U_INTERNAL_PROGRAM_ERROR;
- break;
- }
-
- if (U_FAILURE(errorCode)) {
- return;
- }
- if (incl->isBogus()) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- // Compact for caching.
- incl->compact();
- gInclusions[src].fSet = incl.orphan();
- ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
-}
-
-const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return nullptr; }
- if (src < 0 || UPROPS_SRC_COUNT <= src) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- Inclusion &i = gInclusions[src];
- umtx_initOnce(i.fInitOnce, &initInclusion, src, errorCode);
- return i.fSet;
-}
-
-void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) {
- // This function is invoked only via umtx_initOnce().
- U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT);
- int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
- U_ASSERT(gInclusions[inclIndex].fSet == nullptr);
- UPropertySource src = uprops_getSource(prop);
- const UnicodeSet *incl = getInclusionsForSource(src, errorCode);
- if (U_FAILURE(errorCode)) {
- return;
- }
-
- LocalPointer<UnicodeSet> intPropIncl(new UnicodeSet(0, 0));
- if (intPropIncl.isNull()) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- int32_t numRanges = incl->getRangeCount();
- int32_t prevValue = 0;
- for (int32_t i = 0; i < numRanges; ++i) {
- UChar32 rangeEnd = incl->getRangeEnd(i);
- for (UChar32 c = incl->getRangeStart(i); c <= rangeEnd; ++c) {
- // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
- int32_t value = u_getIntPropertyValue(c, prop);
- if (value != prevValue) {
- intPropIncl->add(c);
- prevValue = value;
- }
- }
- }
-
- if (intPropIncl->isBogus()) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- // Compact for caching.
- intPropIncl->compact();
- gInclusions[inclIndex].fSet = intPropIncl.orphan();
- ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
-}
-
-} // namespace
-
-U_NAMESPACE_BEGIN
-
-const UnicodeSet *CharacterProperties::getInclusionsForProperty(
- UProperty prop, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return nullptr; }
- if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
- int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
- Inclusion &i = gInclusions[inclIndex];
- umtx_initOnce(i.fInitOnce, &initIntPropInclusion, prop, errorCode);
- return i.fSet;
- } else {
- UPropertySource src = uprops_getSource(prop);
- return getInclusionsForSource(src, errorCode);
- }
-}
-
-U_NAMESPACE_END
-
-namespace {
-
-UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return nullptr; }
- LocalPointer<UnicodeSet> set(new UnicodeSet());
- if (set.isNull()) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
- const UnicodeSet *inclusions =
- icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
- if (U_FAILURE(errorCode)) { return nullptr; }
- int32_t numRanges = inclusions->getRangeCount();
- UChar32 startHasProperty = -1;
-
- for (int32_t i = 0; i < numRanges; ++i) {
- UChar32 rangeEnd = inclusions->getRangeEnd(i);
- for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) {
- // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch.
- if (u_hasBinaryProperty(c, property)) {
- if (startHasProperty < 0) {
- // Transition from false to true.
- startHasProperty = c;
- }
- } else if (startHasProperty >= 0) {
- // Transition from true to false.
- set->add(startHasProperty, c - 1);
- startHasProperty = -1;
- }
- }
- }
- if (startHasProperty >= 0) {
- set->add(startHasProperty, 0x10FFFF);
- }
- set->freeze();
- return set.orphan();
-}
-
-UCPMap *makeMap(UProperty property, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return nullptr; }
- uint32_t nullValue = property == UCHAR_SCRIPT ? USCRIPT_UNKNOWN : 0;
- icu::LocalUMutableCPTriePointer mutableTrie(
- umutablecptrie_open(nullValue, nullValue, &errorCode));
- const UnicodeSet *inclusions =
- icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
- if (U_FAILURE(errorCode)) { return nullptr; }
- int32_t numRanges = inclusions->getRangeCount();
- UChar32 start = 0;
- uint32_t value = nullValue;
-
- for (int32_t i = 0; i < numRanges; ++i) {
- UChar32 rangeEnd = inclusions->getRangeEnd(i);
- for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) {
- // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
- uint32_t nextValue = u_getIntPropertyValue(c, property);
- if (value != nextValue) {
- if (value != nullValue) {
- umutablecptrie_setRange(mutableTrie.getAlias(), start, c - 1, value, &errorCode);
- }
- start = c;
- value = nextValue;
- }
- }
- }
- if (value != 0) {
- umutablecptrie_setRange(mutableTrie.getAlias(), start, 0x10FFFF, value, &errorCode);
- }
-
- UCPTrieType type;
- if (property == UCHAR_BIDI_CLASS || property == UCHAR_GENERAL_CATEGORY) {
- type = UCPTRIE_TYPE_FAST;
- } else {
- type = UCPTRIE_TYPE_SMALL;
- }
- UCPTrieValueWidth valueWidth;
- // TODO: UCharacterProperty.IntProperty
- int32_t max = u_getIntPropertyMaxValue(property);
- if (max <= 0xff) {
- valueWidth = UCPTRIE_VALUE_BITS_8;
- } else if (max <= 0xffff) {
- valueWidth = UCPTRIE_VALUE_BITS_16;
- } else {
- valueWidth = UCPTRIE_VALUE_BITS_32;
- }
- return reinterpret_cast<UCPMap *>(
- umutablecptrie_buildImmutable(mutableTrie.getAlias(), type, valueWidth, &errorCode));
-}
-
-} // namespace
-
-U_NAMESPACE_USE
-
-U_CAPI const USet * U_EXPORT2
-u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) { return nullptr; }
- if (property < 0 || UCHAR_BINARY_LIMIT <= property) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- Mutex m(&cpMutex);
- UnicodeSet *set = sets[property];
- if (set == nullptr) {
- sets[property] = set = makeSet(property, *pErrorCode);
- }
- if (U_FAILURE(*pErrorCode)) { return nullptr; }
- return set->toUSet();
-}
-
-U_CAPI const UCPMap * U_EXPORT2
-u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) { return nullptr; }
- if (property < UCHAR_INT_START || UCHAR_INT_LIMIT <= property) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- Mutex m(&cpMutex);
- UCPMap *map = maps[property - UCHAR_INT_START];
- if (map == nullptr) {
- maps[property - UCHAR_INT_START] = map = makeMap(property, *pErrorCode);
- }
- return map;
-}
diff --git a/contrib/libs/icu/common/chariter.cpp b/contrib/libs/icu/common/chariter.cpp
deleted file mode 100644
index 887119a0eba..00000000000
--- a/contrib/libs/icu/common/chariter.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1999-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#include "unicode/chariter.h"
-
-U_NAMESPACE_BEGIN
-
-ForwardCharacterIterator::~ForwardCharacterIterator() {}
-ForwardCharacterIterator::ForwardCharacterIterator()
-: UObject()
-{}
-ForwardCharacterIterator::ForwardCharacterIterator(const ForwardCharacterIterator &other)
-: UObject(other)
-{}
-
-
-CharacterIterator::CharacterIterator()
-: textLength(0), pos(0), begin(0), end(0) {
-}
-
-CharacterIterator::CharacterIterator(int32_t length)
-: textLength(length), pos(0), begin(0), end(length) {
- if(textLength < 0) {
- textLength = end = 0;
- }
-}
-
-CharacterIterator::CharacterIterator(int32_t length, int32_t position)
-: textLength(length), pos(position), begin(0), end(length) {
- if(textLength < 0) {
- textLength = end = 0;
- }
- if(pos < 0) {
- pos = 0;
- } else if(pos > end) {
- pos = end;
- }
-}
-
-CharacterIterator::CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position)
-: textLength(length), pos(position), begin(textBegin), end(textEnd) {
- if(textLength < 0) {
- textLength = 0;
- }
- if(begin < 0) {
- begin = 0;
- } else if(begin > textLength) {
- begin = textLength;
- }
- if(end < begin) {
- end = begin;
- } else if(end > textLength) {
- end = textLength;
- }
- if(pos < begin) {
- pos = begin;
- } else if(pos > end) {
- pos = end;
- }
-}
-
-CharacterIterator::~CharacterIterator() {}
-
-CharacterIterator::CharacterIterator(const CharacterIterator &that) :
-ForwardCharacterIterator(that),
-textLength(that.textLength), pos(that.pos), begin(that.begin), end(that.end)
-{
-}
-
-CharacterIterator &
-CharacterIterator::operator=(const CharacterIterator &that) {
- ForwardCharacterIterator::operator=(that);
- textLength = that.textLength;
- pos = that.pos;
- begin = that.begin;
- end = that.end;
- return *this;
-}
-
-// implementing first[32]PostInc() directly in a subclass should be faster
-// but these implementations make subclassing a little easier
-UChar
-CharacterIterator::firstPostInc(void) {
- setToStart();
- return nextPostInc();
-}
-
-UChar32
-CharacterIterator::first32PostInc(void) {
- setToStart();
- return next32PostInc();
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/charstr.cpp b/contrib/libs/icu/common/charstr.cpp
deleted file mode 100644
index dda29dac632..00000000000
--- a/contrib/libs/icu/common/charstr.cpp
+++ /dev/null
@@ -1,215 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2010-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: charstr.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010may19
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/putil.h"
-#include "charstr.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "uinvchar.h"
-
-U_NAMESPACE_BEGIN
-
-CharString::CharString(CharString&& src) U_NOEXCEPT
- : buffer(std::move(src.buffer)), len(src.len) {
- src.len = 0; // not strictly necessary because we make no guarantees on the source string
-}
-
-CharString& CharString::operator=(CharString&& src) U_NOEXCEPT {
- buffer = std::move(src.buffer);
- len = src.len;
- src.len = 0; // not strictly necessary because we make no guarantees on the source string
- return *this;
-}
-
-char *CharString::cloneData(UErrorCode &errorCode) const {
- if (U_FAILURE(errorCode)) { return nullptr; }
- char *p = static_cast<char *>(uprv_malloc(len + 1));
- if (p == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
- uprv_memcpy(p, buffer.getAlias(), len + 1);
- return p;
-}
-
-CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
- if(U_SUCCESS(errorCode) && this!=&s && ensureCapacity(s.len+1, 0, errorCode)) {
- len=s.len;
- uprv_memcpy(buffer.getAlias(), s.buffer.getAlias(), len+1);
- }
- return *this;
-}
-
-int32_t CharString::lastIndexOf(char c) const {
- for(int32_t i=len; i>0;) {
- if(buffer[--i]==c) {
- return i;
- }
- }
- return -1;
-}
-
-bool CharString::contains(StringPiece s) const {
- if (s.empty()) { return false; }
- const char *p = buffer.getAlias();
- int32_t lastStart = len - s.length();
- for (int32_t i = 0; i <= lastStart; ++i) {
- if (uprv_memcmp(p + i, s.data(), s.length()) == 0) {
- return true;
- }
- }
- return false;
-}
-
-CharString &CharString::truncate(int32_t newLength) {
- if(newLength<0) {
- newLength=0;
- }
- if(newLength<len) {
- buffer[len=newLength]=0;
- }
- return *this;
-}
-
-CharString &CharString::append(char c, UErrorCode &errorCode) {
- if(ensureCapacity(len+2, 0, errorCode)) {
- buffer[len++]=c;
- buffer[len]=0;
- }
- return *this;
-}
-
-CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return *this;
- }
- if(sLength<-1 || (s==NULL && sLength!=0)) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return *this;
- }
- if(sLength<0) {
- sLength= static_cast<int32_t>(uprv_strlen(s));
- }
- if(sLength>0) {
- if(s==(buffer.getAlias()+len)) {
- // The caller wrote into the getAppendBuffer().
- if(sLength>=(buffer.getCapacity()-len)) {
- // The caller wrote too much.
- errorCode=U_INTERNAL_PROGRAM_ERROR;
- } else {
- buffer[len+=sLength]=0;
- }
- } else if(buffer.getAlias()<=s && s<(buffer.getAlias()+len) &&
- sLength>=(buffer.getCapacity()-len)
- ) {
- // (Part of) this string is appended to itself which requires reallocation,
- // so we have to make a copy of the substring and append that.
- return append(CharString(s, sLength, errorCode), errorCode);
- } else if(ensureCapacity(len+sLength+1, 0, errorCode)) {
- uprv_memcpy(buffer.getAlias()+len, s, sLength);
- buffer[len+=sLength]=0;
- }
- }
- return *this;
-}
-
-char *CharString::getAppendBuffer(int32_t minCapacity,
- int32_t desiredCapacityHint,
- int32_t &resultCapacity,
- UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- resultCapacity=0;
- return NULL;
- }
- int32_t appendCapacity=buffer.getCapacity()-len-1; // -1 for NUL
- if(appendCapacity>=minCapacity) {
- resultCapacity=appendCapacity;
- return buffer.getAlias()+len;
- }
- if(ensureCapacity(len+minCapacity+1, len+desiredCapacityHint+1, errorCode)) {
- resultCapacity=buffer.getCapacity()-len-1;
- return buffer.getAlias()+len;
- }
- resultCapacity=0;
- return NULL;
-}
-
-CharString &CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode) {
- return appendInvariantChars(s.getBuffer(), s.length(), errorCode);
-}
-
-CharString &CharString::appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return *this;
- }
- if (!uprv_isInvariantUString(uchars, ucharsLen)) {
- errorCode = U_INVARIANT_CONVERSION_ERROR;
- return *this;
- }
- if(ensureCapacity(len+ucharsLen+1, 0, errorCode)) {
- u_UCharsToChars(uchars, buffer.getAlias()+len, ucharsLen);
- len += ucharsLen;
- buffer[len] = 0;
- }
- return *this;
-}
-
-UBool CharString::ensureCapacity(int32_t capacity,
- int32_t desiredCapacityHint,
- UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- if(capacity>buffer.getCapacity()) {
- if(desiredCapacityHint==0) {
- desiredCapacityHint=capacity+buffer.getCapacity();
- }
- if( (desiredCapacityHint<=capacity || buffer.resize(desiredCapacityHint, len+1)==NULL) &&
- buffer.resize(capacity, len+1)==NULL
- ) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
- }
- }
- return TRUE;
-}
-
-CharString &CharString::appendPathPart(StringPiece s, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return *this;
- }
- if(s.length()==0) {
- return *this;
- }
- char c;
- if(len>0 && (c=buffer[len-1])!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
- append(U_FILE_SEP_CHAR, errorCode);
- }
- append(s, errorCode);
- return *this;
-}
-
-CharString &CharString::ensureEndsWithFileSeparator(UErrorCode &errorCode) {
- char c;
- if(U_SUCCESS(errorCode) && len>0 &&
- (c=buffer[len-1])!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
- append(U_FILE_SEP_CHAR, errorCode);
- }
- return *this;
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/charstr.h b/contrib/libs/icu/common/charstr.h
deleted file mode 100644
index 23b950ed6ec..00000000000
--- a/contrib/libs/icu/common/charstr.h
+++ /dev/null
@@ -1,168 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2001-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Date Name Description
-* 11/19/2001 aliu Creation.
-* 05/19/2010 markus Rewritten from scratch
-**********************************************************************
-*/
-
-#ifndef CHARSTRING_H
-#define CHARSTRING_H
-
-#include "unicode/utypes.h"
-#include "unicode/unistr.h"
-#include "unicode/uobject.h"
-#include "cmemory.h"
-
-U_NAMESPACE_BEGIN
-
-// Windows needs us to DLL-export the explicit MaybeStackArray template instantiation,
-// but MacOS X cannot handle it. Same as in digitlst.h.
-#if !U_PLATFORM_IS_DARWIN_BASED
-template class U_COMMON_API MaybeStackArray<char, 40>;
-#endif
-
-/**
- * ICU-internal char * string class.
- * This class does not assume or enforce any particular character encoding.
- * Raw bytes can be stored. The string object owns its characters.
- * A terminating NUL is stored, but the class does not prevent embedded NUL characters.
- *
- * This class wants to be convenient but is also deliberately minimalist.
- * Please do not add methods if they only add minor convenience.
- * For example:
- * cs.data()[5]='a'; // no need for setCharAt(5, 'a')
- */
-class U_COMMON_API CharString : public UMemory {
-public:
-    /** Constructs an empty string (a lone terminating NUL). */
-    CharString() : len(0) { buffer[0]=0; }
-    /** Constructs a string with a copy of s; failures are reported via errorCode. */
-    CharString(StringPiece s, UErrorCode &errorCode) : len(0) {
-        buffer[0]=0;
-        append(s, errorCode);
-    }
-    /** Explicit, error-reporting replacement for the forbidden copy constructor. */
-    CharString(const CharString &s, UErrorCode &errorCode) : len(0) {
-        buffer[0]=0;
-        append(s, errorCode);
-    }
-    /** Constructs a string with a copy of sLength chars starting at s. */
-    CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) {
-        buffer[0]=0;
-        append(s, sLength, errorCode);
-    }
-    ~CharString() {}
-
-    /**
-     * Move constructor; might leave src in an undefined state.
-     * This string will have the same contents and state that the source string had.
-     */
-    CharString(CharString &&src) U_NOEXCEPT;
-    /**
-     * Move assignment operator; might leave src in an undefined state.
-     * This string will have the same contents and state that the source string had.
-     * The behavior is undefined if *this and src are the same object.
-     */
-    CharString &operator=(CharString &&src) U_NOEXCEPT;
-
-    /**
-     * Replaces this string's contents with the other string's contents.
-     * CharString does not support the standard copy constructor nor
-     * the assignment operator, to make copies explicit and to
-     * use a UErrorCode where memory allocations might be needed.
-     */
-    CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
-
-    UBool isEmpty() const { return len==0; }
-    int32_t length() const { return len; }
-    /** Read-only char access; no index bounds check. */
-    char operator[](int32_t index) const { return buffer[index]; }
-    StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); }
-
-    const char *data() const { return buffer.getAlias(); }
-    char *data() { return buffer.getAlias(); }
-    /**
-     * Allocates length()+1 chars and copies the NUL-terminated data().
-     * The caller must uprv_free() the result.
-     */
-    char *cloneData(UErrorCode &errorCode) const;
-
-    /** Byte-wise equality: same length and same len bytes. */
-    bool operator==(StringPiece other) const {
-        return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
-    }
-    bool operator!=(StringPiece other) const {
-        return !operator==(other);
-    }
-
-    /** @return last index of c, or -1 if c is not in this string */
-    int32_t lastIndexOf(char c) const;
-
-    bool contains(StringPiece s) const;
-
-    /** Resets to the empty string; the current buffer is kept. */
-    CharString &clear() { len=0; buffer[0]=0; return *this; }
-    CharString &truncate(int32_t newLength);
-
-    CharString &append(char c, UErrorCode &errorCode);
-    CharString &append(StringPiece s, UErrorCode &errorCode) {
-        return append(s.data(), s.length(), errorCode);
-    }
-    CharString &append(const CharString &s, UErrorCode &errorCode) {
-        return append(s.data(), s.length(), errorCode);
-    }
-    CharString &append(const char *s, int32_t sLength, UErrorCode &status);
-    /**
-     * Returns a writable buffer for appending and writes the buffer's capacity to
-     * resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
-     * There will additionally be space for a terminating NUL right at resultCapacity.
-     * (This function is similar to ByteSink.GetAppendBuffer().)
-     *
-     * The returned buffer is only valid until the next write operation
-     * on this string.
-     *
-     * After writing at most resultCapacity bytes, call append() with the
-     * pointer returned from this function and the number of bytes written.
-     *
-     * @param minCapacity required minimum capacity of the returned buffer;
-     *                    must be non-negative
-     * @param desiredCapacityHint desired capacity of the returned buffer;
-     *                            must be non-negative
-     * @param resultCapacity will be set to the capacity of the returned buffer
-     * @param errorCode in/out error code
-     * @return a buffer with resultCapacity>=minCapacity
-     */
-    char *getAppendBuffer(int32_t minCapacity,
-                          int32_t desiredCapacityHint,
-                          int32_t &resultCapacity,
-                          UErrorCode &errorCode);
-
-    CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
-    CharString &appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode& errorCode);
-
-    /**
-     * Appends a filename/path part, e.g., a directory name.
-     * First appends a U_FILE_SEP_CHAR if necessary.
-     * Does nothing if s is empty.
-     */
-    CharString &appendPathPart(StringPiece s, UErrorCode &errorCode);
-
-    /**
-     * Appends a U_FILE_SEP_CHAR if this string is not empty
-     * and does not already end with a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR.
-     */
-    CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode);
-
-private:
-    MaybeStackArray<char, 40> buffer;  // storage; always holds a NUL at buffer[len]
-    int32_t len;                       // number of chars, not counting the NUL
-
-    UBool ensureCapacity(int32_t capacity, int32_t desiredCapacityHint, UErrorCode &errorCode);
-
-    // Copying is forbidden: deleted so that misuse (even from members or friends)
-    // is a compile-time error rather than a link-time one.
-    // Use copyFrom() or the (const CharString &, UErrorCode &) constructor instead.
-    CharString(const CharString &other) = delete;
-    CharString &operator=(const CharString &other) = delete;
-};
-
-U_NAMESPACE_END
-
-#endif
-//eof
diff --git a/contrib/libs/icu/common/cmemory.cpp b/contrib/libs/icu/common/cmemory.cpp
deleted file mode 100644
index 663c1411e4c..00000000000
--- a/contrib/libs/icu/common/cmemory.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2002-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* File cmemory.cpp ICU Heap allocation.
-* All ICU heap allocation, both for C and C++ new of ICU
-* class types, comes through these functions.
-*
-* If you have a need to replace ICU allocation, this is the
-* place to do it.
-*
-* Note that uprv_malloc(0) returns a non-NULL pointer, and
-* that a subsequent free of that pointer value is a NOP.
-*
-******************************************************************************
-*/
-#include "unicode/uclean.h"
-#include "cmemory.h"
-#include "putilimp.h"
-#include "uassert.h"
-#include <stdlib.h>
-
-/* uprv_malloc(0) returns a pointer to this read-only data. */
-/* uprv_realloc() and uprv_free() compare against its address and never pass it to a heap function. */
-static const int32_t zeroMem[] = {0, 0, 0, 0, 0, 0};
-
-/* Function Pointers for user-supplied heap functions */
-/* Installed by u_setMemoryFunctions() and cleared by cmemory_cleanup(). */
-/* While a pointer is NULL, the matching uprv_default_* function is used instead. */
-static const void *pContext;
-static UMemAllocFn *pAlloc;
-static UMemReallocFn *pRealloc;
-static UMemFreeFn *pFree;
-
-/* Counters referenced only by the disabled debug fprintf() in uprv_malloc(). */
-#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
-#include <stdio.h>
-static int n=0;
-static long b=0;
-#endif
-
-/**
- * Allocates s bytes through the user-supplied allocator if one is installed,
- * otherwise through uprv_default_malloc().
- * A request for 0 bytes returns the address of the static zeroMem block and
- * performs no allocation.
- */
-U_CAPI void * U_EXPORT2
-uprv_malloc(size_t s) {
-#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
-#if 1
-    putchar('>');
-    fflush(stdout);
-#else
-    /* %lu/%ld with explicit casts: the old "%ul" did not match the size_t/long arguments. */
-    fprintf(stderr,"MALLOC\t#%d\t%lu bytes\t%ld total\n", ++n,(unsigned long)s,(b+=(long)s)); fflush(stderr);
-#endif
-#endif
-    if (s == 0) {
-        return (void *)zeroMem;
-    }
-    if (pAlloc) {
-        return (*pAlloc)(pContext, s);
-    }
-    return uprv_default_malloc(s);
-}
-
-/**
- * Resizes a block obtained from uprv_malloc()/uprv_realloc().
- *  - buffer is the zeroMem sentinel: behaves like uprv_malloc(size).
- *  - size is 0: frees buffer and returns the zeroMem sentinel.
- *  - otherwise: forwards to the user-supplied or default realloc.
- */
-U_CAPI void * U_EXPORT2
-uprv_realloc(void * buffer, size_t size) {
-#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
-    putchar('~');
-    fflush(stdout);
-#endif
-    if (buffer == zeroMem) {
-        /* Nothing was really allocated; start from scratch. */
-        return uprv_malloc(size);
-    }
-    if (size != 0) {
-        if (pRealloc) {
-            return (*pRealloc)(pContext, buffer, size);
-        }
-        return uprv_default_realloc(buffer, size);
-    }
-    /* Shrinking to nothing: release the block and hand back the sentinel. */
-    if (pFree) {
-        (*pFree)(pContext, buffer);
-    } else {
-        uprv_default_free(buffer);
-    }
-    return (void *)zeroMem;
-}
-
-/**
- * Releases a block through the user-supplied or default free function.
- * The zeroMem sentinel returned for zero-sized allocations is never
- * passed to either of them.
- */
-U_CAPI void U_EXPORT2
-uprv_free(void *buffer) {
-#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
-    putchar('<');
-    fflush(stdout);
-#endif
-    if (buffer == zeroMem) {
-        return;
-    }
-    if (pFree) {
-        (*pFree)(pContext, buffer);
-        return;
-    }
-    uprv_default_free(buffer);
-}
-
-/**
- * Allocates zero-filled storage for num items of size bytes each.
- *
- * @return the zeroed block, or NULL if the allocation failed or if
- *         num*size does not fit in a size_t
- */
-U_CAPI void * U_EXPORT2
-uprv_calloc(size_t num, size_t size) {
-    void *mem = NULL;
-    size_t total;
-    /* An overflowing product would silently allocate a block that is too small. */
-    if (num != 0 && size > ((size_t)-1) / num) {
-        return NULL;
-    }
-    total = num * size;
-    mem = uprv_malloc(total);
-    if (mem) {
-        uprv_memset(mem, 0, total);
-    }
-    return mem;
-}
-
-/**
- * Installs user-supplied heap functions for uprv_malloc/uprv_realloc/uprv_free.
- * Does nothing if *status already indicates a failure.
- * Sets U_ILLEGAL_ARGUMENT_ERROR, and installs nothing, if any of the three
- * function pointers is NULL.
- */
-U_CAPI void U_EXPORT2
-u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, UErrorCode *status)
-{
-    if (U_FAILURE(*status)) {
-        return;
-    }
-    if (a!=NULL && r!=NULL && f!=NULL) {
-        /* All three are present: switch over as a set. */
-        pContext = context;
-        pAlloc   = a;
-        pRealloc = r;
-        pFree    = f;
-    } else {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-    }
-}
-
-
-/**
- * Forgets any heap functions installed by u_setMemoryFunctions(), so later
- * allocations use the defaults again. Frees nothing. Always returns TRUE.
- */
-U_CFUNC UBool cmemory_cleanup(void) {
-    pFree    = NULL;
-    pRealloc = NULL;
-    pAlloc   = NULL;
-    pContext = NULL;
-    return TRUE;
-}
diff --git a/contrib/libs/icu/common/cmemory.h b/contrib/libs/icu/common/cmemory.h
deleted file mode 100644
index 8d604420215..00000000000
--- a/contrib/libs/icu/common/cmemory.h
+++ /dev/null
@@ -1,820 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1997-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* File CMEMORY.H
-*
-* Contains stdlib.h/string.h memory functions
-*
-* @author Bertrand A. Damiba
-*
-* Modification History:
-*
-* Date Name Description
-* 6/20/98 Bertrand Created.
-* 05/03/99 stephen Changed from functions to macros.
-*
-******************************************************************************
-*/
-
-#ifndef CMEMORY_H
-#define CMEMORY_H
-
-#include "unicode/utypes.h"
-
-#include <stddef.h>
-#include <string.h>
-#include "unicode/localpointer.h"
-
-#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
-#include <stdio.h>
-#endif
-
-
-/* Thin wrappers that expand to the standard memcpy()/memmove(), qualified with U_STANDARD_CPP_NAMESPACE. */
-#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
-#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
-
-/**
- * \def UPRV_LENGTHOF
- * Convenience macro to determine the length of a fixed array at compile-time.
- * The result is cast to int32_t.
- * @param array A fixed length array
- * @return The length of the array, in elements
- * @internal
- */
-#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-/* Thin wrappers that expand to the standard memset()/memcmp()/memchr(). */
-#define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size)
-#define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size)
-#define uprv_memchr(ptr, value, num) U_STANDARD_CPP_NAMESPACE memchr(ptr, value, num)
-
-/**
- * Allocates s bytes using the heap functions set with u_setMemoryFunctions(),
- * or the default allocator if none are set.
- * uprv_malloc(0) returns a non-NULL pointer that must not be written to.
- */
-U_CAPI void * U_EXPORT2
-uprv_malloc(size_t s) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR(1);
-
-/**
- * Resizes a block obtained from uprv_malloc()/uprv_realloc().
- * A size of 0 frees the block and returns the same pointer that uprv_malloc(0) returns.
- */
-U_CAPI void * U_EXPORT2
-uprv_realloc(void *mem, size_t size) U_ALLOC_SIZE_ATTR(2);
-
-/**
- * Frees a block obtained from uprv_malloc()/uprv_realloc()/uprv_calloc().
- * Passing the pointer returned by uprv_malloc(0) is a no-op.
- */
-U_CAPI void U_EXPORT2
-uprv_free(void *mem);
-
-/**
- * Allocates num*size bytes with uprv_malloc() and fills them with zeros.
- * Returns NULL if the allocation fails.
- */
-U_CAPI void * U_EXPORT2
-uprv_calloc(size_t num, size_t size) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR2(1,2);
-
-/**
- * Get the least significant bits of a pointer (a memory address).
- * For example, with a mask of 3, the macro gets the 2 least significant bits,
- * which will be 0 if the pointer is 32-bit (4-byte) aligned.
- *
- * uintptr_t is the most appropriate integer type to cast to.
- */
-#define U_POINTER_MASK_LSB(ptr, mask) ((uintptr_t)(ptr) & (mask))
-
-/**
- * Create & return an instance of "type" in statically allocated storage.
- * e.g.
- * static std::mutex *myMutex = STATIC_NEW(std::mutex);
- * To destroy an object created in this way, invoke the destructor explicitly, e.g.
- * myMutex->~mutex();
- * DO NOT use delete.
- * DO NOT use with class UMutex, which has specific support for static instances.
- *
- * STATIC_NEW is intended for use when
- * - We want a static (or global) object.
- * - We don't want it to ever be destructed, or to explicitly control destruction,
- * to avoid use-after-destruction problems.
- * - We want to avoid an ordinary heap allocated object,
- * to avoid the possibility of memory allocation failures, and
- * to avoid memory leak reports, from valgrind, for example.
- * This is defined as a macro rather than a template function because each invocation
- * must define distinct static storage for the object being returned.
- */
-#define STATIC_NEW(type) [] () { \
- alignas(type) static char storage[sizeof(type)]; \
- return new(storage) type();} ()
-
-/**
- * Heap clean up function, called from u_cleanup()
- * Clears any user heap functions from u_setMemoryFunctions()
- * Does NOT deallocate any remaining allocated memory.
- */
-U_CFUNC UBool
-cmemory_cleanup(void);
-
-/**
- * A function called by <TT>uhash_remove</TT>,
- * <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete
- * an existing key or value.
- * @param obj A key or value stored in a hashtable
- * @see uprv_deleteUObject
- */
-typedef void U_CALLCONV UObjectDeleter(void* obj);
-
-/**
- * Deleter for UObject instances.
- * Works for all subclasses of UObject because it has a virtual destructor.
- */
-U_CAPI void U_EXPORT2
-uprv_deleteUObject(void *obj);
-
-#ifdef __cplusplus
-
-#include <utility>
-#include "unicode/uobject.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * "Smart pointer" class, deletes memory via uprv_free().
- * For most methods see the LocalPointerBase base class.
- * Adds operator[] for array item access.
- *
- * @see LocalPointerBase
- */
-template<typename T>
-class LocalMemory : public LocalPointerBase<T> {
-public:
-    using LocalPointerBase<T>::operator*;
-    using LocalPointerBase<T>::operator->;
-    /**
-     * Adopts p; it will be released with uprv_free().
-     * @param p simple pointer to an array of T items that is adopted
-     */
-    explicit LocalMemory(T *p=NULL) : LocalPointerBase<T>(p) {}
-    /**
-     * Move constructor: steals src's pointer and leaves src with isNull().
-     * @param src source smart pointer
-     */
-    LocalMemory(LocalMemory<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
-        src.ptr=NULL;
-    }
-    /**
-     * Destructor: frees the owned memory via uprv_free().
-     */
-    ~LocalMemory() {
-        uprv_free(this->ptr);
-    }
-    /**
-     * Move assignment: frees the current memory, steals src's pointer
-     * and leaves src with isNull().
-     * The behavior is undefined if *this and src are the same object.
-     * @param src source smart pointer
-     * @return *this
-     */
-    LocalMemory<T> &operator=(LocalMemory<T> &&src) U_NOEXCEPT {
-        adoptInstead(src.ptr);
-        src.ptr=NULL;
-        return *this;
-    }
-    /**
-     * Exchanges the owned pointers of the two objects.
-     * @param other other smart pointer
-     */
-    void swap(LocalMemory<T> &other) U_NOEXCEPT {
-        std::swap(this->ptr, other.ptr);
-    }
-    /**
-     * Non-member LocalMemory swap function.
-     * @param p1 will get p2's pointer
-     * @param p2 will get p1's pointer
-     */
-    friend inline void swap(LocalMemory<T> &p1, LocalMemory<T> &p2) U_NOEXCEPT {
-        p1.swap(p2);
-    }
-    /**
-     * Frees the owned array and takes ownership of p instead.
-     * @param p simple pointer to an array of T items that is adopted
-     */
-    void adoptInstead(T *p) {
-        uprv_free(this->ptr);
-        this->ptr=p;
-    }
-    /**
-     * Deletes the array it owns, allocates a new one and reset its bytes to 0.
-     * Returns the new array pointer.
-     * If the allocation fails, then the current array is unchanged and
-     * this method returns NULL.
-     * @param newCapacity must be >0
-     * @return the allocated array pointer, or NULL if the allocation failed
-     */
-    inline T *allocateInsteadAndReset(int32_t newCapacity=1);
-    /**
-     * Deletes the array it owns and allocates a new one, copying length T items.
-     * Returns the new array pointer.
-     * If the allocation fails, then the current array is unchanged and
-     * this method returns NULL.
-     * @param newCapacity must be >0
-     * @param length number of T items to be copied from the old array to the new one;
-     *               must be no more than the capacity of the old array,
-     *               which the caller must track because the LocalMemory does not track it
-     * @return the allocated array pointer, or NULL if the allocation failed
-     */
-    inline T *allocateInsteadAndCopy(int32_t newCapacity=1, int32_t length=0);
-    /**
-     * Array item access (writable).
-     * No index bounds check.
-     * @param i array index
-     * @return reference to the array item
-     */
-    T &operator[](ptrdiff_t i) const { return this->ptr[i]; }
-};
-
-/**
- * Replaces the owned array with a freshly allocated, zero-filled one.
- * Returns NULL and keeps the old array if newCapacity<=0 or the allocation fails.
- */
-template<typename T>
-inline T *LocalMemory<T>::allocateInsteadAndReset(int32_t newCapacity) {
-    if(newCapacity<=0) {
-        return NULL;
-    }
-    T *fresh=(T *)uprv_malloc(newCapacity*sizeof(T));
-    if(fresh==NULL) {
-        return NULL;
-    }
-    uprv_memset(fresh, 0, newCapacity*sizeof(T));
-    // Only now, with the replacement in hand, let go of the old array.
-    uprv_free(this->ptr);
-    this->ptr=fresh;
-    return fresh;
-}
-
-
-/**
- * Replaces the owned array with a freshly allocated one and copies up to
- * `length` items (clamped to newCapacity) from the old array into it.
- * Returns NULL and keeps the old array if newCapacity<=0 or the allocation fails.
- */
-template<typename T>
-inline T *LocalMemory<T>::allocateInsteadAndCopy(int32_t newCapacity, int32_t length) {
-    if(newCapacity<=0) {
-        return NULL;
-    }
-    T *fresh=(T *)uprv_malloc(newCapacity*sizeof(T));
-    if(fresh==NULL) {
-        return NULL;
-    }
-    if(length>0) {
-        const int32_t toCopy=(length>newCapacity) ? newCapacity : length;
-        uprv_memcpy(fresh, this->ptr, (size_t)toCopy*sizeof(T));
-    }
-    uprv_free(this->ptr);
-    this->ptr=fresh;
-    return fresh;
-}
-
-/**
- * Simple array/buffer management class using uprv_malloc() and uprv_free().
- * Provides an internal array with fixed capacity. Can alias another array
- * or allocate one.
- *
- * The array address is properly aligned for type T. It might not be properly
- * aligned for types larger than T (or larger than the largest subtype of T).
- *
- * Unlike LocalMemory and LocalArray, this class never adopts
- * (takes ownership of) another array.
- *
- * WARNING: MaybeStackArray only works with primitive (plain-old data) types.
- * It does NOT know how to call a destructor! If you work with classes with
- * destructors, consider:
- *
- * - LocalArray in localpointer.h if you know the length ahead of time
- * - MaybeStackVector if you know the length at runtime
- */
-template<typename T, int32_t stackCapacity>
-class MaybeStackArray {
-public:
-    // No heap allocation. Use only on the stack.
-    static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
-    static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
-#if U_HAVE_PLACEMENT_NEW
-    static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
-#endif
-
-    /**
-     * Default constructor initializes with internal T[stackCapacity] buffer.
-     */
-    MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(FALSE) {}
-    /**
-     * Automatically allocates the heap array if the argument is larger than the stack capacity.
-     * Intended for use when an approximate capacity is known at compile time but the true
-     * capacity is not known until runtime.
-     * The result of resize() is not checked here: if the allocation fails, the object
-     * keeps the internal stack buffer, so callers should verify getCapacity().
-     */
-    MaybeStackArray(int32_t newCapacity) : MaybeStackArray() {
-        if (capacity < newCapacity) { resize(newCapacity); }
-    }
-    /**
-     * Destructor deletes the array (if owned).
-     */
-    ~MaybeStackArray() { releaseArray(); }
-    /**
-     * Move constructor: transfers ownership or copies the stack array.
-     */
-    MaybeStackArray(MaybeStackArray<T, stackCapacity> &&src) U_NOEXCEPT;
-    /**
-     * Move assignment: transfers ownership or copies the stack array.
-     */
-    MaybeStackArray<T, stackCapacity> &operator=(MaybeStackArray<T, stackCapacity> &&src) U_NOEXCEPT;
-    /**
-     * Returns the array capacity (number of T items).
-     * @return array capacity
-     */
-    int32_t getCapacity() const { return capacity; }
-    /**
-     * Access without ownership change.
-     * @return the array pointer
-     */
-    T *getAlias() const { return ptr; }
-    /**
-     * Returns the array limit. Simple convenience method.
-     * @return getAlias()+getCapacity()
-     */
-    T *getArrayLimit() const { return getAlias()+capacity; }
-    // No "operator T *() const" because that can make
-    // expressions like mbs[index] ambiguous for some compilers.
-    /**
-     * Array item access (const).
-     * No index bounds check.
-     * @param i array index
-     * @return reference to the array item
-     */
-    const T &operator[](ptrdiff_t i) const { return ptr[i]; }
-    /**
-     * Array item access (writable).
-     * No index bounds check.
-     * @param i array index
-     * @return reference to the array item
-     */
-    T &operator[](ptrdiff_t i) { return ptr[i]; }
-    /**
-     * Deletes the array (if owned) and aliases another one, no transfer of ownership.
-     * If the arguments are illegal, then the current array is unchanged.
-     * @param otherArray must not be NULL
-     * @param otherCapacity must be >0
-     */
-    void aliasInstead(T *otherArray, int32_t otherCapacity) {
-        if(otherArray!=NULL && otherCapacity>0) {
-            releaseArray();
-            ptr=otherArray;
-            capacity=otherCapacity;
-            needToRelease=FALSE;
-        }
-    }
-    /**
-     * Deletes the array (if owned) and allocates a new one, copying length T items.
-     * Returns the new array pointer.
-     * If the allocation fails, then the current array is unchanged and
-     * this method returns NULL.
-     * @param newCapacity can be less than or greater than the current capacity;
-     *                    must be >0
-     * @param length number of T items to be copied from the old array to the new one
-     * @return the allocated array pointer, or NULL if the allocation failed
-     */
-    inline T *resize(int32_t newCapacity, int32_t length=0);
-    /**
-     * Gives up ownership of the array if owned, or else clones it,
-     * copying length T items; resets itself to the internal stack array.
-     * Returns NULL if the allocation failed.
-     * @param length number of T items to copy when cloning,
-     *        and capacity of the clone when cloning
-     * @param resultCapacity will be set to the returned array's capacity (output-only)
-     * @return the array pointer;
-     *         caller becomes responsible for deleting the array
-     */
-    inline T *orphanOrClone(int32_t length, int32_t &resultCapacity);
-private:
-    T *ptr;                      // stackArray, a heap block, or an aliased array
-    int32_t capacity;            // number of T items available at ptr
-    UBool needToRelease;         // TRUE only when ptr is a heap block owned by this object
-    T stackArray[stackCapacity];
-    void releaseArray() {
-        if(needToRelease) {
-            uprv_free(ptr);
-        }
-    }
-    void resetToStackArray() {
-        ptr=stackArray;
-        capacity=stackCapacity;
-        needToRelease=FALSE;
-    }
-    /* No comparison operators with other MaybeStackArray's. */
-    bool operator==(const MaybeStackArray & /*other*/) = delete;
-    bool operator!=(const MaybeStackArray & /*other*/) = delete;
-    /*
-     * No ownership transfer: No copy constructor, no assignment operator.
-     * Deleted rather than given empty bodies: an empty copy constructor would
-     * leave ptr, capacity and needToRelease uninitialized if it were ever called.
-     */
-    MaybeStackArray(const MaybeStackArray & /*other*/) = delete;
-    void operator=(const MaybeStackArray & /*other*/) = delete;
-};
-
-/**
- * Move constructor. If src uses its internal stack array, the items are
- * copied into this object's own stack array; otherwise src's pointer is taken
- * over and src is reset to its stack array.
- */
-template<typename T, int32_t stackCapacity>
-icu::MaybeStackArray<T, stackCapacity>::MaybeStackArray(
-        MaybeStackArray <T, stackCapacity>&& src) U_NOEXCEPT
-        : ptr(src.ptr), capacity(src.capacity), needToRelease(src.needToRelease) {
-    const bool srcUsesItsStack = (src.ptr == src.stackArray);
-    if (!srcUsesItsStack) {
-        src.resetToStackArray();  // take ownership away from src
-        return;
-    }
-    // A stack array cannot be handed over; copy its contents instead.
-    ptr = stackArray;
-    uprv_memcpy(stackArray, src.stackArray, sizeof(T) * src.capacity);
-}
-
-/**
- * Move assignment. Releases any array this object owns, then either copies
- * src's stack array or takes over src's pointer (resetting src to its stack array).
- */
-template<typename T, int32_t stackCapacity>
-inline MaybeStackArray <T, stackCapacity>&
-MaybeStackArray<T, stackCapacity>::operator=(MaybeStackArray <T, stackCapacity>&& src) U_NOEXCEPT {
-    releaseArray();  // in case this instance had its own memory allocated
-    const bool srcUsesItsStack = (src.ptr == src.stackArray);
-    needToRelease = src.needToRelease;
-    capacity = src.capacity;
-    if (!srcUsesItsStack) {
-        ptr = src.ptr;
-        src.resetToStackArray();  // take ownership away from src
-    } else {
-        ptr = stackArray;
-        uprv_memcpy(stackArray, src.stackArray, sizeof(T) * src.capacity);
-    }
-    return *this;
-}
-
-/**
- * Allocates a new heap array of newCapacity items, copies up to `length`
- * items (clamped to both the old and the new capacity) and releases the old
- * array if it was owned. Returns NULL and changes nothing if newCapacity<=0
- * or the allocation fails.
- */
-template<typename T, int32_t stackCapacity>
-inline T *MaybeStackArray<T, stackCapacity>::resize(int32_t newCapacity, int32_t length) {
-    if(newCapacity<=0) {
-        return NULL;
-    }
-#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
-    ::fprintf(::stderr,"MaybeStacArray (resize) alloc %d * %lu\n", newCapacity,sizeof(T));
-#endif
-    T *fresh=(T *)uprv_malloc(newCapacity*sizeof(T));
-    if(fresh==NULL) {
-        return NULL;
-    }
-    if(length>0) {
-        int32_t toCopy=length;
-        if(toCopy>capacity) {
-            toCopy=capacity;
-        }
-        if(toCopy>newCapacity) {
-            toCopy=newCapacity;
-        }
-        uprv_memcpy(fresh, ptr, (size_t)toCopy*sizeof(T));
-    }
-    releaseArray();
-    ptr=fresh;
-    capacity=newCapacity;
-    needToRelease=TRUE;
-    return fresh;
-}
-
-/**
- * Hands the array to the caller: the owned heap array itself, or else a
- * newly allocated copy of the first `length` items (clamped to the capacity).
- * On success this object is reset to its internal stack array and
- * resultCapacity is set to `length`. Returns NULL, leaving resultCapacity
- * untouched, if nothing is owned and length<=0 or the allocation fails.
- */
-template<typename T, int32_t stackCapacity>
-inline T *MaybeStackArray<T, stackCapacity>::orphanOrClone(int32_t length, int32_t &resultCapacity) {
-    T *result;
-    if(!needToRelease) {
-        if(length<=0) {
-            return NULL;
-        }
-        if(length>capacity) {
-            length=capacity;
-        }
-        result=(T *)uprv_malloc(length*sizeof(T));
-#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
-        ::fprintf(::stderr,"MaybeStacArray (orphan) alloc %d * %lu\n", length,sizeof(T));
-#endif
-        if(result==NULL) {
-            return NULL;
-        }
-        uprv_memcpy(result, ptr, (size_t)length*sizeof(T));
-    } else {
-        // Already on the heap: just give it away.
-        result=ptr;
-    }
-    resultCapacity=length;
-    resetToStackArray();
-    return result;
-}
-
-/**
- * Variant of MaybeStackArray that allocates a header struct and an array
- * in one contiguous memory block, using uprv_malloc() and uprv_free().
- * Provides internal memory with fixed array capacity. Can alias another memory
- * block or allocate one.
- * The stackCapacity is the number of T items in the internal memory,
- * not counting the H header.
- * Unlike LocalMemory and LocalArray, this class never adopts
- * (takes ownership of) another memory block.
- */
-template<typename H, typename T, int32_t stackCapacity>
-class MaybeStackHeaderAndArray {
-public:
-    // No heap allocation. Use only on the stack.
-    static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
-    static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
-#if U_HAVE_PLACEMENT_NEW
-    static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
-#endif
-
-    /**
-     * Default constructor initializes with internal H+T[stackCapacity] buffer.
-     */
-    MaybeStackHeaderAndArray() : ptr(&stackHeader), capacity(stackCapacity), needToRelease(FALSE) {}
-    /**
-     * Destructor deletes the memory (if owned).
-     */
-    ~MaybeStackHeaderAndArray() { releaseMemory(); }
-    /**
-     * Returns the array capacity (number of T items).
-     * @return array capacity
-     */
-    int32_t getCapacity() const { return capacity; }
-    /**
-     * Access without ownership change.
-     * @return the header pointer
-     */
-    H *getAlias() const { return ptr; }
-    /**
-     * Returns the array start.
-     * @return array start, same address as getAlias()+1
-     */
-    T *getArrayStart() const { return reinterpret_cast<T *>(getAlias()+1); }
-    /**
-     * Returns the array limit.
-     * @return array limit
-     */
-    T *getArrayLimit() const { return getArrayStart()+capacity; }
-    /**
-     * Access without ownership change. Same as getAlias().
-     * A class instance can be used directly in expressions that take an H *.
-     * @return the header pointer
-     */
-    operator H *() const { return ptr; }
-    /**
-     * Array item access (writable).
-     * No index bounds check.
-     * @param i array index
-     * @return reference to the array item
-     */
-    T &operator[](ptrdiff_t i) { return getArrayStart()[i]; }
-    /**
-     * Deletes the memory block (if owned) and aliases another one, no transfer of ownership.
-     * If the arguments are illegal, then the current memory is unchanged.
-     * @param otherMemory must not be NULL
-     * @param otherCapacity must be >0
-     */
-    void aliasInstead(H *otherMemory, int32_t otherCapacity) {
-        if(otherMemory!=NULL && otherCapacity>0) {
-            releaseMemory();
-            ptr=otherMemory;
-            capacity=otherCapacity;
-            needToRelease=FALSE;
-        }
-    }
-    /**
-     * Deletes the memory block (if owned) and allocates a new one,
-     * copying the header and length T array items.
-     * Returns the new header pointer.
-     * If the allocation fails, then the current memory is unchanged and
-     * this method returns NULL.
-     * @param newCapacity can be less than or greater than the current capacity;
-     *                    must be >0
-     * @param length number of T items to be copied from the old array to the new one
-     * @return the allocated pointer, or NULL if the allocation failed
-     */
-    inline H *resize(int32_t newCapacity, int32_t length=0);
-    /**
-     * Gives up ownership of the memory if owned, or else clones it,
-     * copying the header and length T array items; resets itself to the internal memory.
-     * Returns NULL if the allocation failed.
-     * @param length number of T items to copy when cloning,
-     *        and array capacity of the clone when cloning
-     * @param resultCapacity will be set to the returned array's capacity (output-only)
-     * @return the header pointer;
-     *         caller becomes responsible for deleting the array
-     */
-    inline H *orphanOrClone(int32_t length, int32_t &resultCapacity);
-private:
-    H *ptr;                  // &stackHeader, a heap block, or an aliased block
-    int32_t capacity;        // number of T items following the header at ptr
-    UBool needToRelease;     // TRUE only when ptr is a heap block owned by this object
-    // stackHeader must precede stackArray immediately.
-    H stackHeader;
-    T stackArray[stackCapacity];
-    void releaseMemory() {
-        if(needToRelease) {
-            uprv_free(ptr);
-        }
-    }
-    /* No comparison operators with other MaybeStackHeaderAndArray's. */
-    bool operator==(const MaybeStackHeaderAndArray & /*other*/) = delete;
-    bool operator!=(const MaybeStackHeaderAndArray & /*other*/) = delete;
-    /*
-     * No ownership transfer: No copy constructor, no assignment operator.
-     * Deleted rather than given empty bodies: an empty copy constructor would
-     * leave ptr, capacity and needToRelease uninitialized if it were ever called.
-     */
-    MaybeStackHeaderAndArray(const MaybeStackHeaderAndArray & /*other*/) = delete;
-    void operator=(const MaybeStackHeaderAndArray & /*other*/) = delete;
-};
-
-/**
- * Allocates a new block for one H header plus newCapacity T items, copies the
- * header and up to `length` items (clamped to the old and new capacity) and
- * releases the old block if it was owned.
- * Returns NULL and changes nothing if newCapacity<0 or the allocation fails.
- */
-template<typename H, typename T, int32_t stackCapacity>
-inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::resize(int32_t newCapacity,
-                                                                int32_t length) {
-    if(newCapacity>=0) {
-#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
-        // sizeof() yields size_t: cast and print with %lu (was "%d" / "%ul").
-        ::fprintf(::stderr,"MaybeStackHeaderAndArray alloc %lu + %d * %lu\n", (unsigned long)sizeof(H),newCapacity,(unsigned long)sizeof(T));
-#endif
-        H *p=(H *)uprv_malloc(sizeof(H)+newCapacity*sizeof(T));
-        if(p!=NULL) {
-            if(length<0) {
-                length=0;
-            } else if(length>0) {
-                if(length>capacity) {
-                    length=capacity;
-                }
-                if(length>newCapacity) {
-                    length=newCapacity;
-                }
-            }
-            // The header is always copied, even when length is 0.
-            uprv_memcpy(p, ptr, sizeof(H)+(size_t)length*sizeof(T));
-            releaseMemory();
-            ptr=p;
-            capacity=newCapacity;
-            needToRelease=TRUE;
-        }
-        return p;
-    } else {
-        return NULL;
-    }
-}
-
-/**
- * Hands the memory block to the caller: the owned heap block itself, or else a
- * newly allocated copy of the header plus the first `length` items
- * (clamped to [0, capacity]). On success this object is reset to its internal
- * memory and resultCapacity is set to `length`.
- * Returns NULL, leaving this object unchanged, if the clone cannot be allocated.
- */
-template<typename H, typename T, int32_t stackCapacity>
-inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::orphanOrClone(int32_t length,
-                                                                       int32_t &resultCapacity) {
-    H *p;
-    if(needToRelease) {
-        p=ptr;
-    } else {
-        if(length<0) {
-            length=0;
-        } else if(length>capacity) {
-            length=capacity;
-        }
-#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
-        // sizeof() yields size_t: cast and print with %lu (was "%ul").
-        ::fprintf(::stderr,"MaybeStackHeaderAndArray (orphan) alloc %lu + %d * %lu\n", (unsigned long)sizeof(H),length,(unsigned long)sizeof(T));
-#endif
-        p=(H *)uprv_malloc(sizeof(H)+length*sizeof(T));
-        if(p==NULL) {
-            return NULL;
-        }
-        uprv_memcpy(p, ptr, sizeof(H)+(size_t)length*sizeof(T));
-    }
-    resultCapacity=length;
-    // Back to the internal header+array; the caller now owns p.
-    ptr=&stackHeader;
-    capacity=stackCapacity;
-    needToRelease=FALSE;
-    return p;
-}
-
-/**
- * A simple memory management class that creates new heap allocated objects (of
- * any class that has a public constructor), keeps track of them and eventually
- * deletes them all in its own destructor.
- *
- * A typical use-case would be code like this:
- *
- * MemoryPool<MyType> pool;
- *
- * MyType* o1 = pool.create();
- * if (o1 != nullptr) {
- * foo(o1);
- * }
- *
- * MyType* o2 = pool.create(1, 2, 3);
- * if (o2 != nullptr) {
- * bar(o2);
- * }
- *
- * // MemoryPool will take care of deleting the MyType objects.
- *
- * It doesn't do anything more than that, and is intentionally kept minimalist.
- */
-template<typename T, int32_t stackCapacity = 8>
-class MemoryPool : public UMemory {
-public:
-    /** Creates an empty pool. */
-    MemoryPool() : fCount(0), fPool() {}
-
-    /** Deletes every object created through this pool. */
-    ~MemoryPool() {
-        for (int32_t i = 0; i < fCount; ++i) {
-            delete fPool[i];
-        }
-    }
-
-    MemoryPool(const MemoryPool&) = delete;
-    MemoryPool& operator=(const MemoryPool&) = delete;
-
-    /** Move constructor: takes over other's objects and leaves other empty. */
-    MemoryPool(MemoryPool&& other) U_NOEXCEPT : fCount(other.fCount),
-                                                fPool(std::move(other.fPool)) {
-        other.fCount = 0;
-    }
-
-    /**
-     * Move assignment: exchanges contents with other.
-     *
-     * This pool may already own objects that must be deleted, so its state
-     * cannot simply be overwritten (that would leak them). Swapping instead
-     * lets other's destructor delete the objects this pool used to own.
-     */
-    MemoryPool& operator=(MemoryPool&& other) U_NOEXCEPT {
-        if (this != &other) {
-            std::swap(fCount, other.fCount);
-            std::swap(fPool, other.fPool);
-        }
-        return *this;
-    }
-
-    /**
-     * Creates a new object of typename T, by forwarding any and all arguments
-     * to the typename T constructor.
-     *
-     * @param args Arguments to be forwarded to the typename T constructor.
-     * @return A pointer to the newly created object, or nullptr on error.
-     */
-    template<typename... Args>
-    T* create(Args&&... args) {
-        int32_t capacity = fPool.getCapacity();
-        // Pointer array is full: grow it (4x on the first growth, 2x afterwards),
-        // keeping the existing `capacity` pointers.
-        if (fCount == capacity &&
-            fPool.resize(capacity == stackCapacity ? 4 * capacity : 2 * capacity,
-                         capacity) == nullptr) {
-            return nullptr;
-        }
-        return fPool[fCount++] = new T(std::forward<Args>(args)...);
-    }
-
-    /**
-     * @return Number of elements that have been allocated.
-     */
-    int32_t count() const {
-        return fCount;
-    }
-
-protected:
-    int32_t fCount;                            // number of pointers stored in fPool
-    MaybeStackArray<T*, stackCapacity> fPool;  // owned objects, in creation order
-};
-
-/**
- * An internal Vector-like implementation based on MemoryPool.
- *
- * Heap-allocates each element and stores pointers.
- *
- * To append an item to the vector, use emplaceBack.
- *
- * MaybeStackVector<MyType> vector;
- * MyType* element = vector.emplaceBack();
- * if (!element) {
- * status = U_MEMORY_ALLOCATION_ERROR;
- * }
- * // do stuff with element
- *
- * To loop over the vector, use a for loop with indices:
- *
- * for (int32_t i = 0; i < vector.length(); i++) {
- * MyType* element = vector[i];
- * }
- */
-template<typename T, int32_t stackCapacity = 8>
-class MaybeStackVector : protected MemoryPool<T, stackCapacity> {
-public:
-    using MemoryPool<T, stackCapacity>::MemoryPool;
-    using MemoryPool<T, stackCapacity>::operator=;
-
-    /**
-     * Heap-allocates a new T at the end of the vector, perfectly forwarding
-     * the arguments to T's constructor (so rvalues are moved, not copied).
-     * @return the new element, or nullptr on error
-     */
-    template<typename... Args>
-    T* emplaceBack(Args&&... args) {
-        return this->create(std::forward<Args>(args)...);
-    }
-
-    /** @return number of elements in the vector */
-    int32_t length() const {
-        return this->fCount;
-    }
-
-    /** @return pointer to the internal array of element pointers; no ownership change */
-    T** getAlias() {
-        return this->fPool.getAlias();
-    }
-
-    /**
-     * Array item access (read-only).
-     * No index bounds check.
-     * @param i array index
-     * @return pointer to the array item
-     */
-    const T* operator[](ptrdiff_t i) const {
-        return this->fPool[i];
-    }
-
-    /**
-     * Array item access (writable).
-     * No index bounds check.
-     * @param i array index
-     * @return pointer to the array item
-     */
-    T* operator[](ptrdiff_t i) {
-        return this->fPool[i];
-    }
-
-    /**
-     * Append all the items from another MaybeStackVector to this one.
-     * Each item is copy-constructed. Stops at the first failure and sets
-     * status to U_MEMORY_ALLOCATION_ERROR; items appended so far are kept.
-     */
-    void appendAll(const MaybeStackVector& other, UErrorCode& status) {
-        for (int32_t i = 0; i < other.fCount; i++) {
-            T* item = emplaceBack(*other[i]);
-            if (!item) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-                return;
-            }
-        }
-    }
-};
-
-
-U_NAMESPACE_END
-
-#endif /* __cplusplus */
-#endif /* CMEMORY_H */
diff --git a/contrib/libs/icu/common/cpputils.h b/contrib/libs/icu/common/cpputils.h
deleted file mode 100644
index 307e5704864..00000000000
--- a/contrib/libs/icu/common/cpputils.h
+++ /dev/null
@@ -1,97 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1997-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: cpputils.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*/
-
-#ifndef CPPUTILS_H
-#define CPPUTILS_H
-
-#include "unicode/utypes.h"
-#include "unicode/unistr.h"
-#include "cmemory.h"
-
-/*==========================================================================*/
-/* Array copy utility functions */
-/*==========================================================================*/
-
-static
-inline void uprv_arrayCopy(const double* src, double* dst, int32_t count)
-{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); }
-
-static
-inline void uprv_arrayCopy(const double* src, int32_t srcStart,
- double* dst, int32_t dstStart, int32_t count)
-{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }
-
-static
-inline void uprv_arrayCopy(const int8_t* src, int8_t* dst, int32_t count)
- { uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); }
-
-static
-inline void uprv_arrayCopy(const int8_t* src, int32_t srcStart,
- int8_t* dst, int32_t dstStart, int32_t count)
-{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }
-
-static
-inline void uprv_arrayCopy(const int16_t* src, int16_t* dst, int32_t count)
-{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); }
-
-static
-inline void uprv_arrayCopy(const int16_t* src, int32_t srcStart,
- int16_t* dst, int32_t dstStart, int32_t count)
-{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }
-
-static
-inline void uprv_arrayCopy(const int32_t* src, int32_t* dst, int32_t count)
-{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); }
-
-static
-inline void uprv_arrayCopy(const int32_t* src, int32_t srcStart,
- int32_t* dst, int32_t dstStart, int32_t count)
-{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }
-
-static
-inline void
-uprv_arrayCopy(const UChar *src, int32_t srcStart,
- UChar *dst, int32_t dstStart, int32_t count)
-{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }
-
-/**
- * Copy an array of UnicodeString OBJECTS (not pointers).
- * @internal
- */
-static inline void
-uprv_arrayCopy(const icu::UnicodeString *src, icu::UnicodeString *dst, int32_t count)
-{ while(count-- > 0) *dst++ = *src++; }
-
-/**
- * Copy an array of UnicodeString OBJECTS (not pointers).
- * @internal
- */
-static inline void
-uprv_arrayCopy(const icu::UnicodeString *src, int32_t srcStart,
- icu::UnicodeString *dst, int32_t dstStart, int32_t count)
-{ uprv_arrayCopy(src+srcStart, dst+dstStart, count); }
-
-/**
- * Checks that the string is readable and writable.
- * Sets U_ILLEGAL_ARGUMENT_ERROR if the string isBogus() or has an open getBuffer().
- */
-inline void
-uprv_checkCanGetBuffer(const icu::UnicodeString &s, UErrorCode &errorCode) {
- if(U_SUCCESS(errorCode) && s.isBogus()) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- }
-}
-
-#endif /* _CPPUTILS */
diff --git a/contrib/libs/icu/common/cstr.cpp b/contrib/libs/icu/common/cstr.cpp
deleted file mode 100644
index 24654f8fc22..00000000000
--- a/contrib/libs/icu/common/cstr.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2015-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: charstr.cpp
-*/
-#include "unicode/utypes.h"
-#include "unicode/putil.h"
-#include "unicode/unistr.h"
-
-#include "cstr.h"
-
-#include "charstr.h"
-#include "uinvchar.h"
-
-U_NAMESPACE_BEGIN
-
-CStr::CStr(const UnicodeString &in) {
- UErrorCode status = U_ZERO_ERROR;
-#if !UCONFIG_NO_CONVERSION || U_CHARSET_IS_UTF8
- int32_t length = in.extract(0, in.length(), static_cast<char *>(NULL), static_cast<uint32_t>(0));
- int32_t resultCapacity = 0;
- char *buf = s.getAppendBuffer(length, length, resultCapacity, status);
- if (U_SUCCESS(status)) {
- in.extract(0, in.length(), buf, resultCapacity);
- s.append(buf, length, status);
- }
-#else
- // No conversion available. Convert any invariant characters; substitute '?' for the rest.
- // Note: can't just call u_UCharsToChars() or CharString.appendInvariantChars() on the
- // whole string because they require that the entire input be invariant.
- char buf[2];
- for (int i=0; i<in.length(); i = in.moveIndex32(i, 1)) {
- if (uprv_isInvariantUString(in.getBuffer()+i, 1)) {
- u_UCharsToChars(in.getBuffer()+i, buf, 1);
- } else {
- buf[0] = '?';
- }
- s.append(buf, 1, status);
- }
-#endif
-}
-
-CStr::~CStr() {
-}
-
-const char * CStr::operator ()() const {
- return s.data();
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/cstr.h b/contrib/libs/icu/common/cstr.h
deleted file mode 100644
index c33f487ea12..00000000000
--- a/contrib/libs/icu/common/cstr.h
+++ /dev/null
@@ -1,60 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* File: cstr.h
-*/
-
-#ifndef CSTR_H
-#define CSTR_H
-
-#include "unicode/unistr.h"
-#include "unicode/uobject.h"
-#include "unicode/utypes.h"
-
-#include "charstr.h"
-
-/**
- * ICU-internal class CStr, a small helper class to facilitate passing UnicodeStrings
- * to functions needing (const char *) strings, such as printf().
- *
- * It is intended primarily for use in debugging or in tests. Uses platform
- * default code page conversion, which will do the best job possible,
- * but may be lossy, depending on the platform.
- *
- * If no other conversion is available, use invariant conversion and substitue
- * '?' for non-invariant characters.
- *
- * Example Usage:
- * UnicodeString s = whatever;
- * printf("%s", CStr(s)());
- *
- * The explicit call to the CStr() constructor creates a temporary object.
- * Operator () on the temporary object returns a (const char *) pointer.
- * The lifetime of the (const char *) data is that of the temporary object,
- * which works well when passing it as a parameter to another function, such as printf.
- */
-
-U_NAMESPACE_BEGIN
-
-class U_COMMON_API CStr : public UMemory {
- public:
- CStr(const UnicodeString &in);
- ~CStr();
- const char * operator ()() const;
-
- private:
- CharString s;
- CStr(const CStr &other); // Forbid copying of this class.
- CStr &operator =(const CStr &other); // Forbid assignment.
-};
-
-U_NAMESPACE_END
-
-#endif
diff --git a/contrib/libs/icu/common/cstring.cpp b/contrib/libs/icu/common/cstring.cpp
deleted file mode 100644
index 06275c4b564..00000000000
--- a/contrib/libs/icu/common/cstring.cpp
+++ /dev/null
@@ -1,341 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1997-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* File CSTRING.C
-*
-* @author Helena Shih
-*
-* Modification History:
-*
-* Date Name Description
-* 6/18/98 hshih Created
-* 09/08/98 stephen Added include for ctype, for Mac Port
-* 11/15/99 helena Integrated S/390 IEEE changes.
-******************************************************************************
-*/
-
-
-
-#include <stdlib.h>
-#include <stdio.h>
-#include "unicode/utypes.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "uassert.h"
-
-/*
- * We hardcode case conversion for invariant characters to match our expectation
- * and the compiler execution charset.
- * This prevents problems on systems
- * - with non-default casing behavior, like Turkish system locales where
- * tolower('I') maps to dotless i and toupper('i') maps to dotted I
- * - where there are no lowercase Latin characters at all, or using different
- * codes (some old EBCDIC codepages)
- *
- * This works because the compiler usually runs on a platform where the execution
- * charset includes all of the invariant characters at their expected
- * code positions, so that the char * string literals in ICU code match
- * the char literals here.
- *
- * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
- * and the set of uppercase Latin letters is discontiguous as well.
- */
-
-U_CAPI UBool U_EXPORT2
-uprv_isASCIILetter(char c) {
-#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
- return
- ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
- ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
-#else
- return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
-#endif
-}
-
-U_CAPI char U_EXPORT2
-uprv_toupper(char c) {
-#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
- if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
- c=(char)(c+('A'-'a'));
- }
-#else
- if('a'<=c && c<='z') {
- c=(char)(c+('A'-'a'));
- }
-#endif
- return c;
-}
-
-
-#if 0
-/*
- * Commented out because cstring.h defines uprv_tolower() to be
- * the same as either uprv_asciitolower() or uprv_ebcdictolower()
- * to reduce the amount of code to cover with tests.
- *
- * Note that this uprv_tolower() definition is likely to work for most
- * charset families, not just ASCII and EBCDIC, because its #else branch
- * is written generically.
- */
-U_CAPI char U_EXPORT2
-uprv_tolower(char c) {
-#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
- if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
- c=(char)(c+('a'-'A'));
- }
-#else
- if('A'<=c && c<='Z') {
- c=(char)(c+('a'-'A'));
- }
-#endif
- return c;
-}
-#endif
-
-U_CAPI char U_EXPORT2
-uprv_asciitolower(char c) {
- if(0x41<=c && c<=0x5a) {
- c=(char)(c+0x20);
- }
- return c;
-}
-
-U_CAPI char U_EXPORT2
-uprv_ebcdictolower(char c) {
- if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
- (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
- (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
- ) {
- c=(char)(c-0x40);
- }
- return c;
-}
-
-
-U_CAPI char* U_EXPORT2
-T_CString_toLowerCase(char* str)
-{
- char* origPtr = str;
-
- if (str) {
- do
- *str = (char)uprv_tolower(*str);
- while (*(str++));
- }
-
- return origPtr;
-}
-
-U_CAPI char* U_EXPORT2
-T_CString_toUpperCase(char* str)
-{
- char* origPtr = str;
-
- if (str) {
- do
- *str = (char)uprv_toupper(*str);
- while (*(str++));
- }
-
- return origPtr;
-}
-
-/*
- * Takes a int32_t and fills in a char* string with that number "radix"-based.
- * Does not handle negative values (makes an empty string for them).
- * Writes at most 12 chars ("-2147483647" plus NUL).
- * Returns the length of the string (not including the NUL).
- */
-U_CAPI int32_t U_EXPORT2
-T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
-{
- char tbuf[30];
- int32_t tbx = sizeof(tbuf);
- uint8_t digit;
- int32_t length = 0;
- uint32_t uval;
-
- U_ASSERT(radix>=2 && radix<=16);
- uval = (uint32_t) v;
- if(v<0 && radix == 10) {
- /* Only in base 10 do we conside numbers to be signed. */
- uval = (uint32_t)(-v);
- buffer[length++] = '-';
- }
-
- tbx = sizeof(tbuf)-1;
- tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
- do {
- digit = (uint8_t)(uval % radix);
- tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
- uval = uval / radix;
- } while (uval != 0);
-
- /* copy converted number into user buffer */
- uprv_strcpy(buffer+length, tbuf+tbx);
- length += sizeof(tbuf) - tbx -1;
- return length;
-}
-
-
-
-/*
- * Takes a int64_t and fills in a char* string with that number "radix"-based.
- * Writes at most 21: chars ("-9223372036854775807" plus NUL).
- * Returns the length of the string, not including the terminating NULL.
- */
-U_CAPI int32_t U_EXPORT2
-T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
-{
- char tbuf[30];
- int32_t tbx = sizeof(tbuf);
- uint8_t digit;
- int32_t length = 0;
- uint64_t uval;
-
- U_ASSERT(radix>=2 && radix<=16);
- uval = (uint64_t) v;
- if(v<0 && radix == 10) {
- /* Only in base 10 do we conside numbers to be signed. */
- uval = (uint64_t)(-v);
- buffer[length++] = '-';
- }
-
- tbx = sizeof(tbuf)-1;
- tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
- do {
- digit = (uint8_t)(uval % radix);
- tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
- uval = uval / radix;
- } while (uval != 0);
-
- /* copy converted number into user buffer */
- uprv_strcpy(buffer+length, tbuf+tbx);
- length += sizeof(tbuf) - tbx -1;
- return length;
-}
-
-
-U_CAPI int32_t U_EXPORT2
-T_CString_stringToInteger(const char *integerString, int32_t radix)
-{
- char *end;
- return uprv_strtoul(integerString, &end, radix);
-
-}
-
-U_CAPI int U_EXPORT2
-uprv_stricmp(const char *str1, const char *str2) {
- if(str1==NULL) {
- if(str2==NULL) {
- return 0;
- } else {
- return -1;
- }
- } else if(str2==NULL) {
- return 1;
- } else {
- /* compare non-NULL strings lexically with lowercase */
- int rc;
- unsigned char c1, c2;
-
- for(;;) {
- c1=(unsigned char)*str1;
- c2=(unsigned char)*str2;
- if(c1==0) {
- if(c2==0) {
- return 0;
- } else {
- return -1;
- }
- } else if(c2==0) {
- return 1;
- } else {
- /* compare non-zero characters with lowercase */
- rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
- if(rc!=0) {
- return rc;
- }
- }
- ++str1;
- ++str2;
- }
- }
-}
-
-U_CAPI int U_EXPORT2
-uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
- if(str1==NULL) {
- if(str2==NULL) {
- return 0;
- } else {
- return -1;
- }
- } else if(str2==NULL) {
- return 1;
- } else {
- /* compare non-NULL strings lexically with lowercase */
- int rc;
- unsigned char c1, c2;
-
- for(; n--;) {
- c1=(unsigned char)*str1;
- c2=(unsigned char)*str2;
- if(c1==0) {
- if(c2==0) {
- return 0;
- } else {
- return -1;
- }
- } else if(c2==0) {
- return 1;
- } else {
- /* compare non-zero characters with lowercase */
- rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
- if(rc!=0) {
- return rc;
- }
- }
- ++str1;
- ++str2;
- }
- }
-
- return 0;
-}
-
-U_CAPI char* U_EXPORT2
-uprv_strdup(const char *src) {
- size_t len = uprv_strlen(src) + 1;
- char *dup = (char *) uprv_malloc(len);
-
- if (dup) {
- uprv_memcpy(dup, src, len);
- }
-
- return dup;
-}
-
-U_CAPI char* U_EXPORT2
-uprv_strndup(const char *src, int32_t n) {
- char *dup;
-
- if(n < 0) {
- dup = uprv_strdup(src);
- } else {
- dup = (char*)uprv_malloc(n+1);
- if (dup) {
- uprv_memcpy(dup, src, n);
- dup[n] = 0;
- }
- }
-
- return dup;
-}
diff --git a/contrib/libs/icu/common/cstring.h b/contrib/libs/icu/common/cstring.h
deleted file mode 100644
index 3a14e4216c8..00000000000
--- a/contrib/libs/icu/common/cstring.h
+++ /dev/null
@@ -1,126 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1997-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* File CSTRING.H
-*
-* Contains CString interface
-*
-* @author Helena Shih
-*
-* Modification History:
-*
-* Date Name Description
-* 6/17/98 hshih Created.
-* 05/03/99 stephen Changed from functions to macros.
-* 06/14/99 stephen Added icu_strncat, icu_strncmp, icu_tolower
-*
-******************************************************************************
-*/
-
-#ifndef CSTRING_H
-#define CSTRING_H 1
-
-#include "unicode/utypes.h"
-#include "cmemory.h"
-#include <string.h>
-#include <stdlib.h>
-#include <ctype.h>
-
-#define uprv_strcpy(dst, src) U_STANDARD_CPP_NAMESPACE strcpy(dst, src)
-#define uprv_strlen(str) U_STANDARD_CPP_NAMESPACE strlen(str)
-#define uprv_strcmp(s1, s2) U_STANDARD_CPP_NAMESPACE strcmp(s1, s2)
-#define uprv_strcat(dst, src) U_STANDARD_CPP_NAMESPACE strcat(dst, src)
-#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c)
-#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c)
-#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c)
-#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)
-#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)
-#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)
-
-/**
- * Is c an ASCII-repertoire letter a-z or A-Z?
- * Note: The implementation is specific to whether ICU is compiled for
- * an ASCII-based or EBCDIC-based machine. There just does not seem to be a better name for this.
- */
-U_CAPI UBool U_EXPORT2
-uprv_isASCIILetter(char c);
-
-// NOTE: For u_asciiToUpper that takes a UChar, see ustr_imp.h
-
-U_CAPI char U_EXPORT2
-uprv_toupper(char c);
-
-
-U_CAPI char U_EXPORT2
-uprv_asciitolower(char c);
-
-U_CAPI char U_EXPORT2
-uprv_ebcdictolower(char c);
-
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
-# define uprv_tolower uprv_asciitolower
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
-# define uprv_tolower uprv_ebcdictolower
-#else
-# error U_CHARSET_FAMILY is not valid
-#endif
-
-#define uprv_strtod(source, end) U_STANDARD_CPP_NAMESPACE strtod(source, end)
-#define uprv_strtoul(str, end, base) U_STANDARD_CPP_NAMESPACE strtoul(str, end, base)
-#define uprv_strtol(str, end, base) U_STANDARD_CPP_NAMESPACE strtol(str, end, base)
-
-/* Conversion from a digit to the character with radix base from 2-19 */
-/* May need to use U_UPPER_ORDINAL*/
-#define T_CString_itosOffset(a) ((a)<=9?('0'+(a)):('A'+(a)-10))
-
-U_CAPI char* U_EXPORT2
-uprv_strdup(const char *src);
-
-/**
- * uprv_malloc n+1 bytes, and copy n bytes from src into the new string.
- * Terminate with a null at offset n. If n is -1, works like uprv_strdup
- * @param src
- * @param n length of the input string, not including null.
- * @return new string (owned by caller, use uprv_free to free).
- * @internal
- */
-U_CAPI char* U_EXPORT2
-uprv_strndup(const char *src, int32_t n);
-
-U_CAPI char* U_EXPORT2
-T_CString_toLowerCase(char* str);
-
-U_CAPI char* U_EXPORT2
-T_CString_toUpperCase(char* str);
-
-U_CAPI int32_t U_EXPORT2
-T_CString_integerToString(char *buffer, int32_t n, int32_t radix);
-
-U_CAPI int32_t U_EXPORT2
-T_CString_int64ToString(char *buffer, int64_t n, uint32_t radix);
-
-U_CAPI int32_t U_EXPORT2
-T_CString_stringToInteger(const char *integerString, int32_t radix);
-
-/**
- * Case-insensitive, language-independent string comparison
- * limited to the ASCII character repertoire.
- */
-U_CAPI int U_EXPORT2
-uprv_stricmp(const char *str1, const char *str2);
-
-/**
- * Case-insensitive, language-independent string comparison
- * limited to the ASCII character repertoire.
- */
-U_CAPI int U_EXPORT2
-uprv_strnicmp(const char *str1, const char *str2, uint32_t n);
-
-#endif /* ! CSTRING_H */
diff --git a/contrib/libs/icu/common/cwchar.cpp b/contrib/libs/icu/common/cwchar.cpp
deleted file mode 100644
index 20c7d71e0f0..00000000000
--- a/contrib/libs/icu/common/cwchar.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2001, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: cwchar.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2001may25
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-
-#if !U_HAVE_WCSCPY
-
-#include "cwchar.h"
-
-U_CAPI wchar_t *uprv_wcscat(wchar_t *dst, const wchar_t *src) {
- wchar_t *start=dst;
- while(*dst!=0) {
- ++dst;
- }
- while((*dst=*src)!=0) {
- ++dst;
- ++src;
- }
- return start;
-}
-
-U_CAPI wchar_t *uprv_wcscpy(wchar_t *dst, const wchar_t *src) {
- wchar_t *start=dst;
- while((*dst=*src)!=0) {
- ++dst;
- ++src;
- }
- return start;
-}
-
-U_CAPI size_t uprv_wcslen(const wchar_t *src) {
- const wchar_t *start=src;
- while(*src!=0) {
- ++src;
- }
- return src-start;
-}
-
-#endif
-
diff --git a/contrib/libs/icu/common/cwchar.h b/contrib/libs/icu/common/cwchar.h
deleted file mode 100644
index 8fd041a1b9c..00000000000
--- a/contrib/libs/icu/common/cwchar.h
+++ /dev/null
@@ -1,58 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2001, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: cwchar.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2001may25
-* created by: Markus W. Scherer
-*
-* This file contains ICU-internal definitions of wchar_t operations.
-* These definitions were moved here from cstring.h so that fewer
-* ICU implementation files include wchar.h.
-*/
-
-#ifndef __CWCHAR_H__
-#define __CWCHAR_H__
-
-#include <string.h>
-#include <stdlib.h>
-#include "unicode/utypes.h"
-
-/* Do this after utypes.h so that we have U_HAVE_WCHAR_H . */
-#if U_HAVE_WCHAR_H
-# include <wchar.h>
-#endif
-
-/*===========================================================================*/
-/* Wide-character functions */
-/*===========================================================================*/
-
-/* The following are not available on all systems, defined in wchar.h or string.h. */
-#if U_HAVE_WCSCPY
-# define uprv_wcscpy wcscpy
-# define uprv_wcscat wcscat
-# define uprv_wcslen wcslen
-#else
-U_CAPI wchar_t* U_EXPORT2
-uprv_wcscpy(wchar_t *dst, const wchar_t *src);
-U_CAPI wchar_t* U_EXPORT2
-uprv_wcscat(wchar_t *dst, const wchar_t *src);
-U_CAPI size_t U_EXPORT2
-uprv_wcslen(const wchar_t *src);
-#endif
-
-/* The following are part of the ANSI C standard, defined in stdlib.h . */
-#define uprv_wcstombs(mbstr, wcstr, count) U_STANDARD_CPP_NAMESPACE wcstombs(mbstr, wcstr, count)
-#define uprv_mbstowcs(wcstr, mbstr, count) U_STANDARD_CPP_NAMESPACE mbstowcs(wcstr, mbstr, count)
-
-
-#endif
diff --git a/contrib/libs/icu/common/dictbe.cpp b/contrib/libs/icu/common/dictbe.cpp
deleted file mode 100644
index b42cdf03fae..00000000000
--- a/contrib/libs/icu/common/dictbe.cpp
+++ /dev/null
@@ -1,1410 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/**
- *******************************************************************************
- * Copyright (C) 2006-2016, International Business Machines Corporation
- * and others. All Rights Reserved.
- *******************************************************************************
- */
-
-#include <utility>
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "brkeng.h"
-#include "dictbe.h"
-#include "unicode/uniset.h"
-#include "unicode/chariter.h"
-#include "unicode/ubrk.h"
-#include "utracimp.h"
-#include "uvectr32.h"
-#include "uvector.h"
-#include "uassert.h"
-#include "unicode/normlzr.h"
-#include "cmemory.h"
-#include "dictionarydata.h"
-
-U_NAMESPACE_BEGIN
-
-/*
- ******************************************************************
- */
-
-DictionaryBreakEngine::DictionaryBreakEngine() {
-}
-
-DictionaryBreakEngine::~DictionaryBreakEngine() {
-}
-
-UBool
-DictionaryBreakEngine::handles(UChar32 c) const {
- return fSet.contains(c);
-}
-
-int32_t
-DictionaryBreakEngine::findBreaks( UText *text,
- int32_t startPos,
- int32_t endPos,
- UVector32 &foundBreaks ) const {
- (void)startPos; // TODO: remove this param?
- int32_t result = 0;
-
- // Find the span of characters included in the set.
- // The span to break begins at the current position in the text, and
- // extends towards the start or end of the text, depending on 'reverse'.
-
- int32_t start = (int32_t)utext_getNativeIndex(text);
- int32_t current;
- int32_t rangeStart;
- int32_t rangeEnd;
- UChar32 c = utext_current32(text);
- while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
- utext_next32(text); // TODO: recast loop for postincrement
- c = utext_current32(text);
- }
- rangeStart = start;
- rangeEnd = current;
- result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
- utext_setNativeIndex(text, current);
-
- return result;
-}
-
-void
-DictionaryBreakEngine::setCharacters( const UnicodeSet &set ) {
- fSet = set;
- // Compact for caching
- fSet.compact();
-}
-
-/*
- ******************************************************************
- * PossibleWord
- */
-
-// Helper class for improving readability of the Thai/Lao/Khmer word break
-// algorithm. The implementation is completely inline.
-
-// List size, limited by the maximum number of words in the dictionary
-// that form a nested sequence.
-static const int32_t POSSIBLE_WORD_LIST_MAX = 20;
-
-class PossibleWord {
-private:
- // list of word candidate lengths, in increasing length order
- // TODO: bytes would be sufficient for word lengths.
- int32_t count; // Count of candidates
- int32_t prefix; // The longest match with a dictionary word
- int32_t offset; // Offset in the text of these candidates
- int32_t mark; // The preferred candidate's offset
- int32_t current; // The candidate we're currently looking at
- int32_t cuLengths[POSSIBLE_WORD_LIST_MAX]; // Word Lengths, in code units.
- int32_t cpLengths[POSSIBLE_WORD_LIST_MAX]; // Word Lengths, in code points.
-
-public:
- PossibleWord() : count(0), prefix(0), offset(-1), mark(0), current(0) {}
- ~PossibleWord() {}
-
- // Fill the list of candidates if needed, select the longest, and return the number found
- int32_t candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd );
-
- // Select the currently marked candidate, point after it in the text, and invalidate self
- int32_t acceptMarked( UText *text );
-
- // Back up from the current candidate to the next shorter one; return TRUE if that exists
- // and point the text after it
- UBool backUp( UText *text );
-
- // Return the longest prefix this candidate location shares with a dictionary word
- // Return value is in code points.
- int32_t longestPrefix() { return prefix; }
-
- // Mark the current candidate as the one we like
- void markCurrent() { mark = current; }
-
- // Get length in code points of the marked word.
- int32_t markedCPLength() { return cpLengths[mark]; }
-};
-
-
-int32_t PossibleWord::candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd ) {
- // TODO: If getIndex is too slow, use offset < 0 and add discardAll()
- int32_t start = (int32_t)utext_getNativeIndex(text);
- if (start != offset) {
- offset = start;
- count = dict->matches(text, rangeEnd-start, UPRV_LENGTHOF(cuLengths), cuLengths, cpLengths, NULL, &prefix);
- // Dictionary leaves text after longest prefix, not longest word. Back up.
- if (count <= 0) {
- utext_setNativeIndex(text, start);
- }
- }
- if (count > 0) {
- utext_setNativeIndex(text, start+cuLengths[count-1]);
- }
- current = count-1;
- mark = current;
- return count;
-}
-
-int32_t
-PossibleWord::acceptMarked( UText *text ) {
- utext_setNativeIndex(text, offset + cuLengths[mark]);
- return cuLengths[mark];
-}
-
-
-UBool
-PossibleWord::backUp( UText *text ) {
- if (current > 0) {
- utext_setNativeIndex(text, offset + cuLengths[--current]);
- return TRUE;
- }
- return FALSE;
-}
-
-/*
- ******************************************************************
- * ThaiBreakEngine
- */
-
-// How many words in a row are "good enough"?
-static const int32_t THAI_LOOKAHEAD = 3;
-
-// Will not combine a non-word with a preceding dictionary word longer than this
-static const int32_t THAI_ROOT_COMBINE_THRESHOLD = 3;
-
-// Will not combine a non-word that shares at least this much prefix with a
-// dictionary word, with a preceding word
-static const int32_t THAI_PREFIX_COMBINE_THRESHOLD = 3;
-
-// Ellision character
-static const int32_t THAI_PAIYANNOI = 0x0E2F;
-
-// Repeat character
-static const int32_t THAI_MAIYAMOK = 0x0E46;
-
-// Minimum word size
-static const int32_t THAI_MIN_WORD = 2;
-
-// Minimum number of characters for two words
-static const int32_t THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2;
-
-ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
- : DictionaryBreakEngine(),
- fDictionary(adoptDictionary)
-{
- UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
- UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Thai");
- fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
- if (U_SUCCESS(status)) {
- setCharacters(fThaiWordSet);
- }
- fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
- fMarkSet.add(0x0020);
- fEndWordSet = fThaiWordSet;
- fEndWordSet.remove(0x0E31); // MAI HAN-AKAT
- fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
- fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK
- fBeginWordSet.add(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
- fSuffixSet.add(THAI_PAIYANNOI);
- fSuffixSet.add(THAI_MAIYAMOK);
-
- // Compact for caching.
- fMarkSet.compact();
- fEndWordSet.compact();
- fBeginWordSet.compact();
- fSuffixSet.compact();
- UTRACE_EXIT_STATUS(status);
-}
-
-ThaiBreakEngine::~ThaiBreakEngine() {
- delete fDictionary;
-}
-
-int32_t
-ThaiBreakEngine::divideUpDictionaryRange( UText *text,
- int32_t rangeStart,
- int32_t rangeEnd,
- UVector32 &foundBreaks ) const {
- utext_setNativeIndex(text, rangeStart);
- utext_moveIndex32(text, THAI_MIN_WORD_SPAN);
- if (utext_getNativeIndex(text) >= rangeEnd) {
- return 0; // Not enough characters for two words
- }
- utext_setNativeIndex(text, rangeStart);
-
-
- uint32_t wordsFound = 0;
- int32_t cpWordLength = 0; // Word Length in Code Points.
- int32_t cuWordLength = 0; // Word length in code units (UText native indexing)
- int32_t current;
- UErrorCode status = U_ZERO_ERROR;
- PossibleWord words[THAI_LOOKAHEAD];
-
- utext_setNativeIndex(text, rangeStart);
-
- while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
- cpWordLength = 0;
- cuWordLength = 0;
-
- // Look for candidate words at the current position
- int32_t candidates = words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
-
- // If we found exactly one, use that
- if (candidates == 1) {
- cuWordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(text);
- cpWordLength = words[wordsFound % THAI_LOOKAHEAD].markedCPLength();
- wordsFound += 1;
- }
- // If there was more than one, see which one can take us forward the most words
- else if (candidates > 1) {
- // If we're already at the end of the range, we're done
- if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
- goto foundBest;
- }
- do {
- int32_t wordsMatched = 1;
- if (words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
- if (wordsMatched < 2) {
- // Followed by another dictionary word; mark first word as a good candidate
- words[wordsFound%THAI_LOOKAHEAD].markCurrent();
- wordsMatched = 2;
- }
-
- // If we're already at the end of the range, we're done
- if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
- goto foundBest;
- }
-
- // See if any of the possible second words is followed by a third word
- do {
- // If we find a third word, stop right away
- if (words[(wordsFound + 2) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
- words[wordsFound % THAI_LOOKAHEAD].markCurrent();
- goto foundBest;
- }
- }
- while (words[(wordsFound + 1) % THAI_LOOKAHEAD].backUp(text));
- }
- }
- while (words[wordsFound % THAI_LOOKAHEAD].backUp(text));
-foundBest:
- // Set UText position to after the accepted word.
- cuWordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(text);
- cpWordLength = words[wordsFound % THAI_LOOKAHEAD].markedCPLength();
- wordsFound += 1;
- }
-
- // We come here after having either found a word or not. We look ahead to the
- // next word. If it's not a dictionary word, we will combine it with the word we
- // just found (if there is one), but only if the preceding word does not exceed
- // the threshold.
- // The text iterator should now be positioned at the end of the word we found.
-
- UChar32 uc = 0;
- if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < THAI_ROOT_COMBINE_THRESHOLD) {
- // if it is a dictionary word, do nothing. If it isn't, then if there is
- // no preceding word, or the non-word shares less than the minimum threshold
- // of characters with a dictionary word, then scan to resynchronize
- if (words[wordsFound % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
- && (cuWordLength == 0
- || words[wordsFound%THAI_LOOKAHEAD].longestPrefix() < THAI_PREFIX_COMBINE_THRESHOLD)) {
- // Look for a plausible word boundary
- int32_t remaining = rangeEnd - (current+cuWordLength);
- UChar32 pc;
- int32_t chars = 0;
- for (;;) {
- int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
- pc = utext_next32(text);
- int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
- chars += pcSize;
- remaining -= pcSize;
- if (remaining <= 0) {
- break;
- }
- uc = utext_current32(text);
- if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
- // Maybe. See if it's in the dictionary.
- // NOTE: In the original Apple code, checked that the next
- // two characters after uc were not 0x0E4C THANTHAKHAT before
- // checking the dictionary. That is just a performance filter,
- // but it's not clear it's faster than checking the trie.
- int32_t num_candidates = words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
- utext_setNativeIndex(text, current + cuWordLength + chars);
- if (num_candidates > 0) {
- break;
- }
- }
- }
-
- // Bump the word count if there wasn't already one
- if (cuWordLength <= 0) {
- wordsFound += 1;
- }
-
- // Update the length with the passed-over characters
- cuWordLength += chars;
- }
- else {
- // Back up to where we were for next iteration
- utext_setNativeIndex(text, current+cuWordLength);
- }
- }
-
- // Never stop before a combining mark.
- int32_t currPos;
- while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
- utext_next32(text);
- cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
- }
-
- // Look ahead for possible suffixes if a dictionary word does not follow.
- // We do this in code rather than using a rule so that the heuristic
- // resynch continues to function. For example, one of the suffix characters
- // could be a typo in the middle of a word.
- if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cuWordLength > 0) {
- if (words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
- && fSuffixSet.contains(uc = utext_current32(text))) {
- if (uc == THAI_PAIYANNOI) {
- if (!fSuffixSet.contains(utext_previous32(text))) {
- // Skip over previous end and PAIYANNOI
- utext_next32(text);
- int32_t paiyannoiIndex = (int32_t)utext_getNativeIndex(text);
- utext_next32(text);
- cuWordLength += (int32_t)utext_getNativeIndex(text) - paiyannoiIndex; // Add PAIYANNOI to word
- uc = utext_current32(text); // Fetch next character
- }
- else {
- // Restore prior position
- utext_next32(text);
- }
- }
- if (uc == THAI_MAIYAMOK) {
- if (utext_previous32(text) != THAI_MAIYAMOK) {
- // Skip over previous end and MAIYAMOK
- utext_next32(text);
- int32_t maiyamokIndex = (int32_t)utext_getNativeIndex(text);
- utext_next32(text);
- cuWordLength += (int32_t)utext_getNativeIndex(text) - maiyamokIndex; // Add MAIYAMOK to word
- }
- else {
- // Restore prior position
- utext_next32(text);
- }
- }
- }
- else {
- utext_setNativeIndex(text, current+cuWordLength);
- }
- }
-
- // Did we find a word on this iteration? If so, push it on the break stack
- if (cuWordLength > 0) {
- foundBreaks.push((current+cuWordLength), status);
- }
- }
-
- // Don't return a break for the end of the dictionary range if there is one there.
- if (foundBreaks.peeki() >= rangeEnd) {
- (void) foundBreaks.popi();
- wordsFound -= 1;
- }
-
- return wordsFound;
-}
-
-/*
- ******************************************************************
- * LaoBreakEngine
- */
-
-// Tuning constants for the Lao dictionary segmenter.
-
-// Smallest word size, and the smallest number of characters that can hold two words.
-static const int32_t LAO_MIN_WORD = 2;
-static const int32_t LAO_MIN_WORD_SPAN = 2 * LAO_MIN_WORD;
-
-// Number of dictionary words in a row that counts as "good enough".
-static const int32_t LAO_LOOKAHEAD = 3;
-
-// A non-word is not combined with a preceding dictionary word longer than this.
-static const int32_t LAO_ROOT_COMBINE_THRESHOLD = 3;
-
-// A non-word that shares at least this much prefix with a dictionary word
-// is not combined with a preceding word.
-static const int32_t LAO_PREFIX_COMBINE_THRESHOLD = 3;
-
-// Builds the Lao break engine. adoptDictionary is stored in fDictionary and
-// deleted by the destructor; status is passed to the pattern-parsing calls.
-LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
-    : DictionaryBreakEngine(), fDictionary(adoptDictionary)
-{
-    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
-    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Laoo");
-
-    // Characters handled by this engine; registered only if the pattern parsed.
-    fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
-    if (U_SUCCESS(status)) {
-        setCharacters(fLaoWordSet);
-    }
-
-    // Mark set: the [:M:] subset of the word set, plus U+0020. Compacted for caching.
-    fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status);
-    fMarkSet.add(0x0020);
-    fMarkSet.compact();
-
-    // End-of-word set: the whole word set minus the prefix vowels. Compacted for caching.
-    fEndWordSet = fLaoWordSet;
-    fEndWordSet.remove(0x0EC0, 0x0EC4);
-    fEndWordSet.compact();
-
-    // Start-of-word set. Compacted for caching.
-    fBeginWordSet.add(0x0EC0, 0x0EC4); // prefix vowels
-    fBeginWordSet.add(0x0E81, 0x0EAE); // basic consonants (including holes for corresponding Thai characters)
-    fBeginWordSet.add(0x0EDC, 0x0EDD); // digraph consonants (no Thai equivalent)
-    fBeginWordSet.compact();
-
-    UTRACE_EXIT_STATUS(status);
-}
-
-// Deletes the dictionary that the constructor stored in fDictionary.
-LaoBreakEngine::~LaoBreakEngine()
-{
-    delete fDictionary;
-}
-
-/**
- * Divides the range [rangeStart, rangeEnd) of text into words and pushes the
- * native index of each word boundary onto foundBreaks.
- *
- * At every position the dictionary is asked for candidate words. When more
- * than one candidate matches, following words are probed (up to
- * LAO_LOOKAHEAD words in total) to choose the candidate to accept. Text that
- * does not start a dictionary word is scanned forward to a plausible boundary
- * and added to the current word.
- *
- * @param text        The text to segment; its iteration position is changed.
- * @param rangeStart  Native index where segmentation starts.
- * @param rangeEnd    Native index where segmentation stops.
- * @param foundBreaks Receives the boundaries that were found.
- * @return The number of words counted, not counting a boundary at or beyond
- *         rangeEnd; 0 when the range is shorter than LAO_MIN_WORD_SPAN.
- */
-int32_t
-LaoBreakEngine::divideUpDictionaryRange( UText *text,
- int32_t rangeStart,
- int32_t rangeEnd,
- UVector32 &foundBreaks ) const {
- if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) {
- return 0; // Not enough characters for two words
- }
-
- // words[] is always indexed modulo LAO_LOOKAHEAD, so its slots are reused
- // as wordsFound grows.
- uint32_t wordsFound = 0;
- // Lengths of the word accepted in the current iteration: cpWordLength comes
- // from markedCPLength(), cuWordLength from acceptMarked() plus any text
- // appended further down.
- int32_t cpWordLength = 0;
- int32_t cuWordLength = 0;
- // Native index at which the current iteration started.
- int32_t current;
- UErrorCode status = U_ZERO_ERROR;
- PossibleWord words[LAO_LOOKAHEAD];
-
- utext_setNativeIndex(text, rangeStart);
-
- while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
- cuWordLength = 0;
- cpWordLength = 0;
-
- // Look for candidate words at the current position
- int32_t candidates = words[wordsFound%LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
-
- // If we found exactly one, use that
- if (candidates == 1) {
- cuWordLength = words[wordsFound % LAO_LOOKAHEAD].acceptMarked(text);
- cpWordLength = words[wordsFound % LAO_LOOKAHEAD].markedCPLength();
- wordsFound += 1;
- }
- // If there was more than one, see which one can take us forward the most words
- else if (candidates > 1) {
- // If we're already at the end of the range, we're done
- if (utext_getNativeIndex(text) >= rangeEnd) {
- goto foundBest;
- }
- do {
- // NOTE(review): wordsMatched is reset to 1 on every pass, so the
- // "wordsMatched < 2" test below is always true.
- int32_t wordsMatched = 1;
- if (words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
- if (wordsMatched < 2) {
- // Followed by another dictionary word; mark first word as a good candidate
- words[wordsFound%LAO_LOOKAHEAD].markCurrent();
- wordsMatched = 2;
- }
-
- // If we're already at the end of the range, we're done
- if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
- goto foundBest;
- }
-
- // See if any of the possible second words is followed by a third word
- do {
- // If we find a third word, stop right away
- if (words[(wordsFound + 2) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
- words[wordsFound % LAO_LOOKAHEAD].markCurrent();
- goto foundBest;
- }
- }
- while (words[(wordsFound + 1) % LAO_LOOKAHEAD].backUp(text));
- }
- }
- while (words[wordsFound % LAO_LOOKAHEAD].backUp(text));
-foundBest:
- // Accept whichever candidate of the first word is currently marked.
- cuWordLength = words[wordsFound % LAO_LOOKAHEAD].acceptMarked(text);
- cpWordLength = words[wordsFound % LAO_LOOKAHEAD].markedCPLength();
- wordsFound += 1;
- }
-
- // We come here after having either found a word or not. We look ahead to the
- // next word. If it's not a dictionary word, we will combine it with the word we
- // just found (if there is one), but only if the preceding word does not exceed
- // the threshold.
- // The text iterator should now be positioned at the end of the word we found.
- if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < LAO_ROOT_COMBINE_THRESHOLD) {
- // if it is a dictionary word, do nothing. If it isn't, then if there is
- // no preceding word, or the non-word shares less than the minimum threshold
- // of characters with a dictionary word, then scan to resynchronize
- if (words[wordsFound % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
- && (cuWordLength == 0
- || words[wordsFound%LAO_LOOKAHEAD].longestPrefix() < LAO_PREFIX_COMBINE_THRESHOLD)) {
- // Look for a plausible word boundary
- // remaining: native-index units left before rangeEnd.
- // chars: native-index units passed over by this scan.
- int32_t remaining = rangeEnd - (current + cuWordLength);
- UChar32 pc;
- UChar32 uc;
- int32_t chars = 0;
- for (;;) {
- int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
- pc = utext_next32(text);
- int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
- chars += pcSize;
- remaining -= pcSize;
- if (remaining <= 0) {
- break;
- }
- uc = utext_current32(text);
- if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
- // Maybe. See if it's in the dictionary.
- // TODO: this looks iffy; compare with old code.
- int32_t num_candidates = words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
- // Reset the position to just after the characters scanned so far.
- utext_setNativeIndex(text, current + cuWordLength + chars);
- if (num_candidates > 0) {
- break;
- }
- }
- }
-
- // Bump the word count if there wasn't already one
- if (cuWordLength <= 0) {
- wordsFound += 1;
- }
-
- // Update the length with the passed-over characters
- cuWordLength += chars;
- }
- else {
- // Back up to where we were for next iteration
- utext_setNativeIndex(text, current + cuWordLength);
- }
- }
-
- // Never stop before a combining mark.
- int32_t currPos;
- while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
- utext_next32(text);
- cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
- }
-
- // Look ahead for possible suffixes if a dictionary word does not follow.
- // We do this in code rather than using a rule so that the heuristic
- // resynch continues to function. For example, one of the suffix characters
- // could be a typo in the middle of a word.
- // NOT CURRENTLY APPLICABLE TO LAO
-
- // Did we find a word on this iteration? If so, push it on the break stack
- if (cuWordLength > 0) {
- foundBreaks.push((current+cuWordLength), status);
- }
- }
-
- // Don't return a break for the end of the dictionary range if there is one there.
- // NOTE(review): peeki() is called without first checking that foundBreaks is
- // non-empty, and wordsFound is unsigned — confirm both are safe when no
- // break was pushed by the loop above.
- if (foundBreaks.peeki() >= rangeEnd) {
- (void) foundBreaks.popi();
- wordsFound -= 1;
- }
-
- return wordsFound;
-}
-
-/*
- ******************************************************************
- * BurmeseBreakEngine
- */
-
-// Tuning constants for the Burmese dictionary segmenter.
-
-// Smallest word size, and the smallest number of characters that can hold two words.
-static const int32_t BURMESE_MIN_WORD = 2;
-static const int32_t BURMESE_MIN_WORD_SPAN = 2 * BURMESE_MIN_WORD;
-
-// Number of dictionary words in a row that counts as "good enough".
-static const int32_t BURMESE_LOOKAHEAD = 3;
-
-// A non-word is not combined with a preceding dictionary word longer than this.
-static const int32_t BURMESE_ROOT_COMBINE_THRESHOLD = 3;
-
-// A non-word that shares at least this much prefix with a dictionary word
-// is not combined with a preceding word.
-static const int32_t BURMESE_PREFIX_COMBINE_THRESHOLD = 3;
-
-// Builds the Burmese break engine. adoptDictionary is stored in fDictionary and
-// deleted by the destructor; status is passed to the pattern-parsing calls.
-BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
-    : DictionaryBreakEngine(), fDictionary(adoptDictionary)
-{
-    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
-    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Mymr");
-
-    // Characters handled by this engine; registered only if the pattern parsed.
-    fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
-    if (U_SUCCESS(status)) {
-        setCharacters(fBurmeseWordSet);
-    }
-
-    // Mark set: the [:M:] subset of the word set, plus U+0020. Compacted for caching.
-    fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status);
-    fMarkSet.add(0x0020);
-    fMarkSet.compact();
-
-    // End-of-word set: a copy of the whole word set. Compacted for caching.
-    fEndWordSet = fBurmeseWordSet;
-    fEndWordSet.compact();
-
-    // Start-of-word set. Compacted for caching.
-    fBeginWordSet.add(0x1000, 0x102A); // basic consonants and independent vowels
-    fBeginWordSet.compact();
-
-    UTRACE_EXIT_STATUS(status);
-}
-
-// Deletes the dictionary that the constructor stored in fDictionary.
-BurmeseBreakEngine::~BurmeseBreakEngine()
-{
-    delete fDictionary;
-}
-
-/**
- * Divides the range [rangeStart, rangeEnd) of text into words and pushes the
- * native index of each word boundary onto foundBreaks.
- *
- * At every position the dictionary is asked for candidate words. When more
- * than one candidate matches, following words are probed (up to
- * BURMESE_LOOKAHEAD words in total) to choose the candidate to accept. Text
- * that does not start a dictionary word is scanned forward to a plausible
- * boundary and added to the current word.
- *
- * @param text        The text to segment; its iteration position is changed.
- * @param rangeStart  Native index where segmentation starts.
- * @param rangeEnd    Native index where segmentation stops.
- * @param foundBreaks Receives the boundaries that were found.
- * @return The number of words counted, not counting a boundary at or beyond
- *         rangeEnd; 0 when the range is shorter than BURMESE_MIN_WORD_SPAN.
- */
-int32_t
-BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
- int32_t rangeStart,
- int32_t rangeEnd,
- UVector32 &foundBreaks ) const {
- if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) {
- return 0; // Not enough characters for two words
- }
-
- // words[] is always indexed modulo BURMESE_LOOKAHEAD, so its slots are
- // reused as wordsFound grows.
- uint32_t wordsFound = 0;
- // Lengths of the word accepted in the current iteration: cpWordLength comes
- // from markedCPLength(), cuWordLength from acceptMarked() plus any text
- // appended further down.
- int32_t cpWordLength = 0;
- int32_t cuWordLength = 0;
- // Native index at which the current iteration started.
- int32_t current;
- UErrorCode status = U_ZERO_ERROR;
- PossibleWord words[BURMESE_LOOKAHEAD];
-
- utext_setNativeIndex(text, rangeStart);
-
- while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
- cuWordLength = 0;
- cpWordLength = 0;
-
- // Look for candidate words at the current position
- int32_t candidates = words[wordsFound%BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
-
- // If we found exactly one, use that
- if (candidates == 1) {
- cuWordLength = words[wordsFound % BURMESE_LOOKAHEAD].acceptMarked(text);
- cpWordLength = words[wordsFound % BURMESE_LOOKAHEAD].markedCPLength();
- wordsFound += 1;
- }
- // If there was more than one, see which one can take us forward the most words
- else if (candidates > 1) {
- // If we're already at the end of the range, we're done
- if (utext_getNativeIndex(text) >= rangeEnd) {
- goto foundBest;
- }
- do {
- // NOTE(review): wordsMatched is reset to 1 on every pass, so the
- // "wordsMatched < 2" test below is always true.
- int32_t wordsMatched = 1;
- if (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
- if (wordsMatched < 2) {
- // Followed by another dictionary word; mark first word as a good candidate
- words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
- wordsMatched = 2;
- }
-
- // If we're already at the end of the range, we're done
- if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
- goto foundBest;
- }
-
- // See if any of the possible second words is followed by a third word
- do {
- // If we find a third word, stop right away
- if (words[(wordsFound + 2) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
- words[wordsFound % BURMESE_LOOKAHEAD].markCurrent();
- goto foundBest;
- }
- }
- while (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].backUp(text));
- }
- }
- while (words[wordsFound % BURMESE_LOOKAHEAD].backUp(text));
-foundBest:
- // Accept whichever candidate of the first word is currently marked.
- cuWordLength = words[wordsFound % BURMESE_LOOKAHEAD].acceptMarked(text);
- cpWordLength = words[wordsFound % BURMESE_LOOKAHEAD].markedCPLength();
- wordsFound += 1;
- }
-
- // We come here after having either found a word or not. We look ahead to the
- // next word. If it's not a dictionary word, we will combine it with the word we
- // just found (if there is one), but only if the preceding word does not exceed
- // the threshold.
- // The text iterator should now be positioned at the end of the word we found.
- if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < BURMESE_ROOT_COMBINE_THRESHOLD) {
- // if it is a dictionary word, do nothing. If it isn't, then if there is
- // no preceding word, or the non-word shares less than the minimum threshold
- // of characters with a dictionary word, then scan to resynchronize
- if (words[wordsFound % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
- && (cuWordLength == 0
- || words[wordsFound%BURMESE_LOOKAHEAD].longestPrefix() < BURMESE_PREFIX_COMBINE_THRESHOLD)) {
- // Look for a plausible word boundary
- // remaining: native-index units left before rangeEnd.
- // chars: native-index units passed over by this scan.
- int32_t remaining = rangeEnd - (current + cuWordLength);
- UChar32 pc;
- UChar32 uc;
- int32_t chars = 0;
- for (;;) {
- int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
- pc = utext_next32(text);
- int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
- chars += pcSize;
- remaining -= pcSize;
- if (remaining <= 0) {
- break;
- }
- uc = utext_current32(text);
- if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
- // Maybe. See if it's in the dictionary.
- // TODO: this looks iffy; compare with old code.
- int32_t num_candidates = words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
- // Reset the position to just after the characters scanned so far.
- utext_setNativeIndex(text, current + cuWordLength + chars);
- if (num_candidates > 0) {
- break;
- }
- }
- }
-
- // Bump the word count if there wasn't already one
- if (cuWordLength <= 0) {
- wordsFound += 1;
- }
-
- // Update the length with the passed-over characters
- cuWordLength += chars;
- }
- else {
- // Back up to where we were for next iteration
- utext_setNativeIndex(text, current + cuWordLength);
- }
- }
-
- // Never stop before a combining mark.
- int32_t currPos;
- while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
- utext_next32(text);
- cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
- }
-
- // Look ahead for possible suffixes if a dictionary word does not follow.
- // We do this in code rather than using a rule so that the heuristic
- // resynch continues to function. For example, one of the suffix characters
- // could be a typo in the middle of a word.
- // NOT CURRENTLY APPLICABLE TO BURMESE
-
- // Did we find a word on this iteration? If so, push it on the break stack
- if (cuWordLength > 0) {
- foundBreaks.push((current+cuWordLength), status);
- }
- }
-
- // Don't return a break for the end of the dictionary range if there is one there.
- // NOTE(review): peeki() is called without first checking that foundBreaks is
- // non-empty, and wordsFound is unsigned — confirm both are safe when no
- // break was pushed by the loop above.
- if (foundBreaks.peeki() >= rangeEnd) {
- (void) foundBreaks.popi();
- wordsFound -= 1;
- }
-
- return wordsFound;
-}
-
-/*
- ******************************************************************
- * KhmerBreakEngine
- */
-
-// Tuning constants for the Khmer dictionary segmenter.
-
-// Smallest word size, and the smallest number of characters that can hold two words.
-static const int32_t KHMER_MIN_WORD = 2;
-static const int32_t KHMER_MIN_WORD_SPAN = 2 * KHMER_MIN_WORD;
-
-// Number of dictionary words in a row that counts as "good enough".
-static const int32_t KHMER_LOOKAHEAD = 3;
-
-// A non-word is not combined with a preceding dictionary word longer than this.
-static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 3;
-
-// A non-word that shares at least this much prefix with a dictionary word
-// is not combined with a preceding word.
-static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 3;
-
-// Builds the Khmer break engine. adoptDictionary is stored in fDictionary and
-// deleted by the destructor; status is passed to the pattern-parsing calls.
-KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
-    : DictionaryBreakEngine(), fDictionary(adoptDictionary)
-{
-    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
-    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
-
-    // Characters handled by this engine; registered only if the pattern parsed.
-    fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
-    if (U_SUCCESS(status)) {
-        setCharacters(fKhmerWordSet);
-    }
-
-    // Mark set: the [:M:] subset of the word set, plus U+0020. Compacted for caching.
-    fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
-    fMarkSet.add(0x0020);
-    fMarkSet.compact();
-
-    // End-of-word set: the whole word set minus KHMER SIGN COENG, which
-    // combines some following characters. Compacted for caching.
-    // Left disabled in the original code: also removing the independent vowels
-    // 0x17A5-0x17A9 and 0x17B2, or the dependent vowels 0x17B6-0x17C5.
-    fEndWordSet = fKhmerWordSet;
-    fEndWordSet.remove(0x17D2);
-    fEndWordSet.compact();
-
-    // Start-of-word set. Compacted for caching.
-    // Left disabled in the original code: a separate add of the deprecated
-    // vowels 0x17A3-0x17A4, and Thai-specific set adjustments (including a
-    // suffix set), none of which are applied here.
-    fBeginWordSet.add(0x1780, 0x17B3);
-    fBeginWordSet.compact();
-
-    UTRACE_EXIT_STATUS(status);
-}
-
-// Deletes the dictionary that the constructor stored in fDictionary.
-KhmerBreakEngine::~KhmerBreakEngine()
-{
-    delete fDictionary;
-}
-
-/**
- * Divides the range [rangeStart, rangeEnd) of text into words and pushes the
- * native index of each word boundary onto foundBreaks.
- *
- * At every position the dictionary is asked for candidate words. When more
- * than one candidate matches, following words are probed (up to
- * KHMER_LOOKAHEAD words in total) to choose the candidate to accept. Text
- * that does not start a dictionary word is scanned forward to a plausible
- * boundary and added to the current word.
- *
- * @param text        The text to segment; its iteration position is changed.
- * @param rangeStart  Native index where segmentation starts.
- * @param rangeEnd    Native index where segmentation stops.
- * @param foundBreaks Receives the boundaries that were found.
- * @return The number of words counted, not counting a boundary at or beyond
- *         rangeEnd; 0 when the range is shorter than KHMER_MIN_WORD_SPAN.
- */
-int32_t
-KhmerBreakEngine::divideUpDictionaryRange( UText *text,
- int32_t rangeStart,
- int32_t rangeEnd,
- UVector32 &foundBreaks ) const {
- if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
- return 0; // Not enough characters for two words
- }
-
- // words[] is always indexed modulo KHMER_LOOKAHEAD, so its slots are reused
- // as wordsFound grows.
- uint32_t wordsFound = 0;
- // Lengths of the word accepted in the current iteration: cpWordLength comes
- // from markedCPLength(), cuWordLength from acceptMarked() plus any text
- // appended further down.
- int32_t cpWordLength = 0;
- int32_t cuWordLength = 0;
- // Native index at which the current iteration started.
- int32_t current;
- UErrorCode status = U_ZERO_ERROR;
- PossibleWord words[KHMER_LOOKAHEAD];
-
- utext_setNativeIndex(text, rangeStart);
-
- while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
- cuWordLength = 0;
- cpWordLength = 0;
-
- // Look for candidate words at the current position
- int32_t candidates = words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
-
- // If we found exactly one, use that
- if (candidates == 1) {
- cuWordLength = words[wordsFound % KHMER_LOOKAHEAD].acceptMarked(text);
- cpWordLength = words[wordsFound % KHMER_LOOKAHEAD].markedCPLength();
- wordsFound += 1;
- }
-
- // If there was more than one, see which one can take us forward the most words
- else if (candidates > 1) {
- // If we're already at the end of the range, we're done
- if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
- goto foundBest;
- }
- do {
- // NOTE(review): wordsMatched is reset to 1 on every pass, so the
- // "wordsMatched < 2" test below is always true.
- int32_t wordsMatched = 1;
- if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
- if (wordsMatched < 2) {
- // Followed by another dictionary word; mark first word as a good candidate
- words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
- wordsMatched = 2;
- }
-
- // If we're already at the end of the range, we're done
- if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
- goto foundBest;
- }
-
- // See if any of the possible second words is followed by a third word
- do {
- // If we find a third word, stop right away
- if (words[(wordsFound + 2) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
- words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
- goto foundBest;
- }
- }
- while (words[(wordsFound + 1) % KHMER_LOOKAHEAD].backUp(text));
- }
- }
- while (words[wordsFound % KHMER_LOOKAHEAD].backUp(text));
-foundBest:
- // Accept whichever candidate of the first word is currently marked.
- cuWordLength = words[wordsFound % KHMER_LOOKAHEAD].acceptMarked(text);
- cpWordLength = words[wordsFound % KHMER_LOOKAHEAD].markedCPLength();
- wordsFound += 1;
- }
-
- // We come here after having either found a word or not. We look ahead to the
- // next word. If it's not a dictionary word, we will combine it with the word we
- // just found (if there is one), but only if the preceding word does not exceed
- // the threshold.
- // The text iterator should now be positioned at the end of the word we found.
- if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < KHMER_ROOT_COMBINE_THRESHOLD) {
- // if it is a dictionary word, do nothing. If it isn't, then if there is
- // no preceding word, or the non-word shares less than the minimum threshold
- // of characters with a dictionary word, then scan to resynchronize
- if (words[wordsFound % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
- && (cuWordLength == 0
- || words[wordsFound % KHMER_LOOKAHEAD].longestPrefix() < KHMER_PREFIX_COMBINE_THRESHOLD)) {
- // Look for a plausible word boundary
- // remaining: native-index units left before rangeEnd.
- // chars: native-index units passed over by this scan.
- int32_t remaining = rangeEnd - (current+cuWordLength);
- UChar32 pc;
- UChar32 uc;
- int32_t chars = 0;
- for (;;) {
- int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
- pc = utext_next32(text);
- int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
- chars += pcSize;
- remaining -= pcSize;
- if (remaining <= 0) {
- break;
- }
- uc = utext_current32(text);
- if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
- // Maybe. See if it's in the dictionary.
- int32_t num_candidates = words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
- // Reset the position to just after the characters scanned so far.
- utext_setNativeIndex(text, current+cuWordLength+chars);
- if (num_candidates > 0) {
- break;
- }
- }
- }
-
- // Bump the word count if there wasn't already one
- if (cuWordLength <= 0) {
- wordsFound += 1;
- }
-
- // Update the length with the passed-over characters
- cuWordLength += chars;
- }
- else {
- // Back up to where we were for next iteration
- utext_setNativeIndex(text, current+cuWordLength);
- }
- }
-
- // Never stop before a combining mark.
- int32_t currPos;
- while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
- utext_next32(text);
- cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
- }
-
- // Look ahead for possible suffixes if a dictionary word does not follow.
- // We do this in code rather than using a rule so that the heuristic
- // resynch continues to function. For example, one of the suffix characters
- // could be a typo in the middle of a word.
- // The suffix handling below is commented out, so no suffix processing is
- // performed by this engine.
-// if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength > 0) {
-// if (words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
-// && fSuffixSet.contains(uc = utext_current32(text))) {
-// if (uc == KHMER_PAIYANNOI) {
-// if (!fSuffixSet.contains(utext_previous32(text))) {
-// // Skip over previous end and PAIYANNOI
-// utext_next32(text);
-// utext_next32(text);
-// wordLength += 1; // Add PAIYANNOI to word
-// uc = utext_current32(text); // Fetch next character
-// }
-// else {
-// // Restore prior position
-// utext_next32(text);
-// }
-// }
-// if (uc == KHMER_MAIYAMOK) {
-// if (utext_previous32(text) != KHMER_MAIYAMOK) {
-// // Skip over previous end and MAIYAMOK
-// utext_next32(text);
-// utext_next32(text);
-// wordLength += 1; // Add MAIYAMOK to word
-// }
-// else {
-// // Restore prior position
-// utext_next32(text);
-// }
-// }
-// }
-// else {
-// utext_setNativeIndex(text, current+wordLength);
-// }
-// }
-
- // Did we find a word on this iteration? If so, push it on the break stack
- if (cuWordLength > 0) {
- foundBreaks.push((current+cuWordLength), status);
- }
- }
-
- // Don't return a break for the end of the dictionary range if there is one there.
- // NOTE(review): peeki() is called without first checking that foundBreaks is
- // non-empty, and wordsFound is unsigned — confirm both are safe when no
- // break was pushed by the loop above.
- if (foundBreaks.peeki() >= rangeEnd) {
- (void) foundBreaks.popi();
- wordsFound -= 1;
- }
-
- return wordsFound;
-}
-
-#if !UCONFIG_NO_NORMALIZATION
-/*
- ******************************************************************
- * CjkBreakEngine
- */
-// All-ones 32-bit value.
-static const uint32_t kuint32max = 0xFFFFFFFF;
-
-// Builds the CJK break engine. adoptDictionary is stored in fDictionary and
-// deleted by the destructor; type selects which character set the engine
-// registers; status is passed to the pattern-parsing and normalizer calls.
-CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
-    : DictionaryBreakEngine(), fDictionary(adoptDictionary)
-{
-    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
-    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Hani");
-
-    // Korean dictionary only includes Hangul syllables
-    fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
-    fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
-    fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status);
-    fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status);
-    nfkcNorm2 = Normalizer2::getNFKCInstance(status);
-
-    // Korean and Chinese/Japanese are handled using different dictionaries,
-    // so they register different character sets. Nothing is registered when
-    // any of the steps above failed.
-    if (U_SUCCESS(status)) {
-        if (type != kKorean) {
-            // Chinese and Japanese: Han, Katakana and Hiragana plus the two
-            // prolonged sound marks.
-            UnicodeSet cjSet;
-            cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK
-            cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
-            cjSet.addAll(fHiraganaWordSet);
-            cjSet.addAll(fKatakanaWordSet);
-            cjSet.addAll(fHanWordSet);
-            setCharacters(cjSet);
-        } else {
-            setCharacters(fHangulWordSet);
-        }
-    }
-
-    UTRACE_EXIT_STATUS(status);
-}
-
-// Deletes the dictionary that the constructor stored in fDictionary.
-CjkBreakEngine::~CjkBreakEngine()
-{
-    delete fDictionary;
-}
-
-// The katakanaCost values below are based on the length frequencies of all
-// katakana phrases in the dictionary
-static const int32_t kMaxKatakanaLength = 8;
-static const int32_t kMaxKatakanaGroupLength = 20;
-static const uint32_t maxSnlp = 255;
-
-/**
- * Looks up the cost of a katakana word of the given length.
- *
- * @param wordLength Length of the katakana word.
- * @return The table entry for lengths 0..kMaxKatakanaLength, and 8192 for
- *         every other length. Negative lengths are rejected explicitly so
- *         they can no longer index before the start of the table.
- */
-static inline uint32_t getKatakanaCost(int32_t wordLength){
-    //TODO: fill array with actual values from dictionary!
-    static const uint32_t katakanaCost[kMaxKatakanaLength + 1]
-        = {8192, 984, 408, 240, 204, 252, 300, 372, 480};
-    // Guard both ends of the table; out-of-range lengths get the same cost
-    // as the over-long case.
-    if (wordLength < 0 || wordLength > kMaxKatakanaLength) {
-        return 8192;
-    }
-    return katakanaCost[wordLength];
-}
-
-// Returns true when value lies in 0x30A1..0x30FE (except 0x30FB) or in
-// 0xFF66..0xFF9F.
-static inline bool isKatakana(UChar32 value) {
-    if (value == 0x30FB) {
-        // Excluded from the first range and not part of the second one.
-        return false;
-    }
-    const bool inFirstRange = (0x30A1 <= value) && (value <= 0x30FE);
-    const bool inSecondRange = (0xFF66 <= value) && (value <= 0xFF9f);
-    return inFirstRange || inSecondRange;
-}
-
-
-// Builds the 32-bit mask that has only bit number bitIndex set; used for
-// testing internal utext flags. Replicates an internal UText function.
-
-static inline int32_t utext_i32_flag(int32_t bitIndex) {
-    const int32_t lowestBit = 1;
-    return lowestBit << bitIndex;
-}
-
-
-/*
- * @param text A UText representing the text
- * @param rangeStart The start of the range of dictionary characters
- * @param rangeEnd The end of the range of dictionary characters
- * @param foundBreaks vector<int32> to receive the break positions
- * @return The number of breaks found
- */
-int32_t
-CjkBreakEngine::divideUpDictionaryRange( UText *inText,
- int32_t rangeStart,
- int32_t rangeEnd,
- UVector32 &foundBreaks ) const {
- if (rangeStart >= rangeEnd) {
- return 0;
- }
-
- // UnicodeString version of input UText, NFKC normalized if necessary.
- UnicodeString inString;
-
- // inputMap[inStringIndex] = corresponding native index from UText inText.
- // If NULL then mapping is 1:1
- LocalPointer<UVector32> inputMap;
-
- UErrorCode status = U_ZERO_ERROR;
-
-
- // if UText has the input string as one contiguous UTF-16 chunk
- if ((inText->providerProperties & utext_i32_flag(UTEXT_PROVIDER_STABLE_CHUNKS)) &&
- inText->chunkNativeStart <= rangeStart &&
- inText->chunkNativeLimit >= rangeEnd &&
- inText->nativeIndexingLimit >= rangeEnd - inText->chunkNativeStart) {
-
- // Input UText is in one contiguous UTF-16 chunk.
- // Use Read-only aliasing UnicodeString.
- inString.setTo(FALSE,
- inText->chunkContents + rangeStart - inText->chunkNativeStart,
- rangeEnd - rangeStart);
- } else {
- // Copy the text from the original inText (UText) to inString (UnicodeString).
- // Create a map from UnicodeString indices -> UText offsets.
- utext_setNativeIndex(inText, rangeStart);
- int32_t limit = rangeEnd;
- U_ASSERT(limit <= utext_nativeLength(inText));
- if (limit > utext_nativeLength(inText)) {
- limit = (int32_t)utext_nativeLength(inText);
- }
- inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status);
- if (U_FAILURE(status)) {
- return 0;
- }
- while (utext_getNativeIndex(inText) < limit) {
- int32_t nativePosition = (int32_t)utext_getNativeIndex(inText);
- UChar32 c = utext_next32(inText);
- U_ASSERT(c != U_SENTINEL);
- inString.append(c);
- while (inputMap->size() < inString.length()) {
- inputMap->addElement(nativePosition, status);
- }
- }
- inputMap->addElement(limit, status);
- }
-
-
- if (!nfkcNorm2->isNormalized(inString, status)) {
- UnicodeString normalizedInput;
- // normalizedMap[normalizedInput position] == original UText position.
- LocalPointer<UVector32> normalizedMap(new UVector32(status), status);
- if (U_FAILURE(status)) {
- return 0;
- }
-
- UnicodeString fragment;
- UnicodeString normalizedFragment;
- for (int32_t srcI = 0; srcI < inString.length();) { // Once per normalization chunk
- fragment.remove();
- int32_t fragmentStartI = srcI;
- UChar32 c = inString.char32At(srcI);
- for (;;) {
- fragment.append(c);
- srcI = inString.moveIndex32(srcI, 1);
- if (srcI == inString.length()) {
- break;
- }
- c = inString.char32At(srcI);
- if (nfkcNorm2->hasBoundaryBefore(c)) {
- break;
- }
- }
- nfkcNorm2->normalize(fragment, normalizedFragment, status);
- normalizedInput.append(normalizedFragment);
-
- // Map every position in the normalized chunk to the start of the chunk
- // in the original input.
- int32_t fragmentOriginalStart = inputMap.isValid() ?
- inputMap->elementAti(fragmentStartI) : fragmentStartI+rangeStart;
- while (normalizedMap->size() < normalizedInput.length()) {
- normalizedMap->addElement(fragmentOriginalStart, status);
- if (U_FAILURE(status)) {
- break;
- }
- }
- }
- U_ASSERT(normalizedMap->size() == normalizedInput.length());
- int32_t nativeEnd = inputMap.isValid() ?
- inputMap->elementAti(inString.length()) : inString.length()+rangeStart;
- normalizedMap->addElement(nativeEnd, status);
-
- inputMap = std::move(normalizedMap);
- inString = std::move(normalizedInput);
- }
-
- int32_t numCodePts = inString.countChar32();
- if (numCodePts != inString.length()) {
- // There are supplementary characters in the input.
- // The dictionary will produce boundary positions in terms of code point indexes,
- // not in terms of code unit string indexes.
- // Use the inputMap mechanism to take care of this in addition to indexing differences
- // from normalization and/or UTF-8 input.
- UBool hadExistingMap = inputMap.isValid();
- if (!hadExistingMap) {
- inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status);
- if (U_FAILURE(status)) {
- return 0;
- }
- }
- int32_t cpIdx = 0;
- for (int32_t cuIdx = 0; ; cuIdx = inString.moveIndex32(cuIdx, 1)) {
- U_ASSERT(cuIdx >= cpIdx);
- if (hadExistingMap) {
- inputMap->setElementAt(inputMap->elementAti(cuIdx), cpIdx);
- } else {
- inputMap->addElement(cuIdx+rangeStart, status);
- }
- cpIdx++;
- if (cuIdx == inString.length()) {
- break;
- }
- }
- }
-
- // bestSnlp[i] is the snlp of the best segmentation of the first i
- // code points in the range to be matched.
- UVector32 bestSnlp(numCodePts + 1, status);
- bestSnlp.addElement(0, status);
- for(int32_t i = 1; i <= numCodePts; i++) {
- bestSnlp.addElement(kuint32max, status);
- }
-
-
- // prev[i] is the index of the last CJK code point in the previous word in
- // the best segmentation of the first i characters.
- UVector32 prev(numCodePts + 1, status);
- for(int32_t i = 0; i <= numCodePts; i++){
- prev.addElement(-1, status);
- }
-
- const int32_t maxWordSize = 20;
- UVector32 values(numCodePts, status);
- values.setSize(numCodePts);
- UVector32 lengths(numCodePts, status);
- lengths.setSize(numCodePts);
-
- UText fu = UTEXT_INITIALIZER;
- utext_openUnicodeString(&fu, &inString, &status);
-
- // Dynamic programming to find the best segmentation.
-
- // In outer loop, i is the code point index,
- // ix is the corresponding string (code unit) index.
- // They differ when the string contains supplementary characters.
- int32_t ix = 0;
- bool is_prev_katakana = false;
- for (int32_t i = 0; i < numCodePts; ++i, ix = inString.moveIndex32(ix, 1)) {
- if ((uint32_t)bestSnlp.elementAti(i) == kuint32max) {
- continue;
- }
-
- int32_t count;
- utext_setNativeIndex(&fu, ix);
- count = fDictionary->matches(&fu, maxWordSize, numCodePts,
- NULL, lengths.getBuffer(), values.getBuffer(), NULL);
- // Note: lengths is filled with code point lengths
- // The NULL parameter is the ignored code unit lengths.
-
- // if there are no single character matches found in the dictionary
- // starting with this character, treat character as a 1-character word
- // with the highest value possible, i.e. the least likely to occur.
- // Exclude Korean characters from this treatment, as they should be left
- // together by default.
- if ((count == 0 || lengths.elementAti(0) != 1) &&
- !fHangulWordSet.contains(inString.char32At(ix))) {
- values.setElementAt(maxSnlp, count); // 255
- lengths.setElementAt(1, count++);
- }
-
- for (int32_t j = 0; j < count; j++) {
- uint32_t newSnlp = (uint32_t)bestSnlp.elementAti(i) + (uint32_t)values.elementAti(j);
- int32_t ln_j_i = lengths.elementAti(j) + i;
- if (newSnlp < (uint32_t)bestSnlp.elementAti(ln_j_i)) {
- bestSnlp.setElementAt(newSnlp, ln_j_i);
- prev.setElementAt(i, ln_j_i);
- }
- }
-
- // In Japanese,
- // Katakana word in single character is pretty rare. So we apply
- // the following heuristic to Katakana: any continuous run of Katakana
- // characters is considered a candidate word with a default cost
- // specified in the katakanaCost table according to its length.
-
- bool is_katakana = isKatakana(inString.char32At(ix));
- int32_t katakanaRunLength = 1;
- if (!is_prev_katakana && is_katakana) {
- int32_t j = inString.moveIndex32(ix, 1);
- // Find the end of the continuous run of Katakana characters
- while (j < inString.length() && katakanaRunLength < kMaxKatakanaGroupLength &&
- isKatakana(inString.char32At(j))) {
- j = inString.moveIndex32(j, 1);
- katakanaRunLength++;
- }
- if (katakanaRunLength < kMaxKatakanaGroupLength) {
- uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(katakanaRunLength);
- if (newSnlp < (uint32_t)bestSnlp.elementAti(i+katakanaRunLength)) {
- bestSnlp.setElementAt(newSnlp, i+katakanaRunLength);
- prev.setElementAt(i, i+katakanaRunLength); // prev[j] = i;
- }
- }
- }
- is_prev_katakana = is_katakana;
- }
- utext_close(&fu);
-
- // Start pushing the optimal offset index into t_boundary (t for tentative).
- // prev[numCodePts] is guaranteed to be meaningful.
- // We'll first push in the reverse order, i.e.,
- // t_boundary[0] = numCodePts, and afterwards do a swap.
- UVector32 t_boundary(numCodePts+1, status);
-
- int32_t numBreaks = 0;
- // No segmentation found, set boundary to end of range
- if ((uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) {
- t_boundary.addElement(numCodePts, status);
- numBreaks++;
- } else {
- for (int32_t i = numCodePts; i > 0; i = prev.elementAti(i)) {
- t_boundary.addElement(i, status);
- numBreaks++;
- }
- U_ASSERT(prev.elementAti(t_boundary.elementAti(numBreaks - 1)) == 0);
- }
-
- // Add a break for the start of the dictionary range if there is not one
- // there already.
- if (foundBreaks.size() == 0 || foundBreaks.peeki() < rangeStart) {
- t_boundary.addElement(0, status);
- numBreaks++;
- }
-
- // Now that we're done, convert positions in t_boundary[] (indices in
- // the normalized input string) back to indices in the original input UText
- // while reversing t_boundary and pushing values to foundBreaks.
- int32_t prevCPPos = -1;
- int32_t prevUTextPos = -1;
- for (int32_t i = numBreaks-1; i >= 0; i--) {
- int32_t cpPos = t_boundary.elementAti(i);
- U_ASSERT(cpPos > prevCPPos);
- int32_t utextPos = inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart;
- U_ASSERT(utextPos >= prevUTextPos);
- if (utextPos > prevUTextPos) {
- // Boundaries are added to foundBreaks output in ascending order.
- U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos);
- foundBreaks.push(utextPos, status);
- } else {
- // Normalization expanded the input text, the dictionary found a boundary
- // within the expansion, giving two boundaries with the same index in the
- // original text. Ignore the second. See ticket #12918.
- --numBreaks;
- }
- prevCPPos = cpPos;
- prevUTextPos = utextPos;
- }
- (void)prevCPPos; // suppress compiler warnings about unused variable
-
- // inString goes out of scope
- // inputMap goes out of scope
- return numBreaks;
-}
-#endif
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
-
diff --git a/contrib/libs/icu/common/dictbe.h b/contrib/libs/icu/common/dictbe.h
deleted file mode 100644
index 731bfdff9f2..00000000000
--- a/contrib/libs/icu/common/dictbe.h
+++ /dev/null
@@ -1,402 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/**
- *******************************************************************************
- * Copyright (C) 2006-2014, International Business Machines Corporation *
- * and others. All Rights Reserved. *
- *******************************************************************************
- */
-
-#ifndef DICTBE_H
-#define DICTBE_H
-
-#include "unicode/utypes.h"
-#include "unicode/uniset.h"
-#include "unicode/utext.h"
-
-#include "brkeng.h"
-#include "uvectr32.h"
-
-U_NAMESPACE_BEGIN
-
-class DictionaryMatcher;
-class Normalizer2;
-
-/*******************************************************************
- * DictionaryBreakEngine
- */
-
-/**
- * <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a
- * dictionary to determine language-specific breaks.</p>
- *
- * <p>After it is constructed a DictionaryBreakEngine may be shared between
- * threads without synchronization.</p>
- */
-class DictionaryBreakEngine : public LanguageBreakEngine {
- private:
- /**
- * The set of characters handled by this engine
- * @internal
- */
-
- UnicodeSet fSet;
-
- public:
-
- /**
- * <p>Constructor </p>
- */
- DictionaryBreakEngine();
-
- /**
- * <p>Virtual destructor.</p>
- */
- virtual ~DictionaryBreakEngine();
-
- /**
- * <p>Indicate whether this engine handles a particular character for
- * a particular kind of break.</p>
- *
- * @param c A character which begins a run that the engine might handle
- * @return TRUE if this engine handles the particular character and break
- * type.
- */
- virtual UBool handles(UChar32 c) const;
-
- /**
- * <p>Find any breaks within a run in the supplied text.</p>
- *
- * @param text A UText representing the text. The iterator is left at
- * the end of the run of characters which the engine is capable of handling
- * that starts from the first character in the range.
- * @param startPos The start of the run within the supplied text.
- * @param endPos The end of the run within the supplied text.
- * @param foundBreaks vector of int32_t to receive the break positions
- * @return The number of breaks found.
- */
- virtual int32_t findBreaks( UText *text,
- int32_t startPos,
- int32_t endPos,
- UVector32 &foundBreaks ) const;
-
- protected:
-
- /**
- * <p>Set the character set handled by this engine.</p>
- *
- * @param set A UnicodeSet of the set of characters handled by the engine
- */
- virtual void setCharacters( const UnicodeSet &set );
-
- /**
- * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
- *
- * @param text A UText representing the text
- * @param rangeStart The start of the range of dictionary characters
- * @param rangeEnd The end of the range of dictionary characters
- * @param foundBreaks Output of C array of int32_t break positions, or 0
- * @return The number of breaks found
- */
- virtual int32_t divideUpDictionaryRange( UText *text,
- int32_t rangeStart,
- int32_t rangeEnd,
- UVector32 &foundBreaks ) const = 0;
-
-};
-
-/*******************************************************************
- * ThaiBreakEngine
- */
-
-/**
- * <p>ThaiBreakEngine is a kind of DictionaryBreakEngine that uses a
- * dictionary and heuristics to determine Thai-specific breaks.</p>
- *
- * <p>After it is constructed a ThaiBreakEngine may be shared between
- * threads without synchronization.</p>
- */
-class ThaiBreakEngine : public DictionaryBreakEngine {
- private:
- /**
- * The set of characters handled by this engine
- * @internal
- */
-
- UnicodeSet fThaiWordSet;
- UnicodeSet fEndWordSet;
- UnicodeSet fBeginWordSet;
- UnicodeSet fSuffixSet;
- UnicodeSet fMarkSet;
- DictionaryMatcher *fDictionary;
-
- public:
-
- /**
- * <p>Default constructor.</p>
- *
- * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
- * engine is deleted.
- */
- ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
-
- /**
- * <p>Virtual destructor.</p>
- */
- virtual ~ThaiBreakEngine();
-
- protected:
- /**
- * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
- *
- * @param text A UText representing the text
- * @param rangeStart The start of the range of dictionary characters
- * @param rangeEnd The end of the range of dictionary characters
- * @param foundBreaks Output of C array of int32_t break positions, or 0
- * @return The number of breaks found
- */
- virtual int32_t divideUpDictionaryRange( UText *text,
- int32_t rangeStart,
- int32_t rangeEnd,
- UVector32 &foundBreaks ) const;
-
-};
-
-/*******************************************************************
- * LaoBreakEngine
- */
-
-/**
- * <p>LaoBreakEngine is a kind of DictionaryBreakEngine that uses a
- * dictionary and heuristics to determine Lao-specific breaks.</p>
- *
- * <p>After it is constructed a LaoBreakEngine may be shared between
- * threads without synchronization.</p>
- */
-class LaoBreakEngine : public DictionaryBreakEngine {
- private:
- /**
- * The set of characters handled by this engine
- * @internal
- */
-
- UnicodeSet fLaoWordSet;
- UnicodeSet fEndWordSet;
- UnicodeSet fBeginWordSet;
- UnicodeSet fMarkSet;
- DictionaryMatcher *fDictionary;
-
- public:
-
- /**
- * <p>Default constructor.</p>
- *
- * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
- * engine is deleted.
- */
- LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
-
- /**
- * <p>Virtual destructor.</p>
- */
- virtual ~LaoBreakEngine();
-
- protected:
- /**
- * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
- *
- * @param text A UText representing the text
- * @param rangeStart The start of the range of dictionary characters
- * @param rangeEnd The end of the range of dictionary characters
- * @param foundBreaks Output of C array of int32_t break positions, or 0
- * @return The number of breaks found
- */
- virtual int32_t divideUpDictionaryRange( UText *text,
- int32_t rangeStart,
- int32_t rangeEnd,
- UVector32 &foundBreaks ) const;
-
-};
-
-/*******************************************************************
- * BurmeseBreakEngine
- */
-
-/**
- * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a
- * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p>
- *
- * <p>After it is constructed a BurmeseBreakEngine may be shared between
- * threads without synchronization.</p>
- */
-class BurmeseBreakEngine : public DictionaryBreakEngine {
- private:
- /**
- * The set of characters handled by this engine
- * @internal
- */
-
- UnicodeSet fBurmeseWordSet;
- UnicodeSet fEndWordSet;
- UnicodeSet fBeginWordSet;
- UnicodeSet fMarkSet;
- DictionaryMatcher *fDictionary;
-
- public:
-
- /**
- * <p>Default constructor.</p>
- *
- * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
- * engine is deleted.
- */
- BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
-
- /**
- * <p>Virtual destructor.</p>
- */
- virtual ~BurmeseBreakEngine();
-
- protected:
- /**
- * <p>Divide up a range of known dictionary characters.</p>
- *
- * @param text A UText representing the text
- * @param rangeStart The start of the range of dictionary characters
- * @param rangeEnd The end of the range of dictionary characters
- * @param foundBreaks Output of C array of int32_t break positions, or 0
- * @return The number of breaks found
- */
- virtual int32_t divideUpDictionaryRange( UText *text,
- int32_t rangeStart,
- int32_t rangeEnd,
- UVector32 &foundBreaks ) const;
-
-};
-
-/*******************************************************************
- * KhmerBreakEngine
- */
-
-/**
- * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a
- * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p>
- *
- * <p>After it is constructed a KhmerBreakEngine may be shared between
- * threads without synchronization.</p>
- */
-class KhmerBreakEngine : public DictionaryBreakEngine {
- private:
- /**
- * The set of characters handled by this engine
- * @internal
- */
-
- UnicodeSet fKhmerWordSet;
- UnicodeSet fEndWordSet;
- UnicodeSet fBeginWordSet;
- UnicodeSet fMarkSet;
- DictionaryMatcher *fDictionary;
-
- public:
-
- /**
- * <p>Default constructor.</p>
- *
- * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
- * engine is deleted.
- */
- KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
-
- /**
- * <p>Virtual destructor.</p>
- */
- virtual ~KhmerBreakEngine();
-
- protected:
- /**
- * <p>Divide up a range of known dictionary characters.</p>
- *
- * @param text A UText representing the text
- * @param rangeStart The start of the range of dictionary characters
- * @param rangeEnd The end of the range of dictionary characters
- * @param foundBreaks Output of C array of int32_t break positions, or 0
- * @return The number of breaks found
- */
- virtual int32_t divideUpDictionaryRange( UText *text,
- int32_t rangeStart,
- int32_t rangeEnd,
- UVector32 &foundBreaks ) const;
-
-};
-
-#if !UCONFIG_NO_NORMALIZATION
-
-/*******************************************************************
- * CjkBreakEngine
- */
-
-//indicates language/script that the CjkBreakEngine will handle
-enum LanguageType {
- kKorean,
- kChineseJapanese
-};
-
-/**
- * <p>CjkBreakEngine is a kind of DictionaryBreakEngine that uses a
- * dictionary with costs associated with each word and
- * Viterbi decoding to determine CJK-specific breaks.</p>
- */
-class CjkBreakEngine : public DictionaryBreakEngine {
- protected:
- /**
- * The set of characters handled by this engine
- * @internal
- */
- UnicodeSet fHangulWordSet;
- UnicodeSet fHanWordSet;
- UnicodeSet fKatakanaWordSet;
- UnicodeSet fHiraganaWordSet;
-
- DictionaryMatcher *fDictionary;
- const Normalizer2 *nfkcNorm2;
-
- public:
-
- /**
- * <p>Default constructor.</p>
- *
- * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
- * engine is deleted. The DictionaryMatcher must contain costs for each word
- * in order for the dictionary to work properly.
- */
- CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status);
-
- /**
- * <p>Virtual destructor.</p>
- */
- virtual ~CjkBreakEngine();
-
- protected:
- /**
- * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
- *
- * @param text A UText representing the text
- * @param rangeStart The start of the range of dictionary characters
- * @param rangeEnd The end of the range of dictionary characters
- * @param foundBreaks Output of C array of int32_t break positions, or 0
- * @return The number of breaks found
- */
- virtual int32_t divideUpDictionaryRange( UText *text,
- int32_t rangeStart,
- int32_t rangeEnd,
- UVector32 &foundBreaks ) const;
-
-};
-
-#endif
-
-U_NAMESPACE_END
-
- /* DICTBE_H */
-#endif
diff --git a/contrib/libs/icu/common/dictionarydata.cpp b/contrib/libs/icu/common/dictionarydata.cpp
deleted file mode 100644
index 6e2dbee5b61..00000000000
--- a/contrib/libs/icu/common/dictionarydata.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2014-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* dictionarydata.h
-*
-* created on: 2012may31
-* created by: Markus W. Scherer & Maxime Serrano
-*/
-
-#include "dictionarydata.h"
-#include "unicode/ucharstrie.h"
-#include "unicode/bytestrie.h"
-#include "unicode/udata.h"
-#include "cmemory.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-U_NAMESPACE_BEGIN
-
-const int32_t DictionaryData::TRIE_TYPE_BYTES = 0;
-const int32_t DictionaryData::TRIE_TYPE_UCHARS = 1;
-const int32_t DictionaryData::TRIE_TYPE_MASK = 7;
-const int32_t DictionaryData::TRIE_HAS_VALUES = 8;
-
-const int32_t DictionaryData::TRANSFORM_NONE = 0;
-const int32_t DictionaryData::TRANSFORM_TYPE_OFFSET = 0x1000000;
-const int32_t DictionaryData::TRANSFORM_TYPE_MASK = 0x7f000000;
-const int32_t DictionaryData::TRANSFORM_OFFSET_MASK = 0x1fffff;
-
-DictionaryMatcher::~DictionaryMatcher() {
-}
-
-UCharsDictionaryMatcher::~UCharsDictionaryMatcher() {
- udata_close(file);
-}
-
-int32_t UCharsDictionaryMatcher::getType() const {
- return DictionaryData::TRIE_TYPE_UCHARS;
-}
-
-int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
- int32_t *lengths, int32_t *cpLengths, int32_t *values,
- int32_t *prefix) const {
-
- UCharsTrie uct(characters);
- int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
- int32_t wordCount = 0;
- int32_t codePointsMatched = 0;
-
- for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
- UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c);
- int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
- codePointsMatched += 1;
- if (USTRINGTRIE_HAS_VALUE(result)) {
- if (wordCount < limit) {
- if (values != NULL) {
- values[wordCount] = uct.getValue();
- }
- if (lengths != NULL) {
- lengths[wordCount] = lengthMatched;
- }
- if (cpLengths != NULL) {
- cpLengths[wordCount] = codePointsMatched;
- }
- ++wordCount;
- }
- if (result == USTRINGTRIE_FINAL_VALUE) {
- break;
- }
- }
- else if (result == USTRINGTRIE_NO_MATCH) {
- break;
- }
- if (lengthMatched >= maxLength) {
- break;
- }
- }
-
- if (prefix != NULL) {
- *prefix = codePointsMatched;
- }
- return wordCount;
-}
-
-BytesDictionaryMatcher::~BytesDictionaryMatcher() {
- udata_close(file);
-}
-
-UChar32 BytesDictionaryMatcher::transform(UChar32 c) const {
- if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryData::TRANSFORM_TYPE_OFFSET) {
- if (c == 0x200D) {
- return 0xFF;
- } else if (c == 0x200C) {
- return 0xFE;
- }
- int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK);
- if (delta < 0 || 0xFD < delta) {
- return U_SENTINEL;
- }
- return (UChar32)delta;
- }
- return c;
-}
-
-int32_t BytesDictionaryMatcher::getType() const {
- return DictionaryData::TRIE_TYPE_BYTES;
-}
-
-int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
- int32_t *lengths, int32_t *cpLengths, int32_t *values,
- int32_t *prefix) const {
- BytesTrie bt(characters);
- int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
- int32_t wordCount = 0;
- int32_t codePointsMatched = 0;
-
- for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
- UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c));
- int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
- codePointsMatched += 1;
- if (USTRINGTRIE_HAS_VALUE(result)) {
- if (wordCount < limit) {
- if (values != NULL) {
- values[wordCount] = bt.getValue();
- }
- if (lengths != NULL) {
- lengths[wordCount] = lengthMatched;
- }
- if (cpLengths != NULL) {
- cpLengths[wordCount] = codePointsMatched;
- }
- ++wordCount;
- }
- if (result == USTRINGTRIE_FINAL_VALUE) {
- break;
- }
- }
- else if (result == USTRINGTRIE_NO_MATCH) {
- break;
- }
- if (lengthMatched >= maxLength) {
- break;
- }
- }
-
- if (prefix != NULL) {
- *prefix = codePointsMatched;
- }
- return wordCount;
-}
-
-
-U_NAMESPACE_END
-
-U_NAMESPACE_USE
-
-U_CAPI int32_t U_EXPORT2
-udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
- void *outData, UErrorCode *pErrorCode) {
- const UDataInfo *pInfo;
- int32_t headerSize;
- const uint8_t *inBytes;
- uint8_t *outBytes;
- const int32_t *inIndexes;
- int32_t indexes[DictionaryData::IX_COUNT];
- int32_t i, offset, size;
-
- headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
- if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0;
- pInfo = (const UDataInfo *)((const char *)inData + 4);
- if (!(pInfo->dataFormat[0] == 0x44 &&
- pInfo->dataFormat[1] == 0x69 &&
- pInfo->dataFormat[2] == 0x63 &&
- pInfo->dataFormat[3] == 0x74 &&
- pInfo->formatVersion[0] == 1)) {
- udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",
- pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]);
- *pErrorCode = U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- inBytes = (const uint8_t *)inData + headerSize;
- outBytes = (uint8_t *)outData + headerSize;
-
- inIndexes = (const int32_t *)inBytes;
- if (length >= 0) {
- length -= headerSize;
- if (length < (int32_t)(sizeof(indexes))) {
- udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length);
- *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- }
-
- for (i = 0; i < DictionaryData::IX_COUNT; i++) {
- indexes[i] = udata_readInt32(ds, inIndexes[i]);
- }
-
- size = indexes[DictionaryData::IX_TOTAL_SIZE];
-
- if (length >= 0) {
- if (length < size) {
- udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length);
- *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- if (inBytes != outBytes) {
- uprv_memcpy(outBytes, inBytes, size);
- }
-
- offset = 0;
- ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode);
- offset = (int32_t)sizeof(indexes);
- int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
- int32_t nextOffset = indexes[DictionaryData::IX_RESERVED1_OFFSET];
-
- if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
- ds->swapArray16(ds, inBytes + offset, nextOffset - offset, outBytes + offset, pErrorCode);
- } else if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
- // nothing to do
- } else {
- udata_printError(ds, "udict_swap(): unknown trie type!\n");
- *pErrorCode = U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- // these next two sections are empty in the current format,
- // but may be used later.
- offset = nextOffset;
- nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET];
- offset = nextOffset;
- nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE];
- offset = nextOffset;
- }
- return headerSize + size;
-}
-#endif
diff --git a/contrib/libs/icu/common/dictionarydata.h b/contrib/libs/icu/common/dictionarydata.h
deleted file mode 100644
index 0d303d9a8dc..00000000000
--- a/contrib/libs/icu/common/dictionarydata.h
+++ /dev/null
@@ -1,191 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* dictionarydata.h
-*
-* created on: 2012may31
-* created by: Markus W. Scherer & Maxime Serrano
-*/
-
-#ifndef __DICTIONARYDATA_H__
-#define __DICTIONARYDATA_H__
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/utext.h"
-#include "unicode/udata.h"
-#include "udataswp.h"
-#include "unicode/uobject.h"
-#include "unicode/ustringtrie.h"
-
-U_NAMESPACE_BEGIN
-
-class UCharsTrie;
-class BytesTrie;
-
-class U_COMMON_API DictionaryData : public UMemory {
-public:
- static const int32_t TRIE_TYPE_BYTES; // = 0;
- static const int32_t TRIE_TYPE_UCHARS; // = 1;
- static const int32_t TRIE_TYPE_MASK; // = 7;
- static const int32_t TRIE_HAS_VALUES; // = 8;
-
- static const int32_t TRANSFORM_NONE; // = 0;
- static const int32_t TRANSFORM_TYPE_OFFSET; // = 0x1000000;
- static const int32_t TRANSFORM_TYPE_MASK; // = 0x7f000000;
- static const int32_t TRANSFORM_OFFSET_MASK; // = 0x1fffff;
-
- enum {
- // Byte offsets from the start of the data, after the generic header.
- IX_STRING_TRIE_OFFSET,
- IX_RESERVED1_OFFSET,
- IX_RESERVED2_OFFSET,
- IX_TOTAL_SIZE,
-
- // Trie type: TRIE_HAS_VALUES | TRIE_TYPE_BYTES etc.
- IX_TRIE_TYPE,
- // Transform specification: TRANSFORM_TYPE_OFFSET | 0xe00 etc.
- IX_TRANSFORM,
-
- IX_RESERVED6,
- IX_RESERVED7,
- IX_COUNT
- };
-};
-
-/**
- * Wrapper class around generic dictionaries, implementing matches().
- * getType() should return a TRIE_TYPE_??? constant from DictionaryData.
- *
- * All implementations of this interface must be thread-safe if they are to be used inside of the
- * dictionary-based break iteration code.
- */
-class U_COMMON_API DictionaryMatcher : public UMemory {
-public:
- DictionaryMatcher() {}
- virtual ~DictionaryMatcher();
- // this should emulate CompactTrieDictionary::matches()
- /* @param text The text in which to look for matching words. Matching begins
- * at the current position of the UText.
- * @param maxLength The max length of match to consider. Units are the native indexing
- * units of the UText.
- * @param limit Capacity of output arrays, which is also the maximum number of
- * matching words to be found.
- * @param lengths output array, filled with the lengths of the matches, in order,
- * from shortest to longest. Lengths are in native indexing units
- * of the UText. May be NULL.
- * @param cpLengths output array, filled with the lengths of the matches, in order,
- * from shortest to longest. Lengths are the number of Unicode code points.
- * May be NULL.
- * @param values Output array, filled with the values associated with the words found.
- * May be NULL.
- * @param prefix Output parameter, the code point length of the prefix match, even if that
- * prefix didn't lead to a complete word. Will always be >= the cpLength
- * of the longest complete word matched. May be NULL.
- * @return Number of matching words found.
- */
- virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
- int32_t *lengths, int32_t *cpLengths, int32_t *values,
- int32_t *prefix) const = 0;
-
- /** @return DictionaryData::TRIE_TYPE_XYZ */
- virtual int32_t getType() const = 0;
-};
-
-// Implementation of the DictionaryMatcher interface for a UCharsTrie dictionary
-class U_COMMON_API UCharsDictionaryMatcher : public DictionaryMatcher {
-public:
- // constructs a new UCharsDictionaryMatcher.
- // The UDataMemory * will be closed on this object's destruction.
- UCharsDictionaryMatcher(const UChar *c, UDataMemory *f) : characters(c), file(f) { }
- virtual ~UCharsDictionaryMatcher();
- virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
- int32_t *lengths, int32_t *cpLengths, int32_t *values,
- int32_t *prefix) const;
- virtual int32_t getType() const;
-private:
- const UChar *characters;
- UDataMemory *file;
-};
-
-// Implementation of the DictionaryMatcher interface for a BytesTrie dictionary
-class U_COMMON_API BytesDictionaryMatcher : public DictionaryMatcher {
-public:
- // constructs a new BytesTrieDictionaryMatcher
- // the transform constant should be the constant read from the file, not a masked version!
- // the UDataMemory * fed in here will be closed on this object's destruction
- BytesDictionaryMatcher(const char *c, int32_t t, UDataMemory *f)
- : characters(c), transformConstant(t), file(f) { }
- virtual ~BytesDictionaryMatcher();
- virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
- int32_t *lengths, int32_t *cpLengths, int32_t *values,
- int32_t *prefix) const;
- virtual int32_t getType() const;
-private:
- UChar32 transform(UChar32 c) const;
-
- const char *characters;
- int32_t transformConstant;
- UDataMemory *file;
-};
-
-U_NAMESPACE_END
-
-U_CAPI int32_t U_EXPORT2
-udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode);
-
-/**
- * Format of dictionary .dict data files.
- * Format version 1.0.
- *
- * A dictionary .dict data file contains a byte-serialized BytesTrie or
- * a UChars-serialized UCharsTrie.
- * Such files are used in dictionary-based break iteration (DBBI).
- *
- * For a BytesTrie, a transformation type is specified for
- * transforming Unicode strings into byte sequences.
- *
- * A .dict file begins with a standard ICU data file header
- * (DataHeader, see ucmndata.h and unicode/udata.h).
- * The UDataInfo.dataVersion field is currently unused (set to 0.0.0.0).
- *
- * After the header, the file contains the following parts.
- * Constants are defined in the DictionaryData class.
- *
- * For the data structure of BytesTrie & UCharsTrie see
- * http://site.icu-project.org/design/struct/tries
- * and the bytestrie.h and ucharstrie.h header files.
- *
- * int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_STRING_TRIE_OFFSET]/4;
- *
- * The first four indexes are byte offsets in ascending order.
- * Each byte offset marks the start of the next part in the data file,
- * and the end of the previous one.
- * When two consecutive byte offsets are the same, then the corresponding part is empty.
- * Byte offsets are offsets from after the header,
- * that is, from the beginning of the indexes[].
- * Each part starts at an offset with proper alignment for its data.
- * If necessary, the previous part may include padding bytes to achieve this alignment.
- *
- * trieType=indexes[IX_TRIE_TYPE] defines the trie type.
- * transform=indexes[IX_TRANSFORM] defines the Unicode-to-bytes transformation.
- * If the transformation type is TRANSFORM_TYPE_OFFSET,
- * then the lower 21 bits contain the offset code point.
- * Each code point c is mapped to byte b = (c - offset).
- * Code points outside the range offset..(offset+0xff) cannot be mapped
- * and do not occur in the dictionary.
- *
- * stringTrie; -- a serialized BytesTrie or UCharsTrie
- *
- * The dictionary maps strings to specific values (TRIE_HAS_VALUES bit set in trieType),
- * or it maps all strings to 0 (TRIE_HAS_VALUES bit not set).
- */
-
-#endif /* !UCONFIG_NO_BREAK_ITERATION */
-#endif /* __DICTIONARYDATA_H__ */
diff --git a/contrib/libs/icu/common/dtintrv.cpp b/contrib/libs/icu/common/dtintrv.cpp
deleted file mode 100644
index 80bb5d6dbd1..00000000000
--- a/contrib/libs/icu/common/dtintrv.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*******************************************************************************
-* Copyright (C) 2008, International Business Machines Corporation and
-* others. All Rights Reserved.
-*******************************************************************************
-*
-* File DTINTRV.CPP
-*
-*******************************************************************************
-*/
-
-
-
-#include "unicode/dtintrv.h"
-
-
-U_NAMESPACE_BEGIN
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateInterval)
-
-//DateInterval::DateInterval(){}
-
-
-DateInterval::DateInterval(UDate from, UDate to)
-: fromDate(from),
- toDate(to)
-{}
-
-
-DateInterval::~DateInterval(){}
-
-
-DateInterval::DateInterval(const DateInterval& other)
-: UObject(other) {
- *this = other;
-}
-
-
-DateInterval&
-DateInterval::operator=(const DateInterval& other) {
- if ( this != &other ) {
- fromDate = other.fromDate;
- toDate = other.toDate;
- }
- return *this;
-}
-
-
-DateInterval*
-DateInterval::clone() const {
- return new DateInterval(*this);
-}
-
-
-UBool
-DateInterval::operator==(const DateInterval& other) const {
- return ( fromDate == other.fromDate && toDate == other.toDate );
-}
-
-
-U_NAMESPACE_END
-
diff --git a/contrib/libs/icu/common/edits.cpp b/contrib/libs/icu/common/edits.cpp
deleted file mode 100644
index 95f0c19a728..00000000000
--- a/contrib/libs/icu/common/edits.cpp
+++ /dev/null
@@ -1,803 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-// edits.cpp
-// created: 2017feb08 Markus W. Scherer
-
-#include "unicode/edits.h"
-#include "unicode/unistr.h"
-#include "unicode/utypes.h"
-#include "cmemory.h"
-#include "uassert.h"
-#include "util.h"
-
-U_NAMESPACE_BEGIN
-
-namespace {
-
-// 0000uuuuuuuuuuuu records u+1 unchanged text units.
-const int32_t MAX_UNCHANGED_LENGTH = 0x1000;
-const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
-
-// 0mmmnnnccccccccc with m=1..6 records ccc+1 replacements of m:n text units.
-const int32_t MAX_SHORT_CHANGE_OLD_LENGTH = 6;
-const int32_t MAX_SHORT_CHANGE_NEW_LENGTH = 7;
-const int32_t SHORT_CHANGE_NUM_MASK = 0x1ff;
-const int32_t MAX_SHORT_CHANGE = 0x6fff;
-
-// 0111mmmmmmnnnnnn records a replacement of m text units with n.
-// m or n = 61: actual length follows in the next edits array unit.
-// m or n = 62..63: actual length follows in the next two edits array units.
-// Bit 30 of the actual length is in the head unit.
-// Trailing units have bit 15 set.
-const int32_t LENGTH_IN_1TRAIL = 61;
-const int32_t LENGTH_IN_2TRAIL = 62;
-
-} // namespace
-
-void Edits::releaseArray() U_NOEXCEPT {
- if (array != stackArray) {
- uprv_free(array);
- }
-}
-
-Edits &Edits::copyArray(const Edits &other) {
- if (U_FAILURE(errorCode_)) {
- length = delta = numChanges = 0;
- return *this;
- }
- if (length > capacity) {
- uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)length * 2);
- if (newArray == nullptr) {
- length = delta = numChanges = 0;
- errorCode_ = U_MEMORY_ALLOCATION_ERROR;
- return *this;
- }
- releaseArray();
- array = newArray;
- capacity = length;
- }
- if (length > 0) {
- uprv_memcpy(array, other.array, (size_t)length * 2);
- }
- return *this;
-}
-
-Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
- if (U_FAILURE(errorCode_)) {
- length = delta = numChanges = 0;
- return *this;
- }
- releaseArray();
- if (length > STACK_CAPACITY) {
- array = src.array;
- capacity = src.capacity;
- src.array = src.stackArray;
- src.capacity = STACK_CAPACITY;
- src.reset();
- return *this;
- }
- array = stackArray;
- capacity = STACK_CAPACITY;
- if (length > 0) {
- uprv_memcpy(array, src.array, (size_t)length * 2);
- }
- return *this;
-}
-
-Edits &Edits::operator=(const Edits &other) {
- length = other.length;
- delta = other.delta;
- numChanges = other.numChanges;
- errorCode_ = other.errorCode_;
- return copyArray(other);
-}
-
-Edits &Edits::operator=(Edits &&src) U_NOEXCEPT {
- length = src.length;
- delta = src.delta;
- numChanges = src.numChanges;
- errorCode_ = src.errorCode_;
- return moveArray(src);
-}
-
-Edits::~Edits() {
- releaseArray();
-}
-
-void Edits::reset() U_NOEXCEPT {
- length = delta = numChanges = 0;
- errorCode_ = U_ZERO_ERROR;
-}
-
-void Edits::addUnchanged(int32_t unchangedLength) {
- if(U_FAILURE(errorCode_) || unchangedLength == 0) { return; }
- if(unchangedLength < 0) {
- errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- // Merge into previous unchanged-text record, if any.
- int32_t last = lastUnit();
- if(last < MAX_UNCHANGED) {
- int32_t remaining = MAX_UNCHANGED - last;
- if (remaining >= unchangedLength) {
- setLastUnit(last + unchangedLength);
- return;
- }
- setLastUnit(MAX_UNCHANGED);
- unchangedLength -= remaining;
- }
- // Split large lengths into multiple units.
- while(unchangedLength >= MAX_UNCHANGED_LENGTH) {
- append(MAX_UNCHANGED);
- unchangedLength -= MAX_UNCHANGED_LENGTH;
- }
- // Write a small (remaining) length.
- if(unchangedLength > 0) {
- append(unchangedLength - 1);
- }
-}
-
-void Edits::addReplace(int32_t oldLength, int32_t newLength) {
- if(U_FAILURE(errorCode_)) { return; }
- if(oldLength < 0 || newLength < 0) {
- errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- if (oldLength == 0 && newLength == 0) {
- return;
- }
- ++numChanges;
- int32_t newDelta = newLength - oldLength;
- if (newDelta != 0) {
- if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
- (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
- // Integer overflow or underflow.
- errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
- return;
- }
- delta += newDelta;
- }
-
- if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH &&
- newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) {
- // Merge into previous same-lengths short-replacement record, if any.
- int32_t u = (oldLength << 12) | (newLength << 9);
- int32_t last = lastUnit();
- if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
- (last & ~SHORT_CHANGE_NUM_MASK) == u &&
- (last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) {
- setLastUnit(last + 1);
- return;
- }
- append(u);
- return;
- }
-
- int32_t head = 0x7000;
- if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
- head |= oldLength << 6;
- head |= newLength;
- append(head);
- } else if ((capacity - length) >= 5 || growArray()) {
- int32_t limit = length + 1;
- if(oldLength < LENGTH_IN_1TRAIL) {
- head |= oldLength << 6;
- } else if(oldLength <= 0x7fff) {
- head |= LENGTH_IN_1TRAIL << 6;
- array[limit++] = (uint16_t)(0x8000 | oldLength);
- } else {
- head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
- array[limit++] = (uint16_t)(0x8000 | (oldLength >> 15));
- array[limit++] = (uint16_t)(0x8000 | oldLength);
- }
- if(newLength < LENGTH_IN_1TRAIL) {
- head |= newLength;
- } else if(newLength <= 0x7fff) {
- head |= LENGTH_IN_1TRAIL;
- array[limit++] = (uint16_t)(0x8000 | newLength);
- } else {
- head |= LENGTH_IN_2TRAIL + (newLength >> 30);
- array[limit++] = (uint16_t)(0x8000 | (newLength >> 15));
- array[limit++] = (uint16_t)(0x8000 | newLength);
- }
- array[length] = (uint16_t)head;
- length = limit;
- }
-}
-
-void Edits::append(int32_t r) {
- if(length < capacity || growArray()) {
- array[length++] = (uint16_t)r;
- }
-}
-
-UBool Edits::growArray() {
- int32_t newCapacity;
- if (array == stackArray) {
- newCapacity = 2000;
- } else if (capacity == INT32_MAX) {
- // Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
- // with a result-string-buffer overflow.
- errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
- return FALSE;
- } else if (capacity >= (INT32_MAX / 2)) {
- newCapacity = INT32_MAX;
- } else {
- newCapacity = 2 * capacity;
- }
- // Grow by at least 5 units so that a maximal change record will fit.
- if ((newCapacity - capacity) < 5) {
- errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
- return FALSE;
- }
- uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2);
- if (newArray == NULL) {
- errorCode_ = U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
- }
- uprv_memcpy(newArray, array, (size_t)length * 2);
- releaseArray();
- array = newArray;
- capacity = newCapacity;
- return TRUE;
-}
-
-UBool Edits::copyErrorTo(UErrorCode &outErrorCode) const {
- if (U_FAILURE(outErrorCode)) { return TRUE; }
- if (U_SUCCESS(errorCode_)) { return FALSE; }
- outErrorCode = errorCode_;
- return TRUE;
-}
-
-Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) {
- if (copyErrorTo(errorCode)) { return *this; }
- // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
- // Parallel iteration over both Edits.
- Iterator abIter = ab.getFineIterator();
- Iterator bcIter = bc.getFineIterator();
- UBool abHasNext = TRUE, bcHasNext = TRUE;
- // Copy iterator state into local variables, so that we can modify and subdivide spans.
- // ab old & new length, bc old & new length
- int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
- // When we have different-intermediate-length changes, we accumulate a larger change.
- int32_t pending_aLength = 0, pending_cLength = 0;
- for (;;) {
- // At this point, for each of the two iterators:
- // Either we are done with the locally cached current edit,
- // and its intermediate-string length has been reset,
- // or we will continue to work with a truncated remainder of this edit.
- //
- // If the current edit is done, and the iterator has not yet reached the end,
- // then we fetch the next edit. This is true for at least one of the iterators.
- //
- // Normally it does not matter whether we fetch from ab and then bc or vice versa.
- // However, the result is observably different when
- // ab deletions meet bc insertions at the same intermediate-string index.
- // Some users expect the bc insertions to come first, so we fetch from bc first.
- if (bc_bLength == 0) {
- if (bcHasNext && (bcHasNext = bcIter.next(errorCode)) != 0) {
- bc_bLength = bcIter.oldLength();
- cLength = bcIter.newLength();
- if (bc_bLength == 0) {
- // insertion
- if (ab_bLength == 0 || !abIter.hasChange()) {
- addReplace(pending_aLength, pending_cLength + cLength);
- pending_aLength = pending_cLength = 0;
- } else {
- pending_cLength += cLength;
- }
- continue;
- }
- }
- // else see if the other iterator is done, too.
- }
- if (ab_bLength == 0) {
- if (abHasNext && (abHasNext = abIter.next(errorCode)) != 0) {
- aLength = abIter.oldLength();
- ab_bLength = abIter.newLength();
- if (ab_bLength == 0) {
- // deletion
- if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
- addReplace(pending_aLength + aLength, pending_cLength);
- pending_aLength = pending_cLength = 0;
- } else {
- pending_aLength += aLength;
- }
- continue;
- }
- } else if (bc_bLength == 0) {
- // Both iterators are done at the same time:
- // The intermediate-string lengths match.
- break;
- } else {
- // The ab output string is shorter than the bc input string.
- if (!copyErrorTo(errorCode)) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return *this;
- }
- }
- if (bc_bLength == 0) {
- // The bc input string is shorter than the ab output string.
- if (!copyErrorTo(errorCode)) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return *this;
- }
- // Done fetching: ab_bLength > 0 && bc_bLength > 0
-
- // The current state has two parts:
- // - Past: We accumulate a longer ac edit in the "pending" variables.
- // - Current: We have copies of the current ab/bc edits in local variables.
- // At least one side is newly fetched.
- // One side might be a truncated remainder of an edit we fetched earlier.
-
- if (!abIter.hasChange() && !bcIter.hasChange()) {
- // An unchanged span all the way from string a to string c.
- if (pending_aLength != 0 || pending_cLength != 0) {
- addReplace(pending_aLength, pending_cLength);
- pending_aLength = pending_cLength = 0;
- }
- int32_t unchangedLength = aLength <= cLength ? aLength : cLength;
- addUnchanged(unchangedLength);
- ab_bLength = aLength -= unchangedLength;
- bc_bLength = cLength -= unchangedLength;
- // At least one of the unchanged spans is now empty.
- continue;
- }
- if (!abIter.hasChange() && bcIter.hasChange()) {
- // Unchanged a->b but changed b->c.
- if (ab_bLength >= bc_bLength) {
- // Split the longer unchanged span into change + remainder.
- addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
- pending_aLength = pending_cLength = 0;
- aLength = ab_bLength -= bc_bLength;
- bc_bLength = 0;
- continue;
- }
- // Handle the shorter unchanged span below like a change.
- } else if (abIter.hasChange() && !bcIter.hasChange()) {
- // Changed a->b and then unchanged b->c.
- if (ab_bLength <= bc_bLength) {
- // Split the longer unchanged span into change + remainder.
- addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
- pending_aLength = pending_cLength = 0;
- cLength = bc_bLength -= ab_bLength;
- ab_bLength = 0;
- continue;
- }
- // Handle the shorter unchanged span below like a change.
- } else { // both abIter.hasChange() && bcIter.hasChange()
- if (ab_bLength == bc_bLength) {
- // Changes on both sides up to the same position. Emit & reset.
- addReplace(pending_aLength + aLength, pending_cLength + cLength);
- pending_aLength = pending_cLength = 0;
- ab_bLength = bc_bLength = 0;
- continue;
- }
- }
- // Accumulate the a->c change, reset the shorter side,
- // keep a remainder of the longer one.
- pending_aLength += aLength;
- pending_cLength += cLength;
- if (ab_bLength < bc_bLength) {
- bc_bLength -= ab_bLength;
- cLength = ab_bLength = 0;
- } else { // ab_bLength > bc_bLength
- ab_bLength -= bc_bLength;
- aLength = bc_bLength = 0;
- }
- }
- if (pending_aLength != 0 || pending_cLength != 0) {
- addReplace(pending_aLength, pending_cLength);
- }
- copyErrorTo(errorCode);
- return *this;
-}
-
-Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
- array(a), index(0), length(len), remaining(0),
- onlyChanges_(oc), coarse(crs),
- dir(0), changed(FALSE), oldLength_(0), newLength_(0),
- srcIndex(0), replIndex(0), destIndex(0) {}
-
-int32_t Edits::Iterator::readLength(int32_t head) {
- if (head < LENGTH_IN_1TRAIL) {
- return head;
- } else if (head < LENGTH_IN_2TRAIL) {
- U_ASSERT(index < length);
- U_ASSERT(array[index] >= 0x8000);
- return array[index++] & 0x7fff;
- } else {
- U_ASSERT((index + 2) <= length);
- U_ASSERT(array[index] >= 0x8000);
- U_ASSERT(array[index + 1] >= 0x8000);
- int32_t len = ((head & 1) << 30) |
- ((int32_t)(array[index] & 0x7fff) << 15) |
- (array[index + 1] & 0x7fff);
- index += 2;
- return len;
- }
-}
-
-void Edits::Iterator::updateNextIndexes() {
- srcIndex += oldLength_;
- if (changed) {
- replIndex += newLength_;
- }
- destIndex += newLength_;
-}
-
-void Edits::Iterator::updatePreviousIndexes() {
- srcIndex -= oldLength_;
- if (changed) {
- replIndex -= newLength_;
- }
- destIndex -= newLength_;
-}
-
-UBool Edits::Iterator::noNext() {
- // No change before or beyond the string.
- dir = 0;
- changed = FALSE;
- oldLength_ = newLength_ = 0;
- return FALSE;
-}
-
-UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
- // Forward iteration: Update the string indexes to the limit of the current span,
- // and post-increment-read array units to assemble a new span.
- // Leaves the array index one after the last unit of that span.
- if (U_FAILURE(errorCode)) { return FALSE; }
- // We have an errorCode in case we need to start guarding against integer overflows.
- // It is also convenient for caller loops if we bail out when an error was set elsewhere.
- if (dir > 0) {
- updateNextIndexes();
- } else {
- if (dir < 0) {
- // Turn around from previous() to next().
- // Post-increment-read the same span again.
- if (remaining > 0) {
- // Fine-grained iterator:
- // Stay on the current one of a sequence of compressed changes.
- ++index; // next() rests on the index after the sequence unit.
- dir = 1;
- return TRUE;
- }
- }
- dir = 1;
- }
- if (remaining >= 1) {
- // Fine-grained iterator: Continue a sequence of compressed changes.
- if (remaining > 1) {
- --remaining;
- return TRUE;
- }
- remaining = 0;
- }
- if (index >= length) {
- return noNext();
- }
- int32_t u = array[index++];
- if (u <= MAX_UNCHANGED) {
- // Combine adjacent unchanged ranges.
- changed = FALSE;
- oldLength_ = u + 1;
- while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
- ++index;
- oldLength_ += u + 1;
- }
- newLength_ = oldLength_;
- if (onlyChanges) {
- updateNextIndexes();
- if (index >= length) {
- return noNext();
- }
- // already fetched u > MAX_UNCHANGED at index
- ++index;
- } else {
- return TRUE;
- }
- }
- changed = TRUE;
- if (u <= MAX_SHORT_CHANGE) {
- int32_t oldLen = u >> 12;
- int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
- int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
- if (coarse) {
- oldLength_ = num * oldLen;
- newLength_ = num * newLen;
- } else {
- // Split a sequence of changes that was compressed into one unit.
- oldLength_ = oldLen;
- newLength_ = newLen;
- if (num > 1) {
- remaining = num; // This is the first of two or more changes.
- }
- return TRUE;
- }
- } else {
- U_ASSERT(u <= 0x7fff);
- oldLength_ = readLength((u >> 6) & 0x3f);
- newLength_ = readLength(u & 0x3f);
- if (!coarse) {
- return TRUE;
- }
- }
- // Combine adjacent changes.
- while (index < length && (u = array[index]) > MAX_UNCHANGED) {
- ++index;
- if (u <= MAX_SHORT_CHANGE) {
- int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
- oldLength_ += (u >> 12) * num;
- newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
- } else {
- U_ASSERT(u <= 0x7fff);
- oldLength_ += readLength((u >> 6) & 0x3f);
- newLength_ += readLength(u & 0x3f);
- }
- }
- return TRUE;
-}
-
-UBool Edits::Iterator::previous(UErrorCode &errorCode) {
- // Backward iteration: Pre-decrement-read array units to assemble a new span,
- // then update the string indexes to the start of that span.
- // Leaves the array index on the head unit of that span.
- if (U_FAILURE(errorCode)) { return FALSE; }
- // We have an errorCode in case we need to start guarding against integer overflows.
- // It is also convenient for caller loops if we bail out when an error was set elsewhere.
- if (dir >= 0) {
- if (dir > 0) {
- // Turn around from next() to previous().
- // Set the string indexes to the span limit and
- // pre-decrement-read the same span again.
- if (remaining > 0) {
- // Fine-grained iterator:
- // Stay on the current one of a sequence of compressed changes.
- --index; // previous() rests on the sequence unit.
- dir = -1;
- return TRUE;
- }
- updateNextIndexes();
- }
- dir = -1;
- }
- if (remaining > 0) {
- // Fine-grained iterator: Continue a sequence of compressed changes.
- int32_t u = array[index];
- U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
- if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {
- ++remaining;
- updatePreviousIndexes();
- return TRUE;
- }
- remaining = 0;
- }
- if (index <= 0) {
- return noNext();
- }
- int32_t u = array[--index];
- if (u <= MAX_UNCHANGED) {
- // Combine adjacent unchanged ranges.
- changed = FALSE;
- oldLength_ = u + 1;
- while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
- --index;
- oldLength_ += u + 1;
- }
- newLength_ = oldLength_;
- // No need to handle onlyChanges as long as previous() is called only from findIndex().
- updatePreviousIndexes();
- return TRUE;
- }
- changed = TRUE;
- if (u <= MAX_SHORT_CHANGE) {
- int32_t oldLen = u >> 12;
- int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
- int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
- if (coarse) {
- oldLength_ = num * oldLen;
- newLength_ = num * newLen;
- } else {
- // Split a sequence of changes that was compressed into one unit.
- oldLength_ = oldLen;
- newLength_ = newLen;
- if (num > 1) {
- remaining = 1; // This is the last of two or more changes.
- }
- updatePreviousIndexes();
- return TRUE;
- }
- } else {
- if (u <= 0x7fff) {
- // The change is encoded in u alone.
- oldLength_ = readLength((u >> 6) & 0x3f);
- newLength_ = readLength(u & 0x3f);
- } else {
- // Back up to the head of the change, read the lengths,
- // and reset the index to the head again.
- U_ASSERT(index > 0);
- while ((u = array[--index]) > 0x7fff) {}
- U_ASSERT(u > MAX_SHORT_CHANGE);
- int32_t headIndex = index++;
- oldLength_ = readLength((u >> 6) & 0x3f);
- newLength_ = readLength(u & 0x3f);
- index = headIndex;
- }
- if (!coarse) {
- updatePreviousIndexes();
- return TRUE;
- }
- }
- // Combine adjacent changes.
- while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) {
- --index;
- if (u <= MAX_SHORT_CHANGE) {
- int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
- oldLength_ += (u >> 12) * num;
- newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
- } else if (u <= 0x7fff) {
- // Read the lengths, and reset the index to the head again.
- int32_t headIndex = index++;
- oldLength_ += readLength((u >> 6) & 0x3f);
- newLength_ += readLength(u & 0x3f);
- index = headIndex;
- }
- }
- updatePreviousIndexes();
- return TRUE;
-}
-
-int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode) || i < 0) { return -1; }
- int32_t spanStart, spanLength;
- if (findSource) { // find source index
- spanStart = srcIndex;
- spanLength = oldLength_;
- } else { // find destination index
- spanStart = destIndex;
- spanLength = newLength_;
- }
- if (i < spanStart) {
- if (i >= (spanStart / 2)) {
- // Search backwards.
- for (;;) {
- UBool hasPrevious = previous(errorCode);
- U_ASSERT(hasPrevious); // because i>=0 and the first span starts at 0
- (void)hasPrevious; // avoid unused-variable warning
- spanStart = findSource ? srcIndex : destIndex;
- if (i >= spanStart) {
- // The index is in the current span.
- return 0;
- }
- if (remaining > 0) {
- // Is the index in one of the remaining compressed edits?
- // spanStart is the start of the current span, first of the remaining ones.
- spanLength = findSource ? oldLength_ : newLength_;
- int32_t u = array[index];
- U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
- int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining;
- int32_t len = num * spanLength;
- if (i >= (spanStart - len)) {
- int32_t n = ((spanStart - i - 1) / spanLength) + 1;
- // 1 <= n <= num
- srcIndex -= n * oldLength_;
- replIndex -= n * newLength_;
- destIndex -= n * newLength_;
- remaining += n;
- return 0;
- }
- // Skip all of these edits at once.
- srcIndex -= num * oldLength_;
- replIndex -= num * newLength_;
- destIndex -= num * newLength_;
- remaining = 0;
- }
- }
- }
- // Reset the iterator to the start.
- dir = 0;
- index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
- } else if (i < (spanStart + spanLength)) {
- // The index is in the current span.
- return 0;
- }
- while (next(FALSE, errorCode)) {
- if (findSource) {
- spanStart = srcIndex;
- spanLength = oldLength_;
- } else {
- spanStart = destIndex;
- spanLength = newLength_;
- }
- if (i < (spanStart + spanLength)) {
- // The index is in the current span.
- return 0;
- }
- if (remaining > 1) {
- // Is the index in one of the remaining compressed edits?
- // spanStart is the start of the current span, first of the remaining ones.
- int32_t len = remaining * spanLength;
- if (i < (spanStart + len)) {
- int32_t n = (i - spanStart) / spanLength; // 1 <= n <= remaining - 1
- srcIndex += n * oldLength_;
- replIndex += n * newLength_;
- destIndex += n * newLength_;
- remaining -= n;
- return 0;
- }
- // Make next() skip all of these edits at once.
- oldLength_ *= remaining;
- newLength_ *= remaining;
- remaining = 0;
- }
- }
- return 1;
-}
-
-int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) {
- int32_t where = findIndex(i, TRUE, errorCode);
- if (where < 0) {
- // Error or before the string.
- return 0;
- }
- if (where > 0 || i == srcIndex) {
- // At or after string length, or at start of the found span.
- return destIndex;
- }
- if (changed) {
- // In a change span, map to its end.
- return destIndex + newLength_;
- } else {
- // In an unchanged span, offset 1:1 within it.
- return destIndex + (i - srcIndex);
- }
-}
-
-int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) {
- int32_t where = findIndex(i, FALSE, errorCode);
- if (where < 0) {
- // Error or before the string.
- return 0;
- }
- if (where > 0 || i == destIndex) {
- // At or after string length, or at start of the found span.
- return srcIndex;
- }
- if (changed) {
- // In a change span, map to its end.
- return srcIndex + oldLength_;
- } else {
- // In an unchanged span, offset within it.
- return srcIndex + (i - destIndex);
- }
-}
-
-UnicodeString& Edits::Iterator::toString(UnicodeString& sb) const {
- sb.append(u"{ src[", -1);
- ICU_Utility::appendNumber(sb, srcIndex);
- sb.append(u"..", -1);
- ICU_Utility::appendNumber(sb, srcIndex + oldLength_);
- if (changed) {
- sb.append(u"] ⇝ dest[", -1);
- } else {
- sb.append(u"] ≡ dest[", -1);
- }
- ICU_Utility::appendNumber(sb, destIndex);
- sb.append(u"..", -1);
- ICU_Utility::appendNumber(sb, destIndex + newLength_);
- if (changed) {
- sb.append(u"], repl[", -1);
- ICU_Utility::appendNumber(sb, replIndex);
- sb.append(u"..", -1);
- ICU_Utility::appendNumber(sb, replIndex + newLength_);
- sb.append(u"] }", -1);
- } else {
- sb.append(u"] (no-change) }", -1);
- }
- return sb;
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/errorcode.cpp b/contrib/libs/icu/common/errorcode.cpp
deleted file mode 100644
index e7ac43b5273..00000000000
--- a/contrib/libs/icu/common/errorcode.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2009-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: errorcode.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2009mar10
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/errorcode.h"
-
-U_NAMESPACE_BEGIN
-
-ErrorCode::~ErrorCode() {}
-
-UErrorCode ErrorCode::reset() {
- UErrorCode code = errorCode;
- errorCode = U_ZERO_ERROR;
- return code;
-}
-
-void ErrorCode::assertSuccess() const {
- if(isFailure()) {
- handleFailure();
- }
-}
-
-const char* ErrorCode::errorName() const {
- return u_errorName(errorCode);
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/filteredbrk.cpp b/contrib/libs/icu/common/filteredbrk.cpp
deleted file mode 100644
index c07128cbce1..00000000000
--- a/contrib/libs/icu/common/filteredbrk.cpp
+++ /dev/null
@@ -1,710 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2014-2015, International Business Machines Corporation and
-* others. All Rights Reserved.
-*******************************************************************************
-*/
-
-#include "unicode/utypes.h"
-#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
-
-#include "cmemory.h"
-
-#include "unicode/filteredbrk.h"
-#include "unicode/ucharstriebuilder.h"
-#include "unicode/ures.h"
-
-#include "uresimp.h" // ures_getByKeyWithFallback
-#include "ubrkimpl.h" // U_ICUDATA_BRKITR
-#include "uvector.h"
-#include "cmemory.h"
-
-U_NAMESPACE_BEGIN
-
-#ifndef FB_DEBUG
-#define FB_DEBUG 0
-#endif
-
-#if FB_DEBUG
-#include <stdio.h>
-static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d, const char *f, int l) {
- char buf[2048];
- if(s) {
- s->extract(0,s->length(),buf,2048);
- } else {
- strcpy(buf,"NULL");
- }
- fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n",
- f, l, m, buf, (const void*)s, b?'T':'F',(int)d);
-}
-
-#define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__)
-#else
-#define FB_TRACE(m,s,b,d)
-#endif
-
-/**
- * Used with sortedInsert()
- */
-static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
- const UnicodeString &a = *(const UnicodeString*)t1.pointer;
- const UnicodeString &b = *(const UnicodeString*)t2.pointer;
- return a.compare(b);
-}
-
-/**
- * A UVector which implements a set of strings.
- */
-class U_COMMON_API UStringSet : public UVector {
- public:
- UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject,
- uhash_compareUnicodeString,
- 1,
- status) {}
- virtual ~UStringSet();
- /**
- * Is this UnicodeSet contained?
- */
- inline UBool contains(const UnicodeString& s) {
- return contains((void*) &s);
- }
- using UVector::contains;
- /**
- * Return the ith UnicodeString alias
- */
- inline const UnicodeString* getStringAt(int32_t i) const {
- return (const UnicodeString*)elementAt(i);
- }
- /**
- * Adopt the UnicodeString if not already contained.
- * Caller no longer owns the pointer in any case.
- * @return true if adopted successfully, false otherwise (error, or else duplicate)
- */
- inline UBool adopt(UnicodeString *str, UErrorCode &status) {
- if(U_FAILURE(status) || contains(*str)) {
- delete str;
- return false;
- } else {
- sortedInsert(str, compareUnicodeString, status);
- if(U_FAILURE(status)) {
- delete str;
- return false;
- }
- return true;
- }
- }
- /**
- * Add by value.
- * @return true if successfully adopted.
- */
- inline UBool add(const UnicodeString& str, UErrorCode &status) {
- if(U_FAILURE(status)) return false;
- UnicodeString *t = new UnicodeString(str);
- if(t==NULL) {
- status = U_MEMORY_ALLOCATION_ERROR; return false;
- }
- return adopt(t, status);
- }
- /**
- * Remove this string.
- * @return true if successfully removed, false otherwise (error, or else it wasn't there)
- */
- inline UBool remove(const UnicodeString &s, UErrorCode &status) {
- if(U_FAILURE(status)) return false;
- return removeElement((void*) &s);
- }
-};
-
-/**
- * Virtual, won't be inlined
- */
-UStringSet::~UStringSet() {}
-
-/* ----------------------------------------------------------- */
-
-
-/* Filtered Break constants */
-static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie
-static const int32_t kMATCH = (1<<1); //< exact match - skip this one.
-static const int32_t kSuppressInReverse = (1<<0);
-static const int32_t kAddToForward = (1<<1);
-static const UChar kFULLSTOP = 0x002E; // '.'
-
-/**
- * Shared data for SimpleFilteredSentenceBreakIterator
- */
-class SimpleFilteredSentenceBreakData : public UMemory {
-public:
- SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
- : fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
- SimpleFilteredSentenceBreakData *incr() { refcount++; return this; }
- SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
- virtual ~SimpleFilteredSentenceBreakData();
-
- LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
- LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
- int32_t refcount;
-};
-
-SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
-
-/**
- * Concrete implementation
- */
-class SimpleFilteredSentenceBreakIterator : public BreakIterator {
-public:
- SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status);
- SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other);
- virtual ~SimpleFilteredSentenceBreakIterator();
-private:
- SimpleFilteredSentenceBreakData *fData;
- LocalPointer<BreakIterator> fDelegate;
- LocalUTextPointer fText;
-
- /* -- subclass interface -- */
-public:
- /* -- cloning and other subclass stuff -- */
- virtual BreakIterator * createBufferClone(void * /*stackBuffer*/,
- int32_t &/*BufferSize*/,
- UErrorCode &status) {
- // for now - always deep clone
- status = U_SAFECLONE_ALLOCATED_WARNING;
- return clone();
- }
- virtual SimpleFilteredSentenceBreakIterator* clone() const { return new SimpleFilteredSentenceBreakIterator(*this); }
- virtual UClassID getDynamicClassID(void) const { return NULL; }
- virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; }
-
- /* -- text modifying -- */
- virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); }
- virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; }
- virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); }
- virtual void setText(const UnicodeString &text) { fDelegate->setText(text); }
-
- /* -- other functions that are just delegated -- */
- virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); }
- virtual CharacterIterator& getText(void) const { return fDelegate->getText(); }
-
- /* -- ITERATION -- */
- virtual int32_t first(void);
- virtual int32_t preceding(int32_t offset);
- virtual int32_t previous(void);
- virtual UBool isBoundary(int32_t offset);
- virtual int32_t current(void) const { return fDelegate->current(); } // we keep the delegate current, so this should be correct.
-
- virtual int32_t next(void);
-
- virtual int32_t next(int32_t n);
- virtual int32_t following(int32_t offset);
- virtual int32_t last(void);
-
-private:
- /**
- * Given that the fDelegate has already given its "initial" answer,
- * find the NEXT actual (non-excepted) break.
- * @param n initial position from delegate
- * @return new break position or UBRK_DONE
- */
- int32_t internalNext(int32_t n);
- /**
- * Given that the fDelegate has already given its "initial" answer,
- * find the PREV actual (non-excepted) break.
- * @param n initial position from delegate
- * @return new break position or UBRK_DONE
- */
- int32_t internalPrev(int32_t n);
- /**
- * set up the UText with the value of the fDelegate.
- * Call this before calling breakExceptionAt.
- * May be able to avoid excess calls
- */
- void resetState(UErrorCode &status);
- /**
- * Is there a match (exception) at this spot?
- */
- enum EFBMatchResult { kNoExceptionHere, kExceptionHere };
- /**
- * Determine if there is an exception at this spot
- * @param n spot to check
- * @return kNoExceptionHere or kExceptionHere
- **/
- enum EFBMatchResult breakExceptionAt(int32_t n);
-};
-
-SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other)
- : BreakIterator(other), fData(other.fData->incr()), fDelegate(other.fDelegate->clone())
-{
-}
-
-
-SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) :
- BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC_ACTUAL_LOCALE,status)),
- fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
- fDelegate(adopt)
-{
- // all set..
-}
-
-SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
- fData = fData->decr();
-}
-
-void SimpleFilteredSentenceBreakIterator::resetState(UErrorCode &status) {
- fText.adoptInstead(fDelegate->getUText(fText.orphan(), status));
-}
-
-SimpleFilteredSentenceBreakIterator::EFBMatchResult
-SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
- int64_t bestPosn = -1;
- int32_t bestValue = -1;
- // loops while 'n' points to an exception.
- utext_setNativeIndex(fText.getAlias(), n); // from n..
- fData->fBackwardsTrie->reset();
- UChar32 uch;
-
- //if(debug2) u_printf(" n@ %d\n", n);
- // Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
- if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here??
- // TODO only do this the 1st time?
- //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
- } else {
- //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
- uch = utext_next32(fText.getAlias());
- //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
- }
-
- UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
-
- while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and..
- USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
- if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
- bestPosn = utext_getNativeIndex(fText.getAlias());
- bestValue = fData->fBackwardsTrie->getValue();
- }
- //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
- }
-
- if(USTRINGTRIE_MATCHES(r)) { // exact match?
- //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
- bestValue = fData->fBackwardsTrie->getValue();
- bestPosn = utext_getNativeIndex(fText.getAlias());
- //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
- }
-
- if(bestPosn>=0) {
- //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
-
- //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what?
- //int32_t bestValue = fBackwardsTrie->getValue();
- ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue);
-
- if(bestValue == kMATCH) { // exact match!
- //if(debug2) u_printf(" exact backward match\n");
- return kExceptionHere; // See if the next is another exception.
- } else if(bestValue == kPARTIAL
- && fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
- //if(debug2) u_printf(" partial backward match\n");
- // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
- // to see if it matches something going forward.
- fData->fForwardsPartialTrie->reset();
- UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
- utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
- //if(debug2) u_printf("Retrying at %d\n", bestPosn);
- while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
- USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) {
- //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
- }
- if(USTRINGTRIE_MATCHES(rfwd)) {
- //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch);
- // only full matches here, nothing to check
- // skip the next:
- return kExceptionHere;
- } else {
- //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch);
- // no match (no exception) -return the 'underlying' break
- return kNoExceptionHere;
- }
- } else {
- return kNoExceptionHere; // internal error and/or no forwards trie
- }
- } else {
- //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // no best match
- return kNoExceptionHere; // No match - so exit. Not an exception.
- }
-}
-
-// the workhorse single next.
-int32_t
-SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
- if(n == UBRK_DONE || // at end or
- fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
- return n;
- }
- // OK, do we need to break here?
- UErrorCode status = U_ZERO_ERROR;
- // refresh text
- resetState(status);
- if(U_FAILURE(status)) return UBRK_DONE; // bail out
- int64_t utextLen = utext_nativeLength(fText.getAlias());
-
- //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
- while (n != UBRK_DONE && n != utextLen) { // outer loop runs once per underlying break (from fDelegate).
- SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);
-
- switch(m) {
- case kExceptionHere:
- n = fDelegate->next(); // skip this one. Find the next lowerlevel break.
- continue;
-
- default:
- case kNoExceptionHere:
- return n;
- }
- }
- return n;
-}
-
-int32_t
-SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
- if(n == 0 || n == UBRK_DONE || // at end or
- fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
- return n;
- }
- // OK, do we need to break here?
- UErrorCode status = U_ZERO_ERROR;
- // refresh text
- resetState(status);
- if(U_FAILURE(status)) return UBRK_DONE; // bail out
-
- //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
- while (n != UBRK_DONE && n != 0) { // outer loop runs once per underlying break (from fDelegate).
- SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);
-
- switch(m) {
- case kExceptionHere:
- n = fDelegate->previous(); // skip this one. Find the next lowerlevel break.
- continue;
-
- default:
- case kNoExceptionHere:
- return n;
- }
- }
- return n;
-}
-
-
-int32_t
-SimpleFilteredSentenceBreakIterator::next() {
- return internalNext(fDelegate->next());
-}
-
-int32_t
-SimpleFilteredSentenceBreakIterator::first(void) {
- // Don't suppress a break opportunity at the beginning of text.
- return fDelegate->first();
-}
-
-int32_t
-SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) {
- return internalPrev(fDelegate->preceding(offset));
-}
-
-int32_t
-SimpleFilteredSentenceBreakIterator::previous(void) {
- return internalPrev(fDelegate->previous());
-}
-
-UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
- if (!fDelegate->isBoundary(offset)) return false; // no break to suppress
-
- if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions
-
- UErrorCode status = U_ZERO_ERROR;
- resetState(status);
-
- SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(offset);
-
- switch(m) {
- case kExceptionHere:
- return false;
- default:
- case kNoExceptionHere:
- return true;
- }
-}
-
-int32_t
-SimpleFilteredSentenceBreakIterator::next(int32_t offset) {
- return internalNext(fDelegate->next(offset));
-}
-
-int32_t
-SimpleFilteredSentenceBreakIterator::following(int32_t offset) {
- return internalNext(fDelegate->following(offset));
-}
-
-int32_t
-SimpleFilteredSentenceBreakIterator::last(void) {
- // Don't suppress a break opportunity at the end of text.
- return fDelegate->last();
-}
-
-
-/**
- * Concrete implementation of builder class.
- */
-class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
-public:
- virtual ~SimpleFilteredBreakIteratorBuilder();
- SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status);
- SimpleFilteredBreakIteratorBuilder(UErrorCode &status);
- virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
- virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
- virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status);
-private:
- UStringSet fSet;
-};
-
-SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder()
-{
-}
-
-SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCode &status)
- : fSet(status)
-{
-}
-
-SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status)
- : fSet(status)
-{
- if(U_SUCCESS(status)) {
- UErrorCode subStatus = U_ZERO_ERROR;
- LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &subStatus));
- if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {
- status = subStatus; // copy the failing status
-#if FB_DEBUG
- fprintf(stderr, "open BUNDLE %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
-#endif
- return; // leaves the builder empty, if you try to use it.
- }
- LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", NULL, &subStatus));
- if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {
- status = subStatus; // copy the failing status
-#if FB_DEBUG
- fprintf(stderr, "open EXCEPTIONS %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
-#endif
- return; // leaves the builder empty, if you try to use it.
- }
- LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", NULL, &subStatus));
-
-#if FB_DEBUG
- {
- UErrorCode subsub = subStatus;
- fprintf(stderr, "open SentenceBreak %s => %s, %s\n", fromLocale.getBaseName(), ures_getLocale(breaks.getAlias(), &subsub), u_errorName(subStatus));
- }
-#endif
-
- if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {
- status = subStatus; // copy the failing status
-#if FB_DEBUG
- fprintf(stderr, "open %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
-#endif
- return; // leaves the builder empty, if you try to use it.
- }
-
- LocalUResourceBundlePointer strs;
- subStatus = status; // Pick up inherited warning status now
- do {
- strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &subStatus));
- if(strs.isValid() && U_SUCCESS(subStatus)) {
- UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status));
- suppressBreakAfter(str, status); // load the string
- }
- } while (strs.isValid() && U_SUCCESS(subStatus));
- if(U_FAILURE(subStatus)&&subStatus!=U_INDEX_OUTOFBOUNDS_ERROR&&U_SUCCESS(status)) {
- status = subStatus;
- }
- }
-}
-
-UBool
-SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
-{
- UBool r = fSet.add(exception, status);
- FB_TRACE("suppressBreakAfter",&exception,r,0);
- return r;
-}
-
-UBool
-SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
-{
- UBool r = fSet.remove(exception, status);
- FB_TRACE("unsuppressBreakAfter",&exception,r,0);
- return r;
-}
-
-/**
- * Jitterbug 2974: MSVC has a bug whereby new X[0] behaves badly.
- * Work around this.
- *
- * Note: "new UnicodeString[subCount]" ends up calling global operator new
- * on MSVC2012 for some reason.
- */
-static inline UnicodeString* newUnicodeStringArray(size_t count) {
- return new UnicodeString[count ? count : 1];
-}
-
-BreakIterator *
-SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) {
- LocalPointer<BreakIterator> adopt(adoptBreakIterator);
-
- LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status);
- LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status);
- if(U_FAILURE(status)) {
- return NULL;
- }
-
- int32_t revCount = 0;
- int32_t fwdCount = 0;
-
- int32_t subCount = fSet.size();
-
- UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount);
-
- LocalArray<UnicodeString> ustrs(ustrs_ptr);
-
- LocalMemory<int> partials;
- partials.allocateInsteadAndReset(subCount);
-
- LocalPointer<UCharsTrie> backwardsTrie; // i.e. ".srM" for Mrs.
- LocalPointer<UCharsTrie> forwardsPartialTrie; // Has ".a" for "a.M."
-
- int n=0;
- for ( int32_t i = 0;
- i<fSet.size();
- i++) {
- const UnicodeString *abbr = fSet.getStringAt(i);
- if(abbr) {
- FB_TRACE("build",abbr,TRUE,i);
- ustrs[n] = *abbr; // copy by value
- FB_TRACE("ustrs[n]",&ustrs[n],TRUE,i);
- } else {
- FB_TRACE("build",abbr,FALSE,i);
- status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- partials[n] = 0; // default: not partial
- n++;
- }
- // first pass - find partials.
- for(int i=0;i<subCount;i++) {
- int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations
- if(nn>-1 && (nn+1)!=ustrs[i].length()) {
- FB_TRACE("partial",&ustrs[i],FALSE,i);
- // is partial.
- // is it unique?
- int sameAs = -1;
- for(int j=0;j<subCount;j++) {
- if(j==i) continue;
- if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) {
- FB_TRACE("prefix",&ustrs[j],FALSE,nn+1);
- //UBool otherIsPartial = ((nn+1)!=ustrs[j].length()); // true if ustrs[j] doesn't end at nn
- if(partials[j]==0) { // hasn't been processed yet
- partials[j] = kSuppressInReverse | kAddToForward;
- FB_TRACE("suppressing",&ustrs[j],FALSE,j);
- } else if(partials[j] & kSuppressInReverse) {
- sameAs = j; // the other entry is already in the reverse table.
- }
- }
- }
- FB_TRACE("for partial same-",&ustrs[i],FALSE,sameAs);
- FB_TRACE(" == partial #",&ustrs[i],FALSE,partials[i]);
- UnicodeString prefix(ustrs[i], 0, nn+1);
- if(sameAs == -1 && partials[i] == 0) {
- // first one - add the prefix to the reverse table.
- prefix.reverse();
- builder->add(prefix, kPARTIAL, status);
- revCount++;
- FB_TRACE("Added partial",&prefix,FALSE, i);
- FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
- partials[i] = kSuppressInReverse | kAddToForward;
- } else {
- FB_TRACE("NOT adding partial",&prefix,FALSE, i);
- FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
- }
- }
- }
- for(int i=0;i<subCount;i++) {
- if(partials[i]==0) {
- ustrs[i].reverse();
- builder->add(ustrs[i], kMATCH, status);
- revCount++;
- FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i);
- } else {
- FB_TRACE("Adding fwd",&ustrs[i], FALSE, i);
-
- // an optimization would be to only add the portion after the '.'
- // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
- // instead of "Ph.D." since we already know the "Ph." part is a match.
- // would need the trie to be able to hold 0-length strings, though.
- builder2->add(ustrs[i], kMATCH, status); // forward
- fwdCount++;
- //ustrs[i].reverse();
- ////if(debug2) u_printf("SUPPRESS- not Added(%d): /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status));
- }
- }
- FB_TRACE("AbbrCount",NULL,FALSE, subCount);
-
- if(revCount>0) {
- backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
- if(U_FAILURE(status)) {
- FB_TRACE(u_errorName(status),NULL,FALSE, -1);
- return NULL;
- }
- }
-
- if(fwdCount>0) {
- forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
- if(U_FAILURE(status)) {
- FB_TRACE(u_errorName(status),NULL,FALSE, -1);
- return NULL;
- }
- }
-
- return new SimpleFilteredSentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status);
-}
-
-
-// ----------- Base class implementation
-
-FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() {
-}
-
-FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() {
-}
-
-FilteredBreakIteratorBuilder *
-FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) {
- if(U_FAILURE(status)) return NULL;
- LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(where, status), status);
- return (U_SUCCESS(status))? ret.orphan(): NULL;
-}
-
-FilteredBreakIteratorBuilder *
-FilteredBreakIteratorBuilder::createInstance(UErrorCode &status) {
- return createEmptyInstance(status);
-}
-
-FilteredBreakIteratorBuilder *
-FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) {
- if(U_FAILURE(status)) return NULL;
- LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status);
- return (U_SUCCESS(status))? ret.orphan(): NULL;
-}
-
-U_NAMESPACE_END
-
-#endif //#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
diff --git a/contrib/libs/icu/common/filterednormalizer2.cpp b/contrib/libs/icu/common/filterednormalizer2.cpp
deleted file mode 100644
index 1a0914d3f7b..00000000000
--- a/contrib/libs/icu/common/filterednormalizer2.cpp
+++ /dev/null
@@ -1,363 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2009-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: filterednormalizer2.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2009dec10
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_NORMALIZATION
-
-#include "unicode/edits.h"
-#include "unicode/normalizer2.h"
-#include "unicode/stringoptions.h"
-#include "unicode/uniset.h"
-#include "unicode/unistr.h"
-#include "unicode/unorm.h"
-#include "cpputils.h"
-
-U_NAMESPACE_BEGIN
-
-FilteredNormalizer2::~FilteredNormalizer2() {}
-
-UnicodeString &
-FilteredNormalizer2::normalize(const UnicodeString &src,
- UnicodeString &dest,
- UErrorCode &errorCode) const {
- uprv_checkCanGetBuffer(src, errorCode);
- if(U_FAILURE(errorCode)) {
- dest.setToBogus();
- return dest;
- }
- if(&dest==&src) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return dest;
- }
- dest.remove();
- return normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
-}
-
-// Internal: No argument checking, and appends to dest.
-// Pass as input spanCondition the one that is likely to yield a non-zero
-// span length at the start of src.
-// For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
-// USET_SPAN_SIMPLE should be passed in for the start of src
-// and USET_SPAN_NOT_CONTAINED should be passed in if we continue after
-// an in-filter prefix.
-UnicodeString &
-FilteredNormalizer2::normalize(const UnicodeString &src,
- UnicodeString &dest,
- USetSpanCondition spanCondition,
- UErrorCode &errorCode) const {
- UnicodeString tempDest; // Don't throw away destination buffer between iterations.
- for(int32_t prevSpanLimit=0; prevSpanLimit<src.length();) {
- int32_t spanLimit=set.span(src, prevSpanLimit, spanCondition);
- int32_t spanLength=spanLimit-prevSpanLimit;
- if(spanCondition==USET_SPAN_NOT_CONTAINED) {
- if(spanLength!=0) {
- dest.append(src, prevSpanLimit, spanLength);
- }
- spanCondition=USET_SPAN_SIMPLE;
- } else {
- if(spanLength!=0) {
- // Not norm2.normalizeSecondAndAppend() because we do not want
- // to modify the non-filter part of dest.
- dest.append(norm2.normalize(src.tempSubStringBetween(prevSpanLimit, spanLimit),
- tempDest, errorCode));
- if(U_FAILURE(errorCode)) {
- break;
- }
- }
- spanCondition=USET_SPAN_NOT_CONTAINED;
- }
- prevSpanLimit=spanLimit;
- }
- return dest;
-}
-
-void
-FilteredNormalizer2::normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
- Edits *edits, UErrorCode &errorCode) const {
- if (U_FAILURE(errorCode)) {
- return;
- }
- if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
- edits->reset();
- }
- options |= U_EDITS_NO_RESET; // Do not reset for each span.
- normalizeUTF8(options, src.data(), src.length(), sink, edits, USET_SPAN_SIMPLE, errorCode);
-}
-
-void
-FilteredNormalizer2::normalizeUTF8(uint32_t options, const char *src, int32_t length,
- ByteSink &sink, Edits *edits,
- USetSpanCondition spanCondition,
- UErrorCode &errorCode) const {
- while (length > 0) {
- int32_t spanLength = set.spanUTF8(src, length, spanCondition);
- if (spanCondition == USET_SPAN_NOT_CONTAINED) {
- if (spanLength != 0) {
- if (edits != nullptr) {
- edits->addUnchanged(spanLength);
- }
- if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
- sink.Append(src, spanLength);
- }
- }
- spanCondition = USET_SPAN_SIMPLE;
- } else {
- if (spanLength != 0) {
- // Not norm2.normalizeSecondAndAppend() because we do not want
- // to modify the non-filter part of dest.
- norm2.normalizeUTF8(options, StringPiece(src, spanLength), sink, edits, errorCode);
- if (U_FAILURE(errorCode)) {
- break;
- }
- }
- spanCondition = USET_SPAN_NOT_CONTAINED;
- }
- src += spanLength;
- length -= spanLength;
- }
-}
-
-UnicodeString &
-FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
- const UnicodeString &second,
- UErrorCode &errorCode) const {
- return normalizeSecondAndAppend(first, second, TRUE, errorCode);
-}
-
-UnicodeString &
-FilteredNormalizer2::append(UnicodeString &first,
- const UnicodeString &second,
- UErrorCode &errorCode) const {
- return normalizeSecondAndAppend(first, second, FALSE, errorCode);
-}
-
-UnicodeString &
-FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
- const UnicodeString &second,
- UBool doNormalize,
- UErrorCode &errorCode) const {
- uprv_checkCanGetBuffer(first, errorCode);
- uprv_checkCanGetBuffer(second, errorCode);
- if(U_FAILURE(errorCode)) {
- return first;
- }
- if(&first==&second) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return first;
- }
- if(first.isEmpty()) {
- if(doNormalize) {
- return normalize(second, first, errorCode);
- } else {
- return first=second;
- }
- }
- // merge the in-filter suffix of the first string with the in-filter prefix of the second
- int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);
- if(prefixLimit!=0) {
- UnicodeString prefix(second.tempSubString(0, prefixLimit));
- int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
- if(suffixStart==0) {
- if(doNormalize) {
- norm2.normalizeSecondAndAppend(first, prefix, errorCode);
- } else {
- norm2.append(first, prefix, errorCode);
- }
- } else {
- UnicodeString middle(first, suffixStart, INT32_MAX);
- if(doNormalize) {
- norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
- } else {
- norm2.append(middle, prefix, errorCode);
- }
- first.replace(suffixStart, INT32_MAX, middle);
- }
- }
- if(prefixLimit<second.length()) {
- UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));
- if(doNormalize) {
- normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);
- } else {
- first.append(rest);
- }
- }
- return first;
-}
-
-UBool
-FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {
- return set.contains(c) && norm2.getDecomposition(c, decomposition);
-}
-
-UBool
-FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
- return set.contains(c) && norm2.getRawDecomposition(c, decomposition);
-}
-
-UChar32
-FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
- return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : U_SENTINEL;
-}
-
-uint8_t
-FilteredNormalizer2::getCombiningClass(UChar32 c) const {
- return set.contains(c) ? norm2.getCombiningClass(c) : 0;
-}
-
-UBool
-FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
- uprv_checkCanGetBuffer(s, errorCode);
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
- for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
- int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
- if(spanCondition==USET_SPAN_NOT_CONTAINED) {
- spanCondition=USET_SPAN_SIMPLE;
- } else {
- if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) ||
- U_FAILURE(errorCode)
- ) {
- return FALSE;
- }
- spanCondition=USET_SPAN_NOT_CONTAINED;
- }
- prevSpanLimit=spanLimit;
- }
- return TRUE;
-}
-
-UBool
-FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- const char *s = sp.data();
- int32_t length = sp.length();
- USetSpanCondition spanCondition = USET_SPAN_SIMPLE;
- while (length > 0) {
- int32_t spanLength = set.spanUTF8(s, length, spanCondition);
- if (spanCondition == USET_SPAN_NOT_CONTAINED) {
- spanCondition = USET_SPAN_SIMPLE;
- } else {
- if (!norm2.isNormalizedUTF8(StringPiece(s, spanLength), errorCode) ||
- U_FAILURE(errorCode)) {
- return FALSE;
- }
- spanCondition = USET_SPAN_NOT_CONTAINED;
- }
- s += spanLength;
- length -= spanLength;
- }
- return TRUE;
-}
-
-UNormalizationCheckResult
-FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
- uprv_checkCanGetBuffer(s, errorCode);
- if(U_FAILURE(errorCode)) {
- return UNORM_MAYBE;
- }
- UNormalizationCheckResult result=UNORM_YES;
- USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
- for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
- int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
- if(spanCondition==USET_SPAN_NOT_CONTAINED) {
- spanCondition=USET_SPAN_SIMPLE;
- } else {
- UNormalizationCheckResult qcResult=
- norm2.quickCheck(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
- if(U_FAILURE(errorCode) || qcResult==UNORM_NO) {
- return qcResult;
- } else if(qcResult==UNORM_MAYBE) {
- result=qcResult;
- }
- spanCondition=USET_SPAN_NOT_CONTAINED;
- }
- prevSpanLimit=spanLimit;
- }
- return result;
-}
-
-int32_t
-FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
- uprv_checkCanGetBuffer(s, errorCode);
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
- for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
- int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
- if(spanCondition==USET_SPAN_NOT_CONTAINED) {
- spanCondition=USET_SPAN_SIMPLE;
- } else {
- int32_t yesLimit=
- prevSpanLimit+
- norm2.spanQuickCheckYes(
- s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
- if(U_FAILURE(errorCode) || yesLimit<spanLimit) {
- return yesLimit;
- }
- spanCondition=USET_SPAN_NOT_CONTAINED;
- }
- prevSpanLimit=spanLimit;
- }
- return s.length();
-}
-
-UBool
-FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
- return !set.contains(c) || norm2.hasBoundaryBefore(c);
-}
-
-UBool
-FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
- return !set.contains(c) || norm2.hasBoundaryAfter(c);
-}
-
-UBool
-FilteredNormalizer2::isInert(UChar32 c) const {
- return !set.contains(c) || norm2.isInert(c);
-}
-
-U_NAMESPACE_END
-
-// C API ------------------------------------------------------------------- ***
-
-U_NAMESPACE_USE
-
-U_CAPI UNormalizer2 * U_EXPORT2
-unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return NULL;
- }
- if(filterSet==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
- *UnicodeSet::fromUSet(filterSet));
- if(fn2==NULL) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- }
- return (UNormalizer2 *)fn2;
-}
-
-#endif // !UCONFIG_NO_NORMALIZATION
diff --git a/contrib/libs/icu/common/hash.h b/contrib/libs/icu/common/hash.h
deleted file mode 100644
index f02cb7087a5..00000000000
--- a/contrib/libs/icu/common/hash.h
+++ /dev/null
@@ -1,248 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 1997-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-******************************************************************************
-* Date Name Description
-* 03/28/00 aliu Creation.
-******************************************************************************
-*/
-
-#ifndef HASH_H
-#define HASH_H
-
-#include "unicode/unistr.h"
-#include "unicode/uobject.h"
-#include "cmemory.h"
-#include "uhash.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * Hashtable is a thin C++ wrapper around UHashtable, a general-purpose void*
- * hashtable implemented in C. Hashtable is designed to be idiomatic and
- * easy-to-use in C++.
- *
- * Hashtable is an INTERNAL CLASS.
- */
-class U_COMMON_API Hashtable : public UMemory {
- UHashtable* hash;
- UHashtable hashObj;
-
- inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
-
- inline void initSize(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t size, UErrorCode& status);
-
-public:
- /**
- * Construct a hashtable
- * @param ignoreKeyCase If true, keys are case insensitive.
- * @param status Error code
- */
- inline Hashtable(UBool ignoreKeyCase, UErrorCode& status);
-
- /**
- * Construct a hashtable
- * @param ignoreKeyCase If true, keys are case insensitive.
- * @param size initial size allocation
- * @param status Error code
- */
- inline Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status);
-
- /**
- * Construct a hashtable
- * @param keyComp Comparator for comparing the keys
- * @param valueComp Comparator for comparing the values
- * @param status Error code
- */
- inline Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
-
- /**
- * Construct a hashtable
- * @param status Error code
- */
- inline Hashtable(UErrorCode& status);
-
- /**
- * Construct a hashtable, _disregarding any error_. Use this constructor
- * with caution.
- */
- inline Hashtable();
-
- /**
- * Non-virtual destructor; make this virtual if Hashtable is subclassed
- * in the future.
- */
- inline ~Hashtable();
-
- inline UObjectDeleter *setValueDeleter(UObjectDeleter *fn);
-
- inline int32_t count() const;
-
- inline void* put(const UnicodeString& key, void* value, UErrorCode& status);
-
- inline int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
-
- inline void* get(const UnicodeString& key) const;
-
- inline int32_t geti(const UnicodeString& key) const;
-
- inline void* remove(const UnicodeString& key);
-
- inline int32_t removei(const UnicodeString& key);
-
- inline void removeAll(void);
-
- inline const UHashElement* find(const UnicodeString& key) const;
-
- /**
- * @param pos - must be UHASH_FIRST on first call, and untouched afterwards.
- * @see uhash_nextElement
- */
- inline const UHashElement* nextElement(int32_t& pos) const;
-
- inline UKeyComparator* setKeyComparator(UKeyComparator*keyComp);
-
- inline UValueComparator* setValueComparator(UValueComparator* valueComp);
-
- inline UBool equals(const Hashtable& that) const;
-private:
- Hashtable(const Hashtable &other); // forbid copying of this class
- Hashtable &operator=(const Hashtable &other); // forbid copying of this class
-};
-
-/*********************************************************************
- * Implementation
- ********************************************************************/
-
-inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
- UValueComparator *valueComp, UErrorCode& status) {
- if (U_FAILURE(status)) {
- return;
- }
- uhash_init(&hashObj, keyHash, keyComp, valueComp, &status);
- if (U_SUCCESS(status)) {
- hash = &hashObj;
- uhash_setKeyDeleter(hash, uprv_deleteUObject);
- }
-}
-
-inline void Hashtable::initSize(UHashFunction *keyHash, UKeyComparator *keyComp,
- UValueComparator *valueComp, int32_t size, UErrorCode& status) {
- if (U_FAILURE(status)) {
- return;
- }
- uhash_initSize(&hashObj, keyHash, keyComp, valueComp, size, &status);
- if (U_SUCCESS(status)) {
- hash = &hashObj;
- uhash_setKeyDeleter(hash, uprv_deleteUObject);
- }
-}
-
-inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
- UErrorCode& status) : hash(0) {
- init( uhash_hashUnicodeString, keyComp, valueComp, status);
-}
-
-inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
- : hash(0)
-{
- init(ignoreKeyCase ? uhash_hashCaselessUnicodeString
- : uhash_hashUnicodeString,
- ignoreKeyCase ? uhash_compareCaselessUnicodeString
- : uhash_compareUnicodeString,
- NULL,
- status);
-}
-
-inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status)
- : hash(0)
-{
- initSize(ignoreKeyCase ? uhash_hashCaselessUnicodeString
- : uhash_hashUnicodeString,
- ignoreKeyCase ? uhash_compareCaselessUnicodeString
- : uhash_compareUnicodeString,
- NULL, size,
- status);
-}
-
-inline Hashtable::Hashtable(UErrorCode& status)
- : hash(0)
-{
- init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
-}
-
-inline Hashtable::Hashtable()
- : hash(0)
-{
- UErrorCode status = U_ZERO_ERROR;
- init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
-}
-
-inline Hashtable::~Hashtable() {
- if (hash != NULL) {
- uhash_close(hash);
- }
-}
-
-inline UObjectDeleter *Hashtable::setValueDeleter(UObjectDeleter *fn) {
- return uhash_setValueDeleter(hash, fn);
-}
-
-inline int32_t Hashtable::count() const {
- return uhash_count(hash);
-}
-
-inline void* Hashtable::put(const UnicodeString& key, void* value, UErrorCode& status) {
- return uhash_put(hash, new UnicodeString(key), value, &status);
-}
-
-inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCode& status) {
- return uhash_puti(hash, new UnicodeString(key), value, &status);
-}
-
-inline void* Hashtable::get(const UnicodeString& key) const {
- return uhash_get(hash, &key);
-}
-
-inline int32_t Hashtable::geti(const UnicodeString& key) const {
- return uhash_geti(hash, &key);
-}
-
-inline void* Hashtable::remove(const UnicodeString& key) {
- return uhash_remove(hash, &key);
-}
-
-inline int32_t Hashtable::removei(const UnicodeString& key) {
- return uhash_removei(hash, &key);
-}
-
-inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
- return uhash_find(hash, &key);
-}
-
-inline const UHashElement* Hashtable::nextElement(int32_t& pos) const {
- return uhash_nextElement(hash, &pos);
-}
-
-inline void Hashtable::removeAll(void) {
- uhash_removeAll(hash);
-}
-
-inline UKeyComparator* Hashtable::setKeyComparator(UKeyComparator*keyComp){
- return uhash_setKeyComparator(hash, keyComp);
-}
-
-inline UValueComparator* Hashtable::setValueComparator(UValueComparator* valueComp){
- return uhash_setValueComparator(hash, valueComp);
-}
-
-inline UBool Hashtable::equals(const Hashtable& that)const{
- return uhash_equals(hash, that.hash);
-}
-U_NAMESPACE_END
-
-#endif
-
diff --git a/contrib/libs/icu/common/icudataver.cpp b/contrib/libs/icu/common/icudataver.cpp
deleted file mode 100644
index d3144113741..00000000000
--- a/contrib/libs/icu/common/icudataver.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2009-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/icudataver.h"
-#include "unicode/ures.h"
-#include "uresimp.h" /* for ures_getVersionByKey */
-
-U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status) {
- UResourceBundle *icudatares = NULL;
-
- if (U_FAILURE(*status)) {
- return;
- }
-
- if (dataVersionFillin != NULL) {
- icudatares = ures_openDirect(NULL, U_ICU_VERSION_BUNDLE , status);
- if (U_SUCCESS(*status)) {
- ures_getVersionByKey(icudatares, U_ICU_DATA_KEY, dataVersionFillin, status);
- }
- ures_close(icudatares);
- }
-}
diff --git a/contrib/libs/icu/common/icuplug.cpp b/contrib/libs/icu/common/icuplug.cpp
deleted file mode 100644
index c6439cc819a..00000000000
--- a/contrib/libs/icu/common/icuplug.cpp
+++ /dev/null
@@ -1,884 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2009-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* FILE NAME : icuplug.c
-*
-* Date Name Description
-* 10/29/2009 sl New.
-******************************************************************************
-*/
-
-#include "unicode/icuplug.h"
-
-
-#if UCONFIG_ENABLE_PLUGINS
-
-
-#include "icuplugimp.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "putilimp.h"
-#include "ucln.h"
-#include <stdio.h>
-#ifdef __MVS__ /* defined by z/OS compiler */
-#define _POSIX_SOURCE
-#include <cics.h> /* 12 Nov 2011 JAM iscics() function */
-#endif
-#include "charstr.h"
-
-using namespace icu;
-
-#ifndef UPLUG_TRACE
-#define UPLUG_TRACE 0
-#endif
-
-#if UPLUG_TRACE
-#include <stdio.h>
-#define DBG(x) fprintf(stderr, "%s:%d: ",__FILE__,__LINE__); fprintf x
-#endif
-
-/**
- * Internal structure of an ICU plugin.
- */
-
-struct UPlugData {
- UPlugEntrypoint *entrypoint; /**< plugin entrypoint */
- uint32_t structSize; /**< initialized to the size of this structure */
- uint32_t token; /**< must be U_PLUG_TOKEN */
- void *lib; /**< plugin library, or NULL */
- char libName[UPLUG_NAME_MAX]; /**< library name */
- char sym[UPLUG_NAME_MAX]; /**< plugin symbol, or NULL */
- char config[UPLUG_NAME_MAX]; /**< configuration data */
- void *context; /**< user context data */
- char name[UPLUG_NAME_MAX]; /**< name of plugin */
- UPlugLevel level; /**< level of plugin */
- UBool awaitingLoad; /**< TRUE if the plugin is awaiting a load call */
- UBool dontUnload; /**< TRUE if plugin must stay resident (leak plugin and lib) */
- UErrorCode pluginStatus; /**< status code of plugin */
-};
-
-
-
-#define UPLUG_LIBRARY_INITIAL_COUNT 8
-#define UPLUG_PLUGIN_INITIAL_COUNT 12
-
-/**
- * Remove an item
- * @param list the full list
- * @param listSize the number of entries in the list
- * @param memberSize the size of one member
- * @param itemToRemove the item number of the member
- * @return the new listsize
- */
-static int32_t uplug_removeEntryAt(void *list, int32_t listSize, int32_t memberSize, int32_t itemToRemove) {
- uint8_t *bytePtr = (uint8_t *)list;
-
- /* get rid of some bad cases first */
- if(listSize<1) {
- return listSize;
- }
-
- /* is there anything to move? */
- if(listSize > itemToRemove+1) {
- memmove(bytePtr+(itemToRemove*memberSize), bytePtr+((itemToRemove+1)*memberSize), memberSize);
- }
-
- return listSize-1;
-}
-
-
-
-
-#if U_ENABLE_DYLOAD
-/**
- * Library management. Internal.
- * @internal
- */
-struct UPlugLibrary;
-
-/**
- * Library management. Internal.
- * @internal
- */
-typedef struct UPlugLibrary {
- void *lib; /**< library ptr */
- char name[UPLUG_NAME_MAX]; /**< library name */
- uint32_t ref; /**< reference count */
-} UPlugLibrary;
-
-static UPlugLibrary staticLibraryList[UPLUG_LIBRARY_INITIAL_COUNT];
-static UPlugLibrary * libraryList = staticLibraryList;
-static int32_t libraryCount = 0;
-static int32_t libraryMax = UPLUG_LIBRARY_INITIAL_COUNT;
-
-/**
- * Search for a library. Doesn't lock
- * @param libName libname to search for
- * @return the library's struct
- */
-static int32_t searchForLibraryName(const char *libName) {
- int32_t i;
-
- for(i=0;i<libraryCount;i++) {
- if(!uprv_strcmp(libName, libraryList[i].name)) {
- return i;
- }
- }
- return -1;
-}
-
-static int32_t searchForLibrary(void *lib) {
- int32_t i;
-
- for(i=0;i<libraryCount;i++) {
- if(lib==libraryList[i].lib) {
- return i;
- }
- }
- return -1;
-}
-
-U_INTERNAL char * U_EXPORT2
-uplug_findLibrary(void *lib, UErrorCode *status) {
- int32_t libEnt;
- char *ret = NULL;
- if(U_FAILURE(*status)) {
- return NULL;
- }
- libEnt = searchForLibrary(lib);
- if(libEnt!=-1) {
- ret = libraryList[libEnt].name;
- } else {
- *status = U_MISSING_RESOURCE_ERROR;
- }
- return ret;
-}
-
-U_INTERNAL void * U_EXPORT2
-uplug_openLibrary(const char *libName, UErrorCode *status) {
- int32_t libEntry = -1;
- void *lib = NULL;
-
- if(U_FAILURE(*status)) return NULL;
-
- libEntry = searchForLibraryName(libName);
- if(libEntry == -1) {
- libEntry = libraryCount++;
- if(libraryCount >= libraryMax) {
- /* Ran out of library slots. Statically allocated because we can't depend on allocating memory.. */
- *status = U_MEMORY_ALLOCATION_ERROR;
-#if UPLUG_TRACE
- DBG((stderr, "uplug_openLibrary() - out of library slots (max %d)\n", libraryMax));
-#endif
- return NULL;
- }
- /* Some operating systems don't want
- DL operations from multiple threads. */
- libraryList[libEntry].lib = uprv_dl_open(libName, status);
-#if UPLUG_TRACE
- DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib));
-#endif
-
- if(libraryList[libEntry].lib == NULL || U_FAILURE(*status)) {
- /* cleanup. */
- libraryList[libEntry].lib = NULL; /* failure with open */
- libraryList[libEntry].name[0] = 0;
-#if UPLUG_TRACE
- DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib));
-#endif
- /* no need to free - just won't increase the count. */
- libraryCount--;
- } else { /* is it still there? */
- /* link it in */
- uprv_strncpy(libraryList[libEntry].name,libName,UPLUG_NAME_MAX);
- libraryList[libEntry].ref=1;
- lib = libraryList[libEntry].lib;
- }
-
- } else {
- lib = libraryList[libEntry].lib;
- libraryList[libEntry].ref++;
- }
- return lib;
-}
-
-U_INTERNAL void U_EXPORT2
-uplug_closeLibrary(void *lib, UErrorCode *status) {
- int32_t i;
-
-#if UPLUG_TRACE
- DBG((stderr, "uplug_closeLibrary(%p,%s) list %p\n", lib, u_errorName(*status), (void*)libraryList));
-#endif
- if(U_FAILURE(*status)) return;
-
- for(i=0;i<libraryCount;i++) {
- if(lib==libraryList[i].lib) {
- if(--(libraryList[i].ref) == 0) {
- uprv_dl_close(libraryList[i].lib, status);
- libraryCount = uplug_removeEntryAt(libraryList, libraryCount, sizeof(*libraryList), i);
- }
- return;
- }
- }
- *status = U_INTERNAL_PROGRAM_ERROR; /* could not find the entry! */
-}
-
-#endif
-
-static UPlugData pluginList[UPLUG_PLUGIN_INITIAL_COUNT];
-static int32_t pluginCount = 0;
-
-
-
-
-static int32_t uplug_pluginNumber(UPlugData* d) {
- UPlugData *pastPlug = &pluginList[pluginCount];
- if(d<=pluginList) {
- return 0;
- } else if(d>=pastPlug) {
- return pluginCount;
- } else {
- return (d-pluginList)/sizeof(pluginList[0]);
- }
-}
-
-
-U_CAPI UPlugData * U_EXPORT2
-uplug_nextPlug(UPlugData *prior) {
- if(prior==NULL) {
- return pluginList;
- } else {
- UPlugData *nextPlug = &prior[1];
- UPlugData *pastPlug = &pluginList[pluginCount];
-
- if(nextPlug>=pastPlug) {
- return NULL;
- } else {
- return nextPlug;
- }
- }
-}
-
-
-
-/**
- * Call the plugin with some params
- */
-static void uplug_callPlug(UPlugData *plug, UPlugReason reason, UErrorCode *status) {
- UPlugTokenReturn token;
- if(plug==NULL||U_FAILURE(*status)) {
- return;
- }
- token = (*(plug->entrypoint))(plug, reason, status);
- if(token!=UPLUG_TOKEN) {
- *status = U_INTERNAL_PROGRAM_ERROR;
- }
-}
-
-
-static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) {
- if(plug->awaitingLoad) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
- *status = U_INTERNAL_PROGRAM_ERROR;
- return;
- }
- if(U_SUCCESS(plug->pluginStatus)) {
- /* Don't unload a plug which has a failing load status - means it didn't actually load. */
- uplug_callPlug(plug, UPLUG_REASON_UNLOAD, status);
- }
-}
-
-static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) {
- if(!plug->awaitingLoad || !(plug->level == UPLUG_LEVEL_UNKNOWN) ) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
- *status = U_INTERNAL_PROGRAM_ERROR;
- return;
- }
- plug->level = UPLUG_LEVEL_INVALID;
- uplug_callPlug(plug, UPLUG_REASON_QUERY, status);
- if(U_SUCCESS(*status)) {
- if(plug->level == UPLUG_LEVEL_INVALID) {
- plug->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
- plug->awaitingLoad = FALSE;
- }
- } else {
- plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
- plug->awaitingLoad = FALSE;
- }
-}
-
-
-static void uplug_loadPlug(UPlugData *plug, UErrorCode *status) {
- if(U_FAILURE(*status)) {
- return;
- }
- if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
- *status = U_INTERNAL_PROGRAM_ERROR;
- return;
- }
- uplug_callPlug(plug, UPLUG_REASON_LOAD, status);
- plug->awaitingLoad = FALSE;
- if(!U_SUCCESS(*status)) {
- plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
- }
-}
-
-static UPlugData *uplug_allocateEmptyPlug(UErrorCode *status)
-{
- UPlugData *plug = NULL;
-
- if(U_FAILURE(*status)) {
- return NULL;
- }
-
- if(pluginCount == UPLUG_PLUGIN_INITIAL_COUNT) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- plug = &pluginList[pluginCount++];
-
- plug->token = UPLUG_TOKEN;
- plug->structSize = sizeof(UPlugData);
- plug->name[0]=0;
- plug->level = UPLUG_LEVEL_UNKNOWN; /* initialize to null state */
- plug->awaitingLoad = TRUE;
- plug->dontUnload = FALSE;
- plug->pluginStatus = U_ZERO_ERROR;
- plug->libName[0] = 0;
- plug->config[0]=0;
- plug->sym[0]=0;
- plug->lib=NULL;
- plug->entrypoint=NULL;
-
-
- return plug;
-}
-
-static UPlugData *uplug_allocatePlug(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *symName,
- UErrorCode *status) {
- UPlugData *plug = uplug_allocateEmptyPlug(status);
- if(U_FAILURE(*status)) {
- return NULL;
- }
-
- if(config!=NULL) {
- uprv_strncpy(plug->config, config, UPLUG_NAME_MAX);
- } else {
- plug->config[0] = 0;
- }
-
- if(symName!=NULL) {
- uprv_strncpy(plug->sym, symName, UPLUG_NAME_MAX);
- } else {
- plug->sym[0] = 0;
- }
-
- plug->entrypoint = entrypoint;
- plug->lib = lib;
- uplug_queryPlug(plug, status);
-
- return plug;
-}
-
-static void uplug_deallocatePlug(UPlugData *plug, UErrorCode *status) {
- UErrorCode subStatus = U_ZERO_ERROR;
- if(!plug->dontUnload) {
-#if U_ENABLE_DYLOAD
- uplug_closeLibrary(plug->lib, &subStatus);
-#endif
- }
- plug->lib = NULL;
- if(U_SUCCESS(*status) && U_FAILURE(subStatus)) {
- *status = subStatus;
- }
- /* shift plugins up and decrement count. */
- if(U_SUCCESS(*status)) {
- /* all ok- remove. */
- pluginCount = uplug_removeEntryAt(pluginList, pluginCount, sizeof(plug[0]), uplug_pluginNumber(plug));
- } else {
- /* not ok- leave as a message. */
- plug->awaitingLoad=FALSE;
- plug->entrypoint=0;
- plug->dontUnload=TRUE;
- }
-}
-
-static void uplug_doUnloadPlug(UPlugData *plugToRemove, UErrorCode *status) {
- if(plugToRemove != NULL) {
- uplug_unloadPlug(plugToRemove, status);
- uplug_deallocatePlug(plugToRemove, status);
- }
-}
-
-U_CAPI void U_EXPORT2
-uplug_removePlug(UPlugData *plug, UErrorCode *status) {
- UPlugData *cursor = NULL;
- UPlugData *plugToRemove = NULL;
- if(U_FAILURE(*status)) return;
-
- for(cursor=pluginList;cursor!=NULL;) {
- if(cursor==plug) {
- plugToRemove = plug;
- cursor=NULL;
- } else {
- cursor = uplug_nextPlug(cursor);
- }
- }
-
- uplug_doUnloadPlug(plugToRemove, status);
-}
-
-
-
-
-U_CAPI void U_EXPORT2
-uplug_setPlugNoUnload(UPlugData *data, UBool dontUnload)
-{
- data->dontUnload = dontUnload;
-}
-
-
-U_CAPI void U_EXPORT2
-uplug_setPlugLevel(UPlugData *data, UPlugLevel level) {
- data->level = level;
-}
-
-
-U_CAPI UPlugLevel U_EXPORT2
-uplug_getPlugLevel(UPlugData *data) {
- return data->level;
-}
-
-
-U_CAPI void U_EXPORT2
-uplug_setPlugName(UPlugData *data, const char *name) {
- uprv_strncpy(data->name, name, UPLUG_NAME_MAX);
-}
-
-
-U_CAPI const char * U_EXPORT2
-uplug_getPlugName(UPlugData *data) {
- return data->name;
-}
-
-
-U_CAPI const char * U_EXPORT2
-uplug_getSymbolName(UPlugData *data) {
- return data->sym;
-}
-
-U_CAPI const char * U_EXPORT2
-uplug_getLibraryName(UPlugData *data, UErrorCode *status) {
- if(data->libName[0]) {
- return data->libName;
- } else {
-#if U_ENABLE_DYLOAD
- return uplug_findLibrary(data->lib, status);
-#else
- return NULL;
-#endif
- }
-}
-
-U_CAPI void * U_EXPORT2
-uplug_getLibrary(UPlugData *data) {
- return data->lib;
-}
-
-U_CAPI void * U_EXPORT2
-uplug_getContext(UPlugData *data) {
- return data->context;
-}
-
-
-U_CAPI void U_EXPORT2
-uplug_setContext(UPlugData *data, void *context) {
- data->context = context;
-}
-
-U_CAPI const char* U_EXPORT2
-uplug_getConfiguration(UPlugData *data) {
- return data->config;
-}
-
-U_INTERNAL UPlugData* U_EXPORT2
-uplug_getPlugInternal(int32_t n) {
- if(n <0 || n >= pluginCount) {
- return NULL;
- } else {
- return &(pluginList[n]);
- }
-}
-
-
-U_CAPI UErrorCode U_EXPORT2
-uplug_getPlugLoadStatus(UPlugData *plug) {
- return plug->pluginStatus;
-}
-
-
-
-
-/**
- * Initialize a plugin fron an entrypoint and library - but don't load it.
- */
-static UPlugData* uplug_initPlugFromEntrypointAndLibrary(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *sym,
- UErrorCode *status) {
- UPlugData *plug = NULL;
-
- plug = uplug_allocatePlug(entrypoint, config, lib, sym, status);
-
- if(U_SUCCESS(*status)) {
- return plug;
- } else {
- uplug_deallocatePlug(plug, status);
- return NULL;
- }
-}
-
-U_CAPI UPlugData* U_EXPORT2
-uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status) {
- UPlugData* plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, NULL, NULL, status);
- uplug_loadPlug(plug, status);
- return plug;
-}
-
-#if U_ENABLE_DYLOAD
-
-static UPlugData*
-uplug_initErrorPlug(const char *libName, const char *sym, const char *config, const char *nameOrError, UErrorCode loadStatus, UErrorCode *status)
-{
- UPlugData *plug = uplug_allocateEmptyPlug(status);
- if(U_FAILURE(*status)) return NULL;
-
- plug->pluginStatus = loadStatus;
- plug->awaitingLoad = FALSE; /* Won't load. */
- plug->dontUnload = TRUE; /* cannot unload. */
-
- if(sym!=NULL) {
- uprv_strncpy(plug->sym, sym, UPLUG_NAME_MAX);
- }
-
- if(libName!=NULL) {
- uprv_strncpy(plug->libName, libName, UPLUG_NAME_MAX);
- }
-
- if(nameOrError!=NULL) {
- uprv_strncpy(plug->name, nameOrError, UPLUG_NAME_MAX);
- }
-
- if(config!=NULL) {
- uprv_strncpy(plug->config, config, UPLUG_NAME_MAX);
- }
-
- return plug;
-}
-
-/**
- * Fetch a plugin from DLL, and then initialize it from a library- but don't load it.
- */
-static UPlugData*
-uplug_initPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) {
- void *lib = NULL;
- UPlugData *plug = NULL;
- if(U_FAILURE(*status)) { return NULL; }
- lib = uplug_openLibrary(libName, status);
- if(lib!=NULL && U_SUCCESS(*status)) {
- UPlugEntrypoint *entrypoint = NULL;
- entrypoint = (UPlugEntrypoint*)uprv_dlsym_func(lib, sym, status);
-
- if(entrypoint!=NULL&&U_SUCCESS(*status)) {
- plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, lib, sym, status);
- if(plug!=NULL&&U_SUCCESS(*status)) {
- plug->lib = lib; /* plug takes ownership of library */
- lib = NULL; /* library is now owned by plugin. */
- }
- } else {
- UErrorCode subStatus = U_ZERO_ERROR;
- plug = uplug_initErrorPlug(libName,sym,config,"ERROR: Could not load entrypoint",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
- }
- if(lib!=NULL) { /* still need to close the lib */
- UErrorCode subStatus = U_ZERO_ERROR;
- uplug_closeLibrary(lib, &subStatus); /* don't care here */
- }
- } else {
- UErrorCode subStatus = U_ZERO_ERROR;
- plug = uplug_initErrorPlug(libName,sym,config,"ERROR: could not load library",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
- }
- return plug;
-}
-
-U_CAPI UPlugData* U_EXPORT2
-uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) {
- UPlugData *plug = NULL;
- if(U_FAILURE(*status)) { return NULL; }
- plug = uplug_initPlugFromLibrary(libName, sym, config, status);
- uplug_loadPlug(plug, status);
-
- return plug;
-}
-
-#endif
-
-static UPlugLevel gCurrentLevel = UPLUG_LEVEL_LOW;
-
-U_CAPI UPlugLevel U_EXPORT2 uplug_getCurrentLevel() {
- return gCurrentLevel;
-}
-
-static UBool U_CALLCONV uplug_cleanup(void)
-{
- int32_t i;
-
- UPlugData *pluginToRemove;
- /* cleanup plugs */
- for(i=0;i<pluginCount;i++) {
- UErrorCode subStatus = U_ZERO_ERROR;
- pluginToRemove = &pluginList[i];
- /* unload and deallocate */
- uplug_doUnloadPlug(pluginToRemove, &subStatus);
- }
- /* close other held libs? */
- gCurrentLevel = UPLUG_LEVEL_LOW;
- return TRUE;
-}
-
-#if U_ENABLE_DYLOAD
-
-static void uplug_loadWaitingPlugs(UErrorCode *status) {
- int32_t i;
- UPlugLevel currentLevel = uplug_getCurrentLevel();
-
- if(U_FAILURE(*status)) {
- return;
- }
-#if UPLUG_TRACE
- DBG((stderr, "uplug_loadWaitingPlugs() Level: %d\n", currentLevel));
-#endif
- /* pass #1: low level plugs */
- for(i=0;i<pluginCount;i++) {
- UErrorCode subStatus = U_ZERO_ERROR;
- UPlugData *pluginToLoad = &pluginList[i];
- if(pluginToLoad->awaitingLoad) {
- if(pluginToLoad->level == UPLUG_LEVEL_LOW) {
- if(currentLevel > UPLUG_LEVEL_LOW) {
- pluginToLoad->pluginStatus = U_PLUGIN_TOO_HIGH;
- } else {
- UPlugLevel newLevel;
- uplug_loadPlug(pluginToLoad, &subStatus);
- newLevel = uplug_getCurrentLevel();
- if(newLevel > currentLevel) {
- pluginToLoad->pluginStatus = U_PLUGIN_CHANGED_LEVEL_WARNING;
- currentLevel = newLevel;
- }
- }
- pluginToLoad->awaitingLoad = FALSE;
- }
- }
- }
- for(i=0;i<pluginCount;i++) {
- UErrorCode subStatus = U_ZERO_ERROR;
- UPlugData *pluginToLoad = &pluginList[i];
-
- if(pluginToLoad->awaitingLoad) {
- if(pluginToLoad->level == UPLUG_LEVEL_INVALID) {
- pluginToLoad->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
- } else if(pluginToLoad->level == UPLUG_LEVEL_UNKNOWN) {
- pluginToLoad->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
- } else {
- uplug_loadPlug(pluginToLoad, &subStatus);
- }
- pluginToLoad->awaitingLoad = FALSE;
- }
- }
-
-#if UPLUG_TRACE
- DBG((stderr, " Done Loading Plugs. Level: %d\n", (int32_t)uplug_getCurrentLevel()));
-#endif
-}
-
-/* Name of the plugin config file */
-static char plugin_file[2048] = "";
-#endif
-
-U_INTERNAL const char* U_EXPORT2
-uplug_getPluginFile() {
-#if U_ENABLE_DYLOAD && !UCONFIG_NO_FILE_IO
- return plugin_file;
-#else
- return NULL;
-#endif
-}
-
-
-// uplug_init() is called first thing from u_init().
-
-U_CAPI void U_EXPORT2
-uplug_init(UErrorCode *status) {
-#if !U_ENABLE_DYLOAD
- (void)status; /* unused */
-#elif !UCONFIG_NO_FILE_IO
- CharString plugin_dir;
- const char *env = getenv("ICU_PLUGINS");
-
- if(U_FAILURE(*status)) return;
- if(env != NULL) {
- plugin_dir.append(env, -1, *status);
- }
- if(U_FAILURE(*status)) return;
-
-#if defined(DEFAULT_ICU_PLUGINS)
- if(plugin_dir.isEmpty()) {
- plugin_dir.append(DEFAULT_ICU_PLUGINS, -1, *status);
- }
-#endif
-
-#if UPLUG_TRACE
- DBG((stderr, "ICU_PLUGINS=%s\n", plugin_dir.data()));
-#endif
-
- if(!plugin_dir.isEmpty()) {
- FILE *f;
-
- CharString pluginFile;
-#ifdef OS390BATCH
-/* There are potentially a lot of ways to implement a plugin directory on OS390/zOS */
-/* Keeping in mind that unauthorized file access is logged, monitored, and enforced */
-/* I've chosen to open a DDNAME if BATCH and leave it alone for (presumably) UNIX */
-/* System Services. Alternative techniques might be allocating a member in */
-/* SYS1.PARMLIB or setting an environment variable "ICU_PLUGIN_PATH" (?). The */
-/* DDNAME can be connected to a file in the HFS if need be. */
-
- pluginFile.append("//DD:ICUPLUG", -1, *status); /* JAM 20 Oct 2011 */
-#else
- pluginFile.append(plugin_dir, *status);
- pluginFile.append(U_FILE_SEP_STRING, -1, *status);
- pluginFile.append("icuplugins", -1, *status);
- pluginFile.append(U_ICU_VERSION_SHORT, -1, *status);
- pluginFile.append(".txt", -1, *status);
-#endif
-
-#if UPLUG_TRACE
- DBG((stderr, "status=%s\n", u_errorName(*status)));
-#endif
-
- if(U_FAILURE(*status)) {
- return;
- }
- if((size_t)pluginFile.length() > (sizeof(plugin_file)-1)) {
- *status = U_BUFFER_OVERFLOW_ERROR;
-#if UPLUG_TRACE
- DBG((stderr, "status=%s\n", u_errorName(*status)));
-#endif
- return;
- }
-
- /* plugin_file is not used for processing - it is only used
- so that uplug_getPluginFile() works (i.e. icuinfo)
- */
- uprv_strncpy(plugin_file, pluginFile.data(), sizeof(plugin_file));
-
-#if UPLUG_TRACE
- DBG((stderr, "pluginfile= %s len %d/%d\n", plugin_file, (int)strlen(plugin_file), (int)sizeof(plugin_file)));
-#endif
-
-#ifdef __MVS__
- if (iscics()) /* 12 Nov 2011 JAM */
- {
- f = NULL;
- }
- else
-#endif
- {
- f = fopen(pluginFile.data(), "r");
- }
-
- if(f != NULL) {
- char linebuf[1024];
- char *p, *libName=NULL, *symName=NULL, *config=NULL;
- int32_t line = 0;
-
-
- while(fgets(linebuf,1023,f)) {
- line++;
-
- if(!*linebuf || *linebuf=='#') {
- continue;
- } else {
- p = linebuf;
- while(*p&&isspace((int)*p))
- p++;
- if(!*p || *p=='#') continue;
- libName = p;
- while(*p&&!isspace((int)*p)) {
- p++;
- }
- if(!*p || *p=='#') continue; /* no tab after libname */
- *p=0; /* end of libname */
- p++;
- while(*p&&isspace((int)*p)) {
- p++;
- }
- if(!*p||*p=='#') continue; /* no symname after libname +tab */
- symName = p;
- while(*p&&!isspace((int)*p)) {
- p++;
- }
-
- if(*p) { /* has config */
- *p=0;
- ++p;
- while(*p&&isspace((int)*p)) {
- p++;
- }
- if(*p) {
- config = p;
- }
- }
-
- /* chop whitespace at the end of the config */
- if(config!=NULL&&*config!=0) {
- p = config+strlen(config);
- while(p>config&&isspace((int)*(--p))) {
- *p=0;
- }
- }
-
- /* OK, we're good. */
- {
- UErrorCode subStatus = U_ZERO_ERROR;
- UPlugData *plug = uplug_initPlugFromLibrary(libName, symName, config, &subStatus);
- if(U_FAILURE(subStatus) && U_SUCCESS(*status)) {
- *status = subStatus;
- }
-#if UPLUG_TRACE
- DBG((stderr, "PLUGIN libName=[%s], sym=[%s], config=[%s]\n", libName, symName, config));
- DBG((stderr, " -> %p, %s\n", (void*)plug, u_errorName(subStatus)));
-#else
- (void)plug; /* unused */
-#endif
- }
- }
- }
- fclose(f);
- } else {
-#if UPLUG_TRACE
- DBG((stderr, "Can't open plugin file %s\n", plugin_file));
-#endif
- }
- }
- uplug_loadWaitingPlugs(status);
-#endif /* U_ENABLE_DYLOAD */
- gCurrentLevel = UPLUG_LEVEL_HIGH;
- ucln_registerCleanup(UCLN_UPLUG, uplug_cleanup);
-}
-
-#endif
-
-
diff --git a/contrib/libs/icu/common/icuplugimp.h b/contrib/libs/icu/common/icuplugimp.h
deleted file mode 100644
index 3cad8f87f18..00000000000
--- a/contrib/libs/icu/common/icuplugimp.h
+++ /dev/null
@@ -1,93 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2009-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* FILE NAME : icuplugimp.h
-*
-* Internal functions for the ICU plugin system
-*
-* Date Name Description
-* 10/29/2009 sl New.
-******************************************************************************
-*/
-
-
-#ifndef ICUPLUGIMP_H
-#define ICUPLUGIMP_H
-
-#include "unicode/icuplug.h"
-
-#if UCONFIG_ENABLE_PLUGINS
-
-/*========================*/
-/** @{ Library Manipulation
- */
-
-/**
- * Open a library, adding a reference count if needed.
- * @param libName library name to load
- * @param status error code
- * @return the library pointer, or NULL
- * @internal internal use only
- */
-U_INTERNAL void * U_EXPORT2
-uplug_openLibrary(const char *libName, UErrorCode *status);
-
-/**
- * Close a library, if its reference count is 0
- * @param lib the library to close
- * @param status error code
- * @internal internal use only
- */
-U_INTERNAL void U_EXPORT2
-uplug_closeLibrary(void *lib, UErrorCode *status);
-
-/**
- * Get a library's name, or NULL if not found.
- * @param lib the library's name
- * @param status error code
- * @return the library name, or NULL if not found.
- * @internal internal use only
- */
-U_INTERNAL char * U_EXPORT2
-uplug_findLibrary(void *lib, UErrorCode *status);
-
-/** @} */
-
-/*========================*/
-/** {@ ICU Plugin internal interfaces
- */
-
-/**
- * Initialize the plugins
- * @param status error result
- * @internal - Internal use only.
- */
-U_INTERNAL void U_EXPORT2
-uplug_init(UErrorCode *status);
-
-/**
- * Get raw plug N
- * @internal - Internal use only
- */
-U_INTERNAL UPlugData* U_EXPORT2
-uplug_getPlugInternal(int32_t n);
-
-/**
- * Get the name of the plugin file.
- * @internal - Internal use only.
- */
-U_INTERNAL const char* U_EXPORT2
-uplug_getPluginFile(void);
-
-/** @} */
-
-#endif
-
-#endif
diff --git a/contrib/libs/icu/common/loadednormalizer2impl.cpp b/contrib/libs/icu/common/loadednormalizer2impl.cpp
deleted file mode 100644
index e4b36f1055f..00000000000
--- a/contrib/libs/icu/common/loadednormalizer2impl.cpp
+++ /dev/null
@@ -1,418 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* loadednormalizer2impl.cpp
-*
-* created on: 2014sep03
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_NORMALIZATION
-
-#include "unicode/udata.h"
-#include "unicode/localpointer.h"
-#include "unicode/normalizer2.h"
-#include "unicode/ucptrie.h"
-#include "unicode/unistr.h"
-#include "unicode/unorm.h"
-#include "cstring.h"
-#include "mutex.h"
-#include "norm2allmodes.h"
-#include "normalizer2impl.h"
-#include "uassert.h"
-#include "ucln_cmn.h"
-#include "uhash.h"
-
-U_NAMESPACE_BEGIN
-
-class LoadedNormalizer2Impl : public Normalizer2Impl {
-public:
- LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
- virtual ~LoadedNormalizer2Impl();
-
- void load(const char *packageName, const char *name, UErrorCode &errorCode);
-
-private:
- static UBool U_CALLCONV
- isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
-
- UDataMemory *memory;
- UCPTrie *ownedTrie;
-};
-
-LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
- udata_close(memory);
- ucptrie_close(ownedTrie);
-}
-
-UBool U_CALLCONV
-LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
- const char * /* type */, const char * /*name*/,
- const UDataInfo *pInfo) {
- if(
- pInfo->size>=20 &&
- pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
- pInfo->charsetFamily==U_CHARSET_FAMILY &&
- pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
- pInfo->dataFormat[1]==0x72 &&
- pInfo->dataFormat[2]==0x6d &&
- pInfo->dataFormat[3]==0x32 &&
- pInfo->formatVersion[0]==4
- ) {
- // Normalizer2Impl *me=(Normalizer2Impl *)context;
- // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
-void
-LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return;
- }
- memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
- if(U_FAILURE(errorCode)) {
- return;
- }
- const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
- const int32_t *inIndexes=(const int32_t *)inBytes;
- int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
- if(indexesLength<=IX_MIN_LCCC_CP) {
- errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
- return;
- }
-
- int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
- int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
- ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
- inBytes+offset, nextOffset-offset, NULL,
- &errorCode);
- if(U_FAILURE(errorCode)) {
- return;
- }
-
- offset=nextOffset;
- nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
- const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
-
- // smallFCD: new in formatVersion 2
- offset=nextOffset;
- const uint8_t *inSmallFCD=inBytes+offset;
-
- init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
-}
-
-// instance cache ---------------------------------------------------------- ***
-
-Norm2AllModes *
-Norm2AllModes::createInstance(const char *packageName,
- const char *name,
- UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
- if(impl==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- impl->load(packageName, name, errorCode);
- return createInstance(impl, errorCode);
-}
-
-U_CDECL_BEGIN
-static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
-U_CDECL_END
-
-#if !NORM2_HARDCODE_NFC_DATA
-static Norm2AllModes *nfcSingleton;
-static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
-#endif
-
-static Norm2AllModes *nfkcSingleton;
-static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
-
-static Norm2AllModes *nfkc_cfSingleton;
-static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
-
-static UHashtable *cache=NULL;
-
-// UInitOnce singleton initialization function
-static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
-#if !NORM2_HARDCODE_NFC_DATA
- if (uprv_strcmp(what, "nfc") == 0) {
- nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
- } else
-#endif
- if (uprv_strcmp(what, "nfkc") == 0) {
- nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
- } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
- nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
- } else {
- UPRV_UNREACHABLE; // Unknown singleton
- }
- ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
-}
-
-U_CDECL_BEGIN
-
-static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
- delete (Norm2AllModes *)allModes;
-}
-
-static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
-#if !NORM2_HARDCODE_NFC_DATA
- delete nfcSingleton;
- nfcSingleton = NULL;
- nfcInitOnce.reset();
-#endif
-
- delete nfkcSingleton;
- nfkcSingleton = NULL;
- nfkcInitOnce.reset();
-
- delete nfkc_cfSingleton;
- nfkc_cfSingleton = NULL;
- nfkc_cfInitOnce.reset();
-
- uhash_close(cache);
- cache=NULL;
- return TRUE;
-}
-
-U_CDECL_END
-
-#if !NORM2_HARDCODE_NFC_DATA
-const Norm2AllModes *
-Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) { return NULL; }
- umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
- return nfcSingleton;
-}
-#endif
-
-const Norm2AllModes *
-Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) { return NULL; }
- umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
- return nfkcSingleton;
-}
-
-const Norm2AllModes *
-Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) { return NULL; }
- umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
- return nfkc_cfSingleton;
-}
-
-#if !NORM2_HARDCODE_NFC_DATA
-const Normalizer2 *
-Normalizer2::getNFCInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
- return allModes!=NULL ? &allModes->comp : NULL;
-}
-
-const Normalizer2 *
-Normalizer2::getNFDInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
- return allModes!=NULL ? &allModes->decomp : NULL;
-}
-
-const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
- return allModes!=NULL ? &allModes->fcd : NULL;
-}
-
-const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
- return allModes!=NULL ? &allModes->fcc : NULL;
-}
-
-const Normalizer2Impl *
-Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
- return allModes!=NULL ? allModes->impl : NULL;
-}
-#endif
-
-const Normalizer2 *
-Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
- return allModes!=NULL ? &allModes->comp : NULL;
-}
-
-const Normalizer2 *
-Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
- return allModes!=NULL ? &allModes->decomp : NULL;
-}
-
-const Normalizer2 *
-Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
- return allModes!=NULL ? &allModes->comp : NULL;
-}
-
-const Normalizer2 *
-Normalizer2::getInstance(const char *packageName,
- const char *name,
- UNormalization2Mode mode,
- UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- if(name==NULL || *name==0) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- const Norm2AllModes *allModes=NULL;
- if(packageName==NULL) {
- if(0==uprv_strcmp(name, "nfc")) {
- allModes=Norm2AllModes::getNFCInstance(errorCode);
- } else if(0==uprv_strcmp(name, "nfkc")) {
- allModes=Norm2AllModes::getNFKCInstance(errorCode);
- } else if(0==uprv_strcmp(name, "nfkc_cf")) {
- allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
- }
- }
- if(allModes==NULL && U_SUCCESS(errorCode)) {
- {
- Mutex lock;
- if(cache!=NULL) {
- allModes=(Norm2AllModes *)uhash_get(cache, name);
- }
- }
- if(allModes==NULL) {
- ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
- LocalPointer<Norm2AllModes> localAllModes(
- Norm2AllModes::createInstance(packageName, name, errorCode));
- if(U_SUCCESS(errorCode)) {
- Mutex lock;
- if(cache==NULL) {
- cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- uhash_setKeyDeleter(cache, uprv_free);
- uhash_setValueDeleter(cache, deleteNorm2AllModes);
- }
- void *temp=uhash_get(cache, name);
- if(temp==NULL) {
- int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
- char *nameCopy=(char *)uprv_malloc(keyLength);
- if(nameCopy==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- uprv_memcpy(nameCopy, name, keyLength);
- allModes=localAllModes.getAlias();
- uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
- } else {
- // race condition
- allModes=(Norm2AllModes *)temp;
- }
- }
- }
- }
- if(allModes!=NULL && U_SUCCESS(errorCode)) {
- switch(mode) {
- case UNORM2_COMPOSE:
- return &allModes->comp;
- case UNORM2_DECOMPOSE:
- return &allModes->decomp;
- case UNORM2_FCD:
- return &allModes->fcd;
- case UNORM2_COMPOSE_CONTIGUOUS:
- return &allModes->fcc;
- default:
- break; // do nothing
- }
- }
- return NULL;
-}
-
-const Normalizer2 *
-Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- switch(mode) {
- case UNORM_NFD:
- return Normalizer2::getNFDInstance(errorCode);
- case UNORM_NFKD:
- return Normalizer2::getNFKDInstance(errorCode);
- case UNORM_NFC:
- return Normalizer2::getNFCInstance(errorCode);
- case UNORM_NFKC:
- return Normalizer2::getNFKCInstance(errorCode);
- case UNORM_FCD:
- return getFCDInstance(errorCode);
- default: // UNORM_NONE
- return getNoopInstance(errorCode);
- }
-}
-
-const Normalizer2Impl *
-Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
- return allModes!=NULL ? allModes->impl : NULL;
-}
-
-const Normalizer2Impl *
-Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
- return allModes!=NULL ? allModes->impl : NULL;
-}
-
-U_NAMESPACE_END
-
-// C API ------------------------------------------------------------------- ***
-
-U_NAMESPACE_USE
-
-U_CAPI const UNormalizer2 * U_EXPORT2
-unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
- return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
-}
-
-U_CAPI const UNormalizer2 * U_EXPORT2
-unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
- return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
-}
-
-U_CAPI const UNormalizer2 * U_EXPORT2
-unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
- return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
-}
-
-U_CAPI const UNormalizer2 * U_EXPORT2
-unorm2_getInstance(const char *packageName,
- const char *name,
- UNormalization2Mode mode,
- UErrorCode *pErrorCode) {
- return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
-}
-
-U_CFUNC UNormalizationCheckResult
-unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
- if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
- return UNORM_YES;
- }
- UErrorCode errorCode=U_ZERO_ERROR;
- const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
- if(U_SUCCESS(errorCode)) {
- return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
- } else {
- return UNORM_MAYBE;
- }
-}
-
-#endif // !UCONFIG_NO_NORMALIZATION
diff --git a/contrib/libs/icu/common/localebuilder.cpp b/contrib/libs/icu/common/localebuilder.cpp
deleted file mode 100644
index 1dd8131e589..00000000000
--- a/contrib/libs/icu/common/localebuilder.cpp
+++ /dev/null
@@ -1,468 +0,0 @@
-// © 2019 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-#include <utility>
-
-#include "bytesinkutil.h" // CharStringByteSink
-#include "charstr.h"
-#include "cstring.h"
-#include "ulocimp.h"
-#include "unicode/localebuilder.h"
-#include "unicode/locid.h"
-
-U_NAMESPACE_BEGIN
-
-#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
-#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
-
-const char* kAttributeKey = "attribute";
-
-static bool _isExtensionSubtags(char key, const char* s, int32_t len) {
- switch (uprv_tolower(key)) {
- case 'u':
- return ultag_isUnicodeExtensionSubtags(s, len);
- case 't':
- return ultag_isTransformedExtensionSubtags(s, len);
- case 'x':
- return ultag_isPrivateuseValueSubtags(s, len);
- default:
- return ultag_isExtensionSubtags(s, len);
- }
-}
-
-LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
- script_(), region_(), variant_(nullptr), extensions_(nullptr)
-{
- language_[0] = 0;
- script_[0] = 0;
- region_[0] = 0;
-}
-
-LocaleBuilder::~LocaleBuilder()
-{
- delete variant_;
- delete extensions_;
-}
-
-LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
-{
- clear();
- setLanguage(locale.getLanguage());
- setScript(locale.getScript());
- setRegion(locale.getCountry());
- setVariant(locale.getVariant());
- extensions_ = locale.clone();
- if (extensions_ == nullptr) {
- status_ = U_MEMORY_ALLOCATION_ERROR;
- }
- return *this;
-}
-
-LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
-{
- Locale l = Locale::forLanguageTag(tag, status_);
- if (U_FAILURE(status_)) { return *this; }
- // Because setLocale will reset status_ we need to return
- // first if we have error in forLanguageTag.
- setLocale(l);
- return *this;
-}
-
-static void setField(StringPiece input, char* dest, UErrorCode& errorCode,
- UBool (*test)(const char*, int32_t)) {
- if (U_FAILURE(errorCode)) { return; }
- if (input.empty()) {
- dest[0] = '\0';
- } else if (test(input.data(), input.length())) {
- uprv_memcpy(dest, input.data(), input.length());
- dest[input.length()] = '\0';
- } else {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- }
-}
-
-LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
-{
- setField(language, language_, status_, &ultag_isLanguageSubtag);
- return *this;
-}
-
-LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
-{
- setField(script, script_, status_, &ultag_isScriptSubtag);
- return *this;
-}
-
-LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
-{
- setField(region, region_, status_, &ultag_isRegionSubtag);
- return *this;
-}
-
-static void transform(char* data, int32_t len) {
- for (int32_t i = 0; i < len; i++, data++) {
- if (*data == '_') {
- *data = '-';
- } else {
- *data = uprv_tolower(*data);
- }
- }
-}
-
-LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
-{
- if (U_FAILURE(status_)) { return *this; }
- if (variant.empty()) {
- delete variant_;
- variant_ = nullptr;
- return *this;
- }
- CharString* new_variant = new CharString(variant, status_);
- if (U_FAILURE(status_)) { return *this; }
- if (new_variant == nullptr) {
- status_ = U_MEMORY_ALLOCATION_ERROR;
- return *this;
- }
- transform(new_variant->data(), new_variant->length());
- if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) {
- delete new_variant;
- status_ = U_ILLEGAL_ARGUMENT_ERROR;
- return *this;
- }
- delete variant_;
- variant_ = new_variant;
- return *this;
-}
-
-static bool
-_isKeywordValue(const char* key, const char* value, int32_t value_len)
-{
- if (key[1] == '\0') {
- // one char key
- return (UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
- _isExtensionSubtags(key[0], value, value_len));
- } else if (uprv_strcmp(key, kAttributeKey) == 0) {
- // unicode attributes
- return ultag_isUnicodeLocaleAttributes(value, value_len);
- }
- // otherwise: unicode extension value
- // We need to convert from legacy key/value to unicode
- // key/value
- const char* unicode_locale_key = uloc_toUnicodeLocaleKey(key);
- const char* unicode_locale_type = uloc_toUnicodeLocaleType(key, value);
-
- return unicode_locale_key && unicode_locale_type &&
- ultag_isUnicodeLocaleKey(unicode_locale_key, -1) &&
- ultag_isUnicodeLocaleType(unicode_locale_type, -1);
-}
-
-static void
-_copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
- Locale& to, bool validate, UErrorCode& errorCode)
-{
- if (U_FAILURE(errorCode)) { return; }
- LocalPointer<icu::StringEnumeration> ownedKeywords;
- if (keywords == nullptr) {
- ownedKeywords.adoptInstead(from.createKeywords(errorCode));
- if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
- keywords = ownedKeywords.getAlias();
- }
- const char* key;
- while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
- CharString value;
- CharStringByteSink sink(&value);
- from.getKeywordValue(key, sink, errorCode);
- if (U_FAILURE(errorCode)) { return; }
- if (uprv_strcmp(key, kAttributeKey) == 0) {
- transform(value.data(), value.length());
- }
- if (validate &&
- !_isKeywordValue(key, value.data(), value.length())) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- to.setKeywordValue(key, value.data(), errorCode);
- if (U_FAILURE(errorCode)) { return; }
- }
-}
-
-void static
-_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
-{
- // Clear Unicode attributes
- locale.setKeywordValue(kAttributeKey, "", errorCode);
-
- // Clear all Unicode keyword values
- LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode));
- if (U_FAILURE(errorCode) || iter.isNull()) { return; }
- const char* key;
- while ((key = iter->next(nullptr, errorCode)) != nullptr) {
- locale.setUnicodeKeywordValue(key, nullptr, errorCode);
- }
-}
-
-static void
-_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
-{
- // Add the unicode extensions to extensions_
- CharString locale_str("und-u-", errorCode);
- locale_str.append(value, errorCode);
- _copyExtensions(
- Locale::forLanguageTag(locale_str.data(), errorCode), nullptr,
- locale, false, errorCode);
-}
-
-LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
-{
- if (U_FAILURE(status_)) { return *this; }
- if (!UPRV_ISALPHANUM(key)) {
- status_ = U_ILLEGAL_ARGUMENT_ERROR;
- return *this;
- }
- CharString value_str(value, status_);
- if (U_FAILURE(status_)) { return *this; }
- transform(value_str.data(), value_str.length());
- if (!value_str.isEmpty() &&
- !_isExtensionSubtags(key, value_str.data(), value_str.length())) {
- status_ = U_ILLEGAL_ARGUMENT_ERROR;
- return *this;
- }
- if (extensions_ == nullptr) {
- extensions_ = new Locale();
- if (extensions_ == nullptr) {
- status_ = U_MEMORY_ALLOCATION_ERROR;
- return *this;
- }
- }
- if (uprv_tolower(key) != 'u') {
- // for t, x and others extension.
- extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),
- status_);
- return *this;
- }
- _clearUAttributesAndKeyType(*extensions_, status_);
- if (U_FAILURE(status_)) { return *this; }
- if (!value.empty()) {
- _setUnicodeExtensions(*extensions_, value_str, status_);
- }
- return *this;
-}
-
-LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
- StringPiece key, StringPiece type)
-{
- if (U_FAILURE(status_)) { return *this; }
- if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) ||
- (!type.empty() &&
- !ultag_isUnicodeLocaleType(type.data(), type.length()))) {
- status_ = U_ILLEGAL_ARGUMENT_ERROR;
- return *this;
- }
- if (extensions_ == nullptr) {
- extensions_ = new Locale();
- }
- if (extensions_ == nullptr) {
- status_ = U_MEMORY_ALLOCATION_ERROR;
- return *this;
- }
- extensions_->setUnicodeKeywordValue(key, type, status_);
- return *this;
-}
-
-LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
- StringPiece value)
-{
- CharString value_str(value, status_);
- if (U_FAILURE(status_)) { return *this; }
- transform(value_str.data(), value_str.length());
- if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
- status_ = U_ILLEGAL_ARGUMENT_ERROR;
- return *this;
- }
- if (extensions_ == nullptr) {
- extensions_ = new Locale();
- if (extensions_ == nullptr) {
- status_ = U_MEMORY_ALLOCATION_ERROR;
- return *this;
- }
- extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
- return *this;
- }
-
- CharString attributes;
- CharStringByteSink sink(&attributes);
- UErrorCode localErrorCode = U_ZERO_ERROR;
- extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
- if (U_FAILURE(localErrorCode)) {
- CharString new_attributes(value_str.data(), status_);
- // No attributes, set the attribute.
- extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
- return *this;
- }
-
- transform(attributes.data(),attributes.length());
- const char* start = attributes.data();
- const char* limit = attributes.data() + attributes.length();
- CharString new_attributes;
- bool inserted = false;
- while (start < limit) {
- if (!inserted) {
- int cmp = uprv_strcmp(start, value_str.data());
- if (cmp == 0) { return *this; } // Found it in attributes: Just return
- if (cmp > 0) {
- if (!new_attributes.isEmpty()) new_attributes.append('_', status_);
- new_attributes.append(value_str.data(), status_);
- inserted = true;
- }
- }
- if (!new_attributes.isEmpty()) {
- new_attributes.append('_', status_);
- }
- new_attributes.append(start, status_);
- start += uprv_strlen(start) + 1;
- }
- if (!inserted) {
- if (!new_attributes.isEmpty()) {
- new_attributes.append('_', status_);
- }
- new_attributes.append(value_str.data(), status_);
- }
- // Not yet in the attributes, set the attribute.
- extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
- return *this;
-}
-
-LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
- StringPiece value)
-{
- CharString value_str(value, status_);
- if (U_FAILURE(status_)) { return *this; }
- transform(value_str.data(), value_str.length());
- if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
- status_ = U_ILLEGAL_ARGUMENT_ERROR;
- return *this;
- }
- if (extensions_ == nullptr) { return *this; }
- UErrorCode localErrorCode = U_ZERO_ERROR;
- CharString attributes;
- CharStringByteSink sink(&attributes);
- extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
- // get failure, just return
- if (U_FAILURE(localErrorCode)) { return *this; }
- // Do not have any attributes, just return.
- if (attributes.isEmpty()) { return *this; }
-
- char* p = attributes.data();
- // Replace null terminiator in place for _ and - so later
- // we can use uprv_strcmp to compare.
- for (int32_t i = 0; i < attributes.length(); i++, p++) {
- *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
- }
-
- const char* start = attributes.data();
- const char* limit = attributes.data() + attributes.length();
- CharString new_attributes;
- bool found = false;
- while (start < limit) {
- if (uprv_strcmp(start, value_str.data()) == 0) {
- found = true;
- } else {
- if (!new_attributes.isEmpty()) {
- new_attributes.append('_', status_);
- }
- new_attributes.append(start, status_);
- }
- start += uprv_strlen(start) + 1;
- }
- // Found the value in attributes, set the attribute.
- if (found) {
- extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
- }
- return *this;
-}
-
-LocaleBuilder& LocaleBuilder::clear()
-{
- status_ = U_ZERO_ERROR;
- language_[0] = 0;
- script_[0] = 0;
- region_[0] = 0;
- delete variant_;
- variant_ = nullptr;
- clearExtensions();
- return *this;
-}
-
-LocaleBuilder& LocaleBuilder::clearExtensions()
-{
- delete extensions_;
- extensions_ = nullptr;
- return *this;
-}
-
-Locale makeBogusLocale() {
- Locale bogus;
- bogus.setToBogus();
- return bogus;
-}
-
-void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
-{
- if (U_FAILURE(errorCode)) { return; }
- LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
- if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
- // Error, or no extensions to copy.
- return;
- }
- if (extensions_ == nullptr) {
- extensions_ = new Locale();
- if (extensions_ == nullptr) {
- status_ = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- }
- _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
-}
-
-Locale LocaleBuilder::build(UErrorCode& errorCode)
-{
- if (U_FAILURE(errorCode)) {
- return makeBogusLocale();
- }
- if (U_FAILURE(status_)) {
- errorCode = status_;
- return makeBogusLocale();
- }
- CharString locale_str(language_, errorCode);
- if (uprv_strlen(script_) > 0) {
- locale_str.append('-', errorCode).append(StringPiece(script_), errorCode);
- }
- if (uprv_strlen(region_) > 0) {
- locale_str.append('-', errorCode).append(StringPiece(region_), errorCode);
- }
- if (variant_ != nullptr) {
- locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
- }
- if (U_FAILURE(errorCode)) {
- return makeBogusLocale();
- }
- Locale product(locale_str.data());
- if (extensions_ != nullptr) {
- _copyExtensions(*extensions_, nullptr, product, true, errorCode);
- }
- if (U_FAILURE(errorCode)) {
- return makeBogusLocale();
- }
- return product;
-}
-
-UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {
- if (U_FAILURE(outErrorCode)) {
- // Do not overwrite the older error code
- return TRUE;
- }
- outErrorCode = status_;
- return U_FAILURE(outErrorCode);
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/localematcher.cpp b/contrib/libs/icu/common/localematcher.cpp
deleted file mode 100644
index 85db8c8bf32..00000000000
--- a/contrib/libs/icu/common/localematcher.cpp
+++ /dev/null
@@ -1,794 +0,0 @@
-// © 2019 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-
-// localematcher.cpp
-// created: 2019may08 Markus W. Scherer
-
-#ifndef __LOCMATCHER_H__
-#define __LOCMATCHER_H__
-
-#include "unicode/utypes.h"
-#include "unicode/localebuilder.h"
-#include "unicode/localematcher.h"
-#include "unicode/locid.h"
-#include "unicode/stringpiece.h"
-#include "unicode/uloc.h"
-#include "unicode/uobject.h"
-#include "cstring.h"
-#include "localeprioritylist.h"
-#include "loclikelysubtags.h"
-#include "locdistance.h"
-#include "lsr.h"
-#include "uassert.h"
-#include "uhash.h"
-#include "ustr_imp.h"
-#include "uvector.h"
-
-#define UND_LSR LSR("und", "", "", LSR::EXPLICIT_LSR)
-
-/**
- * Indicator for the lifetime of desired-locale objects passed into the LocaleMatcher.
- *
- * @draft ICU 65
- */
-enum ULocMatchLifetime {
- /**
- * Locale objects are temporary.
- * The matcher will make a copy of a locale that will be used beyond one function call.
- *
- * @draft ICU 65
- */
- ULOCMATCH_TEMPORARY_LOCALES,
- /**
- * Locale objects are stored at least as long as the matcher is used.
- * The matcher will keep only a pointer to a locale that will be used beyond one function call,
- * avoiding a copy.
- *
- * @draft ICU 65
- */
- ULOCMATCH_STORED_LOCALES // TODO: permanent? cached? clone?
-};
-#ifndef U_IN_DOXYGEN
-typedef enum ULocMatchLifetime ULocMatchLifetime;
-#endif
-
-U_NAMESPACE_BEGIN
-
-LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) U_NOEXCEPT :
- desiredLocale(src.desiredLocale),
- supportedLocale(src.supportedLocale),
- desiredIndex(src.desiredIndex),
- supportedIndex(src.supportedIndex),
- desiredIsOwned(src.desiredIsOwned) {
- if (desiredIsOwned) {
- src.desiredLocale = nullptr;
- src.desiredIndex = -1;
- src.desiredIsOwned = FALSE;
- }
-}
-
-LocaleMatcher::Result::~Result() {
- if (desiredIsOwned) {
- delete desiredLocale;
- }
-}
-
-LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&src) U_NOEXCEPT {
- this->~Result();
-
- desiredLocale = src.desiredLocale;
- supportedLocale = src.supportedLocale;
- desiredIndex = src.desiredIndex;
- supportedIndex = src.supportedIndex;
- desiredIsOwned = src.desiredIsOwned;
-
- if (desiredIsOwned) {
- src.desiredLocale = nullptr;
- src.desiredIndex = -1;
- src.desiredIsOwned = FALSE;
- }
- return *this;
-}
-
-Locale LocaleMatcher::Result::makeResolvedLocale(UErrorCode &errorCode) const {
- if (U_FAILURE(errorCode) || supportedLocale == nullptr) {
- return Locale::getRoot();
- }
- const Locale *bestDesired = getDesiredLocale();
- if (bestDesired == nullptr || *supportedLocale == *bestDesired) {
- return *supportedLocale;
- }
- LocaleBuilder b;
- b.setLocale(*supportedLocale);
-
- // Copy the region from bestDesired, if there is one.
- const char *region = bestDesired->getCountry();
- if (*region != 0) {
- b.setRegion(region);
- }
-
- // Copy the variants from bestDesired, if there are any.
- // Note that this will override any supportedLocale variants.
- // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
- const char *variants = bestDesired->getVariant();
- if (*variants != 0) {
- b.setVariant(variants);
- }
-
- // Copy the extensions from bestDesired, if there are any.
- // C++ note: The following note, copied from Java, may not be true,
- // as long as C++ copies by legacy ICU keyword, not by extension singleton.
- // Note that this will override any supportedLocale extensions.
- // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
- // (replacing calendar).
- b.copyExtensionsFrom(*bestDesired, errorCode);
- return b.build(errorCode);
-}
-
-LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) U_NOEXCEPT :
- errorCode_(src.errorCode_),
- supportedLocales_(src.supportedLocales_),
- thresholdDistance_(src.thresholdDistance_),
- demotion_(src.demotion_),
- defaultLocale_(src.defaultLocale_),
- favor_(src.favor_),
- direction_(src.direction_) {
- src.supportedLocales_ = nullptr;
- src.defaultLocale_ = nullptr;
-}
-
-LocaleMatcher::Builder::~Builder() {
- delete supportedLocales_;
- delete defaultLocale_;
-}
-
-LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) U_NOEXCEPT {
- this->~Builder();
-
- errorCode_ = src.errorCode_;
- supportedLocales_ = src.supportedLocales_;
- thresholdDistance_ = src.thresholdDistance_;
- demotion_ = src.demotion_;
- defaultLocale_ = src.defaultLocale_;
- favor_ = src.favor_;
- direction_ = src.direction_;
-
- src.supportedLocales_ = nullptr;
- src.defaultLocale_ = nullptr;
- return *this;
-}
-
-void LocaleMatcher::Builder::clearSupportedLocales() {
- if (supportedLocales_ != nullptr) {
- supportedLocales_->removeAllElements();
- }
-}
-
-bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
- if (U_FAILURE(errorCode_)) { return false; }
- if (supportedLocales_ != nullptr) { return true; }
- supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_);
- if (U_FAILURE(errorCode_)) { return false; }
- if (supportedLocales_ == nullptr) {
- errorCode_ = U_MEMORY_ALLOCATION_ERROR;
- return false;
- }
- return true;
-}
-
-LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListString(
- StringPiece locales) {
- LocalePriorityList list(locales, errorCode_);
- if (U_FAILURE(errorCode_)) { return *this; }
- clearSupportedLocales();
- if (!ensureSupportedLocaleVector()) { return *this; }
- int32_t length = list.getLengthIncludingRemoved();
- for (int32_t i = 0; i < length; ++i) {
- Locale *locale = list.orphanLocaleAt(i);
- if (locale == nullptr) { continue; }
- supportedLocales_->addElement(locale, errorCode_);
- if (U_FAILURE(errorCode_)) {
- delete locale;
- break;
- }
- }
- return *this;
-}
-
-LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
- if (U_FAILURE(errorCode_)) { return *this; }
- clearSupportedLocales();
- if (!ensureSupportedLocaleVector()) { return *this; }
- while (locales.hasNext()) {
- const Locale &locale = locales.next();
- Locale *clone = locale.clone();
- if (clone == nullptr) {
- errorCode_ = U_MEMORY_ALLOCATION_ERROR;
- break;
- }
- supportedLocales_->addElement(clone, errorCode_);
- if (U_FAILURE(errorCode_)) {
- delete clone;
- break;
- }
- }
- return *this;
-}
-
-LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
- if (!ensureSupportedLocaleVector()) { return *this; }
- Locale *clone = locale.clone();
- if (clone == nullptr) {
- errorCode_ = U_MEMORY_ALLOCATION_ERROR;
- return *this;
- }
- supportedLocales_->addElement(clone, errorCode_);
- if (U_FAILURE(errorCode_)) {
- delete clone;
- }
- return *this;
-}
-
-LocaleMatcher::Builder &LocaleMatcher::Builder::setDefaultLocale(const Locale *defaultLocale) {
- if (U_FAILURE(errorCode_)) { return *this; }
- Locale *clone = nullptr;
- if (defaultLocale != nullptr) {
- clone = defaultLocale->clone();
- if (clone == nullptr) {
- errorCode_ = U_MEMORY_ALLOCATION_ERROR;
- return *this;
- }
- }
- delete defaultLocale_;
- defaultLocale_ = clone;
- return *this;
-}
-
-LocaleMatcher::Builder &LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag subtag) {
- if (U_FAILURE(errorCode_)) { return *this; }
- favor_ = subtag;
- return *this;
-}
-
-LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion demotion) {
- if (U_FAILURE(errorCode_)) { return *this; }
- demotion_ = demotion;
- return *this;
-}
-
-#if 0
-/**
- * <i>Internal only!</i>
- *
- * @param thresholdDistance the thresholdDistance to set, with -1 = default
- * @return this Builder object
- * @internal
- * @deprecated This API is ICU internal only.
- */
-@Deprecated
-LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int32_t thresholdDistance) {
- if (U_FAILURE(errorCode_)) { return *this; }
- if (thresholdDistance > 100) {
- thresholdDistance = 100;
- }
- thresholdDistance_ = thresholdDistance;
- return *this;
-}
-#endif
-
-UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const {
- if (U_FAILURE(outErrorCode)) { return TRUE; }
- if (U_SUCCESS(errorCode_)) { return FALSE; }
- outErrorCode = errorCode_;
- return TRUE;
-}
-
-LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
- if (U_SUCCESS(errorCode) && U_FAILURE(errorCode_)) {
- errorCode = errorCode_;
- }
- return LocaleMatcher(*this, errorCode);
-}
-
-namespace {
-
-LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale,
- UErrorCode &errorCode) {
- if (U_FAILURE(errorCode) || locale.isBogus() || *locale.getName() == 0 /* "und" */) {
- return UND_LSR;
- } else {
- return likelySubtags.makeMaximizedLsrFrom(locale, errorCode);
- }
-}
-
-int32_t hashLSR(const UHashTok token) {
- const LSR *lsr = static_cast<const LSR *>(token.pointer);
- return lsr->hashCode;
-}
-
-UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
- const LSR *lsr1 = static_cast<const LSR *>(t1.pointer);
- const LSR *lsr2 = static_cast<const LSR *>(t2.pointer);
- return *lsr1 == *lsr2;
-}
-
-} // namespace
-
-int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength,
- UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return suppLength; }
- int32_t index = uhash_geti(supportedLsrToIndex, &lsr);
- if (index == 0) {
- uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), i + 1, &errorCode);
- if (U_SUCCESS(errorCode)) {
- supportedLSRs[suppLength] = &lsr;
- supportedIndexes[suppLength++] = i;
- }
- }
- return suppLength;
-}
-
-LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
- likelySubtags(*XLikelySubtags::getSingleton(errorCode)),
- localeDistance(*LocaleDistance::getSingleton(errorCode)),
- thresholdDistance(builder.thresholdDistance_),
- demotionPerDesiredLocale(0),
- favorSubtag(builder.favor_),
- direction(builder.direction_),
- supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0),
- supportedLsrToIndex(nullptr),
- supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
- ownedDefaultLocale(nullptr), defaultLocale(nullptr) {
- if (U_FAILURE(errorCode)) { return; }
- if (thresholdDistance < 0) {
- thresholdDistance = localeDistance.getDefaultScriptDistance();
- }
- const Locale *def = builder.defaultLocale_;
- LSR builderDefaultLSR;
- const LSR *defLSR = nullptr;
- if (def != nullptr) {
- ownedDefaultLocale = def->clone();
- if (ownedDefaultLocale == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- def = ownedDefaultLocale;
- builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
- if (U_FAILURE(errorCode)) { return; }
- defLSR = &builderDefaultLSR;
- }
- supportedLocalesLength = builder.supportedLocales_ != nullptr ?
- builder.supportedLocales_->size() : 0;
- if (supportedLocalesLength > 0) {
- // Store the supported locales in input order,
- // so that when different types are used (e.g., language tag strings)
- // we can return those by parallel index.
- supportedLocales = static_cast<const Locale **>(
- uprv_malloc(supportedLocalesLength * sizeof(const Locale *)));
- // Supported LRSs in input order.
- // In C++, we store these permanently to simplify ownership management
- // in the hash tables. Duplicate LSRs (if any) are unused overhead.
- lsrs = new LSR[supportedLocalesLength];
- if (supportedLocales == nullptr || lsrs == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- // If the constructor fails partway, we need null pointers for destructibility.
- uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *));
- for (int32_t i = 0; i < supportedLocalesLength; ++i) {
- const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i));
- supportedLocales[i] = locale.clone();
- if (supportedLocales[i] == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- const Locale &supportedLocale = *supportedLocales[i];
- LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode);
- lsr.setHashCode();
- if (U_FAILURE(errorCode)) { return; }
- }
-
- // We need an unordered map from LSR to first supported locale with that LSR,
- // and an ordered list of (LSR, supported index) for
- // the supported locales in the following order:
- // 1. Default locale, if it is supported.
- // 2. Priority locales (aka "paradigm locales") in builder order.
- // 3. Remaining locales in builder order.
- supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
- supportedLocalesLength, &errorCode);
- if (U_FAILURE(errorCode)) { return; }
- supportedLSRs = static_cast<const LSR **>(
- uprv_malloc(supportedLocalesLength * sizeof(const LSR *)));
- supportedIndexes = static_cast<int32_t *>(
- uprv_malloc(supportedLocalesLength * sizeof(int32_t)));
- if (supportedLSRs == nullptr || supportedIndexes == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- int32_t suppLength = 0;
- // Determine insertion order.
- // Add locales immediately that are equivalent to the default.
- MaybeStackArray<int8_t, 100> order(supportedLocalesLength);
- if (order.getAlias() == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- int32_t numParadigms = 0;
- for (int32_t i = 0; i < supportedLocalesLength; ++i) {
- const Locale &locale = *supportedLocales[i];
- const LSR &lsr = lsrs[i];
- if (defLSR == nullptr) {
- U_ASSERT(i == 0);
- def = &locale;
- defLSR = &lsr;
- order[i] = 1;
- suppLength = putIfAbsent(lsr, 0, suppLength, errorCode);
- } else if (lsr.isEquivalentTo(*defLSR)) {
- order[i] = 1;
- suppLength = putIfAbsent(lsr, i, suppLength, errorCode);
- } else if (localeDistance.isParadigmLSR(lsr)) {
- order[i] = 2;
- ++numParadigms;
- } else {
- order[i] = 3;
- }
- if (U_FAILURE(errorCode)) { return; }
- }
- // Add supported paradigm locales.
- int32_t paradigmLimit = suppLength + numParadigms;
- for (int32_t i = 0; i < supportedLocalesLength && suppLength < paradigmLimit; ++i) {
- if (order[i] == 2) {
- suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
- }
- }
- // Add remaining supported locales.
- for (int32_t i = 0; i < supportedLocalesLength; ++i) {
- if (order[i] == 3) {
- suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
- }
- }
- supportedLSRsLength = suppLength;
- // If supportedLSRsLength < supportedLocalesLength then
- // we waste as many array slots as there are duplicate supported LSRs,
- // but the amount of wasted space is small as long as there are few duplicates.
- }
-
- defaultLocale = def;
-
- if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
- demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
- }
-}
-
-LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT :
- likelySubtags(src.likelySubtags),
- localeDistance(src.localeDistance),
- thresholdDistance(src.thresholdDistance),
- demotionPerDesiredLocale(src.demotionPerDesiredLocale),
- favorSubtag(src.favorSubtag),
- direction(src.direction),
- supportedLocales(src.supportedLocales), lsrs(src.lsrs),
- supportedLocalesLength(src.supportedLocalesLength),
- supportedLsrToIndex(src.supportedLsrToIndex),
- supportedLSRs(src.supportedLSRs),
- supportedIndexes(src.supportedIndexes),
- supportedLSRsLength(src.supportedLSRsLength),
- ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale) {
- src.supportedLocales = nullptr;
- src.lsrs = nullptr;
- src.supportedLocalesLength = 0;
- src.supportedLsrToIndex = nullptr;
- src.supportedLSRs = nullptr;
- src.supportedIndexes = nullptr;
- src.supportedLSRsLength = 0;
- src.ownedDefaultLocale = nullptr;
- src.defaultLocale = nullptr;
-}
-
-LocaleMatcher::~LocaleMatcher() {
- for (int32_t i = 0; i < supportedLocalesLength; ++i) {
- delete supportedLocales[i];
- }
- uprv_free(supportedLocales);
- delete[] lsrs;
- uhash_close(supportedLsrToIndex);
- uprv_free(supportedLSRs);
- uprv_free(supportedIndexes);
- delete ownedDefaultLocale;
-}
-
-LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT {
- this->~LocaleMatcher();
-
- thresholdDistance = src.thresholdDistance;
- demotionPerDesiredLocale = src.demotionPerDesiredLocale;
- favorSubtag = src.favorSubtag;
- direction = src.direction;
- supportedLocales = src.supportedLocales;
- lsrs = src.lsrs;
- supportedLocalesLength = src.supportedLocalesLength;
- supportedLsrToIndex = src.supportedLsrToIndex;
- supportedLSRs = src.supportedLSRs;
- supportedIndexes = src.supportedIndexes;
- supportedLSRsLength = src.supportedLSRsLength;
- ownedDefaultLocale = src.ownedDefaultLocale;
- defaultLocale = src.defaultLocale;
-
- src.supportedLocales = nullptr;
- src.lsrs = nullptr;
- src.supportedLocalesLength = 0;
- src.supportedLsrToIndex = nullptr;
- src.supportedLSRs = nullptr;
- src.supportedIndexes = nullptr;
- src.supportedLSRsLength = 0;
- src.ownedDefaultLocale = nullptr;
- src.defaultLocale = nullptr;
- return *this;
-}
-
-class LocaleLsrIterator {
-public:
- LocaleLsrIterator(const XLikelySubtags &likelySubtags, Locale::Iterator &locales,
- ULocMatchLifetime lifetime) :
- likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {}
-
- ~LocaleLsrIterator() {
- if (lifetime == ULOCMATCH_TEMPORARY_LOCALES) {
- delete remembered;
- }
- }
-
- bool hasNext() const {
- return locales.hasNext();
- }
-
- LSR next(UErrorCode &errorCode) {
- current = &locales.next();
- return getMaximalLsrOrUnd(likelySubtags, *current, errorCode);
- }
-
- void rememberCurrent(int32_t desiredIndex, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return; }
- bestDesiredIndex = desiredIndex;
- if (lifetime == ULOCMATCH_STORED_LOCALES) {
- remembered = current;
- } else {
- // ULOCMATCH_TEMPORARY_LOCALES
- delete remembered;
- remembered = new Locale(*current);
- if (remembered == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- }
- }
- }
-
- const Locale *orphanRemembered() {
- const Locale *rem = remembered;
- remembered = nullptr;
- return rem;
- }
-
- int32_t getBestDesiredIndex() const {
- return bestDesiredIndex;
- }
-
-private:
- const XLikelySubtags &likelySubtags;
- Locale::Iterator &locales;
- ULocMatchLifetime lifetime;
- const Locale *current = nullptr, *remembered = nullptr;
- int32_t bestDesiredIndex = -1;
-};
-
-const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const {
- if (U_FAILURE(errorCode)) { return nullptr; }
- int32_t suppIndex = getBestSuppIndex(
- getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
- nullptr, errorCode);
- return U_SUCCESS(errorCode) && suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
-}
-
-const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
- UErrorCode &errorCode) const {
- if (U_FAILURE(errorCode)) { return nullptr; }
- if (!desiredLocales.hasNext()) {
- return defaultLocale;
- }
- LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
- int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
- return U_SUCCESS(errorCode) && suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
-}
-
-const Locale *LocaleMatcher::getBestMatchForListString(
- StringPiece desiredLocaleList, UErrorCode &errorCode) const {
- LocalePriorityList list(desiredLocaleList, errorCode);
- LocalePriorityList::Iterator iter = list.iterator();
- return getBestMatch(iter, errorCode);
-}
-
-LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
- const Locale &desiredLocale, UErrorCode &errorCode) const {
- if (U_FAILURE(errorCode)) {
- return Result(nullptr, defaultLocale, -1, -1, FALSE);
- }
- int32_t suppIndex = getBestSuppIndex(
- getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
- nullptr, errorCode);
- if (U_FAILURE(errorCode) || suppIndex < 0) {
- return Result(nullptr, defaultLocale, -1, -1, FALSE);
- } else {
- return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, FALSE);
- }
-}
-
-LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
- Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
- if (U_FAILURE(errorCode) || !desiredLocales.hasNext()) {
- return Result(nullptr, defaultLocale, -1, -1, FALSE);
- }
- LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
- int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
- if (U_FAILURE(errorCode) || suppIndex < 0) {
- return Result(nullptr, defaultLocale, -1, -1, FALSE);
- } else {
- return Result(lsrIter.orphanRemembered(), supportedLocales[suppIndex],
- lsrIter.getBestDesiredIndex(), suppIndex, TRUE);
- }
-}
-
-int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter,
- UErrorCode &errorCode) const {
- if (U_FAILURE(errorCode)) { return -1; }
- int32_t desiredIndex = 0;
- int32_t bestSupportedLsrIndex = -1;
- for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
- // Quick check for exact maximized LSR.
- // Returns suppIndex+1 where 0 means not found.
- if (supportedLsrToIndex != nullptr) {
- desiredLSR.setHashCode();
- int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR);
- if (index != 0) {
- int32_t suppIndex = index - 1;
- if (remainingIter != nullptr) {
- remainingIter->rememberCurrent(desiredIndex, errorCode);
- }
- return suppIndex;
- }
- }
- int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance(
- desiredLSR, supportedLSRs, supportedLSRsLength,
- bestShiftedDistance, favorSubtag, direction);
- if (bestIndexAndDistance >= 0) {
- bestShiftedDistance = LocaleDistance::getShiftedDistance(bestIndexAndDistance);
- if (remainingIter != nullptr) {
- remainingIter->rememberCurrent(desiredIndex, errorCode);
- if (U_FAILURE(errorCode)) { return -1; }
- }
- bestSupportedLsrIndex = LocaleDistance::getIndex(bestIndexAndDistance);
- }
- if ((bestShiftedDistance -= LocaleDistance::shiftDistance(demotionPerDesiredLocale)) <= 0) {
- break;
- }
- if (remainingIter == nullptr || !remainingIter->hasNext()) {
- break;
- }
- desiredLSR = remainingIter->next(errorCode);
- if (U_FAILURE(errorCode)) { return -1; }
- ++desiredIndex;
- }
- if (bestSupportedLsrIndex < 0) {
- // no good match
- return -1;
- }
- return supportedIndexes[bestSupportedLsrIndex];
-}
-
-double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
- // Returns the inverse of the distance: That is, 1-distance(desired, supported).
- LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
- if (U_FAILURE(errorCode)) { return 0; }
- const LSR *pSuppLSR = &suppLSR;
- int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
- getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
- &pSuppLSR, 1,
- LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
- double distance = LocaleDistance::getDistanceDouble(indexAndDistance);
- return (100.0 - distance) / 100.0;
-}
-
-U_NAMESPACE_END
-
-// uloc_acceptLanguage() --------------------------------------------------- ***
-
-U_NAMESPACE_USE
-
-namespace {
-
-class LocaleFromTag {
-public:
- LocaleFromTag() : locale(Locale::getRoot()) {}
- const Locale &operator()(const char *tag) { return locale = Locale(tag); }
-
-private:
- // Store the locale in the converter, rather than return a reference to a temporary,
- // or a value which could go out of scope with the caller's reference to it.
- Locale locale;
-};
-
-int32_t acceptLanguage(UEnumeration &supportedLocales, Locale::Iterator &desiredLocales,
- char *dest, int32_t capacity, UAcceptResult *acceptResult,
- UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return 0; }
- LocaleMatcher::Builder builder;
- const char *locString;
- while ((locString = uenum_next(&supportedLocales, nullptr, &errorCode)) != nullptr) {
- Locale loc(locString);
- if (loc.isBogus()) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- builder.addSupportedLocale(loc);
- }
- LocaleMatcher matcher = builder.build(errorCode);
- LocaleMatcher::Result result = matcher.getBestMatchResult(desiredLocales, errorCode);
- if (U_FAILURE(errorCode)) { return 0; }
- if (result.getDesiredIndex() >= 0) {
- if (acceptResult != nullptr) {
- *acceptResult = *result.getDesiredLocale() == *result.getSupportedLocale() ?
- ULOC_ACCEPT_VALID : ULOC_ACCEPT_FALLBACK;
- }
- const char *bestStr = result.getSupportedLocale()->getName();
- int32_t bestLength = (int32_t)uprv_strlen(bestStr);
- if (bestLength <= capacity) {
- uprv_memcpy(dest, bestStr, bestLength);
- }
- return u_terminateChars(dest, capacity, bestLength, &errorCode);
- } else {
- if (acceptResult != nullptr) {
- *acceptResult = ULOC_ACCEPT_FAILED;
- }
- return u_terminateChars(dest, capacity, 0, &errorCode);
- }
-}
-
-} // namespace
-
-U_CAPI int32_t U_EXPORT2
-uloc_acceptLanguage(char *result, int32_t resultAvailable,
- UAcceptResult *outResult,
- const char **acceptList, int32_t acceptListCount,
- UEnumeration *availableLocales,
- UErrorCode *status) {
- if (U_FAILURE(*status)) { return 0; }
- if ((result == nullptr ? resultAvailable != 0 : resultAvailable < 0) ||
- (acceptList == nullptr ? acceptListCount != 0 : acceptListCount < 0) ||
- availableLocales == nullptr) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- LocaleFromTag converter;
- Locale::ConvertingIterator<const char **, LocaleFromTag> desiredLocales(
- acceptList, acceptList + acceptListCount, converter);
- return acceptLanguage(*availableLocales, desiredLocales,
- result, resultAvailable, outResult, *status);
-}
-
-U_CAPI int32_t U_EXPORT2
-uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable,
- UAcceptResult *outResult,
- const char *httpAcceptLanguage,
- UEnumeration *availableLocales,
- UErrorCode *status) {
- if (U_FAILURE(*status)) { return 0; }
- if ((result == nullptr ? resultAvailable != 0 : resultAvailable < 0) ||
- httpAcceptLanguage == nullptr || availableLocales == nullptr) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- LocalePriorityList list(httpAcceptLanguage, *status);
- LocalePriorityList::Iterator desiredLocales = list.iterator();
- return acceptLanguage(*availableLocales, desiredLocales,
- result, resultAvailable, outResult, *status);
-}
-
-#endif // __LOCMATCHER_H__
diff --git a/contrib/libs/icu/common/localeprioritylist.cpp b/contrib/libs/icu/common/localeprioritylist.cpp
deleted file mode 100644
index cee408269c9..00000000000
--- a/contrib/libs/icu/common/localeprioritylist.cpp
+++ /dev/null
@@ -1,239 +0,0 @@
-// © 2019 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-
-// localeprioritylist.cpp
-// created: 2019jul11 Markus W. Scherer
-
-#include "unicode/utypes.h"
-#include "unicode/localpointer.h"
-#include "unicode/locid.h"
-#include "unicode/stringpiece.h"
-#include "unicode/uobject.h"
-#include "charstr.h"
-#include "cmemory.h"
-#include "localeprioritylist.h"
-#include "uarrsort.h"
-#include "uassert.h"
-#include "uhash.h"
-
-U_NAMESPACE_BEGIN
-
-namespace {
-
-int32_t hashLocale(const UHashTok token) {
- auto *locale = static_cast<const Locale *>(token.pointer);
- return locale->hashCode();
-}
-
-UBool compareLocales(const UHashTok t1, const UHashTok t2) {
- auto *l1 = static_cast<const Locale *>(t1.pointer);
- auto *l2 = static_cast<const Locale *>(t2.pointer);
- return *l1 == *l2;
-}
-
-constexpr int32_t WEIGHT_ONE = 1000;
-
-struct LocaleAndWeight {
- Locale *locale;
- int32_t weight; // 0..1000 = 0.0..1.0
- int32_t index; // force stable sort
-
- int32_t compare(const LocaleAndWeight &other) const {
- int32_t diff = other.weight - weight; // descending: other-this
- if (diff != 0) { return diff; }
- return index - other.index;
- }
-};
-
-int32_t U_CALLCONV
-compareLocaleAndWeight(const void * /*context*/, const void *left, const void *right) {
- return static_cast<const LocaleAndWeight *>(left)->
- compare(*static_cast<const LocaleAndWeight *>(right));
-}
-
-const char *skipSpaces(const char *p, const char *limit) {
- while (p < limit && *p == ' ') { ++p; }
- return p;
-}
-
-int32_t findTagLength(const char *p, const char *limit) {
- // Look for accept-language delimiters.
- // Leave other validation up to the Locale constructor.
- const char *q;
- for (q = p; q < limit; ++q) {
- char c = *q;
- if (c == ' ' || c == ',' || c == ';') { break; }
- }
- return static_cast<int32_t>(q - p);
-}
-
-/**
- * Parses and returns a qvalue weight in millis.
- * Advances p to after the parsed substring.
- * Returns a negative value if parsing fails.
- */
-int32_t parseWeight(const char *&p, const char *limit) {
- p = skipSpaces(p, limit);
- char c;
- if (p == limit || ((c = *p) != '0' && c != '1')) { return -1; }
- int32_t weight = (c - '0') * 1000;
- if (++p == limit || *p != '.') { return weight; }
- int32_t multiplier = 100;
- while (++p != limit && '0' <= (c = *p) && c <= '9') {
- c -= '0';
- if (multiplier > 0) {
- weight += c * multiplier;
- multiplier /= 10;
- } else if (multiplier == 0) {
- // round up
- if (c >= 5) { ++weight; }
- multiplier = -1;
- } // else ignore further fraction digits
- }
- return weight <= WEIGHT_ONE ? weight : -1; // bad if > 1.0
-}
-
-} // namespace
-
-/**
- * Nothing but a wrapper over a MaybeStackArray of LocaleAndWeight.
- *
- * This wrapper exists (and is not in an anonymous namespace)
- * so that we can forward-declare it in the header file and
- * don't have to expose the MaybeStackArray specialization and
- * the LocaleAndWeight to code (like the test) that #includes localeprioritylist.h.
- * Also, otherwise we would have to do a platform-specific
- * template export declaration of some kind for the MaybeStackArray specialization
- * to be properly exported from the common DLL.
- */
-struct LocaleAndWeightArray : public UMemory {
- MaybeStackArray<LocaleAndWeight, 20> array;
-};
-
-LocalePriorityList::LocalePriorityList(StringPiece s, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return; }
- list = new LocaleAndWeightArray();
- if (list == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- const char *p = s.data();
- const char *limit = p + s.length();
- while ((p = skipSpaces(p, limit)) != limit) {
- if (*p == ',') { // empty range field
- ++p;
- continue;
- }
- int32_t tagLength = findTagLength(p, limit);
- if (tagLength == 0) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- CharString tag(p, tagLength, errorCode);
- if (U_FAILURE(errorCode)) { return; }
- Locale locale = Locale(tag.data());
- if (locale.isBogus()) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- int32_t weight = WEIGHT_ONE;
- if ((p = skipSpaces(p + tagLength, limit)) != limit && *p == ';') {
- if ((p = skipSpaces(p + 1, limit)) == limit || *p != 'q' ||
- (p = skipSpaces(p + 1, limit)) == limit || *p != '=' ||
- (++p, (weight = parseWeight(p, limit)) < 0)) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- p = skipSpaces(p, limit);
- }
- if (p != limit && *p != ',') { // trailing junk
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- add(locale, weight, errorCode);
- if (p == limit) { break; }
- ++p;
- }
- sort(errorCode);
-}
-
-LocalePriorityList::~LocalePriorityList() {
- if (list != nullptr) {
- for (int32_t i = 0; i < listLength; ++i) {
- delete list->array[i].locale;
- }
- delete list;
- }
- uhash_close(map);
-}
-
-const Locale *LocalePriorityList::localeAt(int32_t i) const {
- return list->array[i].locale;
-}
-
-Locale *LocalePriorityList::orphanLocaleAt(int32_t i) {
- if (list == nullptr) { return nullptr; }
- LocaleAndWeight &lw = list->array[i];
- Locale *l = lw.locale;
- lw.locale = nullptr;
- return l;
-}
-
-bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return false; }
- if (map == nullptr) {
- if (weight <= 0) { return true; } // do not add q=0
- map = uhash_open(hashLocale, compareLocales, uhash_compareLong, &errorCode);
- if (U_FAILURE(errorCode)) { return false; }
- }
- LocalPointer<Locale> clone;
- int32_t index = uhash_geti(map, &locale);
- if (index != 0) {
- // Duplicate: Remove the old item and append it anew.
- LocaleAndWeight &lw = list->array[index - 1];
- clone.adoptInstead(lw.locale);
- lw.locale = nullptr;
- lw.weight = 0;
- ++numRemoved;
- }
- if (weight <= 0) { // do not add q=0
- if (index != 0) {
- // Not strictly necessary but cleaner.
- uhash_removei(map, &locale);
- }
- return true;
- }
- if (clone.isNull()) {
- clone.adoptInstead(locale.clone());
- if (clone.isNull() || (clone->isBogus() && !locale.isBogus())) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return false;
- }
- }
- if (listLength == list->array.getCapacity()) {
- int32_t newCapacity = listLength < 50 ? 100 : 4 * listLength;
- if (list->array.resize(newCapacity, listLength) == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return false;
- }
- }
- uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode);
- if (U_FAILURE(errorCode)) { return false; }
- LocaleAndWeight &lw = list->array[listLength];
- lw.locale = clone.orphan();
- lw.weight = weight;
- lw.index = listLength++;
- if (weight < WEIGHT_ONE) { hasWeights = true; }
- U_ASSERT(uhash_count(map) == getLength());
- return true;
-}
-
-void LocalePriorityList::sort(UErrorCode &errorCode) {
- // Sort by descending weights if there is a mix of weights.
- // The comparator forces a stable sort via the item index.
- if (U_FAILURE(errorCode) || getLength() <= 1 || !hasWeights) { return; }
- uprv_sortArray(list->array.getAlias(), listLength, sizeof(LocaleAndWeight),
- compareLocaleAndWeight, nullptr, FALSE, &errorCode);
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/localeprioritylist.h b/contrib/libs/icu/common/localeprioritylist.h
deleted file mode 100644
index 80ca38a7b52..00000000000
--- a/contrib/libs/icu/common/localeprioritylist.h
+++ /dev/null
@@ -1,115 +0,0 @@
-// © 2019 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-
-// localeprioritylist.h
-// created: 2019jul11 Markus W. Scherer
-
-#ifndef __LOCALEPRIORITYLIST_H__
-#define __LOCALEPRIORITYLIST_H__
-
-#include "unicode/utypes.h"
-#include "unicode/locid.h"
-#include "unicode/stringpiece.h"
-#include "unicode/uobject.h"
-
-struct UHashtable;
-
-U_NAMESPACE_BEGIN
-
-struct LocaleAndWeightArray;
-
-/**
- * Parses a list of locales from an accept-language string.
- * We are a bit more lenient than the spec:
- * We accept extra whitespace in more places, empty range fields,
- * and any number of qvalue fraction digits.
- *
- * https://tools.ietf.org/html/rfc2616#section-14.4
- * 14.4 Accept-Language
- *
- * Accept-Language = "Accept-Language" ":"
- * 1#( language-range [ ";" "q" "=" qvalue ] )
- * language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" )
- *
- * Each language-range MAY be given an associated quality value which
- * represents an estimate of the user's preference for the languages
- * specified by that range. The quality value defaults to "q=1". For
- * example,
- *
- * Accept-Language: da, en-gb;q=0.8, en;q=0.7
- *
- * https://tools.ietf.org/html/rfc2616#section-3.9
- * 3.9 Quality Values
- *
- * HTTP content negotiation (section 12) uses short "floating point"
- * numbers to indicate the relative importance ("weight") of various
- * negotiable parameters. A weight is normalized to a real number in
- * the range 0 through 1, where 0 is the minimum and 1 the maximum
- * value. If a parameter has a quality value of 0, then content with
- * this parameter is `not acceptable' for the client. HTTP/1.1
- * applications MUST NOT generate more than three digits after the
- * decimal point. User configuration of these values SHOULD also be
- * limited in this fashion.
- *
- * qvalue = ( "0" [ "." 0*3DIGIT ] )
- * | ( "1" [ "." 0*3("0") ] )
- */
-class U_COMMON_API LocalePriorityList : public UMemory {
-public:
- // Iterator over the locales of a LocalePriorityList.
- // Slots for which localeAt() returns nullptr are skipped.
- class Iterator : public Locale::Iterator {
- public:
- // True while fewer than `length` locales have been returned.
- UBool hasNext() const override { return count < length; }
-
- // Returns the next non-null locale.
- // The loop has no upper bound of its own, so call this only while
- // hasNext() is true.
- const Locale &next() override {
- for(;;) {
- const Locale *locale = list.localeAt(index++);
- if (locale != nullptr) {
- ++count;
- return *locale;
- }
- }
- }
-
- private:
- friend class LocalePriorityList;
-
- // Only LocalePriorityList::iterator() creates iterators.
- // The list is held by reference and its getLength() is captured once here.
- Iterator(const LocalePriorityList &list) : list(list), length(list.getLength()) {}
-
- const LocalePriorityList &list;
- int32_t index = 0; // next slot to pass to localeAt()
- int32_t count = 0; // number of locales returned so far
- const int32_t length; // list.getLength() at construction time
- };
-
- // Builds the list from the string `s`; errors are reported via errorCode.
- LocalePriorityList(StringPiece s, UErrorCode &errorCode);
-
- ~LocalePriorityList();
-
- // Number of entries, not counting removed ones.
- int32_t getLength() const { return listLength - numRemoved; }
-
- // Number of slots, including those of removed entries.
- int32_t getLengthIncludingRemoved() const { return listLength; }
-
- Iterator iterator() const { return Iterator(*this); }
-
- // Locale stored in slot i; the Iterator treats nullptr as "skip this slot".
- const Locale *localeAt(int32_t i) const;
-
- // NOTE(review): presumably transfers ownership of the locale in slot i
- // to the caller -- the definition is not in this header; confirm.
- Locale *orphanLocaleAt(int32_t i);
-
-private:
- // Not copyable.
- LocalePriorityList(const LocalePriorityList &) = delete;
- LocalePriorityList &operator=(const LocalePriorityList &) = delete;
-
- bool add(const Locale &locale, int32_t weight, UErrorCode &errorCode);
-
- void sort(UErrorCode &errorCode);
-
- LocaleAndWeightArray *list = nullptr;
- int32_t listLength = 0; // slots in use, including removed entries
- int32_t numRemoved = 0; // removed entries among those slots
- bool hasWeights = false; // other than 1.0
- UHashtable *map = nullptr;
-};
-
-U_NAMESPACE_END
-
-#endif // __LOCALEPRIORITYLIST_H__
diff --git a/contrib/libs/icu/common/localsvc.h b/contrib/libs/icu/common/localsvc.h
deleted file mode 100644
index 33640195135..00000000000
--- a/contrib/libs/icu/common/localsvc.h
+++ /dev/null
@@ -1,27 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-***************************************************************************
-* Copyright (C) 2006 International Business Machines Corporation *
-* and others. All rights reserved. *
-***************************************************************************
-*/
-
-#ifndef LOCALSVC_H
-#define LOCALSVC_H
-
-#include "unicode/utypes.h"
-
-#if defined(U_LOCAL_SERVICE_HOOK) && U_LOCAL_SERVICE_HOOK
-/**
- * Prototype for user-supplied service hook. This function is expected to return
- * a type of factory object specific to the requested service.
- *
- * @param what service-specific string identifying the specific user hook
- * @param status error status
- * @return a service-specific hook, or NULL on failure.
- */
-U_CAPI void* uprv_svc_hook(const char *what, UErrorCode *status);
-#endif
-
-#endif
diff --git a/contrib/libs/icu/common/locavailable.cpp b/contrib/libs/icu/common/locavailable.cpp
deleted file mode 100644
index e8ec512e370..00000000000
--- a/contrib/libs/icu/common/locavailable.cpp
+++ /dev/null
@@ -1,270 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1997-2013, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: locavailable.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010feb25
-* created by: Markus W. Scherer
-*
-* Code for available locales, separated out from other .cpp files
-* that then do not depend on resource bundle code and res_index bundles.
-*/
-
-#include "unicode/errorcode.h"
-#include "unicode/utypes.h"
-#include "unicode/locid.h"
-#include "unicode/uloc.h"
-#include "unicode/ures.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "ucln_cmn.h"
-#include "uassert.h"
-#include "umutex.h"
-#include "uresimp.h"
-
-// C++ API ----------------------------------------------------------------- ***
-
-U_NAMESPACE_BEGIN
-
-// State behind Locale::getAvailableLocales(): filled by locale_available_init()
-// under gInitOnceLocale and released by locale_available_cleanup().
-static icu::Locale* availableLocaleList = NULL;
-static int32_t availableLocaleListCount;
-static icu::UInitOnce gInitOnceLocale = U_INITONCE_INITIALIZER;
-
-U_NAMESPACE_END
-
-U_CDECL_BEGIN
-
-static UBool U_CALLCONV locale_available_cleanup(void)
-{
-    U_NAMESPACE_USE
-
-    // Release the cached Locale array (delete[] on a null pointer is a no-op)
-    // and return all state to its "not yet initialized" values.
-    delete[] availableLocaleList;
-    availableLocaleList = NULL;
-    availableLocaleListCount = 0;
-    gInitOnceLocale.reset();
-
-    return TRUE;
-}
-
-U_CDECL_END
-
-U_NAMESPACE_BEGIN
-
-void U_CALLCONV locale_available_init() {
-    // This function is a friend of class Locale and is only invoked
-    // via umtx_initOnce().
-
-    // The names come from uloc_countAvailable()/uloc_getAvailable();
-    // build one Locale object per name.
-    const int32_t total = uloc_countAvailable();
-    availableLocaleListCount = total;
-    if (total != 0) {
-        availableLocaleList = new Locale[total];
-    }
-    if (availableLocaleList == NULL) {
-        // Nothing available, or the allocation failed: report an empty list.
-        availableLocaleListCount = 0;
-    }
-
-    // Fill the array from the last slot down to the first.
-    int32_t i = availableLocaleListCount;
-    while (--i >= 0) {
-        availableLocaleList[i].setFromPOSIXID(uloc_getAvailable(i));
-    }
-    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_AVAILABLE, locale_available_cleanup);
-}
-
-const Locale* U_EXPORT2
-Locale::getAvailableLocales(int32_t& count)
-{
-    // Build the cached array on first use, then hand out the cached values.
-    umtx_initOnce(gInitOnceLocale, &locale_available_init);
-    const Locale *locales = availableLocaleList;
-    count = availableLocaleListCount;
-    return locales;
-}
-
-
-U_NAMESPACE_END
-
-// C API ------------------------------------------------------------------- ***
-
-U_NAMESPACE_USE
-
-/* ### Constants **************************************************/
-
-namespace {
-
-// Enough capacity for the two lists in the res_index.res file
-// Both arrays are indexed by ULocAvailableType (ULOC_AVAILABLE_DEFAULT and
-// ULOC_AVAILABLE_ONLY_LEGACY_ALIASES); they are filled by AvailableLocalesSink
-// and released by uloc_cleanup().
-const char** gAvailableLocaleNames[2] = {};
-int32_t gAvailableLocaleCounts[2] = {};
-icu::UInitOnce ginstalledLocalesInitOnce = U_INITONCE_INITIALIZER;
-
-// Resource sink that copies the locale-name lists of the res_index bundle
-// into gAvailableLocaleNames / gAvailableLocaleCounts.
-class AvailableLocalesSink : public ResourceSink {
- public:
- // Walks the table in `value`. "InstalledLocales" fills the
- // ULOC_AVAILABLE_DEFAULT list, "AliasLocales" fills the
- // ULOC_AVAILABLE_ONLY_LEGACY_ALIASES list; every other key is skipped.
- // On failure `status` is set and the function returns early.
- void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE {
- ResourceTable resIndexTable = value.getTable(status);
- if (U_FAILURE(status)) {
- return;
- }
- // `key` and `value` are reused as cursors by both loops below.
- for (int32_t i = 0; resIndexTable.getKeyAndValue(i, key, value); ++i) {
- ULocAvailableType type;
- if (uprv_strcmp(key, "InstalledLocales") == 0) {
- type = ULOC_AVAILABLE_DEFAULT;
- } else if (uprv_strcmp(key, "AliasLocales") == 0) {
- type = ULOC_AVAILABLE_ONLY_LEGACY_ALIASES;
- } else {
- // CLDRVersion, etc.
- continue;
- }
- ResourceTable availableLocalesTable = value.getTable(status);
- if (U_FAILURE(status)) {
- return;
- }
- // One pointer slot per entry of the sub-table.
- gAvailableLocaleCounts[type] = availableLocalesTable.getSize();
- gAvailableLocaleNames[type] = static_cast<const char**>(
- uprv_malloc(gAvailableLocaleCounts[type] * sizeof(const char*)));
- if (gAvailableLocaleNames[type] == nullptr) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- // Only the key pointers are stored, not copies of the strings.
- // NOTE(review): presumably the keys stay valid because they point
- // into the loaded resource data -- confirm.
- for (int32_t j = 0; availableLocalesTable.getKeyAndValue(j, key, value); ++j) {
- gAvailableLocaleNames[type][j] = key;
- }
- }
- }
-};
-
-// StringEnumeration over one of the available-locale name lists, or over
-// both lists back to back for ULOC_AVAILABLE_WITH_LEGACY_ALIASES.
-class AvailableLocalesStringEnumeration : public StringEnumeration {
- public:
-    AvailableLocalesStringEnumeration(ULocAvailableType type) : fType(type) {
-    }
-
-    // Returns the next name, or nullptr once the enumeration is exhausted.
-    // *resultLength (if non-null) receives the name length, or 0 at the end.
-    const char* next(int32_t *resultLength, UErrorCode&) override {
-        int32_t pos = fIndex++;
-        ULocAvailableType listType = fType;
-
-        // The "combined" type is the default list followed by the alias list;
-        // map the running index onto whichever list it falls into.
-        if (fType == ULOC_AVAILABLE_WITH_LEGACY_ALIASES) {
-            const int32_t numDefault = gAvailableLocaleCounts[ULOC_AVAILABLE_DEFAULT];
-            if (pos >= numDefault) {
-                pos -= numDefault;
-                listType = ULOC_AVAILABLE_ONLY_LEGACY_ALIASES;
-            } else {
-                listType = ULOC_AVAILABLE_DEFAULT;
-            }
-        }
-
-        // Past the end of the selected list: nothing more to return.
-        if (pos >= gAvailableLocaleCounts[listType]) {
-            if (resultLength != nullptr) {
-                *resultLength = 0;
-            }
-            return nullptr;
-        }
-
-        const char *name = gAvailableLocaleNames[listType][pos];
-        if (resultLength != nullptr) {
-            *resultLength = static_cast<int32_t>(uprv_strlen(name));
-        }
-        return name;
-    }
-
-    // Restarts the enumeration from the first name.
-    void reset(UErrorCode&) override {
-        fIndex = 0;
-    }
-
-    // Total number of names this enumeration covers.
-    int32_t count(UErrorCode&) const override {
-        if (fType != ULOC_AVAILABLE_WITH_LEGACY_ALIASES) {
-            return gAvailableLocaleCounts[fType];
-        }
-        return gAvailableLocaleCounts[ULOC_AVAILABLE_DEFAULT]
-            + gAvailableLocaleCounts[ULOC_AVAILABLE_ONLY_LEGACY_ALIASES];
-    }
-
- private:
-    ULocAvailableType fType;
-    int32_t fIndex = 0;
-};
-
-/* ### Get available **************************************************/
-
-static UBool U_CALLCONV uloc_cleanup(void) {
-    // Free every name list and return the init-once flag to its initial
-    // state so that the lists can be loaded again later.
-    const int32_t numLists = UPRV_LENGTHOF(gAvailableLocaleNames);
-    int32_t i = 0;
-    while (i < numLists) {
-        uprv_free(gAvailableLocaleNames[i]);
-        gAvailableLocaleNames[i] = nullptr;
-        gAvailableLocaleCounts[i] = 0;
-        ++i;
-    }
-    ginstalledLocalesInitOnce.reset();
-    return TRUE;
-}
-
-// Load Installed Locales. This function will be called exactly once
-// via the initOnce mechanism.
-
-static void U_CALLCONV loadInstalledLocales(UErrorCode& status) {
-    // Register the cleanup first, so that whatever the sink allocates is
-    // released even if loading stops half way.
-    ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
-
-    // Open the res_index bundle and let the sink collect its locale lists.
-    UResourceBundle *resIndex = ures_openDirect(NULL, "res_index", &status);
-    icu::LocalUResourceBundlePointer rb(resIndex);
-    AvailableLocalesSink sink;
-    ures_getAllItemsWithFallback(rb.getAlias(), "", sink, status);
-}
-
-// Loads the installed-locale lists through the init-once mechanism;
-// the outcome is reported in `status`.
-void _load_installedLocales(UErrorCode& status) {
- umtx_initOnce(ginstalledLocalesInitOnce, &loadInstalledLocales, status);
-}
-
-} // namespace
-
-/**
- * Returns the name of the installed locale at position `offset`.
- *
- * @param offset index into the installed-locales list; valid values are
- *               0 .. uloc_countAvailable()-1
- * @return the locale name, or nullptr if the list could not be loaded or
- *         `offset` is out of range
- */
-U_CAPI const char* U_EXPORT2
-uloc_getAvailable(int32_t offset) {
-    icu::ErrorCode status;
-    _load_installedLocales(status);
-    if (status.isFailure()) {
-        return nullptr;
-    }
-    // Reject negative offsets and offset == count as well: the array holds
-    // exactly `count` entries, so the last valid index is count-1.
-    // There is no status parameter to report the bad argument through.
-    if (offset < 0 || offset >= gAvailableLocaleCounts[0]) {
-        return nullptr;
-    }
-    return gAvailableLocaleNames[0][offset];
-}
-
-U_CAPI int32_t U_EXPORT2
-uloc_countAvailable() {
-    // Number of installed locales; 0 if the list could not be loaded.
-    icu::ErrorCode status;
-    _load_installedLocales(status);
-    return status.isFailure() ? 0 : gAvailableLocaleCounts[0];
-}
-
-U_CAPI UEnumeration* U_EXPORT2
-uloc_openAvailableByType(ULocAvailableType type, UErrorCode* status) {
-    if (U_FAILURE(*status)) {
-        return nullptr;
-    }
-    // Only the enumerators below ULOC_AVAILABLE_COUNT are valid list types.
-    const bool validType = !(type < 0 || type >= ULOC_AVAILABLE_COUNT);
-    if (!validType) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return nullptr;
-    }
-
-    // The enumeration reads the global name lists, so load them first.
-    _load_installedLocales(*status);
-    if (U_FAILURE(*status)) {
-        return nullptr;
-    }
-
-    // LocalPointer sets *status if the allocation failed.
-    LocalPointer<AvailableLocalesStringEnumeration> names(
-        new AvailableLocalesStringEnumeration(type), *status);
-    if (U_FAILURE(*status)) {
-        return nullptr;
-    }
-    // Hand the C++ enumeration over to the C wrapper.
-    return uenum_openFromStringEnumeration(names.orphan(), status);
-}
-
diff --git a/contrib/libs/icu/common/locbased.cpp b/contrib/libs/icu/common/locbased.cpp
deleted file mode 100644
index ff378b4cc78..00000000000
--- a/contrib/libs/icu/common/locbased.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2004-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Author: Alan Liu
-* Created: January 16 2004
-* Since: ICU 2.8
-**********************************************************************
-*/
-#include "locbased.h"
-#include "cstring.h"
-
-U_NAMESPACE_BEGIN
-
-Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
-    // A missing ID (error, or unsupported type) yields a Locale built from "".
-    const char* id = getLocaleID(type, status);
-    if (id == 0) {
-        id = "";
-    }
-    return Locale(id);
-}
-
-const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
-    if (U_FAILURE(status)) {
-        return NULL;
-    }
-
-    // Only the valid and the actual locale can be queried.
-    if (type == ULOC_VALID_LOCALE) {
-        return valid;
-    }
-    if (type == ULOC_ACTUAL_LOCALE) {
-        return actual;
-    }
-    status = U_ILLEGAL_ARGUMENT_ERROR;
-    return NULL;
-}
-
-void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) {
-    // Each ID is optional: a null pointer leaves the stored ID untouched.
-    // Copies are limited to ULOC_FULLNAME_CAPACITY bytes and the last byte
-    // is forced to NUL, so over-long IDs are truncated.
-    const int32_t lastIndex = ULOC_FULLNAME_CAPACITY-1;
-    if (validID != NULL) {
-        uprv_strncpy(valid, validID, ULOC_FULLNAME_CAPACITY);
-        valid[lastIndex] = 0;
-    }
-    if (actualID != NULL) {
-        uprv_strncpy(actual, actualID, ULOC_FULLNAME_CAPACITY);
-        actual[lastIndex] = 0;
-    }
-}
-
-/**
- * Stores the names of the two locales as the valid and actual locale IDs.
- *
- * The destination buffers are ULOC_FULLNAME_CAPACITY bytes long, while a
- * Locale name can be longer than that. The copy is therefore routed through
- * the bounded const char* overload, which truncates and always terminates,
- * instead of copying with an unbounded uprv_strcpy().
- */
-void LocaleBased::setLocaleIDs(const Locale& validID, const Locale& actualID) {
-    setLocaleIDs(validID.getName(), actualID.getName());
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/locbased.h b/contrib/libs/icu/common/locbased.h
deleted file mode 100644
index 45738863b5e..00000000000
--- a/contrib/libs/icu/common/locbased.h
+++ /dev/null
@@ -1,107 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2004-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Author: Alan Liu
-* Created: January 16 2004
-* Since: ICU 2.8
-**********************************************************************
-*/
-#ifndef LOCBASED_H
-#define LOCBASED_H
-
-#include "unicode/locid.h"
-#include "unicode/uobject.h"
-
-/**
- * Macro to declare a locale LocaleBased wrapper object for the given
- * object, which must have two members named `validLocale' and
- * `actualLocale' of size ULOC_FULLNAME_CAPACITY
- */
-#define U_LOCALE_BASED(varname, objname) \
- LocaleBased varname((objname).validLocale, (objname).actualLocale)
-
-U_NAMESPACE_BEGIN
-
-/**
- * A utility class that unifies the implementation of getLocale() by
- * various ICU services. This class is likely to be removed in the
- * ICU 3.0 time frame in favor of an integrated approach with the
- * services framework.
- * @since ICU 2.8
- */
-class U_COMMON_API LocaleBased : public UMemory {
-
- public:
-
- /**
- * Construct a LocaleBased wrapper around the two pointers. These
- * will be aliased for the lifetime of this object.
- */
- inline LocaleBased(char* validAlias, char* actualAlias);
-
- /**
- * Construct a LocaleBased wrapper around the two const pointers.
- * These will be aliased for the lifetime of this object.
- */
- inline LocaleBased(const char* validAlias, const char* actualAlias);
-
- /**
- * Return locale meta-data for the service object wrapped by this
- * object. Either the valid or the actual locale may be
- * retrieved.
- * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
- * @param status input-output error code
- * @return the indicated locale
- */
- Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
-
- /**
- * Return the locale ID for the service object wrapped by this
- * object. Either the valid or the actual locale may be
- * retrieved.
- * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
- * @param status input-output error code
- * @return the indicated locale ID
- */
- const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
-
- /**
- * Set the locale meta-data for the service object wrapped by this
- * object. If either parameter is zero, it is ignored.
- * @param valid the ID of the valid locale
- * @param actual the ID of the actual locale
- */
- void setLocaleIDs(const char* valid, const char* actual);
-
- /**
- * Set the locale meta-data for the service object wrapped by this
- * object.
- * @param valid the ID of the valid locale
- * @param actual the ID of the actual locale
- */
- void setLocaleIDs(const Locale& valid, const Locale& actual);
-
- private:
-
- // Aliased (not owned) buffer holding the valid locale ID; it belongs to
- // the wrapped service object.
- char* valid;
-
- // Aliased (not owned) buffer holding the actual locale ID.
- char* actual;
-};
-
-// Stores the two pointers as-is; nothing is copied or allocated.
-inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias) :
- valid(validAlias), actual(actualAlias) {
-}
-
-// Const flavour of the constructor: the members are non-const, so the
-// constness of the aliased buffers has to be cast away here.
-inline LocaleBased::LocaleBased(const char* validAlias,
-                                const char* actualAlias) :
-    valid(const_cast<char*>(validAlias)),
-    actual(const_cast<char*>(actualAlias)) {
-}
-
-U_NAMESPACE_END
-
-#endif
diff --git a/contrib/libs/icu/common/locdispnames.cpp b/contrib/libs/icu/common/locdispnames.cpp
deleted file mode 100644
index d92348e31c8..00000000000
--- a/contrib/libs/icu/common/locdispnames.cpp
+++ /dev/null
@@ -1,885 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1997-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: locdispnames.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010feb25
-* created by: Markus W. Scherer
-*
-* Code for locale display names, separated out from other .cpp files
-* that then do not depend on resource bundle code and display name data.
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/brkiter.h"
-#include "unicode/locid.h"
-#include "unicode/uenum.h"
-#include "unicode/uloc.h"
-#include "unicode/ures.h"
-#include "unicode/ustring.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "putilimp.h"
-#include "ulocimp.h"
-#include "uresimp.h"
-#include "ureslocs.h"
-#include "ustr_imp.h"
-
-// C++ API ----------------------------------------------------------------- ***
-
-U_NAMESPACE_BEGIN
-
-UnicodeString&
-Locale::getDisplayLanguage(UnicodeString& dispLang) const
-{
-    // Convenience overload: display in the default locale.
-    const Locale &inLocale = getDefault();
-    return getDisplayLanguage(inLocale, dispLang);
-}
-
-/* We cannot make any assumptions about the size of the output display strings.
-* Yet, since we are calling through to a C API, we need to set limits on the
-* buffer size. For all the following getDisplay functions we first attempt
-* to fill a buffer of ULOC_FULLNAME_CAPACITY UChars obtained from the result
-* string. If it is too small, we request a buffer of the reported length and
-* call the C API a second time. */
-
-UnicodeString&
-Locale::getDisplayLanguage(const Locale &displayLocale,
-                           UnicodeString &result) const {
-    // First attempt: write directly into the result string's buffer.
-    UChar *out = result.getBuffer(ULOC_FULLNAME_CAPACITY);
-    if (out == 0) {
-        result.truncate(0);
-        return result;
-    }
-
-    UErrorCode status = U_ZERO_ERROR;
-    int32_t needed = uloc_getDisplayLanguage(fullName, displayLocale.fullName,
-                                             out, result.getCapacity(),
-                                             &status);
-    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
-    if (status != U_BUFFER_OVERFLOW_ERROR) {
-        return result;
-    }
-
-    // The buffer was too small: retry once with the length just reported.
-    out = result.getBuffer(needed);
-    if (out == 0) {
-        result.truncate(0);
-        return result;
-    }
-    status = U_ZERO_ERROR;
-    needed = uloc_getDisplayLanguage(fullName, displayLocale.fullName,
-                                     out, result.getCapacity(),
-                                     &status);
-    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
-    return result;
-}
-
-UnicodeString&
-Locale::getDisplayScript(UnicodeString& dispScript) const
-{
-    // Convenience overload: display in the default locale.
-    const Locale &inLocale = getDefault();
-    return getDisplayScript(inLocale, dispScript);
-}
-
-UnicodeString&
-Locale::getDisplayScript(const Locale &displayLocale,
-                         UnicodeString &result) const {
-    // First attempt: write directly into the result string's buffer.
-    UChar *out = result.getBuffer(ULOC_FULLNAME_CAPACITY);
-    if (out == 0) {
-        result.truncate(0);
-        return result;
-    }
-
-    UErrorCode status = U_ZERO_ERROR;
-    int32_t needed = uloc_getDisplayScript(fullName, displayLocale.fullName,
-                                           out, result.getCapacity(),
-                                           &status);
-    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
-    if (status != U_BUFFER_OVERFLOW_ERROR) {
-        return result;
-    }
-
-    // The buffer was too small: retry once with the length just reported.
-    out = result.getBuffer(needed);
-    if (out == 0) {
-        result.truncate(0);
-        return result;
-    }
-    status = U_ZERO_ERROR;
-    needed = uloc_getDisplayScript(fullName, displayLocale.fullName,
-                                   out, result.getCapacity(),
-                                   &status);
-    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
-    return result;
-}
-
-UnicodeString&
-Locale::getDisplayCountry(UnicodeString& dispCntry) const
-{
-    // Convenience overload: display in the default locale.
-    const Locale &inLocale = getDefault();
-    return getDisplayCountry(inLocale, dispCntry);
-}
-
-UnicodeString&
-Locale::getDisplayCountry(const Locale &displayLocale,
-                          UnicodeString &result) const {
-    // First attempt: write directly into the result string's buffer.
-    UChar *out = result.getBuffer(ULOC_FULLNAME_CAPACITY);
-    if (out == 0) {
-        result.truncate(0);
-        return result;
-    }
-
-    UErrorCode status = U_ZERO_ERROR;
-    int32_t needed = uloc_getDisplayCountry(fullName, displayLocale.fullName,
-                                            out, result.getCapacity(),
-                                            &status);
-    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
-    if (status != U_BUFFER_OVERFLOW_ERROR) {
-        return result;
-    }
-
-    // The buffer was too small: retry once with the length just reported.
-    out = result.getBuffer(needed);
-    if (out == 0) {
-        result.truncate(0);
-        return result;
-    }
-    status = U_ZERO_ERROR;
-    needed = uloc_getDisplayCountry(fullName, displayLocale.fullName,
-                                    out, result.getCapacity(),
-                                    &status);
-    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
-    return result;
-}
-
-UnicodeString&
-Locale::getDisplayVariant(UnicodeString& dispVar) const
-{
-    // Convenience overload: display in the default locale.
-    const Locale &inLocale = getDefault();
-    return getDisplayVariant(inLocale, dispVar);
-}
-
-UnicodeString&
-Locale::getDisplayVariant(const Locale &displayLocale,
-                          UnicodeString &result) const {
-    // First attempt: write directly into the result string's buffer.
-    UChar *out = result.getBuffer(ULOC_FULLNAME_CAPACITY);
-    if (out == 0) {
-        result.truncate(0);
-        return result;
-    }
-
-    UErrorCode status = U_ZERO_ERROR;
-    int32_t needed = uloc_getDisplayVariant(fullName, displayLocale.fullName,
-                                            out, result.getCapacity(),
-                                            &status);
-    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
-    if (status != U_BUFFER_OVERFLOW_ERROR) {
-        return result;
-    }
-
-    // The buffer was too small: retry once with the length just reported.
-    out = result.getBuffer(needed);
-    if (out == 0) {
-        result.truncate(0);
-        return result;
-    }
-    status = U_ZERO_ERROR;
-    needed = uloc_getDisplayVariant(fullName, displayLocale.fullName,
-                                    out, result.getCapacity(),
-                                    &status);
-    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
-    return result;
-}
-
-UnicodeString&
-Locale::getDisplayName( UnicodeString& name ) const
-{
-    // Convenience overload: display in the default locale.
-    const Locale &inLocale = getDefault();
-    return getDisplayName(inLocale, name);
-}
-
-UnicodeString&
-Locale::getDisplayName(const Locale &displayLocale,
-                       UnicodeString &result) const {
-    // First attempt: write directly into the result string's buffer.
-    UChar *out = result.getBuffer(ULOC_FULLNAME_CAPACITY);
-    if (out == 0) {
-        result.truncate(0);
-        return result;
-    }
-
-    UErrorCode status = U_ZERO_ERROR;
-    int32_t needed = uloc_getDisplayName(fullName, displayLocale.fullName,
-                                         out, result.getCapacity(),
-                                         &status);
-    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
-    if (status != U_BUFFER_OVERFLOW_ERROR) {
-        return result;
-    }
-
-    // The buffer was too small: retry once with the length just reported.
-    out = result.getBuffer(needed);
-    if (out == 0) {
-        result.truncate(0);
-        return result;
-    }
-    status = U_ZERO_ERROR;
-    needed = uloc_getDisplayName(fullName, displayLocale.fullName,
-                                 out, result.getCapacity(),
-                                 &status);
-    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
-    return result;
-}
-
-#if ! UCONFIG_NO_BREAK_ITERATION
-
-// -------------------------------------
-// Gets the objectLocale display name in the default locale language.
-UnicodeString& U_EXPORT2
-BreakIterator::getDisplayName(const Locale& objectLocale,
- UnicodeString& name)
-{
- // Pure forwarding: the Locale itself produces its display name.
- return objectLocale.getDisplayName(name);
-}
-
-// -------------------------------------
-// Gets the objectLocale display name in the displayLocale language.
-UnicodeString& U_EXPORT2
-BreakIterator::getDisplayName(const Locale& objectLocale,
- const Locale& displayLocale,
- UnicodeString& name)
-{
- // Pure forwarding: the Locale itself produces its display name.
- return objectLocale.getDisplayName(displayLocale, name);
-}
-
-#endif
-
-
-U_NAMESPACE_END
-
-// C API ------------------------------------------------------------------- ***
-
-U_NAMESPACE_USE
-
-/* ### Constants **************************************************/
-
-/* These strings describe the resources we attempt to load from
- the locale ResourceBundle data file.*/
-// Resource table and item keys.
-// _getDisplayNameForComponent() compares its `tag` argument with _kCountries
-// by pointer, not by content, so callers must pass these arrays themselves
-// rather than equal string literals.
-static const char _kLanguages[] = "Languages";
-static const char _kScripts[] = "Scripts";
-static const char _kScriptsStandAlone[] = "Scripts%stand-alone";
-static const char _kCountries[] = "Countries";
-static const char _kVariants[] = "Variants";
-static const char _kKeys[] = "Keys";
-static const char _kTypes[] = "Types";
-//static const char _kRootName[] = "root";
-static const char _kCurrency[] = "currency";
-static const char _kCurrencies[] = "Currencies";
-static const char _kLocaleDisplayPattern[] = "localeDisplayPattern";
-static const char _kPattern[] = "pattern";
-static const char _kSeparator[] = "separator";
-
-/* ### Display name **************************************************/
-
-/*
- * Looks up a display string in the resource bundle `path`/`locale` and copies
- * it to `dest`. If the lookup fails, `substitute` is converted and copied
- * instead and *pErrorCode is set to U_USING_DEFAULT_WARNING.
- *
- * With itemKey==NULL, `tableKey` names a top-level string. Otherwise the
- * string is fetched with uloc_getTableStringWithFallback() from
- * tableKey/subTableKey/itemKey.
- *
- * At most destCapacity UChars are written. The return value is the full
- * length of the string as passed through u_terminateUChars().
- */
-static int32_t
-_getStringOrCopyKey(const char *path, const char *locale,
- const char *tableKey,
- const char* subTableKey,
- const char *itemKey,
- const char *substitute,
- UChar *dest, int32_t destCapacity,
- UErrorCode *pErrorCode) {
- const UChar *s = NULL;
- int32_t length = 0;
-
- if(itemKey==NULL) {
- /* top-level item: normal resource bundle access */
- icu::LocalUResourceBundlePointer rb(ures_open(path, locale, pErrorCode));
-
- if(U_SUCCESS(*pErrorCode)) {
- s=ures_getStringByKey(rb.getAlias(), tableKey, &length, pErrorCode);
- /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
- }
- } else {
- /* Language code should not be a number. If it is, set the error code. */
- /* The check fires when tableKey starts with "Languages" and itemKey parses to a non-zero number. */
- if (!uprv_strncmp(tableKey, "Languages", 9) && uprv_strtol(itemKey, NULL, 10)) {
- *pErrorCode = U_MISSING_RESOURCE_ERROR;
- } else {
- /* second-level item, use special fallback */
- s=uloc_getTableStringWithFallback(path, locale,
- tableKey,
- subTableKey,
- itemKey,
- &length,
- pErrorCode);
- }
- }
-
- if(U_SUCCESS(*pErrorCode)) {
- /* copy only what fits; `length` keeps the full length */
- int32_t copyLength=uprv_min(length, destCapacity);
- if(copyLength>0 && s != NULL) {
- u_memcpy(dest, s, copyLength);
- }
- } else {
- /* no string from a resource bundle: convert the substitute */
- length=(int32_t)uprv_strlen(substitute);
- u_charsToUChars(substitute, dest, uprv_min(length, destCapacity));
- *pErrorCode=U_USING_DEFAULT_WARNING;
- }
-
- return u_terminateUChars(dest, destCapacity, length, pErrorCode);
-}
-
-typedef int32_t U_CALLCONV UDisplayNameGetter(const char *, char *, int32_t, UErrorCode *);
-
-/*
- * Shared implementation of the uloc_getDisplay<Component>() functions.
- *
- * `getter` extracts one component (language, script, ...) of `locale` into a
- * local buffer. That component is then looked up in the resource table `tag`
- * of `displayLocale`, with the component code itself as the substitute.
- *
- * Returns 0 without touching `dest` if pErrorCode is NULL or already a
- * failure, if the dest/destCapacity pair is invalid, or if the getter fails
- * or cannot terminate its output (the latter two set
- * U_ILLEGAL_ARGUMENT_ERROR).
- */
-static int32_t
-_getDisplayNameForComponent(const char *locale,
- const char *displayLocale,
- UChar *dest, int32_t destCapacity,
- UDisplayNameGetter *getter,
- const char *tag,
- UErrorCode *pErrorCode) {
- char localeBuffer[ULOC_FULLNAME_CAPACITY*4];
- int32_t length;
- UErrorCode localStatus;
- const char* root = NULL;
-
- /* argument checking */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* the getter gets its own status so that it cannot disturb the caller's */
- localStatus = U_ZERO_ERROR;
- length=(*getter)(locale, localeBuffer, sizeof(localeBuffer), &localStatus);
- if(U_FAILURE(localStatus) || localStatus==U_STRING_NOT_TERMINATED_WARNING) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- if(length==0) {
- // For the display name, we treat this as unknown language (ICU-20273).
- // Any other empty component yields an empty display string.
- if (getter == uloc_getLanguage) {
- uprv_strcpy(localeBuffer, "und");
- } else {
- return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
- }
- }
-
- /* pointer comparison: only the _kCountries array itself selects the region data */
- root = tag == _kCountries ? U_ICUDATA_REGION : U_ICUDATA_LANG;
-
- return _getStringOrCopyKey(root, displayLocale,
- tag, NULL, localeBuffer,
- localeBuffer,
- dest, destCapacity,
- pErrorCode);
-}
-
-/* Display name of the language of `locale`, looked up in the "Languages" table of `displayLocale`. */
-U_CAPI int32_t U_EXPORT2
-uloc_getDisplayLanguage(const char *locale,
- const char *displayLocale,
- UChar *dest, int32_t destCapacity,
- UErrorCode *pErrorCode) {
- return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
- uloc_getLanguage, _kLanguages, pErrorCode);
-}
-
-/*
- * Display name of the script of `locale` in the language of `displayLocale`.
- *
- * The "Scripts%stand-alone" table is tried first. If that lookup had to use
- * the default (U_USING_DEFAULT_WARNING), the regular "Scripts" table is used
- * instead.
- *
- * Returns 0 and does nothing if pErrorCode is NULL or already holds a
- * failure, like the other uloc_getDisplay*() functions.
- */
-U_CAPI int32_t U_EXPORT2
-uloc_getDisplayScript(const char* locale,
-                      const char* displayLocale,
-                      UChar *dest, int32_t destCapacity,
-                      UErrorCode *pErrorCode)
-{
-    /* The first lookup runs on a private status, so the incoming status
-       has to be checked here or it would be ignored and overwritten. */
-    if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-
-    UErrorCode err = U_ZERO_ERROR;
-    int32_t res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
-                                              uloc_getScript, _kScriptsStandAlone, &err);
-
-    if (err == U_USING_DEFAULT_WARNING) {
-        /* no stand-alone name: fall back to the regular script names */
-        return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
-                                           uloc_getScript, _kScripts, pErrorCode);
-    }
-    *pErrorCode = err;
-    return res;
-}
-
-/* Like uloc_getDisplayScript(), but reads only the "Scripts" table and never the stand-alone one. */
-U_INTERNAL int32_t U_EXPORT2
-uloc_getDisplayScriptInContext(const char* locale,
- const char* displayLocale,
- UChar *dest, int32_t destCapacity,
- UErrorCode *pErrorCode)
-{
- return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
- uloc_getScript, _kScripts, pErrorCode);
-}
-
-/* Display name of the country of `locale`, looked up in the "Countries" table of `displayLocale`. */
-U_CAPI int32_t U_EXPORT2
-uloc_getDisplayCountry(const char *locale,
- const char *displayLocale,
- UChar *dest, int32_t destCapacity,
- UErrorCode *pErrorCode) {
- return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
- uloc_getCountry, _kCountries, pErrorCode);
-}
-
-/*
- * TODO separate variant1_variant2_variant3...
- * by getting each tag's display string and concatenating them with ", "
- * in between - similar to uloc_getDisplayName()
- */
-/* Display name of the variant of `locale`, looked up in the "Variants" table of `displayLocale`. */
-U_CAPI int32_t U_EXPORT2
-uloc_getDisplayVariant(const char *locale,
- const char *displayLocale,
- UChar *dest, int32_t destCapacity,
- UErrorCode *pErrorCode) {
- return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
- uloc_getVariant, _kVariants, pErrorCode);
-}
-
-/* Instead of having a separate pass for 'special' patterns, reintegrate the two
- * so we don't get bitten by preflight bugs again. We can be reasonably efficient
- * without two separate code paths, this code isn't that performance-critical.
- *
- * This code is general enough to deal with patterns that have a prefix or swap the
- * language and remainder components, since we gave developers enough rope to do such
- * things if they futz with the pattern data. But since we don't give them a way to
- * specify a pattern for arbitrary combinations of components, there's not much use in
- * that. I don't think our data includes such patterns; the only variable I know of is
- * whether there is a space before the open paren, or not. Oh, and zh uses different
- * chars than the standard open/close paren (which ja and ko use, btw).
- */
-U_CAPI int32_t U_EXPORT2
-uloc_getDisplayName(const char *locale,
- const char *displayLocale,
- UChar *dest, int32_t destCapacity,
- UErrorCode *pErrorCode)
-{
- static const UChar defaultSeparator[9] = { 0x007b, 0x0030, 0x007d, 0x002c, 0x0020, 0x007b, 0x0031, 0x007d, 0x0000 }; /* "{0}, {1}" */
- static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */
- static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */
- static const int32_t subLen = 3;
- static const UChar defaultPattern[10] = {
- 0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000
- }; /* {0} ({1}) */
- static const int32_t defaultPatLen = 9;
- static const int32_t defaultSub0Pos = 0;
- static const int32_t defaultSub1Pos = 5;
-
- int32_t length; /* of formatted result */
-
- const UChar *separator;
- int32_t sepLen = 0;
- const UChar *pattern;
- int32_t patLen = 0;
- int32_t sub0Pos, sub1Pos;
-
- UChar formatOpenParen = 0x0028; // (
- UChar formatReplaceOpenParen = 0x005B; // [
- UChar formatCloseParen = 0x0029; // )
- UChar formatReplaceCloseParen = 0x005D; // ]
-
- UBool haveLang = TRUE; /* assume true, set false if we find we don't have
- a lang component in the locale */
- UBool haveRest = TRUE; /* assume true, set false if we find we don't have
- any other component in the locale */
- UBool retry = FALSE; /* set true if we need to retry, see below */
-
- int32_t langi = 0; /* index of the language substitution (0 or 1), virtually always 0 */
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- {
- UErrorCode status = U_ZERO_ERROR;
-
- icu::LocalUResourceBundlePointer locbundle(
- ures_open(U_ICUDATA_LANG, displayLocale, &status));
- icu::LocalUResourceBundlePointer dspbundle(
- ures_getByKeyWithFallback(locbundle.getAlias(), _kLocaleDisplayPattern, NULL, &status));
-
- separator=ures_getStringByKeyWithFallback(dspbundle.getAlias(), _kSeparator, &sepLen, &status);
- pattern=ures_getStringByKeyWithFallback(dspbundle.getAlias(), _kPattern, &patLen, &status);
- }
-
- /* If we couldn't find any data, then use the defaults */
- if(sepLen == 0) {
- separator = defaultSeparator;
- }
- /* #10244: Even though separator is now a pattern, it is awkward to handle it as such
- * here since we are trying to build the display string in place in the dest buffer,
- * and to handle it as a pattern would entail having separate storage for the
- * substrings that need to be combined (the first of which may be the result of
- * previous such combinations). So for now we continue to treat the portion between
- * {0} and {1} as a string to be appended when joining substrings, ignoring anything
- * that is before {0} or after {1} (no existing separator pattern has any such thing).
- * This is similar to how pattern is handled below.
- */
- {
- UChar *p0=u_strstr(separator, sub0);
- UChar *p1=u_strstr(separator, sub1);
- if (p0==NULL || p1==NULL || p1<p0) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- separator = (const UChar *)p0 + subLen;
- sepLen = static_cast<int32_t>(p1 - separator);
- }
-
- if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
- pattern=defaultPattern;
- patLen=defaultPatLen;
- sub0Pos=defaultSub0Pos;
- sub1Pos=defaultSub1Pos;
- // use default formatOpenParen etc. set above
- } else { /* non-default pattern */
- UChar *p0=u_strstr(pattern, sub0);
- UChar *p1=u_strstr(pattern, sub1);
- if (p0==NULL || p1==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- sub0Pos = static_cast<int32_t>(p0-pattern);
- sub1Pos = static_cast<int32_t>(p1-pattern);
- if (sub1Pos < sub0Pos) { /* a very odd pattern */
- int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
- langi=1;
- }
- if (u_strchr(pattern, 0xFF08) != NULL) {
- formatOpenParen = 0xFF08; // fullwidth (
- formatReplaceOpenParen = 0xFF3B; // fullwidth [
- formatCloseParen = 0xFF09; // fullwidth )
- formatReplaceCloseParen = 0xFF3D; // fullwidth ]
- }
- }
-
- /* We loop here because there is one case in which after the first pass we could need to
- * reextract the data. If there's initial padding before the first element, we put in
- * the padding and then write that element. If it turns out there's no second element,
- * we didn't need the padding. If we do need the data (no preflight), and the first element
- * would have fit but for the padding, we need to reextract. In this case (only) we
- * adjust the parameters so padding is not added, and repeat.
- */
- do {
- UChar* p=dest;
- int32_t patPos=0; /* position in the pattern, used for non-substitution portions */
- int32_t langLen=0; /* length of language substitution */
- int32_t langPos=0; /* position in output of language substitution */
- int32_t restLen=0; /* length of 'everything else' substitution */
- int32_t restPos=0; /* position in output of 'everything else' substitution */
- icu::LocalUEnumerationPointer kenum; /* keyword enumeration */
-
- /* prefix of pattern, extremely likely to be empty */
- if(sub0Pos) {
- if(destCapacity >= sub0Pos) {
- while (patPos < sub0Pos) {
- *p++ = pattern[patPos++];
- }
- } else {
- patPos=sub0Pos;
- }
- length=sub0Pos;
- } else {
- length=0;
- }
-
- for(int32_t subi=0,resti=0;subi<2;) { /* iterate through patterns 0 and 1*/
- UBool subdone = FALSE; /* set true when ready to move to next substitution */
-
- /* prep p and cap for calls to get display components, pin cap to 0 since
- they complain if cap is negative */
- int32_t cap=destCapacity-length;
- if (cap <= 0) {
- cap=0;
- } else {
- p=dest+length;
- }
-
- if (subi == langi) { /* {0}*/
- if(haveLang) {
- langPos=length;
- langLen=uloc_getDisplayLanguage(locale, displayLocale, p, cap, pErrorCode);
- length+=langLen;
- haveLang=langLen>0;
- }
- subdone=TRUE;
- } else { /* {1} */
- if(!haveRest) {
- subdone=TRUE;
- } else {
- int32_t len; /* length of component (plus other stuff) we just fetched */
- switch(resti++) {
- case 0:
- restPos=length;
- len=uloc_getDisplayScriptInContext(locale, displayLocale, p, cap, pErrorCode);
- break;
- case 1:
- len=uloc_getDisplayCountry(locale, displayLocale, p, cap, pErrorCode);
- break;
- case 2:
- len=uloc_getDisplayVariant(locale, displayLocale, p, cap, pErrorCode);
- break;
- case 3:
- kenum.adoptInstead(uloc_openKeywords(locale, pErrorCode));
- U_FALLTHROUGH;
- default: {
- const char* kw=uenum_next(kenum.getAlias(), &len, pErrorCode);
- if (kw == NULL) {
- len=0; /* mark that we didn't add a component */
- subdone=TRUE;
- } else {
- /* incorporating this behavior into the loop made it even more complex,
- so just special case it here */
- len = uloc_getDisplayKeyword(kw, displayLocale, p, cap, pErrorCode);
- if(len) {
- if(len < cap) {
- p[len]=0x3d; /* '=', assume we'll need it */
- }
- len+=1;
-
- /* adjust for call to get keyword */
- cap-=len;
- if(cap <= 0) {
- cap=0;
- } else {
- p+=len;
- }
- }
- /* reset for call below */
- if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
- *pErrorCode=U_ZERO_ERROR;
- }
- int32_t vlen = uloc_getDisplayKeywordValue(locale, kw, displayLocale,
- p, cap, pErrorCode);
- if(len) {
- if(vlen==0) {
- --len; /* remove unneeded '=' */
- }
- /* restore cap and p to what they were at start */
- cap=destCapacity-length;
- if(cap <= 0) {
- cap=0;
- } else {
- p=dest+length;
- }
- }
- len+=vlen; /* total we added for key + '=' + value */
- }
- } break;
- } /* end switch */
-
- if (len>0) {
- /* we addeed a component, so add separator and write it if there's room. */
- if(len+sepLen<=cap) {
- const UChar * plimit = p + len;
- for (; p < plimit; p++) {
- if (*p == formatOpenParen) {
- *p = formatReplaceOpenParen;
- } else if (*p == formatCloseParen) {
- *p = formatReplaceCloseParen;
- }
- }
- for(int32_t i=0;i<sepLen;++i) {
- *p++=separator[i];
- }
- }
- length+=len+sepLen;
- } else if(subdone) {
- /* remove separator if we added it */
- if (length!=restPos) {
- length-=sepLen;
- }
- restLen=length-restPos;
- haveRest=restLen>0;
- }
- }
- }
-
- if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
- *pErrorCode=U_ZERO_ERROR;
- }
-
- if(subdone) {
- if(haveLang && haveRest) {
- /* append internal portion of pattern, the first time,
- or last portion of pattern the second time */
- int32_t padLen;
- patPos+=subLen;
- padLen=(subi==0 ? sub1Pos : patLen)-patPos;
- if(length+padLen < destCapacity) {
- p=dest+length;
- for(int32_t i=0;i<padLen;++i) {
- *p++=pattern[patPos++];
- }
- } else {
- patPos+=padLen;
- }
- length+=padLen;
- } else if(subi==0) {
- /* don't have first component, reset for second component */
- sub0Pos=0;
- length=0;
- } else if(length>0) {
- /* true length is the length of just the component we got. */
- length=haveLang?langLen:restLen;
- if(dest && sub0Pos!=0) {
- if (sub0Pos+length<=destCapacity) {
- /* first component not at start of result,
- but we have full component in buffer. */
- u_memmove(dest, dest+(haveLang?langPos:restPos), length);
- } else {
- /* would have fit, but didn't because of pattern prefix. */
- sub0Pos=0; /* stops initial padding (and a second retry,
- so we won't end up here again) */
- retry=TRUE;
- }
- }
- }
-
- ++subi; /* move on to next substitution */
- }
- }
- } while(retry);
-
- return u_terminateUChars(dest, destCapacity, length, pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-uloc_getDisplayKeyword(const char* keyword,
- const char* displayLocale,
- UChar* dest,
- int32_t destCapacity,
- UErrorCode* status){
-
- /* argument checking */
- if(status==NULL || U_FAILURE(*status)) {
- return 0;
- }
-
- if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
- *status=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
-
- /* pass itemKey=NULL to look for a top-level item */
- return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
- _kKeys, NULL,
- keyword,
- keyword,
- dest, destCapacity,
- status);
-
-}
-
-
-#define UCURRENCY_DISPLAY_NAME_INDEX 1
-
-U_CAPI int32_t U_EXPORT2
-uloc_getDisplayKeywordValue( const char* locale,
- const char* keyword,
- const char* displayLocale,
- UChar* dest,
- int32_t destCapacity,
- UErrorCode* status){
-
-
- char keywordValue[ULOC_FULLNAME_CAPACITY*4];
- int32_t capacity = ULOC_FULLNAME_CAPACITY*4;
- int32_t keywordValueLen =0;
-
- /* argument checking */
- if(status==NULL || U_FAILURE(*status)) {
- return 0;
- }
-
- if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
- *status=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* get the keyword value */
- keywordValue[0]=0;
- keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);
- if (*status == U_STRING_NOT_TERMINATED_WARNING)
- *status = U_BUFFER_OVERFLOW_ERROR;
-
- /*
- * if the keyword is equal to currency .. then to get the display name
- * we need to do the fallback ourselves
- */
- if(uprv_stricmp(keyword, _kCurrency)==0){
-
- int32_t dispNameLen = 0;
- const UChar *dispName = NULL;
-
- icu::LocalUResourceBundlePointer bundle(
- ures_open(U_ICUDATA_CURR, displayLocale, status));
- icu::LocalUResourceBundlePointer currencies(
- ures_getByKey(bundle.getAlias(), _kCurrencies, NULL, status));
- icu::LocalUResourceBundlePointer currency(
- ures_getByKeyWithFallback(currencies.getAlias(), keywordValue, NULL, status));
-
- dispName = ures_getStringByIndex(currency.getAlias(), UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);
-
- if(U_FAILURE(*status)){
- if(*status == U_MISSING_RESOURCE_ERROR){
- /* we just want to write the value over if nothing is available */
- *status = U_USING_DEFAULT_WARNING;
- }else{
- return 0;
- }
- }
-
- /* now copy the dispName over if not NULL */
- if(dispName != NULL){
- if(dispNameLen <= destCapacity){
- u_memcpy(dest, dispName, dispNameLen);
- return u_terminateUChars(dest, destCapacity, dispNameLen, status);
- }else{
- *status = U_BUFFER_OVERFLOW_ERROR;
- return dispNameLen;
- }
- }else{
- /* we have not found the display name for the value .. just copy over */
- if(keywordValueLen <= destCapacity){
- u_charsToUChars(keywordValue, dest, keywordValueLen);
- return u_terminateUChars(dest, destCapacity, keywordValueLen, status);
- }else{
- *status = U_BUFFER_OVERFLOW_ERROR;
- return keywordValueLen;
- }
- }
-
-
- }else{
-
- return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
- _kTypes, keyword,
- keywordValue,
- keywordValue,
- dest, destCapacity,
- status);
- }
-}
diff --git a/contrib/libs/icu/common/locdistance.cpp b/contrib/libs/icu/common/locdistance.cpp
deleted file mode 100644
index 18e4d91bce9..00000000000
--- a/contrib/libs/icu/common/locdistance.cpp
+++ /dev/null
@@ -1,415 +0,0 @@
-// © 2019 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-
-// locdistance.cpp
-// created: 2019may08 Markus W. Scherer
-
-#include "unicode/utypes.h"
-#include "unicode/bytestrie.h"
-#include "unicode/localematcher.h"
-#include "unicode/locid.h"
-#include "unicode/uobject.h"
-#include "unicode/ures.h"
-#include "cstring.h"
-#include "locdistance.h"
-#include "loclikelysubtags.h"
-#include "uassert.h"
-#include "ucln_cmn.h"
-#include "uinvchar.h"
-#include "umutex.h"
-
-U_NAMESPACE_BEGIN
-
-namespace {
-
-/**
- * Bit flag used on the last character of a subtag in the trie.
- * Must be set consistently by the builder and the lookup code.
- */
-constexpr int32_t END_OF_SUBTAG = 0x80;
-/** Distance value bit flag, set by the builder. */
-constexpr int32_t DISTANCE_SKIP_SCRIPT = 0x80;
-/** Distance value bit flag, set by trieNext(). */
-constexpr int32_t DISTANCE_IS_FINAL = 0x100;
-constexpr int32_t DISTANCE_IS_FINAL_OR_SKIP_SCRIPT = DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;
-
-constexpr int32_t ABOVE_THRESHOLD = 100;
-
-// Indexes into array of distances.
-enum {
- IX_DEF_LANG_DISTANCE,
- IX_DEF_SCRIPT_DISTANCE,
- IX_DEF_REGION_DISTANCE,
- IX_MIN_REGION_DISTANCE,
- IX_LIMIT
-};
-
-LocaleDistance *gLocaleDistance = nullptr;
-UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
-
-UBool U_CALLCONV cleanup() {
- delete gLocaleDistance;
- gLocaleDistance = nullptr;
- gInitOnce.reset();
- return TRUE;
-}
-
-} // namespace
-
-void U_CALLCONV LocaleDistance::initLocaleDistance(UErrorCode &errorCode) {
- // This function is invoked only via umtx_initOnce().
- U_ASSERT(gLocaleDistance == nullptr);
- const XLikelySubtags &likely = *XLikelySubtags::getSingleton(errorCode);
- if (U_FAILURE(errorCode)) { return; }
- const LocaleDistanceData &data = likely.getDistanceData();
- if (data.distanceTrieBytes == nullptr ||
- data.regionToPartitions == nullptr || data.partitions == nullptr ||
- // ok if no paradigms
- data.distances == nullptr) {
- errorCode = U_MISSING_RESOURCE_ERROR;
- return;
- }
- gLocaleDistance = new LocaleDistance(data, likely);
- if (gLocaleDistance == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- ucln_common_registerCleanup(UCLN_COMMON_LOCALE_DISTANCE, cleanup);
-}
-
-const LocaleDistance *LocaleDistance::getSingleton(UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return nullptr; }
- umtx_initOnce(gInitOnce, &LocaleDistance::initLocaleDistance, errorCode);
- return gLocaleDistance;
-}
-
-LocaleDistance::LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely) :
- likelySubtags(likely),
- trie(data.distanceTrieBytes),
- regionToPartitionsIndex(data.regionToPartitions), partitionArrays(data.partitions),
- paradigmLSRs(data.paradigms), paradigmLSRsLength(data.paradigmsLength),
- defaultLanguageDistance(data.distances[IX_DEF_LANG_DISTANCE]),
- defaultScriptDistance(data.distances[IX_DEF_SCRIPT_DISTANCE]),
- defaultRegionDistance(data.distances[IX_DEF_REGION_DISTANCE]),
- minRegionDistance(data.distances[IX_MIN_REGION_DISTANCE]) {
- // For the default demotion value, use the
- // default region distance between unrelated Englishes.
- // Thus, unless demotion is turned off,
- // a mere region difference for one desired locale
- // is as good as a perfect match for the next following desired locale.
- // As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
- LSR en("en", "Latn", "US", LSR::EXPLICIT_LSR);
- LSR enGB("en", "Latn", "GB", LSR::EXPLICIT_LSR);
- const LSR *p_enGB = &enGB;
- int32_t indexAndDistance = getBestIndexAndDistance(en, &p_enGB, 1,
- shiftDistance(50), ULOCMATCH_FAVOR_LANGUAGE, ULOCMATCH_DIRECTION_WITH_ONE_WAY);
- defaultDemotionPerDesiredLocale = getDistanceFloor(indexAndDistance);
-}
-
-int32_t LocaleDistance::getBestIndexAndDistance(
- const LSR &desired,
- const LSR **supportedLSRs, int32_t supportedLSRsLength,
- int32_t shiftedThreshold,
- ULocMatchFavorSubtag favorSubtag, ULocMatchDirection direction) const {
- BytesTrie iter(trie);
- // Look up the desired language only once for all supported LSRs.
- // Its "distance" is either a match point value of 0, or a non-match negative value.
- // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
- int32_t desLangDistance = trieNext(iter, desired.language, false);
- uint64_t desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0;
- // Index of the supported LSR with the lowest distance.
- int32_t bestIndex = -1;
- // Cached lookup info from XLikelySubtags.compareLikely().
- int32_t bestLikelyInfo = -1;
- for (int32_t slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) {
- const LSR &supported = *supportedLSRs[slIndex];
- bool star = false;
- int32_t distance = desLangDistance;
- if (distance >= 0) {
- U_ASSERT((distance & DISTANCE_IS_FINAL) == 0);
- if (slIndex != 0) {
- iter.resetToState64(desLangState);
- }
- distance = trieNext(iter, supported.language, true);
- }
- // Note: The data builder verifies that there are no rules with "any" (*) language and
- // real (non *) script or region subtags.
- // This means that if the lookup for either language fails we can use
- // the default distances without further lookups.
- int32_t flags;
- if (distance >= 0) {
- flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
- distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
- } else { // <*, *>
- if (uprv_strcmp(desired.language, supported.language) == 0) {
- distance = 0;
- } else {
- distance = defaultLanguageDistance;
- }
- flags = 0;
- star = true;
- }
- U_ASSERT(0 <= distance && distance <= 100);
- // Round up the shifted threshold (if fraction bits are not 0)
- // for comparison with un-shifted distances until we need fraction bits.
- // (If we simply shifted non-zero fraction bits away, then we might ignore a language
- // when it's really still a micro distance below the threshold.)
- int32_t roundedThreshold = (shiftedThreshold + DISTANCE_FRACTION_MASK) >> DISTANCE_SHIFT;
- // We implement "favor subtag" by reducing the language subtag distance
- // (unscientifically reducing it to a quarter of the normal value),
- // so that the script distance is relatively more important.
- // For example, given a default language distance of 80, we reduce it to 20,
- // which is below the default threshold of 50, which is the default script distance.
- if (favorSubtag == ULOCMATCH_FAVOR_SCRIPT) {
- distance >>= 2;
- }
- // Let distance == roundedThreshold pass until the tie-breaker logic
- // at the end of the loop.
- if (distance > roundedThreshold) {
- continue;
- }
-
- int32_t scriptDistance;
- if (star || flags != 0) {
- if (uprv_strcmp(desired.script, supported.script) == 0) {
- scriptDistance = 0;
- } else {
- scriptDistance = defaultScriptDistance;
- }
- } else {
- scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(),
- desired.script, supported.script);
- flags = scriptDistance & DISTANCE_IS_FINAL;
- scriptDistance &= ~DISTANCE_IS_FINAL;
- }
- distance += scriptDistance;
- if (distance > roundedThreshold) {
- continue;
- }
-
- if (uprv_strcmp(desired.region, supported.region) == 0) {
- // regionDistance = 0
- } else if (star || (flags & DISTANCE_IS_FINAL) != 0) {
- distance += defaultRegionDistance;
- } else {
- int32_t remainingThreshold = roundedThreshold - distance;
- if (minRegionDistance > remainingThreshold) {
- continue;
- }
-
- // From here on we know the regions are not equal.
- // Map each region to zero or more partitions. (zero = one non-matching string)
- // (Each array of single-character partition strings is encoded as one string.)
- // If either side has more than one, then we find the maximum distance.
- // This could be optimized by adding some more structure, but probably not worth it.
- distance += getRegionPartitionsDistance(
- iter, iter.getState64(),
- partitionsForRegion(desired),
- partitionsForRegion(supported),
- remainingThreshold);
- }
- int32_t shiftedDistance = shiftDistance(distance);
- if (shiftedDistance == 0) {
- // Distinguish between equivalent but originally unequal locales via an
- // additional micro distance.
- shiftedDistance |= (desired.flags ^ supported.flags);
- if (shiftedDistance < shiftedThreshold) {
- if (direction != ULOCMATCH_DIRECTION_ONLY_TWO_WAY ||
- // Is there also a match when we swap desired/supported?
- isMatch(supported, desired, shiftedThreshold, favorSubtag)) {
- if (shiftedDistance == 0) {
- return slIndex << INDEX_SHIFT;
- }
- bestIndex = slIndex;
- shiftedThreshold = shiftedDistance;
- bestLikelyInfo = -1;
- }
- }
- } else {
- if (shiftedDistance < shiftedThreshold) {
- if (direction != ULOCMATCH_DIRECTION_ONLY_TWO_WAY ||
- // Is there also a match when we swap desired/supported?
- isMatch(supported, desired, shiftedThreshold, favorSubtag)) {
- bestIndex = slIndex;
- shiftedThreshold = shiftedDistance;
- bestLikelyInfo = -1;
- }
- } else if (shiftedDistance == shiftedThreshold && bestIndex >= 0) {
- if (direction != ULOCMATCH_DIRECTION_ONLY_TWO_WAY ||
- // Is there also a match when we swap desired/supported?
- isMatch(supported, desired, shiftedThreshold, favorSubtag)) {
- bestLikelyInfo = likelySubtags.compareLikely(
- supported, *supportedLSRs[bestIndex], bestLikelyInfo);
- if ((bestLikelyInfo & 1) != 0) {
- // This supported locale matches as well as the previous best match,
- // and neither matches perfectly,
- // but this one is "more likely" (has more-default subtags).
- bestIndex = slIndex;
- }
- }
- }
- }
- }
- return bestIndex >= 0 ?
- (bestIndex << INDEX_SHIFT) | shiftedThreshold :
- INDEX_NEG_1 | shiftDistance(ABOVE_THRESHOLD);
-}
-
-int32_t LocaleDistance::getDesSuppScriptDistance(
- BytesTrie &iter, uint64_t startState, const char *desired, const char *supported) {
- // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
- int32_t distance = trieNext(iter, desired, false);
- if (distance >= 0) {
- distance = trieNext(iter, supported, true);
- }
- if (distance < 0) {
- UStringTrieResult result = iter.resetToState64(startState).next(u'*'); // <*, *>
- U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
- if (uprv_strcmp(desired, supported) == 0) {
- distance = 0; // same script
- } else {
- distance = iter.getValue();
- U_ASSERT(distance >= 0);
- }
- if (result == USTRINGTRIE_FINAL_VALUE) {
- distance |= DISTANCE_IS_FINAL;
- }
- }
- return distance;
-}
-
-int32_t LocaleDistance::getRegionPartitionsDistance(
- BytesTrie &iter, uint64_t startState,
- const char *desiredPartitions, const char *supportedPartitions, int32_t threshold) {
- char desired = *desiredPartitions++;
- char supported = *supportedPartitions++;
- U_ASSERT(desired != 0 && supported != 0);
- // See if we have single desired/supported partitions, from NUL-terminated
- // partition strings without explicit length.
- bool suppLengthGt1 = *supportedPartitions != 0; // gt1: more than 1 character
- // equivalent to: if (desLength == 1 && suppLength == 1)
- if (*desiredPartitions == 0 && !suppLengthGt1) {
- // Fastpath for single desired/supported partitions.
- UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
- if (USTRINGTRIE_HAS_NEXT(result)) {
- result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
- if (USTRINGTRIE_HAS_VALUE(result)) {
- return iter.getValue();
- }
- }
- return getFallbackRegionDistance(iter, startState);
- }
-
- const char *supportedStart = supportedPartitions - 1; // for restart of inner loop
- int32_t regionDistance = 0;
- // Fall back to * only once, not for each pair of partition strings.
- bool star = false;
- for (;;) {
- // Look up each desired-partition string only once,
- // not for each (desired, supported) pair.
- UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
- if (USTRINGTRIE_HAS_NEXT(result)) {
- uint64_t desState = suppLengthGt1 ? iter.getState64() : 0;
- for (;;) {
- result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
- int32_t d;
- if (USTRINGTRIE_HAS_VALUE(result)) {
- d = iter.getValue();
- } else if (star) {
- d = 0;
- } else {
- d = getFallbackRegionDistance(iter, startState);
- star = true;
- }
- if (d > threshold) {
- return d;
- } else if (regionDistance < d) {
- regionDistance = d;
- }
- if ((supported = *supportedPartitions++) != 0) {
- iter.resetToState64(desState);
- } else {
- break;
- }
- }
- } else if (!star) {
- int32_t d = getFallbackRegionDistance(iter, startState);
- if (d > threshold) {
- return d;
- } else if (regionDistance < d) {
- regionDistance = d;
- }
- star = true;
- }
- if ((desired = *desiredPartitions++) != 0) {
- iter.resetToState64(startState);
- supportedPartitions = supportedStart;
- supported = *supportedPartitions++;
- } else {
- break;
- }
- }
- return regionDistance;
-}
-
-int32_t LocaleDistance::getFallbackRegionDistance(BytesTrie &iter, uint64_t startState) {
-#if U_DEBUG
- UStringTrieResult result =
-#endif
- iter.resetToState64(startState).next(u'*'); // <*, *>
- U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
- int32_t distance = iter.getValue();
- U_ASSERT(distance >= 0);
- return distance;
-}
-
-int32_t LocaleDistance::trieNext(BytesTrie &iter, const char *s, bool wantValue) {
- uint8_t c;
- if ((c = *s) == 0) {
- return -1; // no empty subtags in the distance data
- }
- for (;;) {
- c = uprv_invCharToAscii(c);
- // EBCDIC: If *s is not an invariant character,
- // then c is now 0 and will simply not match anything, which is harmless.
- uint8_t next = *++s;
- if (next != 0) {
- if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
- return -1;
- }
- } else {
- // last character of this subtag
- UStringTrieResult result = iter.next(c | END_OF_SUBTAG);
- if (wantValue) {
- if (USTRINGTRIE_HAS_VALUE(result)) {
- int32_t value = iter.getValue();
- if (result == USTRINGTRIE_FINAL_VALUE) {
- value |= DISTANCE_IS_FINAL;
- }
- return value;
- }
- } else {
- if (USTRINGTRIE_HAS_NEXT(result)) {
- return 0;
- }
- }
- return -1;
- }
- c = next;
- }
-}
-
-UBool LocaleDistance::isParadigmLSR(const LSR &lsr) const {
- // Linear search for a very short list (length 6 as of 2019),
- // because we look for equivalence not equality, and
- // because it's easy.
- // If there are many paradigm LSRs we should use a hash set
- // with custom comparator and hasher.
- U_ASSERT(paradigmLSRsLength <= 15);
- for (int32_t i = 0; i < paradigmLSRsLength; ++i) {
- if (lsr.isEquivalentTo(paradigmLSRs[i])) { return true; }
- }
- return false;
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/locdistance.h b/contrib/libs/icu/common/locdistance.h
deleted file mode 100644
index ad841513833..00000000000
--- a/contrib/libs/icu/common/locdistance.h
+++ /dev/null
@@ -1,151 +0,0 @@
-// © 2019 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-
-// locdistance.h
-// created: 2019may08 Markus W. Scherer
-
-#ifndef __LOCDISTANCE_H__
-#define __LOCDISTANCE_H__
-
-#include "unicode/utypes.h"
-#include "unicode/bytestrie.h"
-#include "unicode/localematcher.h"
-#include "unicode/locid.h"
-#include "unicode/uobject.h"
-#include "lsr.h"
-
-U_NAMESPACE_BEGIN
-
-struct LocaleDistanceData;
-
-/**
- * Offline-built data for LocaleMatcher.
- * Mostly but not only the data for mapping locales to their maximized forms.
- */
-class LocaleDistance final : public UMemory {
-public:
- static const LocaleDistance *getSingleton(UErrorCode &errorCode);
-
- static int32_t shiftDistance(int32_t distance) {
- return distance << DISTANCE_SHIFT;
- }
-
- static int32_t getShiftedDistance(int32_t indexAndDistance) {
- return indexAndDistance & DISTANCE_MASK;
- }
-
- static double getDistanceDouble(int32_t indexAndDistance) {
- double shiftedDistance = getShiftedDistance(indexAndDistance);
- return shiftedDistance / (1 << DISTANCE_SHIFT);
- }
-
- static int32_t getIndex(int32_t indexAndDistance) {
- // assert indexAndDistance >= 0;
- return indexAndDistance >> INDEX_SHIFT;
- }
-
- /**
- * Finds the supported LSR with the smallest distance from the desired one.
- * Equivalent LSR subtags must be normalized into a canonical form.
- *
- * <p>Returns the index of the lowest-distance supported LSR in the high bits
- * (negative if none has a distance below the threshold),
- * and its distance (0..ABOVE_THRESHOLD) in the low bits.
- */
- int32_t getBestIndexAndDistance(const LSR &desired,
- const LSR **supportedLSRs, int32_t supportedLSRsLength,
- int32_t shiftedThreshold,
- ULocMatchFavorSubtag favorSubtag,
- ULocMatchDirection direction) const;
-
- UBool isParadigmLSR(const LSR &lsr) const;
-
- int32_t getDefaultScriptDistance() const {
- return defaultScriptDistance;
- }
-
- int32_t getDefaultDemotionPerDesiredLocale() const {
- return defaultDemotionPerDesiredLocale;
- }
-
-private:
- // The distance is shifted left to gain some fraction bits.
- static constexpr int32_t DISTANCE_SHIFT = 3;
- static constexpr int32_t DISTANCE_FRACTION_MASK = 7;
- // 7 bits for 0..100
- static constexpr int32_t DISTANCE_INT_SHIFT = 7;
- static constexpr int32_t INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT;
- static constexpr int32_t DISTANCE_MASK = 0x3ff;
- // tic constexpr int32_t MAX_INDEX = 0x1fffff; // avoids sign bit
- static constexpr int32_t INDEX_NEG_1 = 0xfffffc00;
-
- static int32_t getDistanceFloor(int32_t indexAndDistance) {
- return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;
- }
-
- LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely);
- LocaleDistance(const LocaleDistance &other) = delete;
- LocaleDistance &operator=(const LocaleDistance &other) = delete;
-
- static void initLocaleDistance(UErrorCode &errorCode);
-
- UBool isMatch(const LSR &desired, const LSR &supported,
- int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const {
- const LSR *pSupp = &supported;
- return getBestIndexAndDistance(
- desired, &pSupp, 1,
- shiftedThreshold, favorSubtag, ULOCMATCH_DIRECTION_WITH_ONE_WAY) >= 0;
- }
-
- static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
- const char *desired, const char *supported);
-
- static int32_t getRegionPartitionsDistance(
- BytesTrie &iter, uint64_t startState,
- const char *desiredPartitions, const char *supportedPartitions,
- int32_t threshold);
-
- static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);
-
- static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);
-
- const char *partitionsForRegion(const LSR &lsr) const {
- // ill-formed region -> one non-matching string
- int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
- return partitionArrays[pIndex];
- }
-
- int32_t getDefaultRegionDistance() const {
- return defaultRegionDistance;
- }
-
- const XLikelySubtags &likelySubtags;
-
- // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
- // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
- // There is also a trie value for each subsequence of whole subtags.
- // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
- BytesTrie trie;
-
- /**
- * Maps each region to zero or more single-character partitions.
- */
- const uint8_t *regionToPartitionsIndex;
- const char **partitionArrays;
-
- /**
- * Used to get the paradigm region for a cluster, if there is one.
- */
- const LSR *paradigmLSRs;
- int32_t paradigmLSRsLength;
-
- int32_t defaultLanguageDistance;
- int32_t defaultScriptDistance;
- int32_t defaultRegionDistance;
- int32_t minRegionDistance;
- int32_t defaultDemotionPerDesiredLocale;
-};
-
-U_NAMESPACE_END
-
-#endif // __LOCDISTANCE_H__
diff --git a/contrib/libs/icu/common/locdspnm.cpp b/contrib/libs/icu/common/locdspnm.cpp
deleted file mode 100644
index 43334f51964..00000000000
--- a/contrib/libs/icu/common/locdspnm.cpp
+++ /dev/null
@@ -1,1110 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2010-2016, International Business Machines Corporation and
-* others. All Rights Reserved.
-*******************************************************************************
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/locdspnm.h"
-#include "unicode/simpleformatter.h"
-#include "unicode/ucasemap.h"
-#include "unicode/ures.h"
-#include "unicode/udisplaycontext.h"
-#include "unicode/brkiter.h"
-#include "unicode/ucurr.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "mutex.h"
-#include "ulocimp.h"
-#include "umutex.h"
-#include "ureslocs.h"
-#include "uresimp.h"
-
-#include <stdarg.h>
-
-/**
- * Concatenate a number of null-terminated strings to buffer, leaving a
- * null-terminated string. The last argument should be the null pointer.
- * Return the length of the string in the buffer, not counting the trailing
- * null. Return -1 if there is an error (buffer is null, or buflen < 1).
- */
-static int32_t ncat(char *buffer, uint32_t buflen, ...) {
- va_list args;
- char *str;
- char *p = buffer;
- const char* e = buffer + buflen - 1;
-
- if (buffer == NULL || buflen < 1) {
- return -1;
- }
-
- va_start(args, buflen);
- while ((str = va_arg(args, char *)) != 0) {
- char c;
- while (p != e && (c = *str++) != 0) {
- *p++ = c;
- }
- }
- *p = 0;
- va_end(args);
-
- return static_cast<int32_t>(p - buffer);
-}
-
-U_NAMESPACE_BEGIN
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-// Access resource data for locale components.
-// Wrap code in uloc.c for now.
-class ICUDataTable {
- const char* path;
- Locale locale;
-
-public:
- ICUDataTable(const char* path, const Locale& locale);
- ~ICUDataTable();
-
- const Locale& getLocale();
-
- UnicodeString& get(const char* tableKey, const char* itemKey,
- UnicodeString& result) const;
- UnicodeString& get(const char* tableKey, const char* subTableKey, const char* itemKey,
- UnicodeString& result) const;
-
- UnicodeString& getNoFallback(const char* tableKey, const char* itemKey,
- UnicodeString &result) const;
- UnicodeString& getNoFallback(const char* tableKey, const char* subTableKey, const char* itemKey,
- UnicodeString &result) const;
-};
-
-inline UnicodeString &
-ICUDataTable::get(const char* tableKey, const char* itemKey, UnicodeString& result) const {
- return get(tableKey, NULL, itemKey, result);
-}
-
-inline UnicodeString &
-ICUDataTable::getNoFallback(const char* tableKey, const char* itemKey, UnicodeString& result) const {
- return getNoFallback(tableKey, NULL, itemKey, result);
-}
-
-ICUDataTable::ICUDataTable(const char* path, const Locale& locale)
- : path(NULL), locale(Locale::getRoot())
-{
- if (path) {
- int32_t len = static_cast<int32_t>(uprv_strlen(path));
- this->path = (const char*) uprv_malloc(len + 1);
- if (this->path) {
- uprv_strcpy((char *)this->path, path);
- this->locale = locale;
- }
- }
-}
-
-ICUDataTable::~ICUDataTable() {
- if (path) {
- uprv_free((void*) path);
- path = NULL;
- }
-}
-
-const Locale&
-ICUDataTable::getLocale() {
- return locale;
-}
-
-UnicodeString &
-ICUDataTable::get(const char* tableKey, const char* subTableKey, const char* itemKey,
- UnicodeString &result) const {
- UErrorCode status = U_ZERO_ERROR;
- int32_t len = 0;
-
- const UChar *s = uloc_getTableStringWithFallback(path, locale.getName(),
- tableKey, subTableKey, itemKey,
- &len, &status);
- if (U_SUCCESS(status) && len > 0) {
- return result.setTo(s, len);
- }
- return result.setTo(UnicodeString(itemKey, -1, US_INV));
-}
-
-UnicodeString &
-ICUDataTable::getNoFallback(const char* tableKey, const char* subTableKey, const char* itemKey,
- UnicodeString& result) const {
- UErrorCode status = U_ZERO_ERROR;
- int32_t len = 0;
-
- const UChar *s = uloc_getTableStringWithFallback(path, locale.getName(),
- tableKey, subTableKey, itemKey,
- &len, &status);
- if (U_SUCCESS(status)) {
- return result.setTo(s, len);
- }
-
- result.setToBogus();
- return result;
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-LocaleDisplayNames::~LocaleDisplayNames() {}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#if 0 // currently unused
-
-class DefaultLocaleDisplayNames : public LocaleDisplayNames {
- UDialectHandling dialectHandling;
-
-public:
- // constructor
- DefaultLocaleDisplayNames(UDialectHandling dialectHandling);
-
- virtual ~DefaultLocaleDisplayNames();
-
- virtual const Locale& getLocale() const;
- virtual UDialectHandling getDialectHandling() const;
-
- virtual UnicodeString& localeDisplayName(const Locale& locale,
- UnicodeString& result) const;
- virtual UnicodeString& localeDisplayName(const char* localeId,
- UnicodeString& result) const;
- virtual UnicodeString& languageDisplayName(const char* lang,
- UnicodeString& result) const;
- virtual UnicodeString& scriptDisplayName(const char* script,
- UnicodeString& result) const;
- virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode,
- UnicodeString& result) const;
- virtual UnicodeString& regionDisplayName(const char* region,
- UnicodeString& result) const;
- virtual UnicodeString& variantDisplayName(const char* variant,
- UnicodeString& result) const;
- virtual UnicodeString& keyDisplayName(const char* key,
- UnicodeString& result) const;
- virtual UnicodeString& keyValueDisplayName(const char* key,
- const char* value,
- UnicodeString& result) const;
-};
-
-DefaultLocaleDisplayNames::DefaultLocaleDisplayNames(UDialectHandling dialectHandling)
- : dialectHandling(dialectHandling) {
-}
-
-DefaultLocaleDisplayNames::~DefaultLocaleDisplayNames() {
-}
-
-const Locale&
-DefaultLocaleDisplayNames::getLocale() const {
- return Locale::getRoot();
-}
-
-UDialectHandling
-DefaultLocaleDisplayNames::getDialectHandling() const {
- return dialectHandling;
-}
-
-UnicodeString&
-DefaultLocaleDisplayNames::localeDisplayName(const Locale& locale,
- UnicodeString& result) const {
- return result = UnicodeString(locale.getName(), -1, US_INV);
-}
-
-UnicodeString&
-DefaultLocaleDisplayNames::localeDisplayName(const char* localeId,
- UnicodeString& result) const {
- return result = UnicodeString(localeId, -1, US_INV);
-}
-
-UnicodeString&
-DefaultLocaleDisplayNames::languageDisplayName(const char* lang,
- UnicodeString& result) const {
- return result = UnicodeString(lang, -1, US_INV);
-}
-
-UnicodeString&
-DefaultLocaleDisplayNames::scriptDisplayName(const char* script,
- UnicodeString& result) const {
- return result = UnicodeString(script, -1, US_INV);
-}
-
-UnicodeString&
-DefaultLocaleDisplayNames::scriptDisplayName(UScriptCode scriptCode,
- UnicodeString& result) const {
- const char* name = uscript_getName(scriptCode);
- if (name) {
- return result = UnicodeString(name, -1, US_INV);
- }
- return result.remove();
-}
-
-UnicodeString&
-DefaultLocaleDisplayNames::regionDisplayName(const char* region,
- UnicodeString& result) const {
- return result = UnicodeString(region, -1, US_INV);
-}
-
-UnicodeString&
-DefaultLocaleDisplayNames::variantDisplayName(const char* variant,
- UnicodeString& result) const {
- return result = UnicodeString(variant, -1, US_INV);
-}
-
-UnicodeString&
-DefaultLocaleDisplayNames::keyDisplayName(const char* key,
- UnicodeString& result) const {
- return result = UnicodeString(key, -1, US_INV);
-}
-
-UnicodeString&
-DefaultLocaleDisplayNames::keyValueDisplayName(const char* /* key */,
- const char* value,
- UnicodeString& result) const {
- return result = UnicodeString(value, -1, US_INV);
-}
-
-#endif // currently unused class DefaultLocaleDisplayNames
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-class LocaleDisplayNamesImpl : public LocaleDisplayNames {
- Locale locale;
- UDialectHandling dialectHandling;
- ICUDataTable langData;
- ICUDataTable regionData;
- SimpleFormatter separatorFormat;
- SimpleFormatter format;
- SimpleFormatter keyTypeFormat;
- UDisplayContext capitalizationContext;
-#if !UCONFIG_NO_BREAK_ITERATION
- BreakIterator* capitalizationBrkIter;
-#else
- UObject* capitalizationBrkIter;
-#endif
- UnicodeString formatOpenParen;
- UnicodeString formatReplaceOpenParen;
- UnicodeString formatCloseParen;
- UnicodeString formatReplaceCloseParen;
- UDisplayContext nameLength;
- UDisplayContext substitute;
-
- // Constants for capitalization context usage types.
- enum CapContextUsage {
- kCapContextUsageLanguage,
- kCapContextUsageScript,
- kCapContextUsageTerritory,
- kCapContextUsageVariant,
- kCapContextUsageKey,
- kCapContextUsageKeyValue,
- kCapContextUsageCount
- };
- // Capitalization transforms. For each usage type, indicates whether to titlecase for
- // the context specified in capitalizationContext (which we know at construction time)
- UBool fCapitalization[kCapContextUsageCount];
-
-public:
- // constructor
- LocaleDisplayNamesImpl(const Locale& locale, UDialectHandling dialectHandling);
- LocaleDisplayNamesImpl(const Locale& locale, UDisplayContext *contexts, int32_t length);
- virtual ~LocaleDisplayNamesImpl();
-
- virtual const Locale& getLocale() const;
- virtual UDialectHandling getDialectHandling() const;
- virtual UDisplayContext getContext(UDisplayContextType type) const;
-
- virtual UnicodeString& localeDisplayName(const Locale& locale,
- UnicodeString& result) const;
- virtual UnicodeString& localeDisplayName(const char* localeId,
- UnicodeString& result) const;
- virtual UnicodeString& languageDisplayName(const char* lang,
- UnicodeString& result) const;
- virtual UnicodeString& scriptDisplayName(const char* script,
- UnicodeString& result) const;
- virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode,
- UnicodeString& result) const;
- virtual UnicodeString& regionDisplayName(const char* region,
- UnicodeString& result) const;
- virtual UnicodeString& variantDisplayName(const char* variant,
- UnicodeString& result) const;
- virtual UnicodeString& keyDisplayName(const char* key,
- UnicodeString& result) const;
- virtual UnicodeString& keyValueDisplayName(const char* key,
- const char* value,
- UnicodeString& result) const;
-private:
- UnicodeString& localeIdName(const char* localeId,
- UnicodeString& result, bool substitute) const;
- UnicodeString& appendWithSep(UnicodeString& buffer, const UnicodeString& src) const;
- UnicodeString& adjustForUsageAndContext(CapContextUsage usage, UnicodeString& result) const;
- UnicodeString& scriptDisplayName(const char* script, UnicodeString& result, UBool skipAdjust) const;
- UnicodeString& regionDisplayName(const char* region, UnicodeString& result, UBool skipAdjust) const;
- UnicodeString& variantDisplayName(const char* variant, UnicodeString& result, UBool skipAdjust) const;
- UnicodeString& keyDisplayName(const char* key, UnicodeString& result, UBool skipAdjust) const;
- UnicodeString& keyValueDisplayName(const char* key, const char* value,
- UnicodeString& result, UBool skipAdjust) const;
- void initialize(void);
-
- struct CapitalizationContextSink;
-};
-
-LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale,
- UDialectHandling dialectHandling)
- : dialectHandling(dialectHandling)
- , langData(U_ICUDATA_LANG, locale)
- , regionData(U_ICUDATA_REGION, locale)
- , capitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
- , capitalizationBrkIter(NULL)
- , nameLength(UDISPCTX_LENGTH_FULL)
- , substitute(UDISPCTX_SUBSTITUTE)
-{
- initialize();
-}
-
-LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale,
- UDisplayContext *contexts, int32_t length)
- : dialectHandling(ULDN_STANDARD_NAMES)
- , langData(U_ICUDATA_LANG, locale)
- , regionData(U_ICUDATA_REGION, locale)
- , capitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
- , capitalizationBrkIter(NULL)
- , nameLength(UDISPCTX_LENGTH_FULL)
- , substitute(UDISPCTX_SUBSTITUTE)
-{
- while (length-- > 0) {
- UDisplayContext value = *contexts++;
- UDisplayContextType selector = (UDisplayContextType)((uint32_t)value >> 8);
- switch (selector) {
- case UDISPCTX_TYPE_DIALECT_HANDLING:
- dialectHandling = (UDialectHandling)value;
- break;
- case UDISPCTX_TYPE_CAPITALIZATION:
- capitalizationContext = value;
- break;
- case UDISPCTX_TYPE_DISPLAY_LENGTH:
- nameLength = value;
- break;
- case UDISPCTX_TYPE_SUBSTITUTE_HANDLING:
- substitute = value;
- break;
- default:
- break;
- }
- }
- initialize();
-}
-
-struct LocaleDisplayNamesImpl::CapitalizationContextSink : public ResourceSink {
- UBool hasCapitalizationUsage;
- LocaleDisplayNamesImpl& parent;
-
- CapitalizationContextSink(LocaleDisplayNamesImpl& _parent)
- : hasCapitalizationUsage(FALSE), parent(_parent) {}
- virtual ~CapitalizationContextSink();
-
- virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/,
- UErrorCode &errorCode) {
- ResourceTable contexts = value.getTable(errorCode);
- if (U_FAILURE(errorCode)) { return; }
- for (int i = 0; contexts.getKeyAndValue(i, key, value); ++i) {
-
- CapContextUsage usageEnum;
- if (uprv_strcmp(key, "key") == 0) {
- usageEnum = kCapContextUsageKey;
- } else if (uprv_strcmp(key, "keyValue") == 0) {
- usageEnum = kCapContextUsageKeyValue;
- } else if (uprv_strcmp(key, "languages") == 0) {
- usageEnum = kCapContextUsageLanguage;
- } else if (uprv_strcmp(key, "script") == 0) {
- usageEnum = kCapContextUsageScript;
- } else if (uprv_strcmp(key, "territory") == 0) {
- usageEnum = kCapContextUsageTerritory;
- } else if (uprv_strcmp(key, "variant") == 0) {
- usageEnum = kCapContextUsageVariant;
- } else {
- continue;
- }
-
- int32_t len = 0;
- const int32_t* intVector = value.getIntVector(len, errorCode);
- if (U_FAILURE(errorCode)) { return; }
- if (len < 2) { continue; }
-
- int32_t titlecaseInt = (parent.capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU) ? intVector[0] : intVector[1];
- if (titlecaseInt == 0) { continue; }
-
- parent.fCapitalization[usageEnum] = TRUE;
- hasCapitalizationUsage = TRUE;
- }
- }
-};
-
-// Virtual destructors must be defined out of line.
-LocaleDisplayNamesImpl::CapitalizationContextSink::~CapitalizationContextSink() {}
-
-void
-LocaleDisplayNamesImpl::initialize(void) {
- LocaleDisplayNamesImpl *nonConstThis = (LocaleDisplayNamesImpl *)this;
- nonConstThis->locale = langData.getLocale() == Locale::getRoot()
- ? regionData.getLocale()
- : langData.getLocale();
-
- UnicodeString sep;
- langData.getNoFallback("localeDisplayPattern", "separator", sep);
- if (sep.isBogus()) {
- sep = UnicodeString("{0}, {1}", -1, US_INV);
- }
- UErrorCode status = U_ZERO_ERROR;
- separatorFormat.applyPatternMinMaxArguments(sep, 2, 2, status);
-
- UnicodeString pattern;
- langData.getNoFallback("localeDisplayPattern", "pattern", pattern);
- if (pattern.isBogus()) {
- pattern = UnicodeString("{0} ({1})", -1, US_INV);
- }
- format.applyPatternMinMaxArguments(pattern, 2, 2, status);
- if (pattern.indexOf((UChar)0xFF08) >= 0) {
- formatOpenParen.setTo((UChar)0xFF08); // fullwidth (
- formatReplaceOpenParen.setTo((UChar)0xFF3B); // fullwidth [
- formatCloseParen.setTo((UChar)0xFF09); // fullwidth )
- formatReplaceCloseParen.setTo((UChar)0xFF3D); // fullwidth ]
- } else {
- formatOpenParen.setTo((UChar)0x0028); // (
- formatReplaceOpenParen.setTo((UChar)0x005B); // [
- formatCloseParen.setTo((UChar)0x0029); // )
- formatReplaceCloseParen.setTo((UChar)0x005D); // ]
- }
-
- UnicodeString ktPattern;
- langData.get("localeDisplayPattern", "keyTypePattern", ktPattern);
- if (ktPattern.isBogus()) {
- ktPattern = UnicodeString("{0}={1}", -1, US_INV);
- }
- keyTypeFormat.applyPatternMinMaxArguments(ktPattern, 2, 2, status);
-
- uprv_memset(fCapitalization, 0, sizeof(fCapitalization));
-#if !UCONFIG_NO_BREAK_ITERATION
- // Only get the context data if we need it! This is a const object so we know now...
- // Also check whether we will need a break iterator (depends on the data)
- UBool needBrkIter = FALSE;
- if (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE) {
- LocalUResourceBundlePointer resource(ures_open(NULL, locale.getName(), &status));
- if (U_FAILURE(status)) { return; }
- CapitalizationContextSink sink(*this);
- ures_getAllItemsWithFallback(resource.getAlias(), "contextTransforms", sink, status);
- if (status == U_MISSING_RESOURCE_ERROR) {
- // Silently ignore. Not every locale has contextTransforms.
- status = U_ZERO_ERROR;
- } else if (U_FAILURE(status)) {
- return;
- }
- needBrkIter = sink.hasCapitalizationUsage;
- }
- // Get a sentence break iterator if we will need it
- if (needBrkIter || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE) {
- status = U_ZERO_ERROR;
- capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
- if (U_FAILURE(status)) {
- delete capitalizationBrkIter;
- capitalizationBrkIter = NULL;
- }
- }
-#endif
-}
-
-LocaleDisplayNamesImpl::~LocaleDisplayNamesImpl() {
-#if !UCONFIG_NO_BREAK_ITERATION
- delete capitalizationBrkIter;
-#endif
-}
-
-const Locale&
-LocaleDisplayNamesImpl::getLocale() const {
- return locale;
-}
-
-UDialectHandling
-LocaleDisplayNamesImpl::getDialectHandling() const {
- return dialectHandling;
-}
-
-UDisplayContext
-LocaleDisplayNamesImpl::getContext(UDisplayContextType type) const {
- switch (type) {
- case UDISPCTX_TYPE_DIALECT_HANDLING:
- return (UDisplayContext)dialectHandling;
- case UDISPCTX_TYPE_CAPITALIZATION:
- return capitalizationContext;
- case UDISPCTX_TYPE_DISPLAY_LENGTH:
- return nameLength;
- case UDISPCTX_TYPE_SUBSTITUTE_HANDLING:
- return substitute;
- default:
- break;
- }
- return (UDisplayContext)0;
-}
-
-UnicodeString&
-LocaleDisplayNamesImpl::adjustForUsageAndContext(CapContextUsage usage,
- UnicodeString& result) const {
-#if !UCONFIG_NO_BREAK_ITERATION
- // check to see whether we need to titlecase result
- if ( result.length() > 0 && u_islower(result.char32At(0)) && capitalizationBrkIter!= NULL &&
- ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || fCapitalization[usage] ) ) {
- // note fCapitalization[usage] won't be set unless capitalizationContext is UI_LIST_OR_MENU or STANDALONE
- static UMutex capitalizationBrkIterLock;
- Mutex lock(&capitalizationBrkIterLock);
- result.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
- }
-#endif
- return result;
-}
-
-UnicodeString&
-LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc,
- UnicodeString& result) const {
- if (loc.isBogus()) {
- result.setToBogus();
- return result;
- }
- UnicodeString resultName;
-
- const char* lang = loc.getLanguage();
- if (uprv_strlen(lang) == 0) {
- lang = "root";
- }
- const char* script = loc.getScript();
- const char* country = loc.getCountry();
- const char* variant = loc.getVariant();
-
- UBool hasScript = uprv_strlen(script) > 0;
- UBool hasCountry = uprv_strlen(country) > 0;
- UBool hasVariant = uprv_strlen(variant) > 0;
-
- if (dialectHandling == ULDN_DIALECT_NAMES) {
- char buffer[ULOC_FULLNAME_CAPACITY];
- do { // loop construct is so we can break early out of search
- if (hasScript && hasCountry) {
- ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, "_", country, (char *)0);
- localeIdName(buffer, resultName, false);
- if (!resultName.isBogus()) {
- hasScript = FALSE;
- hasCountry = FALSE;
- break;
- }
- }
- if (hasScript) {
- ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, (char *)0);
- localeIdName(buffer, resultName, false);
- if (!resultName.isBogus()) {
- hasScript = FALSE;
- break;
- }
- }
- if (hasCountry) {
- ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", country, (char*)0);
- localeIdName(buffer, resultName, false);
- if (!resultName.isBogus()) {
- hasCountry = FALSE;
- break;
- }
- }
- } while (FALSE);
- }
- if (resultName.isBogus() || resultName.isEmpty()) {
- localeIdName(lang, resultName, substitute == UDISPCTX_SUBSTITUTE);
- if (resultName.isBogus()) {
- result.setToBogus();
- return result;
- }
- }
-
- UnicodeString resultRemainder;
- UnicodeString temp;
- UErrorCode status = U_ZERO_ERROR;
-
- if (hasScript) {
- UnicodeString script_str = scriptDisplayName(script, temp, TRUE);
- if (script_str.isBogus()) {
- result.setToBogus();
- return result;
- }
- resultRemainder.append(script_str);
- }
- if (hasCountry) {
- UnicodeString region_str = regionDisplayName(country, temp, TRUE);
- if (region_str.isBogus()) {
- result.setToBogus();
- return result;
- }
- appendWithSep(resultRemainder, region_str);
- }
- if (hasVariant) {
- UnicodeString variant_str = variantDisplayName(variant, temp, TRUE);
- if (variant_str.isBogus()) {
- result.setToBogus();
- return result;
- }
- appendWithSep(resultRemainder, variant_str);
- }
- resultRemainder.findAndReplace(formatOpenParen, formatReplaceOpenParen);
- resultRemainder.findAndReplace(formatCloseParen, formatReplaceCloseParen);
-
- LocalPointer<StringEnumeration> e(loc.createKeywords(status));
- if (e.isValid() && U_SUCCESS(status)) {
- UnicodeString temp2;
- char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY
- const char* key;
- while ((key = e->next((int32_t *)0, status)) != NULL) {
- value[0] = 0;
- loc.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
- if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
- return result;
- }
- keyDisplayName(key, temp, TRUE);
- temp.findAndReplace(formatOpenParen, formatReplaceOpenParen);
- temp.findAndReplace(formatCloseParen, formatReplaceCloseParen);
- keyValueDisplayName(key, value, temp2, TRUE);
- temp2.findAndReplace(formatOpenParen, formatReplaceOpenParen);
- temp2.findAndReplace(formatCloseParen, formatReplaceCloseParen);
- if (temp2 != UnicodeString(value, -1, US_INV)) {
- appendWithSep(resultRemainder, temp2);
- } else if (temp != UnicodeString(key, -1, US_INV)) {
- UnicodeString temp3;
- keyTypeFormat.format(temp, temp2, temp3, status);
- appendWithSep(resultRemainder, temp3);
- } else {
- appendWithSep(resultRemainder, temp)
- .append((UChar)0x3d /* = */)
- .append(temp2);
- }
- }
- }
-
- if (!resultRemainder.isEmpty()) {
- format.format(resultName, resultRemainder, result.remove(), status);
- return adjustForUsageAndContext(kCapContextUsageLanguage, result);
- }
-
- result = resultName;
- return adjustForUsageAndContext(kCapContextUsageLanguage, result);
-}
-
-UnicodeString&
-LocaleDisplayNamesImpl::appendWithSep(UnicodeString& buffer, const UnicodeString& src) const {
- if (buffer.isEmpty()) {
- buffer.setTo(src);
- } else {
- const UnicodeString *values[2] = { &buffer, &src };
- UErrorCode status = U_ZERO_ERROR;
- separatorFormat.formatAndReplace(values, 2, buffer, NULL, 0, status);
- }
- return buffer;
-}
-
-UnicodeString&
-LocaleDisplayNamesImpl::localeDisplayName(const char* localeId,
- UnicodeString& result) const {
- return localeDisplayName(Locale(localeId), result);
-}
-
-// private
-UnicodeString&
-LocaleDisplayNamesImpl::localeIdName(const char* localeId,
- UnicodeString& result, bool substitute) const {
- if (nameLength == UDISPCTX_LENGTH_SHORT) {
- langData.getNoFallback("Languages%short", localeId, result);
- if (!result.isBogus()) {
- return result;
- }
- }
- if (substitute) {
- return langData.get("Languages", localeId, result);
- } else {
- return langData.getNoFallback("Languages", localeId, result);
- }
-}
-
-UnicodeString&
-LocaleDisplayNamesImpl::languageDisplayName(const char* lang,
- UnicodeString& result) const {
- if (uprv_strcmp("root", lang) == 0 || uprv_strchr(lang, '_') != NULL) {
- return result = UnicodeString(lang, -1, US_INV);
- }
- if (nameLength == UDISPCTX_LENGTH_SHORT) {
- langData.getNoFallback("Languages%short", lang, result);
- if (!result.isBogus()) {
- return adjustForUsageAndContext(kCapContextUsageLanguage, result);
- }
- }
- if (substitute == UDISPCTX_SUBSTITUTE) {
- langData.get("Languages", lang, result);
- } else {
- langData.getNoFallback("Languages", lang, result);
- }
- return adjustForUsageAndContext(kCapContextUsageLanguage, result);
-}
-
-UnicodeString&
-LocaleDisplayNamesImpl::scriptDisplayName(const char* script,
- UnicodeString& result,
- UBool skipAdjust) const {
- if (nameLength == UDISPCTX_LENGTH_SHORT) {
- langData.getNoFallback("Scripts%short", script, result);
- if (!result.isBogus()) {
- return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageScript, result);
- }
- }
- if (substitute == UDISPCTX_SUBSTITUTE) {
- langData.get("Scripts", script, result);
- } else {
- langData.getNoFallback("Scripts", script, result);
- }
- return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageScript, result);
-}
-
-UnicodeString&
-LocaleDisplayNamesImpl::scriptDisplayName(const char* script,
- UnicodeString& result) const {
- return scriptDisplayName(script, result, FALSE);
-}
-
-UnicodeString&
-LocaleDisplayNamesImpl::scriptDisplayName(UScriptCode scriptCode,
- UnicodeString& result) const {
- return scriptDisplayName(uscript_getName(scriptCode), result, FALSE);
-}
-
-UnicodeString&
-LocaleDisplayNamesImpl::regionDisplayName(const char* region,
- UnicodeString& result,
- UBool skipAdjust) const {
- if (nameLength == UDISPCTX_LENGTH_SHORT) {
- regionData.getNoFallback("Countries%short", region, result);
- if (!result.isBogus()) {
- return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageTerritory, result);
- }
- }
- if (substitute == UDISPCTX_SUBSTITUTE) {
- regionData.get("Countries", region, result);
- } else {
- regionData.getNoFallback("Countries", region, result);
- }
- return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageTerritory, result);
-}
-
-UnicodeString&
-LocaleDisplayNamesImpl::regionDisplayName(const char* region,
- UnicodeString& result) const {
- return regionDisplayName(region, result, FALSE);
-}
-
-
-UnicodeString&
-LocaleDisplayNamesImpl::variantDisplayName(const char* variant,
- UnicodeString& result,
- UBool skipAdjust) const {
- // don't have a resource for short variant names
- if (substitute == UDISPCTX_SUBSTITUTE) {
- langData.get("Variants", variant, result);
- } else {
- langData.getNoFallback("Variants", variant, result);
- }
- return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageVariant, result);
-}
-
-UnicodeString&
-LocaleDisplayNamesImpl::variantDisplayName(const char* variant,
- UnicodeString& result) const {
- return variantDisplayName(variant, result, FALSE);
-}
-
-UnicodeString&
-LocaleDisplayNamesImpl::keyDisplayName(const char* key,
- UnicodeString& result,
- UBool skipAdjust) const {
- // don't have a resource for short key names
- if (substitute == UDISPCTX_SUBSTITUTE) {
- langData.get("Keys", key, result);
- } else {
- langData.getNoFallback("Keys", key, result);
- }
- return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageKey, result);
-}
-
-UnicodeString&
-LocaleDisplayNamesImpl::keyDisplayName(const char* key,
- UnicodeString& result) const {
- return keyDisplayName(key, result, FALSE);
-}
-
-UnicodeString&
-LocaleDisplayNamesImpl::keyValueDisplayName(const char* key,
- const char* value,
- UnicodeString& result,
- UBool skipAdjust) const {
- if (uprv_strcmp(key, "currency") == 0) {
- // ICU4C does not have ICU4J CurrencyDisplayInfo equivalent for now.
- UErrorCode sts = U_ZERO_ERROR;
- UnicodeString ustrValue(value, -1, US_INV);
- int32_t len;
- const UChar *currencyName = ucurr_getName(ustrValue.getTerminatedBuffer(),
- locale.getBaseName(), UCURR_LONG_NAME, nullptr /* isChoiceFormat */, &len, &sts);
- if (U_FAILURE(sts)) {
- // Return the value as is on failure
- result = ustrValue;
- return result;
- }
- result.setTo(currencyName, len);
- return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageKeyValue, result);
- }
-
- if (nameLength == UDISPCTX_LENGTH_SHORT) {
- langData.getNoFallback("Types%short", key, value, result);
- if (!result.isBogus()) {
- return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageKeyValue, result);
- }
- }
- if (substitute == UDISPCTX_SUBSTITUTE) {
- langData.get("Types", key, value, result);
- } else {
- langData.getNoFallback("Types", key, value, result);
- }
- return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageKeyValue, result);
-}
-
-UnicodeString&
-LocaleDisplayNamesImpl::keyValueDisplayName(const char* key,
- const char* value,
- UnicodeString& result) const {
- return keyValueDisplayName(key, value, result, FALSE);
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-LocaleDisplayNames*
-LocaleDisplayNames::createInstance(const Locale& locale,
- UDialectHandling dialectHandling) {
- return new LocaleDisplayNamesImpl(locale, dialectHandling);
-}
-
-LocaleDisplayNames*
-LocaleDisplayNames::createInstance(const Locale& locale,
- UDisplayContext *contexts, int32_t length) {
- if (contexts == NULL) {
- length = 0;
- }
- return new LocaleDisplayNamesImpl(locale, contexts, length);
-}
-
-U_NAMESPACE_END
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-U_NAMESPACE_USE
-
-U_CAPI ULocaleDisplayNames * U_EXPORT2
-uldn_open(const char * locale,
- UDialectHandling dialectHandling,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if (locale == NULL) {
- locale = uloc_getDefault();
- }
- return (ULocaleDisplayNames *)LocaleDisplayNames::createInstance(Locale(locale), dialectHandling);
-}
-
-U_CAPI ULocaleDisplayNames * U_EXPORT2
-uldn_openForContext(const char * locale,
- UDisplayContext *contexts, int32_t length,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if (locale == NULL) {
- locale = uloc_getDefault();
- }
- return (ULocaleDisplayNames *)LocaleDisplayNames::createInstance(Locale(locale), contexts, length);
-}
-
-
-U_CAPI void U_EXPORT2
-uldn_close(ULocaleDisplayNames *ldn) {
- delete (LocaleDisplayNames *)ldn;
-}
-
-U_CAPI const char * U_EXPORT2
-uldn_getLocale(const ULocaleDisplayNames *ldn) {
- if (ldn) {
- return ((const LocaleDisplayNames *)ldn)->getLocale().getName();
- }
- return NULL;
-}
-
-U_CAPI UDialectHandling U_EXPORT2
-uldn_getDialectHandling(const ULocaleDisplayNames *ldn) {
- if (ldn) {
- return ((const LocaleDisplayNames *)ldn)->getDialectHandling();
- }
- return ULDN_STANDARD_NAMES;
-}
-
-U_CAPI UDisplayContext U_EXPORT2
-uldn_getContext(const ULocaleDisplayNames *ldn,
- UDisplayContextType type,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return (UDisplayContext)0;
- }
- return ((const LocaleDisplayNames *)ldn)->getContext(type);
-}
-
-U_CAPI int32_t U_EXPORT2
-uldn_localeDisplayName(const ULocaleDisplayNames *ldn,
- const char *locale,
- UChar *result,
- int32_t maxResultSize,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if (ldn == NULL || locale == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- UnicodeString temp(result, 0, maxResultSize);
- ((const LocaleDisplayNames *)ldn)->localeDisplayName(locale, temp);
- if (temp.isBogus()) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- return temp.extract(result, maxResultSize, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-uldn_languageDisplayName(const ULocaleDisplayNames *ldn,
- const char *lang,
- UChar *result,
- int32_t maxResultSize,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if (ldn == NULL || lang == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- UnicodeString temp(result, 0, maxResultSize);
- ((const LocaleDisplayNames *)ldn)->languageDisplayName(lang, temp);
- return temp.extract(result, maxResultSize, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-uldn_scriptDisplayName(const ULocaleDisplayNames *ldn,
- const char *script,
- UChar *result,
- int32_t maxResultSize,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if (ldn == NULL || script == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- UnicodeString temp(result, 0, maxResultSize);
- ((const LocaleDisplayNames *)ldn)->scriptDisplayName(script, temp);
- return temp.extract(result, maxResultSize, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn,
- UScriptCode scriptCode,
- UChar *result,
- int32_t maxResultSize,
- UErrorCode *pErrorCode) {
- return uldn_scriptDisplayName(ldn, uscript_getName(scriptCode), result, maxResultSize, pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-uldn_regionDisplayName(const ULocaleDisplayNames *ldn,
- const char *region,
- UChar *result,
- int32_t maxResultSize,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if (ldn == NULL || region == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- UnicodeString temp(result, 0, maxResultSize);
- ((const LocaleDisplayNames *)ldn)->regionDisplayName(region, temp);
- return temp.extract(result, maxResultSize, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-uldn_variantDisplayName(const ULocaleDisplayNames *ldn,
- const char *variant,
- UChar *result,
- int32_t maxResultSize,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if (ldn == NULL || variant == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- UnicodeString temp(result, 0, maxResultSize);
- ((const LocaleDisplayNames *)ldn)->variantDisplayName(variant, temp);
- return temp.extract(result, maxResultSize, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-uldn_keyDisplayName(const ULocaleDisplayNames *ldn,
- const char *key,
- UChar *result,
- int32_t maxResultSize,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if (ldn == NULL || key == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- UnicodeString temp(result, 0, maxResultSize);
- ((const LocaleDisplayNames *)ldn)->keyDisplayName(key, temp);
- return temp.extract(result, maxResultSize, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn,
- const char *key,
- const char *value,
- UChar *result,
- int32_t maxResultSize,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if (ldn == NULL || key == NULL || value == NULL || (result == NULL && maxResultSize > 0)
- || maxResultSize < 0) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- UnicodeString temp(result, 0, maxResultSize);
- ((const LocaleDisplayNames *)ldn)->keyValueDisplayName(key, value, temp);
- return temp.extract(result, maxResultSize, *pErrorCode);
-}
-
-#endif
diff --git a/contrib/libs/icu/common/locid.cpp b/contrib/libs/icu/common/locid.cpp
deleted file mode 100644
index 753a452120e..00000000000
--- a/contrib/libs/icu/common/locid.cpp
+++ /dev/null
@@ -1,1663 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- **********************************************************************
- * Copyright (C) 1997-2016, International Business Machines
- * Corporation and others. All Rights Reserved.
- **********************************************************************
-*
-* File locid.cpp
-*
-* Created by: Richard Gillam
-*
-* Modification History:
-*
-* Date Name Description
-* 02/11/97 aliu Changed gLocPath to fgDataDirectory and added
-* methods to get and set it.
-* 04/02/97 aliu Made operator!= inline; fixed return value
-* of getName().
-* 04/15/97 aliu Cleanup for AIX/Win32.
-* 04/24/97 aliu Numerous changes per code review.
-* 08/18/98 stephen Changed getDisplayName()
-* Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE
-* Added getISOCountries(), getISOLanguages(),
-* getLanguagesForCountry()
-* 03/16/99 bertrand rehaul.
-* 07/21/99 stephen Added U_CFUNC setDefault
-* 11/09/99 weiv Added const char * getName() const;
-* 04/12/00 srl removing unicodestring api's and cached hash code
-* 08/10/01 grhoten Change the static Locales to accessor functions
-******************************************************************************
-*/
-
-#include <utility>
-
-#include "unicode/bytestream.h"
-#include "unicode/locid.h"
-#include "unicode/strenum.h"
-#include "unicode/stringpiece.h"
-#include "unicode/uloc.h"
-#include "unicode/ures.h"
-
-#include "bytesinkutil.h"
-#include "charstr.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "mutex.h"
-#include "putilimp.h"
-#include "uassert.h"
-#include "ucln_cmn.h"
-#include "uhash.h"
-#include "ulocimp.h"
-#include "umutex.h"
-#include "ustr_imp.h"
-
-U_CDECL_BEGIN
-static UBool U_CALLCONV locale_cleanup(void);
-U_CDECL_END
-
-U_NAMESPACE_BEGIN
-
-static Locale *gLocaleCache = NULL;
-static UInitOnce gLocaleCacheInitOnce = U_INITONCE_INITIALIZER;
-
-// gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale.
-static UMutex gDefaultLocaleMutex;
-static UHashtable *gDefaultLocalesHashT = NULL;
-static Locale *gDefaultLocale = NULL;
-
-/**
- * \def ULOC_STRING_LIMIT
- * strings beyond this value crash in CharString
- */
-#define ULOC_STRING_LIMIT 357913941
-
-U_NAMESPACE_END
-
-typedef enum ELocalePos {
- eENGLISH,
- eFRENCH,
- eGERMAN,
- eITALIAN,
- eJAPANESE,
- eKOREAN,
- eCHINESE,
-
- eFRANCE,
- eGERMANY,
- eITALY,
- eJAPAN,
- eKOREA,
- eCHINA, /* Alias for PRC */
- eTAIWAN,
- eUK,
- eUS,
- eCANADA,
- eCANADA_FRENCH,
- eROOT,
-
-
- //eDEFAULT,
- eMAX_LOCALES
-} ELocalePos;
-
-U_CFUNC int32_t locale_getKeywords(const char *localeID,
- char prev,
- char *keywords, int32_t keywordCapacity,
- UBool valuesToo,
- UErrorCode *status);
-
-U_CDECL_BEGIN
-//
-// Deleter function for Locales owned by the default Locale hash table/
-//
-static void U_CALLCONV
-deleteLocale(void *obj) {
- delete (icu::Locale *) obj;
-}
-
-static UBool U_CALLCONV locale_cleanup(void)
-{
- U_NAMESPACE_USE
-
- delete [] gLocaleCache;
- gLocaleCache = NULL;
- gLocaleCacheInitOnce.reset();
-
- if (gDefaultLocalesHashT) {
- uhash_close(gDefaultLocalesHashT); // Automatically deletes all elements, using deleter func.
- gDefaultLocalesHashT = NULL;
- }
- gDefaultLocale = NULL;
- return TRUE;
-}
-
-
-static void U_CALLCONV locale_init(UErrorCode &status) {
- U_NAMESPACE_USE
-
- U_ASSERT(gLocaleCache == NULL);
- gLocaleCache = new Locale[(int)eMAX_LOCALES];
- if (gLocaleCache == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
- gLocaleCache[eROOT] = Locale("");
- gLocaleCache[eENGLISH] = Locale("en");
- gLocaleCache[eFRENCH] = Locale("fr");
- gLocaleCache[eGERMAN] = Locale("de");
- gLocaleCache[eITALIAN] = Locale("it");
- gLocaleCache[eJAPANESE] = Locale("ja");
- gLocaleCache[eKOREAN] = Locale("ko");
- gLocaleCache[eCHINESE] = Locale("zh");
- gLocaleCache[eFRANCE] = Locale("fr", "FR");
- gLocaleCache[eGERMANY] = Locale("de", "DE");
- gLocaleCache[eITALY] = Locale("it", "IT");
- gLocaleCache[eJAPAN] = Locale("ja", "JP");
- gLocaleCache[eKOREA] = Locale("ko", "KR");
- gLocaleCache[eCHINA] = Locale("zh", "CN");
- gLocaleCache[eTAIWAN] = Locale("zh", "TW");
- gLocaleCache[eUK] = Locale("en", "GB");
- gLocaleCache[eUS] = Locale("en", "US");
- gLocaleCache[eCANADA] = Locale("en", "CA");
- gLocaleCache[eCANADA_FRENCH] = Locale("fr", "CA");
-}
-
-U_CDECL_END
-
-U_NAMESPACE_BEGIN
-
-Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
- // Synchronize this entire function.
- Mutex lock(&gDefaultLocaleMutex);
-
- UBool canonicalize = FALSE;
-
- // If given a NULL string for the locale id, grab the default
- // name from the system.
- // (Different from most other locale APIs, where a null name means use
- // the current ICU default locale.)
- if (id == NULL) {
- id = uprv_getDefaultLocaleID(); // This function not thread safe? TODO: verify.
- canonicalize = TRUE; // always canonicalize host ID
- }
-
- CharString localeNameBuf;
- {
- CharStringByteSink sink(&localeNameBuf);
- if (canonicalize) {
- ulocimp_canonicalize(id, sink, &status);
- } else {
- ulocimp_getName(id, sink, &status);
- }
- }
-
- if (U_FAILURE(status)) {
- return gDefaultLocale;
- }
-
- if (gDefaultLocalesHashT == NULL) {
- gDefaultLocalesHashT = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
- if (U_FAILURE(status)) {
- return gDefaultLocale;
- }
- uhash_setValueDeleter(gDefaultLocalesHashT, deleteLocale);
- ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
- }
-
- Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf.data());
- if (newDefault == NULL) {
- newDefault = new Locale(Locale::eBOGUS);
- if (newDefault == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return gDefaultLocale;
- }
- newDefault->init(localeNameBuf.data(), FALSE);
- uhash_put(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status);
- if (U_FAILURE(status)) {
- return gDefaultLocale;
- }
- }
- gDefaultLocale = newDefault;
- return gDefaultLocale;
-}
-
-U_NAMESPACE_END
-
-/* sfb 07/21/99 */
-U_CFUNC void
-locale_set_default(const char *id)
-{
- U_NAMESPACE_USE
- UErrorCode status = U_ZERO_ERROR;
- locale_set_default_internal(id, status);
-}
-/* end */
-
-U_CFUNC const char *
-locale_get_default(void)
-{
- U_NAMESPACE_USE
- return Locale::getDefault().getName();
-}
-
-
-U_NAMESPACE_BEGIN
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
-
-/*Character separating the posix id fields*/
-// '_'
-// In the platform codepage.
-#define SEP_CHAR '_'
-
-Locale::~Locale()
-{
- if (baseName != fullName) {
- uprv_free(baseName);
- }
- baseName = NULL;
- /*if fullName is on the heap, we free it*/
- if (fullName != fullNameBuffer)
- {
- uprv_free(fullName);
- fullName = NULL;
- }
-}
-
-Locale::Locale()
- : UObject(), fullName(fullNameBuffer), baseName(NULL)
-{
- init(NULL, FALSE);
-}
-
-/*
- * Internal constructor to allow construction of a locale object with
- * NO side effects. (Default constructor tries to get
- * the default locale.)
- */
-Locale::Locale(Locale::ELocaleType)
- : UObject(), fullName(fullNameBuffer), baseName(NULL)
-{
- setToBogus();
-}
-
-
-Locale::Locale( const char * newLanguage,
- const char * newCountry,
- const char * newVariant,
- const char * newKeywords)
- : UObject(), fullName(fullNameBuffer), baseName(NULL)
-{
- if( (newLanguage==NULL) && (newCountry == NULL) && (newVariant == NULL) )
- {
- init(NULL, FALSE); /* shortcut */
- }
- else
- {
- UErrorCode status = U_ZERO_ERROR;
- int32_t size = 0;
- int32_t lsize = 0;
- int32_t csize = 0;
- int32_t vsize = 0;
- int32_t ksize = 0;
-
- // Calculate the size of the resulting string.
-
- // Language
- if ( newLanguage != NULL )
- {
- lsize = (int32_t)uprv_strlen(newLanguage);
- if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
- setToBogus();
- return;
- }
- size = lsize;
- }
-
- CharString togo(newLanguage, lsize, status); // start with newLanguage
-
- // _Country
- if ( newCountry != NULL )
- {
- csize = (int32_t)uprv_strlen(newCountry);
- if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
- setToBogus();
- return;
- }
- size += csize;
- }
-
- // _Variant
- if ( newVariant != NULL )
- {
- // remove leading _'s
- while(newVariant[0] == SEP_CHAR)
- {
- newVariant++;
- }
-
- // remove trailing _'s
- vsize = (int32_t)uprv_strlen(newVariant);
- if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
- setToBogus();
- return;
- }
- while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) )
- {
- vsize--;
- }
- }
-
- if( vsize > 0 )
- {
- size += vsize;
- }
-
- // Separator rules:
- if ( vsize > 0 )
- {
- size += 2; // at least: __v
- }
- else if ( csize > 0 )
- {
- size += 1; // at least: _v
- }
-
- if ( newKeywords != NULL)
- {
- ksize = (int32_t)uprv_strlen(newKeywords);
- if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
- setToBogus();
- return;
- }
- size += ksize + 1;
- }
-
- // NOW we have the full locale string..
- // Now, copy it back.
-
- // newLanguage is already copied
-
- if ( ( vsize != 0 ) || (csize != 0) ) // at least: __v
- { // ^
- togo.append(SEP_CHAR, status);
- }
-
- if ( csize != 0 )
- {
- togo.append(newCountry, status);
- }
-
- if ( vsize != 0)
- {
- togo.append(SEP_CHAR, status)
- .append(newVariant, vsize, status);
- }
-
- if ( ksize != 0)
- {
- if (uprv_strchr(newKeywords, '=')) {
- togo.append('@', status); /* keyword parsing */
- }
- else {
- togo.append('_', status); /* Variant parsing with a script */
- if ( vsize == 0) {
- togo.append('_', status); /* No country found */
- }
- }
- togo.append(newKeywords, status);
- }
-
- if (U_FAILURE(status)) {
- // Something went wrong with appending, etc.
- setToBogus();
- return;
- }
- // Parse it, because for example 'language' might really be a complete
- // string.
- init(togo.data(), FALSE);
- }
-}
-
-Locale::Locale(const Locale &other)
- : UObject(other), fullName(fullNameBuffer), baseName(NULL)
-{
- *this = other;
-}
-
-Locale::Locale(Locale&& other) U_NOEXCEPT
- : UObject(other), fullName(fullNameBuffer), baseName(fullName) {
- *this = std::move(other);
-}
-
-Locale& Locale::operator=(const Locale& other) {
- if (this == &other) {
- return *this;
- }
-
- setToBogus();
-
- if (other.fullName == other.fullNameBuffer) {
- uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
- } else if (other.fullName == nullptr) {
- fullName = nullptr;
- } else {
- fullName = uprv_strdup(other.fullName);
- if (fullName == nullptr) return *this;
- }
-
- if (other.baseName == other.fullName) {
- baseName = fullName;
- } else if (other.baseName != nullptr) {
- baseName = uprv_strdup(other.baseName);
- if (baseName == nullptr) return *this;
- }
-
- uprv_strcpy(language, other.language);
- uprv_strcpy(script, other.script);
- uprv_strcpy(country, other.country);
-
- variantBegin = other.variantBegin;
- fIsBogus = other.fIsBogus;
-
- return *this;
-}
-
-Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
- if (baseName != fullName) uprv_free(baseName);
- if (fullName != fullNameBuffer) uprv_free(fullName);
-
- if (other.fullName == other.fullNameBuffer) {
- uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
- fullName = fullNameBuffer;
- } else {
- fullName = other.fullName;
- }
-
- if (other.baseName == other.fullName) {
- baseName = fullName;
- } else {
- baseName = other.baseName;
- }
-
- uprv_strcpy(language, other.language);
- uprv_strcpy(script, other.script);
- uprv_strcpy(country, other.country);
-
- variantBegin = other.variantBegin;
- fIsBogus = other.fIsBogus;
-
- other.baseName = other.fullName = other.fullNameBuffer;
-
- return *this;
-}
-
-Locale *
-Locale::clone() const {
- return new Locale(*this);
-}
-
-UBool
-Locale::operator==( const Locale& other) const
-{
- return (uprv_strcmp(other.fullName, fullName) == 0);
-}
-
-#define ISASCIIALPHA(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
-
-namespace {
-
-CharString& AppendLSCVE(CharString& out, const char* language, const char* script,
- const char* country, const char* variants, const char* extension,
- UErrorCode& status) {
- out.append(language, status);
- if (script && script[0] != '\0') {
- out.append('_', status);
- out.append(script, status);
- }
- if (country && country[0] != '\0') {
- out.append('_', status);
- out.append(country, status);
- }
- if (variants && variants[0] != '\0') {
- if ((script == nullptr || script[0] == '\0') &&
- (country == nullptr || country[0] == '\0')) {
- out.append('_', status);
- }
- out.append('_', status);
- out.append(variants, status);
- }
- if (extension && extension[0] != '\0') {
- out.append(extension, status);
- }
- return out;
-}
-
-} // namespace
-
-/*This function initializes a Locale from a C locale ID*/
-Locale& Locale::init(const char* localeID, UBool canonicalize)
-{
- fIsBogus = FALSE;
- /* Free our current storage */
- if (baseName != fullName) {
- uprv_free(baseName);
- }
- baseName = NULL;
- if(fullName != fullNameBuffer) {
- uprv_free(fullName);
- fullName = fullNameBuffer;
- }
-
- // not a loop:
- // just an easy way to have a common error-exit
- // without goto and without another function
- do {
- char *separator;
- char *field[5] = {0};
- int32_t fieldLen[5] = {0};
- int32_t fieldIdx;
- int32_t variantField;
- int32_t length;
- UErrorCode err;
-
- if(localeID == NULL) {
- // not an error, just set the default locale
- return *this = getDefault();
- }
-
- /* preset all fields to empty */
- language[0] = script[0] = country[0] = 0;
-
- // "canonicalize" the locale ID to ICU/Java format
- err = U_ZERO_ERROR;
- length = canonicalize ?
- uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) :
- uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
-
- if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
- /*Go to heap for the fullName if necessary*/
- fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
- if(fullName == 0) {
- fullName = fullNameBuffer;
- break; // error: out of memory
- }
- err = U_ZERO_ERROR;
- length = canonicalize ?
- uloc_canonicalize(localeID, fullName, length+1, &err) :
- uloc_getName(localeID, fullName, length+1, &err);
- }
- if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
- /* should never occur */
- break;
- }
-
- variantBegin = length;
-
- /* after uloc_getName/canonicalize() we know that only '_' are separators */
- /* But _ could also appeared in timezone such as "en@timezone=America/Los_Angeles" */
- separator = field[0] = fullName;
- fieldIdx = 1;
- char* at = uprv_strchr(fullName, '@');
- while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != 0 &&
- fieldIdx < UPRV_LENGTHOF(field)-1 &&
- (at == nullptr || separator < at)) {
- field[fieldIdx] = separator + 1;
- fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
- fieldIdx++;
- }
- // variant may contain @foo or .foo POSIX cruft; remove it
- separator = uprv_strchr(field[fieldIdx-1], '@');
- char* sep2 = uprv_strchr(field[fieldIdx-1], '.');
- if (separator!=NULL || sep2!=NULL) {
- if (separator==NULL || (sep2!=NULL && separator > sep2)) {
- separator = sep2;
- }
- fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
- } else {
- fieldLen[fieldIdx-1] = length - (int32_t)(field[fieldIdx-1] - fullName);
- }
-
- if (fieldLen[0] >= (int32_t)(sizeof(language)))
- {
- break; // error: the language field is too long
- }
-
- variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */
- if (fieldLen[0] > 0) {
- /* We have a language */
- uprv_memcpy(language, fullName, fieldLen[0]);
- language[fieldLen[0]] = 0;
- }
- if (fieldLen[1] == 4 && ISASCIIALPHA(field[1][0]) &&
- ISASCIIALPHA(field[1][1]) && ISASCIIALPHA(field[1][2]) &&
- ISASCIIALPHA(field[1][3])) {
- /* We have at least a script */
- uprv_memcpy(script, field[1], fieldLen[1]);
- script[fieldLen[1]] = 0;
- variantField++;
- }
-
- if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) {
- /* We have a country */
- uprv_memcpy(country, field[variantField], fieldLen[variantField]);
- country[fieldLen[variantField]] = 0;
- variantField++;
- } else if (fieldLen[variantField] == 0) {
- variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
- }
-
- if (fieldLen[variantField] > 0) {
- /* We have a variant */
- variantBegin = (int32_t)(field[variantField] - fullName);
- }
-
- err = U_ZERO_ERROR;
- initBaseName(err);
- if (U_FAILURE(err)) {
- break;
- }
-
- if (canonicalize) {
- UErrorCode status = U_ZERO_ERROR;
- // TODO: Try to use ResourceDataValue and ures_getValueWithFallback() etc.
- LocalUResourceBundlePointer metadata(ures_openDirect(NULL, "metadata", &status));
- LocalUResourceBundlePointer metadataAlias(ures_getByKey(metadata.getAlias(), "alias", NULL, &status));
- // Look up the metadata:alias:language:$key:replacement entries
- // key could be one of the following:
- // language
- // language_Script_REGION
- // language_REGION
- // language_variant
- do {
- // The resource structure looks like
- // metadata {
- // alias {
- // language {
- // art_lojban {
- // replacement{"jbo"}
- // }
- // ...
- // ks_Arab_IN {
- // replacement{"ks_IN"}
- // }
- // ...
- // no {
- // replacement{"nb"}
- // }
- // ....
- // zh_CN {
- // replacement{"zh_Hans_CN"}
- // }
- // }
- // ...
- // }
- // }
- LocalUResourceBundlePointer languageAlias(ures_getByKey(metadataAlias.getAlias(), "language", NULL, &status));
- if (U_FAILURE(status))
- break;
- CharString temp;
- // Handle cases of key pattern "language _ variant"
- // ex: Map "art_lojban" to "jbo"
- const char* variants = getVariant();
- if (variants != nullptr && variants[0] != '\0') {
- const char* begin = variants;
- const char* end = begin;
- // We may have multiple variants, need to look at each of
- // them.
- do {
- status = U_ZERO_ERROR;
- end = uprv_strchr(begin, '_');
- int32_t len = (end == nullptr) ? int32_t(uprv_strlen(begin)) : int32_t(end - begin);
- temp.clear().append(getLanguage(), status).append("_", status).append(begin, len, status);
- LocalUResourceBundlePointer languageVariantAlias(
- ures_getByKey(languageAlias.getAlias(),
- temp.data(),
- NULL, &status));
- temp.clear().appendInvariantChars(
- UnicodeString(ures_getStringByKey(languageVariantAlias.getAlias(), "replacement", nullptr, &status)), status);
- if (U_SUCCESS(status)) {
- CharString newVar;
- if (begin != variants) {
- newVar.append(variants, static_cast<int32_t>(begin - variants - 1), status);
- }
- if (end != nullptr) {
- if (begin != variants) {
- newVar.append("_", status);
- }
- newVar.append(end + 1, status);
- }
- Locale l(temp.data());
- init(AppendLSCVE(temp.clear(),
- l.getLanguage(),
- (getScript() != nullptr && getScript()[0] != '\0') ? getScript() : l.getScript(),
- (getCountry() != nullptr && getCountry()[0] != '\0') ? getCountry() : l.getCountry(),
- newVar.data(),
- uprv_strchr(fullName, '@'), status).data(), false);
- break;
- }
- begin = end + 1;
- } while (end != nullptr);
- } // End of handle language _ variant
- // Handle cases of key pattern "language _ Script _ REGION"
- // ex: Map "ks_Arab_IN" to "ks_IN"
- if (getScript() != nullptr && getScript()[0] != '\0' &&
- getCountry() != nullptr && getCountry()[0] != '\0') {
- status = U_ZERO_ERROR;
- LocalUResourceBundlePointer replacedAlias(
- ures_getByKey(languageAlias.getAlias(),
- AppendLSCVE(temp.clear(), getLanguage(), getScript(), getCountry(),
- nullptr, nullptr, status).data(), NULL, &status));
- temp.clear().appendInvariantChars(
- UnicodeString(ures_getStringByKey(replacedAlias.getAlias(), "replacement", nullptr, &status)), status);
- if (U_SUCCESS(status)) {
- Locale l(temp.data());
- init(AppendLSCVE(temp.clear(),
- l.getLanguage(),
- l.getScript(),
- l.getCountry(),
- getVariant(),
- uprv_strchr(fullName, '@'), status).data(), false);
- }
- } // End of handle language _ Script _ REGION
- // Handle cases of key pattern "language _ REGION"
- // ex: Map "zh_CN" to "zh_Hans_CN"
- if (getCountry() != nullptr && getCountry()[0] != '\0') {
- status = U_ZERO_ERROR;
- LocalUResourceBundlePointer replacedAlias(
- ures_getByKey(languageAlias.getAlias(),
- AppendLSCVE(temp.clear(), getLanguage(), nullptr, getCountry(),
- nullptr, nullptr, status).data(), NULL, &status));
- temp.clear().appendInvariantChars(
- UnicodeString(ures_getStringByKey(replacedAlias.getAlias(), "replacement", nullptr, &status)), status);
- if (U_SUCCESS(status)) {
- Locale l(temp.data());
- init(AppendLSCVE(temp.clear(),
- l.getLanguage(),
- (getScript() != nullptr && getScript()[0] != '\0') ? getScript() : l.getScript(),
- l.getCountry(),
- getVariant(),
- uprv_strchr(fullName, '@'), status).data(), false);
- }
- } // End of handle "language _ REGION"
- // Handle cases of key pattern "language"
- // ex: Map "no" to "nb"
- {
- status = U_ZERO_ERROR;
- LocalUResourceBundlePointer replaceLanguageAlias(ures_getByKey(languageAlias.getAlias(), getLanguage(), NULL, &status));
- temp.clear().appendInvariantChars(
- UnicodeString(ures_getStringByKey(replaceLanguageAlias.getAlias(), "replacement", nullptr, &status)), status);
- if (U_SUCCESS(status)) {
- Locale l(temp.data());
- init(AppendLSCVE(temp.clear(),
- l.getLanguage(),
- (getScript() != nullptr && getScript()[0] != '\0') ? getScript() : l.getScript(),
- (getCountry() != nullptr && getCountry()[0] != '\0') ? getCountry() : l.getCountry(),
- getVariant(),
- uprv_strchr(fullName, '@'), status).data(), false);
- }
- } // End of handle "language"
-
- // Look up the metadata:alias:territory:$key:replacement entries
- // key is region code.
- if (getCountry() != nullptr) {
- status = U_ZERO_ERROR;
- // The resource structure looks like
- // metadata {
- // alias {
- // ...
- // territory: {
- // 172 {
- // replacement{"RU AM AZ BY GE KG KZ MD TJ TM UA UZ"}
- // }
- // ...
- // 554 {
- // replacement{"NZ"}
- // }
- // }
- // }
- // }
- LocalUResourceBundlePointer territoryAlias(ures_getByKey(metadataAlias.getAlias(), "territory", NULL, &status));
- LocalUResourceBundlePointer countryAlias(ures_getByKey(territoryAlias.getAlias(), getCountry(), NULL, &status));
- UnicodeString replacements(
- ures_getStringByKey(countryAlias.getAlias(), "replacement", nullptr, &status));
- if (U_SUCCESS(status)) {
- CharString replacedCountry;
- int32_t delPos = replacements.indexOf(' ');
- if (delPos == -1) {
- replacedCountry.appendInvariantChars(replacements, status);
- } else {
- Locale l(AppendLSCVE(temp.clear(), getLanguage(), nullptr, getScript(),
- nullptr, nullptr, status).data());
- l.addLikelySubtags(status);
- if (replacements.indexOf(UnicodeString(l.getCountry())) != -1) {
- replacedCountry.append(l.getCountry(), status);
- } else {
- replacedCountry.appendInvariantChars(replacements.getBuffer(), delPos, status);
- }
- }
- init(AppendLSCVE(temp.clear(),
- getLanguage(),
- getScript(),
- replacedCountry.data(),
- getVariant(),
- uprv_strchr(fullName, '@'), status).data(), false);
- }
- } // End of handle REGION
- } while (0);
- } // if (canonicalize) {
-
- // successful end of init()
- return *this;
- } while(0); /*loop doesn't iterate*/
-
- // when an error occurs, then set this object to "bogus" (there is no UErrorCode here)
- setToBogus();
-
- return *this;
-}
-
-/*
- * Set up the base name.
- * If there are no key words, it's exactly the full name.
- * If key words exist, it's the full name truncated at the '@' character.
- * Need to set up both at init() and after setting a keyword.
- */
-void
-Locale::initBaseName(UErrorCode &status) {
- if (U_FAILURE(status)) {
- return;
- }
- U_ASSERT(baseName==NULL || baseName==fullName);
- const char *atPtr = uprv_strchr(fullName, '@');
- const char *eqPtr = uprv_strchr(fullName, '=');
- if (atPtr && eqPtr && atPtr < eqPtr) {
- // Key words exist.
- int32_t baseNameLength = (int32_t)(atPtr - fullName);
- baseName = (char *)uprv_malloc(baseNameLength + 1);
- if (baseName == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- uprv_strncpy(baseName, fullName, baseNameLength);
- baseName[baseNameLength] = 0;
-
- // The original computation of variantBegin leaves it equal to the length
- // of fullName if there is no variant. It should instead be
- // the length of the baseName.
- if (variantBegin > baseNameLength) {
- variantBegin = baseNameLength;
- }
- } else {
- baseName = fullName;
- }
-}
-
-
-int32_t
-Locale::hashCode() const
-{
- return ustr_hashCharsN(fullName, static_cast<int32_t>(uprv_strlen(fullName)));
-}
-
-void
-Locale::setToBogus() {
- /* Free our current storage */
- if(baseName != fullName) {
- uprv_free(baseName);
- }
- baseName = NULL;
- if(fullName != fullNameBuffer) {
- uprv_free(fullName);
- fullName = fullNameBuffer;
- }
- *fullNameBuffer = 0;
- *language = 0;
- *script = 0;
- *country = 0;
- fIsBogus = TRUE;
- variantBegin = 0;
-}
-
-const Locale& U_EXPORT2
-Locale::getDefault()
-{
- {
- Mutex lock(&gDefaultLocaleMutex);
- if (gDefaultLocale != NULL) {
- return *gDefaultLocale;
- }
- }
- UErrorCode status = U_ZERO_ERROR;
- return *locale_set_default_internal(NULL, status);
-}
-
-
-
-void U_EXPORT2
-Locale::setDefault( const Locale& newLocale,
- UErrorCode& status)
-{
- if (U_FAILURE(status)) {
- return;
- }
-
- /* Set the default from the full name string of the supplied locale.
- * This is a convenient way to access the default locale caching mechanisms.
- */
- const char *localeID = newLocale.getName();
- locale_set_default_internal(localeID, status);
-}
-
-void
-Locale::addLikelySubtags(UErrorCode& status) {
- if (U_FAILURE(status)) {
- return;
- }
-
- CharString maximizedLocaleID;
- {
- CharStringByteSink sink(&maximizedLocaleID);
- ulocimp_addLikelySubtags(fullName, sink, &status);
- }
-
- if (U_FAILURE(status)) {
- return;
- }
-
- init(maximizedLocaleID.data(), /*canonicalize=*/FALSE);
- if (isBogus()) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- }
-}
-
-void
-Locale::minimizeSubtags(UErrorCode& status) {
- if (U_FAILURE(status)) {
- return;
- }
-
- CharString minimizedLocaleID;
- {
- CharStringByteSink sink(&minimizedLocaleID);
- ulocimp_minimizeSubtags(fullName, sink, &status);
- }
-
- if (U_FAILURE(status)) {
- return;
- }
-
- init(minimizedLocaleID.data(), /*canonicalize=*/FALSE);
- if (isBogus()) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- }
-}
-
-void
-Locale::canonicalize(UErrorCode& status) {
- if (U_FAILURE(status)) {
- return;
- }
- if (isBogus()) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- CharString uncanonicalized(fullName, status);
- if (U_FAILURE(status)) {
- return;
- }
- init(uncanonicalized.data(), /*canonicalize=*/TRUE);
- if (isBogus()) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- }
-}
-
-Locale U_EXPORT2
-Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
-{
- Locale result(Locale::eBOGUS);
-
- if (U_FAILURE(status)) {
- return result;
- }
-
- // If a BCP-47 language tag is passed as the language parameter to the
- // normal Locale constructor, it will actually fall back to invoking
- // uloc_forLanguageTag() to parse it if it somehow is able to detect that
- // the string actually is BCP-47. This works well for things like strings
- // using BCP-47 extensions, but it does not at all work for things like
- // BCP-47 grandfathered tags (eg. "en-GB-oed") which are possible to also
- // interpret as ICU locale IDs and because of that won't trigger the BCP-47
- // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
- // and then Locale::init(), instead of just calling the normal constructor.
-
- CharString localeID;
- int32_t parsedLength;
- {
- CharStringByteSink sink(&localeID);
- ulocimp_forLanguageTag(
- tag.data(),
- tag.length(),
- sink,
- &parsedLength,
- &status);
- }
-
- if (U_FAILURE(status)) {
- return result;
- }
-
- if (parsedLength != tag.size()) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return result;
- }
-
- result.init(localeID.data(), /*canonicalize=*/FALSE);
- if (result.isBogus()) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return result;
-}
-
-void
-Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
-{
- if (U_FAILURE(status)) {
- return;
- }
-
- if (fIsBogus) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- ulocimp_toLanguageTag(fullName, sink, /*strict=*/FALSE, &status);
-}
-
-Locale U_EXPORT2
-Locale::createFromName (const char *name)
-{
- if (name) {
- Locale l("");
- l.init(name, FALSE);
- return l;
- }
- else {
- return getDefault();
- }
-}
-
-Locale U_EXPORT2
-Locale::createCanonical(const char* name) {
- Locale loc("");
- loc.init(name, TRUE);
- return loc;
-}
-
-const char *
-Locale::getISO3Language() const
-{
- return uloc_getISO3Language(fullName);
-}
-
-
-const char *
-Locale::getISO3Country() const
-{
- return uloc_getISO3Country(fullName);
-}
-
-/**
- * Return the LCID value as specified in the "LocaleID" resource for this
- * locale. The LocaleID must be expressed as a hexadecimal number, from
- * one to four digits. If the LocaleID resource is not present, or is
- * in an incorrect format, 0 is returned. The LocaleID is for use in
- * Windows (it is an LCID), but is available on all platforms.
- */
-uint32_t
-Locale::getLCID() const
-{
- return uloc_getLCID(fullName);
-}
-
-const char* const* U_EXPORT2 Locale::getISOCountries()
-{
- return uloc_getISOCountries();
-}
-
-const char* const* U_EXPORT2 Locale::getISOLanguages()
-{
- return uloc_getISOLanguages();
-}
-
-// Set the locale's data based on a posix id.
-void Locale::setFromPOSIXID(const char *posixID)
-{
- init(posixID, TRUE);
-}
-
-const Locale & U_EXPORT2
-Locale::getRoot(void)
-{
- return getLocale(eROOT);
-}
-
-const Locale & U_EXPORT2
-Locale::getEnglish(void)
-{
- return getLocale(eENGLISH);
-}
-
-const Locale & U_EXPORT2
-Locale::getFrench(void)
-{
- return getLocale(eFRENCH);
-}
-
-const Locale & U_EXPORT2
-Locale::getGerman(void)
-{
- return getLocale(eGERMAN);
-}
-
-const Locale & U_EXPORT2
-Locale::getItalian(void)
-{
- return getLocale(eITALIAN);
-}
-
-const Locale & U_EXPORT2
-Locale::getJapanese(void)
-{
- return getLocale(eJAPANESE);
-}
-
-const Locale & U_EXPORT2
-Locale::getKorean(void)
-{
- return getLocale(eKOREAN);
-}
-
-const Locale & U_EXPORT2
-Locale::getChinese(void)
-{
- return getLocale(eCHINESE);
-}
-
-const Locale & U_EXPORT2
-Locale::getSimplifiedChinese(void)
-{
- return getLocale(eCHINA);
-}
-
-const Locale & U_EXPORT2
-Locale::getTraditionalChinese(void)
-{
- return getLocale(eTAIWAN);
-}
-
-
-const Locale & U_EXPORT2
-Locale::getFrance(void)
-{
- return getLocale(eFRANCE);
-}
-
-const Locale & U_EXPORT2
-Locale::getGermany(void)
-{
- return getLocale(eGERMANY);
-}
-
-const Locale & U_EXPORT2
-Locale::getItaly(void)
-{
- return getLocale(eITALY);
-}
-
-const Locale & U_EXPORT2
-Locale::getJapan(void)
-{
- return getLocale(eJAPAN);
-}
-
-const Locale & U_EXPORT2
-Locale::getKorea(void)
-{
- return getLocale(eKOREA);
-}
-
-const Locale & U_EXPORT2
-Locale::getChina(void)
-{
- return getLocale(eCHINA);
-}
-
-const Locale & U_EXPORT2
-Locale::getPRC(void)
-{
- return getLocale(eCHINA);
-}
-
-const Locale & U_EXPORT2
-Locale::getTaiwan(void)
-{
- return getLocale(eTAIWAN);
-}
-
-const Locale & U_EXPORT2
-Locale::getUK(void)
-{
- return getLocale(eUK);
-}
-
-const Locale & U_EXPORT2
-Locale::getUS(void)
-{
- return getLocale(eUS);
-}
-
-const Locale & U_EXPORT2
-Locale::getCanada(void)
-{
- return getLocale(eCANADA);
-}
-
-const Locale & U_EXPORT2
-Locale::getCanadaFrench(void)
-{
- return getLocale(eCANADA_FRENCH);
-}
-
-const Locale &
-Locale::getLocale(int locid)
-{
- Locale *localeCache = getLocaleCache();
- U_ASSERT((locid < eMAX_LOCALES)&&(locid>=0));
- if (localeCache == NULL) {
- // Failure allocating the locale cache.
- // The best we can do is return a NULL reference.
- locid = 0;
- }
- return localeCache[locid]; /*operating on NULL*/
-}
-
-/*
-This function is defined this way in order to get around static
-initialization and static destruction.
- */
-Locale *
-Locale::getLocaleCache(void)
-{
- UErrorCode status = U_ZERO_ERROR;
- umtx_initOnce(gLocaleCacheInitOnce, locale_init, status);
- return gLocaleCache;
-}
-
-class KeywordEnumeration : public StringEnumeration {
-private:
- char *keywords;
- char *current;
- int32_t length;
- UnicodeString currUSKey;
- static const char fgClassID;/* Warning this is used beyond the typical RTTI usage. */
-
-public:
- static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; }
- virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
-public:
- KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
- : keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) {
- if(U_SUCCESS(status) && keywordLen != 0) {
- if(keys == NULL || keywordLen < 0) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- } else {
- keywords = (char *)uprv_malloc(keywordLen+1);
- if (keywords == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
- else {
- uprv_memcpy(keywords, keys, keywordLen);
- keywords[keywordLen] = 0;
- current = keywords + currentIndex;
- length = keywordLen;
- }
- }
- }
- }
-
- virtual ~KeywordEnumeration();
-
- virtual StringEnumeration * clone() const
- {
- UErrorCode status = U_ZERO_ERROR;
- return new KeywordEnumeration(keywords, length, (int32_t)(current - keywords), status);
- }
-
- virtual int32_t count(UErrorCode &/*status*/) const {
- char *kw = keywords;
- int32_t result = 0;
- while(*kw) {
- result++;
- kw += uprv_strlen(kw)+1;
- }
- return result;
- }
-
- virtual const char* next(int32_t* resultLength, UErrorCode& status) {
- const char* result;
- int32_t len;
- if(U_SUCCESS(status) && *current != 0) {
- result = current;
- len = (int32_t)uprv_strlen(current);
- current += len+1;
- if(resultLength != NULL) {
- *resultLength = len;
- }
- } else {
- if(resultLength != NULL) {
- *resultLength = 0;
- }
- result = NULL;
- }
- return result;
- }
-
- virtual const UnicodeString* snext(UErrorCode& status) {
- int32_t resultLength = 0;
- const char *s = next(&resultLength, status);
- return setChars(s, resultLength, status);
- }
-
- virtual void reset(UErrorCode& /*status*/) {
- current = keywords;
- }
-};
-
-const char KeywordEnumeration::fgClassID = '\0';
-
-KeywordEnumeration::~KeywordEnumeration() {
- uprv_free(keywords);
-}
-
-// A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
-// the next() method for each keyword before returning it.
-class UnicodeKeywordEnumeration : public KeywordEnumeration {
-public:
- using KeywordEnumeration::KeywordEnumeration;
- virtual ~UnicodeKeywordEnumeration();
-
- virtual const char* next(int32_t* resultLength, UErrorCode& status) {
- const char* legacy_key = KeywordEnumeration::next(nullptr, status);
- if (U_SUCCESS(status) && legacy_key != nullptr) {
- const char* key = uloc_toUnicodeLocaleKey(legacy_key);
- if (key == nullptr) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- } else {
- if (resultLength != nullptr) {
- *resultLength = static_cast<int32_t>(uprv_strlen(key));
- }
- return key;
- }
- }
- if (resultLength != nullptr) *resultLength = 0;
- return nullptr;
- }
-};
-
-// Out-of-line virtual destructor to serve as the "key function".
-UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default;
-
-StringEnumeration *
-Locale::createKeywords(UErrorCode &status) const
-{
- char keywords[256];
- int32_t keywordCapacity = sizeof keywords;
- StringEnumeration *result = NULL;
-
- if (U_FAILURE(status)) {
- return result;
- }
-
- const char* variantStart = uprv_strchr(fullName, '@');
- const char* assignment = uprv_strchr(fullName, '=');
- if(variantStart) {
- if(assignment > variantStart) {
- int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, FALSE, &status);
- if(U_SUCCESS(status) && keyLen) {
- result = new KeywordEnumeration(keywords, keyLen, 0, status);
- if (!result) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
- }
- } else {
- status = U_INVALID_FORMAT_ERROR;
- }
- }
- return result;
-}
-
-StringEnumeration *
-Locale::createUnicodeKeywords(UErrorCode &status) const
-{
- char keywords[256];
- int32_t keywordCapacity = sizeof keywords;
- StringEnumeration *result = NULL;
-
- if (U_FAILURE(status)) {
- return result;
- }
-
- const char* variantStart = uprv_strchr(fullName, '@');
- const char* assignment = uprv_strchr(fullName, '=');
- if(variantStart) {
- if(assignment > variantStart) {
- int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, FALSE, &status);
- if(U_SUCCESS(status) && keyLen) {
- result = new UnicodeKeywordEnumeration(keywords, keyLen, 0, status);
- if (!result) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
- }
- } else {
- status = U_INVALID_FORMAT_ERROR;
- }
- }
- return result;
-}
-
-int32_t
-Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const
-{
- return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
-}
-
-void
-Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const {
- if (U_FAILURE(status)) {
- return;
- }
-
- if (fIsBogus) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- // TODO: Remove the need for a const char* to a NUL terminated buffer.
- const CharString keywordName_nul(keywordName, status);
- if (U_FAILURE(status)) {
- return;
- }
-
- LocalMemory<char> scratch;
- int32_t scratch_capacity = 16; // Arbitrarily chosen default size.
-
- char* buffer;
- int32_t result_capacity, reslen;
-
- for (;;) {
- if (scratch.allocateInsteadAndReset(scratch_capacity) == nullptr) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- buffer = sink.GetAppendBuffer(
- /*min_capacity=*/scratch_capacity,
- /*desired_capacity_hint=*/scratch_capacity,
- scratch.getAlias(),
- scratch_capacity,
- &result_capacity);
-
- reslen = uloc_getKeywordValue(
- fullName,
- keywordName_nul.data(),
- buffer,
- result_capacity,
- &status);
-
- if (status != U_BUFFER_OVERFLOW_ERROR) {
- break;
- }
-
- scratch_capacity = reslen;
- status = U_ZERO_ERROR;
- }
-
- if (U_FAILURE(status)) {
- return;
- }
-
- sink.Append(buffer, reslen);
- if (status == U_STRING_NOT_TERMINATED_WARNING) {
- status = U_ZERO_ERROR; // Terminators not used.
- }
-}
-
-void
-Locale::getUnicodeKeywordValue(StringPiece keywordName,
- ByteSink& sink,
- UErrorCode& status) const {
- // TODO: Remove the need for a const char* to a NUL terminated buffer.
- const CharString keywordName_nul(keywordName, status);
- if (U_FAILURE(status)) {
- return;
- }
-
- const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
-
- if (legacy_key == nullptr) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- CharString legacy_value;
- {
- CharStringByteSink sink(&legacy_value);
- getKeywordValue(legacy_key, sink, status);
- }
-
- if (U_FAILURE(status)) {
- return;
- }
-
- const char* unicode_value = uloc_toUnicodeLocaleType(
- keywordName_nul.data(), legacy_value.data());
-
- if (unicode_value == nullptr) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- sink.Append(unicode_value, static_cast<int32_t>(uprv_strlen(unicode_value)));
-}
-
-void
-Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
-{
- if (U_FAILURE(status)) {
- return;
- }
- int32_t bufferLength = uprv_max((int32_t)(uprv_strlen(fullName) + 1), ULOC_FULLNAME_CAPACITY);
- int32_t newLength = uloc_setKeywordValue(keywordName, keywordValue, fullName,
- bufferLength, &status) + 1;
- /* Handle the case the current buffer is not enough to hold the new id */
- if (status == U_BUFFER_OVERFLOW_ERROR) {
- U_ASSERT(newLength > bufferLength);
- char* newFullName = (char *)uprv_malloc(newLength);
- if (newFullName == nullptr) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- uprv_strcpy(newFullName, fullName);
- if (fullName != fullNameBuffer) {
- // if full Name is already on the heap, need to free it.
- uprv_free(fullName);
- }
- fullName = newFullName;
- status = U_ZERO_ERROR;
- uloc_setKeywordValue(keywordName, keywordValue, fullName, newLength, &status);
- } else {
- U_ASSERT(newLength <= bufferLength);
- }
- if (U_SUCCESS(status) && baseName == fullName) {
- // May have added the first keyword, meaning that the fullName is no longer also the baseName.
- initBaseName(status);
- }
-}
-
-void
-Locale::setKeywordValue(StringPiece keywordName,
- StringPiece keywordValue,
- UErrorCode& status) {
- // TODO: Remove the need for a const char* to a NUL terminated buffer.
- const CharString keywordName_nul(keywordName, status);
- const CharString keywordValue_nul(keywordValue, status);
- setKeywordValue(keywordName_nul.data(), keywordValue_nul.data(), status);
-}
-
-void
-Locale::setUnicodeKeywordValue(StringPiece keywordName,
- StringPiece keywordValue,
- UErrorCode& status) {
- // TODO: Remove the need for a const char* to a NUL terminated buffer.
- const CharString keywordName_nul(keywordName, status);
- const CharString keywordValue_nul(keywordValue, status);
-
- if (U_FAILURE(status)) {
- return;
- }
-
- const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
-
- if (legacy_key == nullptr) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- const char* legacy_value = nullptr;
-
- if (!keywordValue_nul.isEmpty()) {
- legacy_value =
- uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data());
-
- if (legacy_value == nullptr) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- }
-
- setKeywordValue(legacy_key, legacy_value, status);
-}
-
-const char *
-Locale::getBaseName() const {
- return baseName;
-}
-
-Locale::Iterator::~Iterator() = default;
-
-//eof
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/loclikely.cpp b/contrib/libs/icu/common/loclikely.cpp
deleted file mode 100644
index a4a4181cb13..00000000000
--- a/contrib/libs/icu/common/loclikely.cpp
+++ /dev/null
@@ -1,1358 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1997-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: loclikely.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010feb25
-* created by: Markus W. Scherer
-*
-* Code for likely and minimized locale subtags, separated out from other .cpp files
-* that then do not depend on resource bundle code and likely-subtags data.
-*/
-
-#include "unicode/bytestream.h"
-#include "unicode/utypes.h"
-#include "unicode/locid.h"
-#include "unicode/putil.h"
-#include "unicode/uchar.h"
-#include "unicode/uloc.h"
-#include "unicode/ures.h"
-#include "unicode/uscript.h"
-#include "bytesinkutil.h"
-#include "charstr.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "ulocimp.h"
-#include "ustr_imp.h"
-
-/**
- * These are the canonical strings for unknown languages, scripts and regions.
- **/
-static const char* const unknownLanguage = "und";
-static const char* const unknownScript = "Zzzz";
-static const char* const unknownRegion = "ZZ";
-
-/**
- * This function looks for the localeID in the likelySubtags resource.
- *
- * @param localeID The tag to find.
- * @param buffer A buffer to hold the matching entry
- * @param bufferLength The length of the output buffer
- * @return A pointer to "buffer" if found, or a null pointer if not.
- */
-static const char* U_CALLCONV
-findLikelySubtags(const char* localeID,
- char* buffer,
- int32_t bufferLength,
- UErrorCode* err) {
- const char* result = NULL;
-
- if (!U_FAILURE(*err)) {
- int32_t resLen = 0;
- const UChar* s = NULL;
- UErrorCode tmpErr = U_ZERO_ERROR;
- icu::LocalUResourceBundlePointer subtags(ures_openDirect(NULL, "likelySubtags", &tmpErr));
- if (U_SUCCESS(tmpErr)) {
- icu::CharString und;
- if (localeID != NULL) {
- if (*localeID == '\0') {
- localeID = unknownLanguage;
- } else if (*localeID == '_') {
- und.append(unknownLanguage, *err);
- und.append(localeID, *err);
- if (U_FAILURE(*err)) {
- return NULL;
- }
- localeID = und.data();
- }
- }
- s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
-
- if (U_FAILURE(tmpErr)) {
- /*
- * If a resource is missing, it's not really an error, it's
- * just that we don't have any data for that particular locale ID.
- */
- if (tmpErr != U_MISSING_RESOURCE_ERROR) {
- *err = tmpErr;
- }
- }
- else if (resLen >= bufferLength) {
- /* The buffer should never overflow. */
- *err = U_INTERNAL_PROGRAM_ERROR;
- }
- else {
- u_UCharsToChars(s, buffer, resLen + 1);
- if (resLen >= 3 &&
- uprv_strnicmp(buffer, unknownLanguage, 3) == 0 &&
- (resLen == 3 || buffer[3] == '_')) {
- uprv_memmove(buffer, buffer + 3, resLen - 3 + 1);
- }
- result = buffer;
- }
- } else {
- *err = tmpErr;
- }
- }
-
- return result;
-}
-
-/**
- * Append a tag to a buffer, adding the separator if necessary. The buffer
- * must be large enough to contain the resulting tag plus any separator
- * necessary. The tag must not be a zero-length string.
- *
- * @param tag The tag to add.
- * @param tagLength The length of the tag.
- * @param buffer The output buffer.
- * @param bufferLength The length of the output buffer. This is an input/ouput parameter.
- **/
-static void U_CALLCONV
-appendTag(
- const char* tag,
- int32_t tagLength,
- char* buffer,
- int32_t* bufferLength,
- UBool withSeparator) {
-
- if (withSeparator) {
- buffer[*bufferLength] = '_';
- ++(*bufferLength);
- }
-
- uprv_memmove(
- &buffer[*bufferLength],
- tag,
- tagLength);
-
- *bufferLength += tagLength;
-}
-
-/**
- * Create a tag string from the supplied parameters. The lang, script and region
- * parameters may be NULL pointers. If they are, their corresponding length parameters
- * must be less than or equal to 0.
- *
- * If any of the language, script or region parameters are empty, and the alternateTags
- * parameter is not NULL, it will be parsed for potential language, script and region tags
- * to be used when constructing the new tag. If the alternateTags parameter is NULL, or
- * it contains no language tag, the default tag for the unknown language is used.
- *
- * If the length of the new string exceeds the capacity of the output buffer,
- * the function copies as many bytes to the output buffer as it can, and returns
- * the error U_BUFFER_OVERFLOW_ERROR.
- *
- * If an illegal argument is provided, the function returns the error
- * U_ILLEGAL_ARGUMENT_ERROR.
- *
- * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
- * the tag string fits in the output buffer, but the null terminator doesn't.
- *
- * @param lang The language tag to use.
- * @param langLength The length of the language tag.
- * @param script The script tag to use.
- * @param scriptLength The length of the script tag.
- * @param region The region tag to use.
- * @param regionLength The length of the region tag.
- * @param trailing Any trailing data to append to the new tag.
- * @param trailingLength The length of the trailing data.
- * @param alternateTags A string containing any alternate tags.
- * @param sink The output sink receiving the tag string.
- * @param err A pointer to a UErrorCode for error reporting.
- **/
-static void U_CALLCONV
-createTagStringWithAlternates(
- const char* lang,
- int32_t langLength,
- const char* script,
- int32_t scriptLength,
- const char* region,
- int32_t regionLength,
- const char* trailing,
- int32_t trailingLength,
- const char* alternateTags,
- icu::ByteSink& sink,
- UErrorCode* err) {
-
- if (U_FAILURE(*err)) {
- goto error;
- }
- else if (langLength >= ULOC_LANG_CAPACITY ||
- scriptLength >= ULOC_SCRIPT_CAPACITY ||
- regionLength >= ULOC_COUNTRY_CAPACITY) {
- goto error;
- }
- else {
- /**
- * ULOC_FULLNAME_CAPACITY will provide enough capacity
- * that we can build a string that contains the language,
- * script and region code without worrying about overrunning
- * the user-supplied buffer.
- **/
- char tagBuffer[ULOC_FULLNAME_CAPACITY];
- int32_t tagLength = 0;
- UBool regionAppended = FALSE;
-
- if (langLength > 0) {
- appendTag(
- lang,
- langLength,
- tagBuffer,
- &tagLength,
- /*withSeparator=*/FALSE);
- }
- else if (alternateTags == NULL) {
- /*
- * Use the empty string for an unknown language, if
- * we found no language.
- */
- }
- else {
- /*
- * Parse the alternateTags string for the language.
- */
- char alternateLang[ULOC_LANG_CAPACITY];
- int32_t alternateLangLength = sizeof(alternateLang);
-
- alternateLangLength =
- uloc_getLanguage(
- alternateTags,
- alternateLang,
- alternateLangLength,
- err);
- if(U_FAILURE(*err) ||
- alternateLangLength >= ULOC_LANG_CAPACITY) {
- goto error;
- }
- else if (alternateLangLength == 0) {
- /*
- * Use the empty string for an unknown language, if
- * we found no language.
- */
- }
- else {
- appendTag(
- alternateLang,
- alternateLangLength,
- tagBuffer,
- &tagLength,
- /*withSeparator=*/FALSE);
- }
- }
-
- if (scriptLength > 0) {
- appendTag(
- script,
- scriptLength,
- tagBuffer,
- &tagLength,
- /*withSeparator=*/TRUE);
- }
- else if (alternateTags != NULL) {
- /*
- * Parse the alternateTags string for the script.
- */
- char alternateScript[ULOC_SCRIPT_CAPACITY];
-
- const int32_t alternateScriptLength =
- uloc_getScript(
- alternateTags,
- alternateScript,
- sizeof(alternateScript),
- err);
-
- if (U_FAILURE(*err) ||
- alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
- goto error;
- }
- else if (alternateScriptLength > 0) {
- appendTag(
- alternateScript,
- alternateScriptLength,
- tagBuffer,
- &tagLength,
- /*withSeparator=*/TRUE);
- }
- }
-
- if (regionLength > 0) {
- appendTag(
- region,
- regionLength,
- tagBuffer,
- &tagLength,
- /*withSeparator=*/TRUE);
-
- regionAppended = TRUE;
- }
- else if (alternateTags != NULL) {
- /*
- * Parse the alternateTags string for the region.
- */
- char alternateRegion[ULOC_COUNTRY_CAPACITY];
-
- const int32_t alternateRegionLength =
- uloc_getCountry(
- alternateTags,
- alternateRegion,
- sizeof(alternateRegion),
- err);
- if (U_FAILURE(*err) ||
- alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
- goto error;
- }
- else if (alternateRegionLength > 0) {
- appendTag(
- alternateRegion,
- alternateRegionLength,
- tagBuffer,
- &tagLength,
- /*withSeparator=*/TRUE);
-
- regionAppended = TRUE;
- }
- }
-
- /**
- * Copy the partial tag from our internal buffer to the supplied
- * target.
- **/
- sink.Append(tagBuffer, tagLength);
-
- if (trailingLength > 0) {
- if (*trailing != '@') {
- sink.Append("_", 1);
- if (!regionAppended) {
- /* extra separator is required */
- sink.Append("_", 1);
- }
- }
-
- /*
- * Copy the trailing data into the supplied buffer.
- */
- sink.Append(trailing, trailingLength);
- }
-
- return;
- }
-
-error:
-
- /**
- * An overflow indicates the locale ID passed in
- * is ill-formed. If we got here, and there was
- * no previous error, it's an implicit overflow.
- **/
- if (*err == U_BUFFER_OVERFLOW_ERROR ||
- U_SUCCESS(*err)) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- }
-}
-
-/**
- * Create a tag string from the supplied parameters. The lang, script and region
- * parameters may be NULL pointers. If they are, their corresponding length parameters
- * must be less than or equal to 0. If the lang parameter is an empty string, the
- * default value for an unknown language is written to the output buffer.
- *
- * If the length of the new string exceeds the capacity of the output buffer,
- * the function copies as many bytes to the output buffer as it can, and returns
- * the error U_BUFFER_OVERFLOW_ERROR.
- *
- * If an illegal argument is provided, the function returns the error
- * U_ILLEGAL_ARGUMENT_ERROR.
- *
- * @param lang The language tag to use.
- * @param langLength The length of the language tag.
- * @param script The script tag to use.
- * @param scriptLength The length of the script tag.
- * @param region The region tag to use.
- * @param regionLength The length of the region tag.
- * @param trailing Any trailing data to append to the new tag.
- * @param trailingLength The length of the trailing data.
- * @param sink The output sink receiving the tag string.
- * @param err A pointer to a UErrorCode for error reporting.
- **/
-static void U_CALLCONV
-createTagString(
- const char* lang,
- int32_t langLength,
- const char* script,
- int32_t scriptLength,
- const char* region,
- int32_t regionLength,
- const char* trailing,
- int32_t trailingLength,
- icu::ByteSink& sink,
- UErrorCode* err)
-{
- createTagStringWithAlternates(
- lang,
- langLength,
- script,
- scriptLength,
- region,
- regionLength,
- trailing,
- trailingLength,
- NULL,
- sink,
- err);
-}
-
-/**
- * Parse the language, script, and region subtags from a tag string, and copy the
- * results into the corresponding output parameters. The buffers are null-terminated,
- * unless overflow occurs.
- *
- * The langLength, scriptLength, and regionLength parameters are input/output
- * parameters, and must contain the capacity of their corresponding buffers on
- * input. On output, they will contain the actual length of the buffers, not
- * including the null terminator.
- *
- * If the length of any of the output subtags exceeds the capacity of the corresponding
- * buffer, the function copies as many bytes to the output buffer as it can, and returns
- * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
- * occurs.
- *
- * If an illegal argument is provided, the function returns the error
- * U_ILLEGAL_ARGUMENT_ERROR.
- *
- * @param localeID The locale ID to parse.
- * @param lang The language tag buffer.
- * @param langLength The length of the language tag.
- * @param script The script tag buffer.
- * @param scriptLength The length of the script tag.
- * @param region The region tag buffer.
- * @param regionLength The length of the region tag.
- * @param err A pointer to a UErrorCode for error reporting.
- * @return The number of chars of the localeID parameter consumed.
- **/
-static int32_t U_CALLCONV
-parseTagString(
- const char* localeID,
- char* lang,
- int32_t* langLength,
- char* script,
- int32_t* scriptLength,
- char* region,
- int32_t* regionLength,
- UErrorCode* err)
-{
- const char* position = localeID;
- int32_t subtagLength = 0;
-
- if(U_FAILURE(*err) ||
- localeID == NULL ||
- lang == NULL ||
- langLength == NULL ||
- script == NULL ||
- scriptLength == NULL ||
- region == NULL ||
- regionLength == NULL) {
- goto error;
- }
-
- subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
- u_terminateChars(lang, *langLength, subtagLength, err);
-
- /*
- * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
- * to be an error, because it indicates the user-supplied tag is
- * not well-formed.
- */
- if(U_FAILURE(*err)) {
- goto error;
- }
-
- *langLength = subtagLength;
-
- /*
- * If no language was present, use the empty string instead.
- * Otherwise, move past any separator.
- */
- if (_isIDSeparator(*position)) {
- ++position;
- }
-
- subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
- u_terminateChars(script, *scriptLength, subtagLength, err);
-
- if(U_FAILURE(*err)) {
- goto error;
- }
-
- *scriptLength = subtagLength;
-
- if (*scriptLength > 0) {
- if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
- /**
- * If the script part is the "unknown" script, then don't return it.
- **/
- *scriptLength = 0;
- }
-
- /*
- * Move past any separator.
- */
- if (_isIDSeparator(*position)) {
- ++position;
- }
- }
-
- subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
- u_terminateChars(region, *regionLength, subtagLength, err);
-
- if(U_FAILURE(*err)) {
- goto error;
- }
-
- *regionLength = subtagLength;
-
- if (*regionLength > 0) {
- if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
- /**
- * If the region part is the "unknown" region, then don't return it.
- **/
- *regionLength = 0;
- }
- } else if (*position != 0 && *position != '@') {
- /* back up over consumed trailing separator */
- --position;
- }
-
-exit:
-
- return (int32_t)(position - localeID);
-
-error:
-
- /**
- * If we get here, we have no explicit error, it's the result of an
- * illegal argument.
- **/
- if (!U_FAILURE(*err)) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- }
-
- goto exit;
-}
-
-static UBool U_CALLCONV
-createLikelySubtagsString(
- const char* lang,
- int32_t langLength,
- const char* script,
- int32_t scriptLength,
- const char* region,
- int32_t regionLength,
- const char* variants,
- int32_t variantsLength,
- icu::ByteSink& sink,
- UErrorCode* err) {
- /**
- * ULOC_FULLNAME_CAPACITY will provide enough capacity
- * that we can build a string that contains the language,
- * script and region code without worrying about overrunning
- * the user-supplied buffer.
- **/
- char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
-
- if(U_FAILURE(*err)) {
- goto error;
- }
-
- /**
- * Try the language with the script and region first.
- **/
- if (scriptLength > 0 && regionLength > 0) {
-
- const char* likelySubtags = NULL;
-
- icu::CharString tagBuffer;
- {
- icu::CharStringByteSink sink(&tagBuffer);
- createTagString(
- lang,
- langLength,
- script,
- scriptLength,
- region,
- regionLength,
- NULL,
- 0,
- sink,
- err);
- }
- if(U_FAILURE(*err)) {
- goto error;
- }
-
- likelySubtags =
- findLikelySubtags(
- tagBuffer.data(),
- likelySubtagsBuffer,
- sizeof(likelySubtagsBuffer),
- err);
- if(U_FAILURE(*err)) {
- goto error;
- }
-
- if (likelySubtags != NULL) {
- /* Always use the language tag from the
- maximal string, since it may be more
- specific than the one provided. */
- createTagStringWithAlternates(
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- 0,
- variants,
- variantsLength,
- likelySubtags,
- sink,
- err);
- return TRUE;
- }
- }
-
- /**
- * Try the language with just the script.
- **/
- if (scriptLength > 0) {
-
- const char* likelySubtags = NULL;
-
- icu::CharString tagBuffer;
- {
- icu::CharStringByteSink sink(&tagBuffer);
- createTagString(
- lang,
- langLength,
- script,
- scriptLength,
- NULL,
- 0,
- NULL,
- 0,
- sink,
- err);
- }
- if(U_FAILURE(*err)) {
- goto error;
- }
-
- likelySubtags =
- findLikelySubtags(
- tagBuffer.data(),
- likelySubtagsBuffer,
- sizeof(likelySubtagsBuffer),
- err);
- if(U_FAILURE(*err)) {
- goto error;
- }
-
- if (likelySubtags != NULL) {
- /* Always use the language tag from the
- maximal string, since it may be more
- specific than the one provided. */
- createTagStringWithAlternates(
- NULL,
- 0,
- NULL,
- 0,
- region,
- regionLength,
- variants,
- variantsLength,
- likelySubtags,
- sink,
- err);
- return TRUE;
- }
- }
-
- /**
- * Try the language with just the region.
- **/
- if (regionLength > 0) {
-
- const char* likelySubtags = NULL;
-
- icu::CharString tagBuffer;
- {
- icu::CharStringByteSink sink(&tagBuffer);
- createTagString(
- lang,
- langLength,
- NULL,
- 0,
- region,
- regionLength,
- NULL,
- 0,
- sink,
- err);
- }
- if(U_FAILURE(*err)) {
- goto error;
- }
-
- likelySubtags =
- findLikelySubtags(
- tagBuffer.data(),
- likelySubtagsBuffer,
- sizeof(likelySubtagsBuffer),
- err);
- if(U_FAILURE(*err)) {
- goto error;
- }
-
- if (likelySubtags != NULL) {
- /* Always use the language tag from the
- maximal string, since it may be more
- specific than the one provided. */
- createTagStringWithAlternates(
- NULL,
- 0,
- script,
- scriptLength,
- NULL,
- 0,
- variants,
- variantsLength,
- likelySubtags,
- sink,
- err);
- return TRUE;
- }
- }
-
- /**
- * Finally, try just the language.
- **/
- {
- const char* likelySubtags = NULL;
-
- icu::CharString tagBuffer;
- {
- icu::CharStringByteSink sink(&tagBuffer);
- createTagString(
- lang,
- langLength,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- 0,
- sink,
- err);
- }
- if(U_FAILURE(*err)) {
- goto error;
- }
-
- likelySubtags =
- findLikelySubtags(
- tagBuffer.data(),
- likelySubtagsBuffer,
- sizeof(likelySubtagsBuffer),
- err);
- if(U_FAILURE(*err)) {
- goto error;
- }
-
- if (likelySubtags != NULL) {
- /* Always use the language tag from the
- maximal string, since it may be more
- specific than the one provided. */
- createTagStringWithAlternates(
- NULL,
- 0,
- script,
- scriptLength,
- region,
- regionLength,
- variants,
- variantsLength,
- likelySubtags,
- sink,
- err);
- return TRUE;
- }
- }
-
- return FALSE;
-
-error:
-
- if (!U_FAILURE(*err)) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- }
-
- return FALSE;
-}
-
-#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
- int32_t count = 0; \
- int32_t i; \
- for (i = 0; i < trailingLength; i++) { \
- if (trailing[i] == '-' || trailing[i] == '_') { \
- count = 0; \
- if (count > 8) { \
- goto error; \
- } \
- } else if (trailing[i] == '@') { \
- break; \
- } else if (count > 8) { \
- goto error; \
- } else { \
- count++; \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-static void
-_uloc_addLikelySubtags(const char* localeID,
- icu::ByteSink& sink,
- UErrorCode* err) {
- char lang[ULOC_LANG_CAPACITY];
- int32_t langLength = sizeof(lang);
- char script[ULOC_SCRIPT_CAPACITY];
- int32_t scriptLength = sizeof(script);
- char region[ULOC_COUNTRY_CAPACITY];
- int32_t regionLength = sizeof(region);
- const char* trailing = "";
- int32_t trailingLength = 0;
- int32_t trailingIndex = 0;
- UBool success = FALSE;
-
- if(U_FAILURE(*err)) {
- goto error;
- }
- if (localeID == NULL) {
- goto error;
- }
-
- trailingIndex = parseTagString(
- localeID,
- lang,
- &langLength,
- script,
- &scriptLength,
- region,
- &regionLength,
- err);
- if(U_FAILURE(*err)) {
- /* Overflow indicates an illegal argument error */
- if (*err == U_BUFFER_OVERFLOW_ERROR) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- }
-
- goto error;
- }
-
- /* Find the length of the trailing portion. */
- while (_isIDSeparator(localeID[trailingIndex])) {
- trailingIndex++;
- }
- trailing = &localeID[trailingIndex];
- trailingLength = (int32_t)uprv_strlen(trailing);
-
- CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
-
- success =
- createLikelySubtagsString(
- lang,
- langLength,
- script,
- scriptLength,
- region,
- regionLength,
- trailing,
- trailingLength,
- sink,
- err);
-
- if (!success) {
- const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
-
- /*
- * If we get here, we need to return localeID.
- */
- sink.Append(localeID, localIDLength);
- }
-
- return;
-
-error:
-
- if (!U_FAILURE(*err)) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- }
-}
-
-static void
-_uloc_minimizeSubtags(const char* localeID,
- icu::ByteSink& sink,
- UErrorCode* err) {
- icu::CharString maximizedTagBuffer;
-
- char lang[ULOC_LANG_CAPACITY];
- int32_t langLength = sizeof(lang);
- char script[ULOC_SCRIPT_CAPACITY];
- int32_t scriptLength = sizeof(script);
- char region[ULOC_COUNTRY_CAPACITY];
- int32_t regionLength = sizeof(region);
- const char* trailing = "";
- int32_t trailingLength = 0;
- int32_t trailingIndex = 0;
-
- if(U_FAILURE(*err)) {
- goto error;
- }
- else if (localeID == NULL) {
- goto error;
- }
-
- trailingIndex =
- parseTagString(
- localeID,
- lang,
- &langLength,
- script,
- &scriptLength,
- region,
- &regionLength,
- err);
- if(U_FAILURE(*err)) {
-
- /* Overflow indicates an illegal argument error */
- if (*err == U_BUFFER_OVERFLOW_ERROR) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- }
-
- goto error;
- }
-
- /* Find the spot where the variants or the keywords begin, if any. */
- while (_isIDSeparator(localeID[trailingIndex])) {
- trailingIndex++;
- }
- trailing = &localeID[trailingIndex];
- trailingLength = (int32_t)uprv_strlen(trailing);
-
- CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
-
- {
- icu::CharString base;
- {
- icu::CharStringByteSink sink(&base);
- createTagString(
- lang,
- langLength,
- script,
- scriptLength,
- region,
- regionLength,
- NULL,
- 0,
- sink,
- err);
- }
-
- /**
- * First, we need to first get the maximization
- * from AddLikelySubtags.
- **/
- {
- icu::CharStringByteSink sink(&maximizedTagBuffer);
- ulocimp_addLikelySubtags(base.data(), sink, err);
- }
- }
-
- if(U_FAILURE(*err)) {
- goto error;
- }
-
- /**
- * Start first with just the language.
- **/
- {
- icu::CharString tagBuffer;
- {
- icu::CharStringByteSink sink(&tagBuffer);
- createLikelySubtagsString(
- lang,
- langLength,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- 0,
- sink,
- err);
- }
-
- if(U_FAILURE(*err)) {
- goto error;
- }
- else if (!tagBuffer.isEmpty() && uprv_strnicmp(
- maximizedTagBuffer.data(),
- tagBuffer.data(),
- tagBuffer.length()) == 0) {
-
- createTagString(
- lang,
- langLength,
- NULL,
- 0,
- NULL,
- 0,
- trailing,
- trailingLength,
- sink,
- err);
- return;
- }
- }
-
- /**
- * Next, try the language and region.
- **/
- if (regionLength > 0) {
-
- icu::CharString tagBuffer;
- {
- icu::CharStringByteSink sink(&tagBuffer);
- createLikelySubtagsString(
- lang,
- langLength,
- NULL,
- 0,
- region,
- regionLength,
- NULL,
- 0,
- sink,
- err);
- }
-
- if(U_FAILURE(*err)) {
- goto error;
- }
- else if (uprv_strnicmp(
- maximizedTagBuffer.data(),
- tagBuffer.data(),
- tagBuffer.length()) == 0) {
-
- createTagString(
- lang,
- langLength,
- NULL,
- 0,
- region,
- regionLength,
- trailing,
- trailingLength,
- sink,
- err);
- return;
- }
- }
-
- /**
- * Finally, try the language and script. This is our last chance,
- * since trying with all three subtags would only yield the
- * maximal version that we already have.
- **/
- if (scriptLength > 0 && regionLength > 0) {
- icu::CharString tagBuffer;
- {
- icu::CharStringByteSink sink(&tagBuffer);
- createLikelySubtagsString(
- lang,
- langLength,
- script,
- scriptLength,
- NULL,
- 0,
- NULL,
- 0,
- sink,
- err);
- }
-
- if(U_FAILURE(*err)) {
- goto error;
- }
- else if (uprv_strnicmp(
- maximizedTagBuffer.data(),
- tagBuffer.data(),
- tagBuffer.length()) == 0) {
-
- createTagString(
- lang,
- langLength,
- script,
- scriptLength,
- NULL,
- 0,
- trailing,
- trailingLength,
- sink,
- err);
- return;
- }
- }
-
- {
- /**
- * If we got here, return the locale ID parameter.
- **/
- const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
- sink.Append(localeID, localeIDLength);
- return;
- }
-
-error:
-
- if (!U_FAILURE(*err)) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- }
-}
-
-static UBool
-do_canonicalize(const char* localeID,
- char* buffer,
- int32_t bufferCapacity,
- UErrorCode* err)
-{
- uloc_canonicalize(
- localeID,
- buffer,
- bufferCapacity,
- err);
-
- if (*err == U_STRING_NOT_TERMINATED_WARNING ||
- *err == U_BUFFER_OVERFLOW_ERROR) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
-
- return FALSE;
- }
- else if (U_FAILURE(*err)) {
-
- return FALSE;
- }
- else {
- return TRUE;
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-uloc_addLikelySubtags(const char* localeID,
- char* maximizedLocaleID,
- int32_t maximizedLocaleIDCapacity,
- UErrorCode* status) {
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- icu::CheckedArrayByteSink sink(
- maximizedLocaleID, maximizedLocaleIDCapacity);
-
- ulocimp_addLikelySubtags(localeID, sink, status);
- int32_t reslen = sink.NumberOfBytesAppended();
-
- if (U_FAILURE(*status)) {
- return sink.Overflowed() ? reslen : -1;
- }
-
- if (sink.Overflowed()) {
- *status = U_BUFFER_OVERFLOW_ERROR;
- } else {
- u_terminateChars(
- maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status);
- }
-
- return reslen;
-}
-
-U_CAPI void U_EXPORT2
-ulocimp_addLikelySubtags(const char* localeID,
- icu::ByteSink& sink,
- UErrorCode* status) {
- char localeBuffer[ULOC_FULLNAME_CAPACITY];
-
- if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
- _uloc_addLikelySubtags(localeBuffer, sink, status);
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-uloc_minimizeSubtags(const char* localeID,
- char* minimizedLocaleID,
- int32_t minimizedLocaleIDCapacity,
- UErrorCode* status) {
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- icu::CheckedArrayByteSink sink(
- minimizedLocaleID, minimizedLocaleIDCapacity);
-
- ulocimp_minimizeSubtags(localeID, sink, status);
- int32_t reslen = sink.NumberOfBytesAppended();
-
- if (U_FAILURE(*status)) {
- return sink.Overflowed() ? reslen : -1;
- }
-
- if (sink.Overflowed()) {
- *status = U_BUFFER_OVERFLOW_ERROR;
- } else {
- u_terminateChars(
- minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status);
- }
-
- return reslen;
-}
-
-U_CAPI void U_EXPORT2
-ulocimp_minimizeSubtags(const char* localeID,
- icu::ByteSink& sink,
- UErrorCode* status) {
- char localeBuffer[ULOC_FULLNAME_CAPACITY];
-
- if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
- _uloc_minimizeSubtags(localeBuffer, sink, status);
- }
-}
-
-// Pairs of (language subtag, + or -) for finding out fast if common languages
-// are LTR (minus) or RTL (plus).
-static const char LANG_DIR_STRING[] =
- "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
-
-// Implemented here because this calls ulocimp_addLikelySubtags().
-U_CAPI UBool U_EXPORT2
-uloc_isRightToLeft(const char *locale) {
- UErrorCode errorCode = U_ZERO_ERROR;
- char script[8];
- int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
- if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
- scriptLength == 0) {
- // Fastpath: We know the likely scripts and their writing direction
- // for some common languages.
- errorCode = U_ZERO_ERROR;
- char lang[8];
- int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
- if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
- return FALSE;
- }
- if (langLength > 0) {
- const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
- if (langPtr != NULL) {
- switch (langPtr[langLength]) {
- case '-': return FALSE;
- case '+': return TRUE;
- default: break; // partial match of a longer code
- }
- }
- }
- // Otherwise, find the likely script.
- errorCode = U_ZERO_ERROR;
- icu::CharString likely;
- {
- icu::CharStringByteSink sink(&likely);
- ulocimp_addLikelySubtags(locale, sink, &errorCode);
- }
- if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
- return FALSE;
- }
- scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode);
- if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
- scriptLength == 0) {
- return FALSE;
- }
- }
- UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
- return uscript_isRightToLeft(scriptCode);
-}
-
-U_NAMESPACE_BEGIN
-
-UBool
-Locale::isRightToLeft() const {
- return uloc_isRightToLeft(getBaseName());
-}
-
-U_NAMESPACE_END
-
-// The following must at least allow for rg key value (6) plus terminator (1).
-#define ULOC_RG_BUFLEN 8
-
-U_CAPI int32_t U_EXPORT2
-ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
- char *region, int32_t regionCapacity, UErrorCode* status) {
- if (U_FAILURE(*status)) {
- return 0;
- }
- char rgBuf[ULOC_RG_BUFLEN];
- UErrorCode rgStatus = U_ZERO_ERROR;
-
- // First check for rg keyword value
- int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
- if (U_FAILURE(rgStatus) || rgLen != 6) {
- rgLen = 0;
- } else {
- // rgBuf guaranteed to be zero terminated here, with text len 6
- char *rgPtr = rgBuf;
- for (; *rgPtr!= 0; rgPtr++) {
- *rgPtr = uprv_toupper(*rgPtr);
- }
- rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
- }
-
- if (rgLen == 0) {
- // No valid rg keyword value, try for unicode_region_subtag
- rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
- if (U_FAILURE(*status)) {
- rgLen = 0;
- } else if (rgLen == 0 && inferRegion) {
- // no unicode_region_subtag but inferRegion TRUE, try likely subtags
- rgStatus = U_ZERO_ERROR;
- icu::CharString locBuf;
- {
- icu::CharStringByteSink sink(&locBuf);
- ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
- }
- if (U_SUCCESS(rgStatus)) {
- rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
- if (U_FAILURE(*status)) {
- rgLen = 0;
- }
- }
- }
- }
-
- rgBuf[rgLen] = 0;
- uprv_strncpy(region, rgBuf, regionCapacity);
- return u_terminateChars(region, regionCapacity, rgLen, status);
-}
-
diff --git a/contrib/libs/icu/common/loclikelysubtags.cpp b/contrib/libs/icu/common/loclikelysubtags.cpp
deleted file mode 100644
index 1fbf1a14632..00000000000
--- a/contrib/libs/icu/common/loclikelysubtags.cpp
+++ /dev/null
@@ -1,746 +0,0 @@
-// © 2019 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-
-// loclikelysubtags.cpp
-// created: 2019may08 Markus W. Scherer
-
-#include <utility>
-#include "unicode/utypes.h"
-#include "unicode/bytestrie.h"
-#include "unicode/localpointer.h"
-#include "unicode/locid.h"
-#include "unicode/uobject.h"
-#include "unicode/ures.h"
-#include "charstr.h"
-#include "cstring.h"
-#include "loclikelysubtags.h"
-#include "lsr.h"
-#include "uassert.h"
-#include "ucln_cmn.h"
-#include "uhash.h"
-#include "uinvchar.h"
-#include "umutex.h"
-#include "uresdata.h"
-#include "uresimp.h"
-
-U_NAMESPACE_BEGIN
-
-namespace {
-
-constexpr char PSEUDO_ACCENTS_PREFIX = '\''; // -XA, -PSACCENT
-constexpr char PSEUDO_BIDI_PREFIX = '+'; // -XB, -PSBIDI
-constexpr char PSEUDO_CRACKED_PREFIX = ','; // -XC, -PSCRACK
-
-/**
- * Stores NUL-terminated strings with duplicate elimination.
- * Checks for unique UTF-16 string pointers and converts to invariant characters.
- */
-class UniqueCharStrings {
-public:
- UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
- uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
- if (U_FAILURE(errorCode)) { return; }
- strings = new CharString();
- if (strings == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- }
- }
- ~UniqueCharStrings() {
- uhash_close(&map);
- delete strings;
- }
-
- /** Returns/orphans the CharString that contains all strings. */
- CharString *orphanCharStrings() {
- CharString *result = strings;
- strings = nullptr;
- return result;
- }
-
- /** Adds a string and returns a unique number for it. */
- int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return 0; }
- if (isFrozen) {
- errorCode = U_NO_WRITE_PERMISSION;
- return 0;
- }
- // The string points into the resource bundle.
- const char16_t *p = s.getBuffer();
- int32_t oldIndex = uhash_geti(&map, p);
- if (oldIndex != 0) { // found duplicate
- return oldIndex;
- }
- // Explicit NUL terminator for the previous string.
- // The strings object is also terminated with one implicit NUL.
- strings->append(0, errorCode);
- int32_t newIndex = strings->length();
- strings->appendInvariantChars(s, errorCode);
- uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
- return newIndex;
- }
-
- void freeze() { isFrozen = true; }
-
- /**
- * Returns a string pointer for its unique number, if this object is frozen.
- * Otherwise nullptr.
- */
- const char *get(int32_t i) const {
- U_ASSERT(isFrozen);
- return isFrozen && i > 0 ? strings->data() + i : nullptr;
- }
-
-private:
- UHashtable map;
- CharString *strings;
- bool isFrozen = false;
-};
-
-} // namespace
-
-LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) :
- distanceTrieBytes(data.distanceTrieBytes),
- regionToPartitions(data.regionToPartitions),
- partitions(data.partitions),
- paradigms(data.paradigms), paradigmsLength(data.paradigmsLength),
- distances(data.distances) {
- data.partitions = nullptr;
- data.paradigms = nullptr;
-}
-
-LocaleDistanceData::~LocaleDistanceData() {
- uprv_free(partitions);
- delete[] paradigms;
-}
-
-// TODO(ICU-20777): Rename to just LikelySubtagsData.
-struct XLikelySubtagsData {
- UResourceBundle *langInfoBundle = nullptr;
- UniqueCharStrings strings;
- CharStringMap languageAliases;
- CharStringMap regionAliases;
- const uint8_t *trieBytes = nullptr;
- LSR *lsrs = nullptr;
- int32_t lsrsLength = 0;
-
- LocaleDistanceData distanceData;
-
- XLikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {}
-
- ~XLikelySubtagsData() {
- ures_close(langInfoBundle);
- delete[] lsrs;
- }
-
- void load(UErrorCode &errorCode) {
- langInfoBundle = ures_openDirect(nullptr, "langInfo", &errorCode);
- if (U_FAILURE(errorCode)) { return; }
- StackUResourceBundle stackTempBundle;
- ResourceDataValue value;
- ures_getValueWithFallback(langInfoBundle, "likely", stackTempBundle.getAlias(),
- value, errorCode);
- ResourceTable likelyTable = value.getTable(errorCode);
- if (U_FAILURE(errorCode)) { return; }
-
- // Read all strings in the resource bundle and convert them to invariant char *.
- LocalMemory<int32_t> languageIndexes, regionIndexes, lsrSubtagIndexes;
- int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0;
- if (!readStrings(likelyTable, "languageAliases", value,
- languageIndexes, languagesLength, errorCode) ||
- !readStrings(likelyTable, "regionAliases", value,
- regionIndexes, regionsLength, errorCode) ||
- !readStrings(likelyTable, "lsrs", value,
- lsrSubtagIndexes,lsrSubtagsLength, errorCode)) {
- return;
- }
- if ((languagesLength & 1) != 0 ||
- (regionsLength & 1) != 0 ||
- (lsrSubtagsLength % 3) != 0) {
- errorCode = U_INVALID_FORMAT_ERROR;
- return;
- }
- if (lsrSubtagsLength == 0) {
- errorCode = U_MISSING_RESOURCE_ERROR;
- return;
- }
-
- if (!likelyTable.findValue("trie", value)) {
- errorCode = U_MISSING_RESOURCE_ERROR;
- return;
- }
- int32_t length;
- trieBytes = value.getBinary(length, errorCode);
- if (U_FAILURE(errorCode)) { return; }
-
- // Also read distance/matcher data if available,
- // to open & keep only one resource bundle pointer
- // and to use one single UniqueCharStrings.
- UErrorCode matchErrorCode = U_ZERO_ERROR;
- ures_getValueWithFallback(langInfoBundle, "match", stackTempBundle.getAlias(),
- value, matchErrorCode);
- LocalMemory<int32_t> partitionIndexes, paradigmSubtagIndexes;
- int32_t partitionsLength = 0, paradigmSubtagsLength = 0;
- if (U_SUCCESS(matchErrorCode)) {
- ResourceTable matchTable = value.getTable(errorCode);
- if (U_FAILURE(errorCode)) { return; }
-
- if (matchTable.findValue("trie", value)) {
- distanceData.distanceTrieBytes = value.getBinary(length, errorCode);
- if (U_FAILURE(errorCode)) { return; }
- }
-
- if (matchTable.findValue("regionToPartitions", value)) {
- distanceData.regionToPartitions = value.getBinary(length, errorCode);
- if (U_FAILURE(errorCode)) { return; }
- if (length < LSR::REGION_INDEX_LIMIT) {
- errorCode = U_INVALID_FORMAT_ERROR;
- return;
- }
- }
-
- if (!readStrings(matchTable, "partitions", value,
- partitionIndexes, partitionsLength, errorCode) ||
- !readStrings(matchTable, "paradigms", value,
- paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) {
- return;
- }
- if ((paradigmSubtagsLength % 3) != 0) {
- errorCode = U_INVALID_FORMAT_ERROR;
- return;
- }
-
- if (matchTable.findValue("distances", value)) {
- distanceData.distances = value.getIntVector(length, errorCode);
- if (U_FAILURE(errorCode)) { return; }
- if (length < 4) { // LocaleDistance IX_LIMIT
- errorCode = U_INVALID_FORMAT_ERROR;
- return;
- }
- }
- } else if (matchErrorCode == U_MISSING_RESOURCE_ERROR) {
- // ok for likely subtags
- } else { // error other than missing resource
- errorCode = matchErrorCode;
- return;
- }
-
- // Fetch & store invariant-character versions of strings
- // only after we have collected and de-duplicated all of them.
- strings.freeze();
-
- languageAliases = CharStringMap(languagesLength / 2, errorCode);
- for (int32_t i = 0; i < languagesLength; i += 2) {
- languageAliases.put(strings.get(languageIndexes[i]),
- strings.get(languageIndexes[i + 1]), errorCode);
- }
-
- regionAliases = CharStringMap(regionsLength / 2, errorCode);
- for (int32_t i = 0; i < regionsLength; i += 2) {
- regionAliases.put(strings.get(regionIndexes[i]),
- strings.get(regionIndexes[i + 1]), errorCode);
- }
- if (U_FAILURE(errorCode)) { return; }
-
- lsrsLength = lsrSubtagsLength / 3;
- lsrs = new LSR[lsrsLength];
- if (lsrs == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- for (int32_t i = 0, j = 0; i < lsrSubtagsLength; i += 3, ++j) {
- lsrs[j] = LSR(strings.get(lsrSubtagIndexes[i]),
- strings.get(lsrSubtagIndexes[i + 1]),
- strings.get(lsrSubtagIndexes[i + 2]),
- LSR::IMPLICIT_LSR);
- }
-
- if (partitionsLength > 0) {
- distanceData.partitions = static_cast<const char **>(
- uprv_malloc(partitionsLength * sizeof(const char *)));
- if (distanceData.partitions == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- for (int32_t i = 0; i < partitionsLength; ++i) {
- distanceData.partitions[i] = strings.get(partitionIndexes[i]);
- }
- }
-
- if (paradigmSubtagsLength > 0) {
- distanceData.paradigmsLength = paradigmSubtagsLength / 3;
- LSR *paradigms = new LSR[distanceData.paradigmsLength];
- if (paradigms == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- for (int32_t i = 0, j = 0; i < paradigmSubtagsLength; i += 3, ++j) {
- paradigms[j] = LSR(strings.get(paradigmSubtagIndexes[i]),
- strings.get(paradigmSubtagIndexes[i + 1]),
- strings.get(paradigmSubtagIndexes[i + 2]),
- LSR::DONT_CARE_FLAGS);
- }
- distanceData.paradigms = paradigms;
- }
- }
-
-private:
- bool readStrings(const ResourceTable &table, const char *key, ResourceValue &value,
- LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {
- if (table.findValue(key, value)) {
- ResourceArray stringArray = value.getArray(errorCode);
- if (U_FAILURE(errorCode)) { return false; }
- length = stringArray.getSize();
- if (length == 0) { return true; }
- int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length);
- if (rawIndexes == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return false;
- }
- for (int i = 0; i < length; ++i) {
- stringArray.getValue(i, value); // returns TRUE because i < length
- rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode);
- if (U_FAILURE(errorCode)) { return false; }
- }
- }
- return true;
- }
-};
-
-namespace {
-
-XLikelySubtags *gLikelySubtags = nullptr;
-UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
-
-UBool U_CALLCONV cleanup() {
- delete gLikelySubtags;
- gLikelySubtags = nullptr;
- gInitOnce.reset();
- return TRUE;
-}
-
-} // namespace
-
-void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
- // This function is invoked only via umtx_initOnce().
- U_ASSERT(gLikelySubtags == nullptr);
- XLikelySubtagsData data(errorCode);
- data.load(errorCode);
- if (U_FAILURE(errorCode)) { return; }
- gLikelySubtags = new XLikelySubtags(data);
- if (gLikelySubtags == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup);
-}
-
-const XLikelySubtags *XLikelySubtags::getSingleton(UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return nullptr; }
- umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode);
- return gLikelySubtags;
-}
-
-XLikelySubtags::XLikelySubtags(XLikelySubtagsData &data) :
- langInfoBundle(data.langInfoBundle),
- strings(data.strings.orphanCharStrings()),
- languageAliases(std::move(data.languageAliases)),
- regionAliases(std::move(data.regionAliases)),
- trie(data.trieBytes),
- lsrs(data.lsrs),
-#if U_DEBUG
- lsrsLength(data.lsrsLength),
-#endif
- distanceData(std::move(data.distanceData)) {
- data.langInfoBundle = nullptr;
- data.lsrs = nullptr;
-
- // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
- UStringTrieResult result = trie.next(u'*');
- U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
- trieUndState = trie.getState64();
- result = trie.next(u'*');
- U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
- trieUndZzzzState = trie.getState64();
- result = trie.next(u'*');
- U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
- defaultLsrIndex = trie.getValue();
- trie.reset();
-
- for (char16_t c = u'a'; c <= u'z'; ++c) {
- result = trie.next(c);
- if (result == USTRINGTRIE_NO_VALUE) {
- trieFirstLetterStates[c - u'a'] = trie.getState64();
- }
- trie.reset();
- }
-}
-
-XLikelySubtags::~XLikelySubtags() {
- ures_close(langInfoBundle);
- delete strings;
- delete[] lsrs;
-}
-
-LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
- const char *name = locale.getName();
- if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=")
- // Private use language tag x-subtag-subtag...
- return LSR(name, "", "", LSR::EXPLICIT_LSR);
- }
- return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
- locale.getVariant(), errorCode);
-}
-
-namespace {
-
-const char *getCanonical(const CharStringMap &aliases, const char *alias) {
- const char *canonical = aliases.get(alias);
- return canonical == nullptr ? alias : canonical;
-}
-
-} // namespace
-
-LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
- const char *variant, UErrorCode &errorCode) const {
- // Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
- // They should match only themselves,
- // not other locales with what looks like the same language and script subtags.
- char c1;
- if (region[0] == 'X' && (c1 = region[1]) != 0 && region[2] == 0) {
- switch (c1) {
- case 'A':
- return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region,
- LSR::EXPLICIT_LSR, errorCode);
- case 'B':
- return LSR(PSEUDO_BIDI_PREFIX, language, script, region,
- LSR::EXPLICIT_LSR, errorCode);
- case 'C':
- return LSR(PSEUDO_CRACKED_PREFIX, language, script, region,
- LSR::EXPLICIT_LSR, errorCode);
- default: // normal locale
- break;
- }
- }
-
- if (variant[0] == 'P' && variant[1] == 'S') {
- int32_t lsrFlags = *region == 0 ?
- LSR::EXPLICIT_LANGUAGE | LSR::EXPLICIT_SCRIPT : LSR::EXPLICIT_LSR;
- if (uprv_strcmp(variant, "PSACCENT") == 0) {
- return LSR(PSEUDO_ACCENTS_PREFIX, language, script,
- *region == 0 ? "XA" : region, lsrFlags, errorCode);
- } else if (uprv_strcmp(variant, "PSBIDI") == 0) {
- return LSR(PSEUDO_BIDI_PREFIX, language, script,
- *region == 0 ? "XB" : region, lsrFlags, errorCode);
- } else if (uprv_strcmp(variant, "PSCRACK") == 0) {
- return LSR(PSEUDO_CRACKED_PREFIX, language, script,
- *region == 0 ? "XC" : region, lsrFlags, errorCode);
- }
- // else normal locale
- }
-
- language = getCanonical(languageAliases, language);
- // (We have no script mappings.)
- region = getCanonical(regionAliases, region);
- return maximize(language, script, region);
-}
-
-LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region) const {
- if (uprv_strcmp(language, "und") == 0) {
- language = "";
- }
- if (uprv_strcmp(script, "Zzzz") == 0) {
- script = "";
- }
- if (uprv_strcmp(region, "ZZ") == 0) {
- region = "";
- }
- if (*script != 0 && *region != 0 && *language != 0) {
- return LSR(language, script, region, LSR::EXPLICIT_LSR); // already maximized
- }
-
- uint32_t retainOldMask = 0;
- BytesTrie iter(trie);
- uint64_t state;
- int32_t value;
- // Small optimization: Array lookup for first language letter.
- int32_t c0;
- if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
- language[1] != 0 && // language.length() >= 2
- (state = trieFirstLetterStates[c0]) != 0) {
- value = trieNext(iter.resetToState64(state), language, 1);
- } else {
- value = trieNext(iter, language, 0);
- }
- if (value >= 0) {
- if (*language != 0) {
- retainOldMask |= 4;
- }
- state = iter.getState64();
- } else {
- retainOldMask |= 4;
- iter.resetToState64(trieUndState); // "und" ("*")
- state = 0;
- }
-
- if (value > 0) {
- // Intermediate or final value from just language.
- if (value == SKIP_SCRIPT) {
- value = 0;
- }
- if (*script != 0) {
- retainOldMask |= 2;
- }
- } else {
- value = trieNext(iter, script, 0);
- if (value >= 0) {
- if (*script != 0) {
- retainOldMask |= 2;
- }
- state = iter.getState64();
- } else {
- retainOldMask |= 2;
- if (state == 0) {
- iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
- } else {
- iter.resetToState64(state);
- value = trieNext(iter, "", 0);
- U_ASSERT(value >= 0);
- state = iter.getState64();
- }
- }
- }
-
- if (value > 0) {
- // Final value from just language or language+script.
- if (*region != 0) {
- retainOldMask |= 1;
- }
- } else {
- value = trieNext(iter, region, 0);
- if (value >= 0) {
- if (*region != 0) {
- retainOldMask |= 1;
- }
- } else {
- retainOldMask |= 1;
- if (state == 0) {
- value = defaultLsrIndex;
- } else {
- iter.resetToState64(state);
- value = trieNext(iter, "", 0);
- U_ASSERT(value > 0);
- }
- }
- }
- U_ASSERT(value < lsrsLength);
- const LSR &result = lsrs[value];
-
- if (*language == 0) {
- language = "und";
- }
-
- if (retainOldMask == 0) {
- // Quickly return a copy of the lookup-result LSR
- // without new allocation of the subtags.
- return LSR(result.language, result.script, result.region, result.flags);
- }
- if ((retainOldMask & 4) == 0) {
- language = result.language;
- }
- if ((retainOldMask & 2) == 0) {
- script = result.script;
- }
- if ((retainOldMask & 1) == 0) {
- region = result.region;
- }
- // retainOldMask flags = LSR explicit-subtag flags
- return LSR(language, script, region, retainOldMask);
-}
-
-int32_t XLikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const {
- // If likelyInfo >= 0:
- // likelyInfo bit 1 is set if the previous comparison with lsr
- // was for equal language and script.
- // Otherwise the scripts differed.
- if (uprv_strcmp(lsr.language, other.language) != 0) {
- return 0xfffffffc; // negative, lsr not better than other
- }
- if (uprv_strcmp(lsr.script, other.script) != 0) {
- int32_t index;
- if (likelyInfo >= 0 && (likelyInfo & 2) == 0) {
- index = likelyInfo >> 2;
- } else {
- index = getLikelyIndex(lsr.language, "");
- likelyInfo = index << 2;
- }
- const LSR &likely = lsrs[index];
- if (uprv_strcmp(lsr.script, likely.script) == 0) {
- return likelyInfo | 1;
- } else {
- return likelyInfo & ~1;
- }
- }
- if (uprv_strcmp(lsr.region, other.region) != 0) {
- int32_t index;
- if (likelyInfo >= 0 && (likelyInfo & 2) != 0) {
- index = likelyInfo >> 2;
- } else {
- index = getLikelyIndex(lsr.language, lsr.region);
- likelyInfo = (index << 2) | 2;
- }
- const LSR &likely = lsrs[index];
- if (uprv_strcmp(lsr.region, likely.region) == 0) {
- return likelyInfo | 1;
- } else {
- return likelyInfo & ~1;
- }
- }
- return likelyInfo & ~1; // lsr not better than other
-}
-
-// Subset of maximize().
-int32_t XLikelySubtags::getLikelyIndex(const char *language, const char *script) const {
- if (uprv_strcmp(language, "und") == 0) {
- language = "";
- }
- if (uprv_strcmp(script, "Zzzz") == 0) {
- script = "";
- }
-
- BytesTrie iter(trie);
- uint64_t state;
- int32_t value;
- // Small optimization: Array lookup for first language letter.
- int32_t c0;
- if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
- language[1] != 0 && // language.length() >= 2
- (state = trieFirstLetterStates[c0]) != 0) {
- value = trieNext(iter.resetToState64(state), language, 1);
- } else {
- value = trieNext(iter, language, 0);
- }
- if (value >= 0) {
- state = iter.getState64();
- } else {
- iter.resetToState64(trieUndState); // "und" ("*")
- state = 0;
- }
-
- if (value > 0) {
- // Intermediate or final value from just language.
- if (value == SKIP_SCRIPT) {
- value = 0;
- }
- } else {
- value = trieNext(iter, script, 0);
- if (value >= 0) {
- state = iter.getState64();
- } else {
- if (state == 0) {
- iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
- } else {
- iter.resetToState64(state);
- value = trieNext(iter, "", 0);
- U_ASSERT(value >= 0);
- state = iter.getState64();
- }
- }
- }
-
- if (value > 0) {
- // Final value from just language or language+script.
- } else {
- value = trieNext(iter, "", 0);
- U_ASSERT(value > 0);
- }
- U_ASSERT(value < lsrsLength);
- return value;
-}
-
-int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
- UStringTrieResult result;
- uint8_t c;
- if ((c = s[i]) == 0) {
- result = iter.next(u'*');
- } else {
- for (;;) {
- c = uprv_invCharToAscii(c);
- // EBCDIC: If s[i] is not an invariant character,
- // then c is now 0 and will simply not match anything, which is harmless.
- uint8_t next = s[++i];
- if (next != 0) {
- if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
- return -1;
- }
- } else {
- // last character of this subtag
- result = iter.next(c | 0x80);
- break;
- }
- c = next;
- }
- }
- switch (result) {
- case USTRINGTRIE_NO_MATCH: return -1;
- case USTRINGTRIE_NO_VALUE: return 0;
- case USTRINGTRIE_INTERMEDIATE_VALUE:
- U_ASSERT(iter.getValue() == SKIP_SCRIPT);
- return SKIP_SCRIPT;
- case USTRINGTRIE_FINAL_VALUE: return iter.getValue();
- default: return -1;
- }
-}
-
-// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
-// in loclikely.cpp to this new code, including activating this
-// minimizeSubtags() function. The LocaleMatcher does not minimize.
-#if 0
-LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn,
- const char *regionIn, ULocale.Minimize fieldToFavor,
- UErrorCode &errorCode) const {
- LSR result = maximize(languageIn, scriptIn, regionIn);
-
- // We could try just a series of checks, like:
- // LSR result2 = addLikelySubtags(languageIn, "", "");
- // if result.equals(result2) return result2;
- // However, we can optimize 2 of the cases:
- // (languageIn, "", "")
- // (languageIn, "", regionIn)
-
- // value00 = lookup(result.language, "", "")
- BytesTrie iter = new BytesTrie(trie);
- int value = trieNext(iter, result.language, 0);
- U_ASSERT(value >= 0);
- if (value == 0) {
- value = trieNext(iter, "", 0);
- U_ASSERT(value >= 0);
- if (value == 0) {
- value = trieNext(iter, "", 0);
- }
- }
- U_ASSERT(value > 0);
- LSR value00 = lsrs[value];
- boolean favorRegionOk = false;
- if (result.script.equals(value00.script)) { //script is default
- if (result.region.equals(value00.region)) {
- return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS);
- } else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
- return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
- } else {
- favorRegionOk = true;
- }
- }
-
- // The last case is not as easy to optimize.
- // Maybe do later, but for now use the straightforward code.
- LSR result2 = maximize(languageIn, scriptIn, "");
- if (result2.equals(result)) {
- return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS);
- } else if (favorRegionOk) {
- return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
- }
- return result;
-}
-#endif
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/loclikelysubtags.h b/contrib/libs/icu/common/loclikelysubtags.h
deleted file mode 100644
index 90ddfffaca6..00000000000
--- a/contrib/libs/icu/common/loclikelysubtags.h
+++ /dev/null
@@ -1,157 +0,0 @@
-// © 2019 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-
-// loclikelysubtags.h
-// created: 2019may08 Markus W. Scherer
-
-#ifndef __LOCLIKELYSUBTAGS_H__
-#define __LOCLIKELYSUBTAGS_H__
-
-#include <utility>
-#include "unicode/utypes.h"
-#include "unicode/bytestrie.h"
-#include "unicode/locid.h"
-#include "unicode/uobject.h"
-#include "unicode/ures.h"
-#include "lsr.h"
-#include "uhash.h"
-
-U_NAMESPACE_BEGIN
-
-struct XLikelySubtagsData;
-
-/**
- * Map of const char * keys & values.
- * Stores pointers as is: Does not own/copy/adopt/release strings.
- */
-class CharStringMap final : public UMemory {
-public:
- /** Constructs an unusable non-map. */
- CharStringMap() : map(nullptr) {}
- CharStringMap(int32_t size, UErrorCode &errorCode) {
- map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars,
- size, &errorCode);
- }
- CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) {
- other.map = nullptr;
- }
- CharStringMap(const CharStringMap &other) = delete;
- ~CharStringMap() {
- uhash_close(map);
- }
-
- CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT {
- map = other.map;
- other.map = nullptr;
- return *this;
- }
- CharStringMap &operator=(const CharStringMap &other) = delete;
-
- const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); }
- void put(const char *key, const char *value, UErrorCode &errorCode) {
- uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode);
- }
-
-private:
- UHashtable *map;
-};
-
-struct LocaleDistanceData {
- LocaleDistanceData() = default;
- LocaleDistanceData(LocaleDistanceData &&data);
- ~LocaleDistanceData();
-
- const uint8_t *distanceTrieBytes = nullptr;
- const uint8_t *regionToPartitions = nullptr;
- const char **partitions = nullptr;
- const LSR *paradigms = nullptr;
- int32_t paradigmsLength = 0;
- const int32_t *distances = nullptr;
-
-private:
- LocaleDistanceData &operator=(const LocaleDistanceData &) = delete;
-};
-
-// TODO(ICU-20777): Rename to just LikelySubtags.
-class XLikelySubtags final : public UMemory {
-public:
- ~XLikelySubtags();
-
- static constexpr int32_t SKIP_SCRIPT = 1;
-
- // VisibleForTesting
- static const XLikelySubtags *getSingleton(UErrorCode &errorCode);
-
- // VisibleForTesting
- LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const;
-
- /**
- * Tests whether lsr is "more likely" than other.
- * For example, fr-Latn-FR is more likely than fr-Latn-CH because
- * FR is the default region for fr-Latn.
- *
- * The likelyInfo caches lookup information between calls.
- * The return value is an updated likelyInfo value,
- * with bit 0 set if lsr is "more likely".
- * The initial value of likelyInfo must be negative.
- */
- int32_t compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const;
-
- // TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
- // in loclikely.cpp to this new code, including activating this
- // minimizeSubtags() function. The LocaleMatcher does not minimize.
-#if 0
- LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn,
- ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const;
-#endif
-
- // visible for LocaleDistance
- const LocaleDistanceData &getDistanceData() const { return distanceData; }
-
-private:
- XLikelySubtags(XLikelySubtagsData &data);
- XLikelySubtags(const XLikelySubtags &other) = delete;
- XLikelySubtags &operator=(const XLikelySubtags &other) = delete;
-
- static void initLikelySubtags(UErrorCode &errorCode);
-
- LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
- const char *variant, UErrorCode &errorCode) const;
-
- /**
- * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
- */
- LSR maximize(const char *language, const char *script, const char *region) const;
-
- int32_t getLikelyIndex(const char *language, const char *script) const;
-
- static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);
-
- UResourceBundle *langInfoBundle;
- // We could store the strings by value, except that if there were few enough strings,
- // moving the contents could copy it to a different array,
- // invalidating the pointers stored in the maps.
- CharString *strings;
- CharStringMap languageAliases;
- CharStringMap regionAliases;
-
- // The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs.
- // There is also a trie value for each intermediate lang and lang+script.
- // '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"".
- BytesTrie trie;
- uint64_t trieUndState;
- uint64_t trieUndZzzzState;
- int32_t defaultLsrIndex;
- uint64_t trieFirstLetterStates[26];
- const LSR *lsrs;
-#if U_DEBUG
- int32_t lsrsLength;
-#endif
-
- // distance/matcher data: see comment in XLikelySubtagsData::load()
- LocaleDistanceData distanceData;
-};
-
-U_NAMESPACE_END
-
-#endif // __LOCLIKELYSUBTAGS_H__
diff --git a/contrib/libs/icu/common/locmap.cpp b/contrib/libs/icu/common/locmap.cpp
deleted file mode 100644
index a6311343551..00000000000
--- a/contrib/libs/icu/common/locmap.cpp
+++ /dev/null
@@ -1,1309 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- **********************************************************************
- * Copyright (C) 1996-2016, International Business Machines
- * Corporation and others. All Rights Reserved.
- **********************************************************************
- *
- * Provides functionality for mapping between
- * LCID and Posix IDs or ICU locale to codepage
- *
- * Note: All classes and code in this file are
- * intended for internal use only.
- *
- * Methods of interest:
- * unsigned long convertToLCID(const char*);
- * const char* convertToPosix(unsigned long);
- *
- * Kathleen Wilson, 4/30/96
- *
- * Date Name Description
- * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
- * setId() method and safety check against
- * MAX_ID_LENGTH.
- * 04/23/99 stephen Added C wrapper for convertToPosix.
- * 09/18/00 george Removed the memory leaks.
- * 08/23/01 george Convert to C
- */
-
-#include "locmap.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "unicode/uloc.h"
-
-#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
-#include <windows.h>
-#include <winnls.h> // LCIDToLocaleName and LocaleNameToLCID
-#endif
-
-/*
- * Note:
- * The mapping from Win32 locale ID numbers to POSIX locale strings should
- * be the faster one.
- *
- * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
- * [MS-LCID] Windows Language Code Identifier (LCID) Reference
- */
-
-/*
-////////////////////////////////////////////////
-//
-// Internal Classes for LCID <--> POSIX Mapping
-//
-/////////////////////////////////////////////////
-*/
-
-typedef struct ILcidPosixElement
-{
- const uint32_t hostID;
- const char * const posixID;
-} ILcidPosixElement;
-
-typedef struct ILcidPosixMap
-{
- const uint32_t numRegions;
- const struct ILcidPosixElement* const regionMaps;
-} ILcidPosixMap;
-
-
-/*
-/////////////////////////////////////////////////
-//
-// Easy macros to make the LCID <--> POSIX Mapping
-//
-/////////////////////////////////////////////////
-*/
-
-/**
- * The standard one language/one country mapping for LCID.
- * The first element must be the language, and the following
- * elements are the language with the country.
- * @param hostID LCID in host format such as 0x044d
- * @param languageID posix ID of just the language such as 'de'
- * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
- */
-#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
-static const ILcidPosixElement locmap_ ## languageID [] = { \
- {LANGUAGE_LCID(hostID), #languageID}, /* parent locale */ \
- {hostID, #posixID}, \
-};
-
-/**
- * Define a subtable by ID
- * @param id the POSIX ID, either a language or language_TERRITORY
- */
-#define ILCID_POSIX_SUBTABLE(id) \
-static const ILcidPosixElement locmap_ ## id [] =
-
-
-/**
- * Create the map for the posixID. This macro supposes that the language string
- * name is the same as the global variable name, and that the first element
- * in the ILcidPosixElement is just the language.
- * @param _posixID the full POSIX ID for this entry.
- */
-#define ILCID_POSIX_MAP(_posixID) \
- {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID}
-
-/*
-////////////////////////////////////////////
-//
-// Create the table of LCID to POSIX Mapping
-// None of it should be dynamically created.
-//
-// Keep static locale variables inside the function so that
-// it can be created properly during static init.
-//
-// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier
-// (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
-//
-// Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be
-// maintained for support of older Windows version.
-// Update: Windows 7 (091130)
-//
-// Note: Microsoft assign a different LCID if a locale has a sorting variant. POSIX IDs below may contain
-// @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
-// called from uloc_getLCID(), keywords other than collation are already removed. If we really need
-// to support other keywords in this mapping data, we must update the implementation.
-////////////////////////////////////////////
-*/
-
-// TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as
-// LocaleNameToLCID and LCIDToLocaleName provide 90% of these.
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
-
-ILCID_POSIX_SUBTABLE(ar) {
- {0x01, "ar"},
- {0x3801, "ar_AE"},
- {0x3c01, "ar_BH"},
- {0x1401, "ar_DZ"},
- {0x0c01, "ar_EG"},
- {0x0801, "ar_IQ"},
- {0x2c01, "ar_JO"},
- {0x3401, "ar_KW"},
- {0x3001, "ar_LB"},
- {0x1001, "ar_LY"},
- {0x1801, "ar_MA"},
- {0x1801, "ar_MO"},
- {0x2001, "ar_OM"},
- {0x4001, "ar_QA"},
- {0x0401, "ar_SA"},
- {0x2801, "ar_SY"},
- {0x1c01, "ar_TN"},
- {0x2401, "ar_YE"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
-ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
-ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
-
-ILCID_POSIX_SUBTABLE(az) {
- {0x2c, "az"},
- {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
- {0x742c, "az_Cyrl"}, /* Cyrillic based */
- {0x042c, "az_Latn_AZ"}, /* Latin based */
- {0x782c, "az_Latn"}, /* Latin based */
- {0x042c, "az_AZ"} /* Latin based */
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
-ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
-
-/*ILCID_POSIX_SUBTABLE(ber) {
- {0x5f, "ber"},
- {0x045f, "ber_Arab_DZ"},
- {0x045f, "ber_Arab"},
- {0x085f, "ber_Latn_DZ"},
- {0x085f, "ber_Latn"}
-};*/
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
-
-ILCID_POSIX_SUBTABLE(bin) {
- {0x66, "bin"},
- {0x0466, "bin_NG"}
-};
-
-ILCID_POSIX_SUBTABLE(bn) {
- {0x45, "bn"},
- {0x0845, "bn_BD"},
- {0x0445, "bn_IN"}
-};
-
-ILCID_POSIX_SUBTABLE(bo) {
- {0x51, "bo"},
- {0x0851, "bo_BT"},
- {0x0451, "bo_CN"},
- {0x0c51, "dz_BT"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
-
-ILCID_POSIX_SUBTABLE(ca) {
- {0x03, "ca"},
- {0x0403, "ca_ES"},
- {0x0803, "ca_ES_VALENCIA"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
-
-ILCID_POSIX_SUBTABLE(chr) {
- {0x05c, "chr"},
- {0x7c5c, "chr_Cher"},
- {0x045c, "chr_Cher_US"},
- {0x045c, "chr_US"}
-};
-
-// ICU has chosen different names for these.
-ILCID_POSIX_SUBTABLE(ckb) {
- {0x92, "ckb"},
- {0x7c92, "ckb_Arab"},
- {0x0492, "ckb_Arab_IQ"}
-};
-
-/* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
-ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
-ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
-
-// Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
-ILCID_POSIX_SUBTABLE(de) {
- {0x07, "de"},
- {0x0c07, "de_AT"},
- {0x0807, "de_CH"},
- {0x0407, "de_DE"},
- {0x1407, "de_LI"},
- {0x1007, "de_LU"},
- {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
- {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
-ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
-
-// Windows uses an empty string for 'invariant'
-ILCID_POSIX_SUBTABLE(en) {
- {0x09, "en"},
- {0x0c09, "en_AU"},
- {0x2809, "en_BZ"},
- {0x1009, "en_CA"},
- {0x0809, "en_GB"},
- {0x3c09, "en_HK"},
- {0x3809, "en_ID"},
- {0x1809, "en_IE"},
- {0x4009, "en_IN"},
- {0x2009, "en_JM"},
- {0x4409, "en_MY"},
- {0x1409, "en_NZ"},
- {0x3409, "en_PH"},
- {0x4809, "en_SG"},
- {0x2C09, "en_TT"},
- {0x0409, "en_US"},
- {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
- {0x2409, "en_029"},
- {0x1c09, "en_ZA"},
- {0x3009, "en_ZW"},
- {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
- {0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
- {0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
- {0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
- {0x0409, "en_MP"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
- {0x0409, "en_UM"} /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
-};
-
-ILCID_POSIX_SUBTABLE(en_US_POSIX) {
- {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
-};
-
-// Windows doesn't know POSIX or BCP47 Unicode traditional sort names
-ILCID_POSIX_SUBTABLE(es) {
- {0x0a, "es"},
- {0x2c0a, "es_AR"},
- {0x400a, "es_BO"},
- {0x340a, "es_CL"},
- {0x240a, "es_CO"},
- {0x140a, "es_CR"},
- {0x5c0a, "es_CU"},
- {0x1c0a, "es_DO"},
- {0x300a, "es_EC"},
- {0x0c0a, "es_ES"}, /*Modern sort.*/
- {0x100a, "es_GT"},
- {0x480a, "es_HN"},
- {0x080a, "es_MX"},
- {0x4c0a, "es_NI"},
- {0x180a, "es_PA"},
- {0x280a, "es_PE"},
- {0x500a, "es_PR"},
- {0x3c0a, "es_PY"},
- {0x440a, "es_SV"},
- {0x540a, "es_US"},
- {0x380a, "es_UY"},
- {0x200a, "es_VE"},
- {0x580a, "es_419"},
- {0x040a, "es_ES@collation=traditional"},
- {0x040a, "es@collation=traditional"} // Windows will treat this as es-ES@collation=traditional
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
-ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
-
-/* ISO-639 doesn't distinguish between Persian and Dari.*/
-ILCID_POSIX_SUBTABLE(fa) {
- {0x29, "fa"},
- {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
- {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
-};
-
-
-/* duplicate for roundtripping */
-ILCID_POSIX_SUBTABLE(fa_AF) {
- {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
- {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
-};
-
-ILCID_POSIX_SUBTABLE(ff) {
- {0x67, "ff"},
- {0x7c67, "ff_Latn"},
- {0x0867, "ff_Latn_SN"},
- {0x0467, "ff_NG"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
-ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
-ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
-
-ILCID_POSIX_SUBTABLE(fr) {
- {0x0c, "fr"},
- {0x080c, "fr_BE"},
- {0x0c0c, "fr_CA"},
- {0x240c, "fr_CD"},
- {0x240c, "fr_CG"},
- {0x100c, "fr_CH"},
- {0x300c, "fr_CI"},
- {0x2c0c, "fr_CM"},
- {0x040c, "fr_FR"},
- {0x3c0c, "fr_HT"},
- {0x140c, "fr_LU"},
- {0x380c, "fr_MA"},
- {0x180c, "fr_MC"},
- {0x340c, "fr_ML"},
- {0x200c, "fr_RE"},
- {0x280c, "fr_SN"},
- {0xe40c, "fr_015"},
- {0x1c0c, "fr_029"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
-
-ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
- {0x3c, "ga"},
- {0x083c, "ga_IE"},
- {0x043c, "gd_GB"}
-};
-
-ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
- {0x91, "gd"},
- {0x0491, "gd_GB"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
-ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
-ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
-ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
-
-ILCID_POSIX_SUBTABLE(ha) {
- {0x68, "ha"},
- {0x7c68, "ha_Latn"},
- {0x0468, "ha_Latn_NG"},
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
-ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
-ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
-
-/* This LCID is really four different locales.*/
-ILCID_POSIX_SUBTABLE(hr) {
- {0x1a, "hr"},
- {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
- {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
- {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
- {0x781a, "bs"}, /* Bosnian */
- {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
- {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
- {0x101a, "hr_BA"}, /* Croatian in Bosnia */
- {0x041a, "hr_HR"}, /* Croatian*/
- {0x2c1a, "sr_Latn_ME"},
- {0x241a, "sr_Latn_RS"},
- {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
- {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
- {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
- {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
- {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
- {0x301a, "sr_Cyrl_ME"},
- {0x281a, "sr_Cyrl_RS"},
- {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
- {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
-};
-
-ILCID_POSIX_SUBTABLE(hsb) {
- {0x2E, "hsb"},
- {0x042E, "hsb_DE"},
- {0x082E, "dsb_DE"},
- {0x7C2E, "dsb"},
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
-ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
-
-ILCID_POSIX_SUBTABLE(ibb) {
- {0x69, "ibb"},
- {0x0469, "ibb_NG"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
-ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
-ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
-ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
-
-ILCID_POSIX_SUBTABLE(it) {
- {0x10, "it"},
- {0x0810, "it_CH"},
- {0x0410, "it_IT"}
-};
-
-ILCID_POSIX_SUBTABLE(iu) {
- {0x5d, "iu"},
- {0x045d, "iu_Cans_CA"},
- {0x785d, "iu_Cans"},
- {0x085d, "iu_Latn_CA"},
- {0x7c5d, "iu_Latn"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
-ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
-ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
-ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
-ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
-ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
-ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
-
-ILCID_POSIX_SUBTABLE(ko) {
- {0x12, "ko"},
- {0x0812, "ko_KP"},
- {0x0412, "ko_KR"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
-ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
-
-ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */
- {0x60, "ks"},
- {0x0460, "ks_Arab_IN"},
- {0x0860, "ks_Deva_IN"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
-
-ILCID_POSIX_SUBTABLE(la) {
- {0x76, "la"},
- {0x0476, "la_001"},
- {0x0476, "la_IT"} /*Left in for compatibility*/
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
-ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
-ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
-ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
-ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
-ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
-ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
-
-ILCID_POSIX_SUBTABLE(mn) {
- {0x50, "mn"},
- {0x0450, "mn_MN"},
- {0x7c50, "mn_Mong"},
- {0x0850, "mn_Mong_CN"},
- {0x0850, "mn_CN"},
- {0x7850, "mn_Cyrl"},
- {0x0c50, "mn_Mong_MN"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
-ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
-ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
-
-ILCID_POSIX_SUBTABLE(ms) {
- {0x3e, "ms"},
- {0x083e, "ms_BN"}, /* Brunei Darussalam*/
- {0x043e, "ms_MY"} /* Malaysia*/
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
-ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
-
-ILCID_POSIX_SUBTABLE(ne) {
- {0x61, "ne"},
- {0x0861, "ne_IN"}, /* India*/
- {0x0461, "ne_NP"} /* Nepal*/
-};
-
-ILCID_POSIX_SUBTABLE(nl) {
- {0x13, "nl"},
- {0x0813, "nl_BE"},
- {0x0413, "nl_NL"}
-};
-
-/* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
-// TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
-ILCID_POSIX_SUBTABLE(no) {
- {0x14, "no"}, /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */
- {0x7c14, "nb"}, /* really nb */
- {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
- {0x0414, "no_NO"}, /* really nb_NO */
- {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
- {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
- {0x0814, "no_NO_NY"}/* really nn_NO */
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */
-ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
-
-ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
- {0x72, "om"},
- {0x0472, "om_ET"},
- {0x0472, "gaz_ET"}
-};
-
-/* Declared as or_IN to get around compiler errors*/
-ILCID_POSIX_SUBTABLE(or_IN) {
- {0x48, "or"},
- {0x0448, "or_IN"},
-};
-
-ILCID_POSIX_SUBTABLE(pa) {
- {0x46, "pa"},
- {0x0446, "pa_IN"},
- {0x0846, "pa_Arab_PK"},
- {0x0846, "pa_PK"}
-};
-
-ILCID_POSIX_SUBTABLE(pap) {
- {0x79, "pap"},
- {0x0479, "pap_029"},
- {0x0479, "pap_AN"} /*Left in for compatibility*/
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
-ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
-
-ILCID_POSIX_SUBTABLE(pt) {
- {0x16, "pt"},
- {0x0416, "pt_BR"},
- {0x0816, "pt_PT"}
-};
-
-ILCID_POSIX_SUBTABLE(qu) {
- {0x6b, "qu"},
- {0x046b, "qu_BO"},
- {0x086b, "qu_EC"},
- {0x0C6b, "qu_PE"},
- {0x046b, "quz_BO"},
- {0x086b, "quz_EC"},
- {0x0C6b, "quz_PE"}
-};
-
-ILCID_POSIX_SUBTABLE(quc) {
- {0x93, "quc"},
- {0x0493, "quc_CO"},
- /*
- "quc_Latn_GT" is an exceptional case. Language ID of "quc"
- is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
- under the group of "qut". "qut" is a retired ISO 639-3 language
- code for West Central Quiche, and merged to "quc".
- It looks Windows previously reserved "qut" for K'iche', but,
- decided to use "quc" when adding a locale for K'iche' (Guatemala).
-
- This data structure used here assumes language ID bits in
- LCID is unique for alphabetic language code. But this is not true
- for "quc_Latn_GT". If we don't have the data below, LCID look up
- by alphabetic locale ID (POSIX) will fail. The same entry is found
- under "qut" below, which is required for reverse look up.
- */
- {0x0486, "quc_Latn_GT"}
-};
-
-ILCID_POSIX_SUBTABLE(qut) {
- {0x86, "qut"},
- {0x0486, "qut_GT"},
- /*
- See the note in "quc" above.
- */
- {0x0486, "quc_Latn_GT"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
-
-ILCID_POSIX_SUBTABLE(ro) {
- {0x18, "ro"},
- {0x0418, "ro_RO"},
- {0x0818, "ro_MD"}
-};
-
-// TODO: This is almost certainly 'wrong'. 0 in Windows is a synonym for LOCALE_USER_DEFAULT.
-// More likely this is a similar concept to the Windows 0x7f Invariant locale ""
-// (Except that it's not invariant in ICU)
-ILCID_POSIX_SUBTABLE(root) {
- {0x00, "root"}
-};
-
-ILCID_POSIX_SUBTABLE(ru) {
- {0x19, "ru"},
- {0x0419, "ru_RU"},
- {0x0819, "ru_MD"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
-ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
-ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
-
-ILCID_POSIX_SUBTABLE(sd) {
- {0x59, "sd"},
- {0x0459, "sd_Deva_IN"},
- {0x0459, "sd_IN"},
- {0x0859, "sd_Arab_PK"},
- {0x0859, "sd_PK"},
- {0x7c59, "sd_Arab"}
-};
-
-ILCID_POSIX_SUBTABLE(se) {
- {0x3b, "se"},
- {0x0c3b, "se_FI"},
- {0x043b, "se_NO"},
- {0x083b, "se_SE"},
- {0x783b, "sma"},
- {0x183b, "sma_NO"},
- {0x1c3b, "sma_SE"},
- {0x7c3b, "smj"},
- {0x703b, "smn"},
- {0x743b, "sms"},
- {0x103b, "smj_NO"},
- {0x143b, "smj_SE"},
- {0x243b, "smn_FI"},
- {0x203b, "sms_FI"},
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
-ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
-ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
-
-ILCID_POSIX_SUBTABLE(so) {
- {0x77, "so"},
- {0x0477, "so_SO"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
-ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
-
-ILCID_POSIX_SUBTABLE(sv) {
- {0x1d, "sv"},
- {0x081d, "sv_FI"},
- {0x041d, "sv_SE"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
-ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
-
-ILCID_POSIX_SUBTABLE(ta) {
- {0x49, "ta"},
- {0x0449, "ta_IN"},
- {0x0849, "ta_LK"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
-
-/* Cyrillic based by default */
-ILCID_POSIX_SUBTABLE(tg) {
- {0x28, "tg"},
- {0x7c28, "tg_Cyrl"},
- {0x0428, "tg_Cyrl_TJ"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
-
-ILCID_POSIX_SUBTABLE(ti) {
- {0x73, "ti"},
- {0x0873, "ti_ER"},
- {0x0473, "ti_ET"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
-
-ILCID_POSIX_SUBTABLE(tn) {
- {0x32, "tn"},
- {0x0832, "tn_BW"},
- {0x0432, "tn_ZA"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
-ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
-ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
-
-ILCID_POSIX_SUBTABLE(tzm) {
- {0x5f, "tzm"},
- {0x7c5f, "tzm_Latn"},
- {0x085f, "tzm_Latn_DZ"},
- {0x105f, "tzm_Tfng_MA"},
- {0x045f, "tzm_Arab_MA"},
- {0x045f, "tmz"}
-};
-
-ILCID_POSIX_SUBTABLE(ug) {
- {0x80, "ug"},
- {0x0480, "ug_CN"},
- {0x0480, "ug_Arab_CN"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
-
-ILCID_POSIX_SUBTABLE(ur) {
- {0x20, "ur"},
- {0x0820, "ur_IN"},
- {0x0420, "ur_PK"}
-};
-
-ILCID_POSIX_SUBTABLE(uz) {
- {0x43, "uz"},
- {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
- {0x7843, "uz_Cyrl"}, /* Cyrillic based */
- {0x0843, "uz_UZ"}, /* Cyrillic based */
- {0x0443, "uz_Latn_UZ"}, /* Latin based */
- {0x7c43, "uz_Latn"} /* Latin based */
-};
-
-ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
- {0x33, "ve"},
- {0x0433, "ve_ZA"},
- {0x0433, "ven_ZA"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
-ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
-ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
-
-ILCID_POSIX_SUBTABLE(yi) {
- {0x003d, "yi"},
- {0x043d, "yi_001"}
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
-
-// Windows & ICU tend to different names for some of these
-// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
-ILCID_POSIX_SUBTABLE(zh) {
- {0x0004, "zh_Hans"},
- {0x7804, "zh"},
- {0x0804, "zh_CN"},
- {0x0804, "zh_Hans_CN"},
- {0x0c04, "zh_Hant_HK"},
- {0x0c04, "zh_HK"},
- {0x1404, "zh_Hant_MO"},
- {0x1404, "zh_MO"},
- {0x1004, "zh_Hans_SG"},
- {0x1004, "zh_SG"},
- {0x0404, "zh_Hant_TW"},
- {0x7c04, "zh_Hant"},
- {0x0404, "zh_TW"},
- {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
- {0x30404,"zh_TW"}, /* Bopomofo order */
- {0x20004,"zh@collation=stroke"},
- {0x20404,"zh_Hant@collation=stroke"},
- {0x20404,"zh_Hant_TW@collation=stroke"},
- {0x20404,"zh_TW@collation=stroke"},
- {0x20804,"zh_Hans@collation=stroke"},
- {0x20804,"zh_Hans_CN@collation=stroke"},
- {0x20804,"zh_CN@collation=stroke"}
- // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
-};
-
-ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
-
-/* This must be static and grouped by LCID. */
-static const ILcidPosixMap gPosixIDmap[] = {
- ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
- ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
- ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
- ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */
- ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
- ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
- ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */
- ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */
-/* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */
- ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
- ILCID_POSIX_MAP(bin), /* bin Edo 0x66 */
- ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
- ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */
- ILCID_POSIX_MAP(br), /* br Breton 0x7e */
- ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
- ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */
- ILCID_POSIX_MAP(ckb), /* ckb Sorani (Central Kurdish) 0x92 */
- ILCID_POSIX_MAP(co), /* co Corsican 0x83 */
- ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */
- ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */
- ILCID_POSIX_MAP(da), /* da Danish 0x06 */
- ILCID_POSIX_MAP(de), /* de German 0x07 */
- ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
- ILCID_POSIX_MAP(el), /* el Greek 0x08 */
- ILCID_POSIX_MAP(en), /* en English 0x09 */
- ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
- ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
- ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
- ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
- ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */
- ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */
- ILCID_POSIX_MAP(ff), /* ff Fula 0x67 */
- ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
- ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */
- ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
- ILCID_POSIX_MAP(fr), /* fr French 0x0c */
- ILCID_POSIX_MAP(fuv), /* fuv Fulfulde - Nigeria 0x67 */
- ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */
- ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */
- ILCID_POSIX_MAP(gd), /* gd Gaelic (United Kingdom) 0x91 */
- ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
- ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */
- ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
- ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
- ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */
- ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */
- ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
- ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
- ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */
- ILCID_POSIX_MAP(hsb), /* hsb Upper Sorbian 0x2e */
- ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
- ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
- ILCID_POSIX_MAP(ibb), /* ibb Ibibio - Nigeria 0x69 */
- ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
- ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */
- ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */
- ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
- ILCID_POSIX_MAP(it), /* it Italian 0x10 */
- ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */
- ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
- ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
- ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
- ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
- ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */
- ILCID_POSIX_MAP(km), /* km Khmer 0x53 */
- ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
- ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
- ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
- ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */
- ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
- ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
- ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */
- ILCID_POSIX_MAP(la), /* la Latin 0x76 */
- ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */
- ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
- ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
- ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */
- ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
- ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
- ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
- ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
- ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */
- ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
- ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
- ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
- ILCID_POSIX_MAP(my), /* my Burmese 0x55 */
-/* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
- ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
- ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
-/* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
- ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */
- ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
- ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */
- ILCID_POSIX_MAP(om), /* om Oromo 0x72 */
- ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
- ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
- ILCID_POSIX_MAP(pap), /* pap Papiamentu 0x79 */
- ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
- ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */
- ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
- ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */
- ILCID_POSIX_MAP(quc), /* quc K'iche 0x93 */
- ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */
- ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */
- ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
- ILCID_POSIX_MAP(root), /* root 0x00 */
- ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
- ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */
- ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
- ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */
- ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
- ILCID_POSIX_MAP(se), /* se Sami 0x3b */
-/* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
- ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */
- ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
- ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
- ILCID_POSIX_MAP(so), /* so Somali 0x77 */
- ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
-/* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
- ILCID_POSIX_MAP(st), /* st Sutu 0x30 */
- ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
- ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
- ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
- ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
- ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
- ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */
- ILCID_POSIX_MAP(th), /* th Thai 0x1e */
- ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */
- ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */
- ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */
- ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
- ILCID_POSIX_MAP(ts), /* ts Tsonga 0x31 */
- ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
- ILCID_POSIX_MAP(tzm), /* tzm Tamazight 0x5f */
- ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */
- ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
- ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
- ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
- ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */
- ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
- ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */
- ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */
- ILCID_POSIX_MAP(yi), /* yi Yiddish 0x3d */
- ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */
- ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
- ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
-};
-
-static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
-
-/**
- * Do not call this function. It is called by hostID.
- * The function is not private because this struct must stay as a C struct,
- * and this is an internal class.
- */
-static int32_t
-idCmp(const char* id1, const char* id2)
-{
- int32_t diffIdx = 0;
- while (*id1 == *id2 && *id1 != 0) {
- diffIdx++;
- id1++;
- id2++;
- }
- return diffIdx;
-}
-
-/**
- * Searches for a Windows LCID
- *
- * @param posixID the Posix style locale id.
- * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
- * no equivalent Windows LCID.
- * @return the LCID
- */
-static uint32_t
-getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
-{
- int32_t bestIdx = 0;
- int32_t bestIdxDiff = 0;
- int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
- uint32_t idx;
-
- for (idx = 0; idx < this_0->numRegions; idx++ ) {
- int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
- if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
- if (posixIDlen == sameChars) {
- /* Exact match */
- return this_0->regionMaps[idx].hostID;
- }
- bestIdxDiff = sameChars;
- bestIdx = idx;
- }
- }
- /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
- /* We also have to make sure that sid and si and similar string subsets don't match. */
- if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
- && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
- {
- *status = U_USING_FALLBACK_WARNING;
- return this_0->regionMaps[bestIdx].hostID;
- }
-
- /*no match found */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return this_0->regionMaps->hostID;
-}
-
-static const char*
-getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
-{
- uint32_t i;
- for (i = 0; i < this_0->numRegions; i++)
- {
- if (this_0->regionMaps[i].hostID == hostID)
- {
- return this_0->regionMaps[i].posixID;
- }
- }
-
- /* If you get here, then no matching region was found,
- so return the language id with the wild card region. */
- return this_0->regionMaps[0].posixID;
-}
-
-/*
-//////////////////////////////////////
-//
-// LCID --> POSIX
-//
-/////////////////////////////////////
-*/
-#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
-/*
- * Various language tags needs to be changed:
- * quz -> qu
- * prs -> fa
- */
-#define FIX_LANGUAGE_ID_TAG(buffer, len) \
- if (len >= 3) { \
- if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {\
- buffer[2] = 0; \
- uprv_strcat(buffer, buffer+3); \
- } else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {\
- buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0; \
- uprv_strcat(buffer, buffer+3); \
- } \
- }
-
-#endif
-
-U_CAPI int32_t
-uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
-{
- uint16_t langID;
- uint32_t localeIndex;
- UBool bLookup = TRUE;
- const char *pPosixID = NULL;
-
-#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
- static_assert(ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH, "Windows locale names have smaller length than ICU locale names.");
-
- char locName[LOCALE_NAME_MAX_LENGTH] = {};
-
- // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
- // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
- // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
- // use the Windows API to resolve locale ID for this specific case.
- if ((hostid & 0x3FF) != 0x92) {
- int32_t tmpLen = 0;
- char16_t windowsLocaleName[LOCALE_NAME_MAX_LENGTH] = {};
-
- // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
- tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
- if (tmpLen > 1) {
- int32_t i = 0;
- // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
- bLookup = FALSE;
- for (i = 0; i < UPRV_LENGTHOF(locName); i++)
- {
- locName[i] = (char)(windowsLocaleName[i]);
-
- // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
- // In such cases, we need special mapping data found in the hardcoded table
- // in this source file.
- if (windowsLocaleName[i] == L'_')
- {
- // Keep the base locale, without variant
- // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
- locName[i] = '\0';
- tmpLen = i;
- bLookup = TRUE;
- break;
- }
- else if (windowsLocaleName[i] == L'-')
- {
- // Windows names use -, ICU uses _
- locName[i] = '_';
- }
- else if (windowsLocaleName[i] == L'\0')
- {
- // No point in doing more work than necessary
- break;
- }
- }
- // TODO: Need to understand this better, why isn't it an alias?
- FIX_LANGUAGE_ID_TAG(locName, tmpLen);
- pPosixID = locName;
- }
- }
-#endif
-
- if (bLookup) {
- const char *pCandidate = NULL;
- langID = LANGUAGE_LCID(hostid);
-
- for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
- if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
- pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
- break;
- }
- }
-
- /* On Windows, when locale name has a variant, we still look up the hardcoded table.
- If a match in the hardcoded table is longer than the Windows locale name without
- variant, we use the one as the result */
- if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
- pPosixID = pCandidate;
- }
- }
-
- if (pPosixID) {
- int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
- int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
- uprv_memcpy(posixID, pPosixID, copyLen);
- if (resLen < posixIDCapacity) {
- posixID[resLen] = 0;
- if (*status == U_STRING_NOT_TERMINATED_WARNING) {
- *status = U_ZERO_ERROR;
- }
- } else if (resLen == posixIDCapacity) {
- *status = U_STRING_NOT_TERMINATED_WARNING;
- } else {
- *status = U_BUFFER_OVERFLOW_ERROR;
- }
- return resLen;
- }
-
- /* no match found */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return -1;
-}
-
-/*
-//////////////////////////////////////
-//
-// POSIX --> LCID
-// This should only be called from uloc_getLCID.
-// The locale ID must be in canonical form.
-//
-/////////////////////////////////////
-*/
-U_CAPI uint32_t
-uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
-{
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- // The purpose of this function is to leverage the Windows platform name->lcid
- // conversion functionality when available.
-#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
- int32_t len;
- char collVal[ULOC_KEYWORDS_CAPACITY] = {};
- char baseName[ULOC_FULLNAME_CAPACITY] = {};
- const char * mylocaleID = localeID;
-
- // Check any for keywords.
- if (uprv_strchr(localeID, '@'))
- {
- len = uloc_getKeywordValue(localeID, "collation", collVal, UPRV_LENGTHOF(collVal) - 1, status);
- if (U_SUCCESS(*status) && len > 0)
- {
- // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
- return 0;
- }
- else
- {
- // If the locale ID contains keywords other than collation, just use the base name.
- len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, status);
-
- if (U_SUCCESS(*status) && len > 0)
- {
- baseName[len] = 0;
- mylocaleID = baseName;
- }
- }
- }
-
- char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
- // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
- (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, status);
-
- if (U_SUCCESS(*status))
- {
- // Need it to be UTF-16, not 8-bit
- wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
- int32_t i;
- for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
- {
- if (asciiBCP47Tag[i] == '\0')
- {
- break;
- }
- else
- {
- // Copy the character
- bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]);
- }
- }
-
- if (i < (UPRV_LENGTHOF(bcp47Tag) - 1))
- {
- // Ensure it's null terminated
- bcp47Tag[i] = L'\0';
- LCID lcid = LocaleNameToLCID(bcp47Tag, LOCALE_ALLOW_NEUTRAL_NAMES);
- if (lcid > 0)
- {
- // Found LCID from windows, return that one, unless its completely ambiguous
- // LOCALE_USER_DEFAULT and transients are OK because they will round trip
- // for this process.
- if (lcid != LOCALE_CUSTOM_UNSPECIFIED)
- {
- return lcid;
- }
- }
- }
- }
-#else
- (void) localeID; // Suppress unused variable warning.
-#endif
-
- // Nothing found, or not implemented.
- return 0;
-}
-
-U_CAPI uint32_t
-uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
-{
- // This function does the table lookup when native platform name->lcid conversion isn't available,
- // or for locales that don't follow patterns the platform expects.
- uint32_t low = 0;
- uint32_t high = gLocaleCount;
- uint32_t mid;
- uint32_t oldmid = 0;
- int32_t compVal;
-
- uint32_t value = 0;
- uint32_t fallbackValue = (uint32_t)-1;
- UErrorCode myStatus;
- uint32_t idx;
-
- /* Check for incomplete id. */
- if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
- return 0;
- }
-
- /*Binary search for the map entry for normal cases */
-
- while (high > low) /*binary search*/{
-
- mid = (high+low) >> 1; /*Finds median*/
-
- if (mid == oldmid)
- break;
-
- compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
- if (compVal < 0){
- high = mid;
- }
- else if (compVal > 0){
- low = mid;
- }
- else /*we found it*/{
- return getHostID(&gPosixIDmap[mid], posixID, status);
- }
- oldmid = mid;
- }
-
- /*
- * Sometimes we can't do a binary search on posixID because some LCIDs
- * go to different locales. We hit one of those special cases.
- */
- for (idx = 0; idx < gLocaleCount; idx++ ) {
- myStatus = U_ZERO_ERROR;
- value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
- if (myStatus == U_ZERO_ERROR) {
- return value;
- }
- else if (myStatus == U_USING_FALLBACK_WARNING) {
- fallbackValue = value;
- }
- }
-
- if (fallbackValue != (uint32_t)-1) {
- *status = U_USING_FALLBACK_WARNING;
- return fallbackValue;
- }
-
- /* no match found */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0; /* return international (root) */
-}
diff --git a/contrib/libs/icu/common/locmap.h b/contrib/libs/icu/common/locmap.h
deleted file mode 100644
index e669873a143..00000000000
--- a/contrib/libs/icu/common/locmap.h
+++ /dev/null
@@ -1,40 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1996-2013, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* File locmap.h : Locale Mapping Classes
-*
-*
-* Created by: Helena Shih
-*
-* Modification History:
-*
-* Date Name Description
-* 3/11/97 aliu Added setId().
-* 4/20/99 Madhu Added T_convertToPosix()
-* 09/18/00 george Removed the memory leaks.
-* 08/23/01 george Convert to C
-*============================================================================
-*/
-
-#ifndef LOCMAP_H
-#define LOCMAP_H
-
-#include "unicode/utypes.h"
-
-#define LANGUAGE_LCID(hostID) (uint16_t)(0x03FF & hostID)
-
-U_CAPI int32_t uprv_convertToPosix(uint32_t hostid, char* posixID, int32_t posixIDCapacity, UErrorCode* status);
-
-/* Don't call these functions directly. Use uloc_getLCID instead. */
-U_CAPI uint32_t uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status); // Leverage platform conversion if possible
-U_CAPI uint32_t uprv_convertToLCID(const char* langID, const char* posixID, UErrorCode* status);
-
-#endif /* LOCMAP_H */
-
diff --git a/contrib/libs/icu/common/locresdata.cpp b/contrib/libs/icu/common/locresdata.cpp
deleted file mode 100644
index d1d9a4729f1..00000000000
--- a/contrib/libs/icu/common/locresdata.cpp
+++ /dev/null
@@ -1,220 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1997-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: loclikely.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010feb25
-* created by: Markus W. Scherer
-*
-* Code for miscellaneous locale-related resource bundle data access,
-* separated out from other .cpp files
-* that then do not depend on resource bundle code and this data.
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/putil.h"
-#include "unicode/uloc.h"
-#include "unicode/ures.h"
-#include "cstring.h"
-#include "ulocimp.h"
-#include "uresimp.h"
-
-/*
- * Lookup a resource bundle table item with fallback on the table level.
- * Regular resource bundle lookups perform fallback to parent locale bundles
- * and eventually the root bundle, but only for top-level items.
- * This function takes the name of a top-level table and of an item in that table
- * and performs a lookup of both, falling back until a bundle contains a table
- * with this item.
- *
- * Note: Only the opening of entire bundles falls back through the default locale
- * before root. Once a bundle is open, item lookups do not go through the
- * default locale because that would result in a mix of languages that is
- * unpredictable to the programmer and most likely useless.
- */
-U_CAPI const UChar * U_EXPORT2
-uloc_getTableStringWithFallback(const char *path, const char *locale,
- const char *tableKey, const char *subTableKey,
- const char *itemKey,
- int32_t *pLength,
- UErrorCode *pErrorCode)
-{
-/* char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/
- const UChar *item=NULL;
- UErrorCode errorCode;
- char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};
-
- /*
- * open the bundle for the current locale
- * this falls back through the locale's chain to root
- */
- errorCode=U_ZERO_ERROR;
- icu::LocalUResourceBundlePointer rb(ures_open(path, locale, &errorCode));
-
- if(U_FAILURE(errorCode)) {
- /* total failure, not even root could be opened */
- *pErrorCode=errorCode;
- return NULL;
- } else if(errorCode==U_USING_DEFAULT_WARNING ||
- (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
- ) {
- /* set the "strongest" error code (success->fallback->default->failure) */
- *pErrorCode=errorCode;
- }
-
- for(;;){
- icu::StackUResourceBundle table;
- icu::StackUResourceBundle subTable;
- ures_getByKeyWithFallback(rb.getAlias(), tableKey, table.getAlias(), &errorCode);
-
- if (subTableKey != NULL) {
- /*
- ures_getByKeyWithFallback(table.getAlias(), subTableKey, subTable.getAlias(), &errorCode);
- item = ures_getStringByKeyWithFallback(subTable.getAlias(), itemKey, pLength, &errorCode);
- if(U_FAILURE(errorCode)){
- *pErrorCode = errorCode;
- }
-
- break;*/
-
- ures_getByKeyWithFallback(table.getAlias(), subTableKey, table.getAlias(), &errorCode);
- }
- if(U_SUCCESS(errorCode)){
- item = ures_getStringByKeyWithFallback(table.getAlias(), itemKey, pLength, &errorCode);
- if(U_FAILURE(errorCode)){
- const char* replacement = NULL;
- *pErrorCode = errorCode; /*save the errorCode*/
- errorCode = U_ZERO_ERROR;
- /* may be a deprecated code */
- if(uprv_strcmp(tableKey, "Countries")==0){
- replacement = uloc_getCurrentCountryID(itemKey);
- }else if(uprv_strcmp(tableKey, "Languages")==0){
- replacement = uloc_getCurrentLanguageID(itemKey);
- }
- /*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself is replacement is not found*/
- if(replacement!=NULL && itemKey != replacement){
- item = ures_getStringByKeyWithFallback(table.getAlias(), replacement, pLength, &errorCode);
- if(U_SUCCESS(errorCode)){
- *pErrorCode = errorCode;
- break;
- }
- }
- }else{
- break;
- }
- }
-
- if(U_FAILURE(errorCode)){
-
- /* still can't figure out ?.. try the fallback mechanism */
- int32_t len = 0;
- const UChar* fallbackLocale = NULL;
- *pErrorCode = errorCode;
- errorCode = U_ZERO_ERROR;
-
- fallbackLocale = ures_getStringByKeyWithFallback(table.getAlias(), "Fallback", &len, &errorCode);
- if(U_FAILURE(errorCode)){
- *pErrorCode = errorCode;
- break;
- }
-
- u_UCharsToChars(fallbackLocale, explicitFallbackName, len);
-
- /* guard against recursive fallback */
- if(uprv_strcmp(explicitFallbackName, locale)==0){
- *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
- break;
- }
- rb.adoptInstead(ures_open(path, explicitFallbackName, &errorCode));
- if(U_FAILURE(errorCode)){
- *pErrorCode = errorCode;
- break;
- }
- /* succeeded in opening the fallback bundle .. continue and try to fetch the item */
- }else{
- break;
- }
- }
-
- return item;
-}
-
-static ULayoutType
-_uloc_getOrientationHelper(const char* localeId,
- const char* key,
- UErrorCode *status)
-{
- ULayoutType result = ULOC_LAYOUT_UNKNOWN;
-
- if (!U_FAILURE(*status)) {
- int32_t length = 0;
- char localeBuffer[ULOC_FULLNAME_CAPACITY];
-
- uloc_canonicalize(localeId, localeBuffer, sizeof(localeBuffer), status);
-
- if (!U_FAILURE(*status)) {
- const UChar* const value =
- uloc_getTableStringWithFallback(
- NULL,
- localeBuffer,
- "layout",
- NULL,
- key,
- &length,
- status);
-
- if (!U_FAILURE(*status) && length != 0) {
- switch(value[0])
- {
- case 0x0062: /* 'b' */
- result = ULOC_LAYOUT_BTT;
- break;
- case 0x006C: /* 'l' */
- result = ULOC_LAYOUT_LTR;
- break;
- case 0x0072: /* 'r' */
- result = ULOC_LAYOUT_RTL;
- break;
- case 0x0074: /* 't' */
- result = ULOC_LAYOUT_TTB;
- break;
- default:
- *status = U_INTERNAL_PROGRAM_ERROR;
- break;
- }
- }
- }
- }
-
- return result;
-}
-
-U_CAPI ULayoutType U_EXPORT2
-uloc_getCharacterOrientation(const char* localeId,
- UErrorCode *status)
-{
- return _uloc_getOrientationHelper(localeId, "characters", status);
-}
-
-/**
- * Get the layout line orientation for the specified locale.
- *
- * @param localeID locale name
- * @param status Error status
- * @return an enum indicating the layout orientation for lines.
- */
-U_CAPI ULayoutType U_EXPORT2
-uloc_getLineOrientation(const char* localeId,
- UErrorCode *status)
-{
- return _uloc_getOrientationHelper(localeId, "lines", status);
-}
diff --git a/contrib/libs/icu/common/locutil.cpp b/contrib/libs/icu/common/locutil.cpp
deleted file mode 100644
index 3d9d69ff7ed..00000000000
--- a/contrib/libs/icu/common/locutil.cpp
+++ /dev/null
@@ -1,275 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- *******************************************************************************
- * Copyright (C) 2002-2014, International Business Machines Corporation and
- * others. All Rights Reserved.
- *******************************************************************************
- */
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
-
-#include "unicode/resbund.h"
-#include "unicode/uenum.h"
-#include "cmemory.h"
-#include "ustrfmt.h"
-#include "locutil.h"
-#include "charstr.h"
-#include "ucln_cmn.h"
-#include "uassert.h"
-#include "umutex.h"
-
-// see LocaleUtility::getAvailableLocaleNames
-static icu::UInitOnce LocaleUtilityInitOnce = U_INITONCE_INITIALIZER;
-static icu::Hashtable * LocaleUtility_cache = NULL;
-
-#define UNDERSCORE_CHAR ((UChar)0x005f)
-#define AT_SIGN_CHAR ((UChar)64)
-#define PERIOD_CHAR ((UChar)46)
-
-/*
- ******************************************************************
- */
-
-/**
- * Release all static memory held by Locale Utility.
- */
-U_CDECL_BEGIN
-static UBool U_CALLCONV service_cleanup(void) {
- if (LocaleUtility_cache) {
- delete LocaleUtility_cache;
- LocaleUtility_cache = NULL;
- }
- return TRUE;
-}
-
-
-static void U_CALLCONV locale_utility_init(UErrorCode &status) {
- using namespace icu;
- U_ASSERT(LocaleUtility_cache == NULL);
- ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup);
- LocaleUtility_cache = new Hashtable(status);
- if (U_FAILURE(status)) {
- delete LocaleUtility_cache;
- LocaleUtility_cache = NULL;
- return;
- }
- if (LocaleUtility_cache == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- LocaleUtility_cache->setValueDeleter(uhash_deleteHashtable);
-}
-
-U_CDECL_END
-
-U_NAMESPACE_BEGIN
-
-UnicodeString&
-LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result)
-{
- if (id == NULL) {
- result.setToBogus();
- } else {
- // Fix case only (no other changes) up to the first '@' or '.' or
- // end of string, whichever comes first. In 3.0 I changed this to
- // stop at first '@' or '.'. It used to run out to the end of
- // string. My fix makes the tests pass but is probably
- // structurally incorrect. See below. [alan 3.0]
-
- // TODO: Doug, you might want to revise this...
- result = *id;
- int32_t i = 0;
- int32_t end = result.indexOf(AT_SIGN_CHAR);
- int32_t n = result.indexOf(PERIOD_CHAR);
- if (n >= 0 && n < end) {
- end = n;
- }
- if (end < 0) {
- end = result.length();
- }
- n = result.indexOf(UNDERSCORE_CHAR);
- if (n < 0) {
- n = end;
- }
- for (; i < n; ++i) {
- UChar c = result.charAt(i);
- if (c >= 0x0041 && c <= 0x005a) {
- c += 0x20;
- result.setCharAt(i, c);
- }
- }
- for (n = end; i < n; ++i) {
- UChar c = result.charAt(i);
- if (c >= 0x0061 && c <= 0x007a) {
- c -= 0x20;
- result.setCharAt(i, c);
- }
- }
- }
- return result;
-
-#if 0
- // This code does a proper full level 2 canonicalization of id.
- // It's nasty to go from UChar to char to char to UChar -- but
- // that's what you have to do to use the uloc_canonicalize
- // function on UnicodeStrings.
-
- // I ended up doing the alternate fix (see above) not for
- // performance reasons, although performance will certainly be
- // better, but because doing a full level 2 canonicalization
- // causes some tests to fail. [alan 3.0]
-
- // TODO: Doug, you might want to revisit this...
- result.setToBogus();
- if (id != 0) {
- int32_t buflen = id->length() + 8; // space for NUL
- char* buf = (char*) uprv_malloc(buflen);
- char* canon = (buf == 0) ? 0 : (char*) uprv_malloc(buflen);
- if (buf != 0 && canon != 0) {
- U_ASSERT(id->extract(0, INT32_MAX, buf, buflen) < buflen);
- UErrorCode ec = U_ZERO_ERROR;
- uloc_canonicalize(buf, canon, buflen, &ec);
- if (U_SUCCESS(ec)) {
- result = UnicodeString(canon);
- }
- }
- uprv_free(buf);
- uprv_free(canon);
- }
- return result;
-#endif
-}
-
-Locale&
-LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
-{
- enum { BUFLEN = 128 }; // larger than ever needed
-
- if (id.isBogus() || id.length() >= BUFLEN) {
- result.setToBogus();
- } else {
- /*
- * We need to convert from a UnicodeString to char * in order to
- * create a Locale.
- *
- * Problem: Locale ID strings may contain '@' which is a variant
- * character and cannot be handled by invariant-character conversion.
- *
- * Hack: Since ICU code can handle locale IDs with multiple encodings
- * of '@' (at least for EBCDIC; it's not known to be a problem for
- * ASCII-based systems),
- * we use regular invariant-character conversion for everything else
- * and manually convert U+0040 into a compiler-char-constant '@'.
- * While this compilation-time constant may not match the runtime
- * encoding of '@', it should be one of the encodings which ICU
- * recognizes.
- *
- * There should be only at most one '@' in a locale ID.
- */
- char buffer[BUFLEN];
- int32_t prev, i;
- prev = 0;
- for(;;) {
- i = id.indexOf((UChar)0x40, prev);
- if(i < 0) {
- // no @ between prev and the rest of the string
- id.extract(prev, INT32_MAX, buffer + prev, BUFLEN - prev, US_INV);
- break; // done
- } else {
- // normal invariant-character conversion for text between @s
- id.extract(prev, i - prev, buffer + prev, BUFLEN - prev, US_INV);
- // manually "convert" U+0040 at id[i] into '@' at buffer[i]
- buffer[i] = '@';
- prev = i + 1;
- }
- }
- result = Locale::createFromName(buffer);
- }
- return result;
-}
-
-UnicodeString&
-LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result)
-{
- if (locale.isBogus()) {
- result.setToBogus();
- } else {
- result.append(UnicodeString(locale.getName(), -1, US_INV));
- }
- return result;
-}
-
-const Hashtable*
-LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
-{
- // LocaleUtility_cache is a hash-of-hashes. The top-level keys
- // are path strings ('bundleID') passed to
- // ures_openAvailableLocales. The top-level values are
- // second-level hashes. The second-level keys are result strings
- // from ures_openAvailableLocales. The second-level values are
- // garbage ((void*)1 or other random pointer).
-
- UErrorCode status = U_ZERO_ERROR;
- umtx_initOnce(LocaleUtilityInitOnce, locale_utility_init, status);
- Hashtable *cache = LocaleUtility_cache;
- if (cache == NULL) {
- // Catastrophic failure.
- return NULL;
- }
-
- Hashtable* htp;
- umtx_lock(NULL);
- htp = (Hashtable*) cache->get(bundleID);
- umtx_unlock(NULL);
-
- if (htp == NULL) {
- htp = new Hashtable(status);
- if (htp && U_SUCCESS(status)) {
- CharString cbundleID;
- cbundleID.appendInvariantChars(bundleID, status);
- const char* path = cbundleID.isEmpty() ? NULL : cbundleID.data();
- icu::LocalUEnumerationPointer uenum(ures_openAvailableLocales(path, &status));
- for (;;) {
- const UChar* id = uenum_unext(uenum.getAlias(), NULL, &status);
- if (id == NULL) {
- break;
- }
- htp->put(UnicodeString(id), (void*)htp, status);
- }
- if (U_FAILURE(status)) {
- delete htp;
- return NULL;
- }
- umtx_lock(NULL);
- Hashtable *t = static_cast<Hashtable *>(cache->get(bundleID));
- if (t != NULL) {
- // Another thread raced through this code, creating the cache entry first.
- // Discard ours and return theirs.
- umtx_unlock(NULL);
- delete htp;
- htp = t;
- } else {
- cache->put(bundleID, (void*)htp, status);
- umtx_unlock(NULL);
- }
- }
- }
- return htp;
-}
-
-UBool
-LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
-{
- return child.indexOf(root) == 0 &&
- (child.length() == root.length() ||
- child.charAt(root.length()) == UNDERSCORE_CHAR);
-}
-
-U_NAMESPACE_END
-
-/* !UCONFIG_NO_SERVICE */
-#endif
-
-
diff --git a/contrib/libs/icu/common/locutil.h b/contrib/libs/icu/common/locutil.h
deleted file mode 100644
index 31bfffd7a59..00000000000
--- a/contrib/libs/icu/common/locutil.h
+++ /dev/null
@@ -1,39 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/**
- *******************************************************************************
- * Copyright (C) 2002-2005, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- *
- *******************************************************************************
- */
-#ifndef LOCUTIL_H
-#define LOCUTIL_H
-
-#include "unicode/utypes.h"
-#include "hash.h"
-
-#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
-
-
-U_NAMESPACE_BEGIN
-
-// temporary utility functions, till I know where to find them
-// in header so tests can also access them
-
-class U_COMMON_API LocaleUtility {
-public:
- static UnicodeString& canonicalLocaleString(const UnicodeString* id, UnicodeString& result);
- static Locale& initLocaleFromName(const UnicodeString& id, Locale& result);
- static UnicodeString& initNameFromLocale(const Locale& locale, UnicodeString& result);
- static const Hashtable* getAvailableLocaleNames(const UnicodeString& bundleID);
- static UBool isFallbackOf(const UnicodeString& root, const UnicodeString& child);
-};
-
-U_NAMESPACE_END
-
-
-#endif
-
-#endif
diff --git a/contrib/libs/icu/common/lsr.cpp b/contrib/libs/icu/common/lsr.cpp
deleted file mode 100644
index d4308ad0275..00000000000
--- a/contrib/libs/icu/common/lsr.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-// © 2019 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-
-// lsr.cpp
-// created: 2019may08 Markus W. Scherer
-
-#include "unicode/utypes.h"
-#include "charstr.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "lsr.h"
-#include "uinvchar.h"
-#include "ustr_imp.h"
-
-U_NAMESPACE_BEGIN
-
-LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f,
- UErrorCode &errorCode) :
- language(nullptr), script(nullptr), region(r),
- regionIndex(indexForRegion(region)), flags(f) {
- if (U_SUCCESS(errorCode)) {
- CharString langScript;
- langScript.append(prefix, errorCode).append(lang, errorCode).append('\0', errorCode);
- int32_t scriptOffset = langScript.length();
- langScript.append(prefix, errorCode).append(scr, errorCode);
- owned = langScript.cloneData(errorCode);
- if (U_SUCCESS(errorCode)) {
- language = owned;
- script = owned + scriptOffset;
- }
- }
-}
-
-LSR::LSR(LSR &&other) U_NOEXCEPT :
- language(other.language), script(other.script), region(other.region), owned(other.owned),
- regionIndex(other.regionIndex), flags(other.flags),
- hashCode(other.hashCode) {
- if (owned != nullptr) {
- other.language = other.script = "";
- other.owned = nullptr;
- other.hashCode = 0;
- }
-}
-
-void LSR::deleteOwned() {
- uprv_free(owned);
-}
-
-LSR &LSR::operator=(LSR &&other) U_NOEXCEPT {
- this->~LSR();
- language = other.language;
- script = other.script;
- region = other.region;
- regionIndex = other.regionIndex;
- flags = other.flags;
- owned = other.owned;
- hashCode = other.hashCode;
- if (owned != nullptr) {
- other.language = other.script = "";
- other.owned = nullptr;
- other.hashCode = 0;
- }
- return *this;
-}
-
-UBool LSR::isEquivalentTo(const LSR &other) const {
- return
- uprv_strcmp(language, other.language) == 0 &&
- uprv_strcmp(script, other.script) == 0 &&
- regionIndex == other.regionIndex &&
- // Compare regions if both are ill-formed (and their indexes are 0).
- (regionIndex > 0 || uprv_strcmp(region, other.region) == 0);
-}
-
-UBool LSR::operator==(const LSR &other) const {
- return
- uprv_strcmp(language, other.language) == 0 &&
- uprv_strcmp(script, other.script) == 0 &&
- regionIndex == other.regionIndex &&
- // Compare regions if both are ill-formed (and their indexes are 0).
- (regionIndex > 0 || uprv_strcmp(region, other.region) == 0) &&
- flags == other.flags;
-}
-
-int32_t LSR::indexForRegion(const char *region) {
- int32_t c = region[0];
- int32_t a = c - '0';
- if (0 <= a && a <= 9) { // digits: "419"
- int32_t b = region[1] - '0';
- if (b < 0 || 9 < b) { return 0; }
- c = region[2] - '0';
- if (c < 0 || 9 < c || region[3] != 0) { return 0; }
- return (10 * a + b) * 10 + c + 1;
- } else { // letters: "DE"
- a = uprv_upperOrdinal(c);
- if (a < 0 || 25 < a) { return 0; }
- int32_t b = uprv_upperOrdinal(region[1]);
- if (b < 0 || 25 < b || region[2] != 0) { return 0; }
- return 26 * a + b + 1001;
- }
- return 0;
-}
-
-LSR &LSR::setHashCode() {
- if (hashCode == 0) {
- uint32_t h = ustr_hashCharsN(language, static_cast<int32_t>(uprv_strlen(language)));
- h = h * 37 + ustr_hashCharsN(script, static_cast<int32_t>(uprv_strlen(script)));
- h = h * 37 + regionIndex;
- hashCode = h * 37 + flags;
- }
- return *this;
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/lsr.h b/contrib/libs/icu/common/lsr.h
deleted file mode 100644
index d535e5b0376..00000000000
--- a/contrib/libs/icu/common/lsr.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// © 2019 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-
-// lsr.h
-// created: 2019may08 Markus W. Scherer
-
-#ifndef __LSR_H__
-#define __LSR_H__
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-#include "cstring.h"
-
-U_NAMESPACE_BEGIN
-
-struct LSR final : public UMemory {
- static constexpr int32_t REGION_INDEX_LIMIT = 1001 + 26 * 26;
-
- static constexpr int32_t EXPLICIT_LSR = 7;
- static constexpr int32_t EXPLICIT_LANGUAGE = 4;
- static constexpr int32_t EXPLICIT_SCRIPT = 2;
- static constexpr int32_t EXPLICIT_REGION = 1;
- static constexpr int32_t IMPLICIT_LSR = 0;
- static constexpr int32_t DONT_CARE_FLAGS = 0;
-
- const char *language;
- const char *script;
- const char *region;
- char *owned = nullptr;
- /** Index for region, 0 if ill-formed. @see indexForRegion */
- int32_t regionIndex = 0;
- int32_t flags = 0;
- /** Only set for LSRs that will be used in a hash table. */
- int32_t hashCode = 0;
-
- LSR() : language("und"), script(""), region("") {}
-
- /** Constructor which aliases all subtag pointers. */
- LSR(const char *lang, const char *scr, const char *r, int32_t f) :
- language(lang), script(scr), region(r),
- regionIndex(indexForRegion(region)), flags(f) {}
- /**
- * Constructor which prepends the prefix to the language and script,
- * copies those into owned memory, and aliases the region.
- */
- LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f,
- UErrorCode &errorCode);
- LSR(LSR &&other) U_NOEXCEPT;
- LSR(const LSR &other) = delete;
- inline ~LSR() {
- // Pure inline code for almost all instances.
- if (owned != nullptr) {
- deleteOwned();
- }
- }
-
- LSR &operator=(LSR &&other) U_NOEXCEPT;
- LSR &operator=(const LSR &other) = delete;
-
- /**
- * Returns a positive index (>0) for a well-formed region code.
- * Do not rely on a particular region->index mapping; it may change.
- * Returns 0 for ill-formed strings.
- */
- static int32_t indexForRegion(const char *region);
-
- UBool isEquivalentTo(const LSR &other) const;
- UBool operator==(const LSR &other) const;
-
- inline UBool operator!=(const LSR &other) const {
- return !operator==(other);
- }
-
- LSR &setHashCode();
-
-private:
- void deleteOwned();
-};
-
-U_NAMESPACE_END
-
-#endif // __LSR_H__
diff --git a/contrib/libs/icu/common/messageimpl.h b/contrib/libs/icu/common/messageimpl.h
deleted file mode 100644
index dc7a6edd6c0..00000000000
--- a/contrib/libs/icu/common/messageimpl.h
+++ /dev/null
@@ -1,65 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: messageimpl.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2011apr04
-* created by: Markus W. Scherer
-*/
-
-#ifndef __MESSAGEIMPL_H__
-#define __MESSAGEIMPL_H__
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/messagepattern.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * Helper functions for use of MessagePattern.
- * In Java, these are package-private methods in MessagePattern itself.
- * In C++, they are declared here and implemented in messagepattern.cpp.
- */
-class U_COMMON_API MessageImpl {
-public:
- /**
- * @return TRUE if getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED
- */
- static UBool jdkAposMode(const MessagePattern &msgPattern) {
- return msgPattern.getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED;
- }
-
- /**
- * Appends the s[start, limit[ substring to sb, but with only half of the apostrophes
- * according to JDK pattern behavior.
- */
- static void appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit,
- UnicodeString &sb);
-
- /**
- * Appends the sub-message to the result string.
- * Omits SKIP_SYNTAX and appends whole arguments using appendReducedApostrophes().
- */
- static UnicodeString &appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern,
- int32_t msgStart,
- UnicodeString &result);
-
-private:
- MessageImpl(); // no constructor: all static methods
-};
-
-U_NAMESPACE_END
-
-#endif // !UCONFIG_NO_FORMATTING
-
-#endif // __MESSAGEIMPL_H__
diff --git a/contrib/libs/icu/common/messagepattern.cpp b/contrib/libs/icu/common/messagepattern.cpp
deleted file mode 100644
index f223d067116..00000000000
--- a/contrib/libs/icu/common/messagepattern.cpp
+++ /dev/null
@@ -1,1233 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2011-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: messagepattern.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2011mar14
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/messagepattern.h"
-#include "unicode/unistr.h"
-#include "unicode/utf16.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "messageimpl.h"
-#include "patternprops.h"
-#include "putilimp.h"
-#include "uassert.h"
-
-U_NAMESPACE_BEGIN
-
-// Unicode character/code point constants ---------------------------------- ***
-
-static const UChar u_pound=0x23;
-static const UChar u_apos=0x27;
-static const UChar u_plus=0x2B;
-static const UChar u_comma=0x2C;
-static const UChar u_minus=0x2D;
-static const UChar u_dot=0x2E;
-static const UChar u_colon=0x3A;
-static const UChar u_lessThan=0x3C;
-static const UChar u_equal=0x3D;
-static const UChar u_A=0x41;
-static const UChar u_C=0x43;
-static const UChar u_D=0x44;
-static const UChar u_E=0x45;
-static const UChar u_H=0x48;
-static const UChar u_I=0x49;
-static const UChar u_L=0x4C;
-static const UChar u_N=0x4E;
-static const UChar u_O=0x4F;
-static const UChar u_P=0x50;
-static const UChar u_R=0x52;
-static const UChar u_S=0x53;
-static const UChar u_T=0x54;
-static const UChar u_U=0x55;
-static const UChar u_Z=0x5A;
-static const UChar u_a=0x61;
-static const UChar u_c=0x63;
-static const UChar u_d=0x64;
-static const UChar u_e=0x65;
-static const UChar u_f=0x66;
-static const UChar u_h=0x68;
-static const UChar u_i=0x69;
-static const UChar u_l=0x6C;
-static const UChar u_n=0x6E;
-static const UChar u_o=0x6F;
-static const UChar u_p=0x70;
-static const UChar u_r=0x72;
-static const UChar u_s=0x73;
-static const UChar u_t=0x74;
-static const UChar u_u=0x75;
-static const UChar u_z=0x7A;
-static const UChar u_leftCurlyBrace=0x7B;
-static const UChar u_pipe=0x7C;
-static const UChar u_rightCurlyBrace=0x7D;
-static const UChar u_lessOrEqual=0x2264; // U+2264 is <=
-
-static const UChar kOffsetColon[]={ // "offset:"
- u_o, u_f, u_f, u_s, u_e, u_t, u_colon
-};
-
-static const UChar kOther[]={ // "other"
- u_o, u_t, u_h, u_e, u_r
-};
-
-// MessagePatternList ------------------------------------------------------ ***
-
-template<typename T, int32_t stackCapacity>
-class MessagePatternList : public UMemory {
-public:
- MessagePatternList() {}
- void copyFrom(const MessagePatternList<T, stackCapacity> &other,
- int32_t length,
- UErrorCode &errorCode);
- UBool ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode);
- UBool equals(const MessagePatternList<T, stackCapacity> &other, int32_t length) const {
- for(int32_t i=0; i<length; ++i) {
- if(a[i]!=other.a[i]) { return FALSE; }
- }
- return TRUE;
- }
-
- MaybeStackArray<T, stackCapacity> a;
-};
-
-template<typename T, int32_t stackCapacity>
-void
-MessagePatternList<T, stackCapacity>::copyFrom(
- const MessagePatternList<T, stackCapacity> &other,
- int32_t length,
- UErrorCode &errorCode) {
- if(U_SUCCESS(errorCode) && length>0) {
- if(length>a.getCapacity() && NULL==a.resize(length)) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- uprv_memcpy(a.getAlias(), other.a.getAlias(), (size_t)length*sizeof(T));
- }
-}
-
-template<typename T, int32_t stackCapacity>
-UBool
-MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=NULL) {
- return TRUE;
- }
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
-}
-
-// MessagePatternList specializations -------------------------------------- ***
-
-class MessagePatternDoubleList : public MessagePatternList<double, 8> {
-};
-
-class MessagePatternPartsList : public MessagePatternList<MessagePattern::Part, 32> {
-};
-
-// MessagePattern constructors etc. ---------------------------------------- ***
-
-MessagePattern::MessagePattern(UErrorCode &errorCode)
- : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
- partsList(NULL), parts(NULL), partsLength(0),
- numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
- hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) {
- init(errorCode);
-}
-
-MessagePattern::MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode)
- : aposMode(mode),
- partsList(NULL), parts(NULL), partsLength(0),
- numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
- hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) {
- init(errorCode);
-}
-
-MessagePattern::MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
- : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
- partsList(NULL), parts(NULL), partsLength(0),
- numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
- hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) {
- if(init(errorCode)) {
- parse(pattern, parseError, errorCode);
- }
-}
-
-UBool
-MessagePattern::init(UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- partsList=new MessagePatternPartsList();
- if(partsList==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
- }
- parts=partsList->a.getAlias();
- return TRUE;
-}
-
-MessagePattern::MessagePattern(const MessagePattern &other)
- : UObject(other), aposMode(other.aposMode), msg(other.msg),
- partsList(NULL), parts(NULL), partsLength(0),
- numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
- hasArgNames(other.hasArgNames), hasArgNumbers(other.hasArgNumbers),
- needsAutoQuoting(other.needsAutoQuoting) {
- UErrorCode errorCode=U_ZERO_ERROR;
- if(!copyStorage(other, errorCode)) {
- clear();
- }
-}
-
-MessagePattern &
-MessagePattern::operator=(const MessagePattern &other) {
- if(this==&other) {
- return *this;
- }
- aposMode=other.aposMode;
- msg=other.msg;
- hasArgNames=other.hasArgNames;
- hasArgNumbers=other.hasArgNumbers;
- needsAutoQuoting=other.needsAutoQuoting;
- UErrorCode errorCode=U_ZERO_ERROR;
- if(!copyStorage(other, errorCode)) {
- clear();
- }
- return *this;
-}
-
-UBool
-MessagePattern::copyStorage(const MessagePattern &other, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- parts=NULL;
- partsLength=0;
- numericValues=NULL;
- numericValuesLength=0;
- if(partsList==NULL) {
- partsList=new MessagePatternPartsList();
- if(partsList==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
- }
- parts=partsList->a.getAlias();
- }
- if(other.partsLength>0) {
- partsList->copyFrom(*other.partsList, other.partsLength, errorCode);
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- parts=partsList->a.getAlias();
- partsLength=other.partsLength;
- }
- if(other.numericValuesLength>0) {
- if(numericValuesList==NULL) {
- numericValuesList=new MessagePatternDoubleList();
- if(numericValuesList==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
- }
- numericValues=numericValuesList->a.getAlias();
- }
- numericValuesList->copyFrom(
- *other.numericValuesList, other.numericValuesLength, errorCode);
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- numericValues=numericValuesList->a.getAlias();
- numericValuesLength=other.numericValuesLength;
- }
- return TRUE;
-}
-
-MessagePattern::~MessagePattern() {
- delete partsList;
- delete numericValuesList;
-}
-
-// MessagePattern API ------------------------------------------------------ ***
-
-MessagePattern &
-MessagePattern::parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) {
- preParse(pattern, parseError, errorCode);
- parseMessage(0, 0, 0, UMSGPAT_ARG_TYPE_NONE, parseError, errorCode);
- postParse();
- return *this;
-}
-
-MessagePattern &
-MessagePattern::parseChoiceStyle(const UnicodeString &pattern,
- UParseError *parseError, UErrorCode &errorCode) {
- preParse(pattern, parseError, errorCode);
- parseChoiceStyle(0, 0, parseError, errorCode);
- postParse();
- return *this;
-}
-
-MessagePattern &
-MessagePattern::parsePluralStyle(const UnicodeString &pattern,
- UParseError *parseError, UErrorCode &errorCode) {
- preParse(pattern, parseError, errorCode);
- parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_PLURAL, 0, 0, parseError, errorCode);
- postParse();
- return *this;
-}
-
-MessagePattern &
-MessagePattern::parseSelectStyle(const UnicodeString &pattern,
- UParseError *parseError, UErrorCode &errorCode) {
- preParse(pattern, parseError, errorCode);
- parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_SELECT, 0, 0, parseError, errorCode);
- postParse();
- return *this;
-}
-
-void
-MessagePattern::clear() {
- // Mostly the same as preParse().
- msg.remove();
- hasArgNames=hasArgNumbers=FALSE;
- needsAutoQuoting=FALSE;
- partsLength=0;
- numericValuesLength=0;
-}
-
-UBool
-MessagePattern::operator==(const MessagePattern &other) const {
- if(this==&other) {
- return TRUE;
- }
- return
- aposMode==other.aposMode &&
- msg==other.msg &&
- // parts.equals(o.parts)
- partsLength==other.partsLength &&
- (partsLength==0 || partsList->equals(*other.partsList, partsLength));
- // No need to compare numericValues if msg and parts are the same.
-}
-
-int32_t
-MessagePattern::hashCode() const {
- int32_t hash=(aposMode*37+msg.hashCode())*37+partsLength;
- for(int32_t i=0; i<partsLength; ++i) {
- hash=hash*37+parts[i].hashCode();
- }
- return hash;
-}
-
-int32_t
-MessagePattern::validateArgumentName(const UnicodeString &name) {
- if(!PatternProps::isIdentifier(name.getBuffer(), name.length())) {
- return UMSGPAT_ARG_NAME_NOT_VALID;
- }
- return parseArgNumber(name, 0, name.length());
-}
-
-UnicodeString
-MessagePattern::autoQuoteApostropheDeep() const {
- if(!needsAutoQuoting) {
- return msg;
- }
- UnicodeString modified(msg);
- // Iterate backward so that the insertion indexes do not change.
- int32_t count=countParts();
- for(int32_t i=count; i>0;) {
- const Part &part=getPart(--i);
- if(part.getType()==UMSGPAT_PART_TYPE_INSERT_CHAR) {
- modified.insert(part.index, (UChar)part.value);
- }
- }
- return modified;
-}
-
-double
-MessagePattern::getNumericValue(const Part &part) const {
- UMessagePatternPartType type=part.type;
- if(type==UMSGPAT_PART_TYPE_ARG_INT) {
- return part.value;
- } else if(type==UMSGPAT_PART_TYPE_ARG_DOUBLE) {
- return numericValues[part.value];
- } else {
- return UMSGPAT_NO_NUMERIC_VALUE;
- }
-}
-
-/**
- * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
- * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
- * @return the "offset:" value.
- * @draft ICU 4.8
- */
-double
-MessagePattern::getPluralOffset(int32_t pluralStart) const {
- const Part &part=getPart(pluralStart);
- if(Part::hasNumericValue(part.type)) {
- return getNumericValue(part);
- } else {
- return 0;
- }
-}
-
-// MessagePattern::Part ---------------------------------------------------- ***
-
-UBool
-MessagePattern::Part::operator==(const Part &other) const {
- if(this==&other) {
- return TRUE;
- }
- return
- type==other.type &&
- index==other.index &&
- length==other.length &&
- value==other.value &&
- limitPartIndex==other.limitPartIndex;
-}
-
-// MessagePattern parser --------------------------------------------------- ***
-
-void
-MessagePattern::preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return;
- }
- if(parseError!=NULL) {
- parseError->line=0;
- parseError->offset=0;
- parseError->preContext[0]=0;
- parseError->postContext[0]=0;
- }
- msg=pattern;
- hasArgNames=hasArgNumbers=FALSE;
- needsAutoQuoting=FALSE;
- partsLength=0;
- numericValuesLength=0;
-}
-
-void
-MessagePattern::postParse() {
- if(partsList!=NULL) {
- parts=partsList->a.getAlias();
- }
- if(numericValuesList!=NULL) {
- numericValues=numericValuesList->a.getAlias();
- }
-}
-
-int32_t
-MessagePattern::parseMessage(int32_t index, int32_t msgStartLength,
- int32_t nestingLevel, UMessagePatternArgType parentType,
- UParseError *parseError, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- if(nestingLevel>Part::MAX_VALUE) {
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- int32_t msgStart=partsLength;
- addPart(UMSGPAT_PART_TYPE_MSG_START, index, msgStartLength, nestingLevel, errorCode);
- index+=msgStartLength;
- for(;;) { // while(index<msg.length()) with U_FAILURE(errorCode) check
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- if(index>=msg.length()) {
- break;
- }
- UChar c=msg.charAt(index++);
- if(c==u_apos) {
- if(index==msg.length()) {
- // The apostrophe is the last character in the pattern.
- // Add a Part for auto-quoting.
- addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
- u_apos, errorCode); // value=char to be inserted
- needsAutoQuoting=TRUE;
- } else {
- c=msg.charAt(index);
- if(c==u_apos) {
- // double apostrophe, skip the second one
- addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode);
- } else if(
- aposMode==UMSGPAT_APOS_DOUBLE_REQUIRED ||
- c==u_leftCurlyBrace || c==u_rightCurlyBrace ||
- (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe) ||
- (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound)
- ) {
- // skip the quote-starting apostrophe
- addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index-1, 1, 0, errorCode);
- // find the end of the quoted literal text
- for(;;) {
- index=msg.indexOf(u_apos, index+1);
- if(index>=0) {
- if(/*(index+1)<msg.length() &&*/ msg.charAt(index+1)==u_apos) {
- // double apostrophe inside quoted literal text
- // still encodes a single apostrophe, skip the second one
- addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, ++index, 1, 0, errorCode);
- } else {
- // skip the quote-ending apostrophe
- addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode);
- break;
- }
- } else {
- // The quoted text reaches to the end of the of the message.
- index=msg.length();
- // Add a Part for auto-quoting.
- addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
- u_apos, errorCode); // value=char to be inserted
- needsAutoQuoting=TRUE;
- break;
- }
- }
- } else {
- // Interpret the apostrophe as literal text.
- // Add a Part for auto-quoting.
- addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
- u_apos, errorCode); // value=char to be inserted
- needsAutoQuoting=TRUE;
- }
- }
- } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) {
- // The unquoted # in a plural message fragment will be replaced
- // with the (number-offset).
- addPart(UMSGPAT_PART_TYPE_REPLACE_NUMBER, index-1, 1, 0, errorCode);
- } else if(c==u_leftCurlyBrace) {
- index=parseArg(index-1, 1, nestingLevel, parseError, errorCode);
- } else if((nestingLevel>0 && c==u_rightCurlyBrace) ||
- (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe)) {
- // Finish the message before the terminator.
- // In a choice style, report the "}" substring only for the following ARG_LIMIT,
- // not for this MSG_LIMIT.
- int32_t limitLength=(parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_rightCurlyBrace) ? 0 : 1;
- addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index-1, limitLength,
- nestingLevel, errorCode);
- if(parentType==UMSGPAT_ARG_TYPE_CHOICE) {
- // Let the choice style parser see the '}' or '|'.
- return index-1;
- } else {
- // continue parsing after the '}'
- return index;
- }
- } // else: c is part of literal text
- }
- if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
- setParseError(parseError, 0); // Unmatched '{' braces in message.
- errorCode=U_UNMATCHED_BRACES;
- return 0;
- }
- addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index, 0, nestingLevel, errorCode);
- return index;
-}
-
-int32_t
-MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
- UParseError *parseError, UErrorCode &errorCode) {
- int32_t argStart=partsLength;
- UMessagePatternArgType argType=UMSGPAT_ARG_TYPE_NONE;
- addPart(UMSGPAT_PART_TYPE_ARG_START, index, argStartLength, argType, errorCode);
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- int32_t nameIndex=index=skipWhiteSpace(index+argStartLength);
- if(index==msg.length()) {
- setParseError(parseError, 0); // Unmatched '{' braces in message.
- errorCode=U_UNMATCHED_BRACES;
- return 0;
- }
- // parse argument name or number
- index=skipIdentifier(index);
- int32_t number=parseArgNumber(nameIndex, index);
- if(number>=0) {
- int32_t length=index-nameIndex;
- if(length>Part::MAX_LENGTH || number>Part::MAX_VALUE) {
- setParseError(parseError, nameIndex); // Argument number too large.
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- hasArgNumbers=TRUE;
- addPart(UMSGPAT_PART_TYPE_ARG_NUMBER, nameIndex, length, number, errorCode);
- } else if(number==UMSGPAT_ARG_NAME_NOT_NUMBER) {
- int32_t length=index-nameIndex;
- if(length>Part::MAX_LENGTH) {
- setParseError(parseError, nameIndex); // Argument name too long.
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- hasArgNames=TRUE;
- addPart(UMSGPAT_PART_TYPE_ARG_NAME, nameIndex, length, 0, errorCode);
- } else { // number<-1 (ARG_NAME_NOT_VALID)
- setParseError(parseError, nameIndex); // Bad argument syntax.
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- }
- index=skipWhiteSpace(index);
- if(index==msg.length()) {
- setParseError(parseError, 0); // Unmatched '{' braces in message.
- errorCode=U_UNMATCHED_BRACES;
- return 0;
- }
- UChar c=msg.charAt(index);
- if(c==u_rightCurlyBrace) {
- // all done
- } else if(c!=u_comma) {
- setParseError(parseError, nameIndex); // Bad argument syntax.
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- } else /* ',' */ {
- // parse argument type: case-sensitive a-zA-Z
- int32_t typeIndex=index=skipWhiteSpace(index+1);
- while(index<msg.length() && isArgTypeChar(msg.charAt(index))) {
- ++index;
- }
- int32_t length=index-typeIndex;
- index=skipWhiteSpace(index);
- if(index==msg.length()) {
- setParseError(parseError, 0); // Unmatched '{' braces in message.
- errorCode=U_UNMATCHED_BRACES;
- return 0;
- }
- if(length==0 || ((c=msg.charAt(index))!=u_comma && c!=u_rightCurlyBrace)) {
- setParseError(parseError, nameIndex); // Bad argument syntax.
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- }
- if(length>Part::MAX_LENGTH) {
- setParseError(parseError, nameIndex); // Argument type name too long.
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- argType=UMSGPAT_ARG_TYPE_SIMPLE;
- if(length==6) {
- // case-insensitive comparisons for complex-type names
- if(isChoice(typeIndex)) {
- argType=UMSGPAT_ARG_TYPE_CHOICE;
- } else if(isPlural(typeIndex)) {
- argType=UMSGPAT_ARG_TYPE_PLURAL;
- } else if(isSelect(typeIndex)) {
- argType=UMSGPAT_ARG_TYPE_SELECT;
- }
- } else if(length==13) {
- if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) {
- argType=UMSGPAT_ARG_TYPE_SELECTORDINAL;
- }
- }
- // change the ARG_START type from NONE to argType
- partsList->a[argStart].value=(int16_t)argType;
- if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
- addPart(UMSGPAT_PART_TYPE_ARG_TYPE, typeIndex, length, 0, errorCode);
- }
- // look for an argument style (pattern)
- if(c==u_rightCurlyBrace) {
- if(argType!=UMSGPAT_ARG_TYPE_SIMPLE) {
- setParseError(parseError, nameIndex); // No style field for complex argument.
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- }
- } else /* ',' */ {
- ++index;
- if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
- index=parseSimpleStyle(index, parseError, errorCode);
- } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) {
- index=parseChoiceStyle(index, nestingLevel, parseError, errorCode);
- } else {
- index=parsePluralOrSelectStyle(argType, index, nestingLevel, parseError, errorCode);
- }
- }
- }
- // Argument parsing stopped on the '}'.
- addLimitPart(argStart, UMSGPAT_PART_TYPE_ARG_LIMIT, index, 1, argType, errorCode);
- return index+1;
-}
-
-int32_t
-MessagePattern::parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- int32_t start=index;
- int32_t nestedBraces=0;
- while(index<msg.length()) {
- UChar c=msg.charAt(index++);
- if(c==u_apos) {
- // Treat apostrophe as quoting but include it in the style part.
- // Find the end of the quoted literal text.
- index=msg.indexOf(u_apos, index);
- if(index<0) {
- // Quoted literal argument style text reaches to the end of the message.
- setParseError(parseError, start);
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- }
- // skip the quote-ending apostrophe
- ++index;
- } else if(c==u_leftCurlyBrace) {
- ++nestedBraces;
- } else if(c==u_rightCurlyBrace) {
- if(nestedBraces>0) {
- --nestedBraces;
- } else {
- int32_t length=--index-start;
- if(length>Part::MAX_LENGTH) {
- setParseError(parseError, start); // Argument style text too long.
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- addPart(UMSGPAT_PART_TYPE_ARG_STYLE, start, length, 0, errorCode);
- return index;
- }
- } // c is part of literal text
- }
- setParseError(parseError, 0); // Unmatched '{' braces in message.
- errorCode=U_UNMATCHED_BRACES;
- return 0;
-}
-
-int32_t
-MessagePattern::parseChoiceStyle(int32_t index, int32_t nestingLevel,
- UParseError *parseError, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- int32_t start=index;
- index=skipWhiteSpace(index);
- if(index==msg.length() || msg.charAt(index)==u_rightCurlyBrace) {
- setParseError(parseError, 0); // Missing choice argument pattern.
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- }
- for(;;) {
- // The choice argument style contains |-separated (number, separator, message) triples.
- // Parse the number.
- int32_t numberIndex=index;
- index=skipDouble(index);
- int32_t length=index-numberIndex;
- if(length==0) {
- setParseError(parseError, start); // Bad choice pattern syntax.
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- }
- if(length>Part::MAX_LENGTH) {
- setParseError(parseError, numberIndex); // Choice number too long.
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- parseDouble(numberIndex, index, TRUE, parseError, errorCode); // adds ARG_INT or ARG_DOUBLE
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- // Parse the separator.
- index=skipWhiteSpace(index);
- if(index==msg.length()) {
- setParseError(parseError, start); // Bad choice pattern syntax.
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- }
- UChar c=msg.charAt(index);
- if(!(c==u_pound || c==u_lessThan || c==u_lessOrEqual)) { // U+2264 is <=
- setParseError(parseError, start); // Expected choice separator (#<\u2264) instead of c.
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- }
- addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, index, 1, 0, errorCode);
- // Parse the message fragment.
- index=parseMessage(++index, 0, nestingLevel+1, UMSGPAT_ARG_TYPE_CHOICE, parseError, errorCode);
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length().
- if(index==msg.length()) {
- return index;
- }
- if(msg.charAt(index)==u_rightCurlyBrace) {
- if(!inMessageFormatPattern(nestingLevel)) {
- setParseError(parseError, start); // Bad choice pattern syntax.
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- }
- return index;
- } // else the terminator is '|'
- index=skipWhiteSpace(index+1);
- }
-}
-
-int32_t
-MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType,
- int32_t index, int32_t nestingLevel,
- UParseError *parseError, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- int32_t start=index;
- UBool isEmpty=TRUE;
- UBool hasOther=FALSE;
- for(;;) {
- // First, collect the selector looking for a small set of terminators.
- // It would be a little faster to consider the syntax of each possible
- // token right here, but that makes the code too complicated.
- index=skipWhiteSpace(index);
- UBool eos=index==msg.length();
- if(eos || msg.charAt(index)==u_rightCurlyBrace) {
- if(eos==inMessageFormatPattern(nestingLevel)) {
- setParseError(parseError, start); // Bad plural/select pattern syntax.
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- }
- if(!hasOther) {
- setParseError(parseError, 0); // Missing 'other' keyword in plural/select pattern.
- errorCode=U_DEFAULT_KEYWORD_MISSING;
- return 0;
- }
- return index;
- }
- int32_t selectorIndex=index;
- if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && msg.charAt(selectorIndex)==u_equal) {
- // explicit-value plural selector: =double
- index=skipDouble(index+1);
- int32_t length=index-selectorIndex;
- if(length==1) {
- setParseError(parseError, start); // Bad plural/select pattern syntax.
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- }
- if(length>Part::MAX_LENGTH) {
- setParseError(parseError, selectorIndex); // Argument selector too long.
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode);
- parseDouble(selectorIndex+1, index, FALSE,
- parseError, errorCode); // adds ARG_INT or ARG_DOUBLE
- } else {
- index=skipIdentifier(index);
- int32_t length=index-selectorIndex;
- if(length==0) {
- setParseError(parseError, start); // Bad plural/select pattern syntax.
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- }
- // Note: The ':' in "offset:" is just beyond the skipIdentifier() range.
- if( UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && length==6 && index<msg.length() &&
- 0==msg.compare(selectorIndex, 7, kOffsetColon, 0, 7)
- ) {
- // plural offset, not a selector
- if(!isEmpty) {
- // Plural argument 'offset:' (if present) must precede key-message pairs.
- setParseError(parseError, start);
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- }
- // allow whitespace between offset: and its value
- int32_t valueIndex=skipWhiteSpace(index+1); // The ':' is at index.
- index=skipDouble(valueIndex);
- if(index==valueIndex) {
- setParseError(parseError, start); // Missing value for plural 'offset:'.
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- }
- if((index-valueIndex)>Part::MAX_LENGTH) {
- setParseError(parseError, valueIndex); // Plural offset value too long.
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- parseDouble(valueIndex, index, FALSE,
- parseError, errorCode); // adds ARG_INT or ARG_DOUBLE
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- isEmpty=FALSE;
- continue; // no message fragment after the offset
- } else {
- // normal selector word
- if(length>Part::MAX_LENGTH) {
- setParseError(parseError, selectorIndex); // Argument selector too long.
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode);
- if(0==msg.compare(selectorIndex, length, kOther, 0, 5)) {
- hasOther=TRUE;
- }
- }
- }
- if(U_FAILURE(errorCode)) {
- return 0;
- }
-
- // parse the message fragment following the selector
- index=skipWhiteSpace(index);
- if(index==msg.length() || msg.charAt(index)!=u_leftCurlyBrace) {
- setParseError(parseError, selectorIndex); // No message fragment after plural/select selector.
- errorCode=U_PATTERN_SYNTAX_ERROR;
- return 0;
- }
- index=parseMessage(index, 1, nestingLevel+1, argType, parseError, errorCode);
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- isEmpty=FALSE;
- }
-}
-
-/**
- * Interprets s[start, limit) as an argument number.
- *
- * If the identifier contains only ASCII digits, then it is an argument _number_
- * and must not have leading zeros (except "0" itself).
- * Otherwise it is an argument _name_.
- *
- * @param s     the string containing the identifier
- * @param start index of the first code unit of the identifier
- * @param limit index just past the last code unit of the identifier
- * @return the non-negative argument number if the range is a well-formed number;
- *         UMSGPAT_ARG_NAME_NOT_NUMBER if any code unit is not an ASCII digit;
- *         UMSGPAT_ARG_NAME_NOT_VALID if the range is empty, or consists only of
- *         digits but has a leading zero or is too large.
- */
-int32_t
-MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit) {
-    if(start>=limit) {
-        return UMSGPAT_ARG_NAME_NOT_VALID;
-    }
-    int32_t number;
-    // Defer numeric errors until we know there are only digits.
-    UBool badNumber;
-    UChar c=s.charAt(start++);
-    if(c==0x30) {
-        if(start==limit) {
-            return 0;
-        } else {
-            number=0;
-            badNumber=TRUE;  // leading zero
-        }
-    } else if(0x31<=c && c<=0x39) {
-        number=c-0x30;
-        badNumber=FALSE;
-    } else {
-        return UMSGPAT_ARG_NAME_NOT_NUMBER;
-    }
-    while(start<limit) {
-        c=s.charAt(start++);
-        if(0x30<=c && c<=0x39) {
-            if(number>=INT32_MAX/10) {
-                // Overflow: remember the error but stop accumulating, because
-                // number*10 would overflow a signed int32_t (undefined behavior).
-                // number stays >= INT32_MAX/10, so later digits land here too;
-                // we keep scanning only to tell "bad number" from "not a number".
-                badNumber=TRUE;
-            } else {
-                number=number*10+(c-0x30);
-            }
-        } else {
-            return UMSGPAT_ARG_NAME_NOT_NUMBER;
-        }
-    }
-    // There are only ASCII digits.
-    if(badNumber) {
-        return UMSGPAT_ARG_NAME_NOT_VALID;
-    } else {
-        return number;
-    }
-}
-
-/**
- * Parses the numeric value in msg[start, limit) and records it as a part:
- * an ARG_INT part when it is a small integer, otherwise an ARG_DOUBLE part.
- * On malformed input, sets the parse error at start and reports
- * U_PATTERN_SYNTAX_ERROR in errorCode.
- */
-void
-MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
-                            UParseError *parseError, UErrorCode &errorCode) {
-    if(U_FAILURE(errorCode)) {
-        return;
-    }
-    U_ASSERT(start<limit);
-    // Single-pass block: a "break" directly inside it jumps to the shared
-    // syntax-error exit at the bottom of the function.
-    do {
-        int32_t pos=start;
-        int32_t negative=0;  // 0 or 1 (not boolean) so it can widen the integer bound below
-        UChar ch=msg.charAt(pos++);
-        if(ch==u_minus || ch==u_plus) {
-            if(ch==u_minus) {
-                negative=1;
-            }
-            if(pos==limit) {
-                break;  // a sign without a number
-            }
-            ch=msg.charAt(pos++);
-        }
-        if(ch==0x221e) {  // infinity
-            if(!allowInfinity || pos!=limit) {
-                break;
-            }
-            double inf=uprv_getInfinity();
-            addArgDoublePart(negative!=0 ? -inf : inf, start, limit-start, errorCode);
-            return;
-        }
-        // Fast path: try a small integer first; on any surprise fall through
-        // to the general double parser below.
-        int32_t smallValue=0;
-        for(; '0'<=ch && ch<='9'; ch=msg.charAt(pos++)) {
-            smallValue=smallValue*10+(ch-'0');
-            if(smallValue>(Part::MAX_VALUE+negative)) {
-                break;  // leaves only this inner loop: not a small-enough integer
-            }
-            if(pos==limit) {
-                addPart(UMSGPAT_PART_TYPE_ARG_INT, start, limit-start,
-                        negative!=0 ? -smallValue : smallValue, errorCode);
-                return;
-            }
-        }
-        // General path: copy the text into a char buffer and let strtod judge it.
-        char digits[128];
-        const int32_t capacity=(int32_t)sizeof(digits);
-        const int32_t length=limit-start;
-        if(length>=capacity) {
-            break;  // number too long
-        }
-        msg.extract(start, length, digits, capacity, US_INV);
-        if((int32_t)uprv_strlen(digits)<length) {
-            break;  // contains non-invariant character that was turned into NUL
-        }
-        char *end;
-        double numericValue=uprv_strtod(digits, &end);
-        if(end!=(digits+length)) {
-            break;  // parsing error
-        }
-        addArgDoublePart(numericValue, start, length, errorCode);
-        return;
-    } while(false);
-    setParseError(parseError, start /*, limit*/);  // Bad syntax for numeric value.
-    errorCode=U_PATTERN_SYNTAX_ERROR;
-}
-
-// Returns the index reached by PatternProps::skipWhiteSpace() when
-// applied to the rest of msg starting at index.
-int32_t
-MessagePattern::skipWhiteSpace(int32_t index) {
-    const UChar *base=msg.getBuffer();
-    const int32_t remaining=msg.length()-index;
-    const UChar *next=PatternProps::skipWhiteSpace(base+index, remaining);
-    return (int32_t)(next-base);
-}
-
-// Returns the index reached by PatternProps::skipIdentifier() when
-// applied to the rest of msg starting at index.
-int32_t
-MessagePattern::skipIdentifier(int32_t index) {
-    const UChar *base=msg.getBuffer();
-    const int32_t remaining=msg.length()-index;
-    const UChar *next=PatternProps::skipIdentifier(base+index, remaining);
-    return (int32_t)(next-base);
-}
-
-// Advances index over code units that may appear in a numeric value:
-// ASCII digits, '+', '-', '.', 'e', 'E', and U+221E (the infinity symbol,
-// allowed for ChoiceFormat patterns). Returns the index of the first
-// code unit that is none of these, or msg.length().
-int32_t
-MessagePattern::skipDouble(int32_t index) {
-    const int32_t msgLength=msg.length();
-    for(; index<msgLength; ++index) {
-        const UChar c=msg.charAt(index);
-        const bool isDigit=0x30<=c && c<=0x39;
-        const bool isNumberSyntax=
-            c==u_plus || c==u_minus || c==u_dot ||
-            c==u_e || c==u_E || c==0x221e;
-        if(!isDigit && !isNumberSyntax) {
-            break;
-        }
-    }
-    return index;
-}
-
-// True if c is an ASCII letter (a-z or A-Z).
-UBool
-MessagePattern::isArgTypeChar(UChar32 c) {
-    if(u_a<=c && c<=u_z) {
-        return TRUE;
-    }
-    return u_A<=c && c<=u_Z;
-}
-
-// True if msg contains "choice" at index, compared ASCII-case-insensitively.
-UBool
-MessagePattern::isChoice(int32_t index) {
-    static const UChar lower[]={ u_c, u_h, u_o, u_i, u_c, u_e };
-    static const UChar upper[]={ u_C, u_H, u_O, u_I, u_C, u_E };
-    for(int32_t k=0; k<6; ++k) {
-        const UChar c=msg.charAt(index+k);
-        if(c!=lower[k] && c!=upper[k]) {
-            return FALSE;  // stop at the first mismatch
-        }
-    }
-    return TRUE;
-}
-
-// True if msg contains "plural" at index, compared ASCII-case-insensitively.
-UBool
-MessagePattern::isPlural(int32_t index) {
-    static const UChar lower[]={ u_p, u_l, u_u, u_r, u_a, u_l };
-    static const UChar upper[]={ u_P, u_L, u_U, u_R, u_A, u_L };
-    for(int32_t k=0; k<6; ++k) {
-        const UChar c=msg.charAt(index+k);
-        if(c!=lower[k] && c!=upper[k]) {
-            return FALSE;  // stop at the first mismatch
-        }
-    }
-    return TRUE;
-}
-
-// True if msg contains "select" at index, compared ASCII-case-insensitively.
-UBool
-MessagePattern::isSelect(int32_t index) {
-    static const UChar lower[]={ u_s, u_e, u_l, u_e, u_c, u_t };
-    static const UChar upper[]={ u_S, u_E, u_L, u_E, u_C, u_T };
-    for(int32_t k=0; k<6; ++k) {
-        const UChar c=msg.charAt(index+k);
-        if(c!=lower[k] && c!=upper[k]) {
-            return FALSE;  // stop at the first mismatch
-        }
-    }
-    return TRUE;
-}
-
-// True if msg contains "ordinal" at index, compared ASCII-case-insensitively.
-UBool
-MessagePattern::isOrdinal(int32_t index) {
-    static const UChar lower[]={ u_o, u_r, u_d, u_i, u_n, u_a, u_l };
-    static const UChar upper[]={ u_O, u_R, u_D, u_I, u_N, u_A, u_L };
-    for(int32_t k=0; k<7; ++k) {
-        const UChar c=msg.charAt(index+k);
-        if(c!=lower[k] && c!=upper[k]) {
-            return FALSE;  // stop at the first mismatch
-        }
-    }
-    return TRUE;
-}
-
-// True when nested inside a message, or when the first recorded part
-// is a MSG_START part.
-UBool
-MessagePattern::inMessageFormatPattern(int32_t nestingLevel) {
-    if(nestingLevel>0) {
-        return TRUE;
-    }
-    return partsList->a[0].type==UMSGPAT_PART_TYPE_MSG_START;
-}
-
-// True only at nesting level 1, directly under a choice argument, when the
-// first recorded part is not a MSG_START part.
-UBool
-MessagePattern::inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType) {
-    if(nestingLevel!=1) {
-        return FALSE;
-    }
-    if(parentType!=UMSGPAT_ARG_TYPE_CHOICE) {
-        return FALSE;
-    }
-    return partsList->a[0].type!=UMSGPAT_PART_TYPE_MSG_START;
-}
-
-// Appends one part to partsList, growing the list if necessary.
-// Does nothing when ensureCapacityForOneMore() reports that there is no room.
-void
-MessagePattern::addPart(UMessagePatternPartType type, int32_t index, int32_t length,
-                        int32_t value, UErrorCode &errorCode) {
-    if(!partsList->ensureCapacityForOneMore(partsLength, errorCode)) {
-        return;
-    }
-    Part &slot=partsList->a[partsLength];
-    ++partsLength;
-    slot.type=type;
-    slot.index=index;
-    slot.length=(uint16_t)length;  // stored narrowed to 16 bits
-    slot.value=(int16_t)value;     // stored narrowed to 16 bits
-    slot.limitPartIndex=0;
-}
-
-// Records in the part at index start where its matching limit part will be
-// stored (the current partsLength), then appends that limit part.
-void
-MessagePattern::addLimitPart(int32_t start,
-                             UMessagePatternPartType type, int32_t index, int32_t length,
-                             int32_t value, UErrorCode &errorCode) {
-    Part &startPart=partsList->a[start];
-    startPart.limitPartIndex=partsLength;
-    addPart(type, index, length, value, errorCode);
-}
-
-// Stores numericValue in numericValuesList (creating the list on first use)
-// and appends an ARG_DOUBLE part whose value is the index of the stored number.
-void
-MessagePattern::addArgDoublePart(double numericValue, int32_t start, int32_t length,
-                                 UErrorCode &errorCode) {
-    if(U_FAILURE(errorCode)) {
-        return;
-    }
-    const int32_t numericIndex=numericValuesLength;
-    if(numericValuesList==NULL) {
-        // First numeric value: create the list.
-        numericValuesList=new MessagePatternDoubleList();
-        if(numericValuesList==NULL) {
-            errorCode=U_MEMORY_ALLOCATION_ERROR;
-            return;
-        }
-    } else {
-        if(!numericValuesList->ensureCapacityForOneMore(numericValuesLength, errorCode)) {
-            return;
-        }
-        // The index is later stored as the part's value, so it must not exceed Part::MAX_VALUE.
-        if(numericIndex>Part::MAX_VALUE) {
-            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-            return;
-        }
-    }
-    numericValuesList->a[numericValuesLength++]=numericValue;
-    addPart(UMSGPAT_PART_TYPE_ARG_DOUBLE, start, length, numericIndex, errorCode);
-}
-
-void
-MessagePattern::setParseError(UParseError *parseError, int32_t index) {
- if(parseError==NULL) {
- return;
- }
- parseError->offset=index;
-
- // Set preContext to some of msg before index.
- // Avoid splitting a surrogate pair.
- int32_t length=index;
- if(length>=U_PARSE_CONTEXT_LEN) {
- length=U_PARSE_CONTEXT_LEN-1;
- if(length>0 && U16_IS_TRAIL(msg[index-length])) {
- --length;
- }
- }
- msg.extract(index-length, length, parseError->preContext);
- parseError->preContext[length]=0;
-
- // Set postContext to some of msg starting at index.
- length=msg.length()-index;
- if(length>=U_PARSE_CONTEXT_LEN) {
- length=U_PARSE_CONTEXT_LEN-1;
- if(length>0 && U16_IS_LEAD(msg[index+length-1])) {
- --length;
- }
- }
- msg.extract(index, length, parseError->postContext);
- parseError->postContext[length]=0;
-}
-
-// MessageImpl ------------------------------------------------------------- ***
-
-// Appends s[start, limit) to sb with apostrophes reduced: a single
-// apostrophe is dropped, and a doubled apostrophe yields one apostrophe.
-void
-MessageImpl::appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit,
-                                      UnicodeString &sb) {
-    // Index right after the most recently skipped apostrophe; another
-    // apostrophe found exactly there completes a doubled pair.
-    int32_t pairIndex=-1;
-    int32_t apos;
-    while((apos=s.indexOf(u_apos, start))>=0 && apos<limit) {
-        if(apos==pairIndex) {
-            // Second apostrophe of a pair: emit one and move past it.
-            sb.append(u_apos);
-            ++start;
-            pairIndex=-1;
-        } else {
-            // Copy the text before this apostrophe and skip the apostrophe.
-            sb.append(s, start, apos-start);
-            start=apos+1;
-            pairIndex=start;
-        }
-    }
-    // No more apostrophes before limit: copy the remainder.
-    sb.append(s, start, limit-start);
-}
-
-// Ported from second half of ICU4J SelectFormat.format(String).
-// Appends the sub-message that begins at part msgStart to result, omitting
-// SKIP_SYNTAX spans and copying each argument's text through
-// appendReducedApostrophes(). Returns result.
-UnicodeString &
-MessageImpl::appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern,
-                                               int32_t msgStart,
-                                               UnicodeString &result) {
-    const UnicodeString &msgString=msgPattern.getPatternString();
-    int32_t prevIndex=msgPattern.getPart(msgStart).getLimit();
-    int32_t i=msgStart;
-    for(;;) {
-        ++i;
-        const MessagePattern::Part &part=msgPattern.getPart(i);
-        const int32_t index=part.getIndex();
-        switch(part.getType()) {
-        case UMSGPAT_PART_TYPE_MSG_LIMIT:
-            // End of the sub-message: flush the remaining text and finish.
-            return result.append(msgString, prevIndex, index-prevIndex);
-        case UMSGPAT_PART_TYPE_SKIP_SYNTAX:
-            result.append(msgString, prevIndex, index-prevIndex);
-            prevIndex=part.getLimit();
-            break;
-        case UMSGPAT_PART_TYPE_ARG_START: {
-            result.append(msgString, prevIndex, index-prevIndex);
-            // Jump to the argument's limit part and copy the whole argument text.
-            i=msgPattern.getLimitPartIndex(i);
-            const int32_t argLimit=msgPattern.getPart(i).getLimit();
-            appendReducedApostrophes(msgString, index, argLimit, result);
-            prevIndex=argLimit;
-            break;
-        }
-        default:
-            break;  // other part types do not affect the output
-        }
-    }
-}
-
-U_NAMESPACE_END
-
-#endif // !UCONFIG_NO_FORMATTING
diff --git a/contrib/libs/icu/common/msvcres.h b/contrib/libs/icu/common/msvcres.h
deleted file mode 100644
index 0cace85e747..00000000000
--- a/contrib/libs/icu/common/msvcres.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//{{NO_DEPENDENCIES}}
-// Copyright (c) 2003-2010 International Business Machines
-// Corporation and others. All Rights Reserved.
-//
-// Used by common.rc and other .rc files.
-//Do not edit with Microsoft Developer Studio because it will modify this
-//header the wrong way. This is here to prevent Visual Studio .NET from
-//unnecessarily building the resource files when it's not needed.
-//
-
-/*
-These are defined before unicode/uversion.h in order to prevent
-STLPort's broken stddef.h from being used when rc.exe parses this file.
-*/
-#define _STLP_OUTERMOST_HEADER_ID 0
-#define _STLP_WINCE 1
-
-#include "unicode/uversion.h"
-
-#define ICU_WEBSITE "http://icu-project.org"
-#define ICU_COMPANY "The ICU Project"
-#define ICU_PRODUCT_PREFIX "ICU"
-#define ICU_PRODUCT "International Components for Unicode"
diff --git a/contrib/libs/icu/common/mutex.h b/contrib/libs/icu/common/mutex.h
deleted file mode 100644
index 44b1f90ba04..00000000000
--- a/contrib/libs/icu/common/mutex.h
+++ /dev/null
@@ -1,77 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1997-2013, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*/
-//----------------------------------------------------------------------------
-// File: mutex.h
-//
-// Lightweight C++ wrapper for umtx_ C mutex functions
-//
-// Author: Alan Liu 1/31/97
-// History:
-// 06/04/97 helena Updated setImplementation as per feedback from 5/21 drop.
-// 04/07/1999 srl refocused as a thin wrapper
-//
-//----------------------------------------------------------------------------
-#ifndef MUTEX_H
-#define MUTEX_H
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-#include "umutex.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * Mutex is a helper class for convenient locking and unlocking of a UMutex.
- *
- * Creating a local scope Mutex will lock a UMutex, holding the lock until the Mutex
- * goes out of scope.
- *
- * If no UMutex is specified, the ICU global mutex is implied.
- *
- * For example:
- *
- * static UMutex myMutex;
- *
- * void Function(int arg1, int arg2)
- * {
- * static Object* foo; // Shared read-write object
- * Mutex mutex(&myMutex); // or no args for the global lock
- * foo->Method();
- * // When 'mutex' goes out of scope and gets destroyed here, the lock is released
- * }
- *
- * Note: Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function
- * returning a Mutex. This is a common mistake which silently slips through the
- * compiler!!
- */
-
-class U_COMMON_API Mutex : public UMemory {
-public:
- Mutex(UMutex *mutex = nullptr) : fMutex(mutex) {
- umtx_lock(fMutex);
- }
- ~Mutex() {
- umtx_unlock(fMutex);
- }
-
- Mutex(const Mutex &other) = delete; // forbid copying of this class
- Mutex &operator=(const Mutex &other) = delete; // forbid assigning of this class
- void *operator new(size_t s) = delete; // forbid heap allocation. Locals only.
-
-private:
- UMutex *fMutex;
-};
-
-
-U_NAMESPACE_END
-
-#endif // MUTEX_H
-//eof
diff --git a/contrib/libs/icu/common/norm2_nfc_data.h b/contrib/libs/icu/common/norm2_nfc_data.h
deleted file mode 100644
index 455cc0c4285..00000000000
--- a/contrib/libs/icu/common/norm2_nfc_data.h
+++ /dev/null
@@ -1,1149 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//
-// Copyright (C) 1999-2016, International Business Machines
-// Corporation and others. All Rights Reserved.
-//
-// file name: norm2_nfc_data.h
-//
-// machine-generated by: icu/source/tools/gennorm2/n2builder.cpp
-
-
-#ifdef INCLUDED_FROM_NORMALIZER2_CPP
-
-static const UVersionInfo norm2_nfc_data_formatVersion={4,0,0,0};
-static const UVersionInfo norm2_nfc_data_dataVersion={0xd,0,0,0};
-
-static const int32_t norm2_nfc_data_indexes[Normalizer2Impl::IX_COUNT]={
-0x50,0x4bac,0x8814,0x8914,0x8914,0x8914,0x8914,0x8914,0xc0,0x300,0xae2,0x29e0,0x3c66,0xfc00,0x1288,0x3b9c,
-0x3c34,0x3c66,0x300,0
-};
-
-static const uint16_t norm2_nfc_data_trieIndex[1746]={
-0,0x40,0x7b,0xbb,0xfb,0x13a,0x17a,0x1b2,0x1f2,0x226,0x254,0x226,0x294,0x2d4,0x313,0x353,
-0x393,0x3d2,0x40f,0x44e,0x226,0x226,0x488,0x4c8,0x4f8,0x530,0x226,0x570,0x59f,0x5de,0x226,0x5f3,
-0x631,0x65f,0x226,0x68c,0x6cc,0x709,0x729,0x768,0x7a7,0x7e4,0x803,0x840,0x729,0x879,0x8a7,0x8e6,
-0x226,0x920,0x937,0x977,0x98e,0x9cd,0x226,0xa03,0xa23,0xa5e,0xa6a,0xaa5,0xacd,0xb0a,0xb4a,0xb84,
-0xb9f,0x226,0xbda,0x226,0xc1a,0xc39,0xc6f,0xcac,0x226,0x226,0x226,0x226,0x226,0xccf,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xcfb,0x226,0x226,0xd30,
-0x226,0x226,0xd4e,0x226,0xd78,0x226,0x226,0x226,0xdb4,0xdd4,0xe14,0xe53,0xe8e,0xece,0xf02,0xf2e,
-0x808,0x226,0x226,0xf62,0x226,0x226,0x226,0xfa2,0xfe2,0x1022,0x1062,0x10a2,0x10e2,0x1122,0x1162,0x11a2,
-0x11e2,0x226,0x226,0x1212,0x1243,0x226,0x1273,0x12a6,0x12e3,0x1322,0x1362,0x1398,0x13c6,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x13f1,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0xcbd,0x226,0x140e,0x226,0x144e,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x148e,0x14c8,0x1506,0x1546,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1585,0x15c3,0x15e3,0x226,0x226,0x226,0x226,
-0x161d,0x226,0x226,0x1645,0x1677,0x16a5,0x80c,0x16b8,0x226,0x226,0x16c8,0x1708,0x226,0x226,0x226,0x1420,
-0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,
-0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,
-0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,
-0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,
-0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,
-0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,
-0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,
-0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,
-0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,
-0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,
-0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x1794,0x226,
-0x17d4,0x180f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x184f,0x188f,0x18cf,0x190f,0x194f,0x198f,0x19cf,0x1a0f,0x1a32,0x1a72,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1a92,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x655,0x664,0x67c,0x69b,0x6b0,0x6b0,0x6b0,0x6b4,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xbda,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x54f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x40c,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ac5,0x226,0x226,0x1ad5,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0xdc6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ae5,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x15d6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x1aef,0x54f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x7eb,0x226,0x226,
-0x9ba,0x226,0x1aff,0x1b0c,0x1b18,0x226,0x226,0x226,0x226,0x414,0x226,0x1b23,0x1b33,0x226,0x226,0x226,
-0x7e0,0x226,0x226,0x226,0x226,0x1b43,0x226,0x226,0x226,0x1b4e,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x1b55,0x226,0x226,0x226,0x226,0x1b60,0x1b6f,0x8f6,0x1b7d,0x412,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x1b8b,0x798,0x226,0x226,0x226,0x226,0x226,0x1b9b,0x1baa,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x8d6,0x1bb2,0x1bc2,0x226,
-0x226,0x226,0x9ba,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bcc,0x226,0x226,0x226,0x226,0x226,
-0x226,0x7e6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bc9,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bdc,
-0x7e0,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x84d,0x226,0x226,0x226,0x7ed,0x7ea,
-0x226,0x226,0x226,0x226,0x7e8,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x9ba,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xbd4,0x226,0x226,0x226,
-0x226,0x7ea,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x1bec,0x226,0x226,0x226,0xefb,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x1bfc,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bfe,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x1c0d,0x1c1d,0x1c2b,0x1c38,0x226,0x1c44,0x1c52,0x1c62,0x226,0x226,
-0x226,0x226,0xcea,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c72,0x1c7a,
-0x1c88,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0xefb,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x4fc,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x1c98,0x226,0x226,0x226,0x226,0x226,0x226,0x1ca4,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x1cb4,0x1cc4,0x1cd4,0x1ce4,0x1cf4,0x1d04,0x1d14,0x1d24,0x1d34,0x1d44,0x1d54,
-0x1d64,0x1d74,0x1d84,0x1d94,0x1da4,0x1db4,0x1dc4,0x1dd4,0x1de4,0x1df4,0x1e04,0x1e14,0x1e24,0x1e34,0x1e44,0x1e54,
-0x1e64,0x1e74,0x1e84,0x1e94,0x1ea4,0x1eb4,0x1ec4,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x408,0x428,0xc4,0xc4,0xc4,0x448,0x457,0x46d,0x489,0x4a6,0x4c2,
-0x4df,0x4fc,0x51b,0x538,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
-0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x552,0xc4,0x566,0xc4,0xc4,0xc4,0xc4,
-0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
-0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x586,0xc4,0xc4,0xc4,0xc4,0xc4,
-0xc4,0xc4,0xc4,0x591,0x5ae,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x5ce,0x5e2,0xc4,0xc4,0x5f5,
-0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
-0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
-0x615,0x635
-};
-
-static const uint16_t norm2_nfc_data_trieData[7892]={
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,4,8,0xc,1,
-1,0x10,0x50,0x5c,0x70,0x88,0xcc,0xd0,0xec,0x108,0x144,0x148,0x15c,0x174,0x180,0x1a4,
-0x1e4,1,0x1ec,0x20c,0x228,0x244,0x290,0x298,0x2b0,0x2b8,0x2dc,1,1,1,1,1,
-1,0x2f4,0x334,0x340,0x354,0x36c,0x3b0,0x3b4,0x3d0,0x3f0,0x428,0x430,0x444,0x45c,0x468,0x48c,
-0x4cc,1,0x4d4,0x4f4,0x510,0x530,0x57c,0x584,0x5a0,0x5a8,0x5d0,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0x5e8,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,0x128a,0x1290,0xae4,0x1296,0xafa,
-0xb04,0x5f4,0xb0e,0x129c,0x12a2,0xb18,0x12a8,0x12ae,0x12b4,0x12ba,0xb2e,1,0x12c0,0x12c6,0x12cc,0xb38,
-0xb4e,0xb60,1,0x5fc,0x12d2,0x12d8,0x12de,0xb6a,0x12e4,1,1,0x12ea,0x12f0,0xb80,0x12f6,0xb96,
-0xba0,0x600,0xbaa,0x12fc,0x1302,0xbb4,0x1308,0x130e,0x1314,0x131a,0xbca,1,0x1320,0x1326,0x132c,0xbd4,
-0xbea,0xbfc,1,0x608,0x1332,0x1338,0x133e,0xc06,0x1344,1,0x134a,0x1350,0x1356,0xc1c,0xc32,0x135d,
-0x1363,0x1368,0x136e,0x1374,0x137a,0x1380,0x1386,0x138c,0x1392,0x1398,0x139e,1,1,0xc48,0xc56,0x13a4,
-0x13aa,0x13b0,0x13b6,0x13bd,0x13c3,0x13c8,0x13ce,0x13d4,0x13da,0x13e0,0x13e6,0x13ec,0x13f2,0x13f9,0x13ff,0x1404,
-0x140a,1,1,0x1410,0x1416,0x141c,0x1422,0x1428,0x142e,0x1435,0x143b,0x1440,1,1,1,0x1447,
-0x144d,0x1453,0x1459,1,0x145e,0x1464,0x146b,0x1471,0x1476,0x147c,1,1,1,0x1482,0x1488,0x148f,
-0x1495,0x149a,0x14a0,1,1,1,0xc64,0xc72,0x14a6,0x14ac,0x14b2,0x14b8,1,1,0x14be,0x14c4,
-0x14cb,0x14d1,0x14d6,0x14dc,0xc80,0xc8a,0x14e2,0x14e8,0x14ef,0x14f5,0xc94,0xc9e,0x14fb,0x1501,0x1506,0x150c,
-1,1,0xca8,0xcb2,0xcbc,0xcc6,0x1512,0x1518,0x151e,0x1524,0x152a,0x1530,0x1537,0x153d,0x1542,0x1548,
-0x154e,0x1554,0x155a,0x1560,0x1566,0x156c,0x1572,0x1578,0x157e,0x60c,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,0xcd0,0xcea,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0xd04,0xd1e,1,1,1,1,1,
-1,0x610,1,1,1,1,1,1,1,1,1,1,1,1,1,0x1584,
-0x158a,0x1590,0x1596,0x159c,0x15a2,0x15a8,0x15ae,0x15b6,0x15c0,0x15ca,0x15d4,0x15de,0x15e8,0x15f2,0x15fc,1,
-0x1606,0x1610,0x161a,0x1624,0x162d,0x1633,1,1,0x1638,0x163e,0x1644,0x164a,0xd38,0xd42,0x1653,0x165d,
-0x1665,0x166b,0x1671,1,1,1,0x1676,0x167c,1,1,0x1682,0x1688,0x1690,0x169a,0x16a3,0x16a9,
-0x16af,0x16b5,0x16ba,0x16c0,0x16c6,0x16cc,0x16d2,0x16d8,0x16de,0x16e4,0x16ea,0x16f0,0x16f6,0x16fc,0x1702,0x1708,
-0x170e,0x1714,0x171a,0x1720,0x1726,0x172c,0x1732,0x1738,0x173e,0x1744,0x174a,0x1750,0x1756,0x175c,1,1,
-0x1762,0x1768,1,1,1,1,1,1,0xd4c,0xd56,0xd60,0xd6a,0x1770,0x177a,0x1784,0x178e,
-0xd74,0xd7e,0x1798,0x17a2,0x17aa,0x17b0,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,0x614,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xffcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,
-0xfdcc,0xffcc,0xffcc,0xfdcc,0xffcc,0xfdcc,0xffcc,0xfdcc,0xfdcc,0xffd0,0xffb8,0xffb8,0xffb8,0xffb8,0xffd0,0xfdb0,
-0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xff94,0xff94,0xfdb8,0xfdb8,0xfdb8,0xfdb8,0xfd94,0xfd94,0xffb8,0xffb8,0xffb8,
-0xffb8,0xfdb8,0xfdb8,0xffb8,0xfdb8,0xfdb8,0xffb8,0xffb8,0xfe02,0xfe02,0xfe02,0xfe02,0xfc02,0xffb8,0xffb8,0xffb8,
-0xffb8,0xffcc,0xffcc,0xffcc,0x3c36,0x3c3c,0xfdcc,0x3c42,0x3c48,0xfde0,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,
-0xffcc,0xffb8,0xffb8,1,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffd0,0xffb8,0xffb8,0xffcc,
-0xffd2,0xffd4,0xffd4,0xffd2,0xffd4,0xffd4,0xffd2,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,0x29e1,1,1,1,1,1,1,1,
-1,1,0x29e5,1,1,1,1,1,0x17b7,0x17bd,0x29e9,0x17c3,0x17c9,0x17cf,1,0x17d5,
-1,0x17db,0x17e1,0x17e9,0x618,1,1,1,0x634,1,0x644,1,0x658,1,1,1,
-1,1,0x674,1,0x684,1,1,1,0x688,1,1,1,0x6a0,0x17f1,0x17f7,0xd88,
-0x17fd,0xd92,0x1803,0x180b,0x6b4,1,1,1,0x6d4,1,0x6e4,1,0x6fc,1,1,1,
-1,1,0x71c,1,0x72c,1,1,1,0x734,1,1,1,0x754,0xd9c,0xdae,0x1813,
-0x1819,0xdc0,1,1,1,0x76c,0x181f,0x1825,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0x182b,0x1831,1,0x1837,1,1,0x774,0x183d,1,1,1,1,0x1843,
-0x1849,0x184f,1,0x778,1,1,0x780,1,0x784,0x790,0x798,0x79c,0x1855,0x7ac,1,1,
-1,0x7b0,1,1,1,1,0x7b4,1,1,1,0x7c4,1,1,1,0x7c8,1,
-0x7cc,1,1,0x7d0,1,1,0x7d8,1,0x7dc,0x7e8,0x7f0,0x7f4,0x185b,0x804,1,1,
-1,0x808,1,1,1,0x80c,1,1,1,0x81c,1,1,1,0x820,1,0x824,
-1,1,0x1861,0x1867,1,0x186d,1,1,0x828,0x1873,1,1,1,1,0x1879,0x187f,
-0x1885,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,0x82c,0x830,0x188b,0x1891,1,1,1,1,1,1,
-1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x1897,
-0x189d,1,1,1,1,1,1,1,1,1,1,1,1,1,0x18a3,0x18a9,
-0x18af,0x18b5,1,1,0x18bb,0x18c1,0x834,0x838,0x18c7,0x18cd,0x18d3,0x18d9,0x18df,0x18e5,1,1,
-0x18eb,0x18f1,0x18f7,0x18fd,0x1903,0x1909,0x83c,0x840,0x190f,0x1915,0x191b,0x1921,0x1927,0x192d,0x1933,0x1939,
-0x193f,0x1945,0x194b,0x1951,1,1,0x1957,0x195d,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,
-0xffcc,0xffcc,0xffbc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,
-0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffbc,0xffc8,0xffcc,0xfe14,0xfe16,0xfe18,0xfe1a,0xfe1c,0xfe1e,0xfe20,0xfe22,
-0xfe24,0xfe26,0xfe26,0xfe28,0xfe2a,0xfe2c,1,0xfe2e,1,0xfe30,0xfe32,1,0xffcc,0xffb8,1,0xfe24,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xfe3c,0xfe3e,0xfe40,1,1,1,1,1,1,1,0x1962,0x1968,0x196f,0x1975,0x197b,0x844,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,0x850,1,0x854,0xfe36,0xfe38,0xfe3a,0xfe3c,0xfe3e,
-0xfe40,0xfe42,0xfe44,0xfdcc,0xfdcc,0xfdb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-0xfe46,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-0x1981,0x858,0x1987,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0x85c,0x198d,1,0x860,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,0xffcc,
-0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,1,1,0xffcc,0xffcc,1,0xffb8,0xffcc,0xffcc,0xffb8,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-0xfe48,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,
-0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffb8,0xffb8,0xffcc,0xffb8,0xffcc,0xffcc,
-0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffcc,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,1,1,1,1,1,1,1,1,1,
-0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffb8,
-0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffb8,
-0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xfe36,0xfe38,0xfe3a,0xffcc,
-0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0x864,0x1993,1,1,1,1,1,1,0x868,0x1999,1,0x86c,
-0x199f,1,1,1,1,1,1,1,0xfc0e,1,1,1,1,1,1,1,
-1,1,1,1,1,1,0xfe12,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc,1,1,
-1,0x29ec,0x29f2,0x29f8,0x29fe,0x2a04,0x2a0a,0x2a10,0x2a16,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,0xfe0e,1,0xfc00,1,1,1,1,1,1,1,0x870,
-1,1,1,0x19a5,0x19ab,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00,
-1,1,1,1,0x2a1c,0x2a22,1,0x2a28,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,0xffcc,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,0x2a2e,1,1,0x2a34,1,1,
-1,1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1,
-1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,0x2a3a,0x2a40,0x2a46,
-1,1,0x2a4c,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe0e,
-1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-0x878,0x19b1,1,1,0x19b7,0x19bd,0xfe12,1,1,1,1,1,1,1,1,0xfc00,
-0xfc00,1,1,1,1,0x2a52,0x2a58,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0x884,1,0x19c3,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,0xfc00,1,1,1,1,1,1,0x888,0x890,1,1,
-0x19c9,0x19cf,0x19d5,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,0x894,1,0x19db,1,1,1,1,0xfe12,1,1,
-1,1,1,1,1,0xfea8,0xfcb6,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0xfe0e,1,1,0x898,0x19e1,1,0xfc00,1,1,1,0x89c,0x19e7,0x19ed,
-1,0xdca,0x19f5,1,0xfe12,1,1,1,1,1,1,1,0xfc00,0xfc00,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,0xfc00,1,1,1,
-1,1,1,0x8a8,0x8b0,1,1,0x19fd,0x1a03,0x1a09,0xfe12,1,1,1,1,1,
-1,1,1,1,0xfc00,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,0xfc12,1,1,
-1,1,0xfc00,1,1,1,1,1,1,1,1,1,0x8b4,0x1a0f,1,0xdd4,
-0x1a17,0x1a1f,0xfc00,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,0xfece,0xfece,0xfe12,1,1,
-1,1,1,1,1,1,0xfed6,0xfed6,0xfed6,0xfed6,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xfeec,0xfeec,0xfe12,1,1,1,1,1,1,1,1,0xfef4,0xfef4,0xfef4,
-0xfef4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xffb8,1,0xffb8,1,0xffb0,1,1,1,1,1,1,0x2a5f,1,1,
-1,1,1,1,1,1,1,0x2a65,1,1,1,1,0x2a6b,1,1,1,
-1,0x2a71,1,1,1,1,0x2a77,1,1,1,1,1,1,1,1,1,
-1,1,1,0x2a7d,1,1,1,1,1,1,1,0xff02,0xff04,0x3c50,0xff08,0x3c58,
-0x2a82,1,0x2a88,1,0xff04,0xff04,0xff04,0xff04,1,1,0xff04,0x3c60,0xffcc,0xffcc,0xfe12,1,
-0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,0x2a8f,1,1,
-1,1,1,1,1,1,1,0x2a95,1,1,1,1,0x2a9b,1,1,1,
-1,0x2aa1,1,1,1,1,0x2aa7,1,1,1,1,1,1,1,1,1,
-1,1,1,0x2aad,1,1,1,1,1,1,0xffb8,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0x8c0,0x1a25,1,1,1,1,1,1,1,0xfc00,1,1,
-1,1,1,1,1,1,0xfe0e,1,0xfe12,0xfe12,1,1,1,1,1,1,
-1,1,1,1,1,1,1,0xffb8,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,
-0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,
-0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,0xffcc,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,0xffc8,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,0xffbc,0xffcc,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,1,1,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,
-0xffcc,0xffb8,1,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0x8c4,0x1a2b,0x8c8,0x1a31,0x8cc,0x1a37,0x8d0,0x1a3d,0x8d4,0x1a43,1,1,0x8d8,
-0x1a49,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xfe0e,0xfc00,1,1,1,1,0x8dc,0x1a4f,0x8e0,0x1a55,0x8e4,0x8e8,0x1a5b,0x1a61,
-0x8ec,0x1a67,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,
-0xfe12,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xffcc,0xffcc,0xffcc,1,0xfe02,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,
-0xffb8,0xffb8,0xffcc,1,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,1,1,1,1,0xffb8,
-1,1,1,1,1,1,0xffcc,1,1,1,0xffcc,0xffcc,1,1,1,1,
-1,1,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffd4,
-0xffac,0xffb8,0xff94,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffd0,0xffc8,0xffc8,0xffb8,1,0xffcc,0xffd2,0xffb8,
-0xffcc,0xffb8,0x1a6c,0x1a72,0x1a78,0x1a7e,0x1a85,0x1a8b,0x1a91,0x1a97,0x1a9f,0x1aa9,0x1ab0,0x1ab6,0x1abc,0x1ac2,
-0x1ac8,0x1ace,0x1ad5,0x1adb,0x1ae0,0x1ae6,0x1aee,0x1af8,0x1b02,0x1b0c,0x1b14,0x1b1a,0x1b20,0x1b26,0x1b2f,0x1b39,
-0x1b41,0x1b47,0x1b4c,0x1b52,0x1b58,0x1b5e,0x1b64,0x1b6a,0x1b70,0x1b76,0x1b7d,0x1b83,0x1b88,0x1b8e,0x1b94,0x1b9a,
-0x1ba2,0x1bac,0x1bb4,0x1bba,0x1bc0,0x1bc6,0x1bcc,0x1bd2,0xdde,0xde8,0x1bda,0x1be4,0x1bec,0x1bf2,0x1bf8,0x1bfe,
-0x1c04,0x1c0a,0x1c10,0x1c16,0x1c1d,0x1c23,0x1c28,0x1c2e,0x1c34,0x1c3a,0x1c40,0x1c46,0x1c4c,0x1c52,0x1c5a,0x1c64,
-0x1c6e,0x1c78,0x1c82,0x1c8c,0x1c96,0x1ca0,0x1ca9,0x1caf,0x1cb5,0x1cbb,0x1cc0,0x1cc6,0xdf2,0xdfc,0x1cce,0x1cd8,
-0x1ce0,0x1ce6,0x1cec,0x1cf2,0xe06,0xe10,0x1cfa,0x1d04,0x1d0e,0x1d18,0x1d22,0x1d2c,0x1d34,0x1d3a,0x1d40,0x1d46,
-0x1d4c,0x1d52,0x1d58,0x1d5e,0x1d64,0x1d6a,0x1d70,0x1d76,0x1d7c,0x1d82,0x1d8a,0x1d94,0x1d9e,0x1da8,0x1db0,0x1db6,
-0x1dbd,0x1dc3,0x1dc8,0x1dce,0x1dd4,0x1dda,0x1de0,0x1de6,0x1dec,0x1df2,0x1df9,0x1dff,0x1e05,0x1e0b,0x1e11,0x1e17,
-0x1e1c,0x1e22,0x1e28,0x1e2e,0x1e35,0x1e3b,0x1e41,0x1e47,0x1e4c,0x1e52,0x1e58,0x1e5e,1,0x1e65,1,1,
-1,1,0xe1a,0xe28,0x1e6a,0x1e70,0x1e78,0x1e82,0x1e8c,0x1e96,0x1ea0,0x1eaa,0x1eb4,0x1ebe,0x1ec8,0x1ed2,
-0x1edc,0x1ee6,0x1ef0,0x1efa,0x1f04,0x1f0e,0x1f18,0x1f22,0x1f2c,0x1f36,0xe36,0xe40,0x1f3e,0x1f44,0x1f4a,0x1f50,
-0x1f58,0x1f62,0x1f6c,0x1f76,0x1f80,0x1f8a,0x1f94,0x1f9e,0x1fa8,0x1fb2,0x1fba,0x1fc0,0x1fc6,0x1fcc,0xe4a,0xe54,
-0x1fd2,0x1fd8,0x1fe0,0x1fea,0x1ff4,0x1ffe,0x2008,0x2012,0x201c,0x2026,0x2030,0x203a,0x2044,0x204e,0x2058,0x2062,
-0x206c,0x2076,0x2080,0x208a,0x2094,0x209e,0x20a6,0x20ac,0x20b2,0x20b8,0x20c0,0x20ca,0x20d4,0x20de,0x20e8,0x20f2,
-0x20fc,0x2106,0x2110,0x211a,0x2122,0x2128,0x212f,0x2135,0x213a,0x2140,0x2146,0x214c,1,1,1,1,
-1,1,0xe5e,0xe74,0xe8c,0xe9a,0xea8,0xeb6,0xec4,0xed2,0xede,0xef4,0xf0c,0xf1a,0xf28,0xf36,
-0xf44,0xf52,0xf5e,0xf6c,0x2155,0x215f,0x2169,0x2173,1,1,0xf7a,0xf88,0x217d,0x2187,0x2191,0x219b,
-1,1,0xf96,0xfac,0xfc4,0xfd2,0xfe0,0xfee,0xffc,0x100a,0x1016,0x102c,0x1044,0x1052,0x1060,0x106e,
-0x107c,0x108a,0x1096,0x10a8,0x21a5,0x21af,0x21b9,0x21c3,0x21cd,0x21d7,0x10ba,0x10cc,0x21e1,0x21eb,0x21f5,0x21ff,
-0x2209,0x2213,0x10de,0x10ec,0x221d,0x2227,0x2231,0x223b,1,1,0x10fa,0x1108,0x2245,0x224f,0x2259,0x2263,
-1,1,0x1116,0x1128,0x226d,0x2277,0x2281,0x228b,0x2295,0x229f,1,0x113a,1,0x22a9,1,0x22b3,
-1,0x22bd,0x114c,0x1162,0x117a,0x1188,0x1196,0x11a4,0x11b2,0x11c0,0x11cc,0x11e2,0x11fa,0x1208,0x1216,0x1224,
-0x1232,0x1240,0x124c,0x3b9e,0x22c5,0x3ba6,0x1256,0x3bae,0x22cb,0x3bb6,0x22d1,0x3bbe,0x22d7,0x3bc6,0x1260,0x3bce,
-1,1,0x22de,0x22e8,0x22f7,0x2307,0x2317,0x2327,0x2337,0x2347,0x2352,0x235c,0x236b,0x237b,0x238b,0x239b,
-0x23ab,0x23bb,0x23c6,0x23d0,0x23df,0x23ef,0x23ff,0x240f,0x241f,0x242f,0x243a,0x2444,0x2453,0x2463,0x2473,0x2483,
-0x2493,0x24a3,0x24ae,0x24b8,0x24c7,0x24d7,0x24e7,0x24f7,0x2507,0x2517,0x2522,0x252c,0x253b,0x254b,0x255b,0x256b,
-0x257b,0x258b,0x2595,0x259b,0x25a3,0x25aa,0x25b3,1,0x126a,0x25bd,0x25c5,0x25cb,0x25d1,0x3bd6,0x25d6,1,
-0x2ab2,0x8f0,1,0x25dd,0x25e5,0x25ec,0x25f5,1,0x1274,0x25ff,0x2607,0x3bde,0x260d,0x3be6,0x2612,0x2619,
-0x261f,0x2625,0x262b,0x2631,0x2639,0x3bf0,1,1,0x2641,0x2649,0x2651,0x2657,0x265d,0x3bfa,1,0x2663,
-0x2669,0x266f,0x2675,0x267b,0x2683,0x3c04,0x268b,0x2691,0x2697,0x269f,0x26a7,0x26ad,0x26b3,0x3c0e,0x26b9,0x26bf,
-0x3c16,0x2ab7,1,1,0x26c7,0x26ce,0x26d7,1,0x127e,0x26e1,0x26e9,0x3c1e,0x26ef,0x3c26,0x26f4,0x2abb,
-0x8fc,1,0xfa09,0xfa09,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xffcc,0xffcc,0xfe02,0xfe02,0xffcc,0xffcc,0xffcc,0xffcc,0xfe02,0xfe02,0xfe02,0xffcc,0xffcc,1,
-1,1,1,0xffcc,1,1,1,0xfe02,0xfe02,0xffcc,0xffb8,0xffcc,0xfe02,0xfe02,0xffb8,0xffb8,
-0xffb8,0xffb8,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0x2abe,1,1,1,0x2ac2,0x3c2e,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0x908,1,0x90c,1,0x910,1,1,1,1,1,0x26fb,0x2701,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,0x2707,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0x270d,0x2713,0x2719,0x914,1,0x918,1,0x91c,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,0x920,0x271f,1,1,1,0x924,0x2725,1,0x928,0x272b,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,0x92c,0x2731,0x930,0x2737,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x934,
-1,1,1,0x273d,1,0x938,0x2743,0x93c,1,0x2749,0x940,0x274f,1,1,1,0x944,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0x2755,0x948,0x275b,1,0x94c,0x950,1,1,1,1,1,1,1,0x2761,
-0x2767,0x276d,0x2773,0x2779,0x954,0x958,0x277f,0x2785,0x95c,0x960,0x278b,0x2791,0x964,0x968,0x96c,0x970,
-1,1,0x2797,0x279d,0x974,0x978,0x27a3,0x27a9,0x97c,0x980,0x27af,0x27b5,1,1,1,1,
-1,1,1,0x984,0x988,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0x98c,1,1,1,1,1,0x990,0x994,1,0x998,0x27bb,0x27c1,
-0x27c7,0x27cd,1,1,0x99c,0x9a0,0x9a4,0x9a8,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,0x27d3,0x27d9,0x27df,0x27e5,1,1,1,1,
-1,1,0x27eb,0x27f1,0x27f7,0x27fd,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x2ac7,
-0x2acb,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,0x2acf,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,0xffb4,0xffc8,0xffd0,0xffbc,0xffc0,0xffc0,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x9ac,1,
-1,1,1,0x9b0,0x2803,0x9b4,0x2809,0x9b8,0x280f,0x9bc,0x2815,0x9c0,0x281b,0x9c4,0x2821,0x9c8,
-0x2827,0x9cc,0x282d,0x9d0,0x2833,0x9d4,0x2839,0x9d8,0x283f,0x9dc,0x2845,1,0x9e0,0x284b,0x9e4,0x2851,
-0x9e8,0x2857,1,1,1,1,1,0x9ec,0x285d,0x2863,0x9f4,0x2869,0x286f,0x9fc,0x2875,0x287b,
-0xa04,0x2881,0x2887,0xa0c,0x288d,0x2893,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,0x2899,1,1,1,1,0xfc10,
-0xfc10,1,1,0xa14,0x289f,1,1,1,1,1,1,1,0xa18,1,1,1,
-1,0xa1c,0x28a5,0xa20,0x28ab,0xa24,0x28b1,0xa28,0x28b7,0xa2c,0x28bd,0xa30,0x28c3,0xa34,0x28c9,0xa38,
-0x28cf,0xa3c,0x28d5,0xa40,0x28db,0xa44,0x28e1,0xa48,0x28e7,1,0xa4c,0x28ed,0xa50,0x28f3,0xa54,0x28f9,
-1,1,1,1,1,0xa58,0x28ff,0x2905,0xa60,0x290b,0x2911,0xa68,0x2917,0x291d,0xa70,0x2923,
-0x2929,0xa78,0x292f,0x2935,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,0xa80,0xa84,0xa88,0xa8c,1,0x293b,1,1,0x2941,0x2947,0x294d,
-0x2953,1,1,0xa90,0x2959,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0xffcc,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xffb8,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,0xffcc,1,0xffcc,0xffcc,0xffb8,1,1,0xffcc,
-0xffcc,1,1,1,1,1,0xffcc,0xffcc,1,0xffcc,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,
-1,1,1,1,1,1,1,1,0xae2,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
-0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
-0x1289,0x1289,0x1289,0x1289,0xae2,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
-0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
-0xae2,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
-0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0xae2,0x1289,0x1289,0x1289,
-0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
-0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0x3c66,1,0x3c66,0x3c66,0x3c66,0x3c66,0x3c66,0x3c66,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x3c66,0x3c66,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0x3c66,1,1,1,1,0x3c66,1,1,1,0x3c66,1,0x3c66,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,0x3b97,1,0x2ad5,
-0x2ad9,0x2add,0x2ae1,0x2ae5,0x2ae9,0x2aed,0x2af1,0x2af1,0x2af5,0x2af9,0x2afd,0x2b01,0x2b05,0x2b09,0x2b0d,0x2b11,
-0x2b15,0x2b19,0x2b1d,0x2b21,0x2b25,0x2b29,0x2b2d,0x2b31,0x2b35,0x2b39,0x2b3d,0x2b41,0x2b45,0x2b49,0x2b4d,0x2b51,
-0x2b55,0x2b59,0x2b5d,0x2b61,0x2b65,0x2b69,0x2b6d,0x2b71,0x2b75,0x2b79,0x2b7d,0x2b81,0x2b85,0x2b89,0x2b8d,0x2b91,
-0x2b95,0x2b99,0x2b9d,0x2ba1,0x2ba5,0x2ba9,0x2bad,0x2bb1,0x2bb5,0x2bb9,0x2bbd,0x2bc1,0x2bc5,0x2bc9,0x2bcd,0x2bd1,
-0x2bd5,0x2bd9,0x2bdd,0x2be1,0x2be5,0x2be9,0x2bed,0x2bf1,0x2bf5,0x2bf9,0x2bfd,0x2c01,0x2c05,0x2c09,0x2c0d,0x2c11,
-0x2c15,0x2c19,0x2c1d,0x2c21,0x2c25,0x2c29,0x2c2d,0x2c31,0x2c35,0x2c39,0x2c3d,0x2b21,0x2c41,0x2c45,0x2c49,0x2c4d,
-0x2c51,0x2c55,0x2c59,0x2c5d,0x2c61,0x2c65,0x2c69,0x2c6d,0x2c71,0x2c75,0x2c79,0x2c7d,0x2c81,0x2c85,0x2c89,0x2c8d,
-0x2c91,0x2c95,0x2c99,0x2c9d,0x2ca1,0x2ca5,0x2ca9,0x2cad,0x2cb1,0x2cb5,0x2cb9,0x2cbd,0x2cc1,0x2cc5,0x2cc9,0x2ccd,
-0x2cd1,0x2cd5,0x2cd9,0x2cdd,0x2ce1,0x2ce5,0x2ce9,0x2ced,0x2cf1,0x2cf5,0x2cf9,0x2cfd,0x2d01,0x2d05,0x2d09,0x2d0d,
-0x2d11,0x2d15,0x2d19,0x2d1d,0x2d21,0x2d25,0x2d29,0x2d2d,0x2d31,0x2d35,0x2d39,0x2d3d,0x2d41,0x2d45,0x2d49,0x2d4d,
-0x2c89,0x2d51,0x2d55,0x2d59,0x2d5d,0x2d61,0x2d65,0x2d69,0x2d6d,0x2c49,0x2d71,0x2d75,0x2d79,0x2d7d,0x2d81,0x2d85,
-0x2d89,0x2d8d,0x2d91,0x2d95,0x2d99,0x2d9d,0x2da1,0x2da5,0x2da9,0x2dad,0x2db1,0x2db5,0x2db9,0x2dbd,0x2b21,0x2dc1,
-0x2dc5,0x2dc9,0x2dcd,0x2dd1,0x2dd5,0x2dd9,0x2ddd,0x2de1,0x2de5,0x2de9,0x2ded,0x2df1,0x2df5,0x2df9,0x2dfd,0x2e01,
-0x2e05,0x2e09,0x2e0d,0x2e11,0x2e15,0x2e19,0x2e1d,0x2e21,0x2e25,0x2e29,0x2c51,0x2e2d,0x2e31,0x2e35,0x2e39,0x2e3d,
-0x2e41,0x2e45,0x2e49,0x2e4d,0x2e51,0x2e55,0x2e59,0x2e5d,0x2e61,0x2e65,0x2e69,0x2e6d,0x2e71,0x2e75,0x2e79,0x2e7d,
-0x2e81,0x2e85,0x2e89,0x2e8d,0x2e91,0x2e95,0x2e99,0x2e9d,0x2ea1,0x2ea5,0x2ea9,0x2ead,0x2eb1,0x2eb5,0x2eb9,0x2ebd,
-0x2ec1,0x2ec5,0x2ec9,0x2ecd,0x2ed1,0x2ed5,0x2ed9,0x2edd,0x2ee1,0x2ee5,0x2ee9,0x2eed,0x2ef1,1,1,0x2ef5,
-1,0x2ef9,1,1,0x2efd,0x2f01,0x2f05,0x2f09,0x2f0d,0x2f11,0x2f15,0x2f19,0x2f1d,0x2f21,1,0x2f25,
-1,0x2f29,1,1,0x2f2d,0x2f31,1,1,1,0x2f35,0x2f39,0x2f3d,0x2f41,0x2f45,0x2f49,0x2f4d,
-0x2f51,0x2f55,0x2f59,0x2f5d,0x2f61,0x2f65,0x2f69,0x2f6d,0x2f71,0x2f75,0x2f79,0x2f7d,0x2f81,0x2f85,0x2f89,0x2f8d,
-0x2f91,0x2f95,0x2f99,0x2f9d,0x2fa1,0x2fa5,0x2fa9,0x2fad,0x2fb1,0x2fb5,0x2fb9,0x2fbd,0x2fc1,0x2fc5,0x2fc9,0x2fcd,
-0x2fd1,0x2fd5,0x2fd9,0x2fdd,0x2fe1,0x2fe5,0x2d25,0x2fe9,0x2fed,0x2ff1,0x2ff5,0x2ff9,0x2ffd,0x2ffd,0x3001,0x3005,
-0x3009,0x300d,0x3011,0x3015,0x3019,0x301d,0x2f2d,0x3021,0x3025,0x3029,0x302d,0x3031,0x3037,1,1,0x303b,
-0x303f,0x3043,0x3047,0x304b,0x304f,0x3053,0x3057,0x2f65,0x305b,0x305f,0x3063,0x2ef5,0x3067,0x306b,0x306f,0x3073,
-0x3077,0x307b,0x307f,0x3083,0x3087,0x308b,0x308f,0x3093,0x2f89,0x3097,0x2f8d,0x309b,0x309f,0x30a3,0x30a7,0x30ab,
-0x2ef9,0x2b75,0x30af,0x30b3,0x30b7,0x2c8d,0x2de9,0x30bb,0x30bf,0x2fa9,0x30c3,0x2fad,0x30c7,0x30cb,0x30cf,0x2f01,
-0x30d3,0x30d7,0x30db,0x30df,0x30e3,0x2f05,0x30e7,0x30eb,0x30ef,0x30f3,0x30f7,0x30fb,0x2fe5,0x30ff,0x3103,0x2d25,
-0x3107,0x2ff5,0x310b,0x310f,0x3113,0x3117,0x311b,0x3009,0x311f,0x2f29,0x3123,0x300d,0x2c41,0x3127,0x3011,0x312b,
-0x3019,0x312f,0x3133,0x3137,0x313b,0x313f,0x3021,0x2f19,0x3143,0x3025,0x3147,0x3029,0x314b,0x2af1,0x314f,0x3155,
-0x315b,0x3161,0x3165,0x3169,0x316d,0x3173,0x3179,0x317f,0x3183,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x3186,
-0xfe34,0x318c,1,1,1,1,1,1,1,1,1,1,0x3192,0x3198,0x31a0,0x31aa,
-0x31b2,0x31b8,0x31be,0x31c4,0x31ca,0x31d0,0x31d6,0x31dc,0x31e2,1,0x31e8,0x31ee,0x31f4,0x31fa,0x3200,1,
-0x3206,1,0x320c,0x3212,1,0x3218,0x321e,1,0x3224,0x322a,0x3230,0x3236,0x323c,0x3242,0x3248,0x324e,
-0x3254,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,
-0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xffb8,1,0xffcc,1,1,1,1,1,1,1,1,0xffcc,0xfe02,0xffb8,
-1,1,1,1,0xfe12,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,
-1,1,1,1,1,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8,1,
-1,1,1,1,1,1,1,1,0xa94,0x295f,0xa9a,0x2969,1,1,1,1,
-1,0xaa0,1,1,1,1,1,0x2973,1,1,1,1,1,1,1,1,
-1,0xfe12,0xfc0e,1,1,1,1,1,1,1,0xfc00,1,1,1,1,1,
-1,0x297d,0x2987,1,0xaa6,0xaac,0xfe12,0xfe12,1,1,1,1,1,1,1,1,
-1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,0xfe0e,1,1,
-1,1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,0xfe0e,0xfe12,
-1,1,1,1,1,1,1,1,1,1,1,0xfe0e,0xfe0e,1,0xfc00,1,
-1,1,1,1,1,1,0xab2,1,1,1,0x2991,0x299b,0xfe12,1,1,1,
-1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,0xfe12,1,1,
-1,0xfe0e,1,1,1,1,1,1,1,1,1,0xfc00,1,1,1,1,
-1,1,1,1,0xabe,0xfc00,0x29a5,0x29af,0xfc00,0x29b9,1,1,0xfe12,0xfe0e,1,1,
-1,1,1,1,1,1,1,1,1,1,0xad0,0xad6,0x29c3,0x29cd,1,1,
-1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,0xfc00,1,1,1,
-1,0xadc,1,1,0x29d7,1,1,1,1,0xfe12,0xfe12,1,0xfe02,0xfe02,0xfe02,0xfe02,
-0xfe02,1,1,1,1,1,1,1,1,1,1,1,0xfe0c,0xfe0c,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,0xfe02,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,0x325a,0x3264,0x3278,0x3290,0x32a8,
-0x32c0,0x32d8,0xffb0,0xffb0,0xfe02,0xfe02,0xfe02,1,1,1,0xffc4,0xffb0,0xffb0,0xffb0,1,1,
-1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,
-0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,0x32e6,0x32f0,0x3304,
-0x331c,0x3334,0x334c,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,1,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,1,1,1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,
-1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xfe0e,1,
-1,1,1,1,0x335b,0x335f,0x3363,0x3367,0x336d,0x2f4d,0x3371,0x3375,0x3379,0x337d,0x2f51,0x3381,
-0x3385,0x3389,0x2f55,0x338f,0x3393,0x3397,0x339b,0x33a1,0x33a5,0x33a9,0x33ad,0x33b3,0x33b7,0x33bb,0x33bf,0x303f,
-0x33c3,0x33c9,0x33cd,0x33d1,0x33d5,0x33d9,0x33dd,0x33e1,0x33e5,0x3053,0x2f59,0x2f5d,0x3057,0x33e9,0x33ed,0x2c59,
-0x33f1,0x2f61,0x33f5,0x33f9,0x33fd,0x3401,0x3401,0x3401,0x3405,0x340b,0x340f,0x3413,0x3417,0x341d,0x3421,0x3425,
-0x3429,0x342d,0x3431,0x3435,0x3439,0x343d,0x3441,0x3445,0x3449,0x344d,0x344d,0x305f,0x3451,0x3455,0x3459,0x345d,
-0x2f69,0x3461,0x3465,0x3469,0x2ebd,0x346d,0x3471,0x3475,0x3479,0x347d,0x3481,0x3485,0x3489,0x348d,0x3493,0x3497,
-0x349b,0x349f,0x34a3,0x34a7,0x34ab,0x34b1,0x34b7,0x34bb,0x34bf,0x34c3,0x34c7,0x34cb,0x34cf,0x34d3,0x34d7,0x34d7,
-0x34db,0x34e1,0x34e5,0x2c49,0x34e9,0x34ed,0x34f3,0x34f7,0x34fb,0x34ff,0x3503,0x3507,0x2f7d,0x350b,0x350f,0x3513,
-0x3519,0x351d,0x3523,0x3527,0x352b,0x352f,0x3533,0x3537,0x353b,0x353f,0x3543,0x3547,0x354b,0x354f,0x3555,0x3559,
-0x355d,0x3561,0x2b71,0x3565,0x356b,0x356f,0x356f,0x3575,0x3579,0x3579,0x357d,0x3581,0x3587,0x358d,0x3591,0x3595,
-0x3599,0x359d,0x35a1,0x35a5,0x35a9,0x35ad,0x35b1,0x2f81,0x35b5,0x35bb,0x35bf,0x35c3,0x308f,0x35c3,0x35c7,0x2f89,
-0x35cb,0x35cf,0x35d3,0x35d7,0x2f8d,0x2b05,0x35db,0x35df,0x35e3,0x35e7,0x35eb,0x35ef,0x35f3,0x35f9,0x35fd,0x3601,
-0x3605,0x3609,0x360d,0x3613,0x3617,0x361b,0x361f,0x3623,0x3627,0x362b,0x362f,0x3633,0x2f91,0x3637,0x363b,0x3641,
-0x3645,0x3649,0x364d,0x2f99,0x3651,0x3655,0x3659,0x365d,0x3661,0x3665,0x3669,0x366d,0x2b75,0x30af,0x3671,0x3675,
-0x3679,0x367d,0x3683,0x3687,0x368b,0x368f,0x2f9d,0x3693,0x3699,0x369d,0x36a1,0x3161,0x36a5,0x36a9,0x36ad,0x36b1,
-0x36b5,0x36bb,0x36bf,0x36c3,0x36c7,0x36cd,0x36d1,0x36d5,0x36d9,0x2c8d,0x36dd,0x36e1,0x36e7,0x36ed,0x36f3,0x36f7,
-0x36fd,0x3701,0x3705,0x3709,0x370d,0x2fa1,0x2de9,0x3711,0x3715,0x3719,0x371d,0x3723,0x3727,0x372b,0x372f,0x30bf,
-0x3733,0x3737,0x373d,0x3741,0x3745,0x374b,0x3751,0x3755,0x30c3,0x3759,0x375d,0x3761,0x3765,0x3769,0x376d,0x3771,
-0x3777,0x377b,0x3781,0x3785,0x378b,0x30cb,0x378f,0x3793,0x3799,0x379d,0x37a1,0x37a7,0x37ad,0x37b1,0x37b5,0x37b9,
-0x37bd,0x37bd,0x37c1,0x37c5,0x30d3,0x37c9,0x37cd,0x37d1,0x37d5,0x37d9,0x37df,0x37e3,0x2c55,0x37e9,0x37ef,0x37f3,
-0x37f9,0x37ff,0x3805,0x3809,0x30eb,0x380d,0x3813,0x3819,0x381f,0x3825,0x3829,0x3829,0x30ef,0x3169,0x382d,0x3831,
-0x3835,0x3839,0x383f,0x2bbd,0x30f7,0x3843,0x3847,0x2fcd,0x384d,0x3853,0x2f15,0x3859,0x385d,0x2fdd,0x3861,0x3865,
-0x3869,0x386f,0x386f,0x3875,0x3879,0x387d,0x3883,0x3887,0x388b,0x388f,0x3895,0x3899,0x389d,0x38a1,0x38a5,0x38a9,
-0x38af,0x38b3,0x38b7,0x38bb,0x38bf,0x38c3,0x38c7,0x38cd,0x38d3,0x38d7,0x38dd,0x38e1,0x38e7,0x38eb,0x2ff5,0x38ef,
-0x38f5,0x38fb,0x38ff,0x3905,0x3909,0x390f,0x3913,0x3917,0x391b,0x391f,0x3923,0x3927,0x392d,0x3933,0x3939,0x3575,
-0x393f,0x3943,0x3947,0x394b,0x394f,0x3953,0x3957,0x395b,0x395f,0x3963,0x3967,0x396b,0x2c9d,0x3971,0x3975,0x3979,
-0x397d,0x3981,0x3985,0x3001,0x3989,0x398d,0x3991,0x3995,0x3999,0x399f,0x39a5,0x39ab,0x39af,0x39b3,0x39b7,0x39bb,
-0x39c1,0x39c5,0x39cb,0x39cf,0x39d3,0x39d9,0x39df,0x39e3,0x2ba9,0x39e7,0x39eb,0x39ef,0x39f3,0x39f7,0x39fb,0x3113,
-0x39ff,0x3a03,0x3a07,0x3a0b,0x3a0f,0x3a13,0x3a17,0x3a1b,0x3a1f,0x3a23,0x3a29,0x3a2d,0x3a31,0x3a35,0x3a39,0x3a3d,
-0x3a43,0x3a49,0x3a4d,0x3a51,0x3127,0x312b,0x3a55,0x3a59,0x3a5f,0x3a63,0x3a67,0x3a6b,0x3a6f,0x3a75,0x3a7b,0x3a7f,
-0x3a83,0x3a87,0x3a8d,0x312f,0x3a91,0x3a97,0x3a9d,0x3aa1,0x3aa5,0x3aa9,0x3aaf,0x3ab3,0x3ab7,0x3abb,0x3abf,0x3ac3,
-0x3ac7,0x3acb,0x3ad1,0x3ad5,0x3ad9,0x3add,0x3ae3,0x3ae7,0x3aeb,0x3aef,0x3af3,0x3af9,0x3aff,0x3b03,0x3b07,0x3b0b,
-0x3b11,0x3b15,0x3147,0x3147,0x3b1b,0x3b1f,0x3b25,0x3b29,0x3b2d,0x3b31,0x3b35,0x3b39,0x3b3d,0x3b41,0x314b,0x3b47,
-0x3b4b,0x3b4f,0x3b53,0x3b57,0x3b5b,0x3b61,0x3b65,0x3b6b,0x3b71,0x3b77,0x3b7b,0x3b7f,0x3b83,0x3b87,0x3b8b,0x3b8f,
-0x3b93,0x3b97,1,1
-};
-
-static const UCPTrie norm2_nfc_data_trie={
- norm2_nfc_data_trieIndex,
- { norm2_nfc_data_trieData },
- 1746, 7892,
- 0x2fc00, 0x30,
- 0, 0,
- 0, 0,
- 0xc4, 0x226,
- 0x1,
-};
-
-static const uint16_t norm2_nfc_data_extraData[7732]={
-0xffff,0xffff,0x8670,0x44dc,0x8670,0x44c0,0x8670,0x44de,0x600,0x180,0x602,0x182,0x604,0x185,0x606,0x186,
-0x608,0x200,0x60c,0x205,0x60e,0x44d,0x610,0x189,0x612,0x3d44,0x614,0x18b,0x618,0x39a,0x61e,0x400,
-0x622,0x404,0x646,0x3d41,0x64a,0x3c00,0x8650,0x208,0x60e,0x3c04,0x646,0x3c08,0x8662,0x3c0c,0x602,0x20c,
-0x604,0x210,0x60e,0x214,0x618,0x218,0x864e,0x18f,0x60e,0x3c14,0x618,0x21c,0x646,0x3c18,0x64e,0x3c20,
-0x65a,0x3c24,0x8662,0x3c1c,0x600,0x190,0x602,0x192,0x604,0x195,0x606,0x3d78,0x608,0x225,0x60c,0x228,
-0x60e,0x22c,0x610,0x196,0x612,0x3d74,0x618,0x234,0x61e,0x408,0x622,0x40c,0x646,0x3d71,0x64e,0x451,
-0x650,0x230,0x65a,0x3c30,0x8660,0x3c34,0x860e,0x3c3c,0x602,0x3e8,0x604,0x238,0x608,0x3c40,0x60c,0x23c,
-0x60e,0x240,0x618,0x3cc,0x864e,0x244,0x604,0x248,0x60e,0x3c44,0x610,0x3c4c,0x618,0x43c,0x646,0x3c48,
-0x64e,0x3c50,0x865c,0x3c54,0x600,0x198,0x602,0x19a,0x604,0x19c,0x606,0x250,0x608,0x254,0x60c,0x258,
-0x60e,0x260,0x610,0x19f,0x612,0x3d90,0x618,0x39e,0x61e,0x410,0x622,0x414,0x646,0x3d94,0x650,0x25c,
-0x8660,0x3c58,0x8604,0x268,0x602,0x3c60,0x618,0x3d0,0x646,0x3c64,0x64e,0x26c,0x8662,0x3c68,0x602,0x272,
-0x618,0x27a,0x646,0x3c6d,0x64e,0x276,0x65a,0x3c78,0x8662,0x3c74,0x602,0x3c7c,0x60e,0x3c80,0x8646,0x3c84,
-0x600,0x3f0,0x602,0x286,0x606,0x1a2,0x60e,0x3c88,0x618,0x28e,0x646,0x3c8c,0x64e,0x28a,0x65a,0x3c94,
-0x8662,0x3c90,0x600,0x1a4,0x602,0x1a6,0x604,0x1a9,0x606,0x1ab,0x608,0x299,0x60c,0x29c,0x60e,0x45d,
-0x610,0x1ad,0x612,0x3d9c,0x616,0x2a0,0x618,0x3a2,0x61e,0x418,0x622,0x41c,0x636,0x341,0x646,0x3d99,
-0x8650,0x3d5,0x602,0x3ca8,0x860e,0x3cac,0x602,0x2a8,0x60e,0x3cb0,0x618,0x2b0,0x61e,0x420,0x622,0x424,
-0x646,0x3cb5,0x64e,0x2ac,0x8662,0x3cbc,0x602,0x2b5,0x604,0x2b8,0x60e,0x3cc0,0x618,0x2c1,0x646,0x3cc5,
-0x64c,0x430,0x864e,0x2bc,0x60e,0x3cd4,0x618,0x2c8,0x646,0x3cd8,0x64c,0x434,0x64e,0x2c4,0x65a,0x3ce0,
-0x8662,0x3cdc,0x600,0x1b2,0x602,0x1b4,0x604,0x1b6,0x606,0x2d1,0x608,0x2d5,0x60c,0x2d8,0x610,0x1b9,
-0x612,0x3dcc,0x614,0x2dc,0x616,0x2e0,0x618,0x3a6,0x61e,0x428,0x622,0x42c,0x636,0x35f,0x646,0x3dc8,
-0x648,0x3ce4,0x650,0x2e4,0x65a,0x3cec,0x8660,0x3ce8,0x606,0x3cf8,0x8646,0x3cfc,0x600,0x3d00,0x602,0x3d04,
-0x604,0x2e8,0x60e,0x3d0c,0x610,0x3d08,0x8646,0x3d10,0x60e,0x3d14,0x8610,0x3d18,0x600,0x3de4,0x602,0x1ba,
-0x604,0x2ec,0x606,0x3df0,0x608,0x464,0x60e,0x3d1c,0x610,0x2f0,0x612,0x3dec,0x8646,0x3de8,0x602,0x2f2,
-0x604,0x3d20,0x60e,0x2f6,0x618,0x2fa,0x646,0x3d24,0x8662,0x3d28,0x600,0x1c0,0x602,0x1c2,0x604,0x1c5,
-0x606,0x1c6,0x608,0x202,0x60c,0x207,0x60e,0x44f,0x610,0x1c9,0x612,0x3d46,0x614,0x1cb,0x618,0x39c,
-0x61e,0x402,0x622,0x406,0x646,0x3d43,0x64a,0x3c02,0x8650,0x20a,0x60e,0x3c06,0x646,0x3c0a,0x8662,0x3c0e,
-0x602,0x20e,0x604,0x212,0x60e,0x216,0x618,0x21a,0x864e,0x1cf,0x60e,0x3c16,0x618,0x21e,0x646,0x3c1a,
-0x64e,0x3c22,0x65a,0x3c26,0x8662,0x3c1e,0x600,0x1d0,0x602,0x1d2,0x604,0x1d5,0x606,0x3d7a,0x608,0x227,
-0x60c,0x22a,0x60e,0x22e,0x610,0x1d6,0x612,0x3d76,0x618,0x236,0x61e,0x40a,0x622,0x40e,0x646,0x3d73,
-0x64e,0x453,0x650,0x232,0x65a,0x3c32,0x8660,0x3c36,0x860e,0x3c3e,0x602,0x3ea,0x604,0x23a,0x608,0x3c42,
-0x60c,0x23e,0x60e,0x242,0x618,0x3ce,0x864e,0x246,0x604,0x24a,0x60e,0x3c46,0x610,0x3c4e,0x618,0x43e,
-0x646,0x3c4a,0x64e,0x3c52,0x65c,0x3c56,0x8662,0x3d2c,0x600,0x1d8,0x602,0x1da,0x604,0x1dc,0x606,0x252,
-0x608,0x256,0x60c,0x25a,0x610,0x1df,0x612,0x3d92,0x618,0x3a0,0x61e,0x412,0x622,0x416,0x646,0x3d96,
-0x650,0x25e,0x8660,0x3c5a,0x604,0x26a,0x8618,0x3e0,0x602,0x3c62,0x618,0x3d2,0x646,0x3c66,0x64e,0x26e,
-0x8662,0x3c6a,0x602,0x274,0x618,0x27c,0x646,0x3c6f,0x64e,0x278,0x65a,0x3c7a,0x8662,0x3c76,0x602,0x3c7e,
-0x60e,0x3c82,0x8646,0x3c86,0x600,0x3f2,0x602,0x288,0x606,0x1e2,0x60e,0x3c8a,0x618,0x290,0x646,0x3c8e,
-0x64e,0x28c,0x65a,0x3c96,0x8662,0x3c92,0x600,0x1e4,0x602,0x1e6,0x604,0x1e9,0x606,0x1eb,0x608,0x29b,
-0x60c,0x29e,0x60e,0x45f,0x610,0x1ed,0x612,0x3d9e,0x616,0x2a2,0x618,0x3a4,0x61e,0x41a,0x622,0x41e,
-0x636,0x343,0x646,0x3d9b,0x8650,0x3d7,0x602,0x3caa,0x860e,0x3cae,0x602,0x2aa,0x60e,0x3cb2,0x618,0x2b2,
-0x61e,0x422,0x622,0x426,0x646,0x3cb7,0x64e,0x2ae,0x8662,0x3cbe,0x602,0x2b7,0x604,0x2ba,0x60e,0x3cc2,
-0x618,0x2c3,0x646,0x3cc7,0x64c,0x432,0x864e,0x2be,0x60e,0x3cd6,0x610,0x3d2e,0x618,0x2ca,0x646,0x3cda,
-0x64c,0x436,0x64e,0x2c6,0x65a,0x3ce2,0x8662,0x3cde,0x600,0x1f2,0x602,0x1f4,0x604,0x1f6,0x606,0x2d3,
-0x608,0x2d7,0x60c,0x2da,0x610,0x1f9,0x612,0x3dce,0x614,0x2de,0x616,0x2e2,0x618,0x3a8,0x61e,0x42a,
-0x622,0x42e,0x636,0x361,0x646,0x3dca,0x648,0x3ce6,0x650,0x2e6,0x65a,0x3cee,0x8660,0x3cea,0x606,0x3cfa,
-0x8646,0x3cfe,0x600,0x3d02,0x602,0x3d06,0x604,0x2ea,0x60e,0x3d0e,0x610,0x3d0a,0x614,0x3d30,0x8646,0x3d12,
-0x60e,0x3d16,0x8610,0x3d1a,0x600,0x3de6,0x602,0x1fa,0x604,0x2ee,0x606,0x3df2,0x608,0x466,0x60e,0x3d1e,
-0x610,0x1fe,0x612,0x3dee,0x614,0x3d32,0x8646,0x3dea,0x602,0x2f4,0x604,0x3d22,0x60e,0x2f8,0x618,0x2fc,
-0x646,0x3d26,0x8662,0x3d2a,0x600,0x3fda,0x602,0x70a,0x8684,0x3f82,0x602,0x3f8,0x8608,0x3c4,0x8602,0x3fc,
-0x602,0x3fa,0x8608,0x3c6,0x8602,0x3fe,0x860e,0x3d36,0x8618,0x3dc,0x8618,0x3de,0x600,0x3f74,0x602,0x70c,
-0x608,0x3f72,0x60c,0x3f70,0x626,0x3e11,0x628,0x3e13,0x868a,0x3f78,0x600,0x3f90,0x602,0x710,0x626,0x3e31,
-0x8628,0x3e33,0x600,0x3f94,0x602,0x712,0x626,0x3e51,0x628,0x3e53,0x868a,0x3f98,0x600,0x3fb4,0x602,0x714,
-0x608,0x3fb2,0x60c,0x3fb0,0x610,0x754,0x626,0x3e71,0x8628,0x3e73,0x600,0x3ff0,0x602,0x718,0x626,0x3e91,
-0x8628,0x3e93,0x8628,0x3fd8,0x600,0x3fd4,0x602,0x71c,0x608,0x3fd2,0x60c,0x3fd0,0x610,0x756,0x8628,0x3eb3,
-0x600,0x3ff4,0x602,0x71e,0x626,0x3ed1,0x628,0x3ed3,0x868a,0x3ff8,0x600,0x3ee1,0x602,0x759,0x608,0x3f62,
-0x60c,0x3f60,0x626,0x3e01,0x628,0x3e03,0x684,0x3f6d,0x868a,0x3f66,0x600,0x3ee4,0x602,0x75a,0x626,0x3e21,
-0x8628,0x3e23,0x600,0x3ee9,0x602,0x75d,0x626,0x3e41,0x628,0x3e43,0x684,0x3f8d,0x868a,0x3f86,0x600,0x3eec,
-0x602,0x75e,0x608,0x3fa2,0x60c,0x3fa0,0x610,0x795,0x626,0x3e61,0x628,0x3e63,0x8684,0x3fac,0x600,0x3ef0,
-0x602,0x798,0x626,0x3e81,0x8628,0x3e83,0x626,0x3fc8,0x8628,0x3fca,0x600,0x3ef4,0x602,0x79a,0x608,0x3fc2,
-0x60c,0x3fc0,0x610,0x797,0x626,0x3ea1,0x628,0x3ea3,0x8684,0x3fcc,0x600,0x3ef9,0x602,0x79d,0x626,0x3ec1,
-0x628,0x3ec3,0x684,0x3fed,0x868a,0x3fe6,0x602,0x7a6,0x8610,0x7a8,0x8610,0x80e,0x60c,0x9a0,0x8610,0x9a4,
-0x8602,0x806,0x600,0x800,0x60c,0x9ac,0x8610,0x802,0x60c,0x982,0x8610,0x9b8,0x8610,0x9bc,0x600,0x81a,
-0x608,0x9c4,0x60c,0x832,0x8610,0x9c8,0x8602,0x818,0x8610,0x9cc,0x608,0x9dc,0x60c,0x81c,0x610,0x9e0,
-0x8616,0x9e4,0x8610,0x9e8,0x8610,0x9f0,0x8610,0x9d8,0x60c,0x9a2,0x8610,0x9a6,0x8602,0x8a6,0x600,0x8a0,
-0x60c,0x9ae,0x8610,0x8a2,0x60c,0x984,0x8610,0x9ba,0x8610,0x9be,0x600,0x8ba,0x608,0x9c6,0x60c,0x872,
-0x8610,0x9ca,0x8602,0x8b8,0x8610,0x9ce,0x608,0x9de,0x60c,0x8bc,0x610,0x9e2,0x8616,0x9e6,0x8610,0x9ea,
-0x8610,0x9f2,0x8610,0x9da,0x8610,0x8ae,0x861e,0x8ec,0x861e,0x8ee,0x8610,0x9b4,0x8610,0x9b6,0x8610,0x9d4,
-0x8610,0x9d6,0xca6,0xc44,0xca8,0xc46,0x8caa,0xc4a,0x8ca8,0xc48,0x8ca8,0xc4c,0x8ca8,0xd84,0x8ca8,0xda6,
-0x8ca8,0xd80,0x9278,0x1252,0x9278,0x1262,0x9278,0x1268,0x137c,0x1396,0x93ae,0x1398,0x167c,0x1696,0x16ac,0x1690,
-0x96ae,0x1698,0x97ae,0x1728,0x177c,0x1794,0x97ae,0x1798,0x977c,0x1796,0x98ac,0x1890,0x99aa,0x1980,0x1984,0x1995,
-0x19aa,0x198e,0x99ac,0x1990,0x1a7c,0x1a94,0x9aae,0x1a98,0x9a7c,0x1a96,0x1b94,0x1bb4,0x1b9e,0x1bb9,0x9bbe,0x1bbc,
-0xa05c,0x204c,0xb66a,0x360c,0xb66a,0x3610,0xb66a,0x3614,0xb66a,0x3618,0xb66a,0x361c,0xb66a,0x3624,0xb66a,0x3676,
-0xb66a,0x367a,0xb66a,0x3680,0xb66a,0x3682,0xb66a,0x3686,0x600,0x3f9a,0x602,0x3f9c,0x8684,0x3f9e,0x600,0x3fba,
-0x602,0x3fbc,0x8684,0x3fbe,0x8670,0x4334,0x8670,0x4336,0x8670,0x435c,0x8670,0x439a,0x8670,0x439e,0x8670,0x439c,
-0x8670,0x4408,0x8670,0x4412,0x8670,0x4418,0x8670,0x4448,0x8670,0x444c,0x8670,0x4482,0x8670,0x4488,0x8670,0x448e,
-0x8670,0x4492,0x8670,0x44da,0x8670,0x44c4,0x8670,0x44e0,0x8670,0x44e2,0x8670,0x44e8,0x8670,0x44ea,0x8670,0x44f0,
-0x8670,0x44f2,0x8670,0x4500,0x8670,0x4502,0x8670,0x45c0,0x8670,0x45c2,0x8670,0x4508,0x8670,0x450a,0x8670,0x4510,
-0x8670,0x4512,0x8670,0x45c4,0x8670,0x45c6,0x8670,0x4558,0x8670,0x455a,0x8670,0x455c,0x8670,0x455e,0x8670,0x45d4,
-0x8670,0x45d6,0x8670,0x45d8,0x8670,0x45da,0xe132,0x6128,0xe132,0x6098,0xe132,0x609c,0xe132,0x60a0,0xe132,0x60a4,
-0xe132,0x60a8,0xe132,0x60ac,0xe132,0x60b0,0xe132,0x60b4,0xe132,0x60b8,0xe132,0x60bc,0xe132,0x60c0,0xe132,0x60c4,
-0xe132,0x60ca,0xe132,0x60ce,0xe132,0x60d2,0x6132,0x60e0,0xe134,0x60e2,0x6132,0x60e6,0xe134,0x60e8,0x6132,0x60ec,
-0xe134,0x60ee,0x6132,0x60f2,0xe134,0x60f4,0x6132,0x60f8,0xe134,0x60fa,0xe132,0x613c,0xe132,0x61e8,0xe132,0x6158,
-0xe132,0x615c,0xe132,0x6160,0xe132,0x6164,0xe132,0x6168,0xe132,0x616c,0xe132,0x6170,0xe132,0x6174,0xe132,0x6178,
-0xe132,0x617c,0xe132,0x6180,0xe132,0x6184,0xe132,0x618a,0xe132,0x618e,0xe132,0x6192,0x6132,0x61a0,0xe134,0x61a2,
-0x6132,0x61a6,0xe134,0x61a8,0x6132,0x61ac,0xe134,0x61ae,0x6132,0x61b2,0xe134,0x61b4,0x6132,0x61b8,0xe134,0x61ba,
-0xe132,0x61ee,0xe132,0x61f0,0xe132,0x61f2,0xe132,0x61f4,0xe132,0x61fc,0xb489,0x2e82,0x2134,0xb489,0x2e82,0x2138,
-0xb489,0x2e82,0x2156,0xb489,0x49c2,0x225c,0xb489,0x49c2,0x225e,0x3489,0xcf82,0x2696,0xb489,0xd5c2,0x2698,0x348b,
-0x2c02,0x2978,0x348b,0x2e82,0x2976,0xb48b,0x2f42,0x297c,0xb48b,0x6bc2,0x2b74,0xb48b,0x6bc2,0x2b76,0xb48d,0x4c02,
-0x3270,2,0xe602,0x41,0x302,0x600,0x3d4c,0x602,0x3d48,0x606,0x3d54,0x8612,0x3d50,0xe602,0x41,0x308,
-0x8608,0x3bc,0xe602,0x41,0x30a,0x8602,0x3f4,0xca02,0x43,0x327,0x8602,0x3c10,0xe602,0x45,0x302,0x600,
-0x3d80,0x602,0x3d7c,0x606,0x3d88,0x8612,0x3d84,0xe602,0x49,0x308,0x8602,0x3c5c,0xe602,0x4f,0x302,0x600,
-0x3da4,0x602,0x3da0,0x606,0x3dac,0x8612,0x3da8,0xe602,0x4f,0x303,0x602,0x3c98,0x608,0x458,0x8610,0x3c9c,
-0xe602,0x4f,0x308,0x8608,0x454,0xe602,0x55,0x308,0x600,0x3b6,0x602,0x3ae,0x608,0x3aa,0x8618,0x3b2,
-0xe602,0x61,0x302,0x600,0x3d4e,0x602,0x3d4a,0x606,0x3d56,0x8612,0x3d52,0xe602,0x61,0x308,0x8608,0x3be,
-0xe602,0x61,0x30a,0x8602,0x3f6,0xca02,0x63,0x327,0x8602,0x3c12,0xe602,0x65,0x302,0x600,0x3d82,0x602,
-0x3d7e,0x606,0x3d8a,0x8612,0x3d86,0xe602,0x69,0x308,0x8602,0x3c5e,0xe602,0x6f,0x302,0x600,0x3da6,0x602,
-0x3da2,0x606,0x3dae,0x8612,0x3daa,0xe602,0x6f,0x303,0x602,0x3c9a,0x608,0x45a,0x8610,0x3c9e,0xe602,0x6f,
-0x308,0x8608,0x456,0xe602,0x75,0x308,0x600,0x3b8,0x602,0x3b0,0x608,0x3ac,0x8618,0x3b4,0xe602,0x41,
-0x306,0x600,0x3d60,0x602,0x3d5c,0x606,0x3d68,0x8612,0x3d64,0xe602,0x61,0x306,0x600,0x3d62,0x602,0x3d5e,
-0x606,0x3d6a,0x8612,0x3d66,0xe602,0x45,0x304,0x600,0x3c28,0x8602,0x3c2c,0xe602,0x65,0x304,0x600,0x3c2a,
-0x8602,0x3c2e,0xe602,0x4f,0x304,0x600,0x3ca0,0x8602,0x3ca4,0xe602,0x6f,0x304,0x600,0x3ca2,0x8602,0x3ca6,
-0xe602,0x53,0x301,0x860e,0x3cc8,0xe602,0x73,0x301,0x860e,0x3cca,0xe602,0x53,0x30c,0x860e,0x3ccc,0xe602,
-0x73,0x30c,0x860e,0x3cce,0xe602,0x55,0x303,0x8602,0x3cf0,0xe602,0x75,0x303,0x8602,0x3cf2,0xe602,0x55,
-0x304,0x8610,0x3cf4,0xe602,0x75,0x304,0x8610,0x3cf6,0xd802,0x4f,0x31b,0x600,0x3db8,0x602,0x3db4,0x606,
-0x3dc0,0x612,0x3dbc,0x8646,0x3dc4,0xd802,0x6f,0x31b,0x600,0x3dba,0x602,0x3db6,0x606,0x3dc2,0x612,0x3dbe,
-0x8646,0x3dc6,0xd802,0x55,0x31b,0x600,0x3dd4,0x602,0x3dd0,0x606,0x3ddc,0x612,0x3dd8,0x8646,0x3de0,0xd802,
-0x75,0x31b,0x600,0x3dd6,0x602,0x3dd2,0x606,0x3dde,0x612,0x3dda,0x8646,0x3de2,0xca02,0x4f,0x328,0x8608,
-0x3d8,0xca02,0x6f,0x328,0x8608,0x3da,0xe602,0x41,0x307,0x8608,0x3c0,0xe602,0x61,0x307,0x8608,0x3c2,
-0xca02,0x45,0x327,0x860c,0x3c38,0xca02,0x65,0x327,0x860c,0x3c3a,0xe602,0x4f,0x307,0x8608,0x460,0xe602,
-0x6f,0x307,0x8608,0x462,0xe602,0x3b1,0x301,0x868a,0x3f68,0xe602,0x3b7,0x301,0x868a,0x3f88,0xe602,0x3b9,
-0x308,0x600,0x3fa4,0x602,0x720,0x8684,0x3fae,0xe602,0x3c5,0x308,0x600,0x3fc4,0x602,0x760,0x8684,0x3fce,
-0xe602,0x3c9,0x301,0x868a,0x3fe8,2,0xcc6,0xcc2,0x99aa,0x1996,2,0xdd9,0xdcf,0x9b94,0x1bba,0xdc02,
-0x4c,0x323,0x8608,0x3c70,0xdc02,0x6c,0x323,0x8608,0x3c72,0xdc02,0x52,0x323,0x8608,0x3cb8,0xdc02,0x72,
-0x323,0x8608,0x3cba,0xdc02,0x53,0x323,0x860e,0x3cd0,0xdc02,0x73,0x323,0x860e,0x3cd2,0xdc02,0x41,0x323,
-0x604,0x3d58,0x860c,0x3d6c,0xdc02,0x61,0x323,0x604,0x3d5a,0x860c,0x3d6e,0xdc02,0x45,0x323,0x8604,0x3d8c,
-0xdc02,0x65,0x323,0x8604,0x3d8e,0xdc02,0x4f,0x323,0x8604,0x3db0,0xdc02,0x6f,0x323,0x8604,0x3db2,0xe602,
-0x3b1,0x313,0x600,0x3e05,0x602,0x3e09,0x684,0x3e0d,0x868a,0x3f00,0xe602,0x3b1,0x314,0x600,0x3e07,0x602,
-0x3e0b,0x684,0x3e0f,0x868a,0x3f02,0x1f00,0xe643,0x3b1,0x313,0x300,0x868a,0x3f04,0x1f01,0xe643,0x3b1,0x314,
-0x300,0x868a,0x3f06,0x1f00,0xe643,0x3b1,0x313,0x301,0x868a,0x3f08,0x1f01,0xe643,0x3b1,0x314,0x301,0x868a,
-0x3f0a,0x1f00,0xe643,0x3b1,0x313,0x342,0x868a,0x3f0c,0x1f01,0xe643,0x3b1,0x314,0x342,0x868a,0x3f0e,0xe602,
-0x391,0x313,0x600,0x3e15,0x602,0x3e19,0x684,0x3e1d,0x868a,0x3f10,0xe602,0x391,0x314,0x600,0x3e17,0x602,
-0x3e1b,0x684,0x3e1f,0x868a,0x3f12,0x1f08,0xe643,0x391,0x313,0x300,0x868a,0x3f14,0x1f09,0xe643,0x391,0x314,
-0x300,0x868a,0x3f16,0x1f08,0xe643,0x391,0x313,0x301,0x868a,0x3f18,0x1f09,0xe643,0x391,0x314,0x301,0x868a,
-0x3f1a,0x1f08,0xe643,0x391,0x313,0x342,0x868a,0x3f1c,0x1f09,0xe643,0x391,0x314,0x342,0x868a,0x3f1e,0xe602,
-0x3b5,0x313,0x600,0x3e24,0x8602,0x3e28,0xe602,0x3b5,0x314,0x600,0x3e26,0x8602,0x3e2a,0xe602,0x395,0x313,
-0x600,0x3e34,0x8602,0x3e38,0xe602,0x395,0x314,0x600,0x3e36,0x8602,0x3e3a,0xe602,0x3b7,0x313,0x600,0x3e45,
-0x602,0x3e49,0x684,0x3e4d,0x868a,0x3f20,0xe602,0x3b7,0x314,0x600,0x3e47,0x602,0x3e4b,0x684,0x3e4f,0x868a,
-0x3f22,0x1f20,0xe643,0x3b7,0x313,0x300,0x868a,0x3f24,0x1f21,0xe643,0x3b7,0x314,0x300,0x868a,0x3f26,0x1f20,
-0xe643,0x3b7,0x313,0x301,0x868a,0x3f28,0x1f21,0xe643,0x3b7,0x314,0x301,0x868a,0x3f2a,0x1f20,0xe643,0x3b7,
-0x313,0x342,0x868a,0x3f2c,0x1f21,0xe643,0x3b7,0x314,0x342,0x868a,0x3f2e,0xe602,0x397,0x313,0x600,0x3e55,
-0x602,0x3e59,0x684,0x3e5d,0x868a,0x3f30,0xe602,0x397,0x314,0x600,0x3e57,0x602,0x3e5b,0x684,0x3e5f,0x868a,
-0x3f32,0x1f28,0xe643,0x397,0x313,0x300,0x868a,0x3f34,0x1f29,0xe643,0x397,0x314,0x300,0x868a,0x3f36,0x1f28,
-0xe643,0x397,0x313,0x301,0x868a,0x3f38,0x1f29,0xe643,0x397,0x314,0x301,0x868a,0x3f3a,0x1f28,0xe643,0x397,
-0x313,0x342,0x868a,0x3f3c,0x1f29,0xe643,0x397,0x314,0x342,0x868a,0x3f3e,0xe602,0x3b9,0x313,0x600,0x3e64,
-0x602,0x3e68,0x8684,0x3e6c,0xe602,0x3b9,0x314,0x600,0x3e66,0x602,0x3e6a,0x8684,0x3e6e,0xe602,0x399,0x313,
-0x600,0x3e74,0x602,0x3e78,0x8684,0x3e7c,0xe602,0x399,0x314,0x600,0x3e76,0x602,0x3e7a,0x8684,0x3e7e,0xe602,
-0x3bf,0x313,0x600,0x3e84,0x8602,0x3e88,0xe602,0x3bf,0x314,0x600,0x3e86,0x8602,0x3e8a,0xe602,0x39f,0x313,
-0x600,0x3e94,0x8602,0x3e98,0xe602,0x39f,0x314,0x600,0x3e96,0x8602,0x3e9a,0xe602,0x3c5,0x313,0x600,0x3ea4,
-0x602,0x3ea8,0x8684,0x3eac,0xe602,0x3c5,0x314,0x600,0x3ea6,0x602,0x3eaa,0x8684,0x3eae,0xe602,0x3a5,0x314,
-0x600,0x3eb6,0x602,0x3eba,0x8684,0x3ebe,0xe602,0x3c9,0x313,0x600,0x3ec5,0x602,0x3ec9,0x684,0x3ecd,0x868a,
-0x3f40,0xe602,0x3c9,0x314,0x600,0x3ec7,0x602,0x3ecb,0x684,0x3ecf,0x868a,0x3f42,0x1f60,0xe643,0x3c9,0x313,
-0x300,0x868a,0x3f44,0x1f61,0xe643,0x3c9,0x314,0x300,0x868a,0x3f46,0x1f60,0xe643,0x3c9,0x313,0x301,0x868a,
-0x3f48,0x1f61,0xe643,0x3c9,0x314,0x301,0x868a,0x3f4a,0x1f60,0xe643,0x3c9,0x313,0x342,0x868a,0x3f4c,0x1f61,
-0xe643,0x3c9,0x314,0x342,0x868a,0x3f4e,0xe602,0x3a9,0x313,0x600,0x3ed5,0x602,0x3ed9,0x684,0x3edd,0x868a,
-0x3f50,0xe602,0x3a9,0x314,0x600,0x3ed7,0x602,0x3edb,0x684,0x3edf,0x868a,0x3f52,0x1f68,0xe643,0x3a9,0x313,
-0x300,0x868a,0x3f54,0x1f69,0xe643,0x3a9,0x314,0x300,0x868a,0x3f56,0x1f68,0xe643,0x3a9,0x313,0x301,0x868a,
-0x3f58,0x1f69,0xe643,0x3a9,0x314,0x301,0x868a,0x3f5a,0x1f68,0xe643,0x3a9,0x313,0x342,0x868a,0x3f5c,0x1f69,
-0xe643,0x3a9,0x314,0x342,0x868a,0x3f5e,0xe602,0x3b1,0x300,0x868a,0x3f64,0xe602,0x3b7,0x300,0x868a,0x3f84,
-0xe602,0x3c9,0x300,0x868a,0x3fe4,0xe602,0x3b1,0x342,0x868a,0x3f6e,0xe602,0x3b7,0x342,0x868a,0x3f8e,0xe602,
-0x3c9,0x342,0x868a,0x3fee,3,0xe602,0x41,0x300,0xe602,0x41,0x301,0xe602,0x41,0x303,0xe602,0x45,
-0x300,0xe602,0x45,0x301,0xe602,0x45,0x308,0xe602,0x49,0x300,0xe602,0x49,0x301,0xe602,0x49,0x302,
-0xe602,0x4e,0x303,0xe602,0x4f,0x300,0xe602,0x4f,0x301,0xe602,0x55,0x300,0xe602,0x55,0x301,0xe602,
-0x55,0x302,0xe602,0x59,0x301,0xe602,0x61,0x300,0xe602,0x61,0x301,0xe602,0x61,0x303,0xe602,0x65,
-0x300,0xe602,0x65,0x301,0xe602,0x65,0x308,0xe602,0x69,0x300,0xe602,0x69,0x301,0xe602,0x69,0x302,
-0xe602,0x6e,0x303,0xe602,0x6f,0x300,0xe602,0x6f,0x301,0xe602,0x75,0x300,0xe602,0x75,0x301,0xe602,
-0x75,0x302,0xe602,0x79,0x301,0xe602,0x79,0x308,0xe602,0x41,0x304,0xe602,0x61,0x304,0xca02,0x41,
-0x328,0xca02,0x61,0x328,0xe602,0x43,0x301,0xe602,0x63,0x301,0xe602,0x43,0x302,0xe602,0x63,0x302,
-0xe602,0x43,0x307,0xe602,0x63,0x307,0xe602,0x43,0x30c,0xe602,0x63,0x30c,0xe602,0x44,0x30c,0xe602,
-0x64,0x30c,0xe602,0x45,0x306,0xe602,0x65,0x306,0xe602,0x45,0x307,0xe602,0x65,0x307,0xca02,0x45,
-0x328,0xca02,0x65,0x328,0xe602,0x45,0x30c,0xe602,0x65,0x30c,0xe602,0x47,0x302,0xe602,0x67,0x302,
-0xe602,0x47,0x306,0xe602,0x67,0x306,0xe602,0x47,0x307,0xe602,0x67,0x307,0xca02,0x47,0x327,0xca02,
-0x67,0x327,0xe602,0x48,0x302,0xe602,0x68,0x302,0xe602,0x49,0x303,0xe602,0x69,0x303,0xe602,0x49,
-0x304,0xe602,0x69,0x304,0xe602,0x49,0x306,0xe602,0x69,0x306,0xca02,0x49,0x328,0xca02,0x69,0x328,
-0xe602,0x49,0x307,0xe602,0x4a,0x302,0xe602,0x6a,0x302,0xca02,0x4b,0x327,0xca02,0x6b,0x327,0xe602,
-0x4c,0x301,0xe602,0x6c,0x301,0xca02,0x4c,0x327,0xca02,0x6c,0x327,0xe602,0x4c,0x30c,0xe602,0x6c,
-0x30c,0xe602,0x4e,0x301,0xe602,0x6e,0x301,0xca02,0x4e,0x327,0xca02,0x6e,0x327,0xe602,0x4e,0x30c,
-0xe602,0x6e,0x30c,0xe602,0x4f,0x306,0xe602,0x6f,0x306,0xe602,0x4f,0x30b,0xe602,0x6f,0x30b,0xe602,
-0x52,0x301,0xe602,0x72,0x301,0xca02,0x52,0x327,0xca02,0x72,0x327,0xe602,0x52,0x30c,0xe602,0x72,
-0x30c,0xe602,0x53,0x302,0xe602,0x73,0x302,0xca02,0x53,0x327,0xca02,0x73,0x327,0xca02,0x54,0x327,
-0xca02,0x74,0x327,0xe602,0x54,0x30c,0xe602,0x74,0x30c,0xe602,0x55,0x306,0xe602,0x75,0x306,0xe602,
-0x55,0x30a,0xe602,0x75,0x30a,0xe602,0x55,0x30b,0xe602,0x75,0x30b,0xca02,0x55,0x328,0xca02,0x75,
-0x328,0xe602,0x57,0x302,0xe602,0x77,0x302,0xe602,0x59,0x302,0xe602,0x79,0x302,0xe602,0x59,0x308,
-0xe602,0x5a,0x301,0xe602,0x7a,0x301,0xe602,0x5a,0x307,0xe602,0x7a,0x307,0xe602,0x5a,0x30c,0xe602,
-0x7a,0x30c,0xe602,0x41,0x30c,0xe602,0x61,0x30c,0xe602,0x49,0x30c,0xe602,0x69,0x30c,0xe602,0x4f,
-0x30c,0xe602,0x6f,0x30c,0xe602,0x55,0x30c,0xe602,0x75,0x30c,0xdc,0xe643,0x55,0x308,0x304,0xfc,
-0xe643,0x75,0x308,0x304,0xdc,0xe643,0x55,0x308,0x301,0xfc,0xe643,0x75,0x308,0x301,0xdc,0xe643,
-0x55,0x308,0x30c,0xfc,0xe643,0x75,0x308,0x30c,0xdc,0xe643,0x55,0x308,0x300,0xfc,0xe643,0x75,
-0x308,0x300,0xc4,0xe643,0x41,0x308,0x304,0xe4,0xe643,0x61,0x308,0x304,0x226,0xe643,0x41,0x307,
-0x304,0x227,0xe643,0x61,0x307,0x304,0xe602,0xc6,0x304,0xe602,0xe6,0x304,0xe602,0x47,0x30c,0xe602,
-0x67,0x30c,0xe602,0x4b,0x30c,0xe602,0x6b,0x30c,0x1ea,0xe643,0x4f,0x328,0x304,0x1eb,0xe643,0x6f,
-0x328,0x304,0xe602,0x1b7,0x30c,0xe602,0x292,0x30c,0xe602,0x6a,0x30c,0xe602,0x47,0x301,0xe602,0x67,
-0x301,0xe602,0x4e,0x300,0xe602,0x6e,0x300,0xc5,0xe643,0x41,0x30a,0x301,0xe5,0xe643,0x61,0x30a,
-0x301,0xe602,0xc6,0x301,0xe602,0xe6,0x301,0xe602,0xd8,0x301,0xe602,0xf8,0x301,0xe602,0x41,0x30f,
-0xe602,0x61,0x30f,0xe602,0x41,0x311,0xe602,0x61,0x311,0xe602,0x45,0x30f,0xe602,0x65,0x30f,0xe602,
-0x45,0x311,0xe602,0x65,0x311,0xe602,0x49,0x30f,0xe602,0x69,0x30f,0xe602,0x49,0x311,0xe602,0x69,
-0x311,0xe602,0x4f,0x30f,0xe602,0x6f,0x30f,0xe602,0x4f,0x311,0xe602,0x6f,0x311,0xe602,0x52,0x30f,
-0xe602,0x72,0x30f,0xe602,0x52,0x311,0xe602,0x72,0x311,0xe602,0x55,0x30f,0xe602,0x75,0x30f,0xe602,
-0x55,0x311,0xe602,0x75,0x311,0xdc02,0x53,0x326,0xdc02,0x73,0x326,0xdc02,0x54,0x326,0xdc02,0x74,
-0x326,0xe602,0x48,0x30c,0xe602,0x68,0x30c,0xd6,0xe643,0x4f,0x308,0x304,0xf6,0xe643,0x6f,0x308,
-0x304,0xd5,0xe643,0x4f,0x303,0x304,0xf5,0xe643,0x6f,0x303,0x304,0x22e,0xe643,0x4f,0x307,0x304,
-0x22f,0xe643,0x6f,0x307,0x304,0xe602,0x59,0x304,0xe602,0x79,0x304,0xe602,0xa8,0x301,0xe602,0x391,
-0x301,0xe602,0x395,0x301,0xe602,0x397,0x301,0xe602,0x399,0x301,0xe602,0x39f,0x301,0xe602,0x3a5,0x301,
-0xe602,0x3a9,0x301,0x3ca,0xe643,0x3b9,0x308,0x301,0xe602,0x399,0x308,0xe602,0x3a5,0x308,0xe602,0x3b5,
-0x301,0xe602,0x3b9,0x301,0x3cb,0xe643,0x3c5,0x308,0x301,0xe602,0x3bf,0x301,0xe602,0x3c5,0x301,0xe602,
-0x3d2,0x301,0xe602,0x3d2,0x308,0xe602,0x415,0x300,0xe602,0x415,0x308,0xe602,0x413,0x301,0xe602,0x406,
-0x308,0xe602,0x41a,0x301,0xe602,0x418,0x300,0xe602,0x423,0x306,0xe602,0x418,0x306,0xe602,0x438,0x306,
-0xe602,0x435,0x300,0xe602,0x435,0x308,0xe602,0x433,0x301,0xe602,0x456,0x308,0xe602,0x43a,0x301,0xe602,
-0x438,0x300,0xe602,0x443,0x306,0xe602,0x474,0x30f,0xe602,0x475,0x30f,0xe602,0x416,0x306,0xe602,0x436,
-0x306,0xe602,0x410,0x306,0xe602,0x430,0x306,0xe602,0x410,0x308,0xe602,0x430,0x308,0xe602,0x415,0x306,
-0xe602,0x435,0x306,0xe602,0x4d8,0x308,0xe602,0x4d9,0x308,0xe602,0x416,0x308,0xe602,0x436,0x308,0xe602,
-0x417,0x308,0xe602,0x437,0x308,0xe602,0x418,0x304,0xe602,0x438,0x304,0xe602,0x418,0x308,0xe602,0x438,
-0x308,0xe602,0x41e,0x308,0xe602,0x43e,0x308,0xe602,0x4e8,0x308,0xe602,0x4e9,0x308,0xe602,0x42d,0x308,
-0xe602,0x44d,0x308,0xe602,0x423,0x304,0xe602,0x443,0x304,0xe602,0x423,0x308,0xe602,0x443,0x308,0xe602,
-0x423,0x30b,0xe602,0x443,0x30b,0xe602,0x427,0x308,0xe602,0x447,0x308,0xe602,0x42b,0x308,0xe602,0x44b,
-0x308,0xe602,0x627,0x653,0xe602,0x627,0x654,0xe602,0x648,0x654,0xdc02,0x627,0x655,0xe602,0x64a,0x654,
-0xe602,0x6d5,0x654,0xe602,0x6c1,0x654,0xe602,0x6d2,0x654,0x702,0x928,0x93c,0x702,0x930,0x93c,0x702,
-0x933,0x93c,2,0x9c7,0x9be,2,0x9c7,0x9d7,2,0xb47,0xb56,2,0xb47,0xb3e,2,0xb47,
-0xb57,2,0xb92,0xbd7,2,0xbc6,0xbbe,2,0xbc7,0xbbe,2,0xbc6,0xbd7,0x5b02,0xc46,0xc56,
-2,0xcbf,0xcd5,2,0xcc6,0xcd5,2,0xcc6,0xcd6,0xcca,0x43,0xcc6,0xcc2,0xcd5,2,0xd46,
-0xd3e,2,0xd47,0xd3e,2,0xd46,0xd57,0x902,0xdd9,0xdca,0xddc,0x943,0xdd9,0xdcf,0xdca,2,
-0xdd9,0xddf,2,0x1025,0x102e,2,0x1b05,0x1b35,2,0x1b07,0x1b35,2,0x1b09,0x1b35,2,0x1b0b,
-0x1b35,2,0x1b0d,0x1b35,2,0x1b11,0x1b35,2,0x1b3a,0x1b35,2,0x1b3c,0x1b35,2,0x1b3e,0x1b35,
-2,0x1b3f,0x1b35,2,0x1b42,0x1b35,0xdc02,0x41,0x325,0xdc02,0x61,0x325,0xe602,0x42,0x307,0xe602,
-0x62,0x307,0xdc02,0x42,0x323,0xdc02,0x62,0x323,0xdc02,0x42,0x331,0xdc02,0x62,0x331,0xc7,0xe643,
-0x43,0x327,0x301,0xe7,0xe643,0x63,0x327,0x301,0xe602,0x44,0x307,0xe602,0x64,0x307,0xdc02,0x44,
-0x323,0xdc02,0x64,0x323,0xdc02,0x44,0x331,0xdc02,0x64,0x331,0xca02,0x44,0x327,0xca02,0x64,0x327,
-0xdc02,0x44,0x32d,0xdc02,0x64,0x32d,0x112,0xe643,0x45,0x304,0x300,0x113,0xe643,0x65,0x304,0x300,
-0x112,0xe643,0x45,0x304,0x301,0x113,0xe643,0x65,0x304,0x301,0xdc02,0x45,0x32d,0xdc02,0x65,0x32d,
-0xdc02,0x45,0x330,0xdc02,0x65,0x330,0x228,0xe643,0x45,0x327,0x306,0x229,0xe643,0x65,0x327,0x306,
-0xe602,0x46,0x307,0xe602,0x66,0x307,0xe602,0x47,0x304,0xe602,0x67,0x304,0xe602,0x48,0x307,0xe602,
-0x68,0x307,0xdc02,0x48,0x323,0xdc02,0x68,0x323,0xe602,0x48,0x308,0xe602,0x68,0x308,0xca02,0x48,
-0x327,0xca02,0x68,0x327,0xdc02,0x48,0x32e,0xdc02,0x68,0x32e,0xdc02,0x49,0x330,0xdc02,0x69,0x330,
-0xcf,0xe643,0x49,0x308,0x301,0xef,0xe643,0x69,0x308,0x301,0xe602,0x4b,0x301,0xe602,0x6b,0x301,
-0xdc02,0x4b,0x323,0xdc02,0x6b,0x323,0xdc02,0x4b,0x331,0xdc02,0x6b,0x331,0x1e36,0xe643,0x4c,0x323,
-0x304,0x1e37,0xe643,0x6c,0x323,0x304,0xdc02,0x4c,0x331,0xdc02,0x6c,0x331,0xdc02,0x4c,0x32d,0xdc02,
-0x6c,0x32d,0xe602,0x4d,0x301,0xe602,0x6d,0x301,0xe602,0x4d,0x307,0xe602,0x6d,0x307,0xdc02,0x4d,
-0x323,0xdc02,0x6d,0x323,0xe602,0x4e,0x307,0xe602,0x6e,0x307,0xdc02,0x4e,0x323,0xdc02,0x6e,0x323,
-0xdc02,0x4e,0x331,0xdc02,0x6e,0x331,0xdc02,0x4e,0x32d,0xdc02,0x6e,0x32d,0xd5,0xe643,0x4f,0x303,
-0x301,0xf5,0xe643,0x6f,0x303,0x301,0xd5,0xe643,0x4f,0x303,0x308,0xf5,0xe643,0x6f,0x303,0x308,
-0x14c,0xe643,0x4f,0x304,0x300,0x14d,0xe643,0x6f,0x304,0x300,0x14c,0xe643,0x4f,0x304,0x301,0x14d,
-0xe643,0x6f,0x304,0x301,0xe602,0x50,0x301,0xe602,0x70,0x301,0xe602,0x50,0x307,0xe602,0x70,0x307,
-0xe602,0x52,0x307,0xe602,0x72,0x307,0x1e5a,0xe643,0x52,0x323,0x304,0x1e5b,0xe643,0x72,0x323,0x304,
-0xdc02,0x52,0x331,0xdc02,0x72,0x331,0xe602,0x53,0x307,0xe602,0x73,0x307,0x15a,0xe643,0x53,0x301,
-0x307,0x15b,0xe643,0x73,0x301,0x307,0x160,0xe643,0x53,0x30c,0x307,0x161,0xe643,0x73,0x30c,0x307,
-0x1e62,0xe643,0x53,0x323,0x307,0x1e63,0xe643,0x73,0x323,0x307,0xe602,0x54,0x307,0xe602,0x74,0x307,
-0xdc02,0x54,0x323,0xdc02,0x74,0x323,0xdc02,0x54,0x331,0xdc02,0x74,0x331,0xdc02,0x54,0x32d,0xdc02,
-0x74,0x32d,0xdc02,0x55,0x324,0xdc02,0x75,0x324,0xdc02,0x55,0x330,0xdc02,0x75,0x330,0xdc02,0x55,
-0x32d,0xdc02,0x75,0x32d,0x168,0xe643,0x55,0x303,0x301,0x169,0xe643,0x75,0x303,0x301,0x16a,0xe643,
-0x55,0x304,0x308,0x16b,0xe643,0x75,0x304,0x308,0xe602,0x56,0x303,0xe602,0x76,0x303,0xdc02,0x56,
-0x323,0xdc02,0x76,0x323,0xe602,0x57,0x300,0xe602,0x77,0x300,0xe602,0x57,0x301,0xe602,0x77,0x301,
-0xe602,0x57,0x308,0xe602,0x77,0x308,0xe602,0x57,0x307,0xe602,0x77,0x307,0xdc02,0x57,0x323,0xdc02,
-0x77,0x323,0xe602,0x58,0x307,0xe602,0x78,0x307,0xe602,0x58,0x308,0xe602,0x78,0x308,0xe602,0x59,
-0x307,0xe602,0x79,0x307,0xe602,0x5a,0x302,0xe602,0x7a,0x302,0xdc02,0x5a,0x323,0xdc02,0x7a,0x323,
-0xdc02,0x5a,0x331,0xdc02,0x7a,0x331,0xdc02,0x68,0x331,0xe602,0x74,0x308,0xe602,0x77,0x30a,0xe602,
-0x79,0x30a,0xe602,0x17f,0x307,0xe602,0x41,0x309,0xe602,0x61,0x309,0xc2,0xe643,0x41,0x302,0x301,
-0xe2,0xe643,0x61,0x302,0x301,0xc2,0xe643,0x41,0x302,0x300,0xe2,0xe643,0x61,0x302,0x300,0xc2,
-0xe643,0x41,0x302,0x309,0xe2,0xe643,0x61,0x302,0x309,0xc2,0xe643,0x41,0x302,0x303,0xe2,0xe643,
-0x61,0x302,0x303,0x1ea0,0xe643,0x41,0x323,0x302,0x1ea1,0xe643,0x61,0x323,0x302,0x102,0xe643,0x41,
-0x306,0x301,0x103,0xe643,0x61,0x306,0x301,0x102,0xe643,0x41,0x306,0x300,0x103,0xe643,0x61,0x306,
-0x300,0x102,0xe643,0x41,0x306,0x309,0x103,0xe643,0x61,0x306,0x309,0x102,0xe643,0x41,0x306,0x303,
-0x103,0xe643,0x61,0x306,0x303,0x1ea0,0xe643,0x41,0x323,0x306,0x1ea1,0xe643,0x61,0x323,0x306,0xe602,
-0x45,0x309,0xe602,0x65,0x309,0xe602,0x45,0x303,0xe602,0x65,0x303,0xca,0xe643,0x45,0x302,0x301,
-0xea,0xe643,0x65,0x302,0x301,0xca,0xe643,0x45,0x302,0x300,0xea,0xe643,0x65,0x302,0x300,0xca,
-0xe643,0x45,0x302,0x309,0xea,0xe643,0x65,0x302,0x309,0xca,0xe643,0x45,0x302,0x303,0xea,0xe643,
-0x65,0x302,0x303,0x1eb8,0xe643,0x45,0x323,0x302,0x1eb9,0xe643,0x65,0x323,0x302,0xe602,0x49,0x309,
-0xe602,0x69,0x309,0xdc02,0x49,0x323,0xdc02,0x69,0x323,0xe602,0x4f,0x309,0xe602,0x6f,0x309,0xd4,
-0xe643,0x4f,0x302,0x301,0xf4,0xe643,0x6f,0x302,0x301,0xd4,0xe643,0x4f,0x302,0x300,0xf4,0xe643,
-0x6f,0x302,0x300,0xd4,0xe643,0x4f,0x302,0x309,0xf4,0xe643,0x6f,0x302,0x309,0xd4,0xe643,0x4f,
-0x302,0x303,0xf4,0xe643,0x6f,0x302,0x303,0x1ecc,0xe643,0x4f,0x323,0x302,0x1ecd,0xe643,0x6f,0x323,
-0x302,0x1a0,0xe643,0x4f,0x31b,0x301,0x1a1,0xe643,0x6f,0x31b,0x301,0x1a0,0xe643,0x4f,0x31b,0x300,
-0x1a1,0xe643,0x6f,0x31b,0x300,0x1a0,0xe643,0x4f,0x31b,0x309,0x1a1,0xe643,0x6f,0x31b,0x309,0x1a0,
-0xe643,0x4f,0x31b,0x303,0x1a1,0xe643,0x6f,0x31b,0x303,0x1a0,0xdc43,0x4f,0x31b,0x323,0x1a1,0xdc43,
-0x6f,0x31b,0x323,0xdc02,0x55,0x323,0xdc02,0x75,0x323,0xe602,0x55,0x309,0xe602,0x75,0x309,0x1af,
-0xe643,0x55,0x31b,0x301,0x1b0,0xe643,0x75,0x31b,0x301,0x1af,0xe643,0x55,0x31b,0x300,0x1b0,0xe643,
-0x75,0x31b,0x300,0x1af,0xe643,0x55,0x31b,0x309,0x1b0,0xe643,0x75,0x31b,0x309,0x1af,0xe643,0x55,
-0x31b,0x303,0x1b0,0xe643,0x75,0x31b,0x303,0x1af,0xdc43,0x55,0x31b,0x323,0x1b0,0xdc43,0x75,0x31b,
-0x323,0xe602,0x59,0x300,0xe602,0x79,0x300,0xdc02,0x59,0x323,0xdc02,0x79,0x323,0xe602,0x59,0x309,
-0xe602,0x79,0x309,0xe602,0x59,0x303,0xe602,0x79,0x303,0x1f10,0xe643,0x3b5,0x313,0x300,0x1f11,0xe643,
-0x3b5,0x314,0x300,0x1f10,0xe643,0x3b5,0x313,0x301,0x1f11,0xe643,0x3b5,0x314,0x301,0x1f18,0xe643,0x395,
-0x313,0x300,0x1f19,0xe643,0x395,0x314,0x300,0x1f18,0xe643,0x395,0x313,0x301,0x1f19,0xe643,0x395,0x314,
-0x301,0x1f30,0xe643,0x3b9,0x313,0x300,0x1f31,0xe643,0x3b9,0x314,0x300,0x1f30,0xe643,0x3b9,0x313,0x301,
-0x1f31,0xe643,0x3b9,0x314,0x301,0x1f30,0xe643,0x3b9,0x313,0x342,0x1f31,0xe643,0x3b9,0x314,0x342,0x1f38,
-0xe643,0x399,0x313,0x300,0x1f39,0xe643,0x399,0x314,0x300,0x1f38,0xe643,0x399,0x313,0x301,0x1f39,0xe643,
-0x399,0x314,0x301,0x1f38,0xe643,0x399,0x313,0x342,0x1f39,0xe643,0x399,0x314,0x342,0x1f40,0xe643,0x3bf,
-0x313,0x300,0x1f41,0xe643,0x3bf,0x314,0x300,0x1f40,0xe643,0x3bf,0x313,0x301,0x1f41,0xe643,0x3bf,0x314,
-0x301,0x1f48,0xe643,0x39f,0x313,0x300,0x1f49,0xe643,0x39f,0x314,0x300,0x1f48,0xe643,0x39f,0x313,0x301,
-0x1f49,0xe643,0x39f,0x314,0x301,0x1f50,0xe643,0x3c5,0x313,0x300,0x1f51,0xe643,0x3c5,0x314,0x300,0x1f50,
-0xe643,0x3c5,0x313,0x301,0x1f51,0xe643,0x3c5,0x314,0x301,0x1f50,0xe643,0x3c5,0x313,0x342,0x1f51,0xe643,
-0x3c5,0x314,0x342,0x1f59,0xe643,0x3a5,0x314,0x300,0x1f59,0xe643,0x3a5,0x314,0x301,0x1f59,0xe643,0x3a5,
-0x314,0x342,0xe602,0x3b5,0x300,0xe602,0x3b9,0x300,0xe602,0x3bf,0x300,0xe602,0x3c5,0x300,0x1f00,0xf043,
-0x3b1,0x313,0x345,0x1f01,0xf043,0x3b1,0x314,0x345,0x1f02,0x345,2,0xf044,0x3b1,0x313,0x300,0x345,
-0x1f03,0x345,2,0xf044,0x3b1,0x314,0x300,0x345,0x1f04,0x345,2,0xf044,0x3b1,0x313,0x301,0x345,
-0x1f05,0x345,2,0xf044,0x3b1,0x314,0x301,0x345,0x1f06,0x345,2,0xf044,0x3b1,0x313,0x342,0x345,
-0x1f07,0x345,2,0xf044,0x3b1,0x314,0x342,0x345,0x1f08,0xf043,0x391,0x313,0x345,0x1f09,0xf043,0x391,
-0x314,0x345,0x1f0a,0x345,2,0xf044,0x391,0x313,0x300,0x345,0x1f0b,0x345,2,0xf044,0x391,0x314,
-0x300,0x345,0x1f0c,0x345,2,0xf044,0x391,0x313,0x301,0x345,0x1f0d,0x345,2,0xf044,0x391,0x314,
-0x301,0x345,0x1f0e,0x345,2,0xf044,0x391,0x313,0x342,0x345,0x1f0f,0x345,2,0xf044,0x391,0x314,
-0x342,0x345,0x1f20,0xf043,0x3b7,0x313,0x345,0x1f21,0xf043,0x3b7,0x314,0x345,0x1f22,0x345,2,0xf044,
-0x3b7,0x313,0x300,0x345,0x1f23,0x345,2,0xf044,0x3b7,0x314,0x300,0x345,0x1f24,0x345,2,0xf044,
-0x3b7,0x313,0x301,0x345,0x1f25,0x345,2,0xf044,0x3b7,0x314,0x301,0x345,0x1f26,0x345,2,0xf044,
-0x3b7,0x313,0x342,0x345,0x1f27,0x345,2,0xf044,0x3b7,0x314,0x342,0x345,0x1f28,0xf043,0x397,0x313,
-0x345,0x1f29,0xf043,0x397,0x314,0x345,0x1f2a,0x345,2,0xf044,0x397,0x313,0x300,0x345,0x1f2b,0x345,
-2,0xf044,0x397,0x314,0x300,0x345,0x1f2c,0x345,2,0xf044,0x397,0x313,0x301,0x345,0x1f2d,0x345,
-2,0xf044,0x397,0x314,0x301,0x345,0x1f2e,0x345,2,0xf044,0x397,0x313,0x342,0x345,0x1f2f,0x345,
-2,0xf044,0x397,0x314,0x342,0x345,0x1f60,0xf043,0x3c9,0x313,0x345,0x1f61,0xf043,0x3c9,0x314,0x345,
-0x1f62,0x345,2,0xf044,0x3c9,0x313,0x300,0x345,0x1f63,0x345,2,0xf044,0x3c9,0x314,0x300,0x345,
-0x1f64,0x345,2,0xf044,0x3c9,0x313,0x301,0x345,0x1f65,0x345,2,0xf044,0x3c9,0x314,0x301,0x345,
-0x1f66,0x345,2,0xf044,0x3c9,0x313,0x342,0x345,0x1f67,0x345,2,0xf044,0x3c9,0x314,0x342,0x345,
-0x1f68,0xf043,0x3a9,0x313,0x345,0x1f69,0xf043,0x3a9,0x314,0x345,0x1f6a,0x345,2,0xf044,0x3a9,0x313,
-0x300,0x345,0x1f6b,0x345,2,0xf044,0x3a9,0x314,0x300,0x345,0x1f6c,0x345,2,0xf044,0x3a9,0x313,
-0x301,0x345,0x1f6d,0x345,2,0xf044,0x3a9,0x314,0x301,0x345,0x1f6e,0x345,2,0xf044,0x3a9,0x313,
-0x342,0x345,0x1f6f,0x345,2,0xf044,0x3a9,0x314,0x342,0x345,0xe602,0x3b1,0x306,0xe602,0x3b1,0x304,
-0x1f70,0xf043,0x3b1,0x300,0x345,0xf002,0x3b1,0x345,0x3ac,0xf043,0x3b1,0x301,0x345,0x1fb6,0xf043,0x3b1,
-0x342,0x345,0xe602,0x391,0x306,0xe602,0x391,0x304,0xe602,0x391,0x300,0xf002,0x391,0x345,0xe602,0xa8,
-0x342,0x1f74,0xf043,0x3b7,0x300,0x345,0xf002,0x3b7,0x345,0x3ae,0xf043,0x3b7,0x301,0x345,0x1fc6,0xf043,
-0x3b7,0x342,0x345,0xe602,0x395,0x300,0xe602,0x397,0x300,0xf002,0x397,0x345,0xe602,0x1fbf,0x300,0xe602,
-0x1fbf,0x301,0xe602,0x1fbf,0x342,0xe602,0x3b9,0x306,0xe602,0x3b9,0x304,0x3ca,0xe643,0x3b9,0x308,0x300,
-0xe602,0x3b9,0x342,0x3ca,0xe643,0x3b9,0x308,0x342,0xe602,0x399,0x306,0xe602,0x399,0x304,0xe602,0x399,
-0x300,0xe602,0x1ffe,0x300,0xe602,0x1ffe,0x301,0xe602,0x1ffe,0x342,0xe602,0x3c5,0x306,0xe602,0x3c5,0x304,
-0x3cb,0xe643,0x3c5,0x308,0x300,0xe602,0x3c1,0x313,0xe602,0x3c1,0x314,0xe602,0x3c5,0x342,0x3cb,0xe643,
-0x3c5,0x308,0x342,0xe602,0x3a5,0x306,0xe602,0x3a5,0x304,0xe602,0x3a5,0x300,0xe602,0x3a1,0x314,0xe602,
-0xa8,0x300,0x1f7c,0xf043,0x3c9,0x300,0x345,0xf002,0x3c9,0x345,0x3ce,0xf043,0x3c9,0x301,0x345,0x1ff6,
-0xf043,0x3c9,0x342,0x345,0xe602,0x39f,0x300,0xe602,0x3a9,0x300,0xf002,0x3a9,0x345,0x102,0x2190,0x338,
-0x102,0x2192,0x338,0x102,0x2194,0x338,0x102,0x21d0,0x338,0x102,0x21d4,0x338,0x102,0x21d2,0x338,0x102,
-0x2203,0x338,0x102,0x2208,0x338,0x102,0x220b,0x338,0x102,0x2223,0x338,0x102,0x2225,0x338,0x102,0x223c,
-0x338,0x102,0x2243,0x338,0x102,0x2245,0x338,0x102,0x2248,0x338,0x102,0x3d,0x338,0x102,0x2261,0x338,
-0x102,0x224d,0x338,0x102,0x3c,0x338,0x102,0x3e,0x338,0x102,0x2264,0x338,0x102,0x2265,0x338,0x102,
-0x2272,0x338,0x102,0x2273,0x338,0x102,0x2276,0x338,0x102,0x2277,0x338,0x102,0x227a,0x338,0x102,0x227b,
-0x338,0x102,0x2282,0x338,0x102,0x2283,0x338,0x102,0x2286,0x338,0x102,0x2287,0x338,0x102,0x22a2,0x338,
-0x102,0x22a8,0x338,0x102,0x22a9,0x338,0x102,0x22ab,0x338,0x102,0x227c,0x338,0x102,0x227d,0x338,0x102,
-0x2291,0x338,0x102,0x2292,0x338,0x102,0x22b2,0x338,0x102,0x22b3,0x338,0x102,0x22b4,0x338,0x102,0x22b5,
-0x338,0x802,0x304b,0x3099,0x802,0x304d,0x3099,0x802,0x304f,0x3099,0x802,0x3051,0x3099,0x802,0x3053,0x3099,
-0x802,0x3055,0x3099,0x802,0x3057,0x3099,0x802,0x3059,0x3099,0x802,0x305b,0x3099,0x802,0x305d,0x3099,0x802,
-0x305f,0x3099,0x802,0x3061,0x3099,0x802,0x3064,0x3099,0x802,0x3066,0x3099,0x802,0x3068,0x3099,0x802,0x306f,
-0x3099,0x802,0x306f,0x309a,0x802,0x3072,0x3099,0x802,0x3072,0x309a,0x802,0x3075,0x3099,0x802,0x3075,0x309a,
-0x802,0x3078,0x3099,0x802,0x3078,0x309a,0x802,0x307b,0x3099,0x802,0x307b,0x309a,0x802,0x3046,0x3099,0x802,
-0x309d,0x3099,0x802,0x30ab,0x3099,0x802,0x30ad,0x3099,0x802,0x30af,0x3099,0x802,0x30b1,0x3099,0x802,0x30b3,
-0x3099,0x802,0x30b5,0x3099,0x802,0x30b7,0x3099,0x802,0x30b9,0x3099,0x802,0x30bb,0x3099,0x802,0x30bd,0x3099,
-0x802,0x30bf,0x3099,0x802,0x30c1,0x3099,0x802,0x30c4,0x3099,0x802,0x30c6,0x3099,0x802,0x30c8,0x3099,0x802,
-0x30cf,0x3099,0x802,0x30cf,0x309a,0x802,0x30d2,0x3099,0x802,0x30d2,0x309a,0x802,0x30d5,0x3099,0x802,0x30d5,
-0x309a,0x802,0x30d8,0x3099,0x802,0x30d8,0x309a,0x802,0x30db,0x3099,0x802,0x30db,0x309a,0x802,0x30a6,0x3099,
-0x802,0x30ef,0x3099,0x802,0x30f0,0x3099,0x802,0x30f1,0x3099,0x802,0x30f2,0x3099,0x802,0x30fd,0x3099,0x704,
-0xd804,0xdc99,0xd804,0xdcba,0x704,0xd804,0xdc9b,0xd804,0xdcba,0x704,0xd804,0xdca5,0xd804,0xdcba,4,0xd804,
-0xdd31,0xd804,0xdd27,4,0xd804,0xdd32,0xd804,0xdd27,4,0xd804,0xdf47,0xd804,0xdf3e,4,0xd804,0xdf47,
-0xd804,0xdf57,4,0xd805,0xdcb9,0xd805,0xdcba,4,0xd805,0xdcb9,0xd805,0xdcb0,4,0xd805,0xdcb9,0xd805,
-0xdcbd,4,0xd805,0xddb8,0xd805,0xddaf,4,0xd805,0xddb9,0xd805,0xddaf,4,0xd806,0xdd35,0xd806,0xdd30,
-1,0x2b9,1,0x3b,1,0xb7,0x702,0x915,0x93c,0x702,0x916,0x93c,0x702,0x917,0x93c,0x702,
-0x91c,0x93c,0x702,0x921,0x93c,0x702,0x922,0x93c,0x702,0x92b,0x93c,0x702,0x92f,0x93c,0x702,0x9a1,
-0x9bc,0x702,0x9a2,0x9bc,0x702,0x9af,0x9bc,0x702,0xa32,0xa3c,0x702,0xa38,0xa3c,0x702,0xa16,0xa3c,
-0x702,0xa17,0xa3c,0x702,0xa1c,0xa3c,0x702,0xa2b,0xa3c,0x702,0xb21,0xb3c,0x702,0xb22,0xb3c,2,
-0xf42,0xfb7,2,0xf4c,0xfb7,2,0xf51,0xfb7,2,0xf56,0xfb7,2,0xf5b,0xfb7,2,0xf40,
-0xfb5,0x8202,0xfb2,0xf80,0x8202,0xfb3,0xf80,2,0xf92,0xfb7,2,0xf9c,0xfb7,2,0xfa1,0xfb7,
-2,0xfa6,0xfb7,2,0xfab,0xfb7,2,0xf90,0xfb5,1,0x3b9,1,0x60,1,0xb4,1,
-0x3a9,1,0x4b,1,0x3008,1,0x3009,0x102,0x2add,0x338,1,0x8c48,1,0x66f4,1,0x8eca,
-1,0x8cc8,1,0x6ed1,1,0x4e32,1,0x53e5,1,0x9f9c,1,0x5951,1,0x91d1,1,0x5587,
-1,0x5948,1,0x61f6,1,0x7669,1,0x7f85,1,0x863f,1,0x87ba,1,0x88f8,1,0x908f,
-1,0x6a02,1,0x6d1b,1,0x70d9,1,0x73de,1,0x843d,1,0x916a,1,0x99f1,1,0x4e82,
-1,0x5375,1,0x6b04,1,0x721b,1,0x862d,1,0x9e1e,1,0x5d50,1,0x6feb,1,0x85cd,
-1,0x8964,1,0x62c9,1,0x81d8,1,0x881f,1,0x5eca,1,0x6717,1,0x6d6a,1,0x72fc,
-1,0x90ce,1,0x4f86,1,0x51b7,1,0x52de,1,0x64c4,1,0x6ad3,1,0x7210,1,0x76e7,
-1,0x8001,1,0x8606,1,0x865c,1,0x8def,1,0x9732,1,0x9b6f,1,0x9dfa,1,0x788c,
-1,0x797f,1,0x7da0,1,0x83c9,1,0x9304,1,0x9e7f,1,0x8ad6,1,0x58df,1,0x5f04,
-1,0x7c60,1,0x807e,1,0x7262,1,0x78ca,1,0x8cc2,1,0x96f7,1,0x58d8,1,0x5c62,
-1,0x6a13,1,0x6dda,1,0x6f0f,1,0x7d2f,1,0x7e37,1,0x964b,1,0x52d2,1,0x808b,
-1,0x51dc,1,0x51cc,1,0x7a1c,1,0x7dbe,1,0x83f1,1,0x9675,1,0x8b80,1,0x62cf,
-1,0x8afe,1,0x4e39,1,0x5be7,1,0x6012,1,0x7387,1,0x7570,1,0x5317,1,0x78fb,
-1,0x4fbf,1,0x5fa9,1,0x4e0d,1,0x6ccc,1,0x6578,1,0x7d22,1,0x53c3,1,0x585e,
-1,0x7701,1,0x8449,1,0x8aaa,1,0x6bba,1,0x8fb0,1,0x6c88,1,0x62fe,1,0x82e5,
-1,0x63a0,1,0x7565,1,0x4eae,1,0x5169,1,0x51c9,1,0x6881,1,0x7ce7,1,0x826f,
-1,0x8ad2,1,0x91cf,1,0x52f5,1,0x5442,1,0x5973,1,0x5eec,1,0x65c5,1,0x6ffe,
-1,0x792a,1,0x95ad,1,0x9a6a,1,0x9e97,1,0x9ece,1,0x529b,1,0x66c6,1,0x6b77,
-1,0x8f62,1,0x5e74,1,0x6190,1,0x6200,1,0x649a,1,0x6f23,1,0x7149,1,0x7489,
-1,0x79ca,1,0x7df4,1,0x806f,1,0x8f26,1,0x84ee,1,0x9023,1,0x934a,1,0x5217,
-1,0x52a3,1,0x54bd,1,0x70c8,1,0x88c2,1,0x5ec9,1,0x5ff5,1,0x637b,1,0x6bae,
-1,0x7c3e,1,0x7375,1,0x4ee4,1,0x56f9,1,0x5dba,1,0x601c,1,0x73b2,1,0x7469,
-1,0x7f9a,1,0x8046,1,0x9234,1,0x96f6,1,0x9748,1,0x9818,1,0x4f8b,1,0x79ae,
-1,0x91b4,1,0x96b8,1,0x60e1,1,0x4e86,1,0x50da,1,0x5bee,1,0x5c3f,1,0x6599,
-1,0x71ce,1,0x7642,1,0x84fc,1,0x907c,1,0x9f8d,1,0x6688,1,0x962e,1,0x5289,
-1,0x677b,1,0x67f3,1,0x6d41,1,0x6e9c,1,0x7409,1,0x7559,1,0x786b,1,0x7d10,
-1,0x985e,1,0x516d,1,0x622e,1,0x9678,1,0x502b,1,0x5d19,1,0x6dea,1,0x8f2a,
-1,0x5f8b,1,0x6144,1,0x6817,1,0x9686,1,0x5229,1,0x540f,1,0x5c65,1,0x6613,
-1,0x674e,1,0x68a8,1,0x6ce5,1,0x7406,1,0x75e2,1,0x7f79,1,0x88cf,1,0x88e1,
-1,0x91cc,1,0x96e2,1,0x533f,1,0x6eba,1,0x541d,1,0x71d0,1,0x7498,1,0x85fa,
-1,0x96a3,1,0x9c57,1,0x9e9f,1,0x6797,1,0x6dcb,1,0x81e8,1,0x7acb,1,0x7b20,
-1,0x7c92,1,0x72c0,1,0x7099,1,0x8b58,1,0x4ec0,1,0x8336,1,0x523a,1,0x5207,
-1,0x5ea6,1,0x62d3,1,0x7cd6,1,0x5b85,1,0x6d1e,1,0x66b4,1,0x8f3b,1,0x884c,
-1,0x964d,1,0x898b,1,0x5ed3,1,0x5140,1,0x55c0,1,0x585a,1,0x6674,1,0x51de,
-1,0x732a,1,0x76ca,1,0x793c,1,0x795e,1,0x7965,1,0x798f,1,0x9756,1,0x7cbe,
-1,0x7fbd,1,0x8612,1,0x8af8,1,0x9038,1,0x90fd,1,0x98ef,1,0x98fc,1,0x9928,
-1,0x9db4,1,0x90de,1,0x96b7,1,0x4fae,1,0x50e7,1,0x514d,1,0x52c9,1,0x52e4,
-1,0x5351,1,0x559d,1,0x5606,1,0x5668,1,0x5840,1,0x58a8,1,0x5c64,1,0x5c6e,
-1,0x6094,1,0x6168,1,0x618e,1,0x61f2,1,0x654f,1,0x65e2,1,0x6691,1,0x6885,
-1,0x6d77,1,0x6e1a,1,0x6f22,1,0x716e,1,0x722b,1,0x7422,1,0x7891,1,0x793e,
-1,0x7949,1,0x7948,1,0x7950,1,0x7956,1,0x795d,1,0x798d,1,0x798e,1,0x7a40,
-1,0x7a81,1,0x7bc0,1,0x7e09,1,0x7e41,1,0x7f72,1,0x8005,1,0x81ed,1,0x8279,
-1,0x8457,1,0x8910,1,0x8996,1,0x8b01,1,0x8b39,1,0x8cd3,1,0x8d08,1,0x8fb6,
-1,0x96e3,1,0x97ff,1,0x983b,1,0x6075,2,0xd850,0xdeee,1,0x8218,1,0x4e26,1,
-0x51b5,1,0x5168,1,0x4f80,1,0x5145,1,0x5180,1,0x52c7,1,0x52fa,1,0x5555,1,
-0x5599,1,0x55e2,1,0x58b3,1,0x5944,1,0x5954,1,0x5a62,1,0x5b28,1,0x5ed2,1,
-0x5ed9,1,0x5f69,1,0x5fad,1,0x60d8,1,0x614e,1,0x6108,1,0x6160,1,0x6234,1,
-0x63c4,1,0x641c,1,0x6452,1,0x6556,1,0x671b,1,0x6756,1,0x6b79,1,0x6edb,1,
-0x6ecb,1,0x701e,1,0x77a7,1,0x7235,1,0x72af,1,0x7471,1,0x7506,1,0x753b,1,
-0x761d,1,0x761f,1,0x76db,1,0x76f4,1,0x774a,1,0x7740,1,0x78cc,1,0x7ab1,1,
-0x7c7b,1,0x7d5b,1,0x7f3e,1,0x8352,1,0x83ef,1,0x8779,1,0x8941,1,0x8986,1,
-0x8abf,1,0x8acb,1,0x8aed,1,0x8b8a,1,0x8f38,1,0x9072,1,0x9199,1,0x9276,1,
-0x967c,1,0x97db,1,0x980b,1,0x9b12,2,0xd84a,0xdc4a,2,0xd84a,0xdc44,2,0xd84c,0xdfd5,
-1,0x3b9d,1,0x4018,1,0x4039,2,0xd854,0xde49,2,0xd857,0xdcd0,2,0xd85f,0xded3,1,
-0x9f43,1,0x9f8e,0xe02,0x5d9,0x5b4,0x1102,0x5f2,0x5b7,0x1802,0x5e9,0x5c1,0x1902,0x5e9,0x5c2,0xfb49,
-0x1843,0x5e9,0x5bc,0x5c1,0xfb49,0x1943,0x5e9,0x5bc,0x5c2,0x1102,0x5d0,0x5b7,0x1202,0x5d0,0x5b8,0x1502,
-0x5d0,0x5bc,0x1502,0x5d1,0x5bc,0x1502,0x5d2,0x5bc,0x1502,0x5d3,0x5bc,0x1502,0x5d4,0x5bc,0x1502,0x5d5,
-0x5bc,0x1502,0x5d6,0x5bc,0x1502,0x5d8,0x5bc,0x1502,0x5d9,0x5bc,0x1502,0x5da,0x5bc,0x1502,0x5db,0x5bc,
-0x1502,0x5dc,0x5bc,0x1502,0x5de,0x5bc,0x1502,0x5e0,0x5bc,0x1502,0x5e1,0x5bc,0x1502,0x5e3,0x5bc,0x1502,
-0x5e4,0x5bc,0x1502,0x5e6,0x5bc,0x1502,0x5e7,0x5bc,0x1502,0x5e8,0x5bc,0x1502,0x5e9,0x5bc,0x1502,0x5ea,
-0x5bc,0x1302,0x5d5,0x5b9,0x1702,0x5d1,0x5bf,0x1702,0x5db,0x5bf,0x1702,0x5e4,0x5bf,0xd804,0xd834,0xdd57,
-0xd834,0xdd65,0xd804,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd5f,0xd834,0xdd6e,4,0xd846,0xd834,0xdd58,0xd834,
-0xdd65,0xd834,0xdd6e,0xd834,0xdd5f,0xd834,0xdd6f,4,0xd846,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd6f,0xd834,
-0xdd5f,0xd834,0xdd70,4,0xd846,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd70,0xd834,0xdd5f,0xd834,0xdd71,4,
-0xd846,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd71,0xd834,0xdd5f,0xd834,0xdd72,4,0xd846,0xd834,0xdd58,0xd834,
-0xdd65,0xd834,0xdd72,0xd804,0xd834,0xddb9,0xd834,0xdd65,0xd804,0xd834,0xddba,0xd834,0xdd65,0xd834,0xddbb,0xd834,
-0xdd6e,4,0xd846,0xd834,0xddb9,0xd834,0xdd65,0xd834,0xdd6e,0xd834,0xddbc,0xd834,0xdd6e,4,0xd846,0xd834,
-0xddba,0xd834,0xdd65,0xd834,0xdd6e,0xd834,0xddbb,0xd834,0xdd6f,4,0xd846,0xd834,0xddb9,0xd834,0xdd65,0xd834,
-0xdd6f,0xd834,0xddbc,0xd834,0xdd6f,4,0xd846,0xd834,0xddba,0xd834,0xdd65,0xd834,0xdd6f,1,0x4e3d,1,
-0x4e38,1,0x4e41,2,0xd840,0xdd22,1,0x4f60,1,0x4fbb,1,0x5002,1,0x507a,1,0x5099,
-1,0x50cf,1,0x349e,2,0xd841,0xde3a,1,0x5154,1,0x5164,1,0x5177,2,0xd841,0xdd1c,
-1,0x34b9,1,0x5167,1,0x518d,2,0xd841,0xdd4b,1,0x5197,1,0x51a4,1,0x4ecc,1,
-0x51ac,2,0xd864,0xdddf,1,0x51f5,1,0x5203,1,0x34df,1,0x523b,1,0x5246,1,0x5272,
-1,0x5277,1,0x3515,1,0x5305,1,0x5306,1,0x5349,1,0x535a,1,0x5373,1,0x537d,
-1,0x537f,2,0xd842,0xde2c,1,0x7070,1,0x53ca,1,0x53df,2,0xd842,0xdf63,1,0x53eb,
-1,0x53f1,1,0x5406,1,0x549e,1,0x5438,1,0x5448,1,0x5468,1,0x54a2,1,0x54f6,
-1,0x5510,1,0x5553,1,0x5563,1,0x5584,1,0x55ab,1,0x55b3,1,0x55c2,1,0x5716,
-1,0x5717,1,0x5651,1,0x5674,1,0x58ee,1,0x57ce,1,0x57f4,1,0x580d,1,0x578b,
-1,0x5832,1,0x5831,1,0x58ac,2,0xd845,0xdce4,1,0x58f2,1,0x58f7,1,0x5906,1,
-0x591a,1,0x5922,1,0x5962,2,0xd845,0xdea8,2,0xd845,0xdeea,1,0x59ec,1,0x5a1b,1,
-0x5a27,1,0x59d8,1,0x5a66,1,0x36ee,1,0x36fc,1,0x5b08,1,0x5b3e,2,0xd846,0xddc8,
-1,0x5bc3,1,0x5bd8,1,0x5bf3,2,0xd846,0xdf18,1,0x5bff,1,0x5c06,1,0x5f53,1,
-0x5c22,1,0x3781,1,0x5c60,1,0x5cc0,1,0x5c8d,2,0xd847,0xdde4,1,0x5d43,2,0xd847,
-0xdde6,1,0x5d6e,1,0x5d6b,1,0x5d7c,1,0x5de1,1,0x5de2,1,0x382f,1,0x5dfd,1,
-0x5e28,1,0x5e3d,1,0x5e69,1,0x3862,2,0xd848,0xdd83,1,0x387c,1,0x5eb0,1,0x5eb3,
-1,0x5eb6,2,0xd868,0xdf92,1,0x5efe,2,0xd848,0xdf31,1,0x8201,1,0x5f22,1,0x38c7,
-2,0xd84c,0xdeb8,2,0xd858,0xddda,1,0x5f62,1,0x5f6b,1,0x38e3,1,0x5f9a,1,0x5fcd,
-1,0x5fd7,1,0x5ff9,1,0x6081,1,0x393a,1,0x391c,2,0xd849,0xded4,1,0x60c7,1,
-0x6148,1,0x614c,1,0x617a,1,0x61b2,1,0x61a4,1,0x61af,1,0x61de,1,0x6210,1,
-0x621b,1,0x625d,1,0x62b1,1,0x62d4,1,0x6350,2,0xd84a,0xdf0c,1,0x633d,1,0x62fc,
-1,0x6368,1,0x6383,1,0x63e4,2,0xd84a,0xdff1,1,0x6422,1,0x63c5,1,0x63a9,1,
-0x3a2e,1,0x6469,1,0x647e,1,0x649d,1,0x6477,1,0x3a6c,1,0x656c,2,0xd84c,0xdc0a,
-1,0x65e3,1,0x66f8,1,0x6649,1,0x3b19,1,0x3b08,1,0x3ae4,1,0x5192,1,0x5195,
-1,0x6700,1,0x669c,1,0x80ad,1,0x43d9,1,0x6721,1,0x675e,1,0x6753,2,0xd84c,
-0xdfc3,1,0x3b49,1,0x67fa,1,0x6785,1,0x6852,2,0xd84d,0xdc6d,1,0x688e,1,0x681f,
-1,0x6914,1,0x6942,1,0x69a3,1,0x69ea,1,0x6aa8,2,0xd84d,0xdea3,1,0x6adb,1,
-0x3c18,1,0x6b21,2,0xd84e,0xdca7,1,0x6b54,1,0x3c4e,1,0x6b72,1,0x6b9f,1,0x6bbb,
-2,0xd84e,0xde8d,2,0xd847,0xdd0b,2,0xd84e,0xdefa,1,0x6c4e,2,0xd84f,0xdcbc,1,0x6cbf,
-1,0x6ccd,1,0x6c67,1,0x6d16,1,0x6d3e,1,0x6d69,1,0x6d78,1,0x6d85,2,0xd84f,
-0xdd1e,1,0x6d34,1,0x6e2f,1,0x6e6e,1,0x3d33,1,0x6ec7,2,0xd84f,0xded1,1,0x6df9,
-1,0x6f6e,2,0xd84f,0xdf5e,2,0xd84f,0xdf8e,1,0x6fc6,1,0x7039,1,0x701b,1,0x3d96,
-1,0x704a,1,0x707d,1,0x7077,1,0x70ad,2,0xd841,0xdd25,1,0x7145,2,0xd850,0xde63,
-1,0x719c,2,0xd850,0xdfab,1,0x7228,1,0x7250,2,0xd851,0xde08,1,0x7280,1,0x7295,
-2,0xd851,0xdf35,2,0xd852,0xdc14,1,0x737a,1,0x738b,1,0x3eac,1,0x73a5,1,0x3eb8,
-1,0x7447,1,0x745c,1,0x7485,1,0x74ca,1,0x3f1b,1,0x7524,2,0xd853,0xdc36,1,
-0x753e,2,0xd853,0xdc92,2,0xd848,0xdd9f,1,0x7610,2,0xd853,0xdfa1,2,0xd853,0xdfb8,2,
-0xd854,0xdc44,1,0x3ffc,1,0x4008,2,0xd854,0xdcf3,2,0xd854,0xdcf2,2,0xd854,0xdd19,2,
-0xd854,0xdd33,1,0x771e,1,0x771f,1,0x778b,1,0x4046,1,0x4096,2,0xd855,0xdc1d,1,
-0x784e,1,0x40e3,2,0xd855,0xde26,2,0xd855,0xde9a,2,0xd855,0xdec5,1,0x79eb,1,0x412f,
-1,0x7a4a,1,0x7a4f,2,0xd856,0xdd7c,2,0xd856,0xdea7,1,0x7aee,1,0x4202,2,0xd856,
-0xdfab,1,0x7bc6,1,0x7bc9,1,0x4227,2,0xd857,0xdc80,1,0x7cd2,1,0x42a0,1,0x7ce8,
-1,0x7ce3,1,0x7d00,2,0xd857,0xdf86,1,0x7d63,1,0x4301,1,0x7dc7,1,0x7e02,1,
-0x7e45,1,0x4334,2,0xd858,0xde28,2,0xd858,0xde47,1,0x4359,2,0xd858,0xded9,1,0x7f7a,
-2,0xd858,0xdf3e,1,0x7f95,1,0x7ffa,2,0xd859,0xdcda,2,0xd859,0xdd23,1,0x8060,2,
-0xd859,0xdda8,1,0x8070,2,0xd84c,0xdf5f,1,0x43d5,1,0x80b2,1,0x8103,1,0x440b,1,
-0x813e,1,0x5ab5,2,0xd859,0xdfa7,2,0xd859,0xdfb5,2,0xd84c,0xdf93,2,0xd84c,0xdf9c,1,
-0x8204,1,0x8f9e,1,0x446b,1,0x8291,1,0x828b,1,0x829d,1,0x52b3,1,0x82b1,1,
-0x82b3,1,0x82bd,1,0x82e6,2,0xd85a,0xdf3c,1,0x831d,1,0x8363,1,0x83ad,1,0x8323,
-1,0x83bd,1,0x83e7,1,0x8353,1,0x83ca,1,0x83cc,1,0x83dc,2,0xd85b,0xdc36,2,
-0xd85b,0xdd6b,2,0xd85b,0xdcd5,1,0x452b,1,0x84f1,1,0x84f3,1,0x8516,2,0xd85c,0xdfca,
-1,0x8564,2,0xd85b,0xdf2c,1,0x455d,1,0x4561,2,0xd85b,0xdfb1,2,0xd85c,0xdcd2,1,
-0x456b,1,0x8650,1,0x8667,1,0x8669,1,0x86a9,1,0x8688,1,0x870e,1,0x86e2,1,
-0x8728,1,0x876b,1,0x8786,1,0x45d7,1,0x87e1,1,0x8801,1,0x45f9,1,0x8860,1,
-0x8863,2,0xd85d,0xde67,1,0x88d7,1,0x88de,1,0x4635,1,0x88fa,1,0x34bb,2,0xd85e,
-0xdcae,2,0xd85e,0xdd66,1,0x46be,1,0x46c7,1,0x8aa0,1,0x8c55,2,0xd85f,0xdca8,1,
-0x8cab,1,0x8cc1,1,0x8d1b,1,0x8d77,2,0xd85f,0xdf2f,2,0xd842,0xdc04,1,0x8dcb,1,
-0x8dbc,1,0x8df0,2,0xd842,0xdcde,1,0x8ed4,2,0xd861,0xddd2,2,0xd861,0xdded,1,0x9094,
-1,0x90f1,1,0x9111,2,0xd861,0xdf2e,1,0x911b,1,0x9238,1,0x92d7,1,0x92d8,1,
-0x927c,1,0x93f9,1,0x9415,2,0xd862,0xdffa,1,0x958b,1,0x4995,1,0x95b7,2,0xd863,
-0xdd77,1,0x49e6,1,0x96c3,1,0x5db2,1,0x9723,2,0xd864,0xdd45,2,0xd864,0xde1a,1,
-0x4a6e,1,0x4a76,1,0x97e0,2,0xd865,0xdc0a,1,0x4ab2,2,0xd865,0xdc96,1,0x9829,2,
-0xd865,0xddb6,1,0x98e2,1,0x4b33,1,0x9929,1,0x99a7,1,0x99c2,1,0x99fe,1,0x4bce,
-2,0xd866,0xdf30,1,0x9c40,1,0x9cfd,1,0x4cce,1,0x4ced,1,0x9d67,2,0xd868,0xdcce,
-1,0x4cf8,2,0xd868,0xdd05,2,0xd868,0xde0e,2,0xd868,0xde91,1,0x9ebb,1,0x4d56,1,
-0x9ef9,1,0x9efe,1,0x9f05,1,0x9f0f,1,0x9f16,1,0x9f3b,2,0xd869,0xde00,0x3ac,0xe642,
-0x3b1,0x301,0x3ad,0xe642,0x3b5,0x301,0x3ae,0xe642,0x3b7,0x301,0x3af,0xe642,0x3b9,0x301,0x3cc,0xe642,
-0x3bf,0x301,0x3cd,0xe642,0x3c5,0x301,0x3ce,0xe642,0x3c9,0x301,0x386,0xe642,0x391,0x301,0x388,0xe642,
-0x395,0x301,0x389,0xe642,0x397,0x301,0x390,1,0xe643,0x3b9,0x308,0x301,0x38a,0xe642,0x399,0x301,
-0x3b0,1,0xe643,0x3c5,0x308,0x301,0x38e,0xe642,0x3a5,0x301,0x385,0xe642,0xa8,0x301,0x38c,0xe642,
-0x39f,0x301,0x38f,0xe642,0x3a9,0x301,0xc5,0xe642,0x41,0x30a,0xe6e6,0xe681,0x300,0xe6e6,0xe681,0x301,
-0xe6e6,0xe681,0x313,0xe6e6,0xe682,0x308,0x301,0x8100,0x8282,0xf71,0xf72,0x8100,0x8482,0xf71,0xf74,0x8100,
-0x8282,0xf71,0xf80,0
-};
-
-static const uint8_t norm2_nfc_data_smallFCD[256]={
-0xc0,0xef,3,0x7f,0xdf,0x70,0xcf,0x87,0xc7,0xe6,0x66,0x46,0x64,0x46,0x66,0x5b,
-0x12,0,0,4,0,0,0,0x43,0x20,2,0x69,0xae,0xc2,0xc0,0xff,0xff,
-0xc0,0x72,0xbf,0,0,0,0,0,0,0,0x40,0,0x80,0x88,0,0,
-0xfe,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0x98,0,0xc3,0x66,0xe0,0x80,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,7,0,0,2,0
-};
-
-#endif // INCLUDED_FROM_NORMALIZER2_CPP
diff --git a/contrib/libs/icu/common/norm2allmodes.h b/contrib/libs/icu/common/norm2allmodes.h
deleted file mode 100644
index 682ece28f13..00000000000
--- a/contrib/libs/icu/common/norm2allmodes.h
+++ /dev/null
@@ -1,369 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* norm2allmodes.h
-*
-* created on: 2014sep07
-* created by: Markus W. Scherer
-*/
-
-#ifndef __NORM2ALLMODES_H__
-#define __NORM2ALLMODES_H__
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_NORMALIZATION
-
-#include "unicode/edits.h"
-#include "unicode/normalizer2.h"
-#include "unicode/stringoptions.h"
-#include "unicode/unistr.h"
-#include "cpputils.h"
-#include "normalizer2impl.h"
-
-U_NAMESPACE_BEGIN
-
-// Intermediate class:
-// Has Normalizer2Impl and does boilerplate argument checking and setup.
-class Normalizer2WithImpl : public Normalizer2 {
-public:
- Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
- virtual ~Normalizer2WithImpl();
-
- // normalize
- virtual UnicodeString &
- normalize(const UnicodeString &src,
- UnicodeString &dest,
- UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- dest.setToBogus();
- return dest;
- }
- const UChar *sArray=src.getBuffer();
- if(&dest==&src || sArray==NULL) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- dest.setToBogus();
- return dest;
- }
- dest.remove();
- ReorderingBuffer buffer(impl, dest);
- if(buffer.init(src.length(), errorCode)) {
- normalize(sArray, sArray+src.length(), buffer, errorCode);
- }
- return dest;
- }
- virtual void
- normalize(const UChar *src, const UChar *limit,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
-
- // normalize and append
- virtual UnicodeString &
- normalizeSecondAndAppend(UnicodeString &first,
- const UnicodeString &second,
- UErrorCode &errorCode) const {
- return normalizeSecondAndAppend(first, second, TRUE, errorCode);
- }
- virtual UnicodeString &
- append(UnicodeString &first,
- const UnicodeString &second,
- UErrorCode &errorCode) const {
- return normalizeSecondAndAppend(first, second, FALSE, errorCode);
- }
- UnicodeString &
- normalizeSecondAndAppend(UnicodeString &first,
- const UnicodeString &second,
- UBool doNormalize,
- UErrorCode &errorCode) const {
- uprv_checkCanGetBuffer(first, errorCode);
- if(U_FAILURE(errorCode)) {
- return first;
- }
- const UChar *secondArray=second.getBuffer();
- if(&first==&second || secondArray==NULL) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return first;
- }
- int32_t firstLength=first.length();
- UnicodeString safeMiddle;
- {
- ReorderingBuffer buffer(impl, first);
- if(buffer.init(firstLength+second.length(), errorCode)) {
- normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
- safeMiddle, buffer, errorCode);
- }
- } // The ReorderingBuffer destructor finalizes the first string.
- if(U_FAILURE(errorCode)) {
- // Restore the modified suffix of the first string.
- first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
- }
- return first;
- }
- virtual void
- normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
- UnicodeString &safeMiddle,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
- virtual UBool
- getDecomposition(UChar32 c, UnicodeString &decomposition) const {
- UChar buffer[4];
- int32_t length;
- const UChar *d=impl.getDecomposition(c, buffer, length);
- if(d==NULL) {
- return FALSE;
- }
- if(d==buffer) {
- decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
- } else {
- decomposition.setTo(FALSE, d, length); // read-only alias
- }
- return TRUE;
- }
- virtual UBool
- getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
- UChar buffer[30];
- int32_t length;
- const UChar *d=impl.getRawDecomposition(c, buffer, length);
- if(d==NULL) {
- return FALSE;
- }
- if(d==buffer) {
- decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
- } else {
- decomposition.setTo(FALSE, d, length); // read-only alias
- }
- return TRUE;
- }
- virtual UChar32
- composePair(UChar32 a, UChar32 b) const {
- return impl.composePair(a, b);
- }
-
- virtual uint8_t
- getCombiningClass(UChar32 c) const {
- return impl.getCC(impl.getNorm16(c));
- }
-
- // quick checks
- virtual UBool
- isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- const UChar *sArray=s.getBuffer();
- if(sArray==NULL) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- const UChar *sLimit=sArray+s.length();
- return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
- }
- virtual UNormalizationCheckResult
- quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
- return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
- }
- virtual int32_t
- spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- const UChar *sArray=s.getBuffer();
- if(sArray==NULL) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
- }
- virtual const UChar *
- spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
-
- virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
- return UNORM_YES;
- }
-
- const Normalizer2Impl &impl;
-};
-
-class DecomposeNormalizer2 : public Normalizer2WithImpl {
-public:
- DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
- virtual ~DecomposeNormalizer2();
-
-private:
- virtual void
- normalize(const UChar *src, const UChar *limit,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const {
- impl.decompose(src, limit, &buffer, errorCode);
- }
- using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
- virtual void
- normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
- UnicodeString &safeMiddle,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const {
- impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
- }
- virtual const UChar *
- spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
- return impl.decompose(src, limit, NULL, errorCode);
- }
- using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
- virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
- return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
- }
- virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); }
- virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); }
- virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
-};
-
-class ComposeNormalizer2 : public Normalizer2WithImpl {
-public:
- ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
- Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
- virtual ~ComposeNormalizer2();
-
-private:
- virtual void
- normalize(const UChar *src, const UChar *limit,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
- impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
- }
- using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
-
- void
- normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
- Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
- if (U_FAILURE(errorCode)) {
- return;
- }
- if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
- edits->reset();
- }
- const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
- impl.composeUTF8(options, onlyContiguous, s, s + src.length(),
- &sink, edits, errorCode);
- sink.Flush();
- }
-
- virtual void
- normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
- UnicodeString &safeMiddle,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
- impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
- }
-
- virtual UBool
- isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- const UChar *sArray=s.getBuffer();
- if(sArray==NULL) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- UnicodeString temp;
- ReorderingBuffer buffer(impl, temp);
- if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
- return FALSE;
- }
- return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
- }
- virtual UBool
- isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
- return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode);
- }
- virtual UNormalizationCheckResult
- quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
- if(U_FAILURE(errorCode)) {
- return UNORM_MAYBE;
- }
- const UChar *sArray=s.getBuffer();
- if(sArray==NULL) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return UNORM_MAYBE;
- }
- UNormalizationCheckResult qcResult=UNORM_YES;
- impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
- return qcResult;
- }
- virtual const UChar *
- spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE {
- return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
- }
- using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
- virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
- return impl.getCompQuickCheck(impl.getNorm16(c));
- }
- virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
- return impl.hasCompBoundaryBefore(c);
- }
- virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
- return impl.hasCompBoundaryAfter(c, onlyContiguous);
- }
- virtual UBool isInert(UChar32 c) const U_OVERRIDE {
- return impl.isCompInert(c, onlyContiguous);
- }
-
- const UBool onlyContiguous;
-};
-
-class FCDNormalizer2 : public Normalizer2WithImpl {
-public:
- FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
- virtual ~FCDNormalizer2();
-
-private:
- virtual void
- normalize(const UChar *src, const UChar *limit,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const {
- impl.makeFCD(src, limit, &buffer, errorCode);
- }
- using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
- virtual void
- normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
- UnicodeString &safeMiddle,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const {
- impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
- }
- virtual const UChar *
- spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
- return impl.makeFCD(src, limit, NULL, errorCode);
- }
- using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
- virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
- virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
- virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
-};
-
-struct Norm2AllModes : public UMemory {
- Norm2AllModes(Normalizer2Impl *i)
- : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {}
- ~Norm2AllModes();
-
- static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
- static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
- static Norm2AllModes *createInstance(const char *packageName,
- const char *name,
- UErrorCode &errorCode);
-
- static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
- static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
- static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
-
- Normalizer2Impl *impl;
- ComposeNormalizer2 comp;
- DecomposeNormalizer2 decomp;
- FCDNormalizer2 fcd;
- ComposeNormalizer2 fcc;
-};
-
-U_NAMESPACE_END
-
-#endif // !UCONFIG_NO_NORMALIZATION
-#endif // __NORM2ALLMODES_H__
diff --git a/contrib/libs/icu/common/normalizer2.cpp b/contrib/libs/icu/common/normalizer2.cpp
deleted file mode 100644
index 6be7e0b21a2..00000000000
--- a/contrib/libs/icu/common/normalizer2.cpp
+++ /dev/null
@@ -1,572 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2009-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: normalizer2.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2009nov22
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_NORMALIZATION
-
-#include "unicode/edits.h"
-#include "unicode/normalizer2.h"
-#include "unicode/stringoptions.h"
-#include "unicode/unistr.h"
-#include "unicode/unorm.h"
-#include "cstring.h"
-#include "mutex.h"
-#include "norm2allmodes.h"
-#include "normalizer2impl.h"
-#include "uassert.h"
-#include "ucln_cmn.h"
-
-using icu::Normalizer2Impl;
-
-#if NORM2_HARDCODE_NFC_DATA
-// NFC/NFD data machine-generated by gennorm2 --csource
-#define INCLUDED_FROM_NORMALIZER2_CPP
-#include "norm2_nfc_data.h"
-#endif
-
-U_NAMESPACE_BEGIN
-
-// Public API dispatch via Normalizer2 subclasses -------------------------- ***
-
-Normalizer2::~Normalizer2() {}
-
-void
-Normalizer2::normalizeUTF8(uint32_t /*options*/, StringPiece src, ByteSink &sink,
- Edits *edits, UErrorCode &errorCode) const {
- if (U_FAILURE(errorCode)) {
- return;
- }
- if (edits != nullptr) {
- errorCode = U_UNSUPPORTED_ERROR;
- return;
- }
- UnicodeString src16 = UnicodeString::fromUTF8(src);
- normalize(src16, errorCode).toUTF8(sink);
-}
-
-UBool
-Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
- return FALSE;
-}
-
-UChar32
-Normalizer2::composePair(UChar32, UChar32) const {
- return U_SENTINEL;
-}
-
-uint8_t
-Normalizer2::getCombiningClass(UChar32 /*c*/) const {
- return 0;
-}
-
-UBool
-Normalizer2::isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const {
- return U_SUCCESS(errorCode) && isNormalized(UnicodeString::fromUTF8(s), errorCode);
-}
-
-// Normalizer2 implementation for the old UNORM_NONE.
-class NoopNormalizer2 : public Normalizer2 {
- virtual ~NoopNormalizer2();
-
- virtual UnicodeString &
- normalize(const UnicodeString &src,
- UnicodeString &dest,
- UErrorCode &errorCode) const U_OVERRIDE {
- if(U_SUCCESS(errorCode)) {
- if(&dest!=&src) {
- dest=src;
- } else {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- }
- }
- return dest;
- }
- virtual void
- normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
- Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
- if(U_SUCCESS(errorCode)) {
- if (edits != nullptr) {
- if ((options & U_EDITS_NO_RESET) == 0) {
- edits->reset();
- }
- edits->addUnchanged(src.length());
- }
- if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
- sink.Append(src.data(), src.length());
- }
- sink.Flush();
- }
- }
-
- virtual UnicodeString &
- normalizeSecondAndAppend(UnicodeString &first,
- const UnicodeString &second,
- UErrorCode &errorCode) const U_OVERRIDE {
- if(U_SUCCESS(errorCode)) {
- if(&first!=&second) {
- first.append(second);
- } else {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- }
- }
- return first;
- }
- virtual UnicodeString &
- append(UnicodeString &first,
- const UnicodeString &second,
- UErrorCode &errorCode) const U_OVERRIDE {
- if(U_SUCCESS(errorCode)) {
- if(&first!=&second) {
- first.append(second);
- } else {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- }
- }
- return first;
- }
- virtual UBool
- getDecomposition(UChar32, UnicodeString &) const U_OVERRIDE {
- return FALSE;
- }
- // No need to U_OVERRIDE the default getRawDecomposition().
- virtual UBool
- isNormalized(const UnicodeString &, UErrorCode &errorCode) const U_OVERRIDE {
- return U_SUCCESS(errorCode);
- }
- virtual UBool
- isNormalizedUTF8(StringPiece, UErrorCode &errorCode) const U_OVERRIDE {
- return U_SUCCESS(errorCode);
- }
- virtual UNormalizationCheckResult
- quickCheck(const UnicodeString &, UErrorCode &) const U_OVERRIDE {
- return UNORM_YES;
- }
- virtual int32_t
- spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const U_OVERRIDE {
- return s.length();
- }
- virtual UBool hasBoundaryBefore(UChar32) const U_OVERRIDE { return TRUE; }
- virtual UBool hasBoundaryAfter(UChar32) const U_OVERRIDE { return TRUE; }
- virtual UBool isInert(UChar32) const U_OVERRIDE { return TRUE; }
-};
-
-NoopNormalizer2::~NoopNormalizer2() {}
-
-Normalizer2WithImpl::~Normalizer2WithImpl() {}
-
-DecomposeNormalizer2::~DecomposeNormalizer2() {}
-
-ComposeNormalizer2::~ComposeNormalizer2() {}
-
-FCDNormalizer2::~FCDNormalizer2() {}
-
-// instance cache ---------------------------------------------------------- ***
-
-U_CDECL_BEGIN
-static UBool U_CALLCONV uprv_normalizer2_cleanup();
-U_CDECL_END
-
-static Normalizer2 *noopSingleton;
-static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
-
-static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return;
- }
- noopSingleton=new NoopNormalizer2;
- if(noopSingleton==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
-}
-
-const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) { return NULL; }
- umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode);
- return noopSingleton;
-}
-
-const Normalizer2Impl *
-Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
- return &((Normalizer2WithImpl *)norm2)->impl;
-}
-
-Norm2AllModes::~Norm2AllModes() {
- delete impl;
-}
-
-Norm2AllModes *
-Norm2AllModes::createInstance(Normalizer2Impl *impl, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- delete impl;
- return NULL;
- }
- Norm2AllModes *allModes=new Norm2AllModes(impl);
- if(allModes==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- delete impl;
- return NULL;
- }
- return allModes;
-}
-
-#if NORM2_HARDCODE_NFC_DATA
-Norm2AllModes *
-Norm2AllModes::createNFCInstance(UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- Normalizer2Impl *impl=new Normalizer2Impl;
- if(impl==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- impl->init(norm2_nfc_data_indexes, &norm2_nfc_data_trie,
- norm2_nfc_data_extraData, norm2_nfc_data_smallFCD);
- return createInstance(impl, errorCode);
-}
-
-static Norm2AllModes *nfcSingleton;
-
-static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
-
-static void U_CALLCONV initNFCSingleton(UErrorCode &errorCode) {
- nfcSingleton=Norm2AllModes::createNFCInstance(errorCode);
- ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
-}
-
-const Norm2AllModes *
-Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) { return NULL; }
- umtx_initOnce(nfcInitOnce, &initNFCSingleton, errorCode);
- return nfcSingleton;
-}
-
-const Normalizer2 *
-Normalizer2::getNFCInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
- return allModes!=NULL ? &allModes->comp : NULL;
-}
-
-const Normalizer2 *
-Normalizer2::getNFDInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
- return allModes!=NULL ? &allModes->decomp : NULL;
-}
-
-const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
- return allModes!=NULL ? &allModes->fcd : NULL;
-}
-
-const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
- return allModes!=NULL ? &allModes->fcc : NULL;
-}
-
-const Normalizer2Impl *
-Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
- return allModes!=NULL ? allModes->impl : NULL;
-}
-#endif // NORM2_HARDCODE_NFC_DATA
-
-U_CDECL_BEGIN
-
-static UBool U_CALLCONV uprv_normalizer2_cleanup() {
- delete noopSingleton;
- noopSingleton = NULL;
- noopInitOnce.reset();
-#if NORM2_HARDCODE_NFC_DATA
- delete nfcSingleton;
- nfcSingleton = NULL;
- nfcInitOnce.reset();
-#endif
- return TRUE;
-}
-
-U_CDECL_END
-
-U_NAMESPACE_END
-
-// C API ------------------------------------------------------------------- ***
-
-U_NAMESPACE_USE
-
-U_CAPI const UNormalizer2 * U_EXPORT2
-unorm2_getNFCInstance(UErrorCode *pErrorCode) {
- return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode);
-}
-
-U_CAPI const UNormalizer2 * U_EXPORT2
-unorm2_getNFDInstance(UErrorCode *pErrorCode) {
- return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
-}
-
-U_CAPI void U_EXPORT2
-unorm2_close(UNormalizer2 *norm2) {
- delete (Normalizer2 *)norm2;
-}
-
-U_CAPI int32_t U_EXPORT2
-unorm2_normalize(const UNormalizer2 *norm2,
- const UChar *src, int32_t length,
- UChar *dest, int32_t capacity,
- UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if( (src==NULL ? length!=0 : length<-1) ||
- (dest==NULL ? capacity!=0 : capacity<0) ||
- (src==dest && src!=NULL)
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- UnicodeString destString(dest, 0, capacity);
- // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
- if(length!=0) {
- const Normalizer2 *n2=(const Normalizer2 *)norm2;
- const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
- if(n2wi!=NULL) {
- // Avoid duplicate argument checking and support NUL-terminated src.
- ReorderingBuffer buffer(n2wi->impl, destString);
- if(buffer.init(length, *pErrorCode)) {
- n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
- }
- } else {
- UnicodeString srcString(length<0, src, length);
- n2->normalize(srcString, destString, *pErrorCode);
- }
- }
- return destString.extract(dest, capacity, *pErrorCode);
-}
-
-static int32_t
-normalizeSecondAndAppend(const UNormalizer2 *norm2,
- UChar *first, int32_t firstLength, int32_t firstCapacity,
- const UChar *second, int32_t secondLength,
- UBool doNormalize,
- UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
- (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
- (firstCapacity<0 || firstLength<-1)) ||
- (first==second && first!=NULL)
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- UnicodeString firstString(first, firstLength, firstCapacity);
- firstLength=firstString.length(); // In case it was -1.
- // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
- if(secondLength!=0) {
- const Normalizer2 *n2=(const Normalizer2 *)norm2;
- const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
- if(n2wi!=NULL) {
- // Avoid duplicate argument checking and support NUL-terminated src.
- UnicodeString safeMiddle;
- {
- ReorderingBuffer buffer(n2wi->impl, firstString);
- if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1
- n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
- doNormalize, safeMiddle, buffer, *pErrorCode);
- }
- } // The ReorderingBuffer destructor finalizes firstString.
- if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
- // Restore the modified suffix of the first string.
- // This does not restore first[] array contents between firstLength and firstCapacity.
- // (That might be uninitialized memory, as far as we know.)
- if(first!=NULL) { /* don't dereference NULL */
- safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
- if(firstLength<firstCapacity) {
- first[firstLength]=0; // NUL-terminate in case it was originally.
- }
- }
- }
- } else {
- UnicodeString secondString(secondLength<0, second, secondLength);
- if(doNormalize) {
- n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
- } else {
- n2->append(firstString, secondString, *pErrorCode);
- }
- }
- }
- return firstString.extract(first, firstCapacity, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
- UChar *first, int32_t firstLength, int32_t firstCapacity,
- const UChar *second, int32_t secondLength,
- UErrorCode *pErrorCode) {
- return normalizeSecondAndAppend(norm2,
- first, firstLength, firstCapacity,
- second, secondLength,
- TRUE, pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-unorm2_append(const UNormalizer2 *norm2,
- UChar *first, int32_t firstLength, int32_t firstCapacity,
- const UChar *second, int32_t secondLength,
- UErrorCode *pErrorCode) {
- return normalizeSecondAndAppend(norm2,
- first, firstLength, firstCapacity,
- second, secondLength,
- FALSE, pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-unorm2_getDecomposition(const UNormalizer2 *norm2,
- UChar32 c, UChar *decomposition, int32_t capacity,
- UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(decomposition==NULL ? capacity!=0 : capacity<0) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- UnicodeString destString(decomposition, 0, capacity);
- if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
- return destString.extract(decomposition, capacity, *pErrorCode);
- } else {
- return -1;
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-unorm2_getRawDecomposition(const UNormalizer2 *norm2,
- UChar32 c, UChar *decomposition, int32_t capacity,
- UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(decomposition==NULL ? capacity!=0 : capacity<0) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- UnicodeString destString(decomposition, 0, capacity);
- if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
- return destString.extract(decomposition, capacity, *pErrorCode);
- } else {
- return -1;
- }
-}
-
-U_CAPI UChar32 U_EXPORT2
-unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) {
- return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b);
-}
-
-U_CAPI uint8_t U_EXPORT2
-unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
- return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c);
-}
-
-U_CAPI UBool U_EXPORT2
-unorm2_isNormalized(const UNormalizer2 *norm2,
- const UChar *s, int32_t length,
- UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if((s==NULL && length!=0) || length<-1) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- UnicodeString sString(length<0, s, length);
- return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
-}
-
-U_CAPI UNormalizationCheckResult U_EXPORT2
-unorm2_quickCheck(const UNormalizer2 *norm2,
- const UChar *s, int32_t length,
- UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return UNORM_NO;
- }
- if((s==NULL && length!=0) || length<-1) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return UNORM_NO;
- }
- UnicodeString sString(length<0, s, length);
- return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
- const UChar *s, int32_t length,
- UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if((s==NULL && length!=0) || length<-1) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- UnicodeString sString(length<0, s, length);
- return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
-}
-
-U_CAPI UBool U_EXPORT2
-unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
- return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
-}
-
-U_CAPI UBool U_EXPORT2
-unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
- return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
-}
-
-U_CAPI UBool U_EXPORT2
-unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
- return ((const Normalizer2 *)norm2)->isInert(c);
-}
-
-// Some properties APIs ---------------------------------------------------- ***
-
-U_CAPI uint8_t U_EXPORT2
-u_getCombiningClass(UChar32 c) {
- UErrorCode errorCode=U_ZERO_ERROR;
- const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode);
- if(U_SUCCESS(errorCode)) {
- return nfd->getCombiningClass(c);
- } else {
- return 0;
- }
-}
-
-U_CFUNC uint16_t
-unorm_getFCD16(UChar32 c) {
- UErrorCode errorCode=U_ZERO_ERROR;
- const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
- if(U_SUCCESS(errorCode)) {
- return impl->getFCD16(c);
- } else {
- return 0;
- }
-}
-
-#endif // !UCONFIG_NO_NORMALIZATION
diff --git a/contrib/libs/icu/common/normalizer2impl.cpp b/contrib/libs/icu/common/normalizer2impl.cpp
deleted file mode 100644
index cbf6b4d9804..00000000000
--- a/contrib/libs/icu/common/normalizer2impl.cpp
+++ /dev/null
@@ -1,2669 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2009-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: normalizer2impl.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2009nov22
-* created by: Markus W. Scherer
-*/
-
-// #define UCPTRIE_DEBUG
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_NORMALIZATION
-
-#include "unicode/bytestream.h"
-#include "unicode/edits.h"
-#include "unicode/normalizer2.h"
-#include "unicode/stringoptions.h"
-#include "unicode/ucptrie.h"
-#include "unicode/udata.h"
-#include "unicode/umutablecptrie.h"
-#include "unicode/ustring.h"
-#include "unicode/utf16.h"
-#include "unicode/utf8.h"
-#include "bytesinkutil.h"
-#include "cmemory.h"
-#include "mutex.h"
-#include "normalizer2impl.h"
-#include "putilimp.h"
-#include "uassert.h"
-#include "ucptrie_impl.h"
-#include "uset_imp.h"
-#include "uvector.h"
-
-U_NAMESPACE_BEGIN
-
-namespace {
-
-/**
- * UTF-8 lead byte for minNoMaybeCP.
- * Can be lower than the actual lead byte for c.
- * Typically U+0300 for NFC/NFD, U+00A0 for NFKC/NFKD, U+0041 for NFKC_Casefold.
- */
-inline uint8_t leadByteForCP(UChar32 c) {
- if (c <= 0x7f) {
- return (uint8_t)c;
- } else if (c <= 0x7ff) {
- return (uint8_t)(0xc0+(c>>6));
- } else {
- // Should not occur because ccc(U+0300)!=0.
- return 0xe0;
- }
-}
-
-/**
- * Returns the code point from one single well-formed UTF-8 byte sequence
- * between cpStart and cpLimit.
- *
- * Trie UTF-8 macros do not assemble whole code points (for efficiency).
- * When we do need the code point, we call this function.
- * We should not need it for normalization-inert data (norm16==0).
- * Illegal sequences yield the error value norm16==0 just like real normalization-inert code points.
- */
-UChar32 codePointFromValidUTF8(const uint8_t *cpStart, const uint8_t *cpLimit) {
- // Similar to U8_NEXT_UNSAFE(s, i, c).
- U_ASSERT(cpStart < cpLimit);
- uint8_t c = *cpStart;
- switch(cpLimit-cpStart) {
- case 1:
- return c;
- case 2:
- return ((c&0x1f)<<6) | (cpStart[1]&0x3f);
- case 3:
- // no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar)
- return (UChar)((c<<12) | ((cpStart[1]&0x3f)<<6) | (cpStart[2]&0x3f));
- case 4:
- return ((c&7)<<18) | ((cpStart[1]&0x3f)<<12) | ((cpStart[2]&0x3f)<<6) | (cpStart[3]&0x3f);
- default:
- UPRV_UNREACHABLE; // Should not occur.
- }
-}
-
-/**
- * Returns the last code point in [start, p[ if it is valid and in U+1000..U+D7FF.
- * Otherwise returns a negative value.
- */
-UChar32 previousHangulOrJamo(const uint8_t *start, const uint8_t *p) {
- if ((p - start) >= 3) {
- p -= 3;
- uint8_t l = *p;
- uint8_t t1, t2;
- if (0xe1 <= l && l <= 0xed &&
- (t1 = (uint8_t)(p[1] - 0x80)) <= 0x3f &&
- (t2 = (uint8_t)(p[2] - 0x80)) <= 0x3f &&
- (l < 0xed || t1 <= 0x1f)) {
- return ((l & 0xf) << 12) | (t1 << 6) | t2;
- }
- }
- return U_SENTINEL;
-}
-
-/**
- * Returns the offset from the Jamo T base if [src, limit[ starts with a single Jamo T code point.
- * Otherwise returns a negative value.
- */
-int32_t getJamoTMinusBase(const uint8_t *src, const uint8_t *limit) {
- // Jamo T: E1 86 A8..E1 87 82
- if ((limit - src) >= 3 && *src == 0xe1) {
- if (src[1] == 0x86) {
- uint8_t t = src[2];
- // The first Jamo T is U+11A8 but JAMO_T_BASE is 11A7.
- // Offset 0 does not correspond to any conjoining Jamo.
- if (0xa8 <= t && t <= 0xbf) {
- return t - 0xa7;
- }
- } else if (src[1] == 0x87) {
- uint8_t t = src[2];
- if ((int8_t)t <= (int8_t)0x82u) {
- return t - (0xa7 - 0x40);
- }
- }
- }
- return -1;
-}
-
-void
-appendCodePointDelta(const uint8_t *cpStart, const uint8_t *cpLimit, int32_t delta,
- ByteSink &sink, Edits *edits) {
- char buffer[U8_MAX_LENGTH];
- int32_t length;
- int32_t cpLength = (int32_t)(cpLimit - cpStart);
- if (cpLength == 1) {
- // The builder makes ASCII map to ASCII.
- buffer[0] = (uint8_t)(*cpStart + delta);
- length = 1;
- } else {
- int32_t trail = *(cpLimit-1) + delta;
- if (0x80 <= trail && trail <= 0xbf) {
- // The delta only changes the last trail byte.
- --cpLimit;
- length = 0;
- do { buffer[length++] = *cpStart++; } while (cpStart < cpLimit);
- buffer[length++] = (uint8_t)trail;
- } else {
- // Decode the code point, add the delta, re-encode.
- UChar32 c = codePointFromValidUTF8(cpStart, cpLimit) + delta;
- length = 0;
- U8_APPEND_UNSAFE(buffer, length, c);
- }
- }
- if (edits != nullptr) {
- edits->addReplace(cpLength, length);
- }
- sink.Append(buffer, length);
-}
-
-} // namespace
-
-// ReorderingBuffer -------------------------------------------------------- ***
-
-ReorderingBuffer::ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest,
- UErrorCode &errorCode) :
- impl(ni), str(dest),
- start(str.getBuffer(8)), reorderStart(start), limit(start),
- remainingCapacity(str.getCapacity()), lastCC(0) {
- if (start == nullptr && U_SUCCESS(errorCode)) {
- // getBuffer() already did str.setToBogus()
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- }
-}
-
-UBool ReorderingBuffer::init(int32_t destCapacity, UErrorCode &errorCode) {
- int32_t length=str.length();
- start=str.getBuffer(destCapacity);
- if(start==NULL) {
- // getBuffer() already did str.setToBogus()
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
- }
- limit=start+length;
- remainingCapacity=str.getCapacity()-length;
- reorderStart=start;
- if(start==limit) {
- lastCC=0;
- } else {
- setIterator();
- lastCC=previousCC();
- // Set reorderStart after the last code point with cc<=1 if there is one.
- if(lastCC>1) {
- while(previousCC()>1) {}
- }
- reorderStart=codePointLimit;
- }
- return TRUE;
-}
-
-UBool ReorderingBuffer::equals(const UChar *otherStart, const UChar *otherLimit) const {
- int32_t length=(int32_t)(limit-start);
- return
- length==(int32_t)(otherLimit-otherStart) &&
- 0==u_memcmp(start, otherStart, length);
-}
-
-UBool ReorderingBuffer::equals(const uint8_t *otherStart, const uint8_t *otherLimit) const {
- U_ASSERT((otherLimit - otherStart) <= INT32_MAX); // ensured by caller
- int32_t length = (int32_t)(limit - start);
- int32_t otherLength = (int32_t)(otherLimit - otherStart);
- // For equal strings, UTF-8 is at least as long as UTF-16, and at most three times as long.
- if (otherLength < length || (otherLength / 3) > length) {
- return FALSE;
- }
- // Compare valid strings from between normalization boundaries.
- // (Invalid sequences are normalization-inert.)
- for (int32_t i = 0, j = 0;;) {
- if (i >= length) {
- return j >= otherLength;
- } else if (j >= otherLength) {
- return FALSE;
- }
- // Not at the end of either string yet.
- UChar32 c, other;
- U16_NEXT_UNSAFE(start, i, c);
- U8_NEXT_UNSAFE(otherStart, j, other);
- if (c != other) {
- return FALSE;
- }
- }
-}
-
-UBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode) {
- if(remainingCapacity<2 && !resize(2, errorCode)) {
- return FALSE;
- }
- if(lastCC<=cc || cc==0) {
- limit[0]=U16_LEAD(c);
- limit[1]=U16_TRAIL(c);
- limit+=2;
- lastCC=cc;
- if(cc<=1) {
- reorderStart=limit;
- }
- } else {
- insert(c, cc);
- }
- remainingCapacity-=2;
- return TRUE;
-}
-
-UBool ReorderingBuffer::append(const UChar *s, int32_t length, UBool isNFD,
- uint8_t leadCC, uint8_t trailCC,
- UErrorCode &errorCode) {
- if(length==0) {
- return TRUE;
- }
- if(remainingCapacity<length && !resize(length, errorCode)) {
- return FALSE;
- }
- remainingCapacity-=length;
- if(lastCC<=leadCC || leadCC==0) {
- if(trailCC<=1) {
- reorderStart=limit+length;
- } else if(leadCC<=1) {
- reorderStart=limit+1; // Ok if not a code point boundary.
- }
- const UChar *sLimit=s+length;
- do { *limit++=*s++; } while(s!=sLimit);
- lastCC=trailCC;
- } else {
- int32_t i=0;
- UChar32 c;
- U16_NEXT(s, i, length, c);
- insert(c, leadCC); // insert first code point
- while(i<length) {
- U16_NEXT(s, i, length, c);
- if(i<length) {
- if (isNFD) {
- leadCC = Normalizer2Impl::getCCFromYesOrMaybe(impl.getRawNorm16(c));
- } else {
- leadCC = impl.getCC(impl.getNorm16(c));
- }
- } else {
- leadCC=trailCC;
- }
- append(c, leadCC, errorCode);
- }
- }
- return TRUE;
-}
-
-UBool ReorderingBuffer::appendZeroCC(UChar32 c, UErrorCode &errorCode) {
- int32_t cpLength=U16_LENGTH(c);
- if(remainingCapacity<cpLength && !resize(cpLength, errorCode)) {
- return FALSE;
- }
- remainingCapacity-=cpLength;
- if(cpLength==1) {
- *limit++=(UChar)c;
- } else {
- limit[0]=U16_LEAD(c);
- limit[1]=U16_TRAIL(c);
- limit+=2;
- }
- lastCC=0;
- reorderStart=limit;
- return TRUE;
-}
-
-UBool ReorderingBuffer::appendZeroCC(const UChar *s, const UChar *sLimit, UErrorCode &errorCode) {
- if(s==sLimit) {
- return TRUE;
- }
- int32_t length=(int32_t)(sLimit-s);
- if(remainingCapacity<length && !resize(length, errorCode)) {
- return FALSE;
- }
- u_memcpy(limit, s, length);
- limit+=length;
- remainingCapacity-=length;
- lastCC=0;
- reorderStart=limit;
- return TRUE;
-}
-
-void ReorderingBuffer::remove() {
- reorderStart=limit=start;
- remainingCapacity=str.getCapacity();
- lastCC=0;
-}
-
-void ReorderingBuffer::removeSuffix(int32_t suffixLength) {
- if(suffixLength<(limit-start)) {
- limit-=suffixLength;
- remainingCapacity+=suffixLength;
- } else {
- limit=start;
- remainingCapacity=str.getCapacity();
- }
- lastCC=0;
- reorderStart=limit;
-}
-
-UBool ReorderingBuffer::resize(int32_t appendLength, UErrorCode &errorCode) {
- int32_t reorderStartIndex=(int32_t)(reorderStart-start);
- int32_t length=(int32_t)(limit-start);
- str.releaseBuffer(length);
- int32_t newCapacity=length+appendLength;
- int32_t doubleCapacity=2*str.getCapacity();
- if(newCapacity<doubleCapacity) {
- newCapacity=doubleCapacity;
- }
- if(newCapacity<256) {
- newCapacity=256;
- }
- start=str.getBuffer(newCapacity);
- if(start==NULL) {
- // getBuffer() already did str.setToBogus()
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
- }
- reorderStart=start+reorderStartIndex;
- limit=start+length;
- remainingCapacity=str.getCapacity()-length;
- return TRUE;
-}
-
-void ReorderingBuffer::skipPrevious() {
- codePointLimit=codePointStart;
- UChar c=*--codePointStart;
- if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(*(codePointStart-1))) {
- --codePointStart;
- }
-}
-
-uint8_t ReorderingBuffer::previousCC() {
- codePointLimit=codePointStart;
- if(reorderStart>=codePointStart) {
- return 0;
- }
- UChar32 c=*--codePointStart;
- UChar c2;
- if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(c2=*(codePointStart-1))) {
- --codePointStart;
- c=U16_GET_SUPPLEMENTARY(c2, c);
- }
- return impl.getCCFromYesOrMaybeCP(c);
-}
-
-// Inserts c somewhere before the last character.
-// Requires 0<cc<lastCC which implies reorderStart<limit.
-void ReorderingBuffer::insert(UChar32 c, uint8_t cc) {
- for(setIterator(), skipPrevious(); previousCC()>cc;) {}
- // insert c at codePointLimit, after the character with prevCC<=cc
- UChar *q=limit;
- UChar *r=limit+=U16_LENGTH(c);
- do {
- *--r=*--q;
- } while(codePointLimit!=q);
- writeCodePoint(q, c);
- if(cc<=1) {
- reorderStart=r;
- }
-}
-
-// Normalizer2Impl --------------------------------------------------------- ***
-
-struct CanonIterData : public UMemory {
- CanonIterData(UErrorCode &errorCode);
- ~CanonIterData();
- void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode);
- UMutableCPTrie *mutableTrie;
- UCPTrie *trie;
- UVector canonStartSets; // contains UnicodeSet *
-};
-
-Normalizer2Impl::~Normalizer2Impl() {
- delete fCanonIterData;
-}
-
-void
-Normalizer2Impl::init(const int32_t *inIndexes, const UCPTrie *inTrie,
- const uint16_t *inExtraData, const uint8_t *inSmallFCD) {
- minDecompNoCP = static_cast<UChar>(inIndexes[IX_MIN_DECOMP_NO_CP]);
- minCompNoMaybeCP = static_cast<UChar>(inIndexes[IX_MIN_COMP_NO_MAYBE_CP]);
- minLcccCP = static_cast<UChar>(inIndexes[IX_MIN_LCCC_CP]);
-
- minYesNo = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO]);
- minYesNoMappingsOnly = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY]);
- minNoNo = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO]);
- minNoNoCompBoundaryBefore = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE]);
- minNoNoCompNoMaybeCC = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC]);
- minNoNoEmpty = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_EMPTY]);
- limitNoNo = static_cast<uint16_t>(inIndexes[IX_LIMIT_NO_NO]);
- minMaybeYes = static_cast<uint16_t>(inIndexes[IX_MIN_MAYBE_YES]);
- U_ASSERT((minMaybeYes & 7) == 0); // 8-aligned for noNoDelta bit fields
- centerNoNoDelta = (minMaybeYes >> DELTA_SHIFT) - MAX_DELTA - 1;
-
- normTrie=inTrie;
-
- maybeYesCompositions=inExtraData;
- extraData=maybeYesCompositions+((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT);
-
- smallFCD=inSmallFCD;
-}
-
-U_CDECL_BEGIN
-
-static uint32_t U_CALLCONV
-segmentStarterMapper(const void * /*context*/, uint32_t value) {
- return value&CANON_NOT_SEGMENT_STARTER;
-}
-
-U_CDECL_END
-
-void
-Normalizer2Impl::addLcccChars(UnicodeSet &set) const {
- UChar32 start = 0, end;
- uint32_t norm16;
- while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
- nullptr, nullptr, &norm16)) >= 0) {
- if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES &&
- norm16 != Normalizer2Impl::JAMO_VT) {
- set.add(start, end);
- } else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) {
- uint16_t fcd16 = getFCD16(start);
- if (fcd16 > 0xff) { set.add(start, end); }
- }
- start = end + 1;
- }
-}
-
-void
-Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {
- // Add the start code point of each same-value range of the trie.
- UChar32 start = 0, end;
- uint32_t value;
- while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
- nullptr, nullptr, &value)) >= 0) {
- sa->add(sa->set, start);
- if (start != end && isAlgorithmicNoNo((uint16_t)value) &&
- (value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) {
- // Range of code points with same-norm16-value algorithmic decompositions.
- // They might have different non-zero FCD16 values.
- uint16_t prevFCD16 = getFCD16(start);
- while (++start <= end) {
- uint16_t fcd16 = getFCD16(start);
- if (fcd16 != prevFCD16) {
- sa->add(sa->set, start);
- prevFCD16 = fcd16;
- }
- }
- }
- start = end + 1;
- }
-
- /* add Hangul LV syllables and LV+1 because of skippables */
- for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) {
- sa->add(sa->set, c);
- sa->add(sa->set, c+1);
- }
- sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */
-}
-
-void
-Normalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const {
- // Add the start code point of each same-value range of the canonical iterator data trie.
- if (!ensureCanonIterData(errorCode)) { return; }
- // Currently only used for the SEGMENT_STARTER property.
- UChar32 start = 0, end;
- uint32_t value;
- while ((end = ucptrie_getRange(fCanonIterData->trie, start, UCPMAP_RANGE_NORMAL, 0,
- segmentStarterMapper, nullptr, &value)) >= 0) {
- sa->add(sa->set, start);
- start = end + 1;
- }
-}
-
-const UChar *
-Normalizer2Impl::copyLowPrefixFromNulTerminated(const UChar *src,
- UChar32 minNeedDataCP,
- ReorderingBuffer *buffer,
- UErrorCode &errorCode) const {
- // Make some effort to support NUL-terminated strings reasonably.
- // Take the part of the fast quick check loop that does not look up
- // data and check the first part of the string.
- // After this prefix, determine the string length to simplify the rest
- // of the code.
- const UChar *prevSrc=src;
- UChar c;
- while((c=*src++)<minNeedDataCP && c!=0) {}
- // Back out the last character for full processing.
- // Copy this prefix.
- if(--src!=prevSrc) {
- if(buffer!=NULL) {
- buffer->appendZeroCC(prevSrc, src, errorCode);
- }
- }
- return src;
-}
-
-UnicodeString &
-Normalizer2Impl::decompose(const UnicodeString &src, UnicodeString &dest,
- UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- dest.setToBogus();
- return dest;
- }
- const UChar *sArray=src.getBuffer();
- if(&dest==&src || sArray==NULL) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- dest.setToBogus();
- return dest;
- }
- decompose(sArray, sArray+src.length(), dest, src.length(), errorCode);
- return dest;
-}
-
-void
-Normalizer2Impl::decompose(const UChar *src, const UChar *limit,
- UnicodeString &dest,
- int32_t destLengthEstimate,
- UErrorCode &errorCode) const {
- if(destLengthEstimate<0 && limit!=NULL) {
- destLengthEstimate=(int32_t)(limit-src);
- }
- dest.remove();
- ReorderingBuffer buffer(*this, dest);
- if(buffer.init(destLengthEstimate, errorCode)) {
- decompose(src, limit, &buffer, errorCode);
- }
-}
-
-// Dual functionality:
-// buffer!=NULL: normalize
-// buffer==NULL: isNormalized/spanQuickCheckYes
-const UChar *
-Normalizer2Impl::decompose(const UChar *src, const UChar *limit,
- ReorderingBuffer *buffer,
- UErrorCode &errorCode) const {
- UChar32 minNoCP=minDecompNoCP;
- if(limit==NULL) {
- src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode);
- if(U_FAILURE(errorCode)) {
- return src;
- }
- limit=u_strchr(src, 0);
- }
-
- const UChar *prevSrc;
- UChar32 c=0;
- uint16_t norm16=0;
-
- // only for quick check
- const UChar *prevBoundary=src;
- uint8_t prevCC=0;
-
- for(;;) {
- // count code units below the minimum or with irrelevant data for the quick check
- for(prevSrc=src; src!=limit;) {
- if( (c=*src)<minNoCP ||
- isMostDecompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
- ) {
- ++src;
- } else if(!U16_IS_LEAD(c)) {
- break;
- } else {
- UChar c2;
- if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
- c=U16_GET_SUPPLEMENTARY(c, c2);
- norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
- if(isMostDecompYesAndZeroCC(norm16)) {
- src+=2;
- } else {
- break;
- }
- } else {
- ++src; // unpaired lead surrogate: inert
- }
- }
- }
- // copy these code units all at once
- if(src!=prevSrc) {
- if(buffer!=NULL) {
- if(!buffer->appendZeroCC(prevSrc, src, errorCode)) {
- break;
- }
- } else {
- prevCC=0;
- prevBoundary=src;
- }
- }
- if(src==limit) {
- break;
- }
-
- // Check one above-minimum, relevant code point.
- src+=U16_LENGTH(c);
- if(buffer!=NULL) {
- if(!decompose(c, norm16, *buffer, errorCode)) {
- break;
- }
- } else {
- if(isDecompYes(norm16)) {
- uint8_t cc=getCCFromYesOrMaybe(norm16);
- if(prevCC<=cc || cc==0) {
- prevCC=cc;
- if(cc<=1) {
- prevBoundary=src;
- }
- continue;
- }
- }
- return prevBoundary; // "no" or cc out of order
- }
- }
- return src;
-}
-
-// Decompose a short piece of text which is likely to contain characters that
-// fail the quick check loop and/or where the quick check loop's overhead
-// is unlikely to be amortized.
-// Called by the compose() and makeFCD() implementations.
-const UChar *
-Normalizer2Impl::decomposeShort(const UChar *src, const UChar *limit,
- UBool stopAtCompBoundary, UBool onlyContiguous,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const {
- if (U_FAILURE(errorCode)) {
- return nullptr;
- }
- while(src<limit) {
- if (stopAtCompBoundary && *src < minCompNoMaybeCP) {
- return src;
- }
- const UChar *prevSrc = src;
- UChar32 c;
- uint16_t norm16;
- UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16);
- if (stopAtCompBoundary && norm16HasCompBoundaryBefore(norm16)) {
- return prevSrc;
- }
- if(!decompose(c, norm16, buffer, errorCode)) {
- return nullptr;
- }
- if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
- return src;
- }
- }
- return src;
-}
-
-UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
- ReorderingBuffer &buffer,
- UErrorCode &errorCode) const {
- // get the decomposition and the lead and trail cc's
- if (norm16 >= limitNoNo) {
- if (isMaybeOrNonZeroCC(norm16)) {
- return buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode);
- }
- // Maps to an isCompYesAndZeroCC.
- c=mapAlgorithmic(c, norm16);
- norm16=getRawNorm16(c);
- }
- if (norm16 < minYesNo) {
- // c does not decompose
- return buffer.append(c, 0, errorCode);
- } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
- // Hangul syllable: decompose algorithmically
- UChar jamos[3];
- return buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode);
- }
- // c decomposes, get everything from the variable-length extra data
- const uint16_t *mapping=getMapping(norm16);
- uint16_t firstUnit=*mapping;
- int32_t length=firstUnit&MAPPING_LENGTH_MASK;
- uint8_t leadCC, trailCC;
- trailCC=(uint8_t)(firstUnit>>8);
- if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
- leadCC=(uint8_t)(*(mapping-1)>>8);
- } else {
- leadCC=0;
- }
- return buffer.append((const UChar *)mapping+1, length, TRUE, leadCC, trailCC, errorCode);
-}
-
-const uint8_t *
-Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
- UBool stopAtCompBoundary, UBool onlyContiguous,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const {
- if (U_FAILURE(errorCode)) {
- return nullptr;
- }
- while (src < limit) {
- const uint8_t *prevSrc = src;
- uint16_t norm16;
- UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
- // Get the decomposition and the lead and trail cc's.
- UChar32 c = U_SENTINEL;
- if (norm16 >= limitNoNo) {
- if (isMaybeOrNonZeroCC(norm16)) {
- // No boundaries around this character.
- c = codePointFromValidUTF8(prevSrc, src);
- if (!buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode)) {
- return nullptr;
- }
- continue;
- }
- // Maps to an isCompYesAndZeroCC.
- if (stopAtCompBoundary) {
- return prevSrc;
- }
- c = codePointFromValidUTF8(prevSrc, src);
- c = mapAlgorithmic(c, norm16);
- norm16 = getRawNorm16(c);
- } else if (stopAtCompBoundary && norm16 < minNoNoCompNoMaybeCC) {
- return prevSrc;
- }
- // norm16!=INERT guarantees that [prevSrc, src[ is valid UTF-8.
- // We do not see invalid UTF-8 here because
- // its norm16==INERT is normalization-inert,
- // so it gets copied unchanged in the fast path,
- // and we stop the slow path where invalid UTF-8 begins.
- U_ASSERT(norm16 != INERT);
- if (norm16 < minYesNo) {
- if (c < 0) {
- c = codePointFromValidUTF8(prevSrc, src);
- }
- // does not decompose
- if (!buffer.append(c, 0, errorCode)) {
- return nullptr;
- }
- } else if (isHangulLV(norm16) || isHangulLVT(norm16)) {
- // Hangul syllable: decompose algorithmically
- if (c < 0) {
- c = codePointFromValidUTF8(prevSrc, src);
- }
- char16_t jamos[3];
- if (!buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode)) {
- return nullptr;
- }
- } else {
- // The character decomposes, get everything from the variable-length extra data.
- const uint16_t *mapping = getMapping(norm16);
- uint16_t firstUnit = *mapping;
- int32_t length = firstUnit & MAPPING_LENGTH_MASK;
- uint8_t trailCC = (uint8_t)(firstUnit >> 8);
- uint8_t leadCC;
- if (firstUnit & MAPPING_HAS_CCC_LCCC_WORD) {
- leadCC = (uint8_t)(*(mapping-1) >> 8);
- } else {
- leadCC = 0;
- }
- if (!buffer.append((const char16_t *)mapping+1, length, TRUE, leadCC, trailCC, errorCode)) {
- return nullptr;
- }
- }
- if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
- return src;
- }
- }
- return src;
-}
-
-const UChar *
-Normalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const {
- uint16_t norm16;
- if(c<minDecompNoCP || isMaybeOrNonZeroCC(norm16=getNorm16(c))) {
- // c does not decompose
- return nullptr;
- }
- const UChar *decomp = nullptr;
- if(isDecompNoAlgorithmic(norm16)) {
- // Maps to an isCompYesAndZeroCC.
- c=mapAlgorithmic(c, norm16);
- decomp=buffer;
- length=0;
- U16_APPEND_UNSAFE(buffer, length, c);
- // The mapping might decompose further.
- norm16 = getRawNorm16(c);
- }
- if (norm16 < minYesNo) {
- return decomp;
- } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
- // Hangul syllable: decompose algorithmically
- length=Hangul::decompose(c, buffer);
- return buffer;
- }
- // c decomposes, get everything from the variable-length extra data
- const uint16_t *mapping=getMapping(norm16);
- length=*mapping&MAPPING_LENGTH_MASK;
- return (const UChar *)mapping+1;
-}
-
-// The capacity of the buffer must be 30=MAPPING_LENGTH_MASK-1
-// so that a raw mapping fits that consists of one unit ("rm0")
-// plus all but the first two code units of the normal mapping.
-// The maximum length of a normal mapping is 31=MAPPING_LENGTH_MASK.
-const UChar *
-Normalizer2Impl::getRawDecomposition(UChar32 c, UChar buffer[30], int32_t &length) const {
- uint16_t norm16;
- if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
- // c does not decompose
- return NULL;
- } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
- // Hangul syllable: decompose algorithmically
- Hangul::getRawDecomposition(c, buffer);
- length=2;
- return buffer;
- } else if(isDecompNoAlgorithmic(norm16)) {
- c=mapAlgorithmic(c, norm16);
- length=0;
- U16_APPEND_UNSAFE(buffer, length, c);
- return buffer;
- }
- // c decomposes, get everything from the variable-length extra data
- const uint16_t *mapping=getMapping(norm16);
- uint16_t firstUnit=*mapping;
- int32_t mLength=firstUnit&MAPPING_LENGTH_MASK; // length of normal mapping
- if(firstUnit&MAPPING_HAS_RAW_MAPPING) {
- // Read the raw mapping from before the firstUnit and before the optional ccc/lccc word.
- // Bit 7=MAPPING_HAS_CCC_LCCC_WORD
- const uint16_t *rawMapping=mapping-((firstUnit>>7)&1)-1;
- uint16_t rm0=*rawMapping;
- if(rm0<=MAPPING_LENGTH_MASK) {
- length=rm0;
- return (const UChar *)rawMapping-rm0;
- } else {
- // Copy the normal mapping and replace its first two code units with rm0.
- buffer[0]=(UChar)rm0;
- u_memcpy(buffer+1, (const UChar *)mapping+1+2, mLength-2);
- length=mLength-1;
- return buffer;
- }
- } else {
- length=mLength;
- return (const UChar *)mapping+1;
- }
-}
-
-void Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit,
- UBool doDecompose,
- UnicodeString &safeMiddle,
- ReorderingBuffer &buffer,
- UErrorCode &errorCode) const {
- buffer.copyReorderableSuffixTo(safeMiddle);
- if(doDecompose) {
- decompose(src, limit, &buffer, errorCode);
- return;
- }
- // Just merge the strings at the boundary.
- bool isFirst = true;
- uint8_t firstCC = 0, prevCC = 0, cc;
- const UChar *p = src;
- while (p != limit) {
- const UChar *codePointStart = p;
- UChar32 c;
- uint16_t norm16;
- UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
- if ((cc = getCC(norm16)) == 0) {
- p = codePointStart;
- break;
- }
- if (isFirst) {
- firstCC = cc;
- isFirst = false;
- }
- prevCC = cc;
- }
- if(limit==NULL) { // appendZeroCC() needs limit!=NULL
- limit=u_strchr(p, 0);
- }
-
- if (buffer.append(src, (int32_t)(p - src), FALSE, firstCC, prevCC, errorCode)) {
- buffer.appendZeroCC(p, limit, errorCode);
- }
-}
-
-UBool Normalizer2Impl::hasDecompBoundaryBefore(UChar32 c) const {
- return c < minLcccCP || (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) ||
- norm16HasDecompBoundaryBefore(getNorm16(c));
-}
-
-UBool Normalizer2Impl::norm16HasDecompBoundaryBefore(uint16_t norm16) const {
- if (norm16 < minNoNoCompNoMaybeCC) {
- return TRUE;
- }
- if (norm16 >= limitNoNo) {
- return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
- }
- // c decomposes, get everything from the variable-length extra data
- const uint16_t *mapping=getMapping(norm16);
- uint16_t firstUnit=*mapping;
- // TRUE if leadCC==0 (hasFCDBoundaryBefore())
- return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0;
-}
-
-UBool Normalizer2Impl::hasDecompBoundaryAfter(UChar32 c) const {
- if (c < minDecompNoCP) {
- return TRUE;
- }
- if (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) {
- return TRUE;
- }
- return norm16HasDecompBoundaryAfter(getNorm16(c));
-}
-
-UBool Normalizer2Impl::norm16HasDecompBoundaryAfter(uint16_t norm16) const {
- if(norm16 <= minYesNo || isHangulLVT(norm16)) {
- return TRUE;
- }
- if (norm16 >= limitNoNo) {
- if (isMaybeOrNonZeroCC(norm16)) {
- return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
- }
- // Maps to an isCompYesAndZeroCC.
- return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1;
- }
- // c decomposes, get everything from the variable-length extra data
- const uint16_t *mapping=getMapping(norm16);
- uint16_t firstUnit=*mapping;
- // decomp after-boundary: same as hasFCDBoundaryAfter(),
- // fcd16<=1 || trailCC==0
- if(firstUnit>0x1ff) {
- return FALSE; // trailCC>1
- }
- if(firstUnit<=0xff) {
- return TRUE; // trailCC==0
- }
- // if(trailCC==1) test leadCC==0, same as checking for before-boundary
- // TRUE if leadCC==0 (hasFCDBoundaryBefore())
- return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0;
-}
-
-/*
- * Finds the recomposition result for
- * a forward-combining "lead" character,
- * specified with a pointer to its compositions list,
- * and a backward-combining "trail" character.
- *
- * If the lead and trail characters combine, then this function returns
- * the following "compositeAndFwd" value:
- * Bits 21..1 composite character
- * Bit 0 set if the composite is a forward-combining starter
- * otherwise it returns -1.
- *
- * The compositions list has (trail, compositeAndFwd) pair entries,
- * encoded as either pairs or triples of 16-bit units.
- * The last entry has the high bit of its first unit set.
- *
- * The list is sorted by ascending trail characters (there are no duplicates).
- * A linear search is used.
- *
- * See normalizer2impl.h for a more detailed description
- * of the compositions list format.
- */
-int32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) {
- uint16_t key1, firstUnit;
- if(trail<COMP_1_TRAIL_LIMIT) {
- // trail character is 0..33FF
- // result entry may have 2 or 3 units
- key1=(uint16_t)(trail<<1);
- while(key1>(firstUnit=*list)) {
- list+=2+(firstUnit&COMP_1_TRIPLE);
- }
- if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
- if(firstUnit&COMP_1_TRIPLE) {
- return ((int32_t)list[1]<<16)|list[2];
- } else {
- return list[1];
- }
- }
- } else {
- // trail character is 3400..10FFFF
- // result entry has 3 units
- key1=(uint16_t)(COMP_1_TRAIL_LIMIT+
- (((trail>>COMP_1_TRAIL_SHIFT))&
- ~COMP_1_TRIPLE));
- uint16_t key2=(uint16_t)(trail<<COMP_2_TRAIL_SHIFT);
- uint16_t secondUnit;
- for(;;) {
- if(key1>(firstUnit=*list)) {
- list+=2+(firstUnit&COMP_1_TRIPLE);
- } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
- if(key2>(secondUnit=list[1])) {
- if(firstUnit&COMP_1_LAST_TUPLE) {
- break;
- } else {
- list+=3;
- }
- } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) {
- return ((int32_t)(secondUnit&~COMP_2_TRAIL_MASK)<<16)|list[2];
- } else {
- break;
- }
- } else {
- break;
- }
- }
- }
- return -1;
-}
-
-/**
- * @param list some character's compositions list
- * @param set recursively receives the composites from these compositions
- */
-void Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const {
- uint16_t firstUnit;
- int32_t compositeAndFwd;
- do {
- firstUnit=*list;
- if((firstUnit&COMP_1_TRIPLE)==0) {
- compositeAndFwd=list[1];
- list+=2;
- } else {
- compositeAndFwd=(((int32_t)list[1]&~COMP_2_TRAIL_MASK)<<16)|list[2];
- list+=3;
- }
- UChar32 composite=compositeAndFwd>>1;
- if((compositeAndFwd&1)!=0) {
- addComposites(getCompositionsListForComposite(getRawNorm16(composite)), set);
- }
- set.add(composite);
- } while((firstUnit&COMP_1_LAST_TUPLE)==0);
-}
-
-/*
- * Recomposes the buffer text starting at recomposeStartIndex
- * (which is in NFD - decomposed and canonically ordered),
- * and truncates the buffer contents.
- *
- * Note that recomposition never lengthens the text:
- * Any character consists of either one or two code units;
- * a composition may contain at most one more code unit than the original starter,
- * while the combining mark that is removed has at least one code unit.
- */
-void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
- UBool onlyContiguous) const {
- UChar *p=buffer.getStart()+recomposeStartIndex;
- UChar *limit=buffer.getLimit();
- if(p==limit) {
- return;
- }
-
- UChar *starter, *pRemove, *q, *r;
- const uint16_t *compositionsList;
- UChar32 c, compositeAndFwd;
- uint16_t norm16;
- uint8_t cc, prevCC;
- UBool starterIsSupplementary;
-
- // Some of the following variables are not used until we have a forward-combining starter
- // and are only initialized now to avoid compiler warnings.
- compositionsList=NULL; // used as indicator for whether we have a forward-combining starter
- starter=NULL;
- starterIsSupplementary=FALSE;
- prevCC=0;
-
- for(;;) {
- UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
- cc=getCCFromYesOrMaybe(norm16);
- if( // this character combines backward and
- isMaybe(norm16) &&
- // we have seen a starter that combines forward and
- compositionsList!=NULL &&
- // the backward-combining character is not blocked
- (prevCC<cc || prevCC==0)
- ) {
- if(isJamoVT(norm16)) {
- // c is a Jamo V/T, see if we can compose it with the previous character.
- if(c<Hangul::JAMO_T_BASE) {
- // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
- UChar prev=(UChar)(*starter-Hangul::JAMO_L_BASE);
- if(prev<Hangul::JAMO_L_COUNT) {
- pRemove=p-1;
- UChar syllable=(UChar)
- (Hangul::HANGUL_BASE+
- (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))*
- Hangul::JAMO_T_COUNT);
- UChar t;
- if(p!=limit && (t=(UChar)(*p-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) {
- ++p;
- syllable+=t; // The next character was a Jamo T.
- }
- *starter=syllable;
- // remove the Jamo V/T
- q=pRemove;
- r=p;
- while(r<limit) {
- *q++=*r++;
- }
- limit=q;
- p=pRemove;
- }
- }
- /*
- * No "else" for Jamo T:
- * Since the input is in NFD, there are no Hangul LV syllables that
- * a Jamo T could combine with.
- * All Jamo Ts are combined above when handling Jamo Vs.
- */
- if(p==limit) {
- break;
- }
- compositionsList=NULL;
- continue;
- } else if((compositeAndFwd=combine(compositionsList, c))>=0) {
- // The starter and the combining mark (c) do combine.
- UChar32 composite=compositeAndFwd>>1;
-
- // Replace the starter with the composite, remove the combining mark.
- pRemove=p-U16_LENGTH(c); // pRemove & p: start & limit of the combining mark
- if(starterIsSupplementary) {
- if(U_IS_SUPPLEMENTARY(composite)) {
- // both are supplementary
- starter[0]=U16_LEAD(composite);
- starter[1]=U16_TRAIL(composite);
- } else {
- *starter=(UChar)composite;
- // The composite is shorter than the starter,
- // move the intermediate characters forward one.
- starterIsSupplementary=FALSE;
- q=starter+1;
- r=q+1;
- while(r<pRemove) {
- *q++=*r++;
- }
- --pRemove;
- }
- } else if(U_IS_SUPPLEMENTARY(composite)) {
- // The composite is longer than the starter,
- // move the intermediate characters back one.
- starterIsSupplementary=TRUE;
- ++starter; // temporarily increment for the loop boundary
- q=pRemove;
- r=++pRemove;
- while(starter<q) {
- *--r=*--q;
- }
- *starter=U16_TRAIL(composite);
- *--starter=U16_LEAD(composite); // undo the temporary increment
- } else {
- // both are on the BMP
- *starter=(UChar)composite;
- }
-
- /* remove the combining mark by moving the following text over it */
- if(pRemove<p) {
- q=pRemove;
- r=p;
- while(r<limit) {
- *q++=*r++;
- }
- limit=q;
- p=pRemove;
- }
- // Keep prevCC because we removed the combining mark.
-
- if(p==limit) {
- break;
- }
- // Is the composite a starter that combines forward?
- if(compositeAndFwd&1) {
- compositionsList=
- getCompositionsListForComposite(getRawNorm16(composite));
- } else {
- compositionsList=NULL;
- }
-
- // We combined; continue with looking for compositions.
- continue;
- }
- }
-
- // no combination this time
- prevCC=cc;
- if(p==limit) {
- break;
- }
-
- // If c did not combine, then check if it is a starter.
- if(cc==0) {
- // Found a new starter.
- if((compositionsList=getCompositionsListForDecompYes(norm16))!=NULL) {
- // It may combine with something, prepare for it.
- if(U_IS_BMP(c)) {
- starterIsSupplementary=FALSE;
- starter=p-1;
- } else {
- starterIsSupplementary=TRUE;
- starter=p-2;
- }
- }
- } else if(onlyContiguous) {
- // FCC: no discontiguous compositions; any intervening character blocks.
- compositionsList=NULL;
- }
- }
- buffer.setReorderingLimit(limit);
-}
-
-UChar32
-Normalizer2Impl::composePair(UChar32 a, UChar32 b) const {
- uint16_t norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16
- const uint16_t *list;
- if(isInert(norm16)) {
- return U_SENTINEL;
- } else if(norm16<minYesNoMappingsOnly) {
- // a combines forward.
- if(isJamoL(norm16)) {
- b-=Hangul::JAMO_V_BASE;
- if(0<=b && b<Hangul::JAMO_V_COUNT) {
- return
- (Hangul::HANGUL_BASE+
- ((a-Hangul::JAMO_L_BASE)*Hangul::JAMO_V_COUNT+b)*
- Hangul::JAMO_T_COUNT);
- } else {
- return U_SENTINEL;
- }
- } else if(isHangulLV(norm16)) {
- b-=Hangul::JAMO_T_BASE;
- if(0<b && b<Hangul::JAMO_T_COUNT) { // not b==0!
- return a+b;
- } else {
- return U_SENTINEL;
- }
- } else {
- // 'a' has a compositions list in extraData
- list=getMapping(norm16);
- if(norm16>minYesNo) { // composite 'a' has both mapping & compositions list
- list+= // mapping pointer
- 1+ // +1 to skip the first unit with the mapping length
- (*list&MAPPING_LENGTH_MASK); // + mapping length
- }
- }
- } else if(norm16<minMaybeYes || MIN_NORMAL_MAYBE_YES<=norm16) {
- return U_SENTINEL;
- } else {
- list=getCompositionsListForMaybe(norm16);
- }
- if(b<0 || 0x10ffff<b) { // combine(list, b) requires a valid code point b
- return U_SENTINEL;
- }
-#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
- return combine(list, b)>>1;
-#else
- int32_t compositeAndFwd=combine(list, b);
- return compositeAndFwd>=0 ? compositeAndFwd>>1 : U_SENTINEL;
-#endif
-}
-
-// Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
-// doCompose: normalize
-// !doCompose: isNormalized (buffer must be empty and initialized)
-UBool
-Normalizer2Impl::compose(const UChar *src, const UChar *limit,
- UBool onlyContiguous,
- UBool doCompose,
- ReorderingBuffer &buffer,
- UErrorCode &errorCode) const {
- const UChar *prevBoundary=src;
- UChar32 minNoMaybeCP=minCompNoMaybeCP;
- if(limit==NULL) {
- src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP,
- doCompose ? &buffer : NULL,
- errorCode);
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- limit=u_strchr(src, 0);
- if (prevBoundary != src) {
- if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) {
- prevBoundary = src;
- } else {
- buffer.removeSuffix(1);
- prevBoundary = --src;
- }
- }
- }
-
- for (;;) {
- // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
- // or with (compYes && ccc==0) properties.
- const UChar *prevSrc;
- UChar32 c = 0;
- uint16_t norm16 = 0;
- for (;;) {
- if (src == limit) {
- if (prevBoundary != limit && doCompose) {
- buffer.appendZeroCC(prevBoundary, limit, errorCode);
- }
- return TRUE;
- }
- if( (c=*src)<minNoMaybeCP ||
- isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
- ) {
- ++src;
- } else {
- prevSrc = src++;
- if(!U16_IS_LEAD(c)) {
- break;
- } else {
- UChar c2;
- if(src!=limit && U16_IS_TRAIL(c2=*src)) {
- ++src;
- c=U16_GET_SUPPLEMENTARY(c, c2);
- norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
- if(!isCompYesAndZeroCC(norm16)) {
- break;
- }
- }
- }
- }
- }
- // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
- // The current character is either a "noNo" (has a mapping)
- // or a "maybeYes" (combines backward)
- // or a "yesYes" with ccc!=0.
- // It is not a Hangul syllable or Jamo L because those have "yes" properties.
-
- // Medium-fast path: Handle cases that do not require full decomposition and recomposition.
- if (!isMaybeOrNonZeroCC(norm16)) { // minNoNo <= norm16 < minMaybeYes
- if (!doCompose) {
- return FALSE;
- }
- // Fast path for mapping a character that is immediately surrounded by boundaries.
- // In this case, we need not decompose around the current character.
- if (isDecompNoAlgorithmic(norm16)) {
- // Maps to a single isCompYesAndZeroCC character
- // which also implies hasCompBoundaryBefore.
- if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
- hasCompBoundaryBefore(src, limit)) {
- if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
- break;
- }
- if(!buffer.append(mapAlgorithmic(c, norm16), 0, errorCode)) {
- break;
- }
- prevBoundary = src;
- continue;
- }
- } else if (norm16 < minNoNoCompBoundaryBefore) {
- // The mapping is comp-normalized which also implies hasCompBoundaryBefore.
- if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
- hasCompBoundaryBefore(src, limit)) {
- if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
- break;
- }
- const UChar *mapping = reinterpret_cast<const UChar *>(getMapping(norm16));
- int32_t length = *mapping++ & MAPPING_LENGTH_MASK;
- if(!buffer.appendZeroCC(mapping, mapping + length, errorCode)) {
- break;
- }
- prevBoundary = src;
- continue;
- }
- } else if (norm16 >= minNoNoEmpty) {
- // The current character maps to nothing.
- // Simply omit it from the output if there is a boundary before _or_ after it.
- // The character itself implies no boundaries.
- if (hasCompBoundaryBefore(src, limit) ||
- hasCompBoundaryAfter(prevBoundary, prevSrc, onlyContiguous)) {
- if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
- break;
- }
- prevBoundary = src;
- continue;
- }
- }
- // Other "noNo" type, or need to examine more text around this character:
- // Fall through to the slow path.
- } else if (isJamoVT(norm16) && prevBoundary != prevSrc) {
- UChar prev=*(prevSrc-1);
- if(c<Hangul::JAMO_T_BASE) {
- // The current character is a Jamo Vowel,
- // compose with previous Jamo L and following Jamo T.
- UChar l = (UChar)(prev-Hangul::JAMO_L_BASE);
- if(l<Hangul::JAMO_L_COUNT) {
- if (!doCompose) {
- return FALSE;
- }
- int32_t t;
- if (src != limit &&
- 0 < (t = ((int32_t)*src - Hangul::JAMO_T_BASE)) &&
- t < Hangul::JAMO_T_COUNT) {
- // The next character is a Jamo T.
- ++src;
- } else if (hasCompBoundaryBefore(src, limit)) {
- // No Jamo T follows, not even via decomposition.
- t = 0;
- } else {
- t = -1;
- }
- if (t >= 0) {
- UChar32 syllable = Hangul::HANGUL_BASE +
- (l*Hangul::JAMO_V_COUNT + (c-Hangul::JAMO_V_BASE)) *
- Hangul::JAMO_T_COUNT + t;
- --prevSrc; // Replace the Jamo L as well.
- if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
- break;
- }
- if(!buffer.appendBMP((UChar)syllable, 0, errorCode)) {
- break;
- }
- prevBoundary = src;
- continue;
- }
- // If we see L+V+x where x!=T then we drop to the slow path,
- // decompose and recompose.
- // This is to deal with NFKC finding normal L and V but a
- // compatibility variant of a T.
- // We need to either fully compose that combination here
- // (which would complicate the code and may not work with strange custom data)
- // or use the slow path.
- }
- } else if (Hangul::isHangulLV(prev)) {
- // The current character is a Jamo Trailing consonant,
- // compose with previous Hangul LV that does not contain a Jamo T.
- if (!doCompose) {
- return FALSE;
- }
- UChar32 syllable = prev + c - Hangul::JAMO_T_BASE;
- --prevSrc; // Replace the Hangul LV as well.
- if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
- break;
- }
- if(!buffer.appendBMP((UChar)syllable, 0, errorCode)) {
- break;
- }
- prevBoundary = src;
- continue;
- }
- // No matching context, or may need to decompose surrounding text first:
- // Fall through to the slow path.
- } else if (norm16 > JAMO_VT) { // norm16 >= MIN_YES_YES_WITH_CC
- // One or more combining marks that do not combine-back:
- // Check for canonical order, copy unchanged if ok and
- // if followed by a character with a boundary-before.
- uint8_t cc = getCCFromNormalYesOrMaybe(norm16); // cc!=0
- if (onlyContiguous /* FCC */ && getPreviousTrailCC(prevBoundary, prevSrc) > cc) {
- // Fails FCD test, need to decompose and contiguously recompose.
- if (!doCompose) {
- return FALSE;
- }
- } else {
- // If !onlyContiguous (not FCC), then we ignore the tccc of
- // the previous character which passed the quick check "yes && ccc==0" test.
- const UChar *nextSrc;
- uint16_t n16;
- for (;;) {
- if (src == limit) {
- if (doCompose) {
- buffer.appendZeroCC(prevBoundary, limit, errorCode);
- }
- return TRUE;
- }
- uint8_t prevCC = cc;
- nextSrc = src;
- UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, n16);
- if (n16 >= MIN_YES_YES_WITH_CC) {
- cc = getCCFromNormalYesOrMaybe(n16);
- if (prevCC > cc) {
- if (!doCompose) {
- return FALSE;
- }
- break;
- }
- } else {
- break;
- }
- src = nextSrc;
- }
- // src is after the last in-order combining mark.
- // If there is a boundary here, then we continue with no change.
- if (norm16HasCompBoundaryBefore(n16)) {
- if (isCompYesAndZeroCC(n16)) {
- src = nextSrc;
- }
- continue;
- }
- // Use the slow path. There is no boundary in [prevSrc, src[.
- }
- }
-
- // Slow path: Find the nearest boundaries around the current character,
- // decompose and recompose.
- if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
- const UChar *p = prevSrc;
- UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, norm16);
- if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
- prevSrc = p;
- }
- }
- if (doCompose && prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
- break;
- }
- int32_t recomposeStartIndex=buffer.length();
- // We know there is not a boundary here.
- decomposeShort(prevSrc, src, FALSE /* !stopAtCompBoundary */, onlyContiguous,
- buffer, errorCode);
- // Decompose until the next boundary.
- src = decomposeShort(src, limit, TRUE /* stopAtCompBoundary */, onlyContiguous,
- buffer, errorCode);
- if (U_FAILURE(errorCode)) {
- break;
- }
- if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals()
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return TRUE;
- }
- recompose(buffer, recomposeStartIndex, onlyContiguous);
- if(!doCompose) {
- if(!buffer.equals(prevSrc, src)) {
- return FALSE;
- }
- buffer.remove();
- }
- prevBoundary=src;
- }
- return TRUE;
-}
-
-// Very similar to compose(): Make the same changes in both places if relevant.
-// pQCResult==NULL: spanQuickCheckYes
-// pQCResult!=NULL: quickCheck (*pQCResult must be UNORM_YES)
-const UChar *
-Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
- UBool onlyContiguous,
- UNormalizationCheckResult *pQCResult) const {
- const UChar *prevBoundary=src;
- UChar32 minNoMaybeCP=minCompNoMaybeCP;
- if(limit==NULL) {
- UErrorCode errorCode=U_ZERO_ERROR;
- src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, NULL, errorCode);
- limit=u_strchr(src, 0);
- if (prevBoundary != src) {
- if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) {
- prevBoundary = src;
- } else {
- prevBoundary = --src;
- }
- }
- }
-
- for(;;) {
- // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
- // or with (compYes && ccc==0) properties.
- const UChar *prevSrc;
- UChar32 c = 0;
- uint16_t norm16 = 0;
- for (;;) {
- if(src==limit) {
- return src;
- }
- if( (c=*src)<minNoMaybeCP ||
- isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
- ) {
- ++src;
- } else {
- prevSrc = src++;
- if(!U16_IS_LEAD(c)) {
- break;
- } else {
- UChar c2;
- if(src!=limit && U16_IS_TRAIL(c2=*src)) {
- ++src;
- c=U16_GET_SUPPLEMENTARY(c, c2);
- norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
- if(!isCompYesAndZeroCC(norm16)) {
- break;
- }
- }
- }
- }
- }
- // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
- // The current character is either a "noNo" (has a mapping)
- // or a "maybeYes" (combines backward)
- // or a "yesYes" with ccc!=0.
- // It is not a Hangul syllable or Jamo L because those have "yes" properties.
-
- uint16_t prevNorm16 = INERT;
- if (prevBoundary != prevSrc) {
- if (norm16HasCompBoundaryBefore(norm16)) {
- prevBoundary = prevSrc;
- } else {
- const UChar *p = prevSrc;
- uint16_t n16;
- UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, n16);
- if (norm16HasCompBoundaryAfter(n16, onlyContiguous)) {
- prevBoundary = prevSrc;
- } else {
- prevBoundary = p;
- prevNorm16 = n16;
- }
- }
- }
-
- if(isMaybeOrNonZeroCC(norm16)) {
- uint8_t cc=getCCFromYesOrMaybe(norm16);
- if (onlyContiguous /* FCC */ && cc != 0 &&
- getTrailCCFromCompYesAndZeroCC(prevNorm16) > cc) {
- // The [prevBoundary..prevSrc[ character
- // passed the quick check "yes && ccc==0" test
- // but is out of canonical order with the current combining mark.
- } else {
- // If !onlyContiguous (not FCC), then we ignore the tccc of
- // the previous character which passed the quick check "yes && ccc==0" test.
- const UChar *nextSrc;
- for (;;) {
- if (norm16 < MIN_YES_YES_WITH_CC) {
- if (pQCResult != nullptr) {
- *pQCResult = UNORM_MAYBE;
- } else {
- return prevBoundary;
- }
- }
- if (src == limit) {
- return src;
- }
- uint8_t prevCC = cc;
- nextSrc = src;
- UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, norm16);
- if (isMaybeOrNonZeroCC(norm16)) {
- cc = getCCFromYesOrMaybe(norm16);
- if (!(prevCC <= cc || cc == 0)) {
- break;
- }
- } else {
- break;
- }
- src = nextSrc;
- }
- // src is after the last in-order combining mark.
- if (isCompYesAndZeroCC(norm16)) {
- prevBoundary = src;
- src = nextSrc;
- continue;
- }
- }
- }
- if(pQCResult!=NULL) {
- *pQCResult=UNORM_NO;
- }
- return prevBoundary;
- }
-}
-
-void Normalizer2Impl::composeAndAppend(const UChar *src, const UChar *limit,
- UBool doCompose,
- UBool onlyContiguous,
- UnicodeString &safeMiddle,
- ReorderingBuffer &buffer,
- UErrorCode &errorCode) const {
- if(!buffer.isEmpty()) {
- const UChar *firstStarterInSrc=findNextCompBoundary(src, limit, onlyContiguous);
- if(src!=firstStarterInSrc) {
- const UChar *lastStarterInDest=findPreviousCompBoundary(buffer.getStart(),
- buffer.getLimit(), onlyContiguous);
- int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastStarterInDest);
- UnicodeString middle(lastStarterInDest, destSuffixLength);
- buffer.removeSuffix(destSuffixLength);
- safeMiddle=middle;
- middle.append(src, (int32_t)(firstStarterInSrc-src));
- const UChar *middleStart=middle.getBuffer();
- compose(middleStart, middleStart+middle.length(), onlyContiguous,
- TRUE, buffer, errorCode);
- if(U_FAILURE(errorCode)) {
- return;
- }
- src=firstStarterInSrc;
- }
- }
- if(doCompose) {
- compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
- } else {
- if(limit==NULL) { // appendZeroCC() needs limit!=NULL
- limit=u_strchr(src, 0);
- }
- buffer.appendZeroCC(src, limit, errorCode);
- }
-}
-
-UBool
-Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
- const uint8_t *src, const uint8_t *limit,
- ByteSink *sink, Edits *edits, UErrorCode &errorCode) const {
- U_ASSERT(limit != nullptr);
- UnicodeString s16;
- uint8_t minNoMaybeLead = leadByteForCP(minCompNoMaybeCP);
- const uint8_t *prevBoundary = src;
-
- for (;;) {
- // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
- // or with (compYes && ccc==0) properties.
- const uint8_t *prevSrc;
- uint16_t norm16 = 0;
- for (;;) {
- if (src == limit) {
- if (prevBoundary != limit && sink != nullptr) {
- ByteSinkUtil::appendUnchanged(prevBoundary, limit,
- *sink, options, edits, errorCode);
- }
- return TRUE;
- }
- if (*src < minNoMaybeLead) {
- ++src;
- } else {
- prevSrc = src;
- UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
- if (!isCompYesAndZeroCC(norm16)) {
- break;
- }
- }
- }
- // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
- // The current character is either a "noNo" (has a mapping)
- // or a "maybeYes" (combines backward)
- // or a "yesYes" with ccc!=0.
- // It is not a Hangul syllable or Jamo L because those have "yes" properties.
-
- // Medium-fast path: Handle cases that do not require full decomposition and recomposition.
- if (!isMaybeOrNonZeroCC(norm16)) { // minNoNo <= norm16 < minMaybeYes
- if (sink == nullptr) {
- return FALSE;
- }
- // Fast path for mapping a character that is immediately surrounded by boundaries.
- // In this case, we need not decompose around the current character.
- if (isDecompNoAlgorithmic(norm16)) {
- // Maps to a single isCompYesAndZeroCC character
- // which also implies hasCompBoundaryBefore.
- if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
- hasCompBoundaryBefore(src, limit)) {
- if (prevBoundary != prevSrc &&
- !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
- *sink, options, edits, errorCode)) {
- break;
- }
- appendCodePointDelta(prevSrc, src, getAlgorithmicDelta(norm16), *sink, edits);
- prevBoundary = src;
- continue;
- }
- } else if (norm16 < minNoNoCompBoundaryBefore) {
- // The mapping is comp-normalized which also implies hasCompBoundaryBefore.
- if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
- hasCompBoundaryBefore(src, limit)) {
- if (prevBoundary != prevSrc &&
- !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
- *sink, options, edits, errorCode)) {
- break;
- }
- const uint16_t *mapping = getMapping(norm16);
- int32_t length = *mapping++ & MAPPING_LENGTH_MASK;
- if (!ByteSinkUtil::appendChange(prevSrc, src, (const UChar *)mapping, length,
- *sink, edits, errorCode)) {
- break;
- }
- prevBoundary = src;
- continue;
- }
- } else if (norm16 >= minNoNoEmpty) {
- // The current character maps to nothing.
- // Simply omit it from the output if there is a boundary before _or_ after it.
- // The character itself implies no boundaries.
- if (hasCompBoundaryBefore(src, limit) ||
- hasCompBoundaryAfter(prevBoundary, prevSrc, onlyContiguous)) {
- if (prevBoundary != prevSrc &&
- !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
- *sink, options, edits, errorCode)) {
- break;
- }
- if (edits != nullptr) {
- edits->addReplace((int32_t)(src - prevSrc), 0);
- }
- prevBoundary = src;
- continue;
- }
- }
- // Other "noNo" type, or need to examine more text around this character:
- // Fall through to the slow path.
- } else if (isJamoVT(norm16)) {
- // Jamo L: E1 84 80..92
- // Jamo V: E1 85 A1..B5
- // Jamo T: E1 86 A8..E1 87 82
- U_ASSERT((src - prevSrc) == 3 && *prevSrc == 0xe1);
- UChar32 prev = previousHangulOrJamo(prevBoundary, prevSrc);
- if (prevSrc[1] == 0x85) {
- // The current character is a Jamo Vowel,
- // compose with previous Jamo L and following Jamo T.
- UChar32 l = prev - Hangul::JAMO_L_BASE;
- if ((uint32_t)l < Hangul::JAMO_L_COUNT) {
- if (sink == nullptr) {
- return FALSE;
- }
- int32_t t = getJamoTMinusBase(src, limit);
- if (t >= 0) {
- // The next character is a Jamo T.
- src += 3;
- } else if (hasCompBoundaryBefore(src, limit)) {
- // No Jamo T follows, not even via decomposition.
- t = 0;
- }
- if (t >= 0) {
- UChar32 syllable = Hangul::HANGUL_BASE +
- (l*Hangul::JAMO_V_COUNT + (prevSrc[2]-0xa1)) *
- Hangul::JAMO_T_COUNT + t;
- prevSrc -= 3; // Replace the Jamo L as well.
- if (prevBoundary != prevSrc &&
- !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
- *sink, options, edits, errorCode)) {
- break;
- }
- ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits);
- prevBoundary = src;
- continue;
- }
- // If we see L+V+x where x!=T then we drop to the slow path,
- // decompose and recompose.
- // This is to deal with NFKC finding normal L and V but a
- // compatibility variant of a T.
- // We need to either fully compose that combination here
- // (which would complicate the code and may not work with strange custom data)
- // or use the slow path.
- }
- } else if (Hangul::isHangulLV(prev)) {
- // The current character is a Jamo Trailing consonant,
- // compose with previous Hangul LV that does not contain a Jamo T.
- if (sink == nullptr) {
- return FALSE;
- }
- UChar32 syllable = prev + getJamoTMinusBase(prevSrc, src);
- prevSrc -= 3; // Replace the Hangul LV as well.
- if (prevBoundary != prevSrc &&
- !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
- *sink, options, edits, errorCode)) {
- break;
- }
- ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits);
- prevBoundary = src;
- continue;
- }
- // No matching context, or may need to decompose surrounding text first:
- // Fall through to the slow path.
- } else if (norm16 > JAMO_VT) { // norm16 >= MIN_YES_YES_WITH_CC
- // One or more combining marks that do not combine-back:
- // Check for canonical order, copy unchanged if ok and
- // if followed by a character with a boundary-before.
- uint8_t cc = getCCFromNormalYesOrMaybe(norm16); // cc!=0
- if (onlyContiguous /* FCC */ && getPreviousTrailCC(prevBoundary, prevSrc) > cc) {
- // Fails FCD test, need to decompose and contiguously recompose.
- if (sink == nullptr) {
- return FALSE;
- }
- } else {
- // If !onlyContiguous (not FCC), then we ignore the tccc of
- // the previous character which passed the quick check "yes && ccc==0" test.
- const uint8_t *nextSrc;
- uint16_t n16;
- for (;;) {
- if (src == limit) {
- if (sink != nullptr) {
- ByteSinkUtil::appendUnchanged(prevBoundary, limit,
- *sink, options, edits, errorCode);
- }
- return TRUE;
- }
- uint8_t prevCC = cc;
- nextSrc = src;
- UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, n16);
- if (n16 >= MIN_YES_YES_WITH_CC) {
- cc = getCCFromNormalYesOrMaybe(n16);
- if (prevCC > cc) {
- if (sink == nullptr) {
- return FALSE;
- }
- break;
- }
- } else {
- break;
- }
- src = nextSrc;
- }
- // src is after the last in-order combining mark.
- // If there is a boundary here, then we continue with no change.
- if (norm16HasCompBoundaryBefore(n16)) {
- if (isCompYesAndZeroCC(n16)) {
- src = nextSrc;
- }
- continue;
- }
- // Use the slow path. There is no boundary in [prevSrc, src[.
- }
- }
-
- // Slow path: Find the nearest boundaries around the current character,
- // decompose and recompose.
- if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
- const uint8_t *p = prevSrc;
- UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, prevBoundary, p, norm16);
- if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
- prevSrc = p;
- }
- }
- ReorderingBuffer buffer(*this, s16, errorCode);
- if (U_FAILURE(errorCode)) {
- break;
- }
- // We know there is not a boundary here.
- decomposeShort(prevSrc, src, FALSE /* !stopAtCompBoundary */, onlyContiguous,
- buffer, errorCode);
- // Decompose until the next boundary.
- src = decomposeShort(src, limit, TRUE /* stopAtCompBoundary */, onlyContiguous,
- buffer, errorCode);
- if (U_FAILURE(errorCode)) {
- break;
- }
- if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals()
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return TRUE;
- }
- recompose(buffer, 0, onlyContiguous);
- if (!buffer.equals(prevSrc, src)) {
- if (sink == nullptr) {
- return FALSE;
- }
- if (prevBoundary != prevSrc &&
- !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
- *sink, options, edits, errorCode)) {
- break;
- }
- if (!ByteSinkUtil::appendChange(prevSrc, src, buffer.getStart(), buffer.length(),
- *sink, edits, errorCode)) {
- break;
- }
- prevBoundary = src;
- }
- }
- return TRUE;
-}
-
-UBool Normalizer2Impl::hasCompBoundaryBefore(const UChar *src, const UChar *limit) const {
- if (src == limit || *src < minCompNoMaybeCP) {
- return TRUE;
- }
- UChar32 c;
- uint16_t norm16;
- UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16);
- return norm16HasCompBoundaryBefore(norm16);
-}
-
-UBool Normalizer2Impl::hasCompBoundaryBefore(const uint8_t *src, const uint8_t *limit) const {
- if (src == limit) {
- return TRUE;
- }
- uint16_t norm16;
- UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
- return norm16HasCompBoundaryBefore(norm16);
-}
-
-UBool Normalizer2Impl::hasCompBoundaryAfter(const UChar *start, const UChar *p,
- UBool onlyContiguous) const {
- if (start == p) {
- return TRUE;
- }
- UChar32 c;
- uint16_t norm16;
- UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);
- return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
-}
-
-UBool Normalizer2Impl::hasCompBoundaryAfter(const uint8_t *start, const uint8_t *p,
- UBool onlyContiguous) const {
- if (start == p) {
- return TRUE;
- }
- uint16_t norm16;
- UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, start, p, norm16);
- return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
-}
-
-const UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p,
- UBool onlyContiguous) const {
- while (p != start) {
- const UChar *codePointLimit = p;
- UChar32 c;
- uint16_t norm16;
- UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);
- if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
- return codePointLimit;
- }
- if (hasCompBoundaryBefore(c, norm16)) {
- return p;
- }
- }
- return p;
-}
-
-const UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit,
- UBool onlyContiguous) const {
- while (p != limit) {
- const UChar *codePointStart = p;
- UChar32 c;
- uint16_t norm16;
- UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
- if (hasCompBoundaryBefore(c, norm16)) {
- return codePointStart;
- }
- if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
- return p;
- }
- }
- return p;
-}
-
-uint8_t Normalizer2Impl::getPreviousTrailCC(const UChar *start, const UChar *p) const {
- if (start == p) {
- return 0;
- }
- int32_t i = (int32_t)(p - start);
- UChar32 c;
- U16_PREV(start, 0, i, c);
- return (uint8_t)getFCD16(c);
-}
-
-uint8_t Normalizer2Impl::getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const {
- if (start == p) {
- return 0;
- }
- int32_t i = (int32_t)(p - start);
- UChar32 c;
- U8_PREV(start, 0, i, c);
- return (uint8_t)getFCD16(c);
-}
-
-// Note: normalizer2impl.cpp r30982 (2011-nov-27)
-// still had getFCDTrie() which built and cached an FCD trie.
-// That provided faster access to FCD data than getFCD16FromNormData()
-// but required synchronization and consumed some 10kB of heap memory
-// in any process that uses FCD (e.g., via collation).
-// minDecompNoCP etc. and smallFCD[] are intended to help with any loss of performance,
-// at least for ASCII & CJK.
-
-// Ticket 20907 - The optimizer in MSVC/Visual Studio versions below 16.4 has trouble with this
-// function on Windows ARM64. As a work-around, we disable optimizations for this function.
-// This work-around could/should be removed once the following versions of Visual Studio are no
-// longer supported: All versions of VS2017, and versions of VS2019 below 16.4.
-#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
-#pragma optimize( "", off )
-#endif
-// Gets the FCD value from the regular normalization data.
-uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const {
- uint16_t norm16=getNorm16(c);
- if (norm16 >= limitNoNo) {
- if(norm16>=MIN_NORMAL_MAYBE_YES) {
- // combining mark
- norm16=getCCFromNormalYesOrMaybe(norm16);
- return norm16|(norm16<<8);
- } else if(norm16>=minMaybeYes) {
- return 0;
- } else { // isDecompNoAlgorithmic(norm16)
- uint16_t deltaTrailCC = norm16 & DELTA_TCCC_MASK;
- if (deltaTrailCC <= DELTA_TCCC_1) {
- return deltaTrailCC >> OFFSET_SHIFT;
- }
- // Maps to an isCompYesAndZeroCC.
- c=mapAlgorithmic(c, norm16);
- norm16=getRawNorm16(c);
- }
- }
- if(norm16<=minYesNo || isHangulLVT(norm16)) {
- // no decomposition or Hangul syllable, all zeros
- return 0;
- }
- // c decomposes, get everything from the variable-length extra data
- const uint16_t *mapping=getMapping(norm16);
- uint16_t firstUnit=*mapping;
- norm16=firstUnit>>8; // tccc
- if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
- norm16|=*(mapping-1)&0xff00; // lccc
- }
- return norm16;
-}
-#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
-#pragma optimize( "", on )
-#endif
-
-// Dual functionality:
-// buffer!=NULL: normalize
-// buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
-const UChar *
-Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit,
- ReorderingBuffer *buffer,
- UErrorCode &errorCode) const {
- // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
- // Similar to the prevBoundary in the compose() implementation.
- const UChar *prevBoundary=src;
- int32_t prevFCD16=0;
- if(limit==NULL) {
- src=copyLowPrefixFromNulTerminated(src, minLcccCP, buffer, errorCode);
- if(U_FAILURE(errorCode)) {
- return src;
- }
- if(prevBoundary<src) {
- prevBoundary=src;
- // We know that the previous character's lccc==0.
- // Fetching the fcd16 value was deferred for this below-U+0300 code point.
- prevFCD16=getFCD16(*(src-1));
- if(prevFCD16>1) {
- --prevBoundary;
- }
- }
- limit=u_strchr(src, 0);
- }
-
- // Note: In this function we use buffer->appendZeroCC() because we track
- // the lead and trail combining classes here, rather than leaving it to
- // the ReorderingBuffer.
- // The exception is the call to decomposeShort() which uses the buffer
- // in the normal way.
-
- const UChar *prevSrc;
- UChar32 c=0;
- uint16_t fcd16=0;
-
- for(;;) {
- // count code units with lccc==0
- for(prevSrc=src; src!=limit;) {
- if((c=*src)<minLcccCP) {
- prevFCD16=~c;
- ++src;
- } else if(!singleLeadMightHaveNonZeroFCD16(c)) {
- prevFCD16=0;
- ++src;
- } else {
- if(U16_IS_LEAD(c)) {
- UChar c2;
- if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
- c=U16_GET_SUPPLEMENTARY(c, c2);
- }
- }
- if((fcd16=getFCD16FromNormData(c))<=0xff) {
- prevFCD16=fcd16;
- src+=U16_LENGTH(c);
- } else {
- break;
- }
- }
- }
- // copy these code units all at once
- if(src!=prevSrc) {
- if(buffer!=NULL && !buffer->appendZeroCC(prevSrc, src, errorCode)) {
- break;
- }
- if(src==limit) {
- break;
- }
- prevBoundary=src;
- // We know that the previous character's lccc==0.
- if(prevFCD16<0) {
- // Fetching the fcd16 value was deferred for this below-minLcccCP code point.
- UChar32 prev=~prevFCD16;
- if(prev<minDecompNoCP) {
- prevFCD16=0;
- } else {
- prevFCD16=getFCD16FromNormData(prev);
- if(prevFCD16>1) {
- --prevBoundary;
- }
- }
- } else {
- const UChar *p=src-1;
- if(U16_IS_TRAIL(*p) && prevSrc<p && U16_IS_LEAD(*(p-1))) {
- --p;
- // Need to fetch the previous character's FCD value because
- // prevFCD16 was just for the trail surrogate code point.
- prevFCD16=getFCD16FromNormData(U16_GET_SUPPLEMENTARY(p[0], p[1]));
- // Still known to have lccc==0 because its lead surrogate unit had lccc==0.
- }
- if(prevFCD16>1) {
- prevBoundary=p;
- }
- }
- // The start of the current character (c).
- prevSrc=src;
- } else if(src==limit) {
- break;
- }
-
- src+=U16_LENGTH(c);
- // The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
- // Check for proper order, and decompose locally if necessary.
- if((prevFCD16&0xff)<=(fcd16>>8)) {
- // proper order: prev tccc <= current lccc
- if((fcd16&0xff)<=1) {
- prevBoundary=src;
- }
- if(buffer!=NULL && !buffer->appendZeroCC(c, errorCode)) {
- break;
- }
- prevFCD16=fcd16;
- continue;
- } else if(buffer==NULL) {
- return prevBoundary; // quick check "no"
- } else {
- /*
- * Back out the part of the source that we copied or appended
- * already but is now going to be decomposed.
- * prevSrc is set to after what was copied/appended.
- */
- buffer->removeSuffix((int32_t)(prevSrc-prevBoundary));
- /*
- * Find the part of the source that needs to be decomposed,
- * up to the next safe boundary.
- */
- src=findNextFCDBoundary(src, limit);
- /*
- * The source text does not fulfill the conditions for FCD.
- * Decompose and reorder a limited piece of the text.
- */
- decomposeShort(prevBoundary, src, FALSE, FALSE, *buffer, errorCode);
- if (U_FAILURE(errorCode)) {
- break;
- }
- prevBoundary=src;
- prevFCD16=0;
- }
- }
- return src;
-}
-
-void Normalizer2Impl::makeFCDAndAppend(const UChar *src, const UChar *limit,
- UBool doMakeFCD,
- UnicodeString &safeMiddle,
- ReorderingBuffer &buffer,
- UErrorCode &errorCode) const {
- if(!buffer.isEmpty()) {
- const UChar *firstBoundaryInSrc=findNextFCDBoundary(src, limit);
- if(src!=firstBoundaryInSrc) {
- const UChar *lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStart(),
- buffer.getLimit());
- int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest);
- UnicodeString middle(lastBoundaryInDest, destSuffixLength);
- buffer.removeSuffix(destSuffixLength);
- safeMiddle=middle;
- middle.append(src, (int32_t)(firstBoundaryInSrc-src));
- const UChar *middleStart=middle.getBuffer();
- makeFCD(middleStart, middleStart+middle.length(), &buffer, errorCode);
- if(U_FAILURE(errorCode)) {
- return;
- }
- src=firstBoundaryInSrc;
- }
- }
- if(doMakeFCD) {
- makeFCD(src, limit, &buffer, errorCode);
- } else {
- if(limit==NULL) { // appendZeroCC() needs limit!=NULL
- limit=u_strchr(src, 0);
- }
- buffer.appendZeroCC(src, limit, errorCode);
- }
-}
-
-const UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const UChar *p) const {
- while(start<p) {
- const UChar *codePointLimit = p;
- UChar32 c;
- uint16_t norm16;
- UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);
- if (c < minDecompNoCP || norm16HasDecompBoundaryAfter(norm16)) {
- return codePointLimit;
- }
- if (norm16HasDecompBoundaryBefore(norm16)) {
- return p;
- }
- }
- return p;
-}
-
-const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *limit) const {
- while(p<limit) {
- const UChar *codePointStart=p;
- UChar32 c;
- uint16_t norm16;
- UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
- if (c < minLcccCP || norm16HasDecompBoundaryBefore(norm16)) {
- return codePointStart;
- }
- if (norm16HasDecompBoundaryAfter(norm16)) {
- return p;
- }
- }
- return p;
-}
-
-// CanonicalIterator data -------------------------------------------------- ***
-
-CanonIterData::CanonIterData(UErrorCode &errorCode) :
- mutableTrie(umutablecptrie_open(0, 0, &errorCode)), trie(nullptr),
- canonStartSets(uprv_deleteUObject, NULL, errorCode) {}
-
-CanonIterData::~CanonIterData() {
- umutablecptrie_close(mutableTrie);
- ucptrie_close(trie);
-}
-
-void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode) {
- uint32_t canonValue = umutablecptrie_get(mutableTrie, decompLead);
- if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) {
- // origin is the first character whose decomposition starts with
- // the character for which we are setting the value.
- umutablecptrie_set(mutableTrie, decompLead, canonValue|origin, &errorCode);
- } else {
- // origin is not the first character, or it is U+0000.
- UnicodeSet *set;
- if((canonValue&CANON_HAS_SET)==0) {
- set=new UnicodeSet;
- if(set==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK);
- canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size();
- umutablecptrie_set(mutableTrie, decompLead, canonValue, &errorCode);
- canonStartSets.addElement(set, errorCode);
- if(firstOrigin!=0) {
- set->add(firstOrigin);
- }
- } else {
- set=(UnicodeSet *)canonStartSets[(int32_t)(canonValue&CANON_VALUE_MASK)];
- }
- set->add(origin);
- }
-}
-
-// C++ class for friend access to private Normalizer2Impl members.
-class InitCanonIterData {
-public:
- static void doInit(Normalizer2Impl *impl, UErrorCode &errorCode);
-};
-
-U_CDECL_BEGIN
-
-// UInitOnce instantiation function for CanonIterData
-static void U_CALLCONV
-initCanonIterData(Normalizer2Impl *impl, UErrorCode &errorCode) {
- InitCanonIterData::doInit(impl, errorCode);
-}
-
-U_CDECL_END
-
-void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) {
- U_ASSERT(impl->fCanonIterData == NULL);
- impl->fCanonIterData = new CanonIterData(errorCode);
- if (impl->fCanonIterData == NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- }
- if (U_SUCCESS(errorCode)) {
- UChar32 start = 0, end;
- uint32_t value;
- while ((end = ucptrie_getRange(impl->normTrie, start,
- UCPMAP_RANGE_FIXED_LEAD_SURROGATES, Normalizer2Impl::INERT,
- nullptr, nullptr, &value)) >= 0) {
- // Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
- if (value != Normalizer2Impl::INERT) {
- impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode);
- }
- start = end + 1;
- }
-#ifdef UCPTRIE_DEBUG
- umutablecptrie_setName(impl->fCanonIterData->mutableTrie, "CanonIterData");
-#endif
- impl->fCanonIterData->trie = umutablecptrie_buildImmutable(
- impl->fCanonIterData->mutableTrie, UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_32, &errorCode);
- umutablecptrie_close(impl->fCanonIterData->mutableTrie);
- impl->fCanonIterData->mutableTrie = nullptr;
- }
- if (U_FAILURE(errorCode)) {
- delete impl->fCanonIterData;
- impl->fCanonIterData = NULL;
- }
-}
-
-void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16,
- CanonIterData &newData,
- UErrorCode &errorCode) const {
- if(isInert(norm16) || (minYesNo<=norm16 && norm16<minNoNo)) {
- // Inert, or 2-way mapping (including Hangul syllable).
- // We do not write a canonStartSet for any yesNo character.
- // Composites from 2-way mappings are added at runtime from the
- // starter's compositions list, and the other characters in
- // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are
- // "maybe" characters.
- return;
- }
- for(UChar32 c=start; c<=end; ++c) {
- uint32_t oldValue = umutablecptrie_get(newData.mutableTrie, c);
- uint32_t newValue=oldValue;
- if(isMaybeOrNonZeroCC(norm16)) {
- // not a segment starter if it occurs in a decomposition or has cc!=0
- newValue|=CANON_NOT_SEGMENT_STARTER;
- if(norm16<MIN_NORMAL_MAYBE_YES) {
- newValue|=CANON_HAS_COMPOSITIONS;
- }
- } else if(norm16<minYesNo) {
- newValue|=CANON_HAS_COMPOSITIONS;
- } else {
- // c has a one-way decomposition
- UChar32 c2=c;
- // Do not modify the whole-range norm16 value.
- uint16_t norm16_2=norm16;
- if (isDecompNoAlgorithmic(norm16_2)) {
- // Maps to an isCompYesAndZeroCC.
- c2 = mapAlgorithmic(c2, norm16_2);
- norm16_2 = getRawNorm16(c2);
- // No compatibility mappings for the CanonicalIterator.
- U_ASSERT(!(isHangulLV(norm16_2) || isHangulLVT(norm16_2)));
- }
- if (norm16_2 > minYesNo) {
- // c decomposes, get everything from the variable-length extra data
- const uint16_t *mapping=getMapping(norm16_2);
- uint16_t firstUnit=*mapping;
- int32_t length=firstUnit&MAPPING_LENGTH_MASK;
- if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
- if(c==c2 && (*(mapping-1)&0xff)!=0) {
- newValue|=CANON_NOT_SEGMENT_STARTER; // original c has cc!=0
- }
- }
- // Skip empty mappings (no characters in the decomposition).
- if(length!=0) {
- ++mapping; // skip over the firstUnit
- // add c to first code point's start set
- int32_t i=0;
- U16_NEXT_UNSAFE(mapping, i, c2);
- newData.addToStartSet(c, c2, errorCode);
- // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a
- // one-way mapping. A 2-way mapping is possible here after
- // intermediate algorithmic mapping.
- if(norm16_2>=minNoNo) {
- while(i<length) {
- U16_NEXT_UNSAFE(mapping, i, c2);
- uint32_t c2Value = umutablecptrie_get(newData.mutableTrie, c2);
- if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) {
- umutablecptrie_set(newData.mutableTrie, c2,
- c2Value|CANON_NOT_SEGMENT_STARTER, &errorCode);
- }
- }
- }
- }
- } else {
- // c decomposed to c2 algorithmically; c has cc==0
- newData.addToStartSet(c, c2, errorCode);
- }
- }
- if(newValue!=oldValue) {
- umutablecptrie_set(newData.mutableTrie, c, newValue, &errorCode);
- }
- }
-}
-
-UBool Normalizer2Impl::ensureCanonIterData(UErrorCode &errorCode) const {
- // Logically const: Synchronized instantiation.
- Normalizer2Impl *me=const_cast<Normalizer2Impl *>(this);
- umtx_initOnce(me->fCanonIterDataInitOnce, &initCanonIterData, me, errorCode);
- return U_SUCCESS(errorCode);
-}
-
-int32_t Normalizer2Impl::getCanonValue(UChar32 c) const {
- return (int32_t)ucptrie_get(fCanonIterData->trie, c);
-}
-
-const UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const {
- return *(const UnicodeSet *)fCanonIterData->canonStartSets[n];
-}
-
-UBool Normalizer2Impl::isCanonSegmentStarter(UChar32 c) const {
- return getCanonValue(c)>=0;
-}
-
-UBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const {
- int32_t canonValue=getCanonValue(c)&~CANON_NOT_SEGMENT_STARTER;
- if(canonValue==0) {
- return FALSE;
- }
- set.clear();
- int32_t value=canonValue&CANON_VALUE_MASK;
- if((canonValue&CANON_HAS_SET)!=0) {
- set.addAll(getCanonStartSet(value));
- } else if(value!=0) {
- set.add(value);
- }
- if((canonValue&CANON_HAS_COMPOSITIONS)!=0) {
- uint16_t norm16=getRawNorm16(c);
- if(norm16==JAMO_L) {
- UChar32 syllable=
- (UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT);
- set.add(syllable, syllable+Hangul::JAMO_VT_COUNT-1);
- } else {
- addComposites(getCompositionsList(norm16), set);
- }
- }
- return TRUE;
-}
-
-U_NAMESPACE_END
-
-// Normalizer2 data swapping ----------------------------------------------- ***
-
-U_NAMESPACE_USE
-
-U_CAPI int32_t U_EXPORT2
-unorm2_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const UDataInfo *pInfo;
- int32_t headerSize;
-
- const uint8_t *inBytes;
- uint8_t *outBytes;
-
- const int32_t *inIndexes;
- int32_t indexes[Normalizer2Impl::IX_TOTAL_SIZE+1];
-
- int32_t i, offset, nextOffset, size;
-
- /* udata_swapDataHeader checks the arguments */
- headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /* check data format and format version */
- pInfo=(const UDataInfo *)((const char *)inData+4);
- uint8_t formatVersion0=pInfo->formatVersion[0];
- if(!(
- pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
- pInfo->dataFormat[1]==0x72 &&
- pInfo->dataFormat[2]==0x6d &&
- pInfo->dataFormat[3]==0x32 &&
- (1<=formatVersion0 && formatVersion0<=4)
- )) {
- udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n",
- pInfo->dataFormat[0], pInfo->dataFormat[1],
- pInfo->dataFormat[2], pInfo->dataFormat[3],
- pInfo->formatVersion[0]);
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- inBytes=(const uint8_t *)inData+headerSize;
- outBytes=(uint8_t *)outData+headerSize;
-
- inIndexes=(const int32_t *)inBytes;
- int32_t minIndexesLength;
- if(formatVersion0==1) {
- minIndexesLength=Normalizer2Impl::IX_MIN_MAYBE_YES+1;
- } else if(formatVersion0==2) {
- minIndexesLength=Normalizer2Impl::IX_MIN_YES_NO_MAPPINGS_ONLY+1;
- } else {
- minIndexesLength=Normalizer2Impl::IX_MIN_LCCC_CP+1;
- }
-
- if(length>=0) {
- length-=headerSize;
- if(length<minIndexesLength*4) {
- udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for Normalizer2 data\n",
- length);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- }
-
- /* read the first few indexes */
- for(i=0; i<UPRV_LENGTHOF(indexes); ++i) {
- indexes[i]=udata_readInt32(ds, inIndexes[i]);
- }
-
- /* get the total length of the data */
- size=indexes[Normalizer2Impl::IX_TOTAL_SIZE];
-
- if(length>=0) {
- if(length<size) {
- udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for all of Normalizer2 data\n",
- length);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- /* copy the data for inaccessible bytes */
- if(inBytes!=outBytes) {
- uprv_memcpy(outBytes, inBytes, size);
- }
-
- offset=0;
-
- /* swap the int32_t indexes[] */
- nextOffset=indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET];
- ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode);
- offset=nextOffset;
-
- /* swap the trie */
- nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET];
- utrie_swapAnyVersion(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
- offset=nextOffset;
-
- /* swap the uint16_t extraData[] */
- nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET];
- ds->swapArray16(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
- offset=nextOffset;
-
- /* no need to swap the uint8_t smallFCD[] (new in formatVersion 2) */
- nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET+1];
- offset=nextOffset;
-
- U_ASSERT(offset==size);
- }
-
- return headerSize+size;
-}
-
-#endif // !UCONFIG_NO_NORMALIZATION
diff --git a/contrib/libs/icu/common/normalizer2impl.h b/contrib/libs/icu/common/normalizer2impl.h
deleted file mode 100644
index cf3015ea881..00000000000
--- a/contrib/libs/icu/common/normalizer2impl.h
+++ /dev/null
@@ -1,978 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2009-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: normalizer2impl.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2009nov22
-* created by: Markus W. Scherer
-*/
-
-#ifndef __NORMALIZER2IMPL_H__
-#define __NORMALIZER2IMPL_H__
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_NORMALIZATION
-
-#include "unicode/normalizer2.h"
-#include "unicode/ucptrie.h"
-#include "unicode/unistr.h"
-#include "unicode/unorm.h"
-#include "unicode/utf.h"
-#include "unicode/utf16.h"
-#include "mutex.h"
-#include "udataswp.h"
-#include "uset_imp.h"
-
-// When the nfc.nrm data is *not* hardcoded into the common library
-// (with this constant set to 0),
-// then it needs to be built into the data package:
-// Add nfc.nrm to icu4c/source/data/Makefile.in DAT_FILES_SHORT
-#define NORM2_HARDCODE_NFC_DATA 1
-
-U_NAMESPACE_BEGIN
-
-struct CanonIterData;
-
-class ByteSink;
-class Edits;
-class InitCanonIterData;
-class LcccContext;
-
-class U_COMMON_API Hangul {
-public:
- /* Korean Hangul and Jamo constants */
- enum {
- JAMO_L_BASE=0x1100, /* "lead" jamo */
- JAMO_L_END=0x1112,
- JAMO_V_BASE=0x1161, /* "vowel" jamo */
- JAMO_V_END=0x1175,
- JAMO_T_BASE=0x11a7, /* "trail" jamo */
- JAMO_T_END=0x11c2,
-
- HANGUL_BASE=0xac00,
- HANGUL_END=0xd7a3,
-
- JAMO_L_COUNT=19,
- JAMO_V_COUNT=21,
- JAMO_T_COUNT=28,
-
- JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT,
-
- HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT,
- HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT
- };
-
- static inline UBool isHangul(UChar32 c) {
- return HANGUL_BASE<=c && c<HANGUL_LIMIT;
- }
- static inline UBool
- isHangulLV(UChar32 c) {
- c-=HANGUL_BASE;
- return 0<=c && c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
- }
- static inline UBool isJamoL(UChar32 c) {
- return (uint32_t)(c-JAMO_L_BASE)<JAMO_L_COUNT;
- }
- static inline UBool isJamoV(UChar32 c) {
- return (uint32_t)(c-JAMO_V_BASE)<JAMO_V_COUNT;
- }
- static inline UBool isJamoT(UChar32 c) {
- int32_t t=c-JAMO_T_BASE;
- return 0<t && t<JAMO_T_COUNT; // not JAMO_T_BASE itself
- }
- static UBool isJamo(UChar32 c) {
- return JAMO_L_BASE<=c && c<=JAMO_T_END &&
- (c<=JAMO_L_END || (JAMO_V_BASE<=c && c<=JAMO_V_END) || JAMO_T_BASE<c);
- }
-
- /**
- * Decomposes c, which must be a Hangul syllable, into buffer
- * and returns the length of the decomposition (2 or 3).
- */
- static inline int32_t decompose(UChar32 c, UChar buffer[3]) {
- c-=HANGUL_BASE;
- UChar32 c2=c%JAMO_T_COUNT;
- c/=JAMO_T_COUNT;
- buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT);
- buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT);
- if(c2==0) {
- return 2;
- } else {
- buffer[2]=(UChar)(JAMO_T_BASE+c2);
- return 3;
- }
- }
-
- /**
- * Decomposes c, which must be a Hangul syllable, into buffer.
- * This is the raw, not recursive, decomposition. Its length is always 2.
- */
- static inline void getRawDecomposition(UChar32 c, UChar buffer[2]) {
- UChar32 orig=c;
- c-=HANGUL_BASE;
- UChar32 c2=c%JAMO_T_COUNT;
- if(c2==0) {
- c/=JAMO_T_COUNT;
- buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT);
- buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT);
- } else {
- buffer[0]=(UChar)(orig-c2); // LV syllable
- buffer[1]=(UChar)(JAMO_T_BASE+c2);
- }
- }
-private:
- Hangul(); // no instantiation
-};
-
-class Normalizer2Impl;
-
-class U_COMMON_API ReorderingBuffer : public UMemory {
-public:
- /** Constructs only; init() should be called. */
- ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) :
- impl(ni), str(dest),
- start(NULL), reorderStart(NULL), limit(NULL),
- remainingCapacity(0), lastCC(0) {}
- /** Constructs, removes the string contents, and initializes for a small initial capacity. */
- ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest, UErrorCode &errorCode);
- ~ReorderingBuffer() {
- if(start!=NULL) {
- str.releaseBuffer((int32_t)(limit-start));
- }
- }
- UBool init(int32_t destCapacity, UErrorCode &errorCode);
-
- UBool isEmpty() const { return start==limit; }
- int32_t length() const { return (int32_t)(limit-start); }
- UChar *getStart() { return start; }
- UChar *getLimit() { return limit; }
- uint8_t getLastCC() const { return lastCC; }
-
- UBool equals(const UChar *start, const UChar *limit) const;
- UBool equals(const uint8_t *otherStart, const uint8_t *otherLimit) const;
-
- UBool append(UChar32 c, uint8_t cc, UErrorCode &errorCode) {
- return (c<=0xffff) ?
- appendBMP((UChar)c, cc, errorCode) :
- appendSupplementary(c, cc, errorCode);
- }
- UBool append(const UChar *s, int32_t length, UBool isNFD,
- uint8_t leadCC, uint8_t trailCC,
- UErrorCode &errorCode);
- UBool appendBMP(UChar c, uint8_t cc, UErrorCode &errorCode) {
- if(remainingCapacity==0 && !resize(1, errorCode)) {
- return FALSE;
- }
- if(lastCC<=cc || cc==0) {
- *limit++=c;
- lastCC=cc;
- if(cc<=1) {
- reorderStart=limit;
- }
- } else {
- insert(c, cc);
- }
- --remainingCapacity;
- return TRUE;
- }
- UBool appendZeroCC(UChar32 c, UErrorCode &errorCode);
- UBool appendZeroCC(const UChar *s, const UChar *sLimit, UErrorCode &errorCode);
- void remove();
- void removeSuffix(int32_t suffixLength);
- void setReorderingLimit(UChar *newLimit) {
- remainingCapacity+=(int32_t)(limit-newLimit);
- reorderStart=limit=newLimit;
- lastCC=0;
- }
- void copyReorderableSuffixTo(UnicodeString &s) const {
- s.setTo(ConstChar16Ptr(reorderStart), (int32_t)(limit-reorderStart));
- }
-private:
- /*
- * TODO: Revisit whether it makes sense to track reorderStart.
- * It is set to after the last known character with cc<=1,
- * which stops previousCC() before it reads that character and looks up its cc.
- * previousCC() is normally only called from insert().
- * In other words, reorderStart speeds up the insertion of a combining mark
- * into a multi-combining mark sequence where it does not belong at the end.
- * This might not be worth the trouble.
- * On the other hand, it's not a huge amount of trouble.
- *
- * We probably need it for UNORM_SIMPLE_APPEND.
- */
-
- UBool appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode);
- void insert(UChar32 c, uint8_t cc);
- static void writeCodePoint(UChar *p, UChar32 c) {
- if(c<=0xffff) {
- *p=(UChar)c;
- } else {
- p[0]=U16_LEAD(c);
- p[1]=U16_TRAIL(c);
- }
- }
- UBool resize(int32_t appendLength, UErrorCode &errorCode);
-
- const Normalizer2Impl &impl;
- UnicodeString &str;
- UChar *start, *reorderStart, *limit;
- int32_t remainingCapacity;
- uint8_t lastCC;
-
- // private backward iterator
- void setIterator() { codePointStart=limit; }
- void skipPrevious(); // Requires start<codePointStart.
- uint8_t previousCC(); // Returns 0 if there is no previous character.
-
- UChar *codePointStart, *codePointLimit;
-};
-
-/**
- * Low-level implementation of the Unicode Normalization Algorithm.
- * For the data structure and details see the documentation at the end of
- * this normalizer2impl.h and in the design doc at
- * http://site.icu-project.org/design/normalization/custom
- */
-class U_COMMON_API Normalizer2Impl : public UObject {
-public:
- Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) { }
- virtual ~Normalizer2Impl();
-
- void init(const int32_t *inIndexes, const UCPTrie *inTrie,
- const uint16_t *inExtraData, const uint8_t *inSmallFCD);
-
- void addLcccChars(UnicodeSet &set) const;
- void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
- void addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
-
- // low-level properties ------------------------------------------------ ***
-
- UBool ensureCanonIterData(UErrorCode &errorCode) const;
-
- // The trie stores values for lead surrogate code *units*.
- // Surrogate code *points* are inert.
- uint16_t getNorm16(UChar32 c) const {
- return U_IS_LEAD(c) ?
- static_cast<uint16_t>(INERT) :
- UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c);
- }
- uint16_t getRawNorm16(UChar32 c) const { return UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); }
-
- UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const {
- if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) {
- return UNORM_YES;
- } else if(minMaybeYes<=norm16) {
- return UNORM_MAYBE;
- } else {
- return UNORM_NO;
- }
- }
- UBool isAlgorithmicNoNo(uint16_t norm16) const { return limitNoNo<=norm16 && norm16<minMaybeYes; }
- UBool isCompNo(uint16_t norm16) const { return minNoNo<=norm16 && norm16<minMaybeYes; }
- UBool isDecompYes(uint16_t norm16) const { return norm16<minYesNo || minMaybeYes<=norm16; }
-
- uint8_t getCC(uint16_t norm16) const {
- if(norm16>=MIN_NORMAL_MAYBE_YES) {
- return getCCFromNormalYesOrMaybe(norm16);
- }
- if(norm16<minNoNo || limitNoNo<=norm16) {
- return 0;
- }
- return getCCFromNoNo(norm16);
- }
- static uint8_t getCCFromNormalYesOrMaybe(uint16_t norm16) {
- return (uint8_t)(norm16 >> OFFSET_SHIFT);
- }
- static uint8_t getCCFromYesOrMaybe(uint16_t norm16) {
- return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0;
- }
- uint8_t getCCFromYesOrMaybeCP(UChar32 c) const {
- if (c < minCompNoMaybeCP) { return 0; }
- return getCCFromYesOrMaybe(getNorm16(c));
- }
-
- /**
- * Returns the FCD data for code point c.
- * @param c A Unicode code point.
- * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
- */
- uint16_t getFCD16(UChar32 c) const {
- if(c<minDecompNoCP) {
- return 0;
- } else if(c<=0xffff) {
- if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; }
- }
- return getFCD16FromNormData(c);
- }
- /**
- * Returns the FCD data for the next code point (post-increment).
- * Might skip only a lead surrogate rather than the whole surrogate pair if none of
- * the supplementary code points associated with the lead surrogate have non-zero FCD data.
- * @param s A valid pointer into a string. Requires s!=limit.
- * @param limit The end of the string, or NULL.
- * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
- */
- uint16_t nextFCD16(const UChar *&s, const UChar *limit) const {
- UChar32 c=*s++;
- if(c<minDecompNoCP || !singleLeadMightHaveNonZeroFCD16(c)) {
- return 0;
- }
- UChar c2;
- if(U16_IS_LEAD(c) && s!=limit && U16_IS_TRAIL(c2=*s)) {
- c=U16_GET_SUPPLEMENTARY(c, c2);
- ++s;
- }
- return getFCD16FromNormData(c);
- }
- /**
- * Returns the FCD data for the previous code point (pre-decrement).
- * @param start The start of the string.
- * @param s A valid pointer into a string. Requires start<s.
- * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
- */
- uint16_t previousFCD16(const UChar *start, const UChar *&s) const {
- UChar32 c=*--s;
- if(c<minDecompNoCP) {
- return 0;
- }
- if(!U16_IS_TRAIL(c)) {
- if(!singleLeadMightHaveNonZeroFCD16(c)) {
- return 0;
- }
- } else {
- UChar c2;
- if(start<s && U16_IS_LEAD(c2=*(s-1))) {
- c=U16_GET_SUPPLEMENTARY(c2, c);
- --s;
- }
- }
- return getFCD16FromNormData(c);
- }
-
- /** Returns TRUE if the single-or-lead code unit c might have non-zero FCD data. */
- UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const {
- // 0<=lead<=0xffff
- uint8_t bits=smallFCD[lead>>8];
- if(bits==0) { return false; }
- return (UBool)((bits>>((lead>>5)&7))&1);
- }
- /** Returns the FCD value from the regular normalization data. */
- uint16_t getFCD16FromNormData(UChar32 c) const;
-
- /**
- * Gets the decomposition for one code point.
- * @param c code point
- * @param buffer out-only buffer for algorithmic decompositions
- * @param length out-only, takes the length of the decomposition, if any
- * @return pointer to the decomposition, or NULL if none
- */
- const UChar *getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const;
-
- /**
- * Gets the raw decomposition for one code point.
- * @param c code point
- * @param buffer out-only buffer for algorithmic decompositions
- * @param length out-only, takes the length of the decomposition, if any
- * @return pointer to the decomposition, or NULL if none
- */
- const UChar *getRawDecomposition(UChar32 c, UChar buffer[30], int32_t &length) const;
-
- UChar32 composePair(UChar32 a, UChar32 b) const;
-
- UBool isCanonSegmentStarter(UChar32 c) const;
- UBool getCanonStartSet(UChar32 c, UnicodeSet &set) const;
-
- enum {
- // Fixed norm16 values.
- MIN_YES_YES_WITH_CC=0xfe02,
- JAMO_VT=0xfe00,
- MIN_NORMAL_MAYBE_YES=0xfc00,
- JAMO_L=2, // offset=1 hasCompBoundaryAfter=FALSE
- INERT=1, // offset=0 hasCompBoundaryAfter=TRUE
-
- // norm16 bit 0 is comp-boundary-after.
- HAS_COMP_BOUNDARY_AFTER=1,
- OFFSET_SHIFT=1,
-
- // For algorithmic one-way mappings, norm16 bits 2..1 indicate the
- // tccc (0, 1, >1) for quick FCC boundary-after tests.
- DELTA_TCCC_0=0,
- DELTA_TCCC_1=2,
- DELTA_TCCC_GT_1=4,
- DELTA_TCCC_MASK=6,
- DELTA_SHIFT=3,
-
- MAX_DELTA=0x40
- };
-
- enum {
- // Byte offsets from the start of the data, after the generic header.
- IX_NORM_TRIE_OFFSET,
- IX_EXTRA_DATA_OFFSET,
- IX_SMALL_FCD_OFFSET,
- IX_RESERVED3_OFFSET,
- IX_RESERVED4_OFFSET,
- IX_RESERVED5_OFFSET,
- IX_RESERVED6_OFFSET,
- IX_TOTAL_SIZE,
-
- // Code point thresholds for quick check codes.
- IX_MIN_DECOMP_NO_CP,
- IX_MIN_COMP_NO_MAYBE_CP,
-
- // Norm16 value thresholds for quick check combinations and types of extra data.
-
- /** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */
- IX_MIN_YES_NO,
- /** Mappings are comp-normalized. */
- IX_MIN_NO_NO,
- IX_LIMIT_NO_NO,
- IX_MIN_MAYBE_YES,
-
- /** Mappings only in [minYesNoMappingsOnly..minNoNo[. */
- IX_MIN_YES_NO_MAPPINGS_ONLY,
- /** Mappings are not comp-normalized but have a comp boundary before. */
- IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE,
- /** Mappings do not have a comp boundary before. */
- IX_MIN_NO_NO_COMP_NO_MAYBE_CC,
- /** Mappings to the empty string. */
- IX_MIN_NO_NO_EMPTY,
-
- IX_MIN_LCCC_CP,
- IX_RESERVED19,
- IX_COUNT
- };
-
- enum {
- MAPPING_HAS_CCC_LCCC_WORD=0x80,
- MAPPING_HAS_RAW_MAPPING=0x40,
- // unused bit 0x20,
- MAPPING_LENGTH_MASK=0x1f
- };
-
- enum {
- COMP_1_LAST_TUPLE=0x8000,
- COMP_1_TRIPLE=1,
- COMP_1_TRAIL_LIMIT=0x3400,
- COMP_1_TRAIL_MASK=0x7ffe,
- COMP_1_TRAIL_SHIFT=9, // 10-1 for the "triple" bit
- COMP_2_TRAIL_SHIFT=6,
- COMP_2_TRAIL_MASK=0xffc0
- };
-
- // higher-level functionality ------------------------------------------ ***
-
- // NFD without an NFD Normalizer2 instance.
- UnicodeString &decompose(const UnicodeString &src, UnicodeString &dest,
- UErrorCode &errorCode) const;
- /**
- * Decomposes [src, limit[ and writes the result to dest.
- * limit can be NULL if src is NUL-terminated.
- * destLengthEstimate is the initial dest buffer capacity and can be -1.
- */
- void decompose(const UChar *src, const UChar *limit,
- UnicodeString &dest, int32_t destLengthEstimate,
- UErrorCode &errorCode) const;
-
- const UChar *decompose(const UChar *src, const UChar *limit,
- ReorderingBuffer *buffer, UErrorCode &errorCode) const;
- void decomposeAndAppend(const UChar *src, const UChar *limit,
- UBool doDecompose,
- UnicodeString &safeMiddle,
- ReorderingBuffer &buffer,
- UErrorCode &errorCode) const;
- UBool compose(const UChar *src, const UChar *limit,
- UBool onlyContiguous,
- UBool doCompose,
- ReorderingBuffer &buffer,
- UErrorCode &errorCode) const;
- const UChar *composeQuickCheck(const UChar *src, const UChar *limit,
- UBool onlyContiguous,
- UNormalizationCheckResult *pQCResult) const;
- void composeAndAppend(const UChar *src, const UChar *limit,
- UBool doCompose,
- UBool onlyContiguous,
- UnicodeString &safeMiddle,
- ReorderingBuffer &buffer,
- UErrorCode &errorCode) const;
-
- /** sink==nullptr: isNormalized() */
- UBool composeUTF8(uint32_t options, UBool onlyContiguous,
- const uint8_t *src, const uint8_t *limit,
- ByteSink *sink, icu::Edits *edits, UErrorCode &errorCode) const;
-
- const UChar *makeFCD(const UChar *src, const UChar *limit,
- ReorderingBuffer *buffer, UErrorCode &errorCode) const;
- void makeFCDAndAppend(const UChar *src, const UChar *limit,
- UBool doMakeFCD,
- UnicodeString &safeMiddle,
- ReorderingBuffer &buffer,
- UErrorCode &errorCode) const;
-
- UBool hasDecompBoundaryBefore(UChar32 c) const;
- UBool norm16HasDecompBoundaryBefore(uint16_t norm16) const;
- UBool hasDecompBoundaryAfter(UChar32 c) const;
- UBool norm16HasDecompBoundaryAfter(uint16_t norm16) const;
- UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); }
-
- UBool hasCompBoundaryBefore(UChar32 c) const {
- return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(getNorm16(c));
- }
- UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous) const {
- return norm16HasCompBoundaryAfter(getNorm16(c), onlyContiguous);
- }
- UBool isCompInert(UChar32 c, UBool onlyContiguous) const {
- uint16_t norm16=getNorm16(c);
- return isCompYesAndZeroCC(norm16) &&
- (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 &&
- (!onlyContiguous || isInert(norm16) || *getMapping(norm16) <= 0x1ff);
- }
-
- UBool hasFCDBoundaryBefore(UChar32 c) const { return hasDecompBoundaryBefore(c); }
- UBool hasFCDBoundaryAfter(UChar32 c) const { return hasDecompBoundaryAfter(c); }
- UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; }
-private:
- friend class InitCanonIterData;
- friend class LcccContext;
-
- UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
- UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; }
- static UBool isInert(uint16_t norm16) { return norm16==INERT; }
- static UBool isJamoL(uint16_t norm16) { return norm16==JAMO_L; }
- static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; }
- uint16_t hangulLVT() const { return minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER; }
- UBool isHangulLV(uint16_t norm16) const { return norm16==minYesNo; }
- UBool isHangulLVT(uint16_t norm16) const {
- return norm16==hangulLVT();
- }
- UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16<minNoNo; }
- // UBool isCompYes(uint16_t norm16) const {
- // return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
- // }
- // UBool isCompYesOrMaybe(uint16_t norm16) const {
- // return norm16<minNoNo || minMaybeYes<=norm16;
- // }
- // UBool hasZeroCCFromDecompYes(uint16_t norm16) const {
- // return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
- // }
- UBool isDecompYesAndZeroCC(uint16_t norm16) const {
- return norm16<minYesNo ||
- norm16==JAMO_VT ||
- (minMaybeYes<=norm16 && norm16<=MIN_NORMAL_MAYBE_YES);
- }
- /**
- * A little faster and simpler than isDecompYesAndZeroCC() but does not include
- * the MaybeYes which combine-forward and have ccc=0.
- * (Standard Unicode 10 normalization does not have such characters.)
- */
- UBool isMostDecompYesAndZeroCC(uint16_t norm16) const {
- return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
- }
- UBool isDecompNoAlgorithmic(uint16_t norm16) const { return norm16>=limitNoNo; }
-
- // For use with isCompYes().
- // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
- // static uint8_t getCCFromYes(uint16_t norm16) {
- // return norm16>=MIN_YES_YES_WITH_CC ? getCCFromNormalYesOrMaybe(norm16) : 0;
- // }
- uint8_t getCCFromNoNo(uint16_t norm16) const {
- const uint16_t *mapping=getMapping(norm16);
- if(*mapping&MAPPING_HAS_CCC_LCCC_WORD) {
- return (uint8_t)*(mapping-1);
- } else {
- return 0;
- }
- }
- // requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC()
- uint8_t getTrailCCFromCompYesAndZeroCC(uint16_t norm16) const {
- if(norm16<=minYesNo) {
- return 0; // yesYes and Hangul LV have ccc=tccc=0
- } else {
- // For Hangul LVT we harmlessly fetch a firstUnit with tccc=0 here.
- return (uint8_t)(*getMapping(norm16)>>8); // tccc from yesNo
- }
- }
- uint8_t getPreviousTrailCC(const UChar *start, const UChar *p) const;
- uint8_t getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const;
-
- // Requires algorithmic-NoNo.
- UChar32 mapAlgorithmic(UChar32 c, uint16_t norm16) const {
- return c+(norm16>>DELTA_SHIFT)-centerNoNoDelta;
- }
- UChar32 getAlgorithmicDelta(uint16_t norm16) const {
- return (norm16>>DELTA_SHIFT)-centerNoNoDelta;
- }
-
- // Requires minYesNo<norm16<limitNoNo.
- const uint16_t *getMapping(uint16_t norm16) const { return extraData+(norm16>>OFFSET_SHIFT); }
- const uint16_t *getCompositionsListForDecompYes(uint16_t norm16) const {
- if(norm16<JAMO_L || MIN_NORMAL_MAYBE_YES<=norm16) {
- return NULL;
- } else if(norm16<minMaybeYes) {
- return getMapping(norm16); // for yesYes; if Jamo L: harmless empty list
- } else {
- return maybeYesCompositions+norm16-minMaybeYes;
- }
- }
- const uint16_t *getCompositionsListForComposite(uint16_t norm16) const {
- // A composite has both mapping & compositions list.
- const uint16_t *list=getMapping(norm16);
- return list+ // mapping pointer
- 1+ // +1 to skip the first unit with the mapping length
- (*list&MAPPING_LENGTH_MASK); // + mapping length
- }
- const uint16_t *getCompositionsListForMaybe(uint16_t norm16) const {
- // minMaybeYes<=norm16<MIN_NORMAL_MAYBE_YES
- return maybeYesCompositions+((norm16-minMaybeYes)>>OFFSET_SHIFT);
- }
- /**
- * @param c code point must have compositions
- * @return compositions list pointer
- */
- const uint16_t *getCompositionsList(uint16_t norm16) const {
- return isDecompYes(norm16) ?
- getCompositionsListForDecompYes(norm16) :
- getCompositionsListForComposite(norm16);
- }
-
- const UChar *copyLowPrefixFromNulTerminated(const UChar *src,
- UChar32 minNeedDataCP,
- ReorderingBuffer *buffer,
- UErrorCode &errorCode) const;
- const UChar *decomposeShort(const UChar *src, const UChar *limit,
- UBool stopAtCompBoundary, UBool onlyContiguous,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const;
- UBool decompose(UChar32 c, uint16_t norm16,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const;
-
- const uint8_t *decomposeShort(const uint8_t *src, const uint8_t *limit,
- UBool stopAtCompBoundary, UBool onlyContiguous,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const;
-
- static int32_t combine(const uint16_t *list, UChar32 trail);
- void addComposites(const uint16_t *list, UnicodeSet &set) const;
- void recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
- UBool onlyContiguous) const;
-
- UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const {
- return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(norm16);
- }
- UBool norm16HasCompBoundaryBefore(uint16_t norm16) const {
- return norm16 < minNoNoCompNoMaybeCC || isAlgorithmicNoNo(norm16);
- }
- UBool hasCompBoundaryBefore(const UChar *src, const UChar *limit) const;
- UBool hasCompBoundaryBefore(const uint8_t *src, const uint8_t *limit) const;
- UBool hasCompBoundaryAfter(const UChar *start, const UChar *p,
- UBool onlyContiguous) const;
- UBool hasCompBoundaryAfter(const uint8_t *start, const uint8_t *p,
- UBool onlyContiguous) const;
- UBool norm16HasCompBoundaryAfter(uint16_t norm16, UBool onlyContiguous) const {
- return (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 &&
- (!onlyContiguous || isTrailCC01ForCompBoundaryAfter(norm16));
- }
- /** For FCC: Given norm16 HAS_COMP_BOUNDARY_AFTER, does it have tccc<=1? */
- UBool isTrailCC01ForCompBoundaryAfter(uint16_t norm16) const {
- return isInert(norm16) || (isDecompNoAlgorithmic(norm16) ?
- (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1 : *getMapping(norm16) <= 0x1ff);
- }
-
- const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p, UBool onlyContiguous) const;
- const UChar *findNextCompBoundary(const UChar *p, const UChar *limit, UBool onlyContiguous) const;
-
- const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const;
- const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const;
-
- void makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16,
- CanonIterData &newData, UErrorCode &errorCode) const;
-
- int32_t getCanonValue(UChar32 c) const;
- const UnicodeSet &getCanonStartSet(int32_t n) const;
-
- // UVersionInfo dataVersion;
-
- // BMP code point thresholds for quick check loops looking at single UTF-16 code units.
- UChar minDecompNoCP;
- UChar minCompNoMaybeCP;
- UChar minLcccCP;
-
- // Norm16 value thresholds for quick check combinations and types of extra data.
- uint16_t minYesNo;
- uint16_t minYesNoMappingsOnly;
- uint16_t minNoNo;
- uint16_t minNoNoCompBoundaryBefore;
- uint16_t minNoNoCompNoMaybeCC;
- uint16_t minNoNoEmpty;
- uint16_t limitNoNo;
- uint16_t centerNoNoDelta;
- uint16_t minMaybeYes;
-
- const UCPTrie *normTrie;
- const uint16_t *maybeYesCompositions;
- const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters
- const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
-
- UInitOnce fCanonIterDataInitOnce = U_INITONCE_INITIALIZER;
- CanonIterData *fCanonIterData;
-};
-
-// bits in canonIterData
-#define CANON_NOT_SEGMENT_STARTER 0x80000000
-#define CANON_HAS_COMPOSITIONS 0x40000000
-#define CANON_HAS_SET 0x200000
-#define CANON_VALUE_MASK 0x1fffff
-
-/**
- * ICU-internal shortcut for quick access to standard Unicode normalization.
- */
-class U_COMMON_API Normalizer2Factory {
-public:
- static const Normalizer2 *getFCDInstance(UErrorCode &errorCode);
- static const Normalizer2 *getFCCInstance(UErrorCode &errorCode);
- static const Normalizer2 *getNoopInstance(UErrorCode &errorCode);
-
- static const Normalizer2 *getInstance(UNormalizationMode mode, UErrorCode &errorCode);
-
- static const Normalizer2Impl *getNFCImpl(UErrorCode &errorCode);
- static const Normalizer2Impl *getNFKCImpl(UErrorCode &errorCode);
- static const Normalizer2Impl *getNFKC_CFImpl(UErrorCode &errorCode);
-
- // Get the Impl instance of the Normalizer2.
- // Must be used only when it is known that norm2 is a Normalizer2WithImpl instance.
- static const Normalizer2Impl *getImpl(const Normalizer2 *norm2);
-private:
- Normalizer2Factory(); // No instantiation.
-};
-
-U_NAMESPACE_END
-
-U_CAPI int32_t U_EXPORT2
-unorm2_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-/**
- * Get the NF*_QC property for a code point, for u_getIntPropertyValue().
- * @internal
- */
-U_CFUNC UNormalizationCheckResult
-unorm_getQuickCheck(UChar32 c, UNormalizationMode mode);
-
-/**
- * Gets the 16-bit FCD value (lead & trail CCs) for a code point, for u_getIntPropertyValue().
- * @internal
- */
-U_CFUNC uint16_t
-unorm_getFCD16(UChar32 c);
-
-/**
- * Format of Normalizer2 .nrm data files.
- * Format version 4.0.
- *
- * Normalizer2 .nrm data files provide data for the Unicode Normalization algorithms.
- * ICU ships with data files for standard Unicode Normalization Forms
- * NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm) and NFKC_Casefold (nfkc_cf.nrm).
- * Custom (application-specific) data can be built into additional .nrm files
- * with the gennorm2 build tool.
- * ICU ships with one such file, uts46.nrm, for the implementation of UTS #46.
- *
- * Normalizer2.getInstance() causes a .nrm file to be loaded, unless it has been
- * cached already. Internally, Normalizer2Impl.load() reads the .nrm file.
- *
- * A .nrm file begins with a standard ICU data file header
- * (DataHeader, see ucmndata.h and unicode/udata.h).
- * The UDataInfo.dataVersion field usually contains the Unicode version
- * for which the data was generated.
- *
- * After the header, the file contains the following parts.
- * Constants are defined as enum values of the Normalizer2Impl class.
- *
- * Many details of the data structures are described in the design doc
- * which is at http://site.icu-project.org/design/normalization/custom
- *
- * int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_NORM_TRIE_OFFSET]/4;
- *
- * The first eight indexes are byte offsets in ascending order.
- * Each byte offset marks the start of the next part in the data file,
- * and the end of the previous one.
- * When two consecutive byte offsets are the same, then the corresponding part is empty.
- * Byte offsets are offsets from after the header,
- * that is, from the beginning of the indexes[].
- * Each part starts at an offset with proper alignment for its data.
- * If necessary, the previous part may include padding bytes to achieve this alignment.
- *
- * minDecompNoCP=indexes[IX_MIN_DECOMP_NO_CP] is the lowest code point
- * with a decomposition mapping, that is, with NF*D_QC=No.
- * minCompNoMaybeCP=indexes[IX_MIN_COMP_NO_MAYBE_CP] is the lowest code point
- * with NF*C_QC=No (has a one-way mapping) or Maybe (combines backward).
- * minLcccCP=indexes[IX_MIN_LCCC_CP] (index 18, new in formatVersion 3)
- * is the lowest code point with lccc!=0.
- *
- * The next eight indexes are thresholds of 16-bit trie values for ranges of
- * values indicating multiple normalization properties.
- * They are listed here in threshold order, not in the order they are stored in the indexes.
- * minYesNo=indexes[IX_MIN_YES_NO];
- * minYesNoMappingsOnly=indexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
- * minNoNo=indexes[IX_MIN_NO_NO];
- * minNoNoCompBoundaryBefore=indexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE];
- * minNoNoCompNoMaybeCC=indexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC];
- * minNoNoEmpty=indexes[IX_MIN_NO_NO_EMPTY];
- * limitNoNo=indexes[IX_LIMIT_NO_NO];
- * minMaybeYes=indexes[IX_MIN_MAYBE_YES];
- * See the normTrie description below and the design doc for details.
- *
- * UCPTrie normTrie; -- see ucptrie_impl.h and ucptrie.h, same as Java CodePointTrie
- *
- * The trie holds the main normalization data. Each code point is mapped to a 16-bit value.
- * Rather than using independent bits in the value (which would require more than 16 bits),
- * information is extracted primarily via range checks.
- * Except, format version 3 uses bit 0 for hasCompBoundaryAfter().
- * For example, a 16-bit value norm16 in the range minYesNo<=norm16<minNoNo
- * means that the character has NF*C_QC=Yes and NF*D_QC=No properties,
- * which means it has a two-way (round-trip) decomposition mapping.
- * Values in the range 2<=norm16<limitNoNo are also directly indexes into the extraData
- * pointing to mappings, compositions lists, or both.
- * Value norm16==INERT (0 in versions 1 & 2, 1 in version 3)
- * means that the character is normalization-inert, that is,
- * it does not have a mapping, does not participate in composition, has a zero
- * canonical combining class, and forms a boundary where text before it and after it
- * can be normalized independently.
- * For details about how multiple properties are encoded in 16-bit values
- * see the design doc.
- * Note that the encoding cannot express all combinations of the properties involved;
- * it only supports those combinations that are allowed by
- * the Unicode Normalization algorithms. Details are in the design doc as well.
- * The gennorm2 tool only builds .nrm files for data that conforms to the limitations.
- *
- * The trie has a value for each lead surrogate code unit representing the "worst case"
- * properties of the 1024 supplementary characters whose UTF-16 form starts with
- * the lead surrogate. If all of the 1024 supplementary characters are normalization-inert,
- * then their lead surrogate code unit has the trie value INERT.
- * When the lead surrogate unit's value exceeds the quick check minimum during processing,
- * the properties for the full supplementary code point need to be looked up.
- *
- * uint16_t maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes];
- * uint16_t extraData[];
- *
- * There is only one byte offset for the end of these two arrays.
- * The split between them is given by the constant and variable mentioned above.
- * In version 3, the difference must be shifted right by OFFSET_SHIFT.
- *
- * The maybeYesCompositions array contains compositions lists for characters that
- * combine both forward (as starters in composition pairs)
- * and backward (as trailing characters in composition pairs).
- * Such characters do not occur in Unicode 5.2 but are allowed by
- * the Unicode Normalization algorithms.
- * If there are no such characters, then minMaybeYes==MIN_NORMAL_MAYBE_YES
- * and the maybeYesCompositions array is empty.
- * If there are such characters, then minMaybeYes is subtracted from their norm16 values
- * to get the index into this array.
- *
- * The extraData array contains compositions lists for "YesYes" characters,
- * followed by mappings and optional compositions lists for "YesNo" characters,
- * followed by only mappings for "NoNo" characters.
- * (Referring to pairs of NFC/NFD quick check values.)
- * The norm16 values of those characters are directly indexes into the extraData array.
- * In version 3, the norm16 values must be shifted right by OFFSET_SHIFT
- * for accessing extraData.
- *
- * The data structures for compositions lists and mappings are described in the design doc.
- *
- * uint8_t smallFCD[0x100]; -- new in format version 2
- *
- * This is a bit set to help speed up FCD value lookups in the absence of a full
- * UTrie2 or other large data structure with the full FCD value mapping.
- *
- * Each smallFCD bit is set if any of the corresponding 32 BMP code points
- * has a non-zero FCD value (lccc!=0 or tccc!=0).
- * Bit 0 of smallFCD[0] is for U+0000..U+001F. Bit 7 of smallFCD[0xff] is for U+FFE0..U+FFFF.
- * A bit for 32 lead surrogates is set if any of the 32k corresponding
- * _supplementary_ code points has a non-zero FCD value.
- *
- * This bit set is most useful for the large blocks of CJK characters with FCD=0.
- *
- * Changes from format version 1 to format version 2 ---------------------------
- *
- * - Addition of data for raw (not recursively decomposed) mappings.
- * + The MAPPING_NO_COMP_BOUNDARY_AFTER bit in the extraData is now also set when
- * the mapping is to an empty string or when the character combines-forward.
- * This subsumes the one actual use of the MAPPING_PLUS_COMPOSITION_LIST bit which
- * is then repurposed for the MAPPING_HAS_RAW_MAPPING bit.
- * + For details see the design doc.
- * - Addition of indexes[IX_MIN_YES_NO_MAPPINGS_ONLY] and separation of the yesNo extraData into
- * distinct ranges (combines-forward vs. not)
- * so that a range check can be used to find out if there is a compositions list.
- * This is fully equivalent with formatVersion 1's MAPPING_PLUS_COMPOSITION_LIST flag.
- * It is needed for the new (in ICU 49) composePair(), not for other normalization.
- * - Addition of the smallFCD[] bit set.
- *
- * Changes from format version 2 to format version 3 (ICU 60) ------------------
- *
- * - norm16 bit 0 indicates hasCompBoundaryAfter(),
- * except that for contiguous composition (FCC) the tccc must be checked as well.
- * Data indexes and ccc values are shifted left by one (OFFSET_SHIFT).
- * Thresholds like minNoNo are tested before shifting.
- *
- * - Algorithmic mapping deltas are shifted left by two more bits (total DELTA_SHIFT),
- * to make room for two bits (three values) indicating whether the tccc is 0, 1, or greater.
- * See DELTA_TCCC_MASK etc.
- * This helps with fetching tccc/FCD values and FCC hasCompBoundaryAfter().
- * minMaybeYes is 8-aligned so that the DELTA_TCCC_MASK bits can be tested directly.
- *
- * - Algorithmic mappings are only used for mapping to "comp yes and ccc=0" characters,
- * and ASCII characters are mapped algorithmically only to other ASCII characters.
- * This helps with hasCompBoundaryBefore() and compose() fast paths.
- * It is never necessary any more to loop for algorithmic mappings.
- *
- * - Addition of indexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE],
- * indexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC], and indexes[IX_MIN_NO_NO_EMPTY],
- * and separation of the noNo extraData into distinct ranges.
- * With this, the noNo norm16 value indicates whether the mapping is
- * compose-normalized, not normalized but hasCompBoundaryBefore(),
- * not even that, or maps to an empty string.
- * hasCompBoundaryBefore() can be determined solely from the norm16 value.
- *
- * - The norm16 value for Hangul LVT is now different from that for Hangul LV,
- * so that hasCompBoundaryAfter() need not check for the syllable type.
- * For Hangul LV, minYesNo continues to be used (no comp-boundary-after).
- * For Hangul LVT, minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER is used.
- * The extraData units at these indexes are set to firstUnit=2 and firstUnit=3, respectively,
- * to simplify some code.
- *
- * - The extraData firstUnit bit 5 is no longer necessary
- * (norm16 bit 0 used instead of firstUnit MAPPING_NO_COMP_BOUNDARY_AFTER),
- * is reserved again, and always set to 0.
- *
- * - Addition of indexes[IX_MIN_LCCC_CP], the first code point where lccc!=0.
- * This used to be hardcoded to U+0300, but in data like NFKC_Casefold it is lower:
- * U+00AD Soft Hyphen maps to an empty string,
- * which is artificially assigned "worst case" values lccc=1 and tccc=255.
- *
- * - A mapping to an empty string has explicit lccc=1 and tccc=255 values.
- *
- * Changes from format version 3 to format version 4 (ICU 63) ------------------
- *
- * Switched from UTrie2 to UCPTrie/CodePointTrie.
- *
- * The new trie no longer stores different values for surrogate code *units* vs.
- * surrogate code *points*.
- * Lead surrogates still have values for optimized UTF-16 string processing.
- * When looking up code point properties, the code now checks for lead surrogates and
- * treats them as inert.
- *
- * gennorm2 now has to reject mappings for surrogate code points.
- * UTS #46 maps unpaired surrogates to U+FFFD in code rather than via its
- * custom normalization data file.
- */
-
-#endif /* !UCONFIG_NO_NORMALIZATION */
-#endif /* __NORMALIZER2IMPL_H__ */
diff --git a/contrib/libs/icu/common/normlzr.cpp b/contrib/libs/icu/common/normlzr.cpp
deleted file mode 100644
index 2dea0ffc33c..00000000000
--- a/contrib/libs/icu/common/normlzr.cpp
+++ /dev/null
@@ -1,529 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- *************************************************************************
- * COPYRIGHT:
- * Copyright (c) 1996-2012, International Business Machines Corporation and
- * others. All Rights Reserved.
- *************************************************************************
- */
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_NORMALIZATION
-
-#include "unicode/uniset.h"
-#include "unicode/unistr.h"
-#include "unicode/chariter.h"
-#include "unicode/schriter.h"
-#include "unicode/uchriter.h"
-#include "unicode/normlzr.h"
-#include "unicode/utf16.h"
-#include "cmemory.h"
-#include "normalizer2impl.h"
-#include "uprops.h" // for uniset_getUnicode32Instance()
-
-#if defined(move32)
- // System can define move32 intrinsics, but the char iters define move32 method
- // using same undef trick in headers, so undef here to re-enable the method.
-#undef move32
-#endif
-
-U_NAMESPACE_BEGIN
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)
-
-//-------------------------------------------------------------------------
-// Constructors and other boilerplate
-//-------------------------------------------------------------------------
-
-Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) :
- UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
- text(new StringCharacterIterator(str)),
- currentIndex(0), nextIndex(0),
- buffer(), bufferPos(0)
-{
- init();
-}
-
-Normalizer::Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode) :
- UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
- text(new UCharCharacterIterator(str, length)),
- currentIndex(0), nextIndex(0),
- buffer(), bufferPos(0)
-{
- init();
-}
-
-Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
- UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
- text(iter.clone()),
- currentIndex(0), nextIndex(0),
- buffer(), bufferPos(0)
-{
- init();
-}
-
-Normalizer::Normalizer(const Normalizer &copy) :
- UObject(copy), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions),
- text(copy.text->clone()),
- currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
- buffer(copy.buffer), bufferPos(copy.bufferPos)
-{
- init();
-}
-
-void
-Normalizer::init() {
- UErrorCode errorCode=U_ZERO_ERROR;
- fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode);
- if(fOptions&UNORM_UNICODE_3_2) {
- delete fFilteredNorm2;
- fNorm2=fFilteredNorm2=
- new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode));
- }
- if(U_FAILURE(errorCode)) {
- errorCode=U_ZERO_ERROR;
- fNorm2=Normalizer2Factory::getNoopInstance(errorCode);
- }
-}
-
-Normalizer::~Normalizer()
-{
- delete fFilteredNorm2;
- delete text;
-}
-
-Normalizer*
-Normalizer::clone() const
-{
- return new Normalizer(*this);
-}
-
-/**
- * Generates a hash code for this iterator.
- */
-int32_t Normalizer::hashCode() const
-{
- return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
-}
-
-UBool Normalizer::operator==(const Normalizer& that) const
-{
- return
- this==&that ||
- (fUMode==that.fUMode &&
- fOptions==that.fOptions &&
- *text==*that.text &&
- buffer==that.buffer &&
- bufferPos==that.bufferPos &&
- nextIndex==that.nextIndex);
-}
-
-//-------------------------------------------------------------------------
-// Static utility methods
-//-------------------------------------------------------------------------
-
-void U_EXPORT2
-Normalizer::normalize(const UnicodeString& source,
- UNormalizationMode mode, int32_t options,
- UnicodeString& result,
- UErrorCode &status) {
- if(source.isBogus() || U_FAILURE(status)) {
- result.setToBogus();
- if(U_SUCCESS(status)) {
- status=U_ILLEGAL_ARGUMENT_ERROR;
- }
- } else {
- UnicodeString localDest;
- UnicodeString *dest;
-
- if(&source!=&result) {
- dest=&result;
- } else {
- // the source and result strings are the same object, use a temporary one
- dest=&localDest;
- }
- const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
- if(U_SUCCESS(status)) {
- if(options&UNORM_UNICODE_3_2) {
- FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
- normalize(source, *dest, status);
- } else {
- n2->normalize(source, *dest, status);
- }
- }
- if(dest==&localDest && U_SUCCESS(status)) {
- result=*dest;
- }
- }
-}
-
-void U_EXPORT2
-Normalizer::compose(const UnicodeString& source,
- UBool compat, int32_t options,
- UnicodeString& result,
- UErrorCode &status) {
- normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status);
-}
-
-void U_EXPORT2
-Normalizer::decompose(const UnicodeString& source,
- UBool compat, int32_t options,
- UnicodeString& result,
- UErrorCode &status) {
- normalize(source, compat ? UNORM_NFKD : UNORM_NFD, options, result, status);
-}
-
-UNormalizationCheckResult
-Normalizer::quickCheck(const UnicodeString& source,
- UNormalizationMode mode, int32_t options,
- UErrorCode &status) {
- const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
- if(U_SUCCESS(status)) {
- if(options&UNORM_UNICODE_3_2) {
- return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
- quickCheck(source, status);
- } else {
- return n2->quickCheck(source, status);
- }
- } else {
- return UNORM_MAYBE;
- }
-}
-
-UBool
-Normalizer::isNormalized(const UnicodeString& source,
- UNormalizationMode mode, int32_t options,
- UErrorCode &status) {
- const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
- if(U_SUCCESS(status)) {
- if(options&UNORM_UNICODE_3_2) {
- return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
- isNormalized(source, status);
- } else {
- return n2->isNormalized(source, status);
- }
- } else {
- return FALSE;
- }
-}
-
-UnicodeString & U_EXPORT2
-Normalizer::concatenate(const UnicodeString &left, const UnicodeString &right,
- UnicodeString &result,
- UNormalizationMode mode, int32_t options,
- UErrorCode &errorCode) {
- if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) {
- result.setToBogus();
- if(U_SUCCESS(errorCode)) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- }
- } else {
- UnicodeString localDest;
- UnicodeString *dest;
-
- if(&right!=&result) {
- dest=&result;
- } else {
- // the right and result strings are the same object, use a temporary one
- dest=&localDest;
- }
- *dest=left;
- const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode);
- if(U_SUCCESS(errorCode)) {
- if(options&UNORM_UNICODE_3_2) {
- FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(errorCode)).
- append(*dest, right, errorCode);
- } else {
- n2->append(*dest, right, errorCode);
- }
- }
- if(dest==&localDest && U_SUCCESS(errorCode)) {
- result=*dest;
- }
- }
- return result;
-}
-
-//-------------------------------------------------------------------------
-// Iteration API
-//-------------------------------------------------------------------------
-
-/**
- * Return the current character in the normalized text.
- */
-UChar32 Normalizer::current() {
- if(bufferPos<buffer.length() || nextNormalize()) {
- return buffer.char32At(bufferPos);
- } else {
- return DONE;
- }
-}
-
-/**
- * Return the next character in the normalized text and advance
- * the iteration position by one. If the end
- * of the text has already been reached, {@link #DONE} is returned.
- */
-UChar32 Normalizer::next() {
- if(bufferPos<buffer.length() || nextNormalize()) {
- UChar32 c=buffer.char32At(bufferPos);
- bufferPos+=U16_LENGTH(c);
- return c;
- } else {
- return DONE;
- }
-}
-
-/**
- * Return the previous character in the normalized text and decrement
- * the iteration position by one. If the beginning
- * of the text has already been reached, {@link #DONE} is returned.
- */
-UChar32 Normalizer::previous() {
- if(bufferPos>0 || previousNormalize()) {
- UChar32 c=buffer.char32At(bufferPos-1);
- bufferPos-=U16_LENGTH(c);
- return c;
- } else {
- return DONE;
- }
-}
-
-void Normalizer::reset() {
- currentIndex=nextIndex=text->setToStart();
- clearBuffer();
-}
-
-void
-Normalizer::setIndexOnly(int32_t index) {
- text->setIndex(index); // pins index
- currentIndex=nextIndex=text->getIndex();
- clearBuffer();
-}
-
-/**
- * Return the first character in the normalized text. This resets
- * the <tt>Normalizer's</tt> position to the beginning of the text.
- */
-UChar32 Normalizer::first() {
- reset();
- return next();
-}
-
-/**
- * Return the last character in the normalized text. This resets
- * the <tt>Normalizer's</tt> position to be just before the
- * the input text corresponding to that normalized character.
- */
-UChar32 Normalizer::last() {
- currentIndex=nextIndex=text->setToEnd();
- clearBuffer();
- return previous();
-}
-
-/**
- * Retrieve the current iteration position in the input text that is
- * being normalized. This method is useful in applications such as
- * searching, where you need to be able to determine the position in
- * the input text that corresponds to a given normalized output character.
- * <p>
- * <b>Note:</b> This method sets the position in the <em>input</em>, while
- * {@link #next} and {@link #previous} iterate through characters in the
- * <em>output</em>. This means that there is not necessarily a one-to-one
- * correspondence between characters returned by <tt>next</tt> and
- * <tt>previous</tt> and the indices passed to and returned from
- * <tt>setIndex</tt> and {@link #getIndex}.
- *
- */
-int32_t Normalizer::getIndex() const {
- if(bufferPos<buffer.length()) {
- return currentIndex;
- } else {
- return nextIndex;
- }
-}
-
-/**
- * Retrieve the index of the start of the input text. This is the begin index
- * of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
- * over which this <tt>Normalizer</tt> is iterating
- */
-int32_t Normalizer::startIndex() const {
- return text->startIndex();
-}
-
-/**
- * Retrieve the index of the end of the input text. This is the end index
- * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
- * over which this <tt>Normalizer</tt> is iterating
- */
-int32_t Normalizer::endIndex() const {
- return text->endIndex();
-}
-
-//-------------------------------------------------------------------------
-// Property access methods
-//-------------------------------------------------------------------------
-
-void
-Normalizer::setMode(UNormalizationMode newMode)
-{
- fUMode = newMode;
- init();
-}
-
-UNormalizationMode
-Normalizer::getUMode() const
-{
- return fUMode;
-}
-
-void
-Normalizer::setOption(int32_t option,
- UBool value)
-{
- if (value) {
- fOptions |= option;
- } else {
- fOptions &= (~option);
- }
- init();
-}
-
-UBool
-Normalizer::getOption(int32_t option) const
-{
- return (fOptions & option) != 0;
-}
-
-/**
- * Set the input text over which this <tt>Normalizer</tt> will iterate.
- * The iteration position is set to the beginning of the input text.
- */
-void
-Normalizer::setText(const UnicodeString& newText,
- UErrorCode &status)
-{
- if (U_FAILURE(status)) {
- return;
- }
- CharacterIterator *newIter = new StringCharacterIterator(newText);
- if (newIter == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- delete text;
- text = newIter;
- reset();
-}
-
-/**
- * Set the input text over which this <tt>Normalizer</tt> will iterate.
- * The iteration position is set to the beginning of the string.
- */
-void
-Normalizer::setText(const CharacterIterator& newText,
- UErrorCode &status)
-{
- if (U_FAILURE(status)) {
- return;
- }
- CharacterIterator *newIter = newText.clone();
- if (newIter == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- delete text;
- text = newIter;
- reset();
-}
-
-void
-Normalizer::setText(ConstChar16Ptr newText,
- int32_t length,
- UErrorCode &status)
-{
- if (U_FAILURE(status)) {
- return;
- }
- CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
- if (newIter == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- delete text;
- text = newIter;
- reset();
-}
-
-/**
- * Copies the text under iteration into the UnicodeString referred to by "result".
- * @param result Receives a copy of the text under iteration.
- */
-void
-Normalizer::getText(UnicodeString& result)
-{
- text->getText(result);
-}
-
-//-------------------------------------------------------------------------
-// Private utility methods
-//-------------------------------------------------------------------------
-
-void Normalizer::clearBuffer() {
- buffer.remove();
- bufferPos=0;
-}
-
-UBool
-Normalizer::nextNormalize() {
- clearBuffer();
- currentIndex=nextIndex;
- text->setIndex(nextIndex);
- if(!text->hasNext()) {
- return FALSE;
- }
- // Skip at least one character so we make progress.
- UnicodeString segment(text->next32PostInc());
- while(text->hasNext()) {
- UChar32 c;
- if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) {
- text->move32(-1, CharacterIterator::kCurrent);
- break;
- }
- segment.append(c);
- }
- nextIndex=text->getIndex();
- UErrorCode errorCode=U_ZERO_ERROR;
- fNorm2->normalize(segment, buffer, errorCode);
- return U_SUCCESS(errorCode) && !buffer.isEmpty();
-}
-
-UBool
-Normalizer::previousNormalize() {
- clearBuffer();
- nextIndex=currentIndex;
- text->setIndex(currentIndex);
- if(!text->hasPrevious()) {
- return FALSE;
- }
- UnicodeString segment;
- while(text->hasPrevious()) {
- UChar32 c=text->previous32();
- segment.insert(0, c);
- if(fNorm2->hasBoundaryBefore(c)) {
- break;
- }
- }
- currentIndex=text->getIndex();
- UErrorCode errorCode=U_ZERO_ERROR;
- fNorm2->normalize(segment, buffer, errorCode);
- bufferPos=buffer.length();
- return U_SUCCESS(errorCode) && !buffer.isEmpty();
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_NORMALIZATION */
diff --git a/contrib/libs/icu/common/parsepos.cpp b/contrib/libs/icu/common/parsepos.cpp
deleted file mode 100644
index 56c6c788136..00000000000
--- a/contrib/libs/icu/common/parsepos.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2003-2003, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#include "unicode/parsepos.h"
-
-U_NAMESPACE_BEGIN
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ParsePosition)
-
-ParsePosition::~ParsePosition() {}
-
-ParsePosition *
-ParsePosition::clone() const {
- return new ParsePosition(*this);
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/patternprops.cpp b/contrib/libs/icu/common/patternprops.cpp
deleted file mode 100644
index c38a7e276de..00000000000
--- a/contrib/libs/icu/common/patternprops.cpp
+++ /dev/null
@@ -1,230 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: patternprops.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2011mar13
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "patternprops.h"
-
-U_NAMESPACE_BEGIN
-
-/*
- * One byte per Latin-1 character.
- * Bit 0 is set if either Pattern property is true,
- * bit 1 if Pattern_Syntax is true,
- * bit 2 if Pattern_White_Space is true.
- * That is, Pattern_Syntax is encoded as 3 and Pattern_White_Space as 5.
- */
-static const uint8_t latin1[256]={
- // WS: 9..D
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- // WS: 20 Syntax: 21..2F
- 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- // Syntax: 3A..40
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3,
- 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- // Syntax: 5B..5E
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
- // Syntax: 60
- 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- // Syntax: 7B..7E
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
- // WS: 85
- 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- // Syntax: A1..A7, A9, AB, AC, AE
- 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 0,
- // Syntax: B0, B1, B6, BB, BF
- 3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 3,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- // Syntax: D7
- 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- // Syntax: F7
- 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-/*
- * One byte per 32 characters from U+2000..U+303F indexing into
- * a small table of 32-bit data words.
- * The first two data words are all-zeros and all-ones.
- */
-static const uint8_t index2000[130]={
- 2, 3, 4, 0, 0, 0, 0, 0, // 20xx
- 0, 0, 0, 0, 5, 1, 1, 1, // 21xx
- 1, 1, 1, 1, 1, 1, 1, 1, // 22xx
- 1, 1, 1, 1, 1, 1, 1, 1, // 23xx
- 1, 1, 1, 0, 0, 0, 0, 0, // 24xx
- 1, 1, 1, 1, 1, 1, 1, 1, // 25xx
- 1, 1, 1, 1, 1, 1, 1, 1, // 26xx
- 1, 1, 1, 6, 7, 1, 1, 1, // 27xx
- 1, 1, 1, 1, 1, 1, 1, 1, // 28xx
- 1, 1, 1, 1, 1, 1, 1, 1, // 29xx
- 1, 1, 1, 1, 1, 1, 1, 1, // 2Axx
- 1, 1, 1, 1, 1, 1, 1, 1, // 2Bxx
- 0, 0, 0, 0, 0, 0, 0, 0, // 2Cxx
- 0, 0, 0, 0, 0, 0, 0, 0, // 2Dxx
- 1, 1, 1, 1, 0, 0, 0, 0, // 2Exx
- 0, 0, 0, 0, 0, 0, 0, 0, // 2Fxx
- 8, 9 // 3000..303F
-};
-
-/*
- * One 32-bit integer per 32 characters. Ranges of all-false and all-true
- * are mapped to the first two values, other ranges map to appropriate bit patterns.
- */
-static const uint32_t syntax2000[]={
- 0,
- 0xffffffff,
- 0xffff0000, // 2: 2010..201F
- 0x7fff00ff, // 3: 2020..2027, 2030..203E
- 0x7feffffe, // 4: 2041..2053, 2055..205E
- 0xffff0000, // 5: 2190..219F
- 0x003fffff, // 6: 2760..2775
- 0xfff00000, // 7: 2794..279F
- 0xffffff0e, // 8: 3001..3003, 3008..301F
- 0x00010001 // 9: 3020, 3030
-};
-
-/*
- * Same as syntax2000, but with additional bits set for the
- * Pattern_White_Space characters 200E 200F 2028 2029.
- */
-static const uint32_t syntaxOrWhiteSpace2000[]={
- 0,
- 0xffffffff,
- 0xffffc000, // 2: 200E..201F
- 0x7fff03ff, // 3: 2020..2029, 2030..203E
- 0x7feffffe, // 4: 2041..2053, 2055..205E
- 0xffff0000, // 5: 2190..219F
- 0x003fffff, // 6: 2760..2775
- 0xfff00000, // 7: 2794..279F
- 0xffffff0e, // 8: 3001..3003, 3008..301F
- 0x00010001 // 9: 3020, 3030
-};
-
-UBool
-PatternProps::isSyntax(UChar32 c) {
- if(c<0) {
- return FALSE;
- } else if(c<=0xff) {
- return (UBool)(latin1[c]>>1)&1;
- } else if(c<0x2010) {
- return FALSE;
- } else if(c<=0x3030) {
- uint32_t bits=syntax2000[index2000[(c-0x2000)>>5]];
- return (UBool)((bits>>(c&0x1f))&1);
- } else if(0xfd3e<=c && c<=0xfe46) {
- return c<=0xfd3f || 0xfe45<=c;
- } else {
- return FALSE;
- }
-}
-
-UBool
-PatternProps::isSyntaxOrWhiteSpace(UChar32 c) {
- if(c<0) {
- return FALSE;
- } else if(c<=0xff) {
- return (UBool)(latin1[c]&1);
- } else if(c<0x200e) {
- return FALSE;
- } else if(c<=0x3030) {
- uint32_t bits=syntaxOrWhiteSpace2000[index2000[(c-0x2000)>>5]];
- return (UBool)((bits>>(c&0x1f))&1);
- } else if(0xfd3e<=c && c<=0xfe46) {
- return c<=0xfd3f || 0xfe45<=c;
- } else {
- return FALSE;
- }
-}
-
-UBool
-PatternProps::isWhiteSpace(UChar32 c) {
- if(c<0) {
- return FALSE;
- } else if(c<=0xff) {
- return (UBool)(latin1[c]>>2)&1;
- } else if(0x200e<=c && c<=0x2029) {
- return c<=0x200f || 0x2028<=c;
- } else {
- return FALSE;
- }
-}
-
-const UChar *
-PatternProps::skipWhiteSpace(const UChar *s, int32_t length) {
- while(length>0 && isWhiteSpace(*s)) {
- ++s;
- --length;
- }
- return s;
-}
-
-int32_t
-PatternProps::skipWhiteSpace(const UnicodeString& s, int32_t start) {
- int32_t i = start;
- int32_t length = s.length();
- while(i<length && isWhiteSpace(s.charAt(i))) {
- ++i;
- }
- return i;
-}
-
-const UChar *
-PatternProps::trimWhiteSpace(const UChar *s, int32_t &length) {
- if(length<=0 || (!isWhiteSpace(s[0]) && !isWhiteSpace(s[length-1]))) {
- return s;
- }
- int32_t start=0;
- int32_t limit=length;
- while(start<limit && isWhiteSpace(s[start])) {
- ++start;
- }
- if(start<limit) {
- // There is non-white space at start; we will not move limit below that,
- // so we need not test start<limit in the loop.
- while(isWhiteSpace(s[limit-1])) {
- --limit;
- }
- }
- length=limit-start;
- return s+start;
-}
-
-UBool
-PatternProps::isIdentifier(const UChar *s, int32_t length) {
- if(length<=0) {
- return FALSE;
- }
- const UChar *limit=s+length;
- do {
- if(isSyntaxOrWhiteSpace(*s++)) {
- return FALSE;
- }
- } while(s<limit);
- return TRUE;
-}
-
-const UChar *
-PatternProps::skipIdentifier(const UChar *s, int32_t length) {
- while(length>0 && !isSyntaxOrWhiteSpace(*s)) {
- ++s;
- --length;
- }
- return s;
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/patternprops.h b/contrib/libs/icu/common/patternprops.h
deleted file mode 100644
index b57cdeb6e53..00000000000
--- a/contrib/libs/icu/common/patternprops.h
+++ /dev/null
@@ -1,98 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: patternprops.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2011mar13
-* created by: Markus W. Scherer
-*/
-
-#ifndef __PATTERNPROPS_H__
-#define __PATTERNPROPS_H__
-
-#include "unicode/unistr.h"
-#include "unicode/utypes.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * Implements the immutable Unicode properties Pattern_Syntax and Pattern_White_Space.
- * Hardcodes these properties, does not load data, does not depend on other ICU classes.
- * <p>
- * Note: Both properties include ASCII as well as non-ASCII, non-Latin-1 code points,
- * and both properties only include BMP code points (no supplementary ones).
- * Pattern_Syntax includes some unassigned code points.
- * <p>
- * [:Pattern_White_Space:] =
- * [\u0009-\u000D\ \u0085\u200E\u200F\u2028\u2029]
- * <p>
- * [:Pattern_Syntax:] =
- * [!-/\:-@\[-\^`\{-~\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE
- * \u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7
- * \u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E
- * \u2190-\u245F\u2500-\u2775\u2794-\u2BFF\u2E00-\u2E7F
- * \u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46]
- * @author mscherer
- */
-class U_COMMON_API PatternProps {
-public:
- /**
- * @return TRUE if c is a Pattern_Syntax code point.
- */
- static UBool isSyntax(UChar32 c);
-
- /**
- * @return TRUE if c is a Pattern_Syntax or Pattern_White_Space code point.
- */
- static UBool isSyntaxOrWhiteSpace(UChar32 c);
-
- /**
- * @return TRUE if c is a Pattern_White_Space character.
- */
- static UBool isWhiteSpace(UChar32 c);
-
- /**
- * Skips over Pattern_White_Space starting at s.
- * @return The smallest pointer at or after s with a non-white space character.
- */
- static const UChar *skipWhiteSpace(const UChar *s, int32_t length);
-
- /**
- * Skips over Pattern_White_Space starting at index start in s.
- * @return The smallest index at or after start with a non-white space character.
- */
- static int32_t skipWhiteSpace(const UnicodeString &s, int32_t start);
-
- /**
- * @return s except with leading and trailing Pattern_White_Space removed and length adjusted.
- */
- static const UChar *trimWhiteSpace(const UChar *s, int32_t &length);
-
- /**
- * Tests whether the string contains a "pattern identifier", that is,
- * whether it contains only non-Pattern_White_Space, non-Pattern_Syntax characters.
- * @return TRUE if there are no Pattern_White_Space or Pattern_Syntax characters in s.
- */
- static UBool isIdentifier(const UChar *s, int32_t length);
-
- /**
- * Skips over a "pattern identifier" starting at index s.
- * @return The smallest pointer at or after s with
- * a Pattern_White_Space or Pattern_Syntax character.
- */
- static const UChar *skipIdentifier(const UChar *s, int32_t length);
-
-private:
- PatternProps(); // no constructor: all static methods
-};
-
-U_NAMESPACE_END
-
-#endif // __PATTERNPROPS_H__
diff --git a/contrib/libs/icu/common/pluralmap.cpp b/contrib/libs/icu/common/pluralmap.cpp
deleted file mode 100644
index ec87f0198e1..00000000000
--- a/contrib/libs/icu/common/pluralmap.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- * Copyright (C) 2015, International Business Machines Corporation and
- * others. All Rights Reserved.
- */
-
-#include "unicode/unistr.h"
-#include "charstr.h"
-#include "cstring.h"
-#include "pluralmap.h"
-
-U_NAMESPACE_BEGIN
-
-static const char * const gPluralForms[] = {
- "other", "zero", "one", "two", "few", "many"};
-
-PluralMapBase::Category
-PluralMapBase::toCategory(const char *pluralForm) {
- for (int32_t i = 0; i < UPRV_LENGTHOF(gPluralForms); ++i) {
- if (uprv_strcmp(pluralForm, gPluralForms[i]) == 0) {
- return static_cast<Category>(i);
- }
- }
- return NONE;
-}
-
-PluralMapBase::Category
-PluralMapBase::toCategory(const UnicodeString &pluralForm) {
- CharString cCategory;
- UErrorCode status = U_ZERO_ERROR;
- cCategory.appendInvariantChars(pluralForm, status);
- return U_FAILURE(status) ? NONE : toCategory(cCategory.data());
-}
-
-const char *PluralMapBase::getCategoryName(Category c) {
- int32_t index = c;
- return (index < 0 || index >= UPRV_LENGTHOF(gPluralForms)) ?
- NULL : gPluralForms[index];
-}
-
-
-U_NAMESPACE_END
-
diff --git a/contrib/libs/icu/common/pluralmap.h b/contrib/libs/icu/common/pluralmap.h
deleted file mode 100644
index db644093a1f..00000000000
--- a/contrib/libs/icu/common/pluralmap.h
+++ /dev/null
@@ -1,292 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 2015, International Business Machines Corporation and
-* others. All Rights Reserved.
-******************************************************************************
-*
-* File pluralmap.h - PluralMap class that maps plural categories to values.
-******************************************************************************
-*/
-
-#ifndef __PLURAL_MAP_H__
-#define __PLURAL_MAP_H__
-
-#include "unicode/uobject.h"
-#include "cmemory.h"
-
-U_NAMESPACE_BEGIN
-
-class UnicodeString;
-
-class U_COMMON_API PluralMapBase : public UMemory {
-public:
- /**
- * The names of all the plural categories. NONE is not an actual plural
- * category, but rather represents the absense of a plural category.
- */
- enum Category {
- NONE = -1,
- OTHER,
- ZERO,
- ONE,
- TWO,
- FEW,
- MANY,
- CATEGORY_COUNT
- };
-
- /**
- * Converts a category name such as "zero", "one", "two", "few", "many"
- * or "other" to a category enum. Returns NONE for an unrecognized
- * category name.
- */
- static Category toCategory(const char *categoryName);
-
- /**
- * Converts a category name such as "zero", "one", "two", "few", "many"
- * or "other" to a category enum. Returns NONE for urecongized
- * category name.
- */
- static Category toCategory(const UnicodeString &categoryName);
-
- /**
- * Converts a category to a name.
- * Passing NONE or CATEGORY_COUNT for category returns NULL.
- */
- static const char *getCategoryName(Category category);
-};
-
-/**
- * A Map of plural categories to values. It maintains ownership of the
- * values.
- *
- * Type T is the value type. T must provide the followng:
- * 1) Default constructor
- * 2) Copy constructor
- * 3) Assignment operator
- * 4) Must extend UMemory
- */
-template<typename T>
-class PluralMap : public PluralMapBase {
-public:
- /**
- * Other category is maps to a copy of the default value.
- */
- PluralMap() : fOtherVariant() {
- initializeNew();
- }
-
- /**
- * Other category is mapped to otherVariant.
- */
- PluralMap(const T &otherVariant) : fOtherVariant(otherVariant) {
- initializeNew();
- }
-
- PluralMap(const PluralMap<T> &other) : fOtherVariant(other.fOtherVariant) {
- fVariants[0] = &fOtherVariant;
- for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) {
- fVariants[i] = other.fVariants[i] ?
- new T(*other.fVariants[i]) : NULL;
- }
- }
-
- PluralMap<T> &operator=(const PluralMap<T> &other) {
- if (this == &other) {
- return *this;
- }
- for (int32_t i = 0; i < UPRV_LENGTHOF(fVariants); ++i) {
- if (fVariants[i] != NULL && other.fVariants[i] != NULL) {
- *fVariants[i] = *other.fVariants[i];
- } else if (fVariants[i] != NULL) {
- delete fVariants[i];
- fVariants[i] = NULL;
- } else if (other.fVariants[i] != NULL) {
- fVariants[i] = new T(*other.fVariants[i]);
- } else {
- // do nothing
- }
- }
- return *this;
- }
-
- ~PluralMap() {
- for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) {
- delete fVariants[i];
- }
- }
-
- /**
- * Removes all mappings and makes 'other' point to the default value.
- */
- void clear() {
- *fVariants[0] = T();
- for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) {
- delete fVariants[i];
- fVariants[i] = NULL;
- }
- }
-
- /**
- * Iterates through the mappings in this instance, set index to NONE
- * prior to using. Call next repeatedly to get the values until it
- * returns NULL. Each time next returns, caller may pass index
- * to getCategoryName() to get the name of the plural category.
- * When this function returns NULL, index is CATEGORY_COUNT
- */
- const T *next(Category &index) const {
- int32_t idx = index;
- ++idx;
- for (; idx < UPRV_LENGTHOF(fVariants); ++idx) {
- if (fVariants[idx] != NULL) {
- index = static_cast<Category>(idx);
- return fVariants[idx];
- }
- }
- index = static_cast<Category>(idx);
- return NULL;
- }
-
- /**
- * non const version of next.
- */
- T *nextMutable(Category &index) {
- const T *result = next(index);
- return const_cast<T *>(result);
- }
-
- /**
- * Returns the 'other' variant.
- * Same as calling get(OTHER).
- */
- const T &getOther() const {
- return get(OTHER);
- }
-
- /**
- * Returns the value associated with a category.
- * If no value found, or v is NONE or CATEGORY_COUNT, falls
- * back to returning the value for the 'other' category.
- */
- const T &get(Category v) const {
- int32_t index = v;
- if (index < 0 || index >= UPRV_LENGTHOF(fVariants) || fVariants[index] == NULL) {
- return *fVariants[0];
- }
- return *fVariants[index];
- }
-
- /**
- * Convenience routine to get the value by category name. Otherwise
- * works just like get(Category).
- */
- const T &get(const char *category) const {
- return get(toCategory(category));
- }
-
- /**
- * Convenience routine to get the value by category name as a
- * UnicodeString. Otherwise works just like get(category).
- */
- const T &get(const UnicodeString &category) const {
- return get(toCategory(category));
- }
-
- /**
- * Returns a pointer to the value associated with a category
- * that caller can safely modify. If the value was defaulting to the 'other'
- * variant because no explicit value was stored, this method creates a
- * new value using the default constructor at the returned pointer.
- *
- * @param category the category with the value to change.
- * @param status error returned here if index is NONE or CATEGORY_COUNT
- * or memory could not be allocated, or any other error happens.
- */
- T *getMutable(
- Category category,
- UErrorCode &status) {
- return getMutable(category, NULL, status);
- }
-
- /**
- * Convenience routine to get a mutable pointer to a value by category name.
- * Otherwise works just like getMutable(Category, UErrorCode &).
- * reports an error if the category name is invalid.
- */
- T *getMutable(
- const char *category,
- UErrorCode &status) {
- return getMutable(toCategory(category), NULL, status);
- }
-
- /**
- * Just like getMutable(Category, UErrorCode &) but copies defaultValue to
- * returned pointer if it was defaulting to the 'other' variant
- * because no explicit value was stored.
- */
- T *getMutableWithDefault(
- Category category,
- const T &defaultValue,
- UErrorCode &status) {
- return getMutable(category, &defaultValue, status);
- }
-
- /**
- * Returns TRUE if this object equals rhs.
- */
- UBool equals(
- const PluralMap<T> &rhs,
- UBool (*eqFunc)(const T &, const T &)) const {
- for (int32_t i = 0; i < UPRV_LENGTHOF(fVariants); ++i) {
- if (fVariants[i] == rhs.fVariants[i]) {
- continue;
- }
- if (fVariants[i] == NULL || rhs.fVariants[i] == NULL) {
- return FALSE;
- }
- if (!eqFunc(*fVariants[i], *rhs.fVariants[i])) {
- return FALSE;
- }
- }
- return TRUE;
- }
-
-private:
- T fOtherVariant;
- T* fVariants[6];
-
- T *getMutable(
- Category category,
- const T *defaultValue,
- UErrorCode &status) {
- if (U_FAILURE(status)) {
- return NULL;
- }
- int32_t index = category;
- if (index < 0 || index >= UPRV_LENGTHOF(fVariants)) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- if (fVariants[index] == NULL) {
- fVariants[index] = defaultValue == NULL ?
- new T() : new T(*defaultValue);
- }
- if (!fVariants[index]) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
- return fVariants[index];
- }
-
- void initializeNew() {
- fVariants[0] = &fOtherVariant;
- for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) {
- fVariants[i] = NULL;
- }
- }
-};
-
-U_NAMESPACE_END
-
-#endif
diff --git a/contrib/libs/icu/common/propname.cpp b/contrib/libs/icu/common/propname.cpp
deleted file mode 100644
index a12eb7d9134..00000000000
--- a/contrib/libs/icu/common/propname.cpp
+++ /dev/null
@@ -1,328 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2002-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Author: Alan Liu
-* Created: October 30 2002
-* Since: ICU 2.4
-* 2010nov19 Markus Scherer Rewrite for formatVersion 2.
-**********************************************************************
-*/
-#include "propname.h"
-#include "unicode/uchar.h"
-#include "unicode/udata.h"
-#include "unicode/uscript.h"
-#include "umutex.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "uarrsort.h"
-#include "uinvchar.h"
-
-#define INCLUDED_FROM_PROPNAME_CPP
-#include "propname_data.h"
-
-U_CDECL_BEGIN
-
-/**
- * Reads the next non-ignorable ASCII character of a property name and
- * lowercases it. Ignorable characters are '-', '_' and ASCII White_Space.
- * @return ((number of chars consumed from name)<<8)|lowercased character;
- *         the low byte is 0 when the end of the string was reached
- */
-static inline int32_t
-getASCIIPropertyNameChar(const char *name) {
-    int32_t consumed=0;
-    char ch;
-
-    /* Keep reading while the character is a delimiter or white space. */
-    do {
-        ch=name[consumed++];
-    } while(ch==0x2d || ch==0x5f ||
-            ch==0x20 || (0x09<=ch && ch<=0x0d));
-
-    if(ch==0) {
-        return consumed<<8;
-    }
-    return (consumed<<8)|(uint8_t)uprv_asciitolower((char)ch);
-}
-
-/**
- * Reads the next non-ignorable EBCDIC character of a property name and
- * lowercases it. Ignorable characters are '-', '_' and EBCDIC White_Space.
- * @return ((number of chars consumed from name)<<8)|lowercased character;
- *         the low byte is 0 when the end of the string was reached
- */
-static inline int32_t
-getEBCDICPropertyNameChar(const char *name) {
-    int32_t consumed=0;
-    char ch;
-
-    /* Keep reading while the character is a delimiter or white space. */
-    do {
-        ch=name[consumed++];
-    } while(ch==0x60 || ch==0x6d ||
-            ch==0x40 || ch==0x05 || ch==0x15 || ch==0x25 ||
-            ch==0x0b || ch==0x0c || ch==0x0d);
-
-    if(ch==0) {
-        return consumed<<8;
-    }
-    return (consumed<<8)|(uint8_t)uprv_ebcdictolower((char)ch);
-}
-
-/**
- * Unicode property names and property value names are compared "loosely".
- *
- * UCD.html 4.0.1 says:
- * For all property names, property value names, and for property values for
- * Enumerated, Binary, or Catalog properties, use the following
- * loose matching rule:
- *
- * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
- *
- * This function does just that, for (char *) name strings.
- * It is almost identical to ucnv_compareNames() but also ignores
- * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
- *
- * @internal
- */
-
-U_CAPI int32_t U_EXPORT2
-uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
-    for(;;) {
-        /* Each result packs (advance count<<8)|lowercased character. */
-        const int32_t left=getASCIIPropertyNameChar(name1);
-        const int32_t right=getASCIIPropertyNameChar(name2);
-        const int32_t leftChar=left&0xff;
-        const int32_t rightChar=right&0xff;
-
-        /* Both strings ended at the same time: they match. */
-        if((leftChar|rightChar)==0) {
-            return 0;
-        }
-
-        /* Only the characters decide; advance counts may differ freely. */
-        if(leftChar!=rightChar) {
-            return leftChar-rightChar;
-        }
-
-        name1+=left>>8;
-        name2+=right>>8;
-    }
-}
-
-U_CAPI int32_t U_EXPORT2
-uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
-    for(;;) {
-        /* Each result packs (advance count<<8)|lowercased character. */
-        const int32_t left=getEBCDICPropertyNameChar(name1);
-        const int32_t right=getEBCDICPropertyNameChar(name2);
-        const int32_t leftChar=left&0xff;
-        const int32_t rightChar=right&0xff;
-
-        /* Both strings ended at the same time: they match. */
-        if((leftChar|rightChar)==0) {
-            return 0;
-        }
-
-        /* Only the characters decide; advance counts may differ freely. */
-        if(leftChar!=rightChar) {
-            return leftChar-rightChar;
-        }
-
-        name1+=left>>8;
-        name2+=right>>8;
-    }
-}
-
-U_CDECL_END
-
-U_NAMESPACE_BEGIN
-
-// Looks up a property in the range table at the start of valueMaps[].
-// Returns the valueMaps[] index of the property's entry pair, or 0 if the
-// property is not covered by any range.
-int32_t PropNameData::findProperty(int32_t property) {
-    int32_t pos=1;  // valueMaps[0] holds the number of ranges
-    int32_t remaining=valueMaps[0];
-    while(remaining>0) {
-        const int32_t start=valueMaps[pos];
-        const int32_t limit=valueMaps[pos+1];
-        pos+=2;  // now at the first entry pair of this range
-        if(property<start) {
-            return 0;
-        }
-        if(property<limit) {
-            return pos+(property-start)*2;
-        }
-        pos+=(limit-start)*2;  // step over this range's entry pairs
-        --remaining;
-    }
-    return 0;
-}
-
-int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) { // returns a nameGroups[] offset, or 0 if value has no name group
- if(valueMapIndex==0) {
- return 0; // The property does not have named values.
- }
- ++valueMapIndex; // Skip the BytesTrie offset.
- int32_t numRanges=valueMaps[valueMapIndex++]; // <0x10: count of ranges; >=0x10: 0x10 + count of listed values
- if(numRanges<0x10) {
- // Ranges of values: each range is start, limit, then (limit-start) name group offsets.
- for(; numRanges>0; --numRanges) {
- // Read and skip the start and limit of this range.
- int32_t start=valueMaps[valueMapIndex];
- int32_t limit=valueMaps[valueMapIndex+1];
- valueMapIndex+=2;
- if(value<start) {
- break; // value lies before this range, so no range matched
- }
- if(value<limit) {
- return valueMaps[valueMapIndex+value-start]; // entry for value within this range
- }
- valueMapIndex+=limit-start; // Skip all entries for this range.
- }
- } else {
- // List of values: (numRanges-0x10) values followed by as many name group offsets.
- int32_t valuesStart=valueMapIndex;
- int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
- do {
- int32_t v=valueMaps[valueMapIndex];
- if(value<v) {
- break; // value is smaller than this list entry: stop searching
- }
- if(value==v) {
- return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart]; // offset at the same position in the parallel list
- }
- } while(++valueMapIndex<nameGroupOffsetsStart);
- }
- return 0; // not found
-}
-
-// Returns the nameIndex-th name of a name group, or NULL if the index is
-// out of range or the selected name is empty.
-// nameGroup points at the count byte, which is followed by that many
-// NUL-terminated strings.
-const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
-    const int32_t numNames=*nameGroup;
-    if(nameIndex<0 || nameIndex>=numNames) {
-        return NULL;
-    }
-    // Walk past nameIndex strings, starting right after the count byte.
-    const char *entry=nameGroup+1;
-    for(int32_t skipped=0; skipped<nameIndex; ++skipped) {
-        entry=uprv_strchr(entry, 0)+1;
-    }
-    // An empty string means no name (Property[Value]Aliases.txt has "n/a").
-    return *entry==0 ? NULL : entry;
-}
-
-// Feeds the loosely-normalized name (lowercased, without '-', '_' and
-// ASCII White_Space) into the trie one byte at a time.
-// Returns FALSE for a NULL name or as soon as the trie cannot continue;
-// otherwise reports whether the trie has a value after the last byte.
-UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
-    if(name==NULL) {
-        return FALSE;
-    }
-    UStringTrieResult result=USTRINGTRIE_NO_VALUE;
-    for(const char *p=name; *p!=0; ++p) {
-        const char c=uprv_invCharToLowercaseAscii(*p);
-        // Delimiters and white space do not take part in the match.
-        if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
-            continue;
-        }
-        if(!USTRINGTRIE_HAS_NEXT(result)) {
-            return FALSE;
-        }
-        result=trie.next((uint8_t)c);
-    }
-    return USTRINGTRIE_HAS_VALUE(result);
-}
-
-// Returns the nameChoice-th name of the property, or NULL if the property
-// is unknown or has no such name.
-const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
-    const int32_t propertyEntry=findProperty(property);
-    if(propertyEntry!=0) {
-        // The first word of the entry pair is the property's name group offset.
-        const char *group=nameGroups+valueMaps[propertyEntry];
-        return getName(group, nameChoice);
-    }
-    return NULL;  // Not a known property.
-}
-
-// Returns the nameChoice-th name of a property value, or NULL if the
-// property is unknown, the value has no name group, or there is no such name.
-const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
-    const int32_t propertyEntry=findProperty(property);
-    if(propertyEntry!=0) {
-        // The second word of the entry pair is the property's value map index.
-        const int32_t groupOffset=
-            findPropertyValueNameGroup(valueMaps[propertyEntry+1], value);
-        if(groupOffset!=0) {
-            return getName(nameGroups+groupOffset, nameChoice);
-        }
-    }
-    return NULL;
-}
-
-// Looks up alias in the BytesTrie stored at bytesTrieOffset in bytesTries[].
-// Returns the trie's value for the alias, or UCHAR_INVALID_CODE if absent.
-int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
-    BytesTrie trie(bytesTries+bytesTrieOffset);
-    if(!containsName(trie, alias)) {
-        return UCHAR_INVALID_CODE;
-    }
-    return trie.getValue();
-}
-
-// Maps a property alias to its enum value via the trie at offset 0.
-int32_t PropNameData::getPropertyEnum(const char *alias) {
-    const int32_t propertyNamesTrieOffset=0;
-    return getPropertyOrValueEnum(propertyNamesTrieOffset, alias);
-}
-
-// Maps a property value alias to its value for the given property.
-// Returns UCHAR_INVALID_CODE if the property is unknown, has no named
-// values, or does not have a value with this alias.
-int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
-    const int32_t propertyEntry=findProperty(property);
-    if(propertyEntry!=0) {
-        const int32_t valueMapStart=valueMaps[propertyEntry+1];
-        if(valueMapStart!=0) {
-            // The first word of the property's value map is its BytesTrie offset.
-            return getPropertyOrValueEnum(valueMaps[valueMapStart], alias);
-        }
-    }
-    return UCHAR_INVALID_CODE;
-}
-U_NAMESPACE_END
-
-//----------------------------------------------------------------------
-// Public API implementation
-
-U_CAPI const char* U_EXPORT2
-u_getPropertyName(UProperty property,
-                  UPropertyNameChoice nameChoice) {
-    U_NAMESPACE_USE
-    // Thin C wrapper over the PropNameData lookup.
-    const char *name=PropNameData::getPropertyName(property, nameChoice);
-    return name;
-}
-
-U_CAPI UProperty U_EXPORT2
-u_getPropertyEnum(const char* alias) {
-    U_NAMESPACE_USE
-    // Thin C wrapper; the int32_t result is cast back to UProperty.
-    const int32_t code=PropNameData::getPropertyEnum(alias);
-    return (UProperty)code;
-}
-
-U_CAPI const char* U_EXPORT2
-u_getPropertyValueName(UProperty property,
-                       int32_t value,
-                       UPropertyNameChoice nameChoice) {
-    U_NAMESPACE_USE
-    // Thin C wrapper over the PropNameData lookup.
-    const char *name=PropNameData::getPropertyValueName(property, value, nameChoice);
-    return name;
-}
-
-U_CAPI int32_t U_EXPORT2
-u_getPropertyValueEnum(UProperty property,
-                       const char* alias) {
-    U_NAMESPACE_USE
-    // Thin C wrapper over the PropNameData lookup.
-    const int32_t code=PropNameData::getPropertyValueEnum(property, alias);
-    return code;
-}
-
-U_CAPI const char* U_EXPORT2
-uscript_getName(UScriptCode scriptCode){
-    // A script's name is the long name of its UCHAR_SCRIPT property value.
-    const UPropertyNameChoice choice=U_LONG_PROPERTY_NAME;
-    return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, choice);
-}
-
-U_CAPI const char* U_EXPORT2
-uscript_getShortName(UScriptCode scriptCode){
-    // A script's short name is the short name of its UCHAR_SCRIPT property value.
-    const UPropertyNameChoice choice=U_SHORT_PROPERTY_NAME;
-    return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, choice);
-}
diff --git a/contrib/libs/icu/common/propname.h b/contrib/libs/icu/common/propname.h
deleted file mode 100644
index 1a8ced5b879..00000000000
--- a/contrib/libs/icu/common/propname.h
+++ /dev/null
@@ -1,212 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2002-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Author: Alan Liu
-* Created: October 30 2002
-* Since: ICU 2.4
-* 2010nov19 Markus Scherer Rewrite for formatVersion 2.
-**********************************************************************
-*/
-#ifndef PROPNAME_H
-#define PROPNAME_H
-
-#include "unicode/utypes.h"
-#include "unicode/bytestrie.h"
-#include "unicode/uchar.h"
-#include "udataswp.h"
-#include "uprops.h"
-
-/*
- * This header defines the in-memory layout of the property names data
- * structure representing the UCD data files PropertyAliases.txt and
- * PropertyValueAliases.txt. It is used by:
- * propname.cpp - reads data
- * genpname - creates data
- */
-
-/* low-level char * property name comparison -------------------------------- */
-
-U_CDECL_BEGIN
-
-/**
- * \var uprv_comparePropertyNames
- * Unicode property names and property value names are compared "loosely".
- *
- * UCD.html 4.0.1 says:
- * For all property names, property value names, and for property values for
- * Enumerated, Binary, or Catalog properties, use the following
- * loose matching rule:
- *
- * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
- *
- * This function does just that, for (char *) name strings.
- * It is almost identical to ucnv_compareNames() but also ignores
- * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
- *
- * @internal
- */
-
-U_CAPI int32_t U_EXPORT2
-uprv_compareASCIIPropertyNames(const char *name1, const char *name2);
-
-U_CAPI int32_t U_EXPORT2
-uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);
-
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
-# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
-# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
-#else
-# error U_CHARSET_FAMILY is not valid
-#endif
-
-U_CDECL_END
-
-/* UDataMemory structure and signatures ------------------------------------- */
-
-#define PNAME_DATA_NAME "pnames"
-#define PNAME_DATA_TYPE "icu"
-
-/* Fields in UDataInfo: */
-
-/* PNAME_SIG[] is encoded as numeric literals for compatibility with the HP compiler */
-#define PNAME_SIG_0 ((uint8_t)0x70) /* p */
-#define PNAME_SIG_1 ((uint8_t)0x6E) /* n */
-#define PNAME_SIG_2 ((uint8_t)0x61) /* a */
-#define PNAME_SIG_3 ((uint8_t)0x6D) /* m */
-
-U_NAMESPACE_BEGIN
-
-class PropNameData {
-public:
- enum {
- // Byte offsets from the start of the data, after the generic header.
- IX_VALUE_MAPS_OFFSET,
- IX_BYTE_TRIES_OFFSET,
- IX_NAME_GROUPS_OFFSET,
- IX_RESERVED3_OFFSET,
- IX_RESERVED4_OFFSET,
- IX_TOTAL_SIZE,
-
- // Other values.
- IX_MAX_NAME_LENGTH,
- IX_RESERVED7,
- IX_COUNT
- };
-
- static const char *getPropertyName(int32_t property, int32_t nameChoice); // NULL if the property is unknown or has no such name
- static const char *getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice); // NULL if property/value is unknown or has no such name
-
- static int32_t getPropertyEnum(const char *alias); // UCHAR_INVALID_CODE if the alias is not found
- static int32_t getPropertyValueEnum(int32_t property, const char *alias); // UCHAR_INVALID_CODE if property or alias is not found
-
-private:
- static int32_t findProperty(int32_t property); // index into valueMaps[], 0 if not found
- static int32_t findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value); // offset into nameGroups[], 0 if not found
- static const char *getName(const char *nameGroup, int32_t nameIndex); // NULL if out of range or the name is empty
- static UBool containsName(BytesTrie &trie, const char *name); // loose match: ignores case, '-', '_' and ASCII white space
-
- static int32_t getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias); // looks up alias in the trie at bytesTries+bytesTrieOffset
-
- static const int32_t indexes[];
- static const int32_t valueMaps[];
- static const uint8_t bytesTries[];
- static const char nameGroups[];
-};
-
-/*
- * pnames.icu formatVersion 2
- *
- * formatVersion 2 is new in ICU 4.8.
- * In ICU 4.8, the pnames.icu data file is used only in ICU4J.
- * ICU4C 4.8 has the same data structures hardcoded in source/common/propname_data.h.
- *
- * For documentation of pnames.icu formatVersion 1 see ICU4C 4.6 (2010-dec-01)
- * or earlier versions of this header file (source/common/propname.h).
- *
- * The pnames.icu begins with the standard ICU DataHeader/UDataInfo.
- * After that:
- *
- * int32_t indexes[8];
- *
- * (See the PropNameData::IX_... constants.)
- *
- * The first 6 indexes are byte offsets from the beginning of the data
- * (beginning of indexes[]) to following structures.
- * The length of each structure is the difference between its offset
- * and the next one.
- * All offsets are filled in: Where there is no data between two offsets,
- * those two offsets are the same.
- * The last offset (indexes[PropNameData::IX_TOTAL_SIZE]) indicates the
- * total number of bytes in the file. (Not counting the standard headers.)
- *
- * The seventh index, at position 6 (indexes[PropNameData::IX_MAX_NAME_LENGTH]), has the
- * maximum length of any Unicode property (or property value) alias.
- * (Without normalization, that is, including underscores etc.)
- *
- * int32_t valueMaps[];
- *
- * The valueMaps[] begins with a map from UProperty enums to properties,
- * followed by the per-property value maps from property values to names,
- * for those properties that have named values.
- * (Binary & enumerated, plus General_Category_Mask.)
- *
- * valueMaps[0] contains the number of UProperty enum ranges.
- * For each range:
- * int32_t start, limit -- first and last+1 UProperty enum of a dense range
- * Followed by (limit-start) pairs of
- * int32_t nameGroupOffset;
- * Offset into nameGroups[] for the property's names/aliases.
- * int32_t valueMapIndex;
- * Offset of the property's value map in the valueMaps[] array.
- * If the valueMapIndex is 0, then the property does not have named values.
- *
- * For each property's value map:
- * int32_t bytesTrieOffset; -- Offset into bytesTries[] for name->value mapping.
- * int32_t numRanges;
- * If numRanges is in the range 1..15, then that many ranges of values follow.
- * Per range:
- * int32_t start, limit -- first and last+1 property value of a range
- * Followed by (limit-start) entries of
- * int32_t nameGroupOffset;
- * Offset into nameGroups[] for the property value's names/aliases.
- * If the nameGroupOffset is 0, then this is not a named value for this property.
- * (That is, the ranges need not be dense.)
- * If numRanges is >=0x10, then (numRanges-0x10) sorted values
- * and then (numRanges-0x10) corresponding nameGroupOffsets follow.
- * Values are sorted as signed integers.
- * In this case, the set of values is dense; no nameGroupOffset will be 0.
- *
- * For both properties and property values, ranges are sorted by their start/limit values.
- *
- * uint8_t bytesTries[];
- *
- * This is a sequence of BytesTrie structures, byte-serialized tries for
- * mapping from names/aliases to values.
- * The first one maps from property names/aliases to UProperty enum constants.
- * The following ones are indexed by property value map bytesTrieOffsets
- * for mapping each property's names/aliases to their property values.
- *
- * char nameGroups[];
- *
- * This is a sequence of property name groups.
- * Each group is a list of names/aliases (invariant-character strings) for
- * one property or property value, in the order of UPropertyNameChoice.
- * The first byte of each group is the number of names in the group.
- * It is followed by that many NUL-terminated strings.
- * The first string is for the short name; if there is no short name,
- * then the first string is empty.
- * The second string is the long name. Further strings are additional aliases.
- *
- * The first name group is for a property rather than a property value,
- * so that a nameGroupOffset of 0 can be used to indicate "no value"
- * in a property's sparse value ranges.
- */
-
-U_NAMESPACE_END
-
-#endif
diff --git a/contrib/libs/icu/common/propname_data.h b/contrib/libs/icu/common/propname_data.h
deleted file mode 100644
index 6f63e9cdd47..00000000000
--- a/contrib/libs/icu/common/propname_data.h
+++ /dev/null
@@ -1,1919 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//
-// Copyright (C) 1999-2016, International Business Machines
-// Corporation and others. All Rights Reserved.
-//
-// file name: propname_data.h
-//
-// machine-generated by: icu/tools/unicode/c/genprops/pnamesbuilder.cpp
-
-
-#ifdef INCLUDED_FROM_PROPNAME_CPP
-
-U_NAMESPACE_BEGIN
-
-const int32_t PropNameData::indexes[8]={0x20,0x15b8,0x5048,0xa69a,0xa69a,0xa69a,0x2f,0};
-
-const int32_t PropNameData::valueMaps[1382]={
-6,0,0x41,0,0xe3,0x368,0xe3,0x37e,0xe3,0x393,0xe3,0x3a9,0xe3,0x3b4,0xe3,0x3d5,
-0xe3,0x3e5,0xe3,0x3f4,0xe3,0x402,0xe3,0x426,0xe3,0x43d,0xe3,0x455,0xe3,0x46c,0xe3,0x47b,
-0xe3,0x48a,0xe3,0x49b,0xe3,0x4a9,0xe3,0x4bb,0xe3,0x4d5,0xe3,0x4f0,0xe3,0x505,0xe3,0x522,
-0xe3,0x533,0xe3,0x53e,0xe3,0x55d,0xe3,0x573,0xe3,0x584,0xe3,0x594,0xe3,0x5af,0xe3,0x5c8,
-0xe3,0x5d9,0xe3,0x5f3,0xe3,0x606,0xe3,0x616,0xe3,0x630,0xe3,0x649,0xe3,0x660,0xe3,0x674,
-0xe3,0x68a,0xe3,0x69e,0xe3,0x6b4,0xe3,0x6ce,0xe3,0x6e6,0xe3,0x702,0xe3,0x70a,0xe3,0x712,
-0xe3,0x71a,0xe3,0x722,0xe3,0x72b,0xe3,0x738,0xe3,0x74b,0xe3,0x768,0xe3,0x785,0xe3,0x7a2,
-0xe3,0x7c0,0xe3,0x7de,0xe3,0x802,0xe3,0x80f,0xe3,0x829,0xe3,0x83e,0xe3,0x859,0xe3,0x870,
-0xe3,0x887,0xe3,0x8a9,0xe3,0x1000,0x1019,0x8c8,0x15f,0xae8,0x17a,0x2f11,0xe9,0x2f30,0x2b3,0x306e,
-0x2c9,0x30c8,0x2d3,0x3325,0x2f5,0x3c20,0x35f,0x3c90,0x369,0x3f2a,0x398,0x3f68,0x3a0,0x4a5b,0x465,0x4ad9,
-0x46f,0x4afe,0x475,0x4b18,0x47b,0x4b39,0x482,0x4b53,0xe9,0x4b78,0xe9,0x4b9e,0x489,0x4c48,0x49f,0x4cc1,
-0x4b2,0x4d73,0x4cd,0x4daa,0x4d4,0x4f8a,0x4e8,0x540a,0x510,0x2000,0x2001,0x5469,0x518,0x3000,0x3001,0x54f5,
-0,0x4000,0x400e,0x5507,0,0x5510,0,0x552a,0,0x553b,0,0x554c,0,0x5562,0,0x556b,
-0,0x5588,0,0x55a6,0,0x55c4,0,0x55e2,0,0x55f8,0,0x560c,0,0x5622,0,0x7000,
-0x7001,0x563b,0,0x7d6,0x12,0,1,0x12,0x20,0x7f4,0x4a,0,1,6,7,8,
-9,0xa,0xb,0xc,0xd,0xe,0xf,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,
-0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x54,0x5b,0x67,0x6b,
-0x76,0x7a,0x81,0x82,0x84,0x85,0xc8,0xca,0xd6,0xd8,0xda,0xdc,0xde,0xe0,0xe2,0xe4,
-0xe6,0xe8,0xe9,0xea,0xf0,0x2e,0x40,0x4c,0x5e,0x68,0x79,0x84,0x91,0x9e,0xab,0xb8,
-0xc5,0xd2,0xdf,0xec,0xf9,0x106,0x113,0x120,0x12d,0x13a,0x147,0x154,0x161,0x16e,0x17b,0x188,
-0x195,0x1a2,0x1af,0x1bc,0x1c9,0x1d6,0x1e3,0x1f0,0x1fd,0x20c,0x21b,0x22a,0x239,0x248,0x257,0x266,
-0x275,0x28f,0x2a3,0x2b7,0x2d2,0x2e1,0x2ea,0x2fa,0x302,0x30b,0x31a,0x323,0x333,0x344,0x355,0x995,
-1,0,0x17,0x8d7,0x8e8,0x8f9,0x90d,0x924,0x93c,0x94e,0x963,0x97a,0x98f,0x99f,0x9b1,0x9ce,
-0x9ea,0x9fc,0xa19,0xa35,0xa51,0xa66,0xa7b,0xa95,0xab0,0xacb,0xb37,1,0,0x135,0xaf3,0xb00,
-0xb13,0xb3b,0xb59,0xb77,0xb8f,0xbba,0xbe4,0xbfc,0xc0f,0xc22,0xc31,0xc40,0xc4f,0xc5e,0xc75,0xc86,
-0xc99,0xcac,0xcb9,0xcc6,0xcd5,0xce6,0xcfb,0xd0c,0xd17,0xd20,0xd31,0xd42,0xd55,0xd67,0xd7a,0xd8d,
-0xdcc,0xdd9,0xde6,0xdf3,0xe08,0xe38,0xe52,0xe73,0xe9e,0xec1,0xf1f,0xf46,0xf61,0xf70,0xf97,0xfbf,
-0xfe2,0x1005,0x102f,0x1048,0x1067,0x108a,0x10ae,0x10c1,0x10db,0x1105,0x111d,0x1145,0x116e,0x1181,0x1194,0x11a7,
-0x11ce,0x11dd,0x11fd,0x122b,0x1249,0x1277,0x1293,0x12ae,0x12c7,0x12e0,0x1301,0x1331,0x1350,0x1372,0x13a6,0x13d3,
-0x1418,0x1439,0x1463,0x1484,0x14ad,0x14c0,0x14f3,0x150a,0x1519,0x152a,0x1555,0x156c,0x159d,0x15cb,0x160e,0x1619,
-0x1652,0x1663,0x1674,0x1681,0x1694,0x16ce,0x16f2,0x1716,0x1750,0x1788,0x17b3,0x17cb,0x17f7,0x1823,0x1830,0x183f,
-0x185c,0x187e,0x18ac,0x18cc,0x18f3,0x191a,0x1939,0x194c,0x195d,0x196e,0x1993,0x19b8,0x19df,0x1a13,0x1a40,0x1a5e,
-0x1a71,0x1a8a,0x1ac3,0x1ad2,0x1af2,0x1b14,0x1b36,0x1b4d,0x1b64,0x1b91,0x1baa,0x1bc3,0x1bf4,0x1c1e,0x1c39,0x1c4c,
-0x1c6b,0x1c74,0x1c87,0x1ca5,0x1cc3,0x1cd6,0x1ced,0x1d02,0x1d37,0x1d5b,0x1d70,0x1d7f,0x1d92,0x1db6,0x1dbf,0x1de3,
-0x1dfa,0x1e0d,0x1e1c,0x1e27,0x1e48,0x1e60,0x1e6f,0x1e7e,0x1e8d,0x1ea4,0x1eb9,0x1ece,0x1f07,0x1f1a,0x1f36,0x1f41,
-0x1f4e,0x1f7c,0x1fa0,0x1fc3,0x1fd6,0x1ff8,0x200b,0x2026,0x2049,0x206c,0x2091,0x20a2,0x20d1,0x20fe,0x2115,0x2130,
-0x213f,0x216a,0x21a2,0x21dc,0x220a,0x221b,0x2228,0x224c,0x225b,0x2277,0x2291,0x22ae,0x22e6,0x22fb,0x2328,0x2347,
-0x2375,0x2395,0x23c9,0x23d8,0x2402,0x2425,0x2450,0x245b,0x246c,0x2487,0x24ab,0x24b8,0x24cd,0x24f4,0x251f,0x2556,
-0x2569,0x257a,0x25aa,0x25bb,0x25ca,0x25df,0x25fd,0x2610,0x2623,0x263a,0x2657,0x2662,0x266b,0x268d,0x26a2,0x26c7,
-0x26de,0x2707,0x2722,0x2737,0x2750,0x2771,0x27a6,0x27b7,0x27e8,0x280c,0x281d,0x2836,0x2841,0x286e,0x2890,0x28be,
-0x28f1,0x2900,0x2911,0x292e,0x2970,0x2997,0x29a4,0x29b9,0x29dd,0x2a03,0x2a3c,0x2a4d,0x2a71,0x2a7c,0x2a89,0x2a98,
-0x2abd,0x2aeb,0x2b07,0x2b24,0x2b31,0x2b42,0x2b60,0x2b83,0x2ba0,0x2bad,0x2bcd,0x2bea,0x2c0b,0x2c34,0x2c45,0x2c64,
-0x2c7d,0x2c96,0x2ca7,0x2cf0,0x2d01,0x2d1a,0x2d49,0x2d76,0x2d9b,0x2ddd,0x2df9,0x2e08,0x2e1f,0x2e4d,0x2e66,0x2e8f,
-0x2ea9,0x2ee4,0x2f02,0x1e85,1,0,0x12,0x2f47,0x2f57,0x2f6a,0x2f7a,0x2f8a,0x2f99,0x2fa9,0x2fbb,0x2fce,
-0x2fe0,0x2ff0,0x3000,0x300f,0x301e,0x302e,0x303b,0x304a,0x305e,0x1f43,1,0,6,0x3083,0x308e,0x309b,
-0x30a8,0x30b5,0x30c0,0x1f87,1,0,0x1e,0x30dd,0x30ec,0x3101,0x3116,0x312b,0x313f,0x3150,0x3164,0x3177,
-0x3188,0x31a1,0x31b3,0x31c4,0x31d8,0x31eb,0x3203,0x3215,0x3220,0x3230,0x323e,0x3253,0x3268,0x327e,0x3298,0x32ae,
-0x32be,0x32d2,0x32e6,0x32f7,0x330f,0x21b2,1,0,0x66,0x3337,0x335a,0x3363,0x3370,0x337b,0x3384,0x338f,
-0x3398,0x33b1,0x33b6,0x33bf,0x33dc,0x33e5,0x33f2,0x33fb,0x341f,0x3426,0x342f,0x3442,0x344d,0x3456,0x3461,0x347a,
-0x3483,0x3492,0x349d,0x34a6,0x34b1,0x34ba,0x34c1,0x34ca,0x34d5,0x34de,0x34f7,0x3500,0x350d,0x3518,0x3529,0x3534,
-0x3549,0x3560,0x3569,0x3572,0x358b,0x3596,0x359f,0x35a8,0x35bf,0x35dc,0x35e7,0x35f8,0x3603,0x360a,0x3617,0x3624,
-0x3651,0x3666,0x366f,0x368a,0x36ad,0x36ce,0x36ef,0x3714,0x373b,0x375c,0x377f,0x37a0,0x37c7,0x37e8,0x380d,0x382c,
-0x384b,0x386a,0x3887,0x38a8,0x38c9,0x38ec,0x3911,0x3930,0x394f,0x3970,0x3997,0x39bc,0x39db,0x39fc,0x3a1f,0x3a3a,
-0x3a53,0x3a6e,0x3a87,0x3aa4,0x3abf,0x3adc,0x3afb,0x3b18,0x3b35,0x3b54,0x3b71,0x3b8c,0x3ba9,0x3bc6,0x3bf9,0x24f7,
-1,0,6,0x3c31,0x3c40,0x3c50,0x3c60,0x3c70,0x3c81,0x2555,1,0,0x2b,0x3c9f,0x3cab,0x3cb9,
-0x3cc8,0x3cd7,0x3ce7,0x3cf8,0x3d0c,0x3d21,0x3d37,0x3d4a,0x3d5e,0x3d6e,0x3d77,0x3d82,0x3d92,0x3dae,0x3dc0,0x3dce,
-0x3ddd,0x3de9,0x3dfe,0x3e12,0x3e25,0x3e33,0x3e47,0x3e55,0x3e5f,0x3e71,0x3e7d,0x3e8b,0x3e9b,0x3ea2,0x3ea9,0x3eb0,
-0x3eb7,0x3ebe,0x3ed4,0x3ef5,0x870,0x3f07,0x3f12,0x3f21,0x27ae,1,0,4,0x3f3b,0x3f46,0x3f52,0x3f5c,
-0x27d4,1,0,0xc1,0x3f73,0x3f80,0x3f95,0x3fa2,0x3fb1,0x3fbf,0x3fce,0x3fdd,0x3fef,0x3ffe,0x400c,0x401d,
-0x402c,0x403b,0x4048,0x4054,0x4063,0x4072,0x407c,0x4089,0x4096,0x40a5,0x40b3,0x40c2,0x40ce,0x40d8,0x40e4,0x40f4,
-0x4104,0x4112,0x411e,0x412f,0x413b,0x4147,0x4155,0x4162,0x416e,0x417b,0xd0c,0x4188,0x4196,0x41b0,0x41b9,0x41c7,
-0x41d5,0x41e1,0x41f0,0x41fe,0x420c,0x4218,0x4227,0x4235,0x4243,0x4250,0x425f,0x427a,0x4289,0x429a,0x42ab,0x42be,
-0x42d0,0x42df,0x42f1,0x4300,0x430c,0x4317,0x1e1c,0x4324,0x432f,0x433a,0x4345,0x4350,0x436b,0x4376,0x4381,0x438c,
-0x439f,0x43b3,0x43be,0x43cd,0x43dc,0x43e7,0x43f2,0x43ff,0x440e,0x441c,0x4427,0x4442,0x444c,0x445d,0x446e,0x447d,
-0x448e,0x4499,0x44a4,0x44af,0x44ba,0x44c5,0x44d0,0x44db,0x44e5,0x44f0,0x4500,0x450b,0x4519,0x4526,0x4531,0x4540,
-0x454d,0x455a,0x4569,0x4576,0x4587,0x4599,0x45a9,0x45b4,0x45c7,0x45de,0x45ec,0x45f9,0x4604,0x4611,0x4622,0x463e,
-0x4654,0x465f,0x467c,0x468c,0x469b,0x46a6,0x46b1,0x1f36,0x46bd,0x46c8,0x46e0,0x46f0,0x46ff,0x470d,0x471b,0x4726,
-0x4731,0x4745,0x475c,0x4774,0x4784,0x4794,0x47a4,0x47b6,0x47c1,0x47cc,0x47d6,0x47e2,0x47f0,0x4803,0x480f,0x481c,
-0x4827,0x4843,0x4850,0x485e,0x4877,0x2836,0x4886,0x2657,0x4893,0x48a1,0x48b3,0x48c1,0x48cd,0x48dd,0x2a71,0x48eb,
-0x48f7,0x4902,0x490d,0x4918,0x492c,0x493a,0x4951,0x495d,0x4971,0x497f,0x4991,0x49a7,0x49b5,0x49c7,0x49d5,0x49f2,
-0x4a04,0x4a11,0x4a22,0x4a34,0x4a4e,0x31cc,1,0,6,0x4a75,0x4a88,0x4a98,0x4aa6,0x4ab7,0x4ac7,0x3228,
-0x12,0,1,0x4af1,0x4af7,0x3235,0x12,0,1,0x4af1,0x4af7,0x3242,1,0,3,0x4af1,
-0x4af7,0x4b30,0x3258,1,0,3,0x4af1,0x4af7,0x4b30,0x326e,1,0,0x12,0x4bba,0x4bc4,0x4bd0,
-0x4bd7,0x4be2,0x4be7,0x4bee,0x4bf5,0x4bfe,0x4c03,0x4c08,0x4c18,0x870,0x3f07,0x4c24,0x3f12,0x4c34,0x3f21,0x3317,
-1,0,0xf,0x4bba,0x4c5b,0x4c65,0x4c6f,0x4c7a,0x3ddd,0x4c84,0x4c90,0x4c98,0x4c9f,0x4ca9,0x4bd0,0x4bd7,
-0x4be7,0x4cb3,0x339e,1,0,0x17,0x4bba,0x4cd0,0x4c6f,0x4cdc,0x4ce9,0x4cf7,0x3ddd,0x4d02,0x4bd0,0x4d13,
-0x4be7,0x4d22,0x4d30,0x870,0x3ef5,0x4d3c,0x4d4d,0x3f07,0x4c24,0x3f12,0x4c34,0x3f21,0x4d5e,0x34bb,1,0,
-3,0x4d91,0x4d99,0x4da1,0x34d4,1,0,0x10,0x4dca,0x4dd1,0x4de0,0x4e01,0x4e24,0x4e2f,0x4e4e,0x4e65,
-0x4e72,0x4e7b,0x4e9a,0x4ecd,0x4ee8,0x4f17,0x4f34,0x4f59,0x356d,1,0,0x24,0x4fa8,0x4fb5,0x4fc8,0x4fd5,
-0x5002,0x5027,0x503c,0x505b,0x507c,0x50a9,0x50e2,0x5105,0x5128,0x5155,0x518a,0x51b1,0x51da,0x5211,0x5240,0x5261,
-0x5286,0x5295,0x52b8,0x52cf,0x52dc,0x52eb,0x5308,0x5321,0x5344,0x5369,0x5382,0x5397,0x53a6,0x53b7,0x53c4,0x53e5,
-0x373d,1,0,4,0x5423,0x542e,0x5446,0x545e,0x3779,0x36,1,2,4,8,0xe,0x10,
-0x20,0x3e,0x40,0x80,0x100,0x1c0,0x200,0x400,0x800,0xe00,0x1000,0x2000,0x4000,0x7000,0x8000,0x10000,
-0x20000,0x40000,0x78001,0x80000,0x100000,0x200000,0x400000,0x800000,0x1000000,0x2000000,0x4000000,0x8000000,0xf000000,0x10000000,0x20000000,0x30f80000,
-0x30dd,0x30ec,0x3101,0x3116,0x5497,0x312b,0x313f,0x548d,0x3150,0x3164,0x3177,0x54a8,0x3188,0x31a1,0x31b3,0x54bf,
-0x31c4,0x31d8,0x31eb,0x54e8,0x3203,0x3215,0x3220,0x3230,0x5484,0x323e,0x3253,0x3268,0x327e,0x3298,0x32ae,0x32be,
-0x32d2,0x32e6,0x54de,0x32f7,0x330f,0x54c9
-};
-
-const uint8_t PropNameData::bytesTries[14992]={
-0,0x15,0x6d,0xc3,0x78,0x73,0xc2,0x12,0x76,0x7a,0x76,0x6a,0x77,0xa2,0x52,0x78,
-1,0x64,0x50,0x69,0x10,0x64,1,0x63,0x30,0x73,0x62,0x13,0x74,0x61,0x72,0x74,
-0x63,0x60,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x61,0x13,0x69,0x67,0x69,0x74,
-0x81,3,0x61,0x2e,0x65,0x4c,0x6f,0xc3,0x18,0x73,0x69,0x1e,0x72,0x69,0x61,0x74,
-0x69,0x6f,0x6e,0x73,0x65,0x6c,0x65,0x63,0x74,0x6f,0x72,0x69,0x10,0x72,0x1f,0x74,
-0x69,0x63,0x61,0x6c,0x6f,0x72,0x69,0x65,0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,0xc3,
-0x18,3,0x62,0xc3,0x14,0x68,0x32,0x6f,0x42,0x73,0x13,0x70,0x61,0x63,0x65,0x5f,
-0x17,0x69,0x74,0x65,0x73,0x70,0x61,0x63,0x65,0x5f,0x16,0x72,0x64,0x62,0x72,0x65,
-0x61,0x6b,0xc3,0x14,0x73,0xa2,0x49,0x74,0xa4,0x3b,0x75,3,0x63,0xd9,0x40,0xc,
-0x69,0x52,0x6e,0x58,0x70,0x12,0x70,0x65,0x72,0x5c,0x13,0x63,0x61,0x73,0x65,0x5c,
-0x16,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,0xc,0x12,0x64,0x65,0x6f,0x5b,
-0x10,0x69,1,0x63,0x3e,0x66,0x1b,0x69,0x65,0x64,0x69,0x64,0x65,0x6f,0x67,0x72,
-0x61,0x70,0x68,0x5b,0x17,0x6f,0x64,0x65,0x31,0x6e,0x61,0x6d,0x65,0xd9,0x40,0xb,
-0xa,0x69,0x84,0x70,0x19,0x70,0x30,0x74,0x36,0x75,0x10,0x63,0xd9,0x40,9,0x12,
-0x61,0x63,0x65,0x5f,1,0x63,0xd9,0x40,8,0x65,0x11,0x72,0x6d,0x67,0x69,0x3c,
-0x6c,0xa2,0x5f,0x6f,0x17,0x66,0x74,0x64,0x6f,0x74,0x74,0x65,0x64,0x57,0x13,0x6d,
-0x70,0x6c,0x65,3,0x63,0x50,0x6c,0x68,0x74,0x8a,0x75,0x1e,0x70,0x70,0x65,0x72,
-0x63,0x61,0x73,0x65,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,9,0x19,0x61,
-0x73,0x65,0x66,0x6f,0x6c,0x64,0x69,0x6e,0x67,0xd9,0x40,6,0x1e,0x6f,0x77,0x65,
-0x72,0x63,0x61,0x73,0x65,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,7,0x1e,
-0x69,0x74,0x6c,0x65,0x63,0x61,0x73,0x65,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,
-0x40,8,0x10,0x63,0xd9,0x40,7,0x62,0xc3,0x13,0x63,0x34,0x64,0x57,0x65,0x6e,
-0x66,0x10,0x63,0xd9,0x40,6,0xc2,0xa,2,0x66,0xd9,0x40,6,0x72,0x28,0x78,
-0xd9,0x70,0,0x12,0x69,0x70,0x74,0xc2,0xa,0x19,0x65,0x78,0x74,0x65,0x6e,0x73,
-0x69,0x6f,0x6e,0x73,0xd9,0x70,0,1,0x67,0x6a,0x6e,1,0x73,0x54,0x74,0x13,
-0x65,0x6e,0x63,0x65,1,0x62,0x34,0x74,0x16,0x65,0x72,0x6d,0x69,0x6e,0x61,0x6c,
-0x67,0x13,0x72,0x65,0x61,0x6b,0xc3,0x13,0x14,0x69,0x74,0x69,0x76,0x65,0x65,1,
-0x6d,0x2e,0x73,0x13,0x74,0x61,0x72,0x74,0x73,0x19,0x65,0x6e,0x74,0x73,0x74,0x61,
-0x72,0x74,0x65,0x72,0x73,3,0x63,0x66,0x65,0x72,0x69,0x98,0x72,0x19,0x61,0x69,
-0x6c,0x63,0x61,0x6e,0x6f,0x6e,0x69,0x63,0x1f,0x61,0x6c,0x63,0x6f,0x6d,0x62,0x69,
-0x6e,0x69,0x6e,0x67,0x63,0x6c,0x61,0x73,0x73,0xc3,0x11,0xd8,0x40,0xa,0x11,0x63,
-0x63,0xc3,0x11,0x11,0x72,0x6d,0x58,0x1e,0x69,0x6e,0x61,0x6c,0x70,0x75,0x6e,0x63,
-0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x59,0x1d,0x74,0x6c,0x65,0x63,0x61,0x73,0x65,
-0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,0xa,0x6d,0x70,0x6e,0x76,0x70,0xa2,
-0xf1,0x71,0xa4,0x43,0x72,2,0x61,0x28,0x65,0x32,0x69,0x9d,0x14,0x64,0x69,0x63,
-0x61,0x6c,0x55,0x1e,0x67,0x69,0x6f,0x6e,0x61,0x6c,0x69,0x6e,0x64,0x69,0x63,0x61,
-0x74,0x6f,0x72,0x9d,0x12,0x61,0x74,0x68,0x4f,6,0x6f,0x39,0x6f,0x32,0x74,0xc3,
-9,0x75,0x54,0x76,0xd9,0x30,0,0x12,0x6e,0x63,0x68,0x1f,0x61,0x72,0x61,0x63,
-0x74,0x65,0x72,0x63,0x6f,0x64,0x65,0x70,0x6f,0x69,0x6e,0x74,0x51,0x14,0x6d,0x65,
-0x72,0x69,0x63,1,0x74,0x32,0x76,0x13,0x61,0x6c,0x75,0x65,0xd9,0x30,0,0x12,
-0x79,0x70,0x65,0xc3,9,0x61,0xa2,0x77,0x63,0xa2,0x82,0x66,2,0x63,0x98,0x64,
-0xa2,0x53,0x6b,1,0x63,0x56,0x64,1,0x69,0x42,0x71,1,0x63,0xc3,0xd,0x75,
-0x17,0x69,0x63,0x6b,0x63,0x68,0x65,0x63,0x6b,0xc3,0xd,0x13,0x6e,0x65,0x72,0x74,
-0x6d,1,0x69,0x42,0x71,1,0x63,0xc3,0xf,0x75,0x17,0x69,0x63,0x6b,0x63,0x68,
-0x65,0x63,0x6b,0xc3,0xf,0x13,0x6e,0x65,0x72,0x74,0x71,1,0x69,0x42,0x71,1,
-0x63,0xc3,0xe,0x75,0x17,0x69,0x63,0x6b,0x63,0x68,0x65,0x63,0x6b,0xc3,0xe,0x13,
-0x6e,0x65,0x72,0x74,0x6f,1,0x69,0x42,0x71,1,0x63,0xc3,0xc,0x75,0x17,0x69,
-0x63,0x6b,0x63,0x68,0x65,0x63,0x6b,0xc3,0xc,0x13,0x6e,0x65,0x72,0x74,0x6b,0xd8,
-0x40,5,1,0x31,0xd9,0x40,0xb,0x6d,0x10,0x65,0xd9,0x40,5,0x12,0x68,0x61,
-0x72,0x51,2,0x61,0x6c,0x63,0xa2,0x4c,0x72,1,0x65,0x2a,0x69,0x11,0x6e,0x74,
-0x7f,0x16,0x70,0x65,0x6e,0x64,0x65,0x64,0x63,0x1f,0x6f,0x6e,0x63,0x61,0x74,0x65,
-0x6e,0x61,0x74,0x69,0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x9f,0x10,0x74,2,0x73,0x2c,
-0x74,0x30,0x77,0x10,0x73,0x77,0x11,0x79,0x6e,0x75,0x12,0x65,0x72,0x6e,1,0x73,
-0x38,0x77,0x18,0x68,0x69,0x74,0x65,0x73,0x70,0x61,0x63,0x65,0x77,0x14,0x79,0x6e,
-0x74,0x61,0x78,0x75,0x10,0x6d,0x9f,1,0x6d,0x3c,0x75,0x1a,0x6f,0x74,0x61,0x74,
-0x69,0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x53,0x12,0x61,0x72,0x6b,0x53,0x66,0xc1,0xf8,
-0x69,0xc1,0x3c,0x69,0xa2,0x6f,0x6a,0xa4,9,0x6c,4,0x62,0xc3,8,0x63,0x8c,
-0x65,0x98,0x69,0xa2,0x56,0x6f,2,0x65,0x4b,0x67,0x4c,0x77,0x11,0x65,0x72,0x4c,
-0x13,0x63,0x61,0x73,0x65,0x4c,0x16,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,
-4,0x11,0x69,0x63,0x1f,0x61,0x6c,0x6f,0x72,0x64,0x65,0x72,0x65,0x78,0x63,0x65,
-0x70,0x74,0x69,0x6f,0x6e,0x4b,0xd8,0x40,4,0x11,0x63,0x63,0xc3,0x10,0x18,0x61,
-0x64,0x63,0x61,0x6e,0x6f,0x6e,0x69,0x63,0x1f,0x61,0x6c,0x63,0x6f,0x6d,0x62,0x69,
-0x6e,0x69,0x6e,0x67,0x63,0x6c,0x61,0x73,0x73,0xc3,0x10,0x16,0x6e,0x65,0x62,0x72,
-0x65,0x61,0x6b,0xc3,8,2,0x64,0x4a,0x6e,0xa2,0x5b,0x73,1,0x63,0xd9,0x40,
-3,0x6f,0x16,0x63,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0xd9,0x40,3,2,0x63,0x80,
-0x65,0x90,0x73,0x40,1,0x62,0x52,0x74,0x46,1,0x61,0x40,0x72,0x1c,0x69,0x6e,
-0x61,0x72,0x79,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x47,0x11,0x72,0x74,0x41,
-0x44,0x1c,0x69,0x6e,0x61,0x72,0x79,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x45,
-0x3e,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x3f,0x10,0x6f,0x42,0x16,0x67,0x72,
-0x61,0x70,0x68,0x69,0x63,0x43,2,0x64,0x2e,0x70,0x86,0x73,0x10,0x63,0xc3,0x17,
-0x11,0x69,0x63,1,0x70,0x46,0x73,0x1e,0x79,0x6c,0x6c,0x61,0x62,0x69,0x63,0x63,
-0x61,0x74,0x65,0x67,0x6f,0x72,0x79,0xc3,0x17,0x10,0x6f,0x1f,0x73,0x69,0x74,0x69,
-0x6f,0x6e,0x61,0x6c,0x63,0x61,0x74,0x65,0x67,0x6f,0x72,0x79,0xc3,0x16,0x10,0x63,
-0xc3,0x16,2,0x67,0xc3,6,0x6f,0x26,0x74,0xc3,7,0x11,0x69,0x6e,1,0x63,
-0x4a,0x69,0x11,0x6e,0x67,1,0x67,0x2e,0x74,0x12,0x79,0x70,0x65,0xc3,7,0x13,
-0x72,0x6f,0x75,0x70,0xc3,6,0x48,0x15,0x6f,0x6e,0x74,0x72,0x6f,0x6c,0x49,0x66,
-0x86,0x67,0xa2,0x4a,0x68,3,0x61,0x36,0x65,0x58,0x73,0x68,0x79,0x13,0x70,0x68,
-0x65,0x6e,0x3d,0x1f,0x6e,0x67,0x75,0x6c,0x73,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,
-0x74,0x79,0x70,0x65,0xc3,0xb,0x10,0x78,0x3a,0x14,0x64,0x69,0x67,0x69,0x74,0x3b,
-0x10,0x74,0xc3,0xb,0x16,0x75,0x6c,0x6c,0x63,0x6f,0x6d,0x70,0x1f,0x6f,0x73,0x69,
-0x74,0x69,0x6f,0x6e,0x65,0x78,0x63,0x6c,0x75,0x73,0x69,0x6f,0x6e,0x33,2,0x63,
-0xa2,0x44,0x65,0xa2,0x4b,0x72,3,0x61,0x34,0x62,0x84,0x65,0x8a,0x6c,0x12,0x69,
-0x6e,0x6b,0x39,0x11,0x70,0x68,0x7c,0x12,0x65,0x6d,0x65,3,0x62,0x5e,0x63,0x30,
-0x65,0x48,0x6c,0x12,0x69,0x6e,0x6b,0x39,0x1a,0x6c,0x75,0x73,0x74,0x65,0x72,0x62,
-0x72,0x65,0x61,0x6b,0xc3,0x12,0x14,0x78,0x74,0x65,0x6e,0x64,0x37,0x12,0x61,0x73,
-0x65,0x35,0x11,0x78,0x74,0x37,0xc2,5,1,0x62,0xc3,0x12,0x6d,0xd9,0x20,0,
-0x1c,0x6e,0x65,0x72,0x61,0x6c,0x63,0x61,0x74,0x65,0x67,0x6f,0x72,0x79,0xc2,5,
-0x13,0x6d,0x61,0x73,0x6b,0xd9,0x20,0,0x61,0xa2,0x90,0x62,0xa2,0xbe,0x63,0xa4,
-0x30,0x64,0xa4,0xfd,0x65,5,0x6d,0x63,0x6d,0x6e,0x70,0xa2,0x59,0x78,0x10,0x74,
-0x30,1,0x65,0x2c,0x70,0x12,0x69,0x63,0x74,0xa1,0x12,0x6e,0x64,0x65,1,0x64,
-0x24,0x72,0x31,0x1b,0x70,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x69,0x63,
-0xa1,0x10,0x6f,1,0x64,0x97,0x6a,0x10,0x69,0x92,2,0x63,0x40,0x6d,0x50,0x70,
-0x1a,0x72,0x65,0x73,0x65,0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,0x95,0x17,0x6f,0x6d,
-0x70,0x6f,0x6e,0x65,0x6e,0x74,0x9b,0x16,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x96,
-0x13,0x62,0x61,0x73,0x65,0x99,0x12,0x72,0x65,0x73,0x95,0x61,0x30,0x62,0x4e,0x63,
-0x12,0x6f,0x6d,0x70,0x9b,0xc2,4,0x1b,0x73,0x74,0x61,0x73,0x69,0x61,0x6e,0x77,
-0x69,0x64,0x74,0x68,0xc3,4,0x12,0x61,0x73,0x65,0x99,3,0x67,0x44,0x68,0x4a,
-0x6c,0x4e,0x73,0x1a,0x63,0x69,0x69,0x68,0x65,0x78,0x64,0x69,0x67,0x69,0x74,0x23,
-0x10,0x65,0xd9,0x40,0,0x11,0x65,0x78,0x23,1,0x6e,0x38,0x70,0x11,0x68,0x61,
-0x20,0x14,0x62,0x65,0x74,0x69,0x63,0x21,0x11,0x75,0x6d,0x79,4,0x63,0xc3,0,
-0x69,0x3e,0x6c,0xa2,0x57,0x6d,0xa2,0x64,0x70,1,0x62,0xd9,0x40,0xd,0x74,0xc3,
-0x15,0x11,0x64,0x69,2,0x63,0x54,0x6d,0x74,0x70,0x1b,0x61,0x69,0x72,0x65,0x64,
-0x62,0x72,0x61,0x63,0x6b,0x65,0x74,0xd8,0x40,0xd,0x13,0x74,0x79,0x70,0x65,0xc3,
-0x15,0x24,1,0x6c,0x30,0x6f,0x14,0x6e,0x74,0x72,0x6f,0x6c,0x25,0x12,0x61,0x73,
-0x73,0xc3,0,0x26,0x14,0x69,0x72,0x72,0x6f,0x72,1,0x65,0x38,0x69,0x16,0x6e,
-0x67,0x67,0x6c,0x79,0x70,0x68,0xd9,0x40,1,0x10,0x64,0x27,2,0x61,0x32,0x6b,
-0xc3,1,0x6f,0x11,0x63,0x6b,0xc3,1,0x11,0x6e,0x6b,0x7b,0x10,0x67,0xd9,0x40,
-1,6,0x68,0x7c,0x68,0x54,0x69,0x85,0x6f,0xa2,0x6f,0x77,4,0x63,0x30,0x6b,
-0x36,0x6c,0x87,0x74,0x8b,0x75,0x89,1,0x66,0x8d,0x6d,0x8f,0x11,0x63,0x66,0x91,
-0x18,0x61,0x6e,0x67,0x65,0x73,0x77,0x68,0x65,0x6e,4,0x63,0x44,0x6c,0x6c,0x6e,
-0x7e,0x74,0x98,0x75,0x18,0x70,0x70,0x65,0x72,0x63,0x61,0x73,0x65,0x64,0x89,0x12,
-0x61,0x73,0x65,1,0x66,0x30,0x6d,0x14,0x61,0x70,0x70,0x65,0x64,0x8f,0x14,0x6f,
-0x6c,0x64,0x65,0x64,0x8d,0x18,0x6f,0x77,0x65,0x72,0x63,0x61,0x73,0x65,0x64,0x87,
-0x1c,0x66,0x6b,0x63,0x63,0x61,0x73,0x65,0x66,0x6f,0x6c,0x64,0x65,0x64,0x91,0x18,
-0x69,0x74,0x6c,0x65,0x63,0x61,0x73,0x65,0x64,0x8b,0x13,0x6d,0x70,0x65,0x78,0x33,
-0x61,0x2e,0x63,0xa2,0x48,0x66,0xd9,0x40,2,1,0x6e,0x72,0x73,0x10,0x65,3,
-0x64,0x83,0x66,0x3a,0x69,0x4a,0x73,0x17,0x65,0x6e,0x73,0x69,0x74,0x69,0x76,0x65,
-0x65,0x15,0x6f,0x6c,0x64,0x69,0x6e,0x67,0xd9,0x40,2,0x17,0x67,0x6e,0x6f,0x72,
-0x61,0x62,0x6c,0x65,0x85,0x13,0x6f,0x6e,0x69,0x63,0x1f,0x61,0x6c,0x63,0x6f,0x6d,
-0x62,0x69,0x6e,0x69,0x6e,0x67,0x63,0x6c,0x61,0x73,0x73,0xc3,2,0x10,0x63,0xc3,
-2,3,0x61,0x30,0x65,0x34,0x69,0xa2,0x41,0x74,0xc3,3,0x11,0x73,0x68,0x29,
-2,0x63,0x3a,0x66,0x58,0x70,0x2c,0x16,0x72,0x65,0x63,0x61,0x74,0x65,0x64,0x2d,
-0x1d,0x6f,0x6d,0x70,0x6f,0x73,0x69,0x74,0x69,0x6f,0x6e,0x74,0x79,0x70,0x65,0xc3,
-3,0x15,0x61,0x75,0x6c,0x74,0x69,0x67,0x1f,0x6e,0x6f,0x72,0x61,0x62,0x6c,0x65,
-0x63,0x6f,0x64,0x65,0x70,0x6f,0x69,0x6e,0x74,0x2b,0x2a,0x10,0x61,0x2e,0x15,0x63,
-0x72,0x69,0x74,0x69,0x63,0x2f,3,0x66,0x34,0x6e,0x3e,0x74,0x42,0x79,0x22,0x11,
-0x65,0x73,0x23,0x20,0x13,0x61,0x6c,0x73,0x65,0x21,0x20,0x10,0x6f,0x21,0x22,0x12,
-0x72,0x75,0x65,0x23,0xb,0x6b,0x5b,0x6f,0x23,0x6f,0x3c,0x72,0x4c,0x76,1,0x69,
-0x24,0x72,0x33,0x13,0x72,0x61,0x6d,0x61,0x33,0x10,0x76,0x22,0x14,0x65,0x72,0x6c,
-0x61,0x79,0x23,0xa2,0xe2,0x13,0x69,0x67,0x68,0x74,0xa3,0xe2,0x6b,0x58,0x6c,0x74,
-0x6e,3,0x6b,0x2f,0x6f,0x30,0x72,0x21,0x75,0x12,0x6b,0x74,0x61,0x2f,0x19,0x74,
-0x72,0x65,0x6f,0x72,0x64,0x65,0x72,0x65,0x64,0x21,1,0x61,0x24,0x76,0x31,0x18,
-0x6e,0x61,0x76,0x6f,0x69,0x63,0x69,0x6e,0x67,0x31,0xa2,0xe0,0x12,0x65,0x66,0x74,
-0xa3,0xe0,0x64,0x45,0x64,0x4e,0x68,0x88,0x69,1,0x6f,0x26,0x73,0xa3,0xf0,0x1a,
-0x74,0x61,0x73,0x75,0x62,0x73,0x63,0x72,0x69,0x70,0x74,0xa3,0xf0,2,0x61,0xa3,
-0xea,0x62,0xa3,0xe9,0x6f,0x13,0x75,0x62,0x6c,0x65,1,0x61,0x30,0x62,0x13,0x65,
-0x6c,0x6f,0x77,0xa3,0xe9,0x13,0x62,0x6f,0x76,0x65,0xa3,0xea,0x12,0x61,0x6e,0x72,
-0x2c,0x15,0x65,0x61,0x64,0x69,0x6e,0x67,0x2d,0x61,0xa2,0x7b,0x62,0xa2,0xd4,0x63,
-0x11,0x63,0x63,4,0x31,0x3c,0x32,0xa2,0x42,0x33,0xa2,0x56,0x38,0xa2,0x64,0x39,
-0x10,0x31,0xa3,0x5b,9,0x35,0xa,0x35,0x3f,0x36,0x41,0x37,0x43,0x38,0x45,0x39,
-0x47,0x30,0x30,0x31,0x3c,0x32,0x42,0x33,0x4e,0x34,0x3d,0x34,1,0x33,0xa3,0x67,
-0x37,0xa3,0x6b,0x36,0x10,0x38,0xa3,0x76,0x38,1,0x32,0xa3,0x7a,0x39,0xa3,0x81,
-0x3a,2,0x30,0xa3,0x82,0x32,0xa3,0x84,0x33,0xa3,0x85,9,0x35,0xa,0x35,0x53,
-0x36,0x55,0x37,0x57,0x38,0x59,0x39,0x5b,0x30,0x49,0x31,0x4b,0x32,0x4d,0x33,0x4f,
-0x34,0x51,6,0x33,8,0x33,0x63,0x34,0x65,0x35,0x67,0x36,0x69,0x30,0x5d,0x31,
-0x5f,0x32,0x61,0x10,0x34,0xa3,0x54,0xa2,0xe6,3,0x62,0xa0,0x6c,0xa3,0xe4,0x72,
-0xa3,0xe8,0x74,2,0x61,0x74,0x62,0x7c,0x74,0x14,0x61,0x63,0x68,0x65,0x64,1,
-0x61,0x3e,0x62,0x13,0x65,0x6c,0x6f,0x77,0xa2,0xca,0x13,0x6c,0x65,0x66,0x74,0xa3,
-0xc8,0x13,0x62,0x6f,0x76,0x65,0xa2,0xd6,0x14,0x72,0x69,0x67,0x68,0x74,0xa3,0xd8,
-0xa2,0xd6,0x10,0x72,0xa3,0xd8,0xa2,0xca,0x10,0x6c,0xa3,0xc8,0x12,0x6f,0x76,0x65,
-0xa2,0xe6,1,0x6c,0x30,0x72,0x13,0x69,0x67,0x68,0x74,0xa3,0xe8,0x12,0x65,0x66,
-0x74,0xa3,0xe4,0xa2,0xdc,2,0x65,0x2c,0x6c,0xa3,0xda,0x72,0xa3,0xde,0x12,0x6c,
-0x6f,0x77,0xa2,0xdc,1,0x6c,0x30,0x72,0x13,0x69,0x67,0x68,0x74,0xa3,0xde,0x12,
-0x65,0x66,0x74,0xa3,0xda,0xb,0x6e,0xc0,0xca,0x72,0x5f,0x72,0x46,0x73,0xa2,0x48,
-0x77,1,0x68,0x24,0x73,0x33,0x17,0x69,0x74,0x65,0x73,0x70,0x61,0x63,0x65,0x33,
-0x22,1,0x69,0x30,0x6c,2,0x65,0x3d,0x69,0x4b,0x6f,0x3f,0x18,0x67,0x68,0x74,
-0x74,0x6f,0x6c,0x65,0x66,0x74,0x22,2,0x65,0x38,0x69,0x48,0x6f,0x16,0x76,0x65,
-0x72,0x72,0x69,0x64,0x65,0x3f,0x17,0x6d,0x62,0x65,0x64,0x64,0x69,0x6e,0x67,0x3d,
-0x15,0x73,0x6f,0x6c,0x61,0x74,0x65,0x4b,0x30,0x1e,0x65,0x67,0x6d,0x65,0x6e,0x74,
-0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x31,0x6e,0xa2,0x41,0x6f,0xa2,0x53,
-0x70,2,0x61,0x66,0x64,0x86,0x6f,0x1b,0x70,0x64,0x69,0x72,0x65,0x63,0x74,0x69,
-0x6f,0x6e,0x61,0x6c,1,0x66,0x32,0x69,0x15,0x73,0x6f,0x6c,0x61,0x74,0x65,0x4d,
-0x14,0x6f,0x72,0x6d,0x61,0x74,0x41,0x1f,0x72,0x61,0x67,0x72,0x61,0x70,0x68,0x73,
-0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x2f,1,0x66,0x41,0x69,0x4d,1,0x6f,
-0x28,0x73,0x10,0x6d,0x43,0x1b,0x6e,0x73,0x70,0x61,0x63,0x69,0x6e,0x67,0x6d,0x61,
-0x72,0x6b,0x43,1,0x6e,0x35,0x74,0x19,0x68,0x65,0x72,0x6e,0x65,0x75,0x74,0x72,
-0x61,0x6c,0x35,0x65,0x88,0x65,0x98,0x66,0xa2,0x6a,0x6c,0x20,1,0x65,0x30,0x72,
-2,0x65,0x37,0x69,0x49,0x6f,0x39,0x18,0x66,0x74,0x74,0x6f,0x72,0x69,0x67,0x68,
-0x74,0x20,2,0x65,0x38,0x69,0x48,0x6f,0x16,0x76,0x65,0x72,0x72,0x69,0x64,0x65,
-0x39,0x17,0x6d,0x62,0x65,0x64,0x64,0x69,0x6e,0x67,0x37,0x15,0x73,0x6f,0x6c,0x61,
-0x74,0x65,0x49,3,0x6e,0x25,0x73,0x27,0x74,0x29,0x75,0x15,0x72,0x6f,0x70,0x65,
-0x61,0x6e,2,0x6e,0x3c,0x73,0x46,0x74,0x18,0x65,0x72,0x6d,0x69,0x6e,0x61,0x74,
-0x6f,0x72,0x29,0x14,0x75,0x6d,0x62,0x65,0x72,0x25,0x17,0x65,0x70,0x61,0x72,0x61,
-0x74,0x6f,0x72,0x27,1,0x69,0x28,0x73,0x10,0x69,0x47,0x1f,0x72,0x73,0x74,0x73,
-0x74,0x72,0x6f,0x6e,0x67,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x47,0x61,0x4e,0x62,
-0x84,0x63,1,0x6f,0x24,0x73,0x2d,0x1c,0x6d,0x6d,0x6f,0x6e,0x73,0x65,0x70,0x61,
-0x72,0x61,0x74,0x6f,0x72,0x2d,2,0x6c,0x3b,0x6e,0x2b,0x72,0x13,0x61,0x62,0x69,
-0x63,1,0x6c,0x30,0x6e,0x14,0x75,0x6d,0x62,0x65,0x72,0x2b,0x14,0x65,0x74,0x74,
-0x65,0x72,0x3b,0x2e,1,0x6e,0x45,0x6f,0x1c,0x75,0x6e,0x64,0x61,0x72,0x79,0x6e,
-0x65,0x75,0x74,0x72,0x61,0x6c,0x45,0,0x16,0x6d,0xc8,0xc8,0x74,0xc1,0xee,0x77,
-0x6a,0x77,0x48,0x79,0x70,0x7a,0x1d,0x61,0x6e,0x61,0x62,0x61,0x7a,0x61,0x72,0x73,
-0x71,0x75,0x61,0x72,0x65,0xa5,0x18,0x10,0x61,1,0x6e,0x36,0x72,0x16,0x61,0x6e,
-0x67,0x63,0x69,0x74,0x69,0xa3,0xfc,0x12,0x63,0x68,0x6f,0xa5,0x2c,1,0x65,0x88,
-0x69,2,0x6a,0x3c,0x72,0x68,0x73,0x17,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x73,
-0xa3,0x48,0x12,0x69,0x6e,0x67,0xa2,0x74,0x1e,0x68,0x65,0x78,0x61,0x67,0x72,0x61,
-0x6d,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x74,0x16,0x61,0x64,0x69,0x63,0x61,
-0x6c,0x73,0xa3,0x49,0x13,0x7a,0x69,0x64,0x69,0xa5,0x34,0x74,0xa2,0x59,0x75,0xa4,
-0x35,0x76,2,0x61,0x36,0x65,0x7a,0x73,0xa2,0x6c,0x12,0x73,0x75,0x70,0xa3,0x7d,
-1,0x69,0xa3,0x9f,0x72,0x1e,0x69,0x61,0x74,0x69,0x6f,0x6e,0x73,0x65,0x6c,0x65,
-0x63,0x74,0x6f,0x72,0x73,0xa2,0x6c,0x19,0x73,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65,
-0x6e,0x74,0xa3,0x7d,1,0x64,0x3c,0x72,0x19,0x74,0x69,0x63,0x61,0x6c,0x66,0x6f,
-0x72,0x6d,0x73,0xa3,0x91,0x14,0x69,0x63,0x65,0x78,0x74,0xa2,0xaf,0x16,0x65,0x6e,
-0x73,0x69,0x6f,0x6e,0x73,0xa3,0xaf,4,0x61,0x68,0x65,0xa2,0xad,0x68,0xa2,0xb0,
-0x69,0xa2,0xb8,0x72,0x1c,0x61,0x6e,0x73,0x70,0x6f,0x72,0x74,0x61,0x6e,0x64,0x6d,
-0x61,0x70,0xa2,0xcf,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xcf,4,0x67,
-0x7e,0x69,0xa2,0x41,0x6b,0xa2,0x6a,0x6d,0xa2,0x6c,0x6e,0x12,0x67,0x75,0x74,0xa4,
-0x10,1,0x63,0x40,0x73,0x11,0x75,0x70,0xa4,0x33,0x16,0x70,0x6c,0x65,0x6d,0x65,
-0x6e,0x74,0xa5,0x33,0x18,0x6f,0x6d,0x70,0x6f,0x6e,0x65,0x6e,0x74,0x73,0xa5,0x11,
-2,0x61,0x2a,0x62,0x32,0x73,0xa3,0x60,0x12,0x6c,0x6f,0x67,0xa3,0x62,0x13,0x61,
-0x6e,0x77,0x61,0xa3,0x65,3,0x6c,0x52,0x74,0x56,0x76,0x5e,0x78,0x16,0x75,0x61,
-0x6e,0x6a,0x69,0x6e,0x67,0xa2,0x7c,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,
-0x7c,0x10,0x65,0xa3,0x70,0x12,0x68,0x61,0x6d,0xa3,0xae,0x12,0x69,0x65,0x74,0xa3,
-0xb7,0x11,0x72,0x69,0xa3,0xdc,0x11,0x69,0x6c,0x48,0x12,0x73,0x75,0x70,0xa4,0x2b,
-0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0x2b,0x13,0x6c,0x75,0x67,0x75,0x4b,
-0x10,0x61,1,0x61,0x24,0x69,0x53,0x11,0x6e,0x61,0x3d,2,0x62,0x34,0x66,0x3c,
-0x72,0x13,0x68,0x75,0x74,0x61,0xa3,0xfb,0x13,0x65,0x74,0x61,0x6e,0x57,0x14,0x69,
-0x6e,0x61,0x67,0x68,0xa3,0x90,2,0x63,0x82,0x67,0x92,0x6e,0x1f,0x69,0x66,0x69,
-0x65,0x64,0x63,0x61,0x6e,0x61,0x64,0x69,0x61,0x6e,0x61,0x62,0x6f,0x1f,0x72,0x69,
-0x67,0x69,0x6e,0x61,0x6c,0x73,0x79,0x6c,0x6c,0x61,0x62,0x69,0x63,0x73,0x62,0x17,
-0x65,0x78,0x74,0x65,0x6e,0x64,0x65,0x64,0xa3,0xad,0x11,0x61,0x73,0x62,0x12,0x65,
-0x78,0x74,0xa3,0xad,0x15,0x61,0x72,0x69,0x74,0x69,0x63,0xa3,0x78,0x70,0xc3,0x4b,
-0x70,0xa6,0x61,0x72,0xa8,0x1d,0x73,7,0x6f,0xc1,0xbe,0x6f,0xa2,0x69,0x70,0xa2,
-0x85,0x75,0xa2,0xa4,0x79,2,0x6c,0x50,0x6d,0x62,0x72,0x12,0x69,0x61,0x63,0x3a,
-0x12,0x73,0x75,0x70,0xa4,0x17,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0x17,
-0x17,0x6f,0x74,0x69,0x6e,0x61,0x67,0x72,0x69,0xa3,0x8f,0x13,0x62,0x6f,0x6c,0x73,
-1,0x61,0x4c,0x66,0x10,0x6f,0x1f,0x72,0x6c,0x65,0x67,0x61,0x63,0x79,0x63,0x6f,
-0x6d,0x70,0x75,0x74,0x69,0x6e,0x67,0xa5,0x32,0x1f,0x6e,0x64,0x70,0x69,0x63,0x74,
-0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0x65,0x78,0x74,1,0x61,0xa5,0x2a,0x65,0x14,
-0x6e,0x64,0x65,0x64,0x61,0xa5,0x2a,2,0x67,0x34,0x72,0x3e,0x79,0x13,0x6f,0x6d,
-0x62,0x6f,0xa5,0x16,0x13,0x64,0x69,0x61,0x6e,0xa5,0x23,0x17,0x61,0x73,0x6f,0x6d,
-0x70,0x65,0x6e,0x67,0xa3,0xda,1,0x61,0x32,0x65,0x14,0x63,0x69,0x61,0x6c,0x73,
-0xa3,0x56,0x12,0x63,0x69,0x6e,0x1f,0x67,0x6d,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,
-0x6c,0x65,0x74,0x74,0x65,0x72,0x73,0x2d,2,0x6e,0x48,0x70,0x76,0x74,0x1d,0x74,
-0x6f,0x6e,0x73,0x69,0x67,0x6e,0x77,0x72,0x69,0x74,0x69,0x6e,0x67,0xa5,6,0x15,
-0x64,0x61,0x6e,0x65,0x73,0x65,0xa2,0x9b,0x12,0x73,0x75,0x70,0xa2,0xdb,0x16,0x70,
-0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xdb,4,0x61,0xa2,0xa8,0x65,0x5c,0x6d,0x9e,
-0x70,0xa2,0x4b,0x73,0x13,0x79,0x6d,0x62,0x6f,0x1f,0x6c,0x73,0x61,0x6e,0x64,0x70,
-0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa5,5,0x10,0x72,1,0x61,
-0x4e,0x73,0x12,0x63,0x72,0x69,0x1f,0x70,0x74,0x73,0x61,0x6e,0x64,0x73,0x75,0x62,
-0x73,0x63,0x72,0x69,0x70,0x74,0x73,0x73,0x14,0x6e,0x64,0x73,0x75,0x62,0x73,0x1b,
-0x61,0x74,0x68,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x73,0xa3,0x6a,1,0x6c,
-0x40,0x75,1,0x61,0x6e,0x6e,0x17,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa3,
-0x8e,0x15,0x65,0x6d,0x65,0x6e,0x74,0x61,1,0x6c,0x50,0x72,0x1e,0x79,0x70,0x72,
-0x69,0x76,0x61,0x74,0x65,0x75,0x73,0x65,0x61,0x72,0x65,0x61,1,0x61,0xa3,0x6d,
-0x62,0xa3,0x6e,3,0x61,0x5c,0x6d,0x78,0x70,0xa2,0x41,0x73,0x13,0x79,0x6d,0x62,
-0x6f,0x1f,0x6c,0x73,0x61,0x6e,0x64,0x70,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,
-0x68,0x73,0xa5,5,0x14,0x72,0x72,0x6f,0x77,0x73,2,0x61,0xa3,0x67,0x62,0xa3,
-0x68,0x63,0xa3,0xfa,0x13,0x61,0x74,0x68,0x65,0x1f,0x6d,0x61,0x74,0x69,0x63,0x61,
-0x6c,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x73,0xa3,0x6a,0x19,0x75,0x6e,0x63,
-0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa3,0x8e,0x61,0x88,0x68,0xa2,0x48,0x69,0xa2,
-0x71,0x6d,0x12,0x61,0x6c,0x6c,1,0x66,0x46,0x6b,0x15,0x61,0x6e,0x61,0x65,0x78,
-0x74,0xa4,0x29,0x15,0x65,0x6e,0x73,0x69,0x6f,0x6e,0xa5,0x29,0x12,0x6f,0x72,0x6d,
-1,0x73,0xa3,0x54,0x76,0x16,0x61,0x72,0x69,0x61,0x6e,0x74,0x73,0xa3,0x54,1,
-0x6d,0x36,0x75,0x16,0x72,0x61,0x73,0x68,0x74,0x72,0x61,0xa3,0xa1,0x15,0x61,0x72,
-0x69,0x74,0x61,0x6e,0xa3,0xac,1,0x61,0x52,0x6f,0x13,0x72,0x74,0x68,0x61,0x1f,
-0x6e,0x64,0x66,0x6f,0x72,0x6d,0x61,0x74,0x63,0x6f,0x6e,0x74,0x72,0x6f,0x6c,0x73,
-0xa3,0xf7,1,0x72,0x2e,0x76,0x12,0x69,0x61,0x6e,0xa3,0x79,0x12,0x61,0x64,0x61,
-0xa3,0xd9,1,0x64,0x50,0x6e,0x13,0x68,0x61,0x6c,0x61,0x50,0x1d,0x61,0x72,0x63,
-0x68,0x61,0x69,0x63,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa3,0xf9,0x13,0x64,0x68,
-0x61,0x6d,0xa3,0xf8,5,0x72,0x35,0x72,0x44,0x73,0x64,0x75,1,0x61,0xa3,0x4e,
-0x6e,0x17,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x71,0x17,0x69,0x76,0x61,0x74,
-0x65,0x75,0x73,0x65,0xa2,0x4e,0x13,0x61,0x72,0x65,0x61,0xa3,0x4e,0x1b,0x61,0x6c,
-0x74,0x65,0x72,0x70,0x61,0x68,0x6c,0x61,0x76,0x69,0xa3,0xf6,0x61,0x40,0x68,0x82,
-0x6c,0x19,0x61,0x79,0x69,0x6e,0x67,0x63,0x61,0x72,0x64,0x73,0xa3,0xcc,2,0x68,
-0x38,0x6c,0x4a,0x75,0x15,0x63,0x69,0x6e,0x68,0x61,0x75,0xa3,0xf5,0x17,0x61,0x77,
-0x68,0x68,0x6d,0x6f,0x6e,0x67,0xa3,0xf3,0x15,0x6d,0x79,0x72,0x65,0x6e,0x65,0xa3,
-0xf4,1,0x61,0x8e,0x6f,1,0x65,0x74,0x6e,0x16,0x65,0x74,0x69,0x63,0x65,0x78,
-0x74,0xa2,0x72,1,0x65,0x2c,0x73,0x11,0x75,0x70,0xa3,0x8d,0x15,0x6e,0x73,0x69,
-0x6f,0x6e,0x73,0xa2,0x72,0x19,0x73,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,
-0xa3,0x8d,0x15,0x6e,0x69,0x63,0x69,0x61,0x6e,0xa3,0x97,1,0x67,0x3e,0x69,0x13,
-0x73,0x74,0x6f,0x73,0xa2,0xa6,0x13,0x64,0x69,0x73,0x63,0xa3,0xa6,0x12,0x73,0x70,
-0x61,0xa3,0x96,1,0x65,0x5c,0x75,1,0x6d,0x2a,0x6e,0x11,0x69,0x63,0x67,0x10,
-0x69,0xa2,0xc0,0x1d,0x6e,0x75,0x6d,0x65,0x72,0x61,0x6c,0x73,0x79,0x6d,0x62,0x6f,
-0x6c,0x73,0xa3,0xc0,0x13,0x6a,0x61,0x6e,0x67,0xa3,0xa3,0x6d,0xa2,0xe6,0x6e,0xa8,
-0x19,0x6f,6,0x70,0x63,0x70,0x56,0x72,0x8a,0x73,0xa2,0x4c,0x74,0x10,0x74,0x1f,
-0x6f,0x6d,0x61,0x6e,0x73,0x69,0x79,0x61,0x71,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,
-0xa5,0x28,0x18,0x74,0x69,0x63,0x61,0x6c,0x63,0x68,0x61,0x72,0x1f,0x61,0x63,0x74,
-0x65,0x72,0x72,0x65,0x63,0x6f,0x67,0x6e,0x69,0x74,0x69,0x6f,0x6e,0x85,1,0x69,
-0x46,0x6e,0x1e,0x61,0x6d,0x65,0x6e,0x74,0x61,0x6c,0x64,0x69,0x6e,0x67,0x62,0x61,
-0x74,0x73,0xa3,0xf2,0x11,0x79,0x61,0x47,1,0x61,0x30,0x6d,0x13,0x61,0x6e,0x79,
-0x61,0xa3,0x7a,0x11,0x67,0x65,0xa5,0xf,0x63,0xa2,0x71,0x67,0xa2,0x71,0x6c,1,
-0x63,0xa2,0x62,0x64,5,0x70,0x38,0x70,0x36,0x73,0x56,0x74,0x14,0x75,0x72,0x6b,
-0x69,0x63,0xa3,0xbf,0x11,0x65,0x72,1,0x6d,0x2e,0x73,0x12,0x69,0x61,0x6e,0xa3,
-0x8c,0x11,0x69,0x63,0xa3,0xf1,0x10,0x6f,1,0x67,0x3a,0x75,0x18,0x74,0x68,0x61,
-0x72,0x61,0x62,0x69,0x61,0x6e,0xa3,0xbb,0x13,0x64,0x69,0x61,0x6e,0xa5,0x22,0x68,
-0x42,0x69,0x54,0x6e,0x1a,0x6f,0x72,0x74,0x68,0x61,0x72,0x61,0x62,0x69,0x61,0x6e,
-0xa3,0xf0,0x17,0x75,0x6e,0x67,0x61,0x72,0x69,0x61,0x6e,0xa5,4,0x14,0x74,0x61,
-0x6c,0x69,0x63,0xa3,0x58,0x13,0x68,0x69,0x6b,0x69,0xa3,0x9d,0x10,0x72,0x85,0x12,
-0x68,0x61,0x6d,0x65,6,0x6f,0x86,0x6f,0x6c,0x72,0xa2,0x61,0x75,0xa2,0x62,0x79,
-0x14,0x61,0x6e,0x6d,0x61,0x72,0x58,0x12,0x65,0x78,0x74,2,0x61,0xa3,0xb6,0x62,
-0xa3,0xee,0x65,0x13,0x6e,0x64,0x65,0x64,1,0x61,0xa3,0xb6,0x62,0xa3,0xee,1,
-0x64,0x52,0x6e,0x15,0x67,0x6f,0x6c,0x69,0x61,0x6e,0x6a,0x12,0x73,0x75,0x70,0xa4,
-0xd,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0xd,0x10,0x69,0xa2,0xec,0x13,
-0x66,0x69,0x65,0x72,1,0x6c,0x3c,0x74,0x19,0x6f,0x6e,0x65,0x6c,0x65,0x74,0x74,
-0x65,0x72,0x73,0xa3,0x8a,0x15,0x65,0x74,0x74,0x65,0x72,0x73,0x2d,0x10,0x6f,0xa3,
-0xed,1,0x6c,0x44,0x73,0x11,0x69,0x63,0xa2,0x5c,0x18,0x61,0x6c,0x73,0x79,0x6d,
-0x62,0x6f,0x6c,0x73,0xa3,0x5c,0x13,0x74,0x61,0x6e,0x69,0xa5,3,0x61,0xa2,0x9b,
-0x65,0xa4,0x4c,0x69,1,0x61,0xa2,0x8f,0x73,0x10,0x63,5,0x70,0x18,0x70,0xa2,
-0x71,0x73,0x36,0x74,0x17,0x65,0x63,0x68,0x6e,0x69,0x63,0x61,0x6c,0x81,0x15,0x79,
-0x6d,0x62,0x6f,0x6c,0x73,0x8f,0x61,0xa2,0x66,0x65,0x46,0x6d,0x19,0x61,0x74,0x68,
-0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,1,0x61,0xa3,0x66,0x62,0xa3,0x69,0x17,0x6c,
-0x6c,0x61,0x6e,0x65,0x6f,0x75,0x73,2,0x6d,0x3a,0x73,0x6c,0x74,0x17,0x65,0x63,
-0x68,0x6e,0x69,0x63,0x61,0x6c,0x81,0x11,0x61,0x74,0x1f,0x68,0x65,0x6d,0x61,0x74,
-0x69,0x63,0x61,0x6c,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,1,0x61,0xa3,0x66,0x62,
-0xa3,0x69,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x8e,0x12,0x61,0x6e,0x64,1,0x61,
-0x3c,0x70,0x19,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa3,0xcd,0x14,
-0x72,0x72,0x6f,0x77,0x73,0xa3,0x73,0x10,0x6f,0xa3,0xd8,7,0x72,0x6f,0x72,0x44,
-0x73,0x4e,0x74,0x62,0x79,0x19,0x61,0x6e,0x6e,0x75,0x6d,0x65,0x72,0x61,0x6c,0x73,
-0xa5,0x20,0x13,0x63,0x68,0x65,0x6e,0xa5,0xc,0x18,0x61,0x72,0x61,0x6d,0x67,0x6f,
-0x6e,0x64,0x69,0xa5,0x14,0x10,0x68,2,0x61,0x3a,0x65,0x4a,0x6f,0x17,0x70,0x65,
-0x72,0x61,0x74,0x6f,0x72,0x73,0x7f,0x16,0x6c,0x70,0x68,0x61,0x6e,0x75,0x6d,0xa3,
-0x5d,0x16,0x6d,0x61,0x74,0x69,0x63,0x61,0x6c,1,0x61,0x36,0x6f,0x17,0x70,0x65,
-0x72,0x61,0x74,0x6f,0x72,0x73,0x7f,0x11,0x6c,0x70,0x1f,0x68,0x61,0x6e,0x75,0x6d,
-0x65,0x72,0x69,0x63,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x5d,0x68,0x50,0x6b,
-0x7e,0x6c,0x88,0x6e,1,0x64,0x34,0x69,0x15,0x63,0x68,0x61,0x65,0x61,0x6e,0xa3,
-0xea,0x12,0x61,0x69,0x63,0xa3,0xc6,1,0x61,0x3e,0x6a,0x12,0x6f,0x6e,0x67,0xa2,
-0xaa,0x14,0x74,0x69,0x6c,0x65,0x73,0xa3,0xaa,0x13,0x6a,0x61,0x6e,0x69,0xa3,0xe9,
-0x13,0x61,0x73,0x61,0x72,0xa5,0x1f,0x15,0x61,0x79,0x61,0x6c,0x61,0x6d,0x4f,3,
-0x64,0x6c,0x65,0x7e,0x6e,0xa2,0x47,0x72,0x14,0x6f,0x69,0x74,0x69,0x63,1,0x63,
-0x3c,0x68,0x19,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0xd7,0x15,
-0x75,0x72,0x73,0x69,0x76,0x65,0xa3,0xd6,0x17,0x65,0x66,0x61,0x69,0x64,0x72,0x69,
-0x6e,0xa5,0x21,0x17,0x74,0x65,0x69,0x6d,0x61,0x79,0x65,0x6b,0xa2,0xb8,0x12,0x65,
-0x78,0x74,0xa2,0xd5,0x16,0x65,0x6e,0x73,0x69,0x6f,0x6e,0x73,0xa3,0xd5,0x18,0x64,
-0x65,0x6b,0x69,0x6b,0x61,0x6b,0x75,0x69,0xa3,0xeb,6,0x6b,0x3b,0x6b,0x56,0x6f,
-0x5a,0x75,0x64,0x79,0x11,0x69,0x61,0x1f,0x6b,0x65,0x6e,0x67,0x70,0x75,0x61,0x63,
-0x68,0x75,0x65,0x68,0x6d,0x6f,0x6e,0x67,0xa5,0x27,0x10,0x6f,0xa3,0x92,0x14,0x62,
-0x6c,0x6f,0x63,0x6b,0x21,1,0x6d,0x2c,0x73,0x11,0x68,0x75,0xa5,0x15,0x17,0x62,
-0x65,0x72,0x66,0x6f,0x72,0x6d,0x73,0x7b,0x61,0x44,0x62,0x21,0x65,0x10,0x77,1,
-0x61,0xa5,0xe,0x74,0x14,0x61,0x69,0x6c,0x75,0x65,0xa3,0x8b,1,0x62,0x38,0x6e,
-0x17,0x64,0x69,0x6e,0x61,0x67,0x61,0x72,0x69,0xa5,0x26,0x15,0x61,0x74,0x61,0x65,
-0x61,0x6e,0xa3,0xef,0x67,0xc4,0x32,0x6a,0xc1,0xb9,0x6a,0xa2,0xd5,0x6b,0xa2,0xee,
-0x6c,4,0x61,0x54,0x65,0xa2,0x61,0x69,0xa2,0x78,0x6f,0xa2,0xb7,0x79,1,0x63,
-0x2e,0x64,0x12,0x69,0x61,0x6e,0xa3,0xa9,0x12,0x69,0x61,0x6e,0xa3,0xa7,1,0x6f,
-0x55,0x74,0x11,0x69,0x6e,1,0x31,0x82,0x65,0x11,0x78,0x74,4,0x61,0x5c,0x62,
-0x29,0x63,0xa3,0x94,0x64,0xa3,0x95,0x65,0xa2,0xe7,0x13,0x6e,0x64,0x65,0x64,4,
-0x61,0x36,0x62,0x29,0x63,0xa3,0x94,0x64,0xa3,0x95,0x65,0xa3,0xe7,0x26,0x18,0x64,
-0x64,0x69,0x74,0x69,0x6f,0x6e,0x61,0x6c,0x6d,0x24,0x12,0x73,0x75,0x70,0x24,0x16,
-0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0x25,1,0x70,0x42,0x74,0x1d,0x74,0x65,0x72,
-0x6c,0x69,0x6b,0x65,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x79,0x12,0x63,0x68,0x61,
-0xa3,0x9c,2,0x6d,0x4e,0x6e,0x54,0x73,0x10,0x75,0xa2,0xb0,0x12,0x73,0x75,0x70,
-0xa4,0x31,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0x31,0x11,0x62,0x75,0xa3,
-0x6f,0x12,0x65,0x61,0x72,1,0x61,0xa3,0xe8,0x62,1,0x69,0x38,0x73,0x17,0x79,
-0x6c,0x6c,0x61,0x62,0x61,0x72,0x79,0xa3,0x75,0x17,0x64,0x65,0x6f,0x67,0x72,0x61,
-0x6d,0x73,0xa3,0x76,0x1a,0x77,0x73,0x75,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x73,
-0xa3,0x4d,0x10,0x61,1,0x6d,0x32,0x76,0x14,0x61,0x6e,0x65,0x73,0x65,0xa3,0xb5,
-0x10,0x6f,0x5c,0x12,0x65,0x78,0x74,1,0x61,0xa3,0xb4,0x62,0xa3,0xb9,1,0x61,
-0xa2,0x43,0x68,4,0x61,0x40,0x69,0x50,0x6d,0x6e,0x6f,0x86,0x75,0x15,0x64,0x61,
-0x77,0x61,0x64,0x69,0xa3,0xe6,0x16,0x72,0x6f,0x73,0x68,0x74,0x68,0x69,0xa3,0x89,
-0x1d,0x74,0x61,0x6e,0x73,0x6d,0x61,0x6c,0x6c,0x73,0x63,0x72,0x69,0x70,0x74,0xa5,
-0x30,0x11,0x65,0x72,0x68,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x71,0x12,
-0x6a,0x6b,0x69,0xa3,0xe5,3,0x69,0x3a,0x6e,0x42,0x74,0xa2,0x51,0x79,0x13,0x61,
-0x68,0x6c,0x69,0xa3,0xa2,0x12,0x74,0x68,0x69,0xa3,0xc1,3,0x61,0x34,0x62,0x76,
-0x67,0x7c,0x6e,0x12,0x61,0x64,0x61,0x4d,1,0x65,0x40,0x73,0x11,0x75,0x70,0xa2,
-0xcb,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xcb,0x11,0x78,0x74,1,0x61,
-0xa5,0x13,0x65,0x14,0x6e,0x64,0x65,0x64,0x61,0xa5,0x13,0x11,0x75,0x6e,0xa3,0x42,
-0x11,0x78,0x69,0x96,0x17,0x72,0x61,0x64,0x69,0x63,0x61,0x6c,0x73,0x97,0x14,0x61,
-0x6b,0x61,0x6e,0x61,0x9e,1,0x65,0x4c,0x70,0x10,0x68,0x1f,0x6f,0x6e,0x65,0x74,
-0x69,0x63,0x65,0x78,0x74,0x65,0x6e,0x73,0x69,0x6f,0x6e,0x73,0xa3,0x6b,0x11,0x78,
-0x74,0xa3,0x6b,0x67,0xa2,0xb5,0x68,0xa4,0x84,0x69,3,0x64,0x4c,0x6d,0xa2,0x55,
-0x6e,0xa2,0x62,0x70,0x13,0x61,0x65,0x78,0x74,0x2a,0x16,0x65,0x6e,0x73,0x69,0x6f,
-0x6e,0x73,0x2b,1,0x63,0x99,0x65,0x17,0x6f,0x67,0x72,0x61,0x70,0x68,0x69,0x63,
-1,0x64,0x56,0x73,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa4,0xb,0x1d,0x61,0x6e,
-0x64,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa5,0xb,0x13,0x65,
-0x73,0x63,0x72,0x1f,0x69,0x70,0x74,0x69,0x6f,0x6e,0x63,0x68,0x61,0x72,0x61,0x63,
-0x74,0x65,0x72,0x73,0x99,0x1c,0x70,0x65,0x72,0x69,0x61,0x6c,0x61,0x72,0x61,0x6d,
-0x61,0x69,0x63,0xa3,0xba,1,0x64,0x62,0x73,0x1b,0x63,0x72,0x69,0x70,0x74,0x69,
-0x6f,0x6e,0x61,0x6c,0x70,0x61,1,0x68,0x32,0x72,0x14,0x74,0x68,0x69,0x61,0x6e,
-0xa3,0xbd,0x13,0x6c,0x61,0x76,0x69,0xa3,0xbe,0x11,0x69,0x63,1,0x6e,0x3e,0x73,
-0x1a,0x69,0x79,0x61,0x71,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa5,0x1e,0x19,0x75,
-0x6d,0x62,0x65,0x72,0x66,0x6f,0x72,0x6d,0x73,0xa3,0xb2,4,0x65,0x74,0x6c,0xa2,
-0x82,0x6f,0xa2,0x9a,0x72,0xa2,0x9e,0x75,2,0x6a,0x34,0x6e,0x3e,0x72,0x14,0x6d,
-0x75,0x6b,0x68,0x69,0x43,0x14,0x61,0x72,0x61,0x74,0x69,0x45,0x18,0x6a,0x61,0x6c,
-0x61,0x67,0x6f,0x6e,0x64,0x69,0xa5,0x1c,1,0x6e,0xa2,0x46,0x6f,1,0x6d,0x6e,
-0x72,0x13,0x67,0x69,0x61,0x6e,0x5a,1,0x65,0x40,0x73,0x11,0x75,0x70,0xa2,0x87,
-0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x87,0x11,0x78,0x74,0xa4,0x1b,0x14,
-0x65,0x6e,0x64,0x65,0x64,0xa5,0x1b,0x1a,0x65,0x74,0x72,0x69,0x63,0x73,0x68,0x61,
-0x70,0x65,0x73,0x8c,0x12,0x65,0x78,0x74,0xa2,0xe3,0x14,0x65,0x6e,0x64,0x65,0x64,
-0xa3,0xe3,0x1e,0x65,0x72,0x61,0x6c,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,
-0x6f,0x6e,0x71,0x17,0x61,0x67,0x6f,0x6c,0x69,0x74,0x69,0x63,0xa2,0x88,0x12,0x73,
-0x75,0x70,0xa4,0xa,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0xa,0x13,0x74,
-0x68,0x69,0x63,0xa3,0x59,1,0x61,0x5c,0x65,0x11,0x65,0x6b,0x30,1,0x61,0x38,
-0x65,0x11,0x78,0x74,0x6e,0x14,0x65,0x6e,0x64,0x65,0x64,0x6f,0x17,0x6e,0x64,0x63,
-0x6f,0x70,0x74,0x69,0x63,0x31,0x13,0x6e,0x74,0x68,0x61,0xa3,0xe4,2,0x61,0xa2,
-0x48,0x65,0xa2,0xdf,0x69,1,0x67,0x30,0x72,0x14,0x61,0x67,0x61,0x6e,0x61,0x9d,
-0x10,0x68,1,0x70,0x3a,0x73,0x18,0x75,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x73,
-0xa3,0x4b,1,0x72,0x3c,0x75,0x19,0x73,0x75,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,
-0x73,0xa3,0x4c,0x11,0x69,0x76,0x1f,0x61,0x74,0x65,0x75,0x73,0x65,0x73,0x75,0x72,
-0x72,0x6f,0x67,0x61,0x74,0x65,0x73,0xa3,0x4c,2,0x6c,0x32,0x6e,0x9a,0x74,0x12,
-0x72,0x61,0x6e,0xa5,2,0x10,0x66,2,0x61,0x58,0x6d,0x70,0x77,0x14,0x69,0x64,
-0x74,0x68,0x61,0x1f,0x6e,0x64,0x66,0x75,0x6c,0x6c,0x77,0x69,0x64,0x74,0x68,0x66,
-0x6f,0x72,0x6d,0x73,0xa3,0x57,0x1a,0x6e,0x64,0x66,0x75,0x6c,0x6c,0x66,0x6f,0x72,
-0x6d,0x73,0xa3,0x57,0x13,0x61,0x72,0x6b,0x73,0xa3,0x52,2,0x67,0x34,0x69,0xa2,
-0x45,0x75,0x12,0x6e,0x6f,0x6f,0xa3,0x63,0x11,0x75,0x6c,0xa2,0x4a,2,0x63,0x3c,
-0x6a,0x5e,0x73,0x17,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x73,0xa3,0x4a,0x1f,0x6f,
-0x6d,0x70,0x61,0x74,0x69,0x62,0x69,0x6c,0x69,0x74,0x79,0x6a,0x61,0x6d,0x6f,0xa3,
-0x41,0x12,0x61,0x6d,0x6f,0x5c,0x17,0x65,0x78,0x74,0x65,0x6e,0x64,0x65,0x64,1,
-0x61,0xa3,0xb4,0x62,0xa3,0xb9,0x19,0x66,0x69,0x72,0x6f,0x68,0x69,0x6e,0x67,0x79,
-0x61,0xa5,0x1d,0x13,0x62,0x72,0x65,0x77,0x37,0x61,0xa4,5,0x62,0xa6,0x45,0x63,
-0xa8,0x1a,0x64,0xac,0xb8,0x65,5,0x6d,0xa2,0x6d,0x86,0x6e,0x96,0x74,0x15,0x68,
-0x69,0x6f,0x70,0x69,0x63,0x5e,1,0x65,0x40,0x73,0x11,0x75,0x70,0xa2,0x86,0x16,
-0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x86,0x11,0x78,0x74,0xa2,0x85,1,0x61,
-0xa3,0xc8,0x65,0x13,0x6e,0x64,0x65,0x64,0xa2,0x85,0x10,0x61,0xa3,0xc8,0x16,0x6f,
-0x74,0x69,0x63,0x6f,0x6e,0x73,0xa3,0xce,0x15,0x63,0x6c,0x6f,0x73,0x65,0x64,2,
-0x61,0x5a,0x63,0x9e,0x69,0x1c,0x64,0x65,0x6f,0x67,0x72,0x61,0x70,0x68,0x69,0x63,
-0x73,0x75,0x70,0xa2,0xc4,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xc4,0x16,
-0x6c,0x70,0x68,0x61,0x6e,0x75,0x6d,0x86,1,0x65,0x2c,0x73,0x11,0x75,0x70,0xa3,
-0xc3,0x13,0x72,0x69,0x63,0x73,0x86,0x18,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65,0x6e,
-0x74,0xa3,0xc3,0x11,0x6a,0x6b,0xa2,0x44,0x1f,0x6c,0x65,0x74,0x74,0x65,0x72,0x73,
-0x61,0x6e,0x64,0x6d,0x6f,0x6e,0x74,0x68,0x73,0xa3,0x44,0x61,0x4a,0x67,0x76,0x6c,
-1,0x62,0x30,0x79,0x13,0x6d,0x61,0x69,0x63,0xa5,0x25,0x13,0x61,0x73,0x61,0x6e,
-0xa3,0xe2,0x13,0x72,0x6c,0x79,0x64,0x1f,0x79,0x6e,0x61,0x73,0x74,0x69,0x63,0x63,
-0x75,0x6e,0x65,0x69,0x66,0x6f,0x72,0x6d,0xa5,1,0x1f,0x79,0x70,0x74,0x69,0x61,
-0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,1,0x66,0x26,0x73,0xa3,
-0xc2,0x1c,0x6f,0x72,0x6d,0x61,0x74,0x63,0x6f,0x6e,0x74,0x72,0x6f,0x6c,0x73,0xa5,
-0x24,7,0x6e,0xc0,0xe5,0x6e,0x3e,0x72,0xa2,0x5d,0x73,0xa2,0xd8,0x76,0x14,0x65,
-0x73,0x74,0x61,0x6e,0xa3,0xbc,1,0x61,0x92,0x63,0x13,0x69,0x65,0x6e,0x74,1,
-0x67,0x34,0x73,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xa5,0x13,0x72,0x65,0x65,
-0x6b,1,0x6d,0x34,0x6e,0x15,0x75,0x6d,0x62,0x65,0x72,0x73,0xa3,0x7f,0x13,0x75,
-0x73,0x69,0x63,0xa2,0x7e,0x19,0x61,0x6c,0x6e,0x6f,0x74,0x61,0x74,0x69,0x6f,0x6e,
-0xa3,0x7e,0x10,0x74,0x1f,0x6f,0x6c,0x69,0x61,0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,
-0x6c,0x79,0x70,0x68,0x73,0xa3,0xfe,2,0x61,0x32,0x6d,0xa2,0x71,0x72,0x12,0x6f,
-0x77,0x73,0x7d,0x12,0x62,0x69,0x63,0x38,3,0x65,0x4a,0x6d,0x66,0x70,0xa2,0x43,
-0x73,0x11,0x75,0x70,0xa2,0x80,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x80,
-0x11,0x78,0x74,1,0x61,0xa3,0xd2,0x65,0x14,0x6e,0x64,0x65,0x64,0x61,0xa3,0xd2,
-0x12,0x61,0x74,0x68,0xa2,0xd3,0x18,0x65,0x6d,0x61,0x74,0x69,0x63,0x61,0x6c,0x61,
-0x1f,0x6c,0x70,0x68,0x61,0x62,0x65,0x74,0x69,0x63,0x73,0x79,0x6d,0x62,0x6f,0x6c,
-0x73,0xa3,0xd3,1,0x66,0x42,0x72,0x1e,0x65,0x73,0x65,0x6e,0x74,0x61,0x74,0x69,
-0x6f,0x6e,0x66,0x6f,0x72,0x6d,0x73,1,0x61,0xa3,0x51,0x62,0xa3,0x55,0x14,0x65,
-0x6e,0x69,0x61,0x6e,0x35,0x12,0x63,0x69,0x69,0x23,0x64,0x9e,0x65,0xa2,0x42,0x68,
-0xa2,0x4d,0x6c,1,0x63,0x62,0x70,0x17,0x68,0x61,0x62,0x65,0x74,0x69,0x63,0x70,
-1,0x66,0xa3,0x50,0x72,0x1e,0x65,0x73,0x65,0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,
-0x66,0x6f,0x72,0x6d,0x73,0xa3,0x50,0x16,0x68,0x65,0x6d,0x69,0x63,0x61,0x6c,0xa2,
-0xd0,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xd0,0x12,0x6c,0x61,0x6d,0xa5,
-7,0x1a,0x67,0x65,0x61,0x6e,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa3,0x77,0x11,
-0x6f,0x6d,0xa3,0xfd,7,0x6f,0x71,0x6f,0x64,0x72,0xa2,0x41,0x75,0xa2,0x58,0x79,
-0x1b,0x7a,0x61,0x6e,0x74,0x69,0x6e,0x65,0x6d,0x75,0x73,0x69,0x63,0xa2,0x5b,0x18,
-0x61,0x6c,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x5b,1,0x70,0x34,0x78,0x16,
-0x64,0x72,0x61,0x77,0x69,0x6e,0x67,0x89,0x14,0x6f,0x6d,0x6f,0x66,0x6f,0xa0,0x12,
-0x65,0x78,0x74,0xa2,0x43,0x14,0x65,0x6e,0x64,0x65,0x64,0xa3,0x43,0x10,0x61,1,
-0x68,0x40,0x69,0x12,0x6c,0x6c,0x65,0x92,0x17,0x70,0x61,0x74,0x74,0x65,0x72,0x6e,
-0x73,0x93,0x11,0x6d,0x69,0xa3,0xc9,1,0x67,0x2c,0x68,0x11,0x69,0x64,0xa3,0x64,
-0x14,0x69,0x6e,0x65,0x73,0x65,0xa3,0x81,0x61,0x48,0x65,0xa2,0x4e,0x68,0xa2,0x52,
-0x6c,0x1a,0x6f,0x63,0x6b,0x65,0x6c,0x65,0x6d,0x65,0x6e,0x74,0x73,0x8b,3,0x6c,
-0x34,0x6d,0x40,0x73,0x66,0x74,0x11,0x61,0x6b,0xa3,0xc7,0x14,0x69,0x6e,0x65,0x73,
-0x65,0xa3,0x93,0x11,0x75,0x6d,0xa2,0xb1,0x12,0x73,0x75,0x70,0xa2,0xca,0x16,0x70,
-0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xca,1,0x69,0x30,0x73,0x13,0x61,0x76,0x61,
-0x68,0xa3,0xdd,0x15,0x63,0x6c,0x61,0x74,0x69,0x6e,0x23,0x14,0x6e,0x67,0x61,0x6c,
-0x69,0x41,0x16,0x61,0x69,0x6b,0x73,0x75,0x6b,0x69,0xa5,8,5,0x6f,0xc1,0x4c,
-0x6f,0xa2,0x55,0x75,0xa4,0x10,0x79,1,0x70,0x9c,0x72,0x14,0x69,0x6c,0x6c,0x69,
-0x63,0x32,1,0x65,0x4c,0x73,0x11,0x75,0x70,0xa2,0x61,0x16,0x70,0x6c,0x65,0x6d,
-0x65,0x6e,0x74,0xa2,0x61,0x12,0x61,0x72,0x79,0xa3,0x61,0x11,0x78,0x74,3,0x61,
-0xa3,0x9e,0x62,0xa3,0xa0,0x63,0xa5,9,0x65,0x13,0x6e,0x64,0x65,0x64,2,0x61,
-0xa3,0x9e,0x62,0xa3,0xa0,0x63,0xa5,9,0x1c,0x72,0x69,0x6f,0x74,0x73,0x79,0x6c,
-0x6c,0x61,0x62,0x61,0x72,0x79,0xa3,0x7b,3,0x6d,0x5a,0x6e,0xa2,0x95,0x70,0xa2,
-0xa0,0x75,0x17,0x6e,0x74,0x69,0x6e,0x67,0x72,0x6f,0x64,0xa2,0x9a,0x17,0x6e,0x75,
-0x6d,0x65,0x72,0x61,0x6c,0x73,0xa3,0x9a,2,0x62,0x3a,0x6d,0xa2,0x5f,0x70,0x15,
-0x61,0x74,0x6a,0x61,0x6d,0x6f,0xa3,0x41,0x14,0x69,0x6e,0x69,0x6e,0x67,2,0x64,
-0x46,0x68,0x9e,0x6d,0x1d,0x61,0x72,0x6b,0x73,0x66,0x6f,0x72,0x73,0x79,0x6d,0x62,
-0x6f,0x6c,0x73,0x77,0x1e,0x69,0x61,0x63,0x72,0x69,0x74,0x69,0x63,0x61,0x6c,0x6d,
-0x61,0x72,0x6b,0x73,0x2e,2,0x65,0x40,0x66,0xa6,0x41,0x73,0x18,0x75,0x70,0x70,
-0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x83,0x16,0x78,0x74,0x65,0x6e,0x64,0x65,0x64,
-0xa3,0xe0,0x17,0x61,0x6c,0x66,0x6d,0x61,0x72,0x6b,0x73,0xa3,0x52,0x11,0x6f,0x6e,
-0x1f,0x69,0x6e,0x64,0x69,0x63,0x6e,0x75,0x6d,0x62,0x65,0x72,0x66,0x6f,0x72,0x6d,
-0x73,0xa3,0xb2,0x1b,0x74,0x72,0x6f,0x6c,0x70,0x69,0x63,0x74,0x75,0x72,0x65,0x73,
-0x83,0x12,0x74,0x69,0x63,0xa2,0x84,0x1b,0x65,0x70,0x61,0x63,0x74,0x6e,0x75,0x6d,
-0x62,0x65,0x72,0x73,0xa3,0xdf,1,0x6e,0x3e,0x72,0x1b,0x72,0x65,0x6e,0x63,0x79,
-0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x75,0x15,0x65,0x69,0x66,0x6f,0x72,0x6d,0xa2,
-0x98,0x16,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa2,0x99,0x1d,0x61,0x6e,0x64,0x70,
-0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa3,0x99,0x61,0xa2,0xe1,0x68,
-0xa4,0xb,0x6a,0x10,0x6b,0xa2,0x47,4,0x63,0x8c,0x65,0xa2,0x80,0x72,0xa2,0x98,
-0x73,0xa2,0xaa,0x75,0x1f,0x6e,0x69,0x66,0x69,0x65,0x64,0x69,0x64,0x65,0x6f,0x67,
-0x72,0x61,0x70,0x68,0x73,0xa2,0x47,0x18,0x65,0x78,0x74,0x65,0x6e,0x73,0x69,0x6f,
-0x6e,6,0x64,0x6b,0x64,0xa3,0xd1,0x65,0xa5,0,0x66,0xa5,0x12,0x67,0xa5,0x2e,
-0x14,0x6f,0x6d,0x70,0x61,0x74,0xa2,0x45,1,0x66,0x96,0x69,1,0x62,0x44,0x64,
-0x17,0x65,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa2,0x4f,0x12,0x73,0x75,0x70,0xa3,
-0x5f,0x14,0x69,0x6c,0x69,0x74,0x79,0xa2,0x45,1,0x66,0x54,0x69,0x18,0x64,0x65,
-0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa2,0x4f,0x19,0x73,0x75,0x70,0x70,0x6c,0x65,
-0x6d,0x65,0x6e,0x74,0xa3,0x5f,0x13,0x6f,0x72,0x6d,0x73,0xa3,0x53,0x11,0x78,0x74,
-6,0x64,0xc,0x64,0xa3,0xd1,0x65,0xa5,0,0x66,0xa5,0x12,0x67,0xa5,0x2e,0x61,
-0xa3,0x46,0x62,0xa3,0x5e,0x63,0xa3,0xc5,0x19,0x61,0x64,0x69,0x63,0x61,0x6c,0x73,
-0x73,0x75,0x70,0x94,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0x95,1,0x74,0x50,
-0x79,0x14,0x6d,0x62,0x6f,0x6c,0x73,0x9a,0x1d,0x61,0x6e,0x64,0x70,0x75,0x6e,0x63,
-0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x9b,0x14,0x72,0x6f,0x6b,0x65,0x73,0xa3,0x82,
-2,0x6e,0x48,0x72,0x64,0x75,0x1d,0x63,0x61,0x73,0x69,0x61,0x6e,0x61,0x6c,0x62,
-0x61,0x6e,0x69,0x61,0x6e,0xa3,0xde,0x1d,0x61,0x64,0x69,0x61,0x6e,0x73,0x79,0x6c,
-0x6c,0x61,0x62,0x69,0x63,0x73,0x63,0x12,0x69,0x61,0x6e,0xa3,0xa8,2,0x61,0x3a,
-0x65,0x4c,0x6f,0x16,0x72,0x61,0x73,0x6d,0x69,0x61,0x6e,0xa5,0x2d,1,0x6b,0x26,
-0x6d,0xa3,0xa4,0x11,0x6d,0x61,0xa3,0xd4,1,0x72,0x38,0x73,0x17,0x73,0x73,0x79,
-0x6d,0x62,0x6f,0x6c,0x73,0xa5,0x19,0x13,0x6f,0x6b,0x65,0x65,0x60,0x12,0x73,0x75,
-0x70,0xa2,0xff,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xff,3,0x65,0x3e,
-0x69,0x7e,0x6f,0xa2,0x69,0x75,0x15,0x70,0x6c,0x6f,0x79,0x61,0x6e,0xa3,0xe1,1,
-0x73,0x50,0x76,0x16,0x61,0x6e,0x61,0x67,0x61,0x72,0x69,0x3e,0x12,0x65,0x78,0x74,
-0xa2,0xb3,0x14,0x65,0x6e,0x64,0x65,0x64,0xa3,0xb3,0x13,0x65,0x72,0x65,0x74,0xa3,
-0x5a,2,0x61,0x3a,0x6e,0x82,0x76,0x16,0x65,0x73,0x61,0x6b,0x75,0x72,0x75,0xa5,
-0x2f,0x18,0x63,0x72,0x69,0x74,0x69,0x63,0x61,0x6c,0x73,0x2e,2,0x65,0x30,0x66,
-0x36,0x73,0x11,0x75,0x70,0xa3,0x83,0x11,0x78,0x74,0xa3,0xe0,0x18,0x6f,0x72,0x73,
-0x79,0x6d,0x62,0x6f,0x6c,0x73,0x77,0x14,0x67,0x62,0x61,0x74,0x73,0x91,1,0x67,
-0x3e,0x6d,0x12,0x69,0x6e,0x6f,0xa2,0xab,0x14,0x74,0x69,0x6c,0x65,0x73,0xa3,0xab,
-0x11,0x72,0x61,0xa5,0x1a,8,0x6d,0x5f,0x6d,0x3a,0x6e,0x48,0x73,0x7a,0x76,0xa2,
-0x4b,0x77,0x12,0x69,0x64,0x65,0x43,0x11,0x65,0x64,0x32,0x12,0x69,0x61,0x6c,0x33,
-2,0x61,0x40,0x62,0x37,0x6f,1,0x62,0x28,0x6e,0x10,0x65,0x21,0x13,0x72,0x65,
-0x61,0x6b,0x37,0x10,0x72,0x34,0x12,0x72,0x6f,0x77,0x35,2,0x6d,0x38,0x71,0x46,
-0x75,1,0x62,0x3d,0x70,0x3e,0x11,0x65,0x72,0x3f,1,0x61,0x24,0x6c,0x39,0x11,
-0x6c,0x6c,0x39,1,0x72,0x3b,0x75,0x12,0x61,0x72,0x65,0x3b,0x12,0x65,0x72,0x74,
-0x40,0x13,0x69,0x63,0x61,0x6c,0x41,0x63,0x58,0x65,0x92,0x66,0x96,0x69,1,0x6e,
-0x36,0x73,0x10,0x6f,0x30,0x14,0x6c,0x61,0x74,0x65,0x64,0x31,0x11,0x69,0x74,0x2e,
-0x12,0x69,0x61,0x6c,0x2f,2,0x61,0x36,0x69,0x48,0x6f,0x10,0x6d,0x24,0x12,0x70,
-0x61,0x74,0x25,0x10,0x6e,0x22,0x15,0x6f,0x6e,0x69,0x63,0x61,0x6c,0x23,0x13,0x72,
-0x63,0x6c,0x65,0x27,0x11,0x6e,0x63,0x27,2,0x69,0x3a,0x6f,0x44,0x72,0x10,0x61,
-0x2c,0x14,0x63,0x74,0x69,0x6f,0x6e,0x2d,0x10,0x6e,0x28,0x11,0x61,0x6c,0x29,0x11,
-0x6e,0x74,0x2b,4,0x61,0x3a,0x66,0x4c,0x68,0x5e,0x6e,0x70,0x77,0x2a,0x12,0x69,
-0x64,0x65,0x2b,0x22,0x17,0x6d,0x62,0x69,0x67,0x75,0x6f,0x75,0x73,0x23,0x26,0x17,
-0x75,0x6c,0x6c,0x77,0x69,0x64,0x74,0x68,0x27,0x24,0x17,0x61,0x6c,0x66,0x77,0x69,
-0x64,0x74,0x68,0x25,0x20,1,0x61,0x30,0x65,0x14,0x75,0x74,0x72,0x61,0x6c,0x21,
-0x28,0x13,0x72,0x72,0x6f,0x77,0x29,0xd,0x6e,0xc0,0xfb,0x73,0x6d,0x73,0x3a,0x74,
-0x98,0x75,0xa2,0x49,0x7a,2,0x6c,0x3b,0x70,0x3d,0x73,0x39,5,0x6f,0x28,0x6f,
-0x57,0x70,0x34,0x75,0x16,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x45,0x11,0x61,0x63,
-1,0x65,0x32,0x69,0x15,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x31,0x18,0x73,0x65,0x70,
-0x61,0x72,0x61,0x74,0x6f,0x72,0x39,0x63,0x53,0x6b,0x55,0x6d,0x51,0x1d,0x69,0x74,
-0x6c,0x65,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x27,1,0x6e,0x40,
-0x70,0x1c,0x70,0x65,0x72,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x23,
-0x17,0x61,0x73,0x73,0x69,0x67,0x6e,0x65,0x64,0x21,0x6e,0x8a,0x6f,0xa2,0x47,0x70,
-8,0x66,0x14,0x66,0x5b,0x69,0x59,0x6f,0x4f,0x72,0x24,0x73,0x49,0x17,0x69,0x76,
-0x61,0x74,0x65,0x75,0x73,0x65,0x43,0x61,0x2c,0x63,0x4d,0x64,0x47,0x65,0x4b,0x1f,
-0x72,0x61,0x67,0x72,0x61,0x70,0x68,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,
-0x3d,2,0x64,0x33,0x6c,0x35,0x6f,0x36,0x1b,0x6e,0x73,0x70,0x61,0x63,0x69,0x6e,
-0x67,0x6d,0x61,0x72,0x6b,0x2d,1,0x70,0x7c,0x74,0x12,0x68,0x65,0x72,3,0x6c,
-0x38,0x6e,0x42,0x70,0x4c,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0x57,0x14,0x65,0x74,
-0x74,0x65,0x72,0x2b,0x14,0x75,0x6d,0x62,0x65,0x72,0x37,0x19,0x75,0x6e,0x63,0x74,
-0x75,0x61,0x74,0x69,0x6f,0x6e,0x4f,0x1c,0x65,0x6e,0x70,0x75,0x6e,0x63,0x74,0x75,
-0x61,0x74,0x69,0x6f,0x6e,0x49,0x66,0x9e,0x66,0x88,0x69,0xa2,0x4b,0x6c,0xa2,0x5c,
-0x6d,4,0x61,0x60,0x63,0x31,0x65,0x2f,0x6e,0x2d,0x6f,0x15,0x64,0x69,0x66,0x69,
-0x65,0x72,1,0x6c,0x30,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0x55,0x14,0x65,0x74,
-0x74,0x65,0x72,0x29,0x17,0x74,0x68,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x51,1,0x69,
-0x2e,0x6f,0x13,0x72,0x6d,0x61,0x74,0x41,0x1d,0x6e,0x61,0x6c,0x70,0x75,0x6e,0x63,
-0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x5b,0x10,0x6e,0x1f,0x69,0x74,0x69,0x61,0x6c,
-0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x59,6,0x6d,0x18,0x6d,
-0x29,0x6f,0x28,0x74,0x27,0x75,0x23,0x2a,0x1c,0x77,0x65,0x72,0x63,0x61,0x73,0x65,
-0x6c,0x65,0x74,0x74,0x65,0x72,0x25,0x65,0x28,0x69,0x3c,0x6c,0x25,0x19,0x74,0x74,
-0x65,0x72,0x6e,0x75,0x6d,0x62,0x65,0x72,0x35,0x1a,0x6e,0x65,0x73,0x65,0x70,0x61,
-0x72,0x61,0x74,0x6f,0x72,0x3b,0x63,0x44,0x64,0xa2,0x60,0x65,0x1b,0x6e,0x63,0x6c,
-0x6f,0x73,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x2f,6,0x6e,0x39,0x6e,0x46,0x6f,
-0x4e,0x73,0x45,0x75,0x1b,0x72,0x72,0x65,0x6e,0x63,0x79,0x73,0x79,0x6d,0x62,0x6f,
-0x6c,0x53,0x20,0x12,0x74,0x72,0x6c,0x3f,0x42,0x10,0x6e,1,0x6e,0x2c,0x74,0x12,
-0x72,0x6f,0x6c,0x3f,0x1f,0x65,0x63,0x74,0x6f,0x72,0x70,0x75,0x6e,0x63,0x74,0x75,
-0x61,0x74,0x69,0x6f,0x6e,0x4d,0x63,0x3f,0x66,0x41,0x6c,0x1d,0x6f,0x73,0x65,0x70,
-0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x4b,2,0x61,0x30,0x65,0x4a,
-0x69,0x12,0x67,0x69,0x74,0x33,0x1c,0x73,0x68,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,
-0x74,0x69,0x6f,0x6e,0x47,0x1a,0x63,0x69,0x6d,0x61,0x6c,0x6e,0x75,0x6d,0x62,0x65,
-0x72,0x33,0,0x12,0x6d,0xc2,0x3f,0x73,0xa1,0x73,0x4e,0x74,0xa2,0x56,0x77,0xa2,
-0x72,0x79,0xa2,0x73,0x7a,1,0x61,0x2c,0x68,0x12,0x61,0x69,0x6e,0x8b,0x11,0x69,
-0x6e,0x85,5,0x74,0x22,0x74,0x38,0x77,0x4c,0x79,0x16,0x72,0x69,0x61,0x63,0x77,
-0x61,0x77,0x6f,0x18,0x72,0x61,0x69,0x67,0x68,0x74,0x77,0x61,0x77,0xa3,0x55,0x15,
-0x61,0x73,0x68,0x6b,0x61,0x66,0x6d,0x61,0x2e,0x65,0x38,0x68,0x11,0x69,0x6e,0x6b,
-0x10,0x64,0x62,0x11,0x68,0x65,0x65,1,0x65,0x2e,0x6d,0x13,0x6b,0x61,0x74,0x68,
-0x69,0x10,0x6e,0x67,1,0x61,0x4e,0x65,1,0x68,0x28,0x74,0x10,0x68,0x77,0x16,
-0x6d,0x61,0x72,0x62,0x75,0x74,0x61,0x74,0x13,0x67,0x6f,0x61,0x6c,0x3d,1,0x68,
-0x71,0x77,0x73,0x11,0x61,0x77,0x79,1,0x65,0x32,0x75,0x11,0x64,0x68,0x80,0x11,
-0x68,0x65,0x83,0x10,0x68,0x7a,1,0x62,0x34,0x77,0x16,0x69,0x74,0x68,0x74,0x61,
-0x69,0x6c,0x7f,0x14,0x61,0x72,0x72,0x65,0x65,0x7d,0x6d,0x6c,0x6e,0xa4,0x6b,0x70,
-0xa4,0x88,0x71,0xa4,0x88,0x72,1,0x65,0x38,0x6f,0x18,0x68,0x69,0x6e,0x67,0x79,
-0x61,0x79,0x65,0x68,0x93,1,0x68,0x5f,0x76,0x16,0x65,0x72,0x73,0x65,0x64,0x70,
-0x65,0x61,2,0x61,0x2e,0x65,0xa4,0x3e,0x69,0x10,0x6d,0x53,1,0x6c,0xa2,0xe7,
-0x6e,0x16,0x69,0x63,0x68,0x61,0x65,0x61,0x6e,0,0x12,0x6e,0x76,0x73,0x51,0x73,
-0x3e,0x74,0x5c,0x77,0xa0,0x79,0xa2,0x42,0x7a,0x13,0x61,0x79,0x69,0x6e,0xa3,0x54,
-0x10,0x61,1,0x64,0x2e,0x6d,0x12,0x65,0x6b,0x68,0xa3,0x4c,0x11,0x68,0x65,0xa3,
-0x4b,3,0x61,0x38,0x65,0x3c,0x68,0x4a,0x77,0x13,0x65,0x6e,0x74,0x79,0xa3,0x51,
-0x10,0x77,0xa3,0x4d,1,0x6e,0xa3,0x4e,0x74,0x10,0x68,0xa3,0x4f,0x14,0x61,0x6d,
-0x65,0x64,0x68,0xa3,0x50,0x11,0x61,0x77,0xa3,0x52,0x12,0x6f,0x64,0x68,0xa3,0x53,
-0x6e,0x3a,0x6f,0x40,0x70,0x46,0x71,0x4a,0x72,0x12,0x65,0x73,0x68,0xa3,0x4a,0x11,
-0x75,0x6e,0xa3,0x46,0x11,0x6e,0x65,0xa3,0x47,0x10,0x65,0xa3,0x48,0x12,0x6f,0x70,
-0x68,0xa3,0x49,0x67,0x33,0x67,0x38,0x68,0x40,0x6b,0x5e,0x6c,0x66,0x6d,0x11,0x65,
-0x6d,0xa3,0x45,0x13,0x69,0x6d,0x65,0x6c,0xa1,1,0x65,0x32,0x75,0x14,0x6e,0x64,
-0x72,0x65,0x64,0xa3,0x42,0x11,0x74,0x68,0xa3,0x41,0x12,0x61,0x70,0x68,0xa3,0x43,
-0x14,0x61,0x6d,0x65,0x64,0x68,0xa3,0x44,0x61,0x34,0x62,0x4a,0x64,0x50,0x66,0x12,
-0x69,0x76,0x65,0x9f,1,0x6c,0x2a,0x79,0x11,0x69,0x6e,0x97,0x12,0x65,0x70,0x68,
-0x95,0x12,0x65,0x74,0x68,0x99,1,0x61,0x30,0x68,0x14,0x61,0x6d,0x65,0x64,0x68,
-0x9d,0x13,0x6c,0x65,0x74,0x68,0x9b,0x15,0x61,0x79,0x61,0x6c,0x61,0x6d,6,0x6e,
-0x2c,0x6e,0x34,0x72,0x5e,0x73,0x62,0x74,0x11,0x74,0x61,0xa3,0x63,2,0x67,0x2e,
-0x6e,0x32,0x79,0x10,0x61,0xa3,0x60,0x10,0x61,0xa3,0x5d,1,0x61,0xa3,0x5e,0x6e,
-0x10,0x61,0xa3,0x5f,0x10,0x61,0xa3,0x61,0x11,0x73,0x61,0xa3,0x62,0x62,0x3c,0x6a,
-0x42,0x6c,0x10,0x6c,1,0x61,0xa3,0x5b,0x6c,0x10,0x61,0xa3,0x5c,0x11,0x68,0x61,
-0xa3,0x59,0x10,0x61,0xa3,0x5a,0x11,0x65,0x6d,0x51,2,0x6f,0x2c,0x75,0x50,0x79,
-0x10,0x61,0x91,1,0x6a,0x28,0x6f,0x10,0x6e,0x55,0x1a,0x6f,0x69,0x6e,0x69,0x6e,
-0x67,0x67,0x72,0x6f,0x75,0x70,0x21,0x10,0x6e,0x57,0x10,0x65,0x59,0x10,0x61,1,
-0x66,0x5b,0x70,0x10,0x68,0x5d,0x66,0x9a,0x66,0x42,0x67,0x7a,0x68,0x8a,0x6b,0xa2,
-0x75,0x6c,0x11,0x61,0x6d,0x4c,0x12,0x61,0x64,0x68,0x4f,2,0x61,0x3e,0x65,0x4a,
-0x69,0x19,0x6e,0x61,0x6c,0x73,0x65,0x6d,0x6b,0x61,0x74,0x68,0x35,0x15,0x72,0x73,
-0x69,0x79,0x65,0x68,0x8f,0x86,0x10,0x68,0x33,0x10,0x61,1,0x66,0x37,0x6d,0x11,
-0x61,0x6c,0x39,1,0x61,0x40,0x65,0x3e,1,0x68,0x28,0x74,0x10,0x68,0x45,0x40,
-0x13,0x67,0x6f,0x61,0x6c,0x43,2,0x68,0x3b,0x6d,0x5c,0x6e,0x1a,0x69,0x66,0x69,
-0x72,0x6f,0x68,0x69,0x6e,0x67,0x79,0x61,1,0x6b,0x2a,0x70,0x10,0x61,0xa3,0x65,
-0x15,0x69,0x6e,0x6e,0x61,0x79,0x61,0xa3,0x64,0x1a,0x7a,0x61,0x6f,0x6e,0x68,0x65,
-0x68,0x67,0x6f,0x61,0x6c,0x3d,2,0x61,0x3a,0x68,0x44,0x6e,0x17,0x6f,0x74,0x74,
-0x65,0x64,0x68,0x65,0x68,0x4b,1,0x66,0x47,0x70,0x10,0x68,0x49,0x12,0x61,0x70,
-0x68,0x89,0x61,0x2e,0x62,0x8a,0x64,0xa2,0x51,0x65,0x31,2,0x66,0x3c,0x69,0x70,
-0x6c,1,0x61,0x28,0x65,0x10,0x66,0x27,0x11,0x70,0x68,0x25,0x14,0x72,0x69,0x63,
-0x61,0x6e,2,0x66,0x30,0x6e,0x36,0x71,0x11,0x61,0x66,0xa3,0x58,0x11,0x65,0x68,
-0xa3,0x56,0x12,0x6f,0x6f,0x6e,0xa3,0x57,0x10,0x6e,0x23,1,0x65,0x4a,0x75,0x10,
-0x72,0x1f,0x75,0x73,0x68,0x61,0x73,0x6b,0x69,0x79,0x65,0x68,0x62,0x61,0x72,0x72,
-0x65,0x65,0x8d,1,0x68,0x29,0x74,0x10,0x68,0x2b,0x11,0x61,0x6c,0x2c,0x16,0x61,
-0x74,0x68,0x72,0x69,0x73,0x68,0x2f,7,0x6e,0x2e,0x6e,0x2c,0x72,0x3e,0x74,0x56,
-0x75,0x21,0x18,0x6f,0x6e,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x21,0x28,0x1a,0x69,
-0x67,0x68,0x74,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x29,0x2a,0x19,0x72,0x61,0x6e,
-0x73,0x70,0x61,0x72,0x65,0x6e,0x74,0x2b,0x63,0x23,0x64,0x40,0x6a,0x56,0x6c,0x26,
-0x19,0x65,0x66,0x74,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x27,0x24,0x19,0x75,0x61,
-0x6c,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x25,0x19,0x6f,0x69,0x6e,0x63,0x61,0x75,
-0x73,0x69,0x6e,0x67,0x23,0,0x13,0x6e,0xc0,0xd0,0x73,0x49,0x73,0x48,0x75,0x78,
-0x77,0x84,0x78,0x9c,0x7a,0x10,0x77,0x58,1,0x6a,0x75,0x73,0x13,0x70,0x61,0x63,
-0x65,0x59,4,0x61,0x51,0x67,0x53,0x70,0x28,0x75,0x30,0x79,0x57,0x54,0x12,0x61,
-0x63,0x65,0x55,0x16,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x53,0x15,0x6e,0x6b,0x6e,
-0x6f,0x77,0x6e,0x21,1,0x6a,0x5d,0x6f,0x17,0x72,0x64,0x6a,0x6f,0x69,0x6e,0x65,
-0x72,0x5d,0x10,0x78,0x21,0x6e,0x60,0x6f,0xa2,0x41,0x70,0xa2,0x50,0x71,0xa2,0x6e,
-0x72,1,0x65,0x24,0x69,0x6f,0x1e,0x67,0x69,0x6f,0x6e,0x61,0x6c,0x69,0x6e,0x64,
-0x69,0x63,0x61,0x74,0x6f,0x72,0x6f,4,0x65,0x3e,0x6c,0x5b,0x6f,0x46,0x73,0x45,
-0x75,0x46,0x14,0x6d,0x65,0x72,0x69,0x63,0x47,0x15,0x78,0x74,0x6c,0x69,0x6e,0x65,
-0x5b,0x17,0x6e,0x73,0x74,0x61,0x72,0x74,0x65,0x72,0x45,0x10,0x70,0x48,0x1c,0x65,
-0x6e,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x49,1,0x6f,0x3e,
-0x72,0x4c,0x1a,0x65,0x66,0x69,0x78,0x6e,0x75,0x6d,0x65,0x72,0x69,0x63,0x4d,0x4a,
-0x1b,0x73,0x74,0x66,0x69,0x78,0x6e,0x75,0x6d,0x65,0x72,0x69,0x63,0x4b,0x10,0x75,
-0x4e,0x16,0x6f,0x74,0x61,0x74,0x69,0x6f,0x6e,0x4f,0x68,0x7b,0x68,0x50,0x69,0x86,
-0x6a,0xa2,0x61,0x6c,0xa2,0x65,0x6d,0x1c,0x61,0x6e,0x64,0x61,0x74,0x6f,0x72,0x79,
-0x62,0x72,0x65,0x61,0x6b,0x2d,4,0x32,0x5f,0x33,0x61,0x65,0x34,0x6c,0x6d,0x79,
-0x3a,0x13,0x70,0x68,0x65,0x6e,0x3b,0x19,0x62,0x72,0x65,0x77,0x6c,0x65,0x74,0x74,
-0x65,0x72,0x6d,2,0x64,0x28,0x6e,0x3c,0x73,0x41,0x3c,0x18,0x65,0x6f,0x67,0x72,
-0x61,0x70,0x68,0x69,0x63,0x3d,0x3e,1,0x66,0x3e,0x73,0x11,0x65,0x70,1,0x61,
-0x22,0x65,0x14,0x72,0x61,0x62,0x6c,0x65,0x3f,0x18,0x69,0x78,0x6e,0x75,0x6d,0x65,
-0x72,0x69,0x63,0x41,2,0x6c,0x63,0x74,0x65,0x76,0x67,1,0x66,0x43,0x69,0x15,
-0x6e,0x65,0x66,0x65,0x65,0x64,0x43,0x61,0x40,0x62,0x70,0x63,0xa2,0x55,0x65,0xa2,
-0xdb,0x67,0x10,0x6c,0x38,0x11,0x75,0x65,0x39,2,0x69,0x23,0x6c,0x34,0x6d,0x16,
-0x62,0x69,0x67,0x75,0x6f,0x75,0x73,0x23,0x24,0x17,0x70,0x68,0x61,0x62,0x65,0x74,
-0x69,0x63,0x25,4,0x32,0x27,0x61,0x29,0x62,0x2b,0x6b,0x2d,0x72,0x12,0x65,0x61,
-0x6b,2,0x61,0x36,0x62,0x3e,0x73,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x57,0x13,
-0x66,0x74,0x65,0x72,0x29,1,0x65,0x2a,0x6f,0x11,0x74,0x68,0x27,0x13,0x66,0x6f,
-0x72,0x65,0x2b,7,0x6d,0x51,0x6d,0x33,0x6f,0x28,0x70,0x69,0x72,0x35,1,0x6d,
-0x76,0x6e,1,0x64,0x3c,0x74,0x1a,0x69,0x6e,0x67,0x65,0x6e,0x74,0x62,0x72,0x65,
-0x61,0x6b,0x2f,0x15,0x69,0x74,0x69,0x6f,0x6e,0x61,0x1f,0x6c,0x6a,0x61,0x70,0x61,
-0x6e,0x65,0x73,0x65,0x73,0x74,0x61,0x72,0x74,0x65,0x72,0x6b,1,0x62,0x3a,0x70,
-0x19,0x6c,0x65,0x78,0x63,0x6f,0x6e,0x74,0x65,0x78,0x74,0x51,0x18,0x69,0x6e,0x69,
-0x6e,0x67,0x6d,0x61,0x72,0x6b,0x33,0x61,0x6a,0x62,0x2f,0x6a,0x6b,0x6c,0x30,0x13,
-0x6f,0x73,0x65,0x70,1,0x61,0x38,0x75,0x18,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,
-0x6f,0x6e,0x31,0x18,0x72,0x65,0x6e,0x74,0x68,0x65,0x73,0x69,0x73,0x69,0x1b,0x72,
-0x72,0x69,0x61,0x67,0x65,0x72,0x65,0x74,0x75,0x72,0x6e,0x35,2,0x62,0x3e,0x6d,
-0x46,0x78,0x36,0x18,0x63,0x6c,0x61,0x6d,0x61,0x74,0x69,0x6f,0x6e,0x37,0x70,0x12,
-0x61,0x73,0x65,0x71,0x72,0x16,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x73,1,0x64,
-0x42,0x6e,1,0x6f,0x32,0x75,0x26,0x14,0x6d,0x65,0x72,0x69,0x63,0x27,0x11,0x6e,
-0x65,0x21,1,0x65,0x2e,0x69,0x24,0x12,0x67,0x69,0x74,0x25,0x22,0x14,0x63,0x69,
-0x6d,0x61,0x6c,0x23,0,0x18,0x6e,0xc4,0x2a,0x74,0xc1,0x6d,0x77,0x96,0x77,0xa2,
-0x4c,0x78,0xa2,0x70,0x79,0xa2,0x7a,0x7a,6,0x73,0x1e,0x73,0x34,0x78,0x42,0x79,
-0x48,0x7a,0x11,0x7a,0x7a,0xa3,0x67,0x10,0x79,1,0x65,0xa3,0xae,0x6d,0xa3,0x81,
-0x11,0x78,0x78,0xa3,0x66,0x11,0x79,0x79,0x21,0x61,0x30,0x69,0x58,0x6d,0x11,0x74,
-0x68,0xa3,0x80,0x10,0x6e,1,0x61,0x26,0x62,0xa3,0xb1,0x1a,0x62,0x61,0x7a,0x61,
-0x72,0x73,0x71,0x75,0x61,0x72,0x65,0xa3,0xb1,0x11,0x6e,0x68,0x23,2,0x61,0x30,
-0x63,0x5a,0x6f,0x11,0x6c,0x65,0xa3,0x9b,1,0x6e,0x3c,0x72,0x10,0x61,0xa2,0x92,
-0x15,0x6e,0x67,0x63,0x69,0x74,0x69,0xa3,0x92,0x12,0x63,0x68,0x6f,0xa3,0xbc,0x11,
-0x68,0x6f,0xa3,0xbc,1,0x70,0x2c,0x73,0x11,0x75,0x78,0xa3,0x65,0x11,0x65,0x6f,
-0x9b,1,0x65,0x2c,0x69,0x72,0x11,0x69,0x69,0x73,0x11,0x7a,0x69,0xa2,0xc0,0x11,
-0x64,0x69,0xa3,0xc0,0x74,0x4a,0x75,0xa2,0xba,0x76,1,0x61,0x2c,0x69,0x11,0x73,
-0x70,0xa3,0x64,0x10,0x69,0xa2,0x63,0x10,0x69,0xa3,0x63,5,0x67,0x36,0x67,0x68,
-0x68,0x6c,0x69,2,0x62,0x3a,0x66,0x4a,0x72,0x10,0x68,0xa2,0x9e,0x12,0x75,0x74,
-0x61,0xa3,0x9e,1,0x65,0x24,0x74,0x6f,0x12,0x74,0x61,0x6e,0x6f,0x14,0x69,0x6e,
-0x61,0x67,0x68,0x99,0x11,0x6c,0x67,0x75,0x10,0x61,1,0x61,0x24,0x69,0x6d,0x6a,
-0x11,0x6e,0x61,0x6b,0x61,0x30,0x65,0xa2,0x5b,0x66,0x11,0x6e,0x67,0x99,6,0x6c,
-0x21,0x6c,0x32,0x6d,0x38,0x6e,0x44,0x76,0x10,0x74,0xa3,0x7f,1,0x65,0x89,0x75,
-0x97,1,0x69,0x24,0x6c,0x67,0x10,0x6c,0x67,0x10,0x67,0xa2,0x9a,0x11,0x75,0x74,
-0xa3,0x9a,0x67,0x36,0x69,0x52,0x6b,0x10,0x72,0xa2,0x99,0x10,0x69,0xa3,0x99,1,
-0x61,0x30,0x62,0x7a,0x13,0x61,0x6e,0x77,0x61,0x7b,0x12,0x6c,0x6f,0x67,0x75,2,
-0x6c,0x32,0x74,0x34,0x76,0x12,0x69,0x65,0x74,0xa3,0x7f,0x10,0x65,0x89,0x12,0x68,
-0x61,0x6d,0xa3,0x6a,1,0x6c,0x2a,0x6e,0x10,0x67,0xa3,0x62,0x10,0x75,0x68,0x11,
-0x67,0x75,0x69,1,0x67,0x32,0x6e,0x14,0x6b,0x6e,0x6f,0x77,0x6e,0xa3,0x67,0x11,
-0x61,0x72,0x8a,0x13,0x69,0x74,0x69,0x63,0x8b,0x71,0xc1,0x13,0x71,0xa2,0xde,0x72,
-0xa2,0xe3,0x73,6,0x69,0x8a,0x69,0x72,0x6f,0xa2,0x4c,0x75,0xa2,0x75,0x79,1,
-0x6c,0x46,0x72,4,0x63,0x65,0x65,0xa3,0x5f,0x69,0x2c,0x6a,0xa3,0x60,0x6e,0xa3,
-0x61,0x11,0x61,0x63,0x65,0x10,0x6f,0x94,0x16,0x74,0x69,0x6e,0x61,0x67,0x72,0x69,
-0x95,2,0x64,0x3c,0x67,0x4c,0x6e,1,0x64,0xa3,0x91,0x68,0x62,0x12,0x61,0x6c,
-0x61,0x63,0x10,0x64,0xa2,0xa6,0x12,0x68,0x61,0x6d,0xa3,0xa6,0x17,0x6e,0x77,0x72,
-0x69,0x74,0x69,0x6e,0x67,0xa3,0x70,2,0x67,0x3a,0x72,0x52,0x79,0x10,0x6f,0xa2,
-0xb0,0x12,0x6d,0x62,0x6f,0xa3,0xb0,1,0x64,0x26,0x6f,0xa3,0xb8,0xa2,0xb7,0x12,
-0x69,0x61,0x6e,0xa3,0xb7,0x10,0x61,0xa2,0x98,0x16,0x73,0x6f,0x6d,0x70,0x65,0x6e,
-0x67,0xa3,0x98,0x11,0x6e,0x64,0xa2,0x71,0x14,0x61,0x6e,0x65,0x73,0x65,0xa3,0x71,
-0x61,0x5c,0x67,0xa2,0x43,0x68,1,0x61,0x2a,0x72,0x10,0x64,0xa3,0x97,2,0x72,
-0x28,0x76,0x30,0x77,0x87,0x12,0x61,0x64,0x61,0xa3,0x97,0x12,0x69,0x61,0x6e,0x87,
-2,0x6d,0x40,0x72,0x58,0x75,0x10,0x72,0xa2,0x6f,0x15,0x61,0x73,0x68,0x74,0x72,
-0x61,0xa3,0x6f,1,0x61,0x26,0x72,0xa3,0x7e,0x14,0x72,0x69,0x74,0x61,0x6e,0xa3,
-0x7e,1,0x61,0xa3,0x5e,0x62,0xa3,0x85,0x11,0x6e,0x77,0xa3,0x70,0x11,0x61,0x61,
-1,0x63,0x2f,0x69,0x23,3,0x65,0x3e,0x6a,0x48,0x6f,0x4e,0x75,0x10,0x6e,1,
-0x69,0x24,0x72,0x61,0x10,0x63,0x61,0x13,0x6a,0x61,0x6e,0x67,0xa3,0x6e,0x11,0x6e,
-0x67,0xa3,0x6e,1,0x68,0x2a,0x72,0x10,0x6f,0xa3,0x5d,0x10,0x67,0xa3,0xb6,0x6e,
-0xa2,0x83,0x6f,0xa2,0xf2,0x70,5,0x6c,0x1e,0x6c,0x44,0x72,0x4a,0x73,0x1b,0x61,
-0x6c,0x74,0x65,0x72,0x70,0x61,0x68,0x6c,0x61,0x76,0x69,0xa3,0x7b,0x11,0x72,0x64,
-0xa3,0x5c,0x11,0x74,0x69,0xa3,0x7d,0x61,0x7c,0x65,0xa2,0x54,0x68,3,0x61,0x3e,
-0x6c,0x4e,0x6e,0x5e,0x6f,0x16,0x65,0x6e,0x69,0x63,0x69,0x61,0x6e,0xa3,0x5b,0x10,
-0x67,0xa2,0x5a,0x12,0x73,0x70,0x61,0xa3,0x5a,2,0x69,0xa3,0x7a,0x70,0xa3,0x7b,
-0x76,0xa3,0x7c,0x10,0x78,0xa3,0x5b,2,0x68,0x3e,0x6c,0x50,0x75,0x10,0x63,0xa2,
-0xa5,0x14,0x69,0x6e,0x68,0x61,0x75,0xa3,0xa5,0x17,0x61,0x77,0x68,0x68,0x6d,0x6f,
-0x6e,0x67,0xa3,0x4b,0x10,0x6d,0xa2,0x90,0x14,0x79,0x72,0x65,0x6e,0x65,0xa3,0x90,
-0x11,0x72,0x6d,0xa3,0x59,6,0x6b,0x36,0x6b,0x56,0x73,0x6e,0x75,0x74,0x79,0x11,
-0x69,0x61,0x1f,0x6b,0x65,0x6e,0x67,0x70,0x75,0x61,0x63,0x68,0x75,0x65,0x68,0x6d,
-0x6f,0x6e,0x67,0xa3,0xba,1,0x67,0x2e,0x6f,0xa2,0x57,0x10,0x6f,0xa3,0x57,0x10,
-0x62,0xa3,0x84,0x11,0x68,0x75,0xa3,0x96,0x12,0x73,0x68,0x75,0xa3,0x96,0x61,0x42,
-0x62,0x80,0x65,0x10,0x77,1,0x61,0xa3,0xaa,0x74,0x14,0x61,0x69,0x6c,0x75,0x65,
-0x97,2,0x62,0x2e,0x6e,0x3c,0x72,0x10,0x62,0xa3,0x8e,0x15,0x61,0x74,0x61,0x65,
-0x61,0x6e,0xa3,0x8f,0x10,0x64,0xa2,0xbb,0x16,0x69,0x6e,0x61,0x67,0x61,0x72,0x69,
-0xa3,0xbb,0x11,0x61,0x74,0xa3,0x8f,3,0x67,0x5a,0x6c,0x6c,0x72,0xa2,0x93,0x73,
-2,0x61,0x36,0x67,0x3c,0x6d,0x10,0x61,0x84,0x12,0x6e,0x79,0x61,0x85,0x11,0x67,
-0x65,0xa3,0xab,0x10,0x65,0xa3,0xab,1,0x61,0x2a,0x68,0x11,0x61,0x6d,0x5b,0x10,
-0x6d,0x5b,1,0x63,0xa2,0x60,0x64,5,0x70,0x37,0x70,0x36,0x73,0x54,0x74,0x14,
-0x75,0x72,0x6b,0x69,0x63,0xa3,0x58,0x11,0x65,0x72,1,0x6d,0x2c,0x73,0x12,0x69,
-0x61,0x6e,0x9b,0x11,0x69,0x63,0xa3,0x59,0x10,0x6f,1,0x67,0x3a,0x75,0x18,0x74,
-0x68,0x61,0x72,0x61,0x62,0x69,0x61,0x6e,0xa3,0x85,0x13,0x64,0x69,0x61,0x6e,0xa3,
-0xb8,0x68,0x42,0x69,0x54,0x6e,0x1a,0x6f,0x72,0x74,0x68,0x61,0x72,0x61,0x62,0x69,
-0x61,0x6e,0xa3,0x8e,0x17,0x75,0x6e,0x67,0x61,0x72,0x69,0x61,0x6e,0xa3,0x4c,0x14,
-0x74,0x61,0x6c,0x69,0x63,0x5d,1,0x68,0x26,0x6b,0xa3,0x6d,0x12,0x69,0x6b,0x69,
-0xa3,0x6d,2,0x69,0x2c,0x6b,0x30,0x79,0x10,0x61,0x5f,0x11,0x79,0x61,0x5f,0x10,
-0x68,0xa3,0x58,0x68,0xc3,0xd,0x6b,0xc2,0x24,0x6b,0xa4,0x17,0x6c,0xa4,0xb2,0x6d,
-8,0x6f,0x46,0x6f,0x48,0x72,0x74,0x74,0x80,0x75,0x86,0x79,1,0x61,0x28,0x6d,
-0x10,0x72,0x59,0x13,0x6e,0x6d,0x61,0x72,0x59,2,0x64,0x2e,0x6e,0x32,0x6f,0x10,
-0x6e,0xa3,0x72,0x10,0x69,0xa3,0xa3,0x10,0x67,0x56,0x14,0x6f,0x6c,0x69,0x61,0x6e,
-0x57,0x10,0x6f,0xa2,0x95,0x10,0x6f,0xa3,0x95,0x11,0x65,0x69,0xa3,0x73,0x11,0x6c,
-0x74,0xa2,0xa4,0x12,0x61,0x6e,0x69,0xa3,0xa4,0x61,0x36,0x65,0xa2,0x67,0x69,0xa2,
-0xbd,0x6c,0x11,0x79,0x6d,0x55,6,0x6e,0x38,0x6e,0x32,0x72,0x5c,0x73,0x6c,0x79,
-0x10,0x61,0xa3,0x55,1,0x64,0x38,0x69,0xa2,0x79,0x15,0x63,0x68,0x61,0x65,0x61,
-0x6e,0xa3,0x79,0xa2,0x54,0x12,0x61,0x69,0x63,0xa3,0x54,0x10,0x63,0xa2,0xa9,0x12,
-0x68,0x65,0x6e,0xa3,0xa9,0x18,0x61,0x72,0x61,0x6d,0x67,0x6f,0x6e,0x64,0x69,0xa3,
-0xaf,0x68,0x36,0x6b,0x4c,0x6c,0x15,0x61,0x79,0x61,0x6c,0x61,0x6d,0x55,1,0x61,
-0x26,0x6a,0xa3,0xa0,0x13,0x6a,0x61,0x6e,0x69,0xa3,0xa0,0x10,0x61,0xa2,0xb4,0x12,
-0x73,0x61,0x72,0xa3,0xb4,3,0x64,0x78,0x65,0x94,0x6e,0xa2,0x42,0x72,1,0x63,
-0xa3,0x8d,0x6f,0xa2,0x56,0x13,0x69,0x74,0x69,0x63,1,0x63,0x3c,0x68,0x19,0x69,
-0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0x56,0x15,0x75,0x72,0x73,0x69,
-0x76,0x65,0xa3,0x8d,1,0x65,0x26,0x66,0xa3,0xb5,0x16,0x66,0x61,0x69,0x64,0x72,
-0x69,0x6e,0xa3,0xb5,0x17,0x74,0x65,0x69,0x6d,0x61,0x79,0x65,0x6b,0xa3,0x73,0x10,
-0x64,0xa2,0x8c,0x17,0x65,0x6b,0x69,0x6b,0x61,0x6b,0x75,0x69,0xa3,0x8c,0x11,0x61,
-0x6f,0xa3,0x5c,6,0x6e,0x1a,0x6e,0x34,0x6f,0x38,0x70,0x3e,0x74,0x11,0x68,0x69,
-0xa3,0x78,0x11,0x64,0x61,0x4b,0x11,0x72,0x65,0xa3,0x77,0x11,0x65,0x6c,0xa3,0x8a,
-0x61,0x30,0x68,0x9a,0x69,0x11,0x74,0x73,0xa3,0xbf,4,0x69,0x3c,0x6c,0x44,0x6e,
-0x48,0x74,0x56,0x79,0x13,0x61,0x68,0x6c,0x69,0xa3,0x4f,0x12,0x74,0x68,0x69,0xa3,
-0x78,0x10,0x69,0xa3,0x4f,1,0x61,0x4d,0x6e,0x12,0x61,0x64,0x61,0x4b,0x14,0x61,
-0x6b,0x61,0x6e,0x61,0x4c,0x19,0x6f,0x72,0x68,0x69,0x72,0x61,0x67,0x61,0x6e,0x61,
-0x8d,4,0x61,0x40,0x69,0x52,0x6d,0x70,0x6f,0x7c,0x75,0x15,0x64,0x61,0x77,0x61,
-0x64,0x69,0xa3,0x91,0x10,0x72,0x92,0x15,0x6f,0x73,0x68,0x74,0x68,0x69,0x93,0x1d,
-0x74,0x61,0x6e,0x73,0x6d,0x61,0x6c,0x6c,0x73,0x63,0x72,0x69,0x70,0x74,0xa3,0xbf,
-1,0x65,0x24,0x72,0x4f,0x10,0x72,0x4f,0x10,0x6a,0xa2,0x9d,0x11,0x6b,0x69,0xa3,
-0x9d,4,0x61,0x5c,0x65,0x90,0x69,0xa0,0x6f,0xa2,0x5d,0x79,1,0x63,0x34,0x64,
-0x10,0x69,0xa2,0x6c,0x11,0x61,0x6e,0xa3,0x6c,0x10,0x69,0xa2,0x6b,0x11,0x61,0x6e,
-0xa3,0x6b,2,0x6e,0x42,0x6f,0x46,0x74,3,0x66,0xa3,0x50,0x67,0xa3,0x51,0x69,
-0x24,0x6e,0x53,0x10,0x6e,0x53,0x10,0x61,0xa3,0x6a,0x50,0x10,0x6f,0x51,0x11,0x70,
-0x63,0xa2,0x52,0x11,0x68,0x61,0xa3,0x52,2,0x6d,0x2e,0x6e,0x36,0x73,0x10,0x75,
-0xa3,0x83,0x10,0x62,0x80,0x10,0x75,0x81,2,0x61,0xa3,0x53,0x62,0x83,0x65,0x11,
-0x61,0x72,1,0x61,0xa3,0x53,0x62,0x83,0x11,0x6d,0x61,0xa3,0x8b,0x68,0x6e,0x69,
-0xa2,0x95,0x6a,2,0x61,0x30,0x70,0x52,0x75,0x11,0x72,0x63,0xa3,0x94,1,0x6d,
-0x38,0x76,0x10,0x61,0xa2,0x4e,0x13,0x6e,0x65,0x73,0x65,0xa3,0x4e,0x10,0x6f,0xa3,
-0xad,0x11,0x61,0x6e,0xa3,0x69,6,0x6c,0x1e,0x6c,0x34,0x6d,0x3a,0x72,0x48,0x75,
-0x11,0x6e,0x67,0xa3,0x4c,0x11,0x75,0x77,0xa3,0x9c,0x10,0x6e,1,0x67,0xa3,0x4b,
-0x70,0xa3,0xba,0x11,0x6b,0x74,0x8d,0x61,0x3c,0x65,0xa2,0x43,0x69,0x11,0x72,0x61,
-0x48,0x13,0x67,0x61,0x6e,0x61,0x49,1,0x6e,0x34,0x74,0x10,0x72,0xa2,0xa2,0x11,
-0x61,0x6e,0xa3,0xa2,0x42,6,0x6f,0xe,0x6f,0x77,0x73,0xa3,0x49,0x74,0xa3,0x4a,
-0x75,0x12,0x6e,0x6f,0x6f,0x77,0x62,0xa3,0xac,0x67,0x3e,0x69,0x42,0x19,0x66,0x69,
-0x72,0x6f,0x68,0x69,0x6e,0x67,0x79,0x61,0xa3,0xb6,0x44,0x11,0x75,0x6c,0x45,0x11,
-0x62,0x72,0x46,0x11,0x65,0x77,0x47,2,0x6d,0x2e,0x6e,0x4a,0x74,0x11,0x61,0x6c,
-0x5d,0x1c,0x70,0x65,0x72,0x69,0x61,0x6c,0x61,0x72,0x61,0x6d,0x61,0x69,0x63,0xa3,
-0x74,2,0x64,0x66,0x68,0x6a,0x73,0x1b,0x63,0x72,0x69,0x70,0x74,0x69,0x6f,0x6e,
-0x61,0x6c,0x70,0x61,1,0x68,0x32,0x72,0x14,0x74,0x68,0x69,0x61,0x6e,0xa3,0x7d,
-0x13,0x6c,0x61,0x76,0x69,0xa3,0x7a,0x10,0x73,0xa3,0x4d,0x15,0x65,0x72,0x69,0x74,
-0x65,0x64,0x23,0x64,0xc1,0xd,0x64,0xa2,0x7a,0x65,0xa2,0xc1,0x67,4,0x65,0x82,
-0x6c,0x9a,0x6f,0xa2,0x46,0x72,0xa2,0x55,0x75,2,0x6a,0x3c,0x6e,0x4e,0x72,1,
-0x6d,0x24,0x75,0x41,0x13,0x75,0x6b,0x68,0x69,0x41,1,0x61,0x24,0x72,0x3f,0x13,
-0x72,0x61,0x74,0x69,0x3f,0x18,0x6a,0x61,0x6c,0x61,0x67,0x6f,0x6e,0x64,0x69,0xa3,
-0xb3,0x10,0x6f,1,0x6b,0xa3,0x48,0x72,0x38,0x13,0x67,0x69,0x61,0x6e,0x39,0x11,
-0x61,0x67,0x90,0x15,0x6f,0x6c,0x69,0x74,0x69,0x63,0x91,1,0x6e,0x30,0x74,0x10,
-0x68,0x3a,0x11,0x69,0x63,0x3b,1,0x67,0xa3,0xb3,0x6d,0xa3,0xaf,1,0x61,0x32,
-0x65,1,0x65,0x24,0x6b,0x3d,0x10,0x6b,0x3d,0x10,0x6e,0xa2,0x89,0x12,0x74,0x68,
-0x61,0xa3,0x89,4,0x65,0x46,0x69,0x6c,0x6f,0x8c,0x73,0x9a,0x75,0x11,0x70,0x6c,
-0xa2,0x87,0x13,0x6f,0x79,0x61,0x6e,0xa3,0x87,1,0x73,0x38,0x76,0x10,0x61,0x34,
-0x15,0x6e,0x61,0x67,0x61,0x72,0x69,0x35,0x13,0x65,0x72,0x65,0x74,0x33,1,0x61,
-0x36,0x76,0x16,0x65,0x73,0x61,0x6b,0x75,0x72,0x75,0xa3,0xbe,0x10,0x6b,0xa3,0xbe,
-0x11,0x67,0x72,0xa2,0xb2,0x10,0x61,0xa3,0xb2,0x11,0x72,0x74,0x33,2,0x67,0x3a,
-0x6c,0x72,0x74,0x11,0x68,0x69,0x36,0x13,0x6f,0x70,0x69,0x63,0x37,0x10,0x79,2,
-0x64,0xa3,0x45,0x68,0xa3,0x46,0x70,0xa2,0x47,0x1e,0x74,0x69,0x61,0x6e,0x68,0x69,
-0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0x47,1,0x62,0x36,0x79,0x10,
-0x6d,0xa2,0xb9,0x12,0x61,0x69,0x63,0xa3,0xb9,0x10,0x61,0xa2,0x88,0x12,0x73,0x61,
-0x6e,0xa3,0x88,0x61,0xa2,0xb4,0x62,0xa4,0x19,0x63,6,0x6f,0x3d,0x6f,0x5a,0x70,
-0x76,0x75,0x7a,0x79,1,0x70,0x3e,0x72,2,0x69,0x2a,0x6c,0x31,0x73,0xa3,0x44,
-0x13,0x6c,0x6c,0x69,0x63,0x31,0x13,0x72,0x69,0x6f,0x74,0x7f,1,0x6d,0x30,0x70,
-0x10,0x74,0x2e,0x11,0x69,0x63,0x2f,0x12,0x6d,0x6f,0x6e,0x21,0x11,0x72,0x74,0x7f,
-0x16,0x6e,0x65,0x69,0x66,0x6f,0x72,0x6d,0xa3,0x65,0x61,0x32,0x68,0xa2,0x41,0x69,
-0x11,0x72,0x74,0xa3,0x43,3,0x6b,0x4c,0x6e,0x50,0x72,0x76,0x75,0x1d,0x63,0x61,
-0x73,0x69,0x61,0x6e,0x61,0x6c,0x62,0x61,0x6e,0x69,0x61,0x6e,0xa3,0x9f,0x10,0x6d,
-0xa3,0x76,1,0x61,0x24,0x73,0x71,0x1d,0x64,0x69,0x61,0x6e,0x61,0x62,0x6f,0x72,
-0x69,0x67,0x69,0x6e,0x61,0x6c,0x71,0x10,0x69,0xa2,0x68,0x11,0x61,0x6e,0xa3,0x68,
-3,0x61,0x32,0x65,0x44,0x6f,0x52,0x72,0x10,0x73,0xa3,0xbd,1,0x6b,0x26,0x6d,
-0xa3,0x42,0x11,0x6d,0x61,0xa3,0x76,0x10,0x72,0x2c,0x13,0x6f,0x6b,0x65,0x65,0x2d,
-0x16,0x72,0x61,0x73,0x6d,0x69,0x61,0x6e,0xa3,0xbd,6,0x68,0x4a,0x68,0x48,0x6e,
-0x4e,0x72,0x76,0x76,1,0x65,0x2a,0x73,0x10,0x74,0xa3,0x75,0x13,0x73,0x74,0x61,
-0x6e,0xa3,0x75,0x11,0x6f,0x6d,0xa3,0xa1,0x11,0x61,0x74,0x1f,0x6f,0x6c,0x69,0x61,
-0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0x9c,1,0x61,
-0x3e,0x6d,2,0x65,0x2a,0x69,0xa3,0x74,0x6e,0x27,0x13,0x6e,0x69,0x61,0x6e,0x27,
-0x10,0x62,0x24,0x11,0x69,0x63,0x25,0x64,0x30,0x66,0x44,0x67,0x11,0x68,0x62,0xa3,
-0x9f,0x10,0x6c,1,0x61,0x26,0x6d,0xa3,0xa7,0x10,0x6d,0xa3,0xa7,0x11,0x61,0x6b,
-0xa3,0x93,6,0x6c,0x3c,0x6c,0x52,0x6f,0x56,0x72,0x66,0x75,1,0x67,0x30,0x68,
-1,0x64,0x79,0x69,0x10,0x64,0x79,0x10,0x69,0x8e,0x13,0x6e,0x65,0x73,0x65,0x8f,
-0x11,0x69,0x73,0xa1,0x11,0x70,0x6f,0x2a,0x13,0x6d,0x6f,0x66,0x6f,0x2b,0x10,0x61,
-1,0x68,0x2e,0x69,0x7c,0x12,0x6c,0x6c,0x65,0x7d,0xa2,0x41,0x11,0x6d,0x69,0xa3,
-0x41,0x61,0x48,0x65,0x9c,0x68,1,0x61,0x2a,0x6b,0x10,0x73,0xa3,0xa8,0x15,0x69,
-0x6b,0x73,0x75,0x6b,0x69,0xa3,0xa8,3,0x6c,0x3a,0x6d,0x48,0x73,0x54,0x74,1,
-0x61,0x24,0x6b,0x9f,0x10,0x6b,0x9f,0x10,0x69,0x9c,0x13,0x6e,0x65,0x73,0x65,0x9d,
-0x10,0x75,0xa2,0x82,0x10,0x6d,0xa3,0x82,0x10,0x73,0xa2,0x86,0x13,0x61,0x76,0x61,
-0x68,0xa3,0x86,0x11,0x6e,0x67,0x28,0x12,0x61,0x6c,0x69,0x29,3,0x6c,0x42,0x6e,
-0x90,0x74,0xa2,0x46,0x76,0x24,0x17,0x6f,0x77,0x65,0x6c,0x6a,0x61,0x6d,0x6f,0x25,
-0x22,1,0x65,0x54,0x76,0x28,1,0x73,0x38,0x74,0x2a,0x17,0x73,0x79,0x6c,0x6c,
-0x61,0x62,0x6c,0x65,0x2b,0x16,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x29,0x18,0x61,
-0x64,0x69,0x6e,0x67,0x6a,0x61,0x6d,0x6f,0x23,1,0x61,0x21,0x6f,0x1a,0x74,0x61,
-0x70,0x70,0x6c,0x69,0x63,0x61,0x62,0x6c,0x65,0x21,0x26,0x1a,0x72,0x61,0x69,0x6c,
-0x69,0x6e,0x67,0x6a,0x61,0x6d,0x6f,0x27,1,0x6e,0x2c,0x79,0x22,0x11,0x65,0x73,
-0x23,0x20,0x10,0x6f,0x21,1,0x6e,0x2c,0x79,0x22,0x11,0x65,0x73,0x23,0x20,0x10,
-0x6f,0x21,2,0x6d,0x30,0x6e,0x3a,0x79,0x22,0x11,0x65,0x73,0x23,0x24,0x13,0x61,
-0x79,0x62,0x65,0x25,0x20,0x10,0x6f,0x21,2,0x6d,0x30,0x6e,0x3a,0x79,0x22,0x11,
-0x65,0x73,0x23,0x24,0x13,0x61,0x79,0x62,0x65,0x25,0x20,0x10,0x6f,0x21,0xb,0x72,
-0x39,0x76,0xc,0x76,0x33,0x78,0x2a,0x7a,0x11,0x77,0x6a,0x43,0x10,0x78,0x21,0x72,
-0x28,0x73,0x50,0x74,0x31,1,0x65,0x24,0x69,0x39,0x1e,0x67,0x69,0x6f,0x6e,0x61,
-0x6c,0x69,0x6e,0x64,0x69,0x63,0x61,0x74,0x6f,0x72,0x39,1,0x6d,0x35,0x70,0x18,
-0x61,0x63,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x35,0x6c,0x1f,0x6c,0x3c,0x6f,0x4a,
-0x70,1,0x70,0x37,0x72,0x14,0x65,0x70,0x65,0x6e,0x64,0x37,0x28,1,0x66,0x2b,
-0x76,0x2c,0x10,0x74,0x2f,0x13,0x74,0x68,0x65,0x72,0x21,0x63,0x4c,0x65,0x64,0x67,
-1,0x61,0x3a,0x6c,0x19,0x75,0x65,0x61,0x66,0x74,0x65,0x72,0x7a,0x77,0x6a,0x41,
-0x10,0x7a,0x41,2,0x6e,0x23,0x6f,0x24,0x72,0x25,0x14,0x6e,0x74,0x72,0x6f,0x6c,
-0x23,2,0x62,0x34,0x6d,0x4e,0x78,0x26,0x13,0x74,0x65,0x6e,0x64,0x27,0x3a,1,
-0x61,0x24,0x67,0x3d,0x11,0x73,0x65,0x3a,0x12,0x67,0x61,0x7a,0x3d,0x3e,0x16,0x6f,
-0x64,0x69,0x66,0x69,0x65,0x72,0x3f,9,0x6e,0x4a,0x6e,0x34,0x6f,0x44,0x73,0x60,
-0x75,0x94,0x78,0x10,0x78,0x21,0x10,0x75,0x2a,0x14,0x6d,0x65,0x72,0x69,0x63,0x2b,
-1,0x6c,0x2c,0x74,0x12,0x68,0x65,0x72,0x21,0x14,0x65,0x74,0x74,0x65,0x72,0x2d,
-3,0x63,0x36,0x65,0x46,0x70,0x31,0x74,0x32,0x12,0x65,0x72,0x6d,0x33,0x3c,0x16,
-0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x3d,0x2e,0x10,0x70,0x2f,0x10,0x70,0x34,0x12,
-0x70,0x65,0x72,0x35,0x61,0x46,0x63,0x52,0x65,0x64,0x66,0x72,0x6c,2,0x65,0x2d,
-0x66,0x3b,0x6f,0x28,0x12,0x77,0x65,0x72,0x29,0x10,0x74,0x22,0x12,0x65,0x72,0x6d,
-0x23,1,0x6c,0x24,0x72,0x37,0x24,0x12,0x6f,0x73,0x65,0x25,0x10,0x78,0x38,0x13,
-0x74,0x65,0x6e,0x64,0x39,0x10,0x6f,0x26,0x13,0x72,0x6d,0x61,0x74,0x27,0,0x10,
-0x6c,0x88,0x72,0x40,0x72,0x36,0x73,0x5e,0x77,0x7a,0x78,0x8a,0x7a,0x11,0x77,0x6a,
-0x4b,1,0x65,0x24,0x69,0x3b,0x1e,0x67,0x69,0x6f,0x6e,0x61,0x6c,0x69,0x6e,0x64,
-0x69,0x63,0x61,0x74,0x6f,0x72,0x3b,1,0x69,0x24,0x71,0x3f,0x18,0x6e,0x67,0x6c,
-0x65,0x71,0x75,0x6f,0x74,0x65,0x3f,0x17,0x73,0x65,0x67,0x73,0x70,0x61,0x63,0x65,
-0x4d,0x10,0x78,0x21,0x6c,0x36,0x6d,0x3c,0x6e,0x76,0x6f,0x13,0x74,0x68,0x65,0x72,
-0x21,1,0x65,0x23,0x66,0x35,3,0x62,0x37,0x69,0x28,0x6c,0x29,0x6e,0x2b,0x10,
-0x64,1,0x6c,0x34,0x6e,0x11,0x75,0x6d,0x2a,0x12,0x6c,0x65,0x74,0x37,0x14,0x65,
-0x74,0x74,0x65,0x72,0x29,2,0x65,0x36,0x6c,0x39,0x75,0x2c,0x14,0x6d,0x65,0x72,
-0x69,0x63,0x2d,0x14,0x77,0x6c,0x69,0x6e,0x65,0x39,0x66,0x3f,0x66,0x40,0x67,0x4e,
-0x68,0x70,0x6b,0x10,0x61,0x26,0x15,0x74,0x61,0x6b,0x61,0x6e,0x61,0x27,0x10,0x6f,
-0x24,0x13,0x72,0x6d,0x61,0x74,0x25,1,0x61,0x3a,0x6c,0x19,0x75,0x65,0x61,0x66,
-0x74,0x65,0x72,0x7a,0x77,0x6a,0x49,0x10,0x7a,0x49,1,0x65,0x24,0x6c,0x3d,0x19,
-0x62,0x72,0x65,0x77,0x6c,0x65,0x74,0x74,0x65,0x72,0x3d,0x61,0x86,0x63,0x92,0x64,
-0x94,0x65,2,0x62,0x44,0x6d,0x5e,0x78,0x2e,0x13,0x74,0x65,0x6e,0x64,0x32,0x15,
-0x6e,0x75,0x6d,0x6c,0x65,0x74,0x2f,0x42,1,0x61,0x24,0x67,0x45,0x11,0x73,0x65,
-0x42,0x12,0x67,0x61,0x7a,0x45,0x46,0x16,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x47,
-0x15,0x6c,0x65,0x74,0x74,0x65,0x72,0x23,0x10,0x72,0x31,1,0x6f,0x24,0x71,0x41,
-0x18,0x75,0x62,0x6c,0x65,0x71,0x75,0x6f,0x74,0x65,0x41,2,0x63,0x32,0x6e,0x3c,
-0x6f,0x22,0x12,0x70,0x65,0x6e,0x23,0x24,0x13,0x6c,0x6f,0x73,0x65,0x25,0x20,0x12,
-0x6f,0x6e,0x65,0x21,6,0x6f,0x65,0x6f,0x4a,0x72,0x5c,0x74,0x64,0x76,0x1d,0x69,
-0x73,0x75,0x61,0x6c,0x6f,0x72,0x64,0x65,0x72,0x6c,0x65,0x66,0x74,0x3d,0x18,0x76,
-0x65,0x72,0x73,0x74,0x72,0x75,0x63,0x6b,0x2d,0x13,0x69,0x67,0x68,0x74,0x2f,0x11,
-0x6f,0x70,0x30,0x12,0x61,0x6e,0x64,2,0x62,0x32,0x6c,0x62,0x72,0x13,0x69,0x67,
-0x68,0x74,0x3b,0x14,0x6f,0x74,0x74,0x6f,0x6d,0x32,0x12,0x61,0x6e,0x64,1,0x6c,
-0x2e,0x72,0x13,0x69,0x67,0x68,0x74,0x35,0x12,0x65,0x66,0x74,0x3f,0x12,0x65,0x66,
-0x74,0x36,0x17,0x61,0x6e,0x64,0x72,0x69,0x67,0x68,0x74,0x39,0x62,0x2c,0x6c,0x5c,
-0x6e,0x10,0x61,0x21,0x14,0x6f,0x74,0x74,0x6f,0x6d,0x22,0x12,0x61,0x6e,0x64,1,
-0x6c,0x2e,0x72,0x13,0x69,0x67,0x68,0x74,0x27,0x12,0x65,0x66,0x74,0x25,0x12,0x65,
-0x66,0x74,0x28,0x17,0x61,0x6e,0x64,0x72,0x69,0x67,0x68,0x74,0x2b,0xd,0x6e,0xaa,
-0x72,0x70,0x72,0x92,0x73,0xa2,0x46,0x74,0xa2,0x54,0x76,1,0x69,0x60,0x6f,0x12,
-0x77,0x65,0x6c,0x62,1,0x64,0x3a,0x69,0x19,0x6e,0x64,0x65,0x70,0x65,0x6e,0x64,
-0x65,0x6e,0x74,0x67,0x17,0x65,0x70,0x65,0x6e,0x64,0x65,0x6e,0x74,0x65,1,0x72,
-0x2e,0x73,0x13,0x61,0x72,0x67,0x61,0x61,0x12,0x61,0x6d,0x61,0x5f,0x1d,0x65,0x67,
-0x69,0x73,0x74,0x65,0x72,0x73,0x68,0x69,0x66,0x74,0x65,0x72,0x57,0x1e,0x79,0x6c,
-0x6c,0x61,0x62,0x6c,0x65,0x6d,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x59,0x12,0x6f,
-0x6e,0x65,1,0x6c,0x2c,0x6d,0x12,0x61,0x72,0x6b,0x5d,0x14,0x65,0x74,0x74,0x65,
-0x72,0x5b,0x6e,0x3c,0x6f,0x7c,0x70,0x18,0x75,0x72,0x65,0x6b,0x69,0x6c,0x6c,0x65,
-0x72,0x55,1,0x6f,0x4c,0x75,1,0x6b,0x3c,0x6d,0x12,0x62,0x65,0x72,0x50,0x15,
-0x6a,0x6f,0x69,0x6e,0x65,0x72,0x53,0x11,0x74,0x61,0x4f,0x16,0x6e,0x6a,0x6f,0x69,
-0x6e,0x65,0x72,0x4d,0x13,0x74,0x68,0x65,0x72,0x21,0x67,0x3e,0x67,0x4a,0x69,0x64,
-0x6a,0x82,0x6d,0x1d,0x6f,0x64,0x69,0x66,0x79,0x69,0x6e,0x67,0x6c,0x65,0x74,0x74,
-0x65,0x72,0x4b,0x1c,0x65,0x6d,0x69,0x6e,0x61,0x74,0x69,0x6f,0x6e,0x6d,0x61,0x72,
-0x6b,0x45,0x1e,0x6e,0x76,0x69,0x73,0x69,0x62,0x6c,0x65,0x73,0x74,0x61,0x63,0x6b,
-0x65,0x72,0x47,0x14,0x6f,0x69,0x6e,0x65,0x72,0x49,0x61,0xa2,0xba,0x62,0xa2,0xc0,
-0x63,1,0x61,0xa2,0xa2,0x6f,0x16,0x6e,0x73,0x6f,0x6e,0x61,0x6e,0x74,0x2a,8,
-0x6b,0x67,0x6b,0x48,0x6d,0x52,0x70,0x5c,0x73,0xa2,0x42,0x77,0x19,0x69,0x74,0x68,
-0x73,0x74,0x61,0x63,0x6b,0x65,0x72,0x43,0x14,0x69,0x6c,0x6c,0x65,0x72,0x35,0x14,
-0x65,0x64,0x69,0x61,0x6c,0x37,1,0x6c,0x52,0x72,0x10,0x65,1,0x63,0x2e,0x66,
-0x13,0x69,0x78,0x65,0x64,0x3d,0x19,0x65,0x64,0x69,0x6e,0x67,0x72,0x65,0x70,0x68,
-0x61,0x3b,0x18,0x61,0x63,0x65,0x68,0x6f,0x6c,0x64,0x65,0x72,0x39,0x10,0x75,1,
-0x62,0x3e,0x63,0x1b,0x63,0x65,0x65,0x64,0x69,0x6e,0x67,0x72,0x65,0x70,0x68,0x61,
-0x41,0x15,0x6a,0x6f,0x69,0x6e,0x65,0x64,0x3f,0x64,0x4c,0x66,0x52,0x68,0x5a,0x69,
-0x1e,0x6e,0x69,0x74,0x69,0x61,0x6c,0x70,0x6f,0x73,0x74,0x66,0x69,0x78,0x65,0x64,
-0x33,0x12,0x65,0x61,0x64,0x2d,0x13,0x69,0x6e,0x61,0x6c,0x2f,0x18,0x65,0x61,0x64,
-0x6c,0x65,0x74,0x74,0x65,0x72,0x31,0x1d,0x6e,0x74,0x69,0x6c,0x6c,0x61,0x74,0x69,
-0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x29,0x16,0x76,0x61,0x67,0x72,0x61,0x68,0x61,0x23,
-1,0x69,0x4a,0x72,0x10,0x61,0x1f,0x68,0x6d,0x69,0x6a,0x6f,0x69,0x6e,0x69,0x6e,
-0x67,0x6e,0x75,0x6d,0x62,0x65,0x72,0x27,0x12,0x6e,0x64,0x75,0x25,2,0x72,0x38,
-0x74,0x46,0x75,0x26,0x15,0x70,0x72,0x69,0x67,0x68,0x74,0x27,0x20,0x15,0x6f,0x74,
-0x61,0x74,0x65,0x64,0x21,1,0x72,0x24,0x75,0x25,0x22,0x18,0x61,0x6e,0x73,0x66,
-0x6f,0x72,0x6d,0x65,0x64,1,0x72,0x32,0x75,0x15,0x70,0x72,0x69,0x67,0x68,0x74,
-0x25,0x15,0x6f,0x74,0x61,0x74,0x65,0x64,0x23,0xd,0x6e,0xc1,0x86,0x73,0xa8,0x73,
-0x4c,0x74,0xa2,0x76,0x75,0xa2,0x83,0x7a,0xd8,0x70,0,2,0x6c,0xd9,0x20,0,
-0x70,0xd9,0x40,0,0x73,0xc3,0,0xfe,0xf,0,0,0,7,0x6f,0x3c,0x6f,
-0xff,8,0,0,0,0x70,0x3a,0x75,0x6e,0x79,0x13,0x6d,0x62,0x6f,0x6c,0xff,
-0xf,0,0,0,0x11,0x61,0x63,1,0x65,0x34,0x69,0x15,0x6e,0x67,0x6d,0x61,
-0x72,0x6b,0xa5,0,0x18,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xc3,0,
-0x16,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0xe1,0,0,0x63,0xff,2,0,0,
-0,0x65,0x38,0x6b,0xff,4,0,0,0,0x6d,0xff,1,0,0,0,0x16,
-0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xd9,0x70,0,0x1d,0x69,0x74,0x6c,0x65,0x63,
-0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x31,1,0x6e,0x40,0x70,0x1c,0x70,
-0x65,0x72,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x25,0x17,0x61,0x73,
-0x73,0x69,0x67,0x6e,0x65,0x64,0x23,0x6e,0xa2,0x69,0x6f,0xa2,0x89,0x70,0xfe,0x30,
-0xf8,0,0,9,0x69,0x33,0x69,0xff,0x10,0,0,0,0x6f,0xfd,0x80,0,
-0,0x72,0x54,0x73,0xf9,0,0,0x75,0x12,0x6e,0x63,0x74,0xfe,0x30,0xf8,0,
-0,0x15,0x75,0x61,0x74,0x69,0x6f,0x6e,0xff,0x30,0xf8,0,0,0x17,0x69,0x76,
-0x61,0x74,0x65,0x75,0x73,0x65,0xdd,0,0,0x61,0x48,0x63,0xfd,0x40,0,0,
-0x64,0xe9,0,0,0x65,0xfd,0x20,0,0,0x66,0xff,0x20,0,0,0,0x1f,
-0x72,0x61,0x67,0x72,0x61,0x70,0x68,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,
-0xd9,0x40,0,0xbe,0,3,0x64,0xa7,0,0x6c,0xab,0,0x6f,0x30,0x75,0x13,
-0x6d,0x62,0x65,0x72,0xbf,0,0xb2,0,0x1b,0x6e,0x73,0x70,0x61,0x63,0x69,0x6e,
-0x67,0x6d,0x61,0x72,0x6b,0xa1,1,0x70,0x92,0x74,0x12,0x68,0x65,0x72,0xe6,0x80,
-1,3,0x6c,0x40,0x6e,0x4a,0x70,0x56,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0xff,
-8,0,0,0,0x14,0x65,0x74,0x74,0x65,0x72,0x61,0x14,0x75,0x6d,0x62,0x65,
-0x72,0xb3,0,0x19,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xfd,0x80,
-0,0,0x1c,0x65,0x6e,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,
-0xf9,0,0,0x66,0xc0,0xc4,0x66,0xa2,0x47,0x69,0xa2,0x64,0x6c,0xa2,0x79,0x6d,
-0xa4,0xc0,4,0x61,0x6c,0x63,0xa5,0,0x65,0xa3,0x80,0x6e,0xa1,0x6f,0x15,0x64,
-0x69,0x66,0x69,0x65,0x72,1,0x6c,0x38,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0xff,
-4,0,0,0,0x14,0x65,0x74,0x74,0x65,0x72,0x41,1,0x72,0x3c,0x74,0x16,
-0x68,0x73,0x79,0x6d,0x62,0x6f,0x6c,0xff,1,0,0,0,0x10,0x6b,0xa5,0xc0,
-1,0x69,0x32,0x6f,0x13,0x72,0x6d,0x61,0x74,0xdb,0,0,0x1d,0x6e,0x61,0x6c,
-0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xff,0x20,0,0,0,
-0x10,0x6e,0x1f,0x69,0x74,0x69,0x61,0x6c,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,
-0x69,0x6f,0x6e,0xff,0x10,0,0,0,0x9c,7,0x6d,0x18,0x6d,0x41,0x6f,0x28,
-0x74,0x31,0x75,0x25,0x60,0x1c,0x77,0x65,0x72,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,
-0x74,0x65,0x72,0x29,0x63,0x3d,0x65,0x28,0x69,0x42,0x6c,0x29,0x13,0x74,0x74,0x65,
-0x72,0x9c,0x15,0x6e,0x75,0x6d,0x62,0x65,0x72,0xab,0,0x1a,0x6e,0x65,0x73,0x65,
-0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xd9,0x20,0,0x63,0x46,0x64,0xa2,0x96,0x65,
-0x1b,0x6e,0x63,0x6c,0x6f,0x73,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0xa3,0x80,0xe6,
-0x80,1,7,0x6e,0x57,0x6e,0x52,0x6f,0x5e,0x73,0xe1,0,0,0x75,0x1b,0x72,
-0x72,0x65,0x6e,0x63,0x79,0x73,0x79,0x6d,0x62,0x6f,0x6c,0xff,2,0,0,0,
-0x22,0x12,0x74,0x72,0x6c,0xd9,0x80,0,0xdc,0,0,1,0x6d,0x62,0x6e,1,
-0x6e,0x30,0x74,0x12,0x72,0x6f,0x6c,0xd9,0x80,0,0x1f,0x65,0x63,0x74,0x6f,0x72,
-0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xfd,0x40,0,0,0x19,
-0x62,0x69,0x6e,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0xa5,0xc0,0x61,0x58,0x63,0xd9,
-0x80,0,0x66,0xdb,0,0,0x6c,0x1d,0x6f,0x73,0x65,0x70,0x75,0x6e,0x63,0x74,
-0x75,0x61,0x74,0x69,0x6f,0x6e,0xfd,0x20,0,0,0x18,0x73,0x65,0x64,0x6c,0x65,
-0x74,0x74,0x65,0x72,0x3d,2,0x61,0x32,0x65,0x50,0x69,0x12,0x67,0x69,0x74,0xa7,
-0,0x1c,0x73,0x68,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xe9,
-0,0,0x1a,0x63,0x69,0x6d,0x61,0x6c,0x6e,0x75,0x6d,0x62,0x65,0x72,0xa7,0
-};
-
-const char PropNameData::nameGroups[22098]={
-2,'A','l','p','h','a',0,'A','l','p','h','a','b','e','t','i','c',0,
-4,'N',0,'N','o',0,'F',0,'F','a','l','s','e',0,4,'Y',0,'Y','e','s',0,'T',0,'T','r','u','e',0,
-2,'N','R',0,'N','o','t','_','R','e','o','r','d','e','r','e','d',0,
-2,'O','V',0,'O','v','e','r','l','a','y',0,2,'H','A','N','R',0,'H','a','n','_','R','e','a','d','i','n','g',0,
-2,'N','K',0,'N','u','k','t','a',0,2,'K','V',0,'K','a','n','a','_','V','o','i','c','i','n','g',0,
-2,'V','R',0,'V','i','r','a','m','a',0,2,'C','C','C','1','0',0,'C','C','C','1','0',0,
-2,'C','C','C','1','1',0,'C','C','C','1','1',0,2,'C','C','C','1','2',0,'C','C','C','1','2',0,
-2,'C','C','C','1','3',0,'C','C','C','1','3',0,2,'C','C','C','1','4',0,'C','C','C','1','4',0,
-2,'C','C','C','1','5',0,'C','C','C','1','5',0,2,'C','C','C','1','6',0,'C','C','C','1','6',0,
-2,'C','C','C','1','7',0,'C','C','C','1','7',0,2,'C','C','C','1','8',0,'C','C','C','1','8',0,
-2,'C','C','C','1','9',0,'C','C','C','1','9',0,2,'C','C','C','2','0',0,'C','C','C','2','0',0,
-2,'C','C','C','2','1',0,'C','C','C','2','1',0,2,'C','C','C','2','2',0,'C','C','C','2','2',0,
-2,'C','C','C','2','3',0,'C','C','C','2','3',0,2,'C','C','C','2','4',0,'C','C','C','2','4',0,
-2,'C','C','C','2','5',0,'C','C','C','2','5',0,2,'C','C','C','2','6',0,'C','C','C','2','6',0,
-2,'C','C','C','2','7',0,'C','C','C','2','7',0,2,'C','C','C','2','8',0,'C','C','C','2','8',0,
-2,'C','C','C','2','9',0,'C','C','C','2','9',0,2,'C','C','C','3','0',0,'C','C','C','3','0',0,
-2,'C','C','C','3','1',0,'C','C','C','3','1',0,2,'C','C','C','3','2',0,'C','C','C','3','2',0,
-2,'C','C','C','3','3',0,'C','C','C','3','3',0,2,'C','C','C','3','4',0,'C','C','C','3','4',0,
-2,'C','C','C','3','5',0,'C','C','C','3','5',0,2,'C','C','C','3','6',0,'C','C','C','3','6',0,
-2,'C','C','C','8','4',0,'C','C','C','8','4',0,2,'C','C','C','9','1',0,'C','C','C','9','1',0,
-2,'C','C','C','1','0','3',0,'C','C','C','1','0','3',0,2,'C','C','C','1','0','7',0,'C','C','C','1','0','7',0,
-2,'C','C','C','1','1','8',0,'C','C','C','1','1','8',0,2,'C','C','C','1','2','2',0,'C','C','C','1','2','2',0,
-2,'C','C','C','1','2','9',0,'C','C','C','1','2','9',0,2,'C','C','C','1','3','0',0,'C','C','C','1','3','0',0,
-2,'C','C','C','1','3','2',0,'C','C','C','1','3','2',0,2,'C','C','C','1','3','3',0,'C','C','C','1','3','3',0,
-2,'A','T','B','L',0,'A','t','t','a','c','h','e','d','_','B','e','l','o','w','_','L','e','f','t',0,
-2,'A','T','B',0,'A','t','t','a','c','h','e','d','_','B','e','l','o','w',0,
-2,'A','T','A',0,'A','t','t','a','c','h','e','d','_','A','b','o','v','e',0,
-2,'A','T','A','R',0,'A','t','t','a','c','h','e','d','_','A','b','o','v','e','_','R','i','g','h','t',0,
-2,'B','L',0,'B','e','l','o','w','_','L','e','f','t',0,2,'B',0,'B','e','l','o','w',0,
-2,'B','R',0,'B','e','l','o','w','_','R','i','g','h','t',0,
-2,'L',0,'L','e','f','t',0,2,'R',0,'R','i','g','h','t',0,
-2,'A','L',0,'A','b','o','v','e','_','L','e','f','t',0,2,'A',0,'A','b','o','v','e',0,
-2,'A','R',0,'A','b','o','v','e','_','R','i','g','h','t',0,
-2,'D','B',0,'D','o','u','b','l','e','_','B','e','l','o','w',0,
-2,'D','A',0,'D','o','u','b','l','e','_','A','b','o','v','e',0,
-2,'I','S',0,'I','o','t','a','_','S','u','b','s','c','r','i','p','t',0,
-2,'A','H','e','x',0,'A','S','C','I','I','_','H','e','x','_','D','i','g','i','t',0,
-2,'B','i','d','i','_','C',0,'B','i','d','i','_','C','o','n','t','r','o','l',0,
-2,'B','i','d','i','_','M',0,'B','i','d','i','_','M','i','r','r','o','r','e','d',0,
-2,'D','a','s','h',0,'D','a','s','h',0,2,'D','I',0,'D','e','f','a','u','l','t','_','I','g','n','o','r','a','b','l','e',
-'_','C','o','d','e','_','P','o','i','n','t',0,2,'D','e','p',0,'D','e','p','r','e','c','a','t','e','d',0,
-2,'D','i','a',0,'D','i','a','c','r','i','t','i','c',0,2,'E','x','t',0,'E','x','t','e','n','d','e','r',0,
-2,'C','o','m','p','_','E','x',0,'F','u','l','l','_','C','o','m','p','o','s','i','t','i','o','n','_','E','x','c','l','u','s',
-'i','o','n',0,2,'G','r','_','B','a','s','e',0,'G','r','a','p','h','e','m','e','_','B','a','s','e',0,
-2,'G','r','_','E','x','t',0,'G','r','a','p','h','e','m','e','_','E','x','t','e','n','d',0,
-2,'G','r','_','L','i','n','k',0,'G','r','a','p','h','e','m','e','_','L','i','n','k',0,
-2,'H','e','x',0,'H','e','x','_','D','i','g','i','t',0,2,'H','y','p','h','e','n',0,'H','y','p','h','e','n',0,
-2,'I','D','C',0,'I','D','_','C','o','n','t','i','n','u','e',0,
-2,'I','D','S',0,'I','D','_','S','t','a','r','t',0,2,'I','d','e','o',0,'I','d','e','o','g','r','a','p','h','i','c',0,
-2,'I','D','S','B',0,'I','D','S','_','B','i','n','a','r','y','_','O','p','e','r','a','t','o','r',0,
-2,'I','D','S','T',0,'I','D','S','_','T','r','i','n','a','r','y','_','O','p','e','r','a','t','o','r',0,
-2,'J','o','i','n','_','C',0,'J','o','i','n','_','C','o','n','t','r','o','l',0,
-2,'L','O','E',0,'L','o','g','i','c','a','l','_','O','r','d','e','r','_','E','x','c','e','p','t','i','o','n',0,
-2,'L','o','w','e','r',0,'L','o','w','e','r','c','a','s','e',0,
-2,'M','a','t','h',0,'M','a','t','h',0,2,'N','C','h','a','r',0,'N','o','n','c','h','a','r','a','c','t','e','r','_','C',
-'o','d','e','_','P','o','i','n','t',0,2,'Q','M','a','r','k',0,'Q','u','o','t','a','t','i','o','n','_','M','a','r','k',0,
-2,'R','a','d','i','c','a','l',0,'R','a','d','i','c','a','l',0,
-2,'S','D',0,'S','o','f','t','_','D','o','t','t','e','d',0,
-2,'T','e','r','m',0,'T','e','r','m','i','n','a','l','_','P','u','n','c','t','u','a','t','i','o','n',0,
-2,'U','I','d','e','o',0,'U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h',0,
-2,'U','p','p','e','r',0,'U','p','p','e','r','c','a','s','e',0,
-3,'W','S','p','a','c','e',0,'W','h','i','t','e','_','S','p','a','c','e',0,'s','p','a','c','e',0,
-2,'X','I','D','C',0,'X','I','D','_','C','o','n','t','i','n','u','e',0,
-2,'X','I','D','S',0,'X','I','D','_','S','t','a','r','t',0,
-2,'S','e','n','s','i','t','i','v','e',0,'C','a','s','e','_','S','e','n','s','i','t','i','v','e',0,
-2,'S','T','e','r','m',0,'S','e','n','t','e','n','c','e','_','T','e','r','m','i','n','a','l',0,
-2,'V','S',0,'V','a','r','i','a','t','i','o','n','_','S','e','l','e','c','t','o','r',0,
-2,'n','f','d','i','n','e','r','t',0,'N','F','D','_','I','n','e','r','t',0,
-2,'n','f','k','d','i','n','e','r','t',0,'N','F','K','D','_','I','n','e','r','t',0,
-2,'n','f','c','i','n','e','r','t',0,'N','F','C','_','I','n','e','r','t',0,
-2,'n','f','k','c','i','n','e','r','t',0,'N','F','K','C','_','I','n','e','r','t',0,
-2,'s','e','g','s','t','a','r','t',0,'S','e','g','m','e','n','t','_','S','t','a','r','t','e','r',0,
-2,'P','a','t','_','S','y','n',0,'P','a','t','t','e','r','n','_','S','y','n','t','a','x',0,
-2,'P','a','t','_','W','S',0,'P','a','t','t','e','r','n','_','W','h','i','t','e','_','S','p','a','c','e',0,
-2,0,'a','l','n','u','m',0,2,0,'b','l','a','n','k',0,
-2,0,'g','r','a','p','h',0,2,0,'p','r','i','n','t',0,
-2,0,'x','d','i','g','i','t',0,2,'C','a','s','e','d',0,'C','a','s','e','d',0,
-2,'C','I',0,'C','a','s','e','_','I','g','n','o','r','a','b','l','e',0,
-2,'C','W','L',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','L','o','w','e','r','c','a','s','e','d',0,
-2,'C','W','U',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','U','p','p','e','r','c','a','s','e','d',0,
-2,'C','W','T',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','T','i','t','l','e','c','a','s','e','d',0,
-2,'C','W','C','F',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','C','a','s','e','f','o','l','d','e','d',0,
-2,'C','W','C','M',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','C','a','s','e','m','a','p','p','e','d',0,
-2,'C','W','K','C','F',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','N','F','K','C','_','C','a','s','e','f','o','l',
-'d','e','d',0,2,'E','m','o','j','i',0,'E','m','o','j','i',0,
-2,'E','P','r','e','s',0,'E','m','o','j','i','_','P','r','e','s','e','n','t','a','t','i','o','n',0,
-2,'E','M','o','d',0,'E','m','o','j','i','_','M','o','d','i','f','i','e','r',0,
-2,'E','B','a','s','e',0,'E','m','o','j','i','_','M','o','d','i','f','i','e','r','_','B','a','s','e',0,
-2,'E','C','o','m','p',0,'E','m','o','j','i','_','C','o','m','p','o','n','e','n','t',0,
-2,'R','I',0,'R','e','g','i','o','n','a','l','_','I','n','d','i','c','a','t','o','r',0,
-2,'P','C','M',0,'P','r','e','p','e','n','d','e','d','_','C','o','n','c','a','t','e','n','a','t','i','o','n','_','M','a','r',
-'k',0,2,'E','x','t','P','i','c','t',0,'E','x','t','e','n','d','e','d','_','P','i','c','t','o','g','r','a','p','h','i','c',
-0,2,'b','c',0,'B','i','d','i','_','C','l','a','s','s',0,
-2,'L',0,'L','e','f','t','_','T','o','_','R','i','g','h','t',0,
-2,'R',0,'R','i','g','h','t','_','T','o','_','L','e','f','t',0,
-2,'E','N',0,'E','u','r','o','p','e','a','n','_','N','u','m','b','e','r',0,
-2,'E','S',0,'E','u','r','o','p','e','a','n','_','S','e','p','a','r','a','t','o','r',0,
-2,'E','T',0,'E','u','r','o','p','e','a','n','_','T','e','r','m','i','n','a','t','o','r',0,
-2,'A','N',0,'A','r','a','b','i','c','_','N','u','m','b','e','r',0,
-2,'C','S',0,'C','o','m','m','o','n','_','S','e','p','a','r','a','t','o','r',0,
-2,'B',0,'P','a','r','a','g','r','a','p','h','_','S','e','p','a','r','a','t','o','r',0,
-2,'S',0,'S','e','g','m','e','n','t','_','S','e','p','a','r','a','t','o','r',0,
-2,'W','S',0,'W','h','i','t','e','_','S','p','a','c','e',0,
-2,'O','N',0,'O','t','h','e','r','_','N','e','u','t','r','a','l',0,
-2,'L','R','E',0,'L','e','f','t','_','T','o','_','R','i','g','h','t','_','E','m','b','e','d','d','i','n','g',0,
-2,'L','R','O',0,'L','e','f','t','_','T','o','_','R','i','g','h','t','_','O','v','e','r','r','i','d','e',0,
-2,'A','L',0,'A','r','a','b','i','c','_','L','e','t','t','e','r',0,
-2,'R','L','E',0,'R','i','g','h','t','_','T','o','_','L','e','f','t','_','E','m','b','e','d','d','i','n','g',0,
-2,'R','L','O',0,'R','i','g','h','t','_','T','o','_','L','e','f','t','_','O','v','e','r','r','i','d','e',0,
-2,'P','D','F',0,'P','o','p','_','D','i','r','e','c','t','i','o','n','a','l','_','F','o','r','m','a','t',0,
-2,'N','S','M',0,'N','o','n','s','p','a','c','i','n','g','_','M','a','r','k',0,
-2,'B','N',0,'B','o','u','n','d','a','r','y','_','N','e','u','t','r','a','l',0,
-2,'F','S','I',0,'F','i','r','s','t','_','S','t','r','o','n','g','_','I','s','o','l','a','t','e',0,
-2,'L','R','I',0,'L','e','f','t','_','T','o','_','R','i','g','h','t','_','I','s','o','l','a','t','e',0,
-2,'R','L','I',0,'R','i','g','h','t','_','T','o','_','L','e','f','t','_','I','s','o','l','a','t','e',0,
-2,'P','D','I',0,'P','o','p','_','D','i','r','e','c','t','i','o','n','a','l','_','I','s','o','l','a','t','e',0,
-2,'b','l','k',0,'B','l','o','c','k',0,2,'N','B',0,'N','o','_','B','l','o','c','k',0,
-2,'A','S','C','I','I',0,'B','a','s','i','c','_','L','a','t','i','n',0,
-3,'L','a','t','i','n','_','1','_','S','u','p',0,'L','a','t','i','n','_','1','_','S','u','p','p','l','e','m','e','n','t',0,
-'L','a','t','i','n','_','1',0,2,'L','a','t','i','n','_','E','x','t','_','A',0,'L','a','t','i','n','_','E','x','t','e','n',
-'d','e','d','_','A',0,2,'L','a','t','i','n','_','E','x','t','_','B',0,'L','a','t','i','n','_','E','x','t','e','n','d','e',
-'d','_','B',0,2,'I','P','A','_','E','x','t',0,'I','P','A','_','E','x','t','e','n','s','i','o','n','s',0,
-2,'M','o','d','i','f','i','e','r','_','L','e','t','t','e','r','s',0,'S','p','a','c','i','n','g','_','M','o','d','i','f','i',
-'e','r','_','L','e','t','t','e','r','s',0,2,'D','i','a','c','r','i','t','i','c','a','l','s',0,
-'C','o','m','b','i','n','i','n','g','_','D','i','a','c','r','i','t','i','c','a','l','_','M','a','r','k','s',0,
-2,'G','r','e','e','k',0,'G','r','e','e','k','_','A','n','d','_','C','o','p','t','i','c',0,
-2,'C','y','r','i','l','l','i','c',0,'C','y','r','i','l','l','i','c',0,
-2,'A','r','m','e','n','i','a','n',0,'A','r','m','e','n','i','a','n',0,
-2,'H','e','b','r','e','w',0,'H','e','b','r','e','w',0,2,'A','r','a','b','i','c',0,'A','r','a','b','i','c',0,
-2,'S','y','r','i','a','c',0,'S','y','r','i','a','c',0,2,'T','h','a','a','n','a',0,'T','h','a','a','n','a',0,
-2,'D','e','v','a','n','a','g','a','r','i',0,'D','e','v','a','n','a','g','a','r','i',0,
-2,'B','e','n','g','a','l','i',0,'B','e','n','g','a','l','i',0,
-2,'G','u','r','m','u','k','h','i',0,'G','u','r','m','u','k','h','i',0,
-2,'G','u','j','a','r','a','t','i',0,'G','u','j','a','r','a','t','i',0,
-2,'O','r','i','y','a',0,'O','r','i','y','a',0,2,'T','a','m','i','l',0,'T','a','m','i','l',0,
-2,'T','e','l','u','g','u',0,'T','e','l','u','g','u',0,2,'K','a','n','n','a','d','a',0,
-'K','a','n','n','a','d','a',0,2,'M','a','l','a','y','a','l','a','m',0,'M','a','l','a','y','a','l','a','m',0,
-2,'S','i','n','h','a','l','a',0,'S','i','n','h','a','l','a',0,
-2,'T','h','a','i',0,'T','h','a','i',0,2,'L','a','o',0,'L','a','o',0,
-2,'T','i','b','e','t','a','n',0,'T','i','b','e','t','a','n',0,
-2,'M','y','a','n','m','a','r',0,'M','y','a','n','m','a','r',0,
-2,'G','e','o','r','g','i','a','n',0,'G','e','o','r','g','i','a','n',0,
-2,'J','a','m','o',0,'H','a','n','g','u','l','_','J','a','m','o',0,
-2,'E','t','h','i','o','p','i','c',0,'E','t','h','i','o','p','i','c',0,
-2,'C','h','e','r','o','k','e','e',0,'C','h','e','r','o','k','e','e',0,
-3,'U','C','A','S',0,'U','n','i','f','i','e','d','_','C','a','n','a','d','i','a','n','_','A','b','o','r','i','g','i','n','a',
-'l','_','S','y','l','l','a','b','i','c','s',0,'C','a','n','a','d','i','a','n','_','S','y','l','l','a','b','i','c','s',0,
-2,'O','g','h','a','m',0,'O','g','h','a','m',0,2,'R','u','n','i','c',0,'R','u','n','i','c',0,
-2,'K','h','m','e','r',0,'K','h','m','e','r',0,2,'M','o','n','g','o','l','i','a','n',0,
-'M','o','n','g','o','l','i','a','n',0,2,'L','a','t','i','n','_','E','x','t','_','A','d','d','i','t','i','o','n','a','l',0,
-'L','a','t','i','n','_','E','x','t','e','n','d','e','d','_','A','d','d','i','t','i','o','n','a','l',0,
-2,'G','r','e','e','k','_','E','x','t',0,'G','r','e','e','k','_','E','x','t','e','n','d','e','d',0,
-2,'P','u','n','c','t','u','a','t','i','o','n',0,'G','e','n','e','r','a','l','_','P','u','n','c','t','u','a','t','i','o','n',
-0,2,'S','u','p','e','r','_','A','n','d','_','S','u','b',0,'S','u','p','e','r','s','c','r','i','p','t','s','_','A','n','d',
-'_','S','u','b','s','c','r','i','p','t','s',0,2,'C','u','r','r','e','n','c','y','_','S','y','m','b','o','l','s',0,
-'C','u','r','r','e','n','c','y','_','S','y','m','b','o','l','s',0,
-3,'D','i','a','c','r','i','t','i','c','a','l','s','_','F','o','r','_','S','y','m','b','o','l','s',0,
-'C','o','m','b','i','n','i','n','g','_','D','i','a','c','r','i','t','i','c','a','l','_','M','a','r','k','s','_','F','o','r','_',
-'S','y','m','b','o','l','s',0,'C','o','m','b','i','n','i','n','g','_','M','a','r','k','s','_','F','o','r','_','S','y','m','b',
-'o','l','s',0,2,'L','e','t','t','e','r','l','i','k','e','_','S','y','m','b','o','l','s',0,
-'L','e','t','t','e','r','l','i','k','e','_','S','y','m','b','o','l','s',0,
-2,'N','u','m','b','e','r','_','F','o','r','m','s',0,'N','u','m','b','e','r','_','F','o','r','m','s',0,
-2,'A','r','r','o','w','s',0,'A','r','r','o','w','s',0,2,'M','a','t','h','_','O','p','e','r','a','t','o','r','s',0,
-'M','a','t','h','e','m','a','t','i','c','a','l','_','O','p','e','r','a','t','o','r','s',0,
-2,'M','i','s','c','_','T','e','c','h','n','i','c','a','l',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_','T','e',
-'c','h','n','i','c','a','l',0,2,'C','o','n','t','r','o','l','_','P','i','c','t','u','r','e','s',0,
-'C','o','n','t','r','o','l','_','P','i','c','t','u','r','e','s',0,
-2,'O','C','R',0,'O','p','t','i','c','a','l','_','C','h','a','r','a','c','t','e','r','_','R','e','c','o','g','n','i','t','i',
-'o','n',0,2,'E','n','c','l','o','s','e','d','_','A','l','p','h','a','n','u','m',0,'E','n','c','l','o','s','e','d','_','A',
-'l','p','h','a','n','u','m','e','r','i','c','s',0,2,'B','o','x','_','D','r','a','w','i','n','g',0,
-'B','o','x','_','D','r','a','w','i','n','g',0,2,'B','l','o','c','k','_','E','l','e','m','e','n','t','s',0,
-'B','l','o','c','k','_','E','l','e','m','e','n','t','s',0,2,'G','e','o','m','e','t','r','i','c','_','S','h','a','p','e','s',
-0,'G','e','o','m','e','t','r','i','c','_','S','h','a','p','e','s',0,
-2,'M','i','s','c','_','S','y','m','b','o','l','s',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_','S','y','m','b',
-'o','l','s',0,2,'D','i','n','g','b','a','t','s',0,'D','i','n','g','b','a','t','s',0,
-2,'B','r','a','i','l','l','e',0,'B','r','a','i','l','l','e','_','P','a','t','t','e','r','n','s',0,
-2,'C','J','K','_','R','a','d','i','c','a','l','s','_','S','u','p',0,'C','J','K','_','R','a','d','i','c','a','l','s','_','S',
-'u','p','p','l','e','m','e','n','t',0,2,'K','a','n','g','x','i',0,'K','a','n','g','x','i','_','R','a','d','i','c','a','l',
-'s',0,2,'I','D','C',0,'I','d','e','o','g','r','a','p','h','i','c','_','D','e','s','c','r','i','p','t','i','o','n','_','C',
-'h','a','r','a','c','t','e','r','s',0,2,'C','J','K','_','S','y','m','b','o','l','s',0,'C','J','K','_','S','y','m','b','o',
-'l','s','_','A','n','d','_','P','u','n','c','t','u','a','t','i','o','n',0,
-2,'H','i','r','a','g','a','n','a',0,'H','i','r','a','g','a','n','a',0,
-2,'K','a','t','a','k','a','n','a',0,'K','a','t','a','k','a','n','a',0,
-2,'B','o','p','o','m','o','f','o',0,'B','o','p','o','m','o','f','o',0,
-2,'C','o','m','p','a','t','_','J','a','m','o',0,'H','a','n','g','u','l','_','C','o','m','p','a','t','i','b','i','l','i','t',
-'y','_','J','a','m','o',0,2,'K','a','n','b','u','n',0,'K','a','n','b','u','n',0,
-2,'B','o','p','o','m','o','f','o','_','E','x','t',0,'B','o','p','o','m','o','f','o','_','E','x','t','e','n','d','e','d',0,
-2,'E','n','c','l','o','s','e','d','_','C','J','K',0,'E','n','c','l','o','s','e','d','_','C','J','K','_','L','e','t','t','e',
-'r','s','_','A','n','d','_','M','o','n','t','h','s',0,2,'C','J','K','_','C','o','m','p','a','t',0,
-'C','J','K','_','C','o','m','p','a','t','i','b','i','l','i','t','y',0,
-2,'C','J','K','_','E','x','t','_','A',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h',
-'s','_','E','x','t','e','n','s','i','o','n','_','A',0,2,'C','J','K',0,'C','J','K','_','U','n','i','f','i','e','d','_','I',
-'d','e','o','g','r','a','p','h','s',0,2,'Y','i','_','S','y','l','l','a','b','l','e','s',0,
-'Y','i','_','S','y','l','l','a','b','l','e','s',0,2,'Y','i','_','R','a','d','i','c','a','l','s',0,
-'Y','i','_','R','a','d','i','c','a','l','s',0,2,'H','a','n','g','u','l',0,'H','a','n','g','u','l','_','S','y','l','l','a',
-'b','l','e','s',0,2,'H','i','g','h','_','S','u','r','r','o','g','a','t','e','s',0,'H','i','g','h','_','S','u','r','r','o',
-'g','a','t','e','s',0,2,'H','i','g','h','_','P','U','_','S','u','r','r','o','g','a','t','e','s',0,
-'H','i','g','h','_','P','r','i','v','a','t','e','_','U','s','e','_','S','u','r','r','o','g','a','t','e','s',0,
-2,'L','o','w','_','S','u','r','r','o','g','a','t','e','s',0,'L','o','w','_','S','u','r','r','o','g','a','t','e','s',0,
-3,'P','U','A',0,'P','r','i','v','a','t','e','_','U','s','e','_','A','r','e','a',0,'P','r','i','v','a','t','e','_','U','s',
-'e',0,2,'C','J','K','_','C','o','m','p','a','t','_','I','d','e','o','g','r','a','p','h','s',0,
-'C','J','K','_','C','o','m','p','a','t','i','b','i','l','i','t','y','_','I','d','e','o','g','r','a','p','h','s',0,
-2,'A','l','p','h','a','b','e','t','i','c','_','P','F',0,'A','l','p','h','a','b','e','t','i','c','_','P','r','e','s','e','n',
-'t','a','t','i','o','n','_','F','o','r','m','s',0,3,'A','r','a','b','i','c','_','P','F','_','A',0,
-'A','r','a','b','i','c','_','P','r','e','s','e','n','t','a','t','i','o','n','_','F','o','r','m','s','_','A',0,
-'A','r','a','b','i','c','_','P','r','e','s','e','n','t','a','t','i','o','n','_','F','o','r','m','s','-','A',0,
-2,'H','a','l','f','_','M','a','r','k','s',0,'C','o','m','b','i','n','i','n','g','_','H','a','l','f','_','M','a','r','k','s',
-0,2,'C','J','K','_','C','o','m','p','a','t','_','F','o','r','m','s',0,'C','J','K','_','C','o','m','p','a','t','i','b','i',
-'l','i','t','y','_','F','o','r','m','s',0,2,'S','m','a','l','l','_','F','o','r','m','s',0,
-'S','m','a','l','l','_','F','o','r','m','_','V','a','r','i','a','n','t','s',0,
-2,'A','r','a','b','i','c','_','P','F','_','B',0,'A','r','a','b','i','c','_','P','r','e','s','e','n','t','a','t','i','o','n',
-'_','F','o','r','m','s','_','B',0,2,'S','p','e','c','i','a','l','s',0,'S','p','e','c','i','a','l','s',0,
-2,'H','a','l','f','_','A','n','d','_','F','u','l','l','_','F','o','r','m','s',0,'H','a','l','f','w','i','d','t','h','_','A',
-'n','d','_','F','u','l','l','w','i','d','t','h','_','F','o','r','m','s',0,
-2,'O','l','d','_','I','t','a','l','i','c',0,'O','l','d','_','I','t','a','l','i','c',0,
-2,'G','o','t','h','i','c',0,'G','o','t','h','i','c',0,2,'D','e','s','e','r','e','t',0,
-'D','e','s','e','r','e','t',0,2,'B','y','z','a','n','t','i','n','e','_','M','u','s','i','c',0,
-'B','y','z','a','n','t','i','n','e','_','M','u','s','i','c','a','l','_','S','y','m','b','o','l','s',0,
-2,'M','u','s','i','c',0,'M','u','s','i','c','a','l','_','S','y','m','b','o','l','s',0,
-2,'M','a','t','h','_','A','l','p','h','a','n','u','m',0,'M','a','t','h','e','m','a','t','i','c','a','l','_','A','l','p','h',
-'a','n','u','m','e','r','i','c','_','S','y','m','b','o','l','s',0,
-2,'C','J','K','_','E','x','t','_','B',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h',
-'s','_','E','x','t','e','n','s','i','o','n','_','B',0,2,'C','J','K','_','C','o','m','p','a','t','_','I','d','e','o','g','r',
-'a','p','h','s','_','S','u','p',0,'C','J','K','_','C','o','m','p','a','t','i','b','i','l','i','t','y','_','I','d','e','o','g',
-'r','a','p','h','s','_','S','u','p','p','l','e','m','e','n','t',0,
-2,'T','a','g','s',0,'T','a','g','s',0,3,'C','y','r','i','l','l','i','c','_','S','u','p',0,
-'C','y','r','i','l','l','i','c','_','S','u','p','p','l','e','m','e','n','t',0,'C','y','r','i','l','l','i','c','_','S','u','p',
-'p','l','e','m','e','n','t','a','r','y',0,2,'T','a','g','a','l','o','g',0,'T','a','g','a','l','o','g',0,
-2,'H','a','n','u','n','o','o',0,'H','a','n','u','n','o','o',0,
-2,'B','u','h','i','d',0,'B','u','h','i','d',0,2,'T','a','g','b','a','n','w','a',0,'T','a','g','b','a','n','w','a',0,
-2,'M','i','s','c','_','M','a','t','h','_','S','y','m','b','o','l','s','_','A',0,'M','i','s','c','e','l','l','a','n','e','o',
-'u','s','_','M','a','t','h','e','m','a','t','i','c','a','l','_','S','y','m','b','o','l','s','_','A',0,
-2,'S','u','p','_','A','r','r','o','w','s','_','A',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','A','r','r','o','w',
-'s','_','A',0,2,'S','u','p','_','A','r','r','o','w','s','_','B',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','A',
-'r','r','o','w','s','_','B',0,2,'M','i','s','c','_','M','a','t','h','_','S','y','m','b','o','l','s','_','B',0,
-'M','i','s','c','e','l','l','a','n','e','o','u','s','_','M','a','t','h','e','m','a','t','i','c','a','l','_','S','y','m','b','o',
-'l','s','_','B',0,2,'S','u','p','_','M','a','t','h','_','O','p','e','r','a','t','o','r','s',0,
-'S','u','p','p','l','e','m','e','n','t','a','l','_','M','a','t','h','e','m','a','t','i','c','a','l','_','O','p','e','r','a','t',
-'o','r','s',0,2,'K','a','t','a','k','a','n','a','_','E','x','t',0,'K','a','t','a','k','a','n','a','_','P','h','o','n','e',
-'t','i','c','_','E','x','t','e','n','s','i','o','n','s',0,2,'V','S',0,'V','a','r','i','a','t','i','o','n','_','S','e','l',
-'e','c','t','o','r','s',0,2,'S','u','p','_','P','U','A','_','A',0,'S','u','p','p','l','e','m','e','n','t','a','r','y','_',
-'P','r','i','v','a','t','e','_','U','s','e','_','A','r','e','a','_','A',0,
-2,'S','u','p','_','P','U','A','_','B',0,'S','u','p','p','l','e','m','e','n','t','a','r','y','_','P','r','i','v','a','t','e',
-'_','U','s','e','_','A','r','e','a','_','B',0,2,'L','i','m','b','u',0,'L','i','m','b','u',0,
-2,'T','a','i','_','L','e',0,'T','a','i','_','L','e',0,2,'K','h','m','e','r','_','S','y','m','b','o','l','s',0,
-'K','h','m','e','r','_','S','y','m','b','o','l','s',0,2,'P','h','o','n','e','t','i','c','_','E','x','t',0,
-'P','h','o','n','e','t','i','c','_','E','x','t','e','n','s','i','o','n','s',0,
-2,'M','i','s','c','_','A','r','r','o','w','s',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_','S','y','m','b','o',
-'l','s','_','A','n','d','_','A','r','r','o','w','s',0,2,'Y','i','j','i','n','g',0,'Y','i','j','i','n','g','_','H','e','x',
-'a','g','r','a','m','_','S','y','m','b','o','l','s',0,2,'L','i','n','e','a','r','_','B','_','S','y','l','l','a','b','a','r',
-'y',0,'L','i','n','e','a','r','_','B','_','S','y','l','l','a','b','a','r','y',0,
-2,'L','i','n','e','a','r','_','B','_','I','d','e','o','g','r','a','m','s',0,'L','i','n','e','a','r','_','B','_','I','d','e',
-'o','g','r','a','m','s',0,2,'A','e','g','e','a','n','_','N','u','m','b','e','r','s',0,'A','e','g','e','a','n','_','N','u',
-'m','b','e','r','s',0,2,'U','g','a','r','i','t','i','c',0,'U','g','a','r','i','t','i','c',0,
-2,'S','h','a','v','i','a','n',0,'S','h','a','v','i','a','n',0,
-2,'O','s','m','a','n','y','a',0,'O','s','m','a','n','y','a',0,
-2,'C','y','p','r','i','o','t','_','S','y','l','l','a','b','a','r','y',0,'C','y','p','r','i','o','t','_','S','y','l','l','a',
-'b','a','r','y',0,2,'T','a','i','_','X','u','a','n','_','J','i','n','g',0,'T','a','i','_','X','u','a','n','_','J','i','n',
-'g','_','S','y','m','b','o','l','s',0,2,'V','S','_','S','u','p',0,'V','a','r','i','a','t','i','o','n','_','S','e','l','e',
-'c','t','o','r','s','_','S','u','p','p','l','e','m','e','n','t',0,
-2,'A','n','c','i','e','n','t','_','G','r','e','e','k','_','M','u','s','i','c',0,'A','n','c','i','e','n','t','_','G','r','e',
-'e','k','_','M','u','s','i','c','a','l','_','N','o','t','a','t','i','o','n',0,
-2,'A','n','c','i','e','n','t','_','G','r','e','e','k','_','N','u','m','b','e','r','s',0,'A','n','c','i','e','n','t','_','G',
-'r','e','e','k','_','N','u','m','b','e','r','s',0,2,'A','r','a','b','i','c','_','S','u','p',0,
-'A','r','a','b','i','c','_','S','u','p','p','l','e','m','e','n','t',0,
-2,'B','u','g','i','n','e','s','e',0,'B','u','g','i','n','e','s','e',0,
-2,'C','J','K','_','S','t','r','o','k','e','s',0,'C','J','K','_','S','t','r','o','k','e','s',0,
-2,'D','i','a','c','r','i','t','i','c','a','l','s','_','S','u','p',0,'C','o','m','b','i','n','i','n','g','_','D','i','a','c',
-'r','i','t','i','c','a','l','_','M','a','r','k','s','_','S','u','p','p','l','e','m','e','n','t',0,
-2,'C','o','p','t','i','c',0,'C','o','p','t','i','c',0,2,'E','t','h','i','o','p','i','c','_','E','x','t',0,
-'E','t','h','i','o','p','i','c','_','E','x','t','e','n','d','e','d',0,
-2,'E','t','h','i','o','p','i','c','_','S','u','p',0,'E','t','h','i','o','p','i','c','_','S','u','p','p','l','e','m','e','n',
-'t',0,2,'G','e','o','r','g','i','a','n','_','S','u','p',0,'G','e','o','r','g','i','a','n','_','S','u','p','p','l','e','m',
-'e','n','t',0,2,'G','l','a','g','o','l','i','t','i','c',0,'G','l','a','g','o','l','i','t','i','c',0,
-2,'K','h','a','r','o','s','h','t','h','i',0,'K','h','a','r','o','s','h','t','h','i',0,
-2,'M','o','d','i','f','i','e','r','_','T','o','n','e','_','L','e','t','t','e','r','s',0,'M','o','d','i','f','i','e','r','_',
-'T','o','n','e','_','L','e','t','t','e','r','s',0,2,'N','e','w','_','T','a','i','_','L','u','e',0,
-'N','e','w','_','T','a','i','_','L','u','e',0,2,'O','l','d','_','P','e','r','s','i','a','n',0,
-'O','l','d','_','P','e','r','s','i','a','n',0,2,'P','h','o','n','e','t','i','c','_','E','x','t','_','S','u','p',0,
-'P','h','o','n','e','t','i','c','_','E','x','t','e','n','s','i','o','n','s','_','S','u','p','p','l','e','m','e','n','t',0,
-2,'S','u','p','_','P','u','n','c','t','u','a','t','i','o','n',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','P','u',
-'n','c','t','u','a','t','i','o','n',0,2,'S','y','l','o','t','i','_','N','a','g','r','i',0,
-'S','y','l','o','t','i','_','N','a','g','r','i',0,2,'T','i','f','i','n','a','g','h',0,'T','i','f','i','n','a','g','h',0,
-2,'V','e','r','t','i','c','a','l','_','F','o','r','m','s',0,'V','e','r','t','i','c','a','l','_','F','o','r','m','s',0,
-2,'N','K','o',0,'N','K','o',0,2,'B','a','l','i','n','e','s','e',0,'B','a','l','i','n','e','s','e',0,
-2,'L','a','t','i','n','_','E','x','t','_','C',0,'L','a','t','i','n','_','E','x','t','e','n','d','e','d','_','C',0,
-2,'L','a','t','i','n','_','E','x','t','_','D',0,'L','a','t','i','n','_','E','x','t','e','n','d','e','d','_','D',0,
-2,'P','h','a','g','s','_','P','a',0,'P','h','a','g','s','_','P','a',0,
-2,'P','h','o','e','n','i','c','i','a','n',0,'P','h','o','e','n','i','c','i','a','n',0,
-2,'C','u','n','e','i','f','o','r','m',0,'C','u','n','e','i','f','o','r','m',0,
-2,'C','u','n','e','i','f','o','r','m','_','N','u','m','b','e','r','s',0,'C','u','n','e','i','f','o','r','m','_','N','u','m',
-'b','e','r','s','_','A','n','d','_','P','u','n','c','t','u','a','t','i','o','n',0,
-2,'C','o','u','n','t','i','n','g','_','R','o','d',0,'C','o','u','n','t','i','n','g','_','R','o','d','_','N','u','m','e','r',
-'a','l','s',0,2,'S','u','n','d','a','n','e','s','e',0,'S','u','n','d','a','n','e','s','e',0,
-2,'L','e','p','c','h','a',0,'L','e','p','c','h','a',0,2,'O','l','_','C','h','i','k','i',0,
-'O','l','_','C','h','i','k','i',0,2,'C','y','r','i','l','l','i','c','_','E','x','t','_','A',0,
-'C','y','r','i','l','l','i','c','_','E','x','t','e','n','d','e','d','_','A',0,
-2,'V','a','i',0,'V','a','i',0,2,'C','y','r','i','l','l','i','c','_','E','x','t','_','B',0,
-'C','y','r','i','l','l','i','c','_','E','x','t','e','n','d','e','d','_','B',0,
-2,'S','a','u','r','a','s','h','t','r','a',0,'S','a','u','r','a','s','h','t','r','a',0,
-2,'K','a','y','a','h','_','L','i',0,'K','a','y','a','h','_','L','i',0,
-2,'R','e','j','a','n','g',0,'R','e','j','a','n','g',0,2,'C','h','a','m',0,'C','h','a','m',0,
-2,'A','n','c','i','e','n','t','_','S','y','m','b','o','l','s',0,'A','n','c','i','e','n','t','_','S','y','m','b','o','l','s',
-0,2,'P','h','a','i','s','t','o','s',0,'P','h','a','i','s','t','o','s','_','D','i','s','c',0,
-2,'L','y','c','i','a','n',0,'L','y','c','i','a','n',0,2,'C','a','r','i','a','n',0,'C','a','r','i','a','n',0,
-2,'L','y','d','i','a','n',0,'L','y','d','i','a','n',0,2,'M','a','h','j','o','n','g',0,
-'M','a','h','j','o','n','g','_','T','i','l','e','s',0,2,'D','o','m','i','n','o',0,'D','o','m','i','n','o','_','T','i','l',
-'e','s',0,2,'S','a','m','a','r','i','t','a','n',0,'S','a','m','a','r','i','t','a','n',0,
-2,'U','C','A','S','_','E','x','t',0,'U','n','i','f','i','e','d','_','C','a','n','a','d','i','a','n','_','A','b','o','r','i',
-'g','i','n','a','l','_','S','y','l','l','a','b','i','c','s','_','E','x','t','e','n','d','e','d',0,
-2,'T','a','i','_','T','h','a','m',0,'T','a','i','_','T','h','a','m',0,
-2,'V','e','d','i','c','_','E','x','t',0,'V','e','d','i','c','_','E','x','t','e','n','s','i','o','n','s',0,
-2,'L','i','s','u',0,'L','i','s','u',0,2,'B','a','m','u','m',0,'B','a','m','u','m',0,
-2,'I','n','d','i','c','_','N','u','m','b','e','r','_','F','o','r','m','s',0,'C','o','m','m','o','n','_','I','n','d','i','c',
-'_','N','u','m','b','e','r','_','F','o','r','m','s',0,2,'D','e','v','a','n','a','g','a','r','i','_','E','x','t',0,
-'D','e','v','a','n','a','g','a','r','i','_','E','x','t','e','n','d','e','d',0,
-2,'J','a','m','o','_','E','x','t','_','A',0,'H','a','n','g','u','l','_','J','a','m','o','_','E','x','t','e','n','d','e','d',
-'_','A',0,2,'J','a','v','a','n','e','s','e',0,'J','a','v','a','n','e','s','e',0,
-2,'M','y','a','n','m','a','r','_','E','x','t','_','A',0,'M','y','a','n','m','a','r','_','E','x','t','e','n','d','e','d','_',
-'A',0,2,'T','a','i','_','V','i','e','t',0,'T','a','i','_','V','i','e','t',0,
-2,'M','e','e','t','e','i','_','M','a','y','e','k',0,'M','e','e','t','e','i','_','M','a','y','e','k',0,
-2,'J','a','m','o','_','E','x','t','_','B',0,'H','a','n','g','u','l','_','J','a','m','o','_','E','x','t','e','n','d','e','d',
-'_','B',0,2,'I','m','p','e','r','i','a','l','_','A','r','a','m','a','i','c',0,'I','m','p','e','r','i','a','l','_','A','r',
-'a','m','a','i','c',0,2,'O','l','d','_','S','o','u','t','h','_','A','r','a','b','i','a','n',0,
-'O','l','d','_','S','o','u','t','h','_','A','r','a','b','i','a','n',0,
-2,'A','v','e','s','t','a','n',0,'A','v','e','s','t','a','n',0,
-2,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','r','t','h','i','a','n',0,
-'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','r','t','h','i','a','n',0,
-2,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','h','l','a','v','i',0,'I','n','s','c','r','i','p','t','i',
-'o','n','a','l','_','P','a','h','l','a','v','i',0,2,'O','l','d','_','T','u','r','k','i','c',0,
-'O','l','d','_','T','u','r','k','i','c',0,2,'R','u','m','i',0,'R','u','m','i','_','N','u','m','e','r','a','l','_','S','y',
-'m','b','o','l','s',0,2,'K','a','i','t','h','i',0,'K','a','i','t','h','i',0,
-2,'E','g','y','p','t','i','a','n','_','H','i','e','r','o','g','l','y','p','h','s',0,'E','g','y','p','t','i','a','n','_','H',
-'i','e','r','o','g','l','y','p','h','s',0,2,'E','n','c','l','o','s','e','d','_','A','l','p','h','a','n','u','m','_','S','u',
-'p',0,'E','n','c','l','o','s','e','d','_','A','l','p','h','a','n','u','m','e','r','i','c','_','S','u','p','p','l','e','m','e',
-'n','t',0,2,'E','n','c','l','o','s','e','d','_','I','d','e','o','g','r','a','p','h','i','c','_','S','u','p',0,
-'E','n','c','l','o','s','e','d','_','I','d','e','o','g','r','a','p','h','i','c','_','S','u','p','p','l','e','m','e','n','t',0,
-2,'C','J','K','_','E','x','t','_','C',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h',
-'s','_','E','x','t','e','n','s','i','o','n','_','C',0,2,'M','a','n','d','a','i','c',0,'M','a','n','d','a','i','c',0,
-2,'B','a','t','a','k',0,'B','a','t','a','k',0,2,'E','t','h','i','o','p','i','c','_','E','x','t','_','A',0,
-'E','t','h','i','o','p','i','c','_','E','x','t','e','n','d','e','d','_','A',0,
-2,'B','r','a','h','m','i',0,'B','r','a','h','m','i',0,2,'B','a','m','u','m','_','S','u','p',0,
-'B','a','m','u','m','_','S','u','p','p','l','e','m','e','n','t',0,
-2,'K','a','n','a','_','S','u','p',0,'K','a','n','a','_','S','u','p','p','l','e','m','e','n','t',0,
-2,'P','l','a','y','i','n','g','_','C','a','r','d','s',0,'P','l','a','y','i','n','g','_','C','a','r','d','s',0,
-2,'M','i','s','c','_','P','i','c','t','o','g','r','a','p','h','s',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_',
-'S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o','g','r','a','p','h','s',0,
-2,'E','m','o','t','i','c','o','n','s',0,'E','m','o','t','i','c','o','n','s',0,
-2,'T','r','a','n','s','p','o','r','t','_','A','n','d','_','M','a','p',0,'T','r','a','n','s','p','o','r','t','_','A','n','d',
-'_','M','a','p','_','S','y','m','b','o','l','s',0,2,'A','l','c','h','e','m','i','c','a','l',0,
-'A','l','c','h','e','m','i','c','a','l','_','S','y','m','b','o','l','s',0,
-2,'C','J','K','_','E','x','t','_','D',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h',
-'s','_','E','x','t','e','n','s','i','o','n','_','D',0,2,'A','r','a','b','i','c','_','E','x','t','_','A',0,
-'A','r','a','b','i','c','_','E','x','t','e','n','d','e','d','_','A',0,
-2,'A','r','a','b','i','c','_','M','a','t','h',0,'A','r','a','b','i','c','_','M','a','t','h','e','m','a','t','i','c','a','l',
-'_','A','l','p','h','a','b','e','t','i','c','_','S','y','m','b','o','l','s',0,
-2,'C','h','a','k','m','a',0,'C','h','a','k','m','a',0,2,'M','e','e','t','e','i','_','M','a','y','e','k','_','E','x','t',
-0,'M','e','e','t','e','i','_','M','a','y','e','k','_','E','x','t','e','n','s','i','o','n','s',0,
-2,'M','e','r','o','i','t','i','c','_','C','u','r','s','i','v','e',0,'M','e','r','o','i','t','i','c','_','C','u','r','s','i',
-'v','e',0,2,'M','e','r','o','i','t','i','c','_','H','i','e','r','o','g','l','y','p','h','s',0,
-'M','e','r','o','i','t','i','c','_','H','i','e','r','o','g','l','y','p','h','s',0,
-2,'M','i','a','o',0,'M','i','a','o',0,2,'S','h','a','r','a','d','a',0,'S','h','a','r','a','d','a',0,
-2,'S','o','r','a','_','S','o','m','p','e','n','g',0,'S','o','r','a','_','S','o','m','p','e','n','g',0,
-2,'S','u','n','d','a','n','e','s','e','_','S','u','p',0,'S','u','n','d','a','n','e','s','e','_','S','u','p','p','l','e','m',
-'e','n','t',0,2,'T','a','k','r','i',0,'T','a','k','r','i',0,
-2,'B','a','s','s','a','_','V','a','h',0,'B','a','s','s','a','_','V','a','h',0,
-2,'C','a','u','c','a','s','i','a','n','_','A','l','b','a','n','i','a','n',0,'C','a','u','c','a','s','i','a','n','_','A','l',
-'b','a','n','i','a','n',0,2,'C','o','p','t','i','c','_','E','p','a','c','t','_','N','u','m','b','e','r','s',0,
-'C','o','p','t','i','c','_','E','p','a','c','t','_','N','u','m','b','e','r','s',0,
-2,'D','i','a','c','r','i','t','i','c','a','l','s','_','E','x','t',0,'C','o','m','b','i','n','i','n','g','_','D','i','a','c',
-'r','i','t','i','c','a','l','_','M','a','r','k','s','_','E','x','t','e','n','d','e','d',0,
-2,'D','u','p','l','o','y','a','n',0,'D','u','p','l','o','y','a','n',0,
-2,'E','l','b','a','s','a','n',0,'E','l','b','a','s','a','n',0,
-2,'G','e','o','m','e','t','r','i','c','_','S','h','a','p','e','s','_','E','x','t',0,'G','e','o','m','e','t','r','i','c','_',
-'S','h','a','p','e','s','_','E','x','t','e','n','d','e','d',0,
-2,'G','r','a','n','t','h','a',0,'G','r','a','n','t','h','a',0,
-2,'K','h','o','j','k','i',0,'K','h','o','j','k','i',0,2,'K','h','u','d','a','w','a','d','i',0,
-'K','h','u','d','a','w','a','d','i',0,2,'L','a','t','i','n','_','E','x','t','_','E',0,'L','a','t','i','n','_','E','x','t',
-'e','n','d','e','d','_','E',0,2,'L','i','n','e','a','r','_','A',0,'L','i','n','e','a','r','_','A',0,
-2,'M','a','h','a','j','a','n','i',0,'M','a','h','a','j','a','n','i',0,
-2,'M','a','n','i','c','h','a','e','a','n',0,'M','a','n','i','c','h','a','e','a','n',0,
-2,'M','e','n','d','e','_','K','i','k','a','k','u','i',0,'M','e','n','d','e','_','K','i','k','a','k','u','i',0,
-2,'M','o','d','i',0,'M','o','d','i',0,2,'M','r','o',0,'M','r','o',0,
-2,'M','y','a','n','m','a','r','_','E','x','t','_','B',0,'M','y','a','n','m','a','r','_','E','x','t','e','n','d','e','d','_',
-'B',0,2,'N','a','b','a','t','a','e','a','n',0,'N','a','b','a','t','a','e','a','n',0,
-2,'O','l','d','_','N','o','r','t','h','_','A','r','a','b','i','a','n',0,'O','l','d','_','N','o','r','t','h','_','A','r','a',
-'b','i','a','n',0,2,'O','l','d','_','P','e','r','m','i','c',0,'O','l','d','_','P','e','r','m','i','c',0,
-2,'O','r','n','a','m','e','n','t','a','l','_','D','i','n','g','b','a','t','s',0,'O','r','n','a','m','e','n','t','a','l','_',
-'D','i','n','g','b','a','t','s',0,2,'P','a','h','a','w','h','_','H','m','o','n','g',0,'P','a','h','a','w','h','_','H','m',
-'o','n','g',0,2,'P','a','l','m','y','r','e','n','e',0,'P','a','l','m','y','r','e','n','e',0,
-2,'P','a','u','_','C','i','n','_','H','a','u',0,'P','a','u','_','C','i','n','_','H','a','u',0,
-2,'P','s','a','l','t','e','r','_','P','a','h','l','a','v','i',0,'P','s','a','l','t','e','r','_','P','a','h','l','a','v','i',
-0,2,'S','h','o','r','t','h','a','n','d','_','F','o','r','m','a','t','_','C','o','n','t','r','o','l','s',0,
-'S','h','o','r','t','h','a','n','d','_','F','o','r','m','a','t','_','C','o','n','t','r','o','l','s',0,
-2,'S','i','d','d','h','a','m',0,'S','i','d','d','h','a','m',0,
-2,'S','i','n','h','a','l','a','_','A','r','c','h','a','i','c','_','N','u','m','b','e','r','s',0,
-'S','i','n','h','a','l','a','_','A','r','c','h','a','i','c','_','N','u','m','b','e','r','s',0,
-2,'S','u','p','_','A','r','r','o','w','s','_','C',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','A','r','r','o','w',
-'s','_','C',0,2,'T','i','r','h','u','t','a',0,'T','i','r','h','u','t','a',0,
-2,'W','a','r','a','n','g','_','C','i','t','i',0,'W','a','r','a','n','g','_','C','i','t','i',0,
-2,'A','h','o','m',0,'A','h','o','m',0,2,'A','n','a','t','o','l','i','a','n','_','H','i','e','r','o','g','l','y','p','h',
-'s',0,'A','n','a','t','o','l','i','a','n','_','H','i','e','r','o','g','l','y','p','h','s',0,
-2,'C','h','e','r','o','k','e','e','_','S','u','p',0,'C','h','e','r','o','k','e','e','_','S','u','p','p','l','e','m','e','n',
-'t',0,2,'C','J','K','_','E','x','t','_','E',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a',
-'p','h','s','_','E','x','t','e','n','s','i','o','n','_','E',0,
-2,'E','a','r','l','y','_','D','y','n','a','s','t','i','c','_','C','u','n','e','i','f','o','r','m',0,
-'E','a','r','l','y','_','D','y','n','a','s','t','i','c','_','C','u','n','e','i','f','o','r','m',0,
-2,'H','a','t','r','a','n',0,'H','a','t','r','a','n',0,2,'M','u','l','t','a','n','i',0,
-'M','u','l','t','a','n','i',0,2,'O','l','d','_','H','u','n','g','a','r','i','a','n',0,'O','l','d','_','H','u','n','g','a',
-'r','i','a','n',0,2,'S','u','p','_','S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o','g','r','a','p','h',
-'s',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o',
-'g','r','a','p','h','s',0,2,'S','u','t','t','o','n','_','S','i','g','n','W','r','i','t','i','n','g',0,
-'S','u','t','t','o','n','_','S','i','g','n','W','r','i','t','i','n','g',0,
-2,'A','d','l','a','m',0,'A','d','l','a','m',0,2,'B','h','a','i','k','s','u','k','i',0,
-'B','h','a','i','k','s','u','k','i',0,2,'C','y','r','i','l','l','i','c','_','E','x','t','_','C',0,
-'C','y','r','i','l','l','i','c','_','E','x','t','e','n','d','e','d','_','C',0,
-2,'G','l','a','g','o','l','i','t','i','c','_','S','u','p',0,'G','l','a','g','o','l','i','t','i','c','_','S','u','p','p','l',
-'e','m','e','n','t',0,2,'I','d','e','o','g','r','a','p','h','i','c','_','S','y','m','b','o','l','s',0,
-'I','d','e','o','g','r','a','p','h','i','c','_','S','y','m','b','o','l','s','_','A','n','d','_','P','u','n','c','t','u','a','t',
-'i','o','n',0,2,'M','a','r','c','h','e','n',0,'M','a','r','c','h','e','n',0,
-2,'M','o','n','g','o','l','i','a','n','_','S','u','p',0,'M','o','n','g','o','l','i','a','n','_','S','u','p','p','l','e','m',
-'e','n','t',0,2,'N','e','w','a',0,'N','e','w','a',0,2,'O','s','a','g','e',0,'O','s','a','g','e',0,
-2,'T','a','n','g','u','t',0,'T','a','n','g','u','t',0,2,'T','a','n','g','u','t','_','C','o','m','p','o','n','e','n','t',
-'s',0,'T','a','n','g','u','t','_','C','o','m','p','o','n','e','n','t','s',0,
-2,'C','J','K','_','E','x','t','_','F',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h',
-'s','_','E','x','t','e','n','s','i','o','n','_','F',0,2,'K','a','n','a','_','E','x','t','_','A',0,
-'K','a','n','a','_','E','x','t','e','n','d','e','d','_','A',0,
-2,'M','a','s','a','r','a','m','_','G','o','n','d','i',0,'M','a','s','a','r','a','m','_','G','o','n','d','i',0,
-2,'N','u','s','h','u',0,'N','u','s','h','u',0,2,'S','o','y','o','m','b','o',0,'S','o','y','o','m','b','o',0,
-2,'S','y','r','i','a','c','_','S','u','p',0,'S','y','r','i','a','c','_','S','u','p','p','l','e','m','e','n','t',0,
-2,'Z','a','n','a','b','a','z','a','r','_','S','q','u','a','r','e',0,'Z','a','n','a','b','a','z','a','r','_','S','q','u','a',
-'r','e',0,2,'C','h','e','s','s','_','S','y','m','b','o','l','s',0,'C','h','e','s','s','_','S','y','m','b','o','l','s',0,
-2,'D','o','g','r','a',0,'D','o','g','r','a',0,2,'G','e','o','r','g','i','a','n','_','E','x','t',0,
-'G','e','o','r','g','i','a','n','_','E','x','t','e','n','d','e','d',0,
-2,'G','u','n','j','a','l','a','_','G','o','n','d','i',0,'G','u','n','j','a','l','a','_','G','o','n','d','i',0,
-2,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a',0,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a',
-0,2,'I','n','d','i','c','_','S','i','y','a','q','_','N','u','m','b','e','r','s',0,'I','n','d','i','c','_','S','i','y','a',
-'q','_','N','u','m','b','e','r','s',0,2,'M','a','k','a','s','a','r',0,'M','a','k','a','s','a','r',0,
-2,'M','a','y','a','n','_','N','u','m','e','r','a','l','s',0,'M','a','y','a','n','_','N','u','m','e','r','a','l','s',0,
-2,'M','e','d','e','f','a','i','d','r','i','n',0,'M','e','d','e','f','a','i','d','r','i','n',0,
-2,'O','l','d','_','S','o','g','d','i','a','n',0,'O','l','d','_','S','o','g','d','i','a','n',0,
-2,'S','o','g','d','i','a','n',0,'S','o','g','d','i','a','n',0,
-2,'E','g','y','p','t','i','a','n','_','H','i','e','r','o','g','l','y','p','h','_','F','o','r','m','a','t','_','C','o','n','t',
-'r','o','l','s',0,'E','g','y','p','t','i','a','n','_','H','i','e','r','o','g','l','y','p','h','_','F','o','r','m','a','t','_',
-'C','o','n','t','r','o','l','s',0,2,'E','l','y','m','a','i','c',0,'E','l','y','m','a','i','c',0,
-2,'N','a','n','d','i','n','a','g','a','r','i',0,'N','a','n','d','i','n','a','g','a','r','i',0,
-2,'N','y','i','a','k','e','n','g','_','P','u','a','c','h','u','e','_','H','m','o','n','g',0,
-'N','y','i','a','k','e','n','g','_','P','u','a','c','h','u','e','_','H','m','o','n','g',0,
-2,'O','t','t','o','m','a','n','_','S','i','y','a','q','_','N','u','m','b','e','r','s',0,'O','t','t','o','m','a','n','_','S',
-'i','y','a','q','_','N','u','m','b','e','r','s',0,2,'S','m','a','l','l','_','K','a','n','a','_','E','x','t',0,
-'S','m','a','l','l','_','K','a','n','a','_','E','x','t','e','n','s','i','o','n',0,
-2,'S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o','g','r','a','p','h','s','_','E','x','t','_','A',0,
-'S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o','g','r','a','p','h','s','_','E','x','t','e','n','d','e','d',
-'_','A',0,2,'T','a','m','i','l','_','S','u','p',0,'T','a','m','i','l','_','S','u','p','p','l','e','m','e','n','t',0,
-2,'W','a','n','c','h','o',0,'W','a','n','c','h','o',0,2,'C','h','o','r','a','s','m','i','a','n',0,
-'C','h','o','r','a','s','m','i','a','n',0,2,'C','J','K','_','E','x','t','_','G',0,'C','J','K','_','U','n','i','f','i','e',
-'d','_','I','d','e','o','g','r','a','p','h','s','_','E','x','t','e','n','s','i','o','n','_','G',0,
-2,'D','i','v','e','s','_','A','k','u','r','u',0,'D','i','v','e','s','_','A','k','u','r','u',0,
-2,'K','h','i','t','a','n','_','S','m','a','l','l','_','S','c','r','i','p','t',0,'K','h','i','t','a','n','_','S','m','a','l',
-'l','_','S','c','r','i','p','t',0,2,'L','i','s','u','_','S','u','p',0,'L','i','s','u','_','S','u','p','p','l','e','m','e',
-'n','t',0,2,'S','y','m','b','o','l','s','_','F','o','r','_','L','e','g','a','c','y','_','C','o','m','p','u','t','i','n','g',
-0,'S','y','m','b','o','l','s','_','F','o','r','_','L','e','g','a','c','y','_','C','o','m','p','u','t','i','n','g',0,
-2,'T','a','n','g','u','t','_','S','u','p',0,'T','a','n','g','u','t','_','S','u','p','p','l','e','m','e','n','t',0,
-2,'Y','e','z','i','d','i',0,'Y','e','z','i','d','i',0,2,'c','c','c',0,'C','a','n','o','n','i','c','a','l','_','C','o',
-'m','b','i','n','i','n','g','_','C','l','a','s','s',0,2,'d','t',0,'D','e','c','o','m','p','o','s','i','t','i','o','n','_',
-'T','y','p','e',0,3,'N','o','n','e',0,'N','o','n','e',0,'n','o','n','e',0,
-3,'C','a','n',0,'C','a','n','o','n','i','c','a','l',0,'c','a','n',0,
-3,'C','o','m',0,'C','o','m','p','a','t',0,'c','o','m',0,
-3,'E','n','c',0,'C','i','r','c','l','e',0,'e','n','c',0,
-3,'F','i','n',0,'F','i','n','a','l',0,'f','i','n',0,3,'F','o','n','t',0,'F','o','n','t',0,
-'f','o','n','t',0,3,'F','r','a',0,'F','r','a','c','t','i','o','n',0,'f','r','a',0,
-3,'I','n','i','t',0,'I','n','i','t','i','a','l',0,'i','n','i','t',0,
-3,'I','s','o',0,'I','s','o','l','a','t','e','d',0,'i','s','o',0,
-3,'M','e','d',0,'M','e','d','i','a','l',0,'m','e','d',0,
-3,'N','a','r',0,'N','a','r','r','o','w',0,'n','a','r',0,
-3,'N','b',0,'N','o','b','r','e','a','k',0,'n','b',0,3,'S','m','l',0,'S','m','a','l','l',0,
-'s','m','l',0,3,'S','q','r',0,'S','q','u','a','r','e',0,'s','q','r',0,
-3,'S','u','b',0,'S','u','b',0,'s','u','b',0,3,'S','u','p',0,'S','u','p','e','r',0,
-'s','u','p',0,3,'V','e','r','t',0,'V','e','r','t','i','c','a','l',0,'v','e','r','t',0,
-3,'W','i','d','e',0,'W','i','d','e',0,'w','i','d','e',0,
-2,'e','a',0,'E','a','s','t','_','A','s','i','a','n','_','W','i','d','t','h',0,
-2,'N',0,'N','e','u','t','r','a','l',0,2,'A',0,'A','m','b','i','g','u','o','u','s',0,
-2,'H',0,'H','a','l','f','w','i','d','t','h',0,2,'F',0,'F','u','l','l','w','i','d','t','h',0,
-2,'N','a',0,'N','a','r','r','o','w',0,2,'W',0,'W','i','d','e',0,
-2,'g','c',0,'G','e','n','e','r','a','l','_','C','a','t','e','g','o','r','y',0,
-2,'C','n',0,'U','n','a','s','s','i','g','n','e','d',0,2,'L','u',0,'U','p','p','e','r','c','a','s','e','_','L','e','t',
-'t','e','r',0,2,'L','l',0,'L','o','w','e','r','c','a','s','e','_','L','e','t','t','e','r',0,
-2,'L','t',0,'T','i','t','l','e','c','a','s','e','_','L','e','t','t','e','r',0,
-2,'L','m',0,'M','o','d','i','f','i','e','r','_','L','e','t','t','e','r',0,
-2,'L','o',0,'O','t','h','e','r','_','L','e','t','t','e','r',0,
-2,'M','n',0,'N','o','n','s','p','a','c','i','n','g','_','M','a','r','k',0,
-2,'M','e',0,'E','n','c','l','o','s','i','n','g','_','M','a','r','k',0,
-2,'M','c',0,'S','p','a','c','i','n','g','_','M','a','r','k',0,
-3,'N','d',0,'D','e','c','i','m','a','l','_','N','u','m','b','e','r',0,'d','i','g','i','t',0,
-2,'N','l',0,'L','e','t','t','e','r','_','N','u','m','b','e','r',0,
-2,'N','o',0,'O','t','h','e','r','_','N','u','m','b','e','r',0,
-2,'Z','s',0,'S','p','a','c','e','_','S','e','p','a','r','a','t','o','r',0,
-2,'Z','l',0,'L','i','n','e','_','S','e','p','a','r','a','t','o','r',0,
-2,'Z','p',0,'P','a','r','a','g','r','a','p','h','_','S','e','p','a','r','a','t','o','r',0,
-3,'C','c',0,'C','o','n','t','r','o','l',0,'c','n','t','r','l',0,
-2,'C','f',0,'F','o','r','m','a','t',0,2,'C','o',0,'P','r','i','v','a','t','e','_','U','s','e',0,
-2,'C','s',0,'S','u','r','r','o','g','a','t','e',0,2,'P','d',0,'D','a','s','h','_','P','u','n','c','t','u','a','t','i',
-'o','n',0,2,'P','s',0,'O','p','e','n','_','P','u','n','c','t','u','a','t','i','o','n',0,
-2,'P','e',0,'C','l','o','s','e','_','P','u','n','c','t','u','a','t','i','o','n',0,
-2,'P','c',0,'C','o','n','n','e','c','t','o','r','_','P','u','n','c','t','u','a','t','i','o','n',0,
-2,'P','o',0,'O','t','h','e','r','_','P','u','n','c','t','u','a','t','i','o','n',0,
-2,'S','m',0,'M','a','t','h','_','S','y','m','b','o','l',0,
-2,'S','c',0,'C','u','r','r','e','n','c','y','_','S','y','m','b','o','l',0,
-2,'S','k',0,'M','o','d','i','f','i','e','r','_','S','y','m','b','o','l',0,
-2,'S','o',0,'O','t','h','e','r','_','S','y','m','b','o','l',0,
-2,'P','i',0,'I','n','i','t','i','a','l','_','P','u','n','c','t','u','a','t','i','o','n',0,
-2,'P','f',0,'F','i','n','a','l','_','P','u','n','c','t','u','a','t','i','o','n',0,
-2,'j','g',0,'J','o','i','n','i','n','g','_','G','r','o','u','p',0,
-2,'N','o','_','J','o','i','n','i','n','g','_','G','r','o','u','p',0,'N','o','_','J','o','i','n','i','n','g','_','G','r','o',
-'u','p',0,2,'A','i','n',0,'A','i','n',0,2,'A','l','a','p','h',0,'A','l','a','p','h',0,
-2,'A','l','e','f',0,'A','l','e','f',0,2,'B','e','h',0,'B','e','h',0,
-2,'B','e','t','h',0,'B','e','t','h',0,2,'D','a','l',0,'D','a','l',0,
-2,'D','a','l','a','t','h','_','R','i','s','h',0,'D','a','l','a','t','h','_','R','i','s','h',0,
-2,'E',0,'E',0,2,'F','e','h',0,'F','e','h',0,2,'F','i','n','a','l','_','S','e','m','k','a','t','h',0,
-'F','i','n','a','l','_','S','e','m','k','a','t','h',0,2,'G','a','f',0,'G','a','f',0,
-2,'G','a','m','a','l',0,'G','a','m','a','l',0,2,'H','a','h',0,'H','a','h',0,
-2,'T','e','h','_','M','a','r','b','u','t','a','_','G','o','a','l',0,'H','a','m','z','a','_','O','n','_','H','e','h','_','G',
-'o','a','l',0,2,'H','e',0,'H','e',0,2,'H','e','h',0,'H','e','h',0,
-2,'H','e','h','_','G','o','a','l',0,'H','e','h','_','G','o','a','l',0,
-2,'H','e','t','h',0,'H','e','t','h',0,2,'K','a','f',0,'K','a','f',0,
-2,'K','a','p','h',0,'K','a','p','h',0,2,'K','n','o','t','t','e','d','_','H','e','h',0,
-'K','n','o','t','t','e','d','_','H','e','h',0,2,'L','a','m',0,'L','a','m',0,
-2,'L','a','m','a','d','h',0,'L','a','m','a','d','h',0,2,'M','e','e','m',0,'M','e','e','m',0,
-2,'M','i','m',0,'M','i','m',0,2,'N','o','o','n',0,'N','o','o','n',0,
-2,'N','u','n',0,'N','u','n',0,2,'P','e',0,'P','e',0,
-2,'Q','a','f',0,'Q','a','f',0,2,'Q','a','p','h',0,'Q','a','p','h',0,
-2,'R','e','h',0,'R','e','h',0,2,'R','e','v','e','r','s','e','d','_','P','e',0,'R','e','v','e','r','s','e','d','_','P',
-'e',0,2,'S','a','d',0,'S','a','d',0,2,'S','a','d','h','e',0,'S','a','d','h','e',0,
-2,'S','e','e','n',0,'S','e','e','n',0,2,'S','e','m','k','a','t','h',0,'S','e','m','k','a','t','h',0,
-2,'S','h','i','n',0,'S','h','i','n',0,2,'S','w','a','s','h','_','K','a','f',0,'S','w','a','s','h','_','K','a','f',0,
-2,'S','y','r','i','a','c','_','W','a','w',0,'S','y','r','i','a','c','_','W','a','w',0,
-2,'T','a','h',0,'T','a','h',0,2,'T','a','w',0,'T','a','w',0,
-2,'T','e','h','_','M','a','r','b','u','t','a',0,'T','e','h','_','M','a','r','b','u','t','a',0,
-2,'T','e','t','h',0,'T','e','t','h',0,2,'W','a','w',0,'W','a','w',0,
-2,'Y','e','h',0,'Y','e','h',0,2,'Y','e','h','_','B','a','r','r','e','e',0,'Y','e','h','_','B','a','r','r','e','e',0,
-2,'Y','e','h','_','W','i','t','h','_','T','a','i','l',0,'Y','e','h','_','W','i','t','h','_','T','a','i','l',0,
-2,'Y','u','d','h',0,'Y','u','d','h',0,2,'Y','u','d','h','_','H','e',0,'Y','u','d','h','_','H','e',0,
-2,'Z','a','i','n',0,'Z','a','i','n',0,2,'F','e',0,'F','e',0,
-2,'K','h','a','p','h',0,'K','h','a','p','h',0,2,'Z','h','a','i','n',0,'Z','h','a','i','n',0,
-2,'B','u','r','u','s','h','a','s','k','i','_','Y','e','h','_','B','a','r','r','e','e',0,'B','u','r','u','s','h','a','s','k',
-'i','_','Y','e','h','_','B','a','r','r','e','e',0,2,'F','a','r','s','i','_','Y','e','h',0,
-'F','a','r','s','i','_','Y','e','h',0,2,'N','y','a',0,'N','y','a',0,
-2,'R','o','h','i','n','g','y','a','_','Y','e','h',0,'R','o','h','i','n','g','y','a','_','Y','e','h',0,
-2,'M','a','n','i','c','h','a','e','a','n','_','A','l','e','p','h',0,'M','a','n','i','c','h','a','e','a','n','_','A','l','e',
-'p','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','A','y','i','n',0,'M','a','n','i','c','h','a','e','a','n','_','A',
-'y','i','n',0,2,'M','a','n','i','c','h','a','e','a','n','_','B','e','t','h',0,'M','a','n','i','c','h','a','e','a','n','_',
-'B','e','t','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','D','a','l','e','t','h',0,
-'M','a','n','i','c','h','a','e','a','n','_','D','a','l','e','t','h',0,
-2,'M','a','n','i','c','h','a','e','a','n','_','D','h','a','m','e','d','h',0,'M','a','n','i','c','h','a','e','a','n','_','D',
-'h','a','m','e','d','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','F','i','v','e',0,
-'M','a','n','i','c','h','a','e','a','n','_','F','i','v','e',0,
-2,'M','a','n','i','c','h','a','e','a','n','_','G','i','m','e','l',0,'M','a','n','i','c','h','a','e','a','n','_','G','i','m',
-'e','l',0,2,'M','a','n','i','c','h','a','e','a','n','_','H','e','t','h',0,'M','a','n','i','c','h','a','e','a','n','_','H',
-'e','t','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','H','u','n','d','r','e','d',0,
-'M','a','n','i','c','h','a','e','a','n','_','H','u','n','d','r','e','d',0,
-2,'M','a','n','i','c','h','a','e','a','n','_','K','a','p','h',0,'M','a','n','i','c','h','a','e','a','n','_','K','a','p','h',
-0,2,'M','a','n','i','c','h','a','e','a','n','_','L','a','m','e','d','h',0,'M','a','n','i','c','h','a','e','a','n','_','L',
-'a','m','e','d','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','M','e','m',0,'M','a','n','i','c','h','a','e','a','n',
-'_','M','e','m',0,2,'M','a','n','i','c','h','a','e','a','n','_','N','u','n',0,'M','a','n','i','c','h','a','e','a','n','_',
-'N','u','n',0,2,'M','a','n','i','c','h','a','e','a','n','_','O','n','e',0,'M','a','n','i','c','h','a','e','a','n','_','O',
-'n','e',0,2,'M','a','n','i','c','h','a','e','a','n','_','P','e',0,'M','a','n','i','c','h','a','e','a','n','_','P','e',0,
-2,'M','a','n','i','c','h','a','e','a','n','_','Q','o','p','h',0,'M','a','n','i','c','h','a','e','a','n','_','Q','o','p','h',
-0,2,'M','a','n','i','c','h','a','e','a','n','_','R','e','s','h',0,'M','a','n','i','c','h','a','e','a','n','_','R','e','s',
-'h',0,2,'M','a','n','i','c','h','a','e','a','n','_','S','a','d','h','e',0,'M','a','n','i','c','h','a','e','a','n','_','S',
-'a','d','h','e',0,2,'M','a','n','i','c','h','a','e','a','n','_','S','a','m','e','k','h',0,
-'M','a','n','i','c','h','a','e','a','n','_','S','a','m','e','k','h',0,
-2,'M','a','n','i','c','h','a','e','a','n','_','T','a','w',0,'M','a','n','i','c','h','a','e','a','n','_','T','a','w',0,
-2,'M','a','n','i','c','h','a','e','a','n','_','T','e','n',0,'M','a','n','i','c','h','a','e','a','n','_','T','e','n',0,
-2,'M','a','n','i','c','h','a','e','a','n','_','T','e','t','h',0,'M','a','n','i','c','h','a','e','a','n','_','T','e','t','h',
-0,2,'M','a','n','i','c','h','a','e','a','n','_','T','h','a','m','e','d','h',0,'M','a','n','i','c','h','a','e','a','n','_',
-'T','h','a','m','e','d','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','T','w','e','n','t','y',0,
-'M','a','n','i','c','h','a','e','a','n','_','T','w','e','n','t','y',0,
-2,'M','a','n','i','c','h','a','e','a','n','_','W','a','w',0,'M','a','n','i','c','h','a','e','a','n','_','W','a','w',0,
-2,'M','a','n','i','c','h','a','e','a','n','_','Y','o','d','h',0,'M','a','n','i','c','h','a','e','a','n','_','Y','o','d','h',
-0,2,'M','a','n','i','c','h','a','e','a','n','_','Z','a','y','i','n',0,'M','a','n','i','c','h','a','e','a','n','_','Z','a',
-'y','i','n',0,2,'S','t','r','a','i','g','h','t','_','W','a','w',0,'S','t','r','a','i','g','h','t','_','W','a','w',0,
-2,'A','f','r','i','c','a','n','_','F','e','h',0,'A','f','r','i','c','a','n','_','F','e','h',0,
-2,'A','f','r','i','c','a','n','_','N','o','o','n',0,'A','f','r','i','c','a','n','_','N','o','o','n',0,
-2,'A','f','r','i','c','a','n','_','Q','a','f',0,'A','f','r','i','c','a','n','_','Q','a','f',0,
-2,'M','a','l','a','y','a','l','a','m','_','B','h','a',0,'M','a','l','a','y','a','l','a','m','_','B','h','a',0,
-2,'M','a','l','a','y','a','l','a','m','_','J','a',0,'M','a','l','a','y','a','l','a','m','_','J','a',0,
-2,'M','a','l','a','y','a','l','a','m','_','L','l','a',0,'M','a','l','a','y','a','l','a','m','_','L','l','a',0,
-2,'M','a','l','a','y','a','l','a','m','_','L','l','l','a',0,'M','a','l','a','y','a','l','a','m','_','L','l','l','a',0,
-2,'M','a','l','a','y','a','l','a','m','_','N','g','a',0,'M','a','l','a','y','a','l','a','m','_','N','g','a',0,
-2,'M','a','l','a','y','a','l','a','m','_','N','n','a',0,'M','a','l','a','y','a','l','a','m','_','N','n','a',0,
-2,'M','a','l','a','y','a','l','a','m','_','N','n','n','a',0,'M','a','l','a','y','a','l','a','m','_','N','n','n','a',0,
-2,'M','a','l','a','y','a','l','a','m','_','N','y','a',0,'M','a','l','a','y','a','l','a','m','_','N','y','a',0,
-2,'M','a','l','a','y','a','l','a','m','_','R','a',0,'M','a','l','a','y','a','l','a','m','_','R','a',0,
-2,'M','a','l','a','y','a','l','a','m','_','S','s','a',0,'M','a','l','a','y','a','l','a','m','_','S','s','a',0,
-2,'M','a','l','a','y','a','l','a','m','_','T','t','a',0,'M','a','l','a','y','a','l','a','m','_','T','t','a',0,
-2,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a','_','K','i','n','n','a','_','Y','a',0,
-'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a','_','K','i','n','n','a','_','Y','a',0,
-2,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a','_','P','a',0,'H','a','n','i','f','i','_','R','o','h','i','n',
-'g','y','a','_','P','a',0,2,'j','t',0,'J','o','i','n','i','n','g','_','T','y','p','e',0,
-2,'U',0,'N','o','n','_','J','o','i','n','i','n','g',0,2,'C',0,'J','o','i','n','_','C','a','u','s','i','n','g',0,
-2,'D',0,'D','u','a','l','_','J','o','i','n','i','n','g',0,
-2,'L',0,'L','e','f','t','_','J','o','i','n','i','n','g',0,
-2,'R',0,'R','i','g','h','t','_','J','o','i','n','i','n','g',0,
-2,'T',0,'T','r','a','n','s','p','a','r','e','n','t',0,2,'l','b',0,'L','i','n','e','_','B','r','e','a','k',0,
-2,'X','X',0,'U','n','k','n','o','w','n',0,2,'A','I',0,'A','m','b','i','g','u','o','u','s',0,
-2,'A','L',0,'A','l','p','h','a','b','e','t','i','c',0,2,'B','2',0,'B','r','e','a','k','_','B','o','t','h',0,
-2,'B','A',0,'B','r','e','a','k','_','A','f','t','e','r',0,
-2,'B','B',0,'B','r','e','a','k','_','B','e','f','o','r','e',0,
-2,'B','K',0,'M','a','n','d','a','t','o','r','y','_','B','r','e','a','k',0,
-2,'C','B',0,'C','o','n','t','i','n','g','e','n','t','_','B','r','e','a','k',0,
-2,'C','L',0,'C','l','o','s','e','_','P','u','n','c','t','u','a','t','i','o','n',0,
-2,'C','M',0,'C','o','m','b','i','n','i','n','g','_','M','a','r','k',0,
-2,'C','R',0,'C','a','r','r','i','a','g','e','_','R','e','t','u','r','n',0,
-2,'E','X',0,'E','x','c','l','a','m','a','t','i','o','n',0,
-2,'G','L',0,'G','l','u','e',0,2,'H','Y',0,'H','y','p','h','e','n',0,
-2,'I','D',0,'I','d','e','o','g','r','a','p','h','i','c',0,
-3,'I','N',0,'I','n','s','e','p','a','r','a','b','l','e',0,'I','n','s','e','p','e','r','a','b','l','e',0,
-2,'I','S',0,'I','n','f','i','x','_','N','u','m','e','r','i','c',0,
-2,'L','F',0,'L','i','n','e','_','F','e','e','d',0,2,'N','S',0,'N','o','n','s','t','a','r','t','e','r',0,
-2,'N','U',0,'N','u','m','e','r','i','c',0,2,'O','P',0,'O','p','e','n','_','P','u','n','c','t','u','a','t','i','o','n',
-0,2,'P','O',0,'P','o','s','t','f','i','x','_','N','u','m','e','r','i','c',0,
-2,'P','R',0,'P','r','e','f','i','x','_','N','u','m','e','r','i','c',0,
-2,'Q','U',0,'Q','u','o','t','a','t','i','o','n',0,2,'S','A',0,'C','o','m','p','l','e','x','_','C','o','n','t','e','x',
-'t',0,2,'S','G',0,'S','u','r','r','o','g','a','t','e',0,
-2,'S','P',0,'S','p','a','c','e',0,2,'S','Y',0,'B','r','e','a','k','_','S','y','m','b','o','l','s',0,
-2,'Z','W',0,'Z','W','S','p','a','c','e',0,2,'N','L',0,'N','e','x','t','_','L','i','n','e',0,
-2,'W','J',0,'W','o','r','d','_','J','o','i','n','e','r',0,
-2,'H','2',0,'H','2',0,2,'H','3',0,'H','3',0,2,'J','L',0,'J','L',0,
-2,'J','T',0,'J','T',0,2,'J','V',0,'J','V',0,2,'C','P',0,'C','l','o','s','e','_','P','a','r','e','n','t','h','e',
-'s','i','s',0,2,'C','J',0,'C','o','n','d','i','t','i','o','n','a','l','_','J','a','p','a','n','e','s','e','_','S','t','a',
-'r','t','e','r',0,2,'H','L',0,'H','e','b','r','e','w','_','L','e','t','t','e','r',0,
-2,'E','B',0,'E','_','B','a','s','e',0,2,'E','M',0,'E','_','M','o','d','i','f','i','e','r',0,
-2,'Z','W','J',0,'Z','W','J',0,2,'n','t',0,'N','u','m','e','r','i','c','_','T','y','p','e',0,
-2,'N','o','n','e',0,'N','o','n','e',0,2,'D','e',0,'D','e','c','i','m','a','l',0,
-2,'D','i',0,'D','i','g','i','t',0,2,'N','u',0,'N','u','m','e','r','i','c',0,
-2,'s','c',0,'S','c','r','i','p','t',0,2,'Z','y','y','y',0,'C','o','m','m','o','n',0,
-3,'Z','i','n','h',0,'I','n','h','e','r','i','t','e','d',0,'Q','a','a','i',0,
-2,'A','r','a','b',0,'A','r','a','b','i','c',0,2,'A','r','m','n',0,'A','r','m','e','n','i','a','n',0,
-2,'B','e','n','g',0,'B','e','n','g','a','l','i',0,2,'B','o','p','o',0,'B','o','p','o','m','o','f','o',0,
-2,'C','h','e','r',0,'C','h','e','r','o','k','e','e',0,3,'C','o','p','t',0,'C','o','p','t','i','c',0,
-'Q','a','a','c',0,2,'C','y','r','l',0,'C','y','r','i','l','l','i','c',0,
-2,'D','s','r','t',0,'D','e','s','e','r','e','t',0,2,'D','e','v','a',0,'D','e','v','a','n','a','g','a','r','i',0,
-2,'E','t','h','i',0,'E','t','h','i','o','p','i','c',0,2,'G','e','o','r',0,'G','e','o','r','g','i','a','n',0,
-2,'G','o','t','h',0,'G','o','t','h','i','c',0,2,'G','r','e','k',0,'G','r','e','e','k',0,
-2,'G','u','j','r',0,'G','u','j','a','r','a','t','i',0,2,'G','u','r','u',0,'G','u','r','m','u','k','h','i',0,
-2,'H','a','n','i',0,'H','a','n',0,2,'H','a','n','g',0,'H','a','n','g','u','l',0,
-2,'H','e','b','r',0,'H','e','b','r','e','w',0,2,'H','i','r','a',0,'H','i','r','a','g','a','n','a',0,
-2,'K','n','d','a',0,'K','a','n','n','a','d','a',0,2,'K','a','n','a',0,'K','a','t','a','k','a','n','a',0,
-2,'K','h','m','r',0,'K','h','m','e','r',0,2,'L','a','o','o',0,'L','a','o',0,
-2,'L','a','t','n',0,'L','a','t','i','n',0,2,'M','l','y','m',0,'M','a','l','a','y','a','l','a','m',0,
-2,'M','o','n','g',0,'M','o','n','g','o','l','i','a','n',0,
-2,'M','y','m','r',0,'M','y','a','n','m','a','r',0,2,'O','g','a','m',0,'O','g','h','a','m',0,
-2,'I','t','a','l',0,'O','l','d','_','I','t','a','l','i','c',0,
-2,'O','r','y','a',0,'O','r','i','y','a',0,2,'R','u','n','r',0,'R','u','n','i','c',0,
-2,'S','i','n','h',0,'S','i','n','h','a','l','a',0,2,'S','y','r','c',0,'S','y','r','i','a','c',0,
-2,'T','a','m','l',0,'T','a','m','i','l',0,2,'T','e','l','u',0,'T','e','l','u','g','u',0,
-2,'T','h','a','a',0,'T','h','a','a','n','a',0,2,'T','i','b','t',0,'T','i','b','e','t','a','n',0,
-2,'C','a','n','s',0,'C','a','n','a','d','i','a','n','_','A','b','o','r','i','g','i','n','a','l',0,
-2,'Y','i','i','i',0,'Y','i',0,2,'T','g','l','g',0,'T','a','g','a','l','o','g',0,
-2,'H','a','n','o',0,'H','a','n','u','n','o','o',0,2,'B','u','h','d',0,'B','u','h','i','d',0,
-2,'T','a','g','b',0,'T','a','g','b','a','n','w','a',0,2,'B','r','a','i',0,'B','r','a','i','l','l','e',0,
-2,'C','p','r','t',0,'C','y','p','r','i','o','t',0,2,'L','i','m','b',0,'L','i','m','b','u',0,
-2,'L','i','n','b',0,'L','i','n','e','a','r','_','B',0,2,'O','s','m','a',0,'O','s','m','a','n','y','a',0,
-2,'S','h','a','w',0,'S','h','a','v','i','a','n',0,2,'T','a','l','e',0,'T','a','i','_','L','e',0,
-2,'U','g','a','r',0,'U','g','a','r','i','t','i','c',0,2,'H','r','k','t',0,'K','a','t','a','k','a','n','a','_','O','r',
-'_','H','i','r','a','g','a','n','a',0,2,'B','u','g','i',0,'B','u','g','i','n','e','s','e',0,
-2,'G','l','a','g',0,'G','l','a','g','o','l','i','t','i','c',0,
-2,'K','h','a','r',0,'K','h','a','r','o','s','h','t','h','i',0,
-2,'S','y','l','o',0,'S','y','l','o','t','i','_','N','a','g','r','i',0,
-2,'T','a','l','u',0,'N','e','w','_','T','a','i','_','L','u','e',0,
-2,'T','f','n','g',0,'T','i','f','i','n','a','g','h',0,2,'X','p','e','o',0,'O','l','d','_','P','e','r','s','i','a','n',
-0,2,'B','a','l','i',0,'B','a','l','i','n','e','s','e',0,
-2,'B','a','t','k',0,'B','a','t','a','k',0,2,'B','l','i','s',0,'B','l','i','s',0,
-2,'B','r','a','h',0,'B','r','a','h','m','i',0,2,'C','i','r','t',0,'C','i','r','t',0,
-2,'C','y','r','s',0,'C','y','r','s',0,2,'E','g','y','d',0,'E','g','y','d',0,
-2,'E','g','y','h',0,'E','g','y','h',0,2,'E','g','y','p',0,'E','g','y','p','t','i','a','n','_','H','i','e','r','o','g',
-'l','y','p','h','s',0,2,'G','e','o','k',0,'G','e','o','k',0,
-2,'H','a','n','s',0,'H','a','n','s',0,2,'H','a','n','t',0,'H','a','n','t',0,
-2,'H','m','n','g',0,'P','a','h','a','w','h','_','H','m','o','n','g',0,
-2,'H','u','n','g',0,'O','l','d','_','H','u','n','g','a','r','i','a','n',0,
-2,'I','n','d','s',0,'I','n','d','s',0,2,'J','a','v','a',0,'J','a','v','a','n','e','s','e',0,
-2,'K','a','l','i',0,'K','a','y','a','h','_','L','i',0,2,'L','a','t','f',0,'L','a','t','f',0,
-2,'L','a','t','g',0,'L','a','t','g',0,2,'L','e','p','c',0,'L','e','p','c','h','a',0,
-2,'L','i','n','a',0,'L','i','n','e','a','r','_','A',0,2,'M','a','n','d',0,'M','a','n','d','a','i','c',0,
-2,'M','a','y','a',0,'M','a','y','a',0,2,'M','e','r','o',0,'M','e','r','o','i','t','i','c','_','H','i','e','r','o','g',
-'l','y','p','h','s',0,2,'N','k','o','o',0,'N','k','o',0,
-2,'O','r','k','h',0,'O','l','d','_','T','u','r','k','i','c',0,
-2,'P','e','r','m',0,'O','l','d','_','P','e','r','m','i','c',0,
-2,'P','h','a','g',0,'P','h','a','g','s','_','P','a',0,2,'P','h','n','x',0,'P','h','o','e','n','i','c','i','a','n',0,
-2,'P','l','r','d',0,'M','i','a','o',0,2,'R','o','r','o',0,'R','o','r','o',0,
-2,'S','a','r','a',0,'S','a','r','a',0,2,'S','y','r','e',0,'S','y','r','e',0,
-2,'S','y','r','j',0,'S','y','r','j',0,2,'S','y','r','n',0,'S','y','r','n',0,
-2,'T','e','n','g',0,'T','e','n','g',0,2,'V','a','i','i',0,'V','a','i',0,
-2,'V','i','s','p',0,'V','i','s','p',0,2,'X','s','u','x',0,'C','u','n','e','i','f','o','r','m',0,
-2,'Z','x','x','x',0,'Z','x','x','x',0,2,'Z','z','z','z',0,'U','n','k','n','o','w','n',0,
-2,'C','a','r','i',0,'C','a','r','i','a','n',0,2,'J','p','a','n',0,'J','p','a','n',0,
-2,'L','a','n','a',0,'T','a','i','_','T','h','a','m',0,2,'L','y','c','i',0,'L','y','c','i','a','n',0,
-2,'L','y','d','i',0,'L','y','d','i','a','n',0,2,'O','l','c','k',0,'O','l','_','C','h','i','k','i',0,
-2,'R','j','n','g',0,'R','e','j','a','n','g',0,2,'S','a','u','r',0,'S','a','u','r','a','s','h','t','r','a',0,
-2,'S','g','n','w',0,'S','i','g','n','W','r','i','t','i','n','g',0,
-2,'S','u','n','d',0,'S','u','n','d','a','n','e','s','e',0,
-2,'M','o','o','n',0,'M','o','o','n',0,2,'M','t','e','i',0,'M','e','e','t','e','i','_','M','a','y','e','k',0,
-2,'A','r','m','i',0,'I','m','p','e','r','i','a','l','_','A','r','a','m','a','i','c',0,
-2,'A','v','s','t',0,'A','v','e','s','t','a','n',0,2,'C','a','k','m',0,'C','h','a','k','m','a',0,
-2,'K','o','r','e',0,'K','o','r','e',0,2,'K','t','h','i',0,'K','a','i','t','h','i',0,
-2,'M','a','n','i',0,'M','a','n','i','c','h','a','e','a','n',0,
-2,'P','h','l','i',0,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','h','l','a','v','i',0,
-2,'P','h','l','p',0,'P','s','a','l','t','e','r','_','P','a','h','l','a','v','i',0,
-2,'P','h','l','v',0,'P','h','l','v',0,2,'P','r','t','i',0,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P',
-'a','r','t','h','i','a','n',0,2,'S','a','m','r',0,'S','a','m','a','r','i','t','a','n',0,
-2,'T','a','v','t',0,'T','a','i','_','V','i','e','t',0,2,'Z','m','t','h',0,'Z','m','t','h',0,
-2,'Z','s','y','m',0,'Z','s','y','m',0,2,'B','a','m','u',0,'B','a','m','u','m',0,
-2,'N','k','g','b',0,'N','k','g','b',0,2,'S','a','r','b',0,'O','l','d','_','S','o','u','t','h','_','A','r','a','b','i',
-'a','n',0,2,'B','a','s','s',0,'B','a','s','s','a','_','V','a','h',0,
-2,'D','u','p','l',0,'D','u','p','l','o','y','a','n',0,2,'E','l','b','a',0,'E','l','b','a','s','a','n',0,
-2,'G','r','a','n',0,'G','r','a','n','t','h','a',0,2,'K','p','e','l',0,'K','p','e','l',0,
-2,'L','o','m','a',0,'L','o','m','a',0,2,'M','e','n','d',0,'M','e','n','d','e','_','K','i','k','a','k','u','i',0,
-2,'M','e','r','c',0,'M','e','r','o','i','t','i','c','_','C','u','r','s','i','v','e',0,
-2,'N','a','r','b',0,'O','l','d','_','N','o','r','t','h','_','A','r','a','b','i','a','n',0,
-2,'N','b','a','t',0,'N','a','b','a','t','a','e','a','n',0,
-2,'P','a','l','m',0,'P','a','l','m','y','r','e','n','e',0,
-2,'S','i','n','d',0,'K','h','u','d','a','w','a','d','i',0,
-2,'W','a','r','a',0,'W','a','r','a','n','g','_','C','i','t','i',0,
-2,'A','f','a','k',0,'A','f','a','k',0,2,'J','u','r','c',0,'J','u','r','c',0,
-2,'M','r','o','o',0,'M','r','o',0,2,'N','s','h','u',0,'N','u','s','h','u',0,
-2,'S','h','r','d',0,'S','h','a','r','a','d','a',0,2,'S','o','r','a',0,'S','o','r','a','_','S','o','m','p','e','n','g',
-0,2,'T','a','k','r',0,'T','a','k','r','i',0,2,'T','a','n','g',0,'T','a','n','g','u','t',0,
-2,'W','o','l','e',0,'W','o','l','e',0,2,'H','l','u','w',0,'A','n','a','t','o','l','i','a','n','_','H','i','e','r','o',
-'g','l','y','p','h','s',0,2,'K','h','o','j',0,'K','h','o','j','k','i',0,
-2,'T','i','r','h',0,'T','i','r','h','u','t','a',0,2,'A','g','h','b',0,'C','a','u','c','a','s','i','a','n','_','A','l',
-'b','a','n','i','a','n',0,2,'M','a','h','j',0,'M','a','h','a','j','a','n','i',0,
-2,'H','a','t','r',0,'H','a','t','r','a','n',0,2,'M','u','l','t',0,'M','u','l','t','a','n','i',0,
-2,'P','a','u','c',0,'P','a','u','_','C','i','n','_','H','a','u',0,
-2,'S','i','d','d',0,'S','i','d','d','h','a','m',0,2,'A','d','l','m',0,'A','d','l','a','m',0,
-2,'B','h','k','s',0,'B','h','a','i','k','s','u','k','i',0,
-2,'M','a','r','c',0,'M','a','r','c','h','e','n',0,2,'O','s','g','e',0,'O','s','a','g','e',0,
-2,'H','a','n','b',0,'H','a','n','b',0,2,'J','a','m','o',0,'J','a','m','o',0,
-2,'Z','s','y','e',0,'Z','s','y','e',0,2,'G','o','n','m',0,'M','a','s','a','r','a','m','_','G','o','n','d','i',0,
-2,'S','o','y','o',0,'S','o','y','o','m','b','o',0,2,'Z','a','n','b',0,'Z','a','n','a','b','a','z','a','r','_','S','q',
-'u','a','r','e',0,2,'D','o','g','r',0,'D','o','g','r','a',0,
-2,'G','o','n','g',0,'G','u','n','j','a','l','a','_','G','o','n','d','i',0,
-2,'M','a','k','a',0,'M','a','k','a','s','a','r',0,2,'M','e','d','f',0,'M','e','d','e','f','a','i','d','r','i','n',0,
-2,'R','o','h','g',0,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a',0,
-2,'S','o','g','d',0,'S','o','g','d','i','a','n',0,2,'S','o','g','o',0,'O','l','d','_','S','o','g','d','i','a','n',0,
-2,'E','l','y','m',0,'E','l','y','m','a','i','c',0,2,'H','m','n','p',0,'N','y','i','a','k','e','n','g','_','P','u','a',
-'c','h','u','e','_','H','m','o','n','g',0,2,'N','a','n','d',0,'N','a','n','d','i','n','a','g','a','r','i',0,
-2,'W','c','h','o',0,'W','a','n','c','h','o',0,2,'C','h','r','s',0,'C','h','o','r','a','s','m','i','a','n',0,
-2,'D','i','a','k',0,'D','i','v','e','s','_','A','k','u','r','u',0,
-2,'K','i','t','s',0,'K','h','i','t','a','n','_','S','m','a','l','l','_','S','c','r','i','p','t',0,
-2,'Y','e','z','i',0,'Y','e','z','i','d','i',0,2,'h','s','t',0,'H','a','n','g','u','l','_','S','y','l','l','a','b','l',
-'e','_','T','y','p','e',0,2,'N','A',0,'N','o','t','_','A','p','p','l','i','c','a','b','l','e',0,
-2,'L',0,'L','e','a','d','i','n','g','_','J','a','m','o',0,
-2,'V',0,'V','o','w','e','l','_','J','a','m','o',0,2,'T',0,'T','r','a','i','l','i','n','g','_','J','a','m','o',0,
-2,'L','V',0,'L','V','_','S','y','l','l','a','b','l','e',0,
-2,'L','V','T',0,'L','V','T','_','S','y','l','l','a','b','l','e',0,
-2,'N','F','D','_','Q','C',0,'N','F','D','_','Q','u','i','c','k','_','C','h','e','c','k',0,
-2,'N',0,'N','o',0,2,'Y',0,'Y','e','s',0,2,'N','F','K','D','_','Q','C',0,'N','F','K','D','_','Q','u','i','c','k',
-'_','C','h','e','c','k',0,2,'N','F','C','_','Q','C',0,'N','F','C','_','Q','u','i','c','k','_','C','h','e','c','k',0,
-2,'M',0,'M','a','y','b','e',0,2,'N','F','K','C','_','Q','C',0,'N','F','K','C','_','Q','u','i','c','k','_','C','h','e',
-'c','k',0,2,'l','c','c','c',0,'L','e','a','d','_','C','a','n','o','n','i','c','a','l','_','C','o','m','b','i','n','i','n',
-'g','_','C','l','a','s','s',0,2,'t','c','c','c',0,'T','r','a','i','l','_','C','a','n','o','n','i','c','a','l','_','C','o',
-'m','b','i','n','i','n','g','_','C','l','a','s','s',0,2,'G','C','B',0,'G','r','a','p','h','e','m','e','_','C','l','u','s',
-'t','e','r','_','B','r','e','a','k',0,2,'X','X',0,'O','t','h','e','r',0,
-2,'C','N',0,'C','o','n','t','r','o','l',0,2,'C','R',0,'C','R',0,
-2,'E','X',0,'E','x','t','e','n','d',0,2,'L',0,'L',0,
-2,'L','F',0,'L','F',0,2,'L','V',0,'L','V',0,2,'L','V','T',0,'L','V','T',0,
-2,'T',0,'T',0,2,'V',0,'V',0,2,'S','M',0,'S','p','a','c','i','n','g','M','a','r','k',0,
-2,'P','P',0,'P','r','e','p','e','n','d',0,2,'E','B','G',0,'E','_','B','a','s','e','_','G','A','Z',0,
-2,'G','A','Z',0,'G','l','u','e','_','A','f','t','e','r','_','Z','w','j',0,
-2,'S','B',0,'S','e','n','t','e','n','c','e','_','B','r','e','a','k',0,
-2,'A','T',0,'A','T','e','r','m',0,2,'C','L',0,'C','l','o','s','e',0,
-2,'F','O',0,'F','o','r','m','a','t',0,2,'L','O',0,'L','o','w','e','r',0,
-2,'L','E',0,'O','L','e','t','t','e','r',0,2,'S','E',0,'S','e','p',0,
-2,'S','P',0,'S','p',0,2,'S','T',0,'S','T','e','r','m',0,
-2,'U','P',0,'U','p','p','e','r',0,2,'S','C',0,'S','C','o','n','t','i','n','u','e',0,
-2,'W','B',0,'W','o','r','d','_','B','r','e','a','k',0,2,'L','E',0,'A','L','e','t','t','e','r',0,
-2,'K','A',0,'K','a','t','a','k','a','n','a',0,2,'M','L',0,'M','i','d','L','e','t','t','e','r',0,
-2,'M','N',0,'M','i','d','N','u','m',0,2,'E','X',0,'E','x','t','e','n','d','N','u','m','L','e','t',0,
-2,'E','x','t','e','n','d',0,'E','x','t','e','n','d',0,2,'M','B',0,'M','i','d','N','u','m','L','e','t',0,
-2,'N','L',0,'N','e','w','l','i','n','e',0,2,'S','Q',0,'S','i','n','g','l','e','_','Q','u','o','t','e',0,
-2,'D','Q',0,'D','o','u','b','l','e','_','Q','u','o','t','e',0,
-2,'W','S','e','g','S','p','a','c','e',0,'W','S','e','g','S','p','a','c','e',0,
-2,'b','p','t',0,'B','i','d','i','_','P','a','i','r','e','d','_','B','r','a','c','k','e','t','_','T','y','p','e',0,
-2,'n',0,'N','o','n','e',0,2,'o',0,'O','p','e','n',0,
-2,'c',0,'C','l','o','s','e',0,2,'I','n','P','C',0,'I','n','d','i','c','_','P','o','s','i','t','i','o','n','a','l','_',
-'C','a','t','e','g','o','r','y',0,2,'N','A',0,'N','A',0,
-2,'B','o','t','t','o','m',0,'B','o','t','t','o','m',0,2,'B','o','t','t','o','m','_','A','n','d','_','L','e','f','t',0,
-'B','o','t','t','o','m','_','A','n','d','_','L','e','f','t',0,
-2,'B','o','t','t','o','m','_','A','n','d','_','R','i','g','h','t',0,'B','o','t','t','o','m','_','A','n','d','_','R','i','g',
-'h','t',0,2,'L','e','f','t',0,'L','e','f','t',0,2,'L','e','f','t','_','A','n','d','_','R','i','g','h','t',0,
-'L','e','f','t','_','A','n','d','_','R','i','g','h','t',0,2,'O','v','e','r','s','t','r','u','c','k',0,
-'O','v','e','r','s','t','r','u','c','k',0,2,'R','i','g','h','t',0,'R','i','g','h','t',0,
-2,'T','o','p',0,'T','o','p',0,2,'T','o','p','_','A','n','d','_','B','o','t','t','o','m',0,
-'T','o','p','_','A','n','d','_','B','o','t','t','o','m',0,2,'T','o','p','_','A','n','d','_','B','o','t','t','o','m','_','A',
-'n','d','_','R','i','g','h','t',0,'T','o','p','_','A','n','d','_','B','o','t','t','o','m','_','A','n','d','_','R','i','g','h',
-'t',0,2,'T','o','p','_','A','n','d','_','L','e','f','t',0,'T','o','p','_','A','n','d','_','L','e','f','t',0,
-2,'T','o','p','_','A','n','d','_','L','e','f','t','_','A','n','d','_','R','i','g','h','t',0,
-'T','o','p','_','A','n','d','_','L','e','f','t','_','A','n','d','_','R','i','g','h','t',0,
-2,'T','o','p','_','A','n','d','_','R','i','g','h','t',0,'T','o','p','_','A','n','d','_','R','i','g','h','t',0,
-2,'V','i','s','u','a','l','_','O','r','d','e','r','_','L','e','f','t',0,'V','i','s','u','a','l','_','O','r','d','e','r','_',
-'L','e','f','t',0,2,'T','o','p','_','A','n','d','_','B','o','t','t','o','m','_','A','n','d','_','L','e','f','t',0,
-'T','o','p','_','A','n','d','_','B','o','t','t','o','m','_','A','n','d','_','L','e','f','t',0,
-2,'I','n','S','C',0,'I','n','d','i','c','_','S','y','l','l','a','b','i','c','_','C','a','t','e','g','o','r','y',0,
-2,'O','t','h','e','r',0,'O','t','h','e','r',0,2,'A','v','a','g','r','a','h','a',0,'A','v','a','g','r','a','h','a',0,
-2,'B','i','n','d','u',0,'B','i','n','d','u',0,2,'B','r','a','h','m','i','_','J','o','i','n','i','n','g','_','N','u','m',
-'b','e','r',0,'B','r','a','h','m','i','_','J','o','i','n','i','n','g','_','N','u','m','b','e','r',0,
-2,'C','a','n','t','i','l','l','a','t','i','o','n','_','M','a','r','k',0,'C','a','n','t','i','l','l','a','t','i','o','n','_',
-'M','a','r','k',0,2,'C','o','n','s','o','n','a','n','t',0,'C','o','n','s','o','n','a','n','t',0,
-2,'C','o','n','s','o','n','a','n','t','_','D','e','a','d',0,'C','o','n','s','o','n','a','n','t','_','D','e','a','d',0,
-2,'C','o','n','s','o','n','a','n','t','_','F','i','n','a','l',0,'C','o','n','s','o','n','a','n','t','_','F','i','n','a','l',
-0,2,'C','o','n','s','o','n','a','n','t','_','H','e','a','d','_','L','e','t','t','e','r',0,
-'C','o','n','s','o','n','a','n','t','_','H','e','a','d','_','L','e','t','t','e','r',0,
-2,'C','o','n','s','o','n','a','n','t','_','I','n','i','t','i','a','l','_','P','o','s','t','f','i','x','e','d',0,
-'C','o','n','s','o','n','a','n','t','_','I','n','i','t','i','a','l','_','P','o','s','t','f','i','x','e','d',0,
-2,'C','o','n','s','o','n','a','n','t','_','K','i','l','l','e','r',0,'C','o','n','s','o','n','a','n','t','_','K','i','l','l',
-'e','r',0,2,'C','o','n','s','o','n','a','n','t','_','M','e','d','i','a','l',0,'C','o','n','s','o','n','a','n','t','_','M',
-'e','d','i','a','l',0,2,'C','o','n','s','o','n','a','n','t','_','P','l','a','c','e','h','o','l','d','e','r',0,
-'C','o','n','s','o','n','a','n','t','_','P','l','a','c','e','h','o','l','d','e','r',0,
-2,'C','o','n','s','o','n','a','n','t','_','P','r','e','c','e','d','i','n','g','_','R','e','p','h','a',0,
-'C','o','n','s','o','n','a','n','t','_','P','r','e','c','e','d','i','n','g','_','R','e','p','h','a',0,
-2,'C','o','n','s','o','n','a','n','t','_','P','r','e','f','i','x','e','d',0,'C','o','n','s','o','n','a','n','t','_','P','r',
-'e','f','i','x','e','d',0,2,'C','o','n','s','o','n','a','n','t','_','S','u','b','j','o','i','n','e','d',0,
-'C','o','n','s','o','n','a','n','t','_','S','u','b','j','o','i','n','e','d',0,
-2,'C','o','n','s','o','n','a','n','t','_','S','u','c','c','e','e','d','i','n','g','_','R','e','p','h','a',0,
-'C','o','n','s','o','n','a','n','t','_','S','u','c','c','e','e','d','i','n','g','_','R','e','p','h','a',0,
-2,'C','o','n','s','o','n','a','n','t','_','W','i','t','h','_','S','t','a','c','k','e','r',0,
-'C','o','n','s','o','n','a','n','t','_','W','i','t','h','_','S','t','a','c','k','e','r',0,
-2,'G','e','m','i','n','a','t','i','o','n','_','M','a','r','k',0,'G','e','m','i','n','a','t','i','o','n','_','M','a','r','k',
-0,2,'I','n','v','i','s','i','b','l','e','_','S','t','a','c','k','e','r',0,'I','n','v','i','s','i','b','l','e','_','S','t',
-'a','c','k','e','r',0,2,'J','o','i','n','e','r',0,'J','o','i','n','e','r',0,
-2,'M','o','d','i','f','y','i','n','g','_','L','e','t','t','e','r',0,'M','o','d','i','f','y','i','n','g','_','L','e','t','t',
-'e','r',0,2,'N','o','n','_','J','o','i','n','e','r',0,'N','o','n','_','J','o','i','n','e','r',0,
-2,'N','u','k','t','a',0,'N','u','k','t','a',0,2,'N','u','m','b','e','r',0,'N','u','m','b','e','r',0,
-2,'N','u','m','b','e','r','_','J','o','i','n','e','r',0,'N','u','m','b','e','r','_','J','o','i','n','e','r',0,
-2,'P','u','r','e','_','K','i','l','l','e','r',0,'P','u','r','e','_','K','i','l','l','e','r',0,
-2,'R','e','g','i','s','t','e','r','_','S','h','i','f','t','e','r',0,'R','e','g','i','s','t','e','r','_','S','h','i','f','t',
-'e','r',0,2,'S','y','l','l','a','b','l','e','_','M','o','d','i','f','i','e','r',0,'S','y','l','l','a','b','l','e','_','M',
-'o','d','i','f','i','e','r',0,2,'T','o','n','e','_','L','e','t','t','e','r',0,'T','o','n','e','_','L','e','t','t','e','r',
-0,2,'T','o','n','e','_','M','a','r','k',0,'T','o','n','e','_','M','a','r','k',0,
-2,'V','i','r','a','m','a',0,'V','i','r','a','m','a',0,2,'V','i','s','a','r','g','a',0,
-'V','i','s','a','r','g','a',0,2,'V','o','w','e','l',0,'V','o','w','e','l',0,
-2,'V','o','w','e','l','_','D','e','p','e','n','d','e','n','t',0,'V','o','w','e','l','_','D','e','p','e','n','d','e','n','t',
-0,2,'V','o','w','e','l','_','I','n','d','e','p','e','n','d','e','n','t',0,'V','o','w','e','l','_','I','n','d','e','p','e',
-'n','d','e','n','t',0,2,'v','o',0,'V','e','r','t','i','c','a','l','_','O','r','i','e','n','t','a','t','i','o','n',0,
-2,'R',0,'R','o','t','a','t','e','d',0,2,'T','r',0,'T','r','a','n','s','f','o','r','m','e','d','_','R','o','t','a','t',
-'e','d',0,2,'T','u',0,'T','r','a','n','s','f','o','r','m','e','d','_','U','p','r','i','g','h','t',0,
-2,'U',0,'U','p','r','i','g','h','t',0,2,'g','c','m',0,'G','e','n','e','r','a','l','_','C','a','t','e','g','o','r','y',
-'_','M','a','s','k',0,2,'C',0,'O','t','h','e','r',0,2,'L',0,'L','e','t','t','e','r',0,
-2,'L','C',0,'C','a','s','e','d','_','L','e','t','t','e','r',0,
-3,'M',0,'M','a','r','k',0,'C','o','m','b','i','n','i','n','g','_','M','a','r','k',0,
-2,'N',0,'N','u','m','b','e','r',0,3,'P',0,'P','u','n','c','t','u','a','t','i','o','n',0,
-'p','u','n','c','t',0,2,'S',0,'S','y','m','b','o','l',0,
-2,'Z',0,'S','e','p','a','r','a','t','o','r',0,2,'n','v',0,'N','u','m','e','r','i','c','_','V','a','l','u','e',0,
-2,'a','g','e',0,'A','g','e',0,2,'b','m','g',0,'B','i','d','i','_','M','i','r','r','o','r','i','n','g','_','G','l','y',
-'p','h',0,2,'c','f',0,'C','a','s','e','_','F','o','l','d','i','n','g',0,
-2,'i','s','c',0,'I','S','O','_','C','o','m','m','e','n','t',0,
-2,'l','c',0,'L','o','w','e','r','c','a','s','e','_','M','a','p','p','i','n','g',0,
-2,'n','a',0,'N','a','m','e',0,3,'s','c','f',0,'S','i','m','p','l','e','_','C','a','s','e','_','F','o','l','d','i','n',
-'g',0,'s','f','c',0,2,'s','l','c',0,'S','i','m','p','l','e','_','L','o','w','e','r','c','a','s','e','_','M','a','p','p',
-'i','n','g',0,2,'s','t','c',0,'S','i','m','p','l','e','_','T','i','t','l','e','c','a','s','e','_','M','a','p','p','i','n',
-'g',0,2,'s','u','c',0,'S','i','m','p','l','e','_','U','p','p','e','r','c','a','s','e','_','M','a','p','p','i','n','g',0,
-2,'t','c',0,'T','i','t','l','e','c','a','s','e','_','M','a','p','p','i','n','g',0,
-2,'n','a','1',0,'U','n','i','c','o','d','e','_','1','_','N','a','m','e',0,
-2,'u','c',0,'U','p','p','e','r','c','a','s','e','_','M','a','p','p','i','n','g',0,
-2,'b','p','b',0,'B','i','d','i','_','P','a','i','r','e','d','_','B','r','a','c','k','e','t',0,
-2,'s','c','x',0,'S','c','r','i','p','t','_','E','x','t','e','n','s','i','o','n','s',0
-};
-
-U_NAMESPACE_END
-
-#endif // INCLUDED_FROM_PROPNAME_CPP
diff --git a/contrib/libs/icu/common/propsvec.cpp b/contrib/libs/icu/common/propsvec.cpp
deleted file mode 100644
index 056fcda9cf6..00000000000
--- a/contrib/libs/icu/common/propsvec.cpp
+++ /dev/null
@@ -1,529 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2002-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: propsvec.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002feb22
-* created by: Markus W. Scherer
-*
-* Store bits (Unicode character properties) in bit set vectors.
-*/
-
-#include <stdlib.h>
-#include "unicode/utypes.h"
-#include "cmemory.h"
-#include "utrie.h"
-#include "utrie2.h"
-#include "uarrsort.h"
-#include "propsvec.h"
-#include "uassert.h"
-
-struct UPropsVectors { /* builder state: a table of rows, each row being `columns` uint32_t cells */
- uint32_t *v; /* row storage; row i starts at v+i*columns, cells [0] and [1] hold the range start and limit */
- int32_t columns; /* number of columns, plus two for start & limit values */
- int32_t maxRows; /* number of rows the v allocation has room for */
- int32_t rows; /* number of rows in use; after upvec_compact() the number of unique value vectors */
- int32_t prevRow; /* search optimization: remember last row seen */
- UBool isCompacted; /* set by upvec_compact(); afterwards v holds only value vectors, no start/limit cells */
-};
-
-#define UPVEC_INITIAL_ROWS (1<<12) /* row capacity allocated by upvec_open() */
-#define UPVEC_MEDIUM_ROWS ((int32_t)1<<16) /* first growth step used by upvec_setValue() */
-#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1) /* second and last growth step; needing more is reported as U_INTERNAL_PROGRAM_ERROR */
-
-U_CAPI UPropsVectors * U_EXPORT2
-upvec_open(int32_t columns, UErrorCode *pErrorCode) { /* creates a builder with `columns` value cells per row; returns NULL and sets *pErrorCode on failure */
- UPropsVectors *pv;
- uint32_t *v, *row;
- uint32_t cp;
-
- if(U_FAILURE(*pErrorCode)) { /* an earlier error is pending: do nothing */
- return NULL;
- }
- if(columns<1) { /* at least one value column is required */
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- columns+=2; /* count range start and limit columns */
-
- pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors));
- v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4); /* 4 bytes per cell; NOTE(review): the product is formed in signed integer arithmetic and is not checked for overflow */
- if(pv==NULL || v==NULL) { /* either allocation failed; both pointers go to uprv_free(), one of them may be NULL */
- uprv_free(pv);
- uprv_free(v);
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- uprv_memset(pv, 0, sizeof(UPropsVectors)); /* clears prevRow and isCompacted among others */
- pv->v=v;
- pv->columns=columns;
- pv->maxRows=UPVEC_INITIAL_ROWS;
- pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP); /* one all-Unicode row plus one row per special pseudo code point */
-
- /* set the all-Unicode row and the special-value rows */
- row=pv->v;
- uprv_memset(row, 0, pv->rows*columns*4); /* all value cells of the initial rows start as 0 */
- row[0]=0;
- row[1]=0x110000;
- row+=columns;
- for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) { /* each special row covers exactly one pseudo code point */
- row[0]=cp;
- row[1]=cp+1;
- row+=columns;
- }
- return pv;
-}
-
-U_CAPI void U_EXPORT2
-upvec_close(UPropsVectors *pv) { /* releases the row storage and the builder itself; a NULL pv is ignored */
- if(pv!=NULL) {
- uprv_free(pv->v); /* free the rows first, while pv is still readable */
- uprv_free(pv);
- }
-}
-
-static uint32_t *
-_findRow(UPropsVectors *pv, UChar32 rangeStart) { /* returns the row whose range row[0]..row[1]-1 contains rangeStart and records its index in pv->prevRow */
- uint32_t *row;
- int32_t columns, i, start, limit, prevRow;
-
- columns=pv->columns;
- limit=pv->rows;
- prevRow=pv->prevRow;
-
- /* check the vicinity of the last-seen row (start searching with an unrolled loop) */
- row=pv->v+prevRow*columns;
- if(rangeStart>=(UChar32)row[0]) {
- if(rangeStart<(UChar32)row[1]) {
- /* same row as last seen */
- return row;
- } else if(rangeStart<(UChar32)(row+=columns)[1]) { /* the condition itself advances row by one row */
- /* next row after the last one */
- pv->prevRow=prevRow+1;
- return row;
- } else if(rangeStart<(UChar32)(row+=columns)[1]) { /* advances row once more */
- /* second row after the last one */
- pv->prevRow=prevRow+2;
- return row;
- } else if((rangeStart-(UChar32)row[1])<10) { /* fewer than 10 code points past the limit of the second-next row */
- /* we are close, continue looping */
- prevRow+=2; /* row already points two rows past the cached one */
- do {
- ++prevRow;
- row+=columns;
- } while(rangeStart>=(UChar32)row[1]);
- pv->prevRow=prevRow;
- return row;
- }
- } else if(rangeStart<(UChar32)pv->v[1]) { /* before the cached row and below the limit of row 0 */
- /* the very first row */
- pv->prevRow=0;
- return pv->v;
- }
-
- /* do a binary search for the start of the range */
- start=0;
- while(start<limit-1) { /* candidate rows are start..limit-1; stop when one is left */
- i=(start+limit)/2;
- row=pv->v+i*columns;
- if(rangeStart<(UChar32)row[0]) {
- limit=i;
- } else if(rangeStart<(UChar32)row[1]) {
- pv->prevRow=i;
- return row;
- } else {
- start=i;
- }
- }
-
- /* must be found because all ranges together always cover all of Unicode */
- pv->prevRow=start;
- return pv->v+start*columns;
-}
-
-U_CAPI void U_EXPORT2
-upvec_setValue(UPropsVectors *pv, /* builder to modify; must not have been compacted */
- UChar32 start, UChar32 end, /* inclusive range, 0<=start<=end<=UPVEC_MAX_CP */
- int32_t column, /* index of the value column, not counting the start/limit columns */
- uint32_t value, uint32_t mask, /* bits selected by mask are replaced with value&mask */
- UErrorCode *pErrorCode) { /* receives the error code when an argument or state check fails */
- uint32_t *firstRow, *lastRow;
- int32_t columns;
- UChar32 limit;
- UBool splitFirstRow, splitLastRow;
-
- /* argument checking */
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- if( pv==NULL ||
- start<0 || start>end || end>UPVEC_MAX_CP ||
- column<0 || column>=(pv->columns-2)
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- if(pv->isCompacted) { /* compaction has destroyed the row structure, see upvec_compact() */
- *pErrorCode=U_NO_WRITE_PERMISSION;
- return;
- }
- limit=end+1; /* rows store an exclusive limit */
-
- /* initialize */
- columns=pv->columns;
- column+=2; /* skip range start and limit columns */
- value&=mask; /* only bits inside the mask may be set */
-
- /* find the rows whose ranges overlap with the input range */
-
- /* find the first and last rows, always successful */
- firstRow=_findRow(pv, start);
- lastRow=_findRow(pv, end);
-
- /*
- * Rows need to be split if they partially overlap with the
- * input range (only possible for the first and last rows)
- * and if their value differs from the input value.
- */
- splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask));
- splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask));
-
- /* split first/last rows if necessary */
- if(splitFirstRow || splitLastRow) {
- int32_t count, rows;
-
- rows=pv->rows;
- if((rows+splitFirstRow+splitLastRow)>pv->maxRows) { /* the one or two new rows do not fit: grow the table */
- uint32_t *newVectors;
- int32_t newMaxRows;
-
- if(pv->maxRows<UPVEC_MEDIUM_ROWS) {
- newMaxRows=UPVEC_MEDIUM_ROWS;
- } else if(pv->maxRows<UPVEC_MAX_ROWS) {
- newMaxRows=UPVEC_MAX_ROWS;
- } else {
- /* Implementation bug, or UPVEC_MAX_ROWS too low. */
- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
- return;
- }
- newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4); /* NOTE(review): size product is not checked for overflow */
- if(newVectors==NULL) { /* pv is left unchanged on allocation failure */
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- uprv_memcpy(newVectors, pv->v, (size_t)rows*columns*4);
- firstRow=newVectors+(firstRow-pv->v); /* re-base both row pointers into the new allocation */
- lastRow=newVectors+(lastRow-pv->v);
- uprv_free(pv->v);
- pv->v=newVectors;
- pv->maxRows=newMaxRows;
- }
-
- /* count the number of row cells to move after the last row, and move them */
- count = (int32_t)((pv->v+rows*columns)-(lastRow+columns));
- if(count>0) {
- uprv_memmove(
- lastRow+(1+splitFirstRow+splitLastRow)*columns,
- lastRow+columns,
- count*4);
- }
- pv->rows=rows+splitFirstRow+splitLastRow;
-
- /* split the first row, and move the firstRow pointer to the second part */
- if(splitFirstRow) {
- /* copy all affected rows up one and move the lastRow pointer */
- count = (int32_t)((lastRow-firstRow)+columns);
- uprv_memmove(firstRow+columns, firstRow, (size_t)count*4);
- lastRow+=columns;
-
- /* split the range and move the firstRow pointer */
- firstRow[1]=firstRow[columns]=(uint32_t)start;
- firstRow+=columns;
- }
-
- /* split the last row */
- if(splitLastRow) {
- /* copy the last row data */
- uprv_memcpy(lastRow+columns, lastRow, (size_t)columns*4);
-
- /* split the range: lastRow now ends at limit and its copy starts there; lastRow itself is not moved */
- lastRow[1]=lastRow[columns]=(uint32_t)limit;
- }
- }
-
- /* set the "row last seen" to the last row for the range */
- pv->prevRow=(int32_t)((lastRow-(pv->v))/columns);
-
- /* set the input value in all remaining rows */
- firstRow+=column; /* both pointers now address the target cell inside their rows */
- lastRow+=column;
- mask=~mask; /* from here on mask selects the bits to keep */
- for(;;) {
- *firstRow=(*firstRow&mask)|value;
- if(firstRow==lastRow) {
- break;
- }
- firstRow+=columns;
- }
-}
-
-U_CAPI uint32_t U_EXPORT2
-upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) { /* returns the cell of value column `column` for code point c, or 0 for bad arguments or a compacted pv */
- uint32_t *row;
- UPropsVectors *ncpv;
-
- if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) { /* pv itself is dereferenced without a NULL check */
- return 0;
- }
- ncpv=(UPropsVectors *)pv; /* const is cast away because _findRow() updates pv->prevRow */
- row=_findRow(ncpv, c);
- return row[2+column]; /* skip the start and limit cells */
-}
-
-U_CAPI uint32_t * U_EXPORT2
-upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, /* returns the value cells of row rowIndex, or NULL if out of range or compacted */
- UChar32 *pRangeStart, UChar32 *pRangeEnd) { /* optional outputs for the row's inclusive range; either may be NULL */
- uint32_t *row;
- int32_t columns;
-
- if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) {
- return NULL;
- }
-
- columns=pv->columns;
- row=pv->v+rowIndex*columns;
- if(pRangeStart!=NULL) {
- *pRangeStart=(UChar32)row[0];
- }
- if(pRangeEnd!=NULL) {
- *pRangeEnd=(UChar32)row[1]-1; /* stored limit is exclusive, reported end is inclusive */
- }
- return row+2; /* points past the start and limit cells */
-}
-
-static int32_t U_CALLCONV
-upvec_compareRows(const void *context, const void *l, const void *r) { /* sort comparator: context is the UPropsVectors, l and r are two rows; returns -1, 0 or 1 */
- const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r;
- const UPropsVectors *pv=(const UPropsVectors *)context;
- int32_t i, count, columns;
-
- count=columns=pv->columns; /* includes start/limit columns */
-
- /* start comparing after start/limit but wrap around to them */
- i=2;
- do {
- if(left[i]!=right[i]) { /* first differing cell decides, compared as unsigned */
- return left[i]<right[i] ? -1 : 1;
- }
- if(++i==columns) { /* past the last value cell: continue with cells 0 and 1 */
- i=0;
- }
- } while(--count>0); /* exactly `columns` cells are compared */
-
- return 0;
-}
-
-U_CAPI void U_EXPORT2
-upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) { /* sorts the rows, keeps unique value vectors and reports every row to handler */
- uint32_t *row;
- int32_t i, columns, valueColumns, rows, count;
- UChar32 start, limit;
-
- /* argument checking */
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- if(handler==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- if(pv->isCompacted) { /* already compacted: nothing to do, no handler calls */
- return;
- }
-
- /* Set the flag now: Sorting and compacting destroys the builder data structure. */
- pv->isCompacted=TRUE;
-
- rows=pv->rows;
- columns=pv->columns;
- U_ASSERT(columns>=3); /* upvec_open guarantees this: it rejects columns<1 and then adds 2 */
- valueColumns=columns-2; /* not counting start & limit */
-
- /* sort the properties vectors to find unique vector values */
- uprv_sortArray(pv->v, rows, columns*4,
- upvec_compareRows, pv, FALSE, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
-
- /*
- * Find and set the special values.
- * This has to do almost the same work as the compaction below,
- * to find the indexes where the special-value rows will move.
- */
- row=pv->v;
- count=-valueColumns; /* negative until the first vector has been counted */
- for(i=0; i<rows; ++i) {
- start=(UChar32)row[0];
-
- /* count a new values vector if it is different from the current one */
- if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) { /* row-valueColumns is the previous row's first value cell */
- count+=valueColumns;
- }
-
- if(start>=UPVEC_FIRST_SPECIAL_CP) { /* first phase delivers only the special pseudo code points */
- handler(context, start, start, count, row+2, valueColumns, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- }
-
- row+=columns;
- }
-
- /* count is at the beginning of the last vector, add valueColumns to include that last vector */
- count+=valueColumns;
-
- /* Call the handler once more to signal the start of delivering real values. */
- handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP,
- count, row-valueColumns, valueColumns, pErrorCode); /* count is now the compacted length; the row pointer is the last row's values */
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
-
- /*
- * Move vector contents up to a contiguous array with only unique
- * vector values, and call the handler function for each vector.
- *
- * This destroys the Properties Vector structure and replaces it
- * with an array of just vector values.
- */
- row=pv->v;
- count=-valueColumns;
- for(i=0; i<rows; ++i) {
- /* fetch these first before memmove() may overwrite them */
- start=(UChar32)row[0];
- limit=(UChar32)row[1];
-
- /* add a new values vector if it is different from the current one */
- if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) { /* pv->v+count is the most recently stored unique vector */
- count+=valueColumns;
- uprv_memmove(pv->v+count, row+2, (size_t)valueColumns*4);
- }
-
- if(start<UPVEC_FIRST_SPECIAL_CP) { /* second phase delivers only real code point ranges */
- handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- }
-
- row+=columns;
- }
-
- /* count is at the beginning of the last vector, add one to include that last vector */
- pv->rows=count/valueColumns+1;
-}
-
-U_CAPI const uint32_t * U_EXPORT2
-upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) { /* returns pv's own compacted array, or NULL before compaction; pRows and pColumns are optional outputs */
- if(!pv->isCompacted) { /* in this case the outputs are left untouched */
- return NULL;
- }
- if(pRows!=NULL) {
- *pRows=pv->rows;
- }
- if(pColumns!=NULL) {
- *pColumns=pv->columns-2; /* value columns only, without start and limit */
- }
- return pv->v; /* not a copy */
-}
-
-U_CAPI uint32_t * U_EXPORT2
-upvec_cloneArray(const UPropsVectors *pv, /* must already be compacted */
- int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) { /* returns a uprv_malloc() copy of the compacted array, or NULL with *pErrorCode set */
- uint32_t *clonedArray;
- int32_t byteLength;
-
- if(U_FAILURE(*pErrorCode)) {
- return NULL;
- }
- if(!pv->isCompacted) { /* unlike upvec_getArray(), this is reported as an error */
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- byteLength=pv->rows*(pv->columns-2)*4; /* rows times value columns times 4 bytes per cell */
- clonedArray=(uint32_t *)uprv_malloc(byteLength);
- if(clonedArray==NULL) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- uprv_memcpy(clonedArray, pv->v, byteLength);
- if(pRows!=NULL) {
- *pRows=pv->rows;
- }
- if(pColumns!=NULL) {
- *pColumns=pv->columns-2;
- }
- return clonedArray;
-}
-
-U_CAPI UTrie2 * U_EXPORT2
-upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) { /* compacts pv and returns a frozen trie of row indexes, or NULL on failure */
- UPVecToUTrie2Context toUTrie2={ NULL, 0, 0, 0 }; /* trie, initialValue, errorValue, maxValue */
- upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode); /* the handler opens and fills toUTrie2.trie */
- utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode); /* NOTE(review): called even if compaction failed and trie is still NULL; presumably utrie2_freeze() returns early on a failure code - confirm */
- if(U_FAILURE(*pErrorCode)) { /* do not hand out a partially built trie */
- utrie2_close(toUTrie2.trie);
- toUTrie2.trie=NULL;
- }
- return toUTrie2.trie;
-}
-
-/*
- * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts
- * some 16-bit field and builds and returns a UTrie2.
- */
-
-U_CAPI void U_CALLCONV
-upvec_compactToUTrie2Handler(void *context, /* must point to a UPVecToUTrie2Context */
- UChar32 start, UChar32 end, /* inclusive range, or one special pseudo code point */
- int32_t rowIndex, uint32_t *row, int32_t columns, /* only rowIndex is used */
- UErrorCode *pErrorCode) {
- (void)row;
- (void)columns;
- UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context;
- if(start<UPVEC_FIRST_SPECIAL_CP) { /* real code points: store the compacted row index as the trie value */
- utrie2_setRange32(toUTrie2->trie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode);
- } else {
- switch(start) {
- case UPVEC_INITIAL_VALUE_CP: /* remembered until the trie is opened below */
- toUTrie2->initialValue=rowIndex;
- break;
- case UPVEC_ERROR_VALUE_CP: /* remembered until the trie is opened below */
- toUTrie2->errorValue=rowIndex;
- break;
- case UPVEC_START_REAL_VALUES_CP: /* all special values have been seen; rowIndex is the largest value to expect */
- toUTrie2->maxValue=rowIndex;
- if(rowIndex>0xffff) {
- /* too many rows for a 16-bit trie */
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- } else {
- toUTrie2->trie=utrie2_open(toUTrie2->initialValue,
- toUTrie2->errorValue, pErrorCode);
- }
- break;
- default: /* other pseudo code points are ignored */
- break;
- }
- }
-}
diff --git a/contrib/libs/icu/common/propsvec.h b/contrib/libs/icu/common/propsvec.h
deleted file mode 100644
index 39080615ea3..00000000000
--- a/contrib/libs/icu/common/propsvec.h
+++ /dev/null
@@ -1,178 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2002-2010, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: propsvec.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002feb22
-* created by: Markus W. Scherer
-*
-* Store bits (Unicode character properties) in bit set vectors.
-*/
-
-#ifndef __UPROPSVEC_H__
-#define __UPROPSVEC_H__
-
-#include "unicode/utypes.h"
-#include "utrie.h"
-#include "utrie2.h"
-
-U_CDECL_BEGIN
-
-/**
- * Unicode Properties Vectors associated with code point ranges.
- *
- * Rows of uint32_t integers in a contiguous array store
- * the range limits and the properties vectors.
- *
- * Logically, each row has a certain number of uint32_t values,
- * which is set via the upvec_open() "columns" parameter.
- *
- * Internally, two additional columns are stored.
- * In each internal row,
- * row[0] contains the start code point and
- * row[1] contains the limit code point,
- * which is the start of the next range.
- *
- * Initially, there is only one "normal" row for
- * range [0..0x110000[ with values 0.
- * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
- *
- * It would be possible to store only one range boundary per row,
- * but self-contained rows allow to later sort them by contents.
- */
-struct UPropsVectors;
-typedef struct UPropsVectors UPropsVectors;
-
-/*
- * Special pseudo code points for storing the initialValue and the errorValue,
- * which are used to initialize a UTrie2 or similar.
- */
-#define UPVEC_FIRST_SPECIAL_CP 0x110000 /* lowest pseudo code point; rows starting at or above it are special-value rows */
-#define UPVEC_INITIAL_VALUE_CP 0x110000 /* row whose vector index becomes the trie's initial value */
-#define UPVEC_ERROR_VALUE_CP 0x110001 /* row whose vector index becomes the trie's error value */
-#define UPVEC_MAX_CP 0x110001 /* highest code point accepted by upvec_setValue() and upvec_getValue() */
-
-/*
- * Special pseudo code point used in upvec_compact() signalling the end of
- * delivering special values and the beginning of delivering real ones.
- * Stable value, unlike UPVEC_MAX_CP which might grow over time.
- */
-#define UPVEC_START_REAL_VALUES_CP 0x200000
-
-/*
- * Open a UPropsVectors object.
- * @param columns Number of value integers (uint32_t) per row.
- */
-U_CAPI UPropsVectors * U_EXPORT2
-upvec_open(int32_t columns, UErrorCode *pErrorCode);
-
-U_CAPI void U_EXPORT2
-upvec_close(UPropsVectors *pv);
-
-/*
- * In rows for code points [start..end], select the column,
- * reset the mask bits and set the value bits (ANDed with the mask).
- *
- * Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
- */
-U_CAPI void U_EXPORT2
-upvec_setValue(UPropsVectors *pv,
- UChar32 start, UChar32 end,
- int32_t column,
- uint32_t value, uint32_t mask,
- UErrorCode *pErrorCode);
-
-/*
- * Logically const but must not be used on the same pv concurrently!
- * Always returns 0 if called after upvec_compact().
- */
-U_CAPI uint32_t U_EXPORT2
-upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
-
-/*
- * pRangeStart and pRangeEnd can be NULL.
- * @return NULL if rowIndex out of range and for illegal arguments,
- * or if called after upvec_compact()
- */
-U_CAPI uint32_t * U_EXPORT2
-upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
- UChar32 *pRangeStart, UChar32 *pRangeEnd);
-
-/*
- * Compact the vectors:
- * - modify the memory
- * - keep only unique vectors
- * - store them contiguously from the beginning of the memory
- * - for each (non-unique) row, call the handler function
- *
- * The handler's rowIndex is the index of the row in the compacted
- * memory block.
- * (Therefore, it starts at 0 and increases in increments of the columns value.)
- *
- * In a first phase, only special values are delivered (each exactly once),
- * with start==end both equalling a special pseudo code point.
- * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP
- * where rowIndex is the length of the compacted array,
- * and the row is arbitrary (but not NULL).
- * Then, in the second phase, the handler is called for each row of real values.
- */
-typedef void U_CALLCONV
-UPVecCompactHandler(void *context,
- UChar32 start, UChar32 end,
- int32_t rowIndex, uint32_t *row, int32_t columns,
- UErrorCode *pErrorCode);
-
-U_CAPI void U_EXPORT2
-upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
-
-/*
- * Get the vectors array after calling upvec_compact().
- * The caller must not modify nor release the returned array.
- * Returns NULL if called before upvec_compact().
- */
-U_CAPI const uint32_t * U_EXPORT2
-upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
-
-/*
- * Get a clone of the vectors array after calling upvec_compact().
- * The caller owns the returned array and must uprv_free() it.
- * Returns NULL if called before upvec_compact().
- */
-U_CAPI uint32_t * U_EXPORT2
-upvec_cloneArray(const UPropsVectors *pv,
- int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
-
-/*
- * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
- * vectors array, and freeze the trie.
- */
-U_CAPI UTrie2 * U_EXPORT2
-upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
-
-struct UPVecToUTrie2Context { /* state passed as `context` to upvec_compactToUTrie2Handler() */
- UTrie2 *trie; /* opened by the handler when it sees UPVEC_START_REAL_VALUES_CP; NULL until then */
- int32_t initialValue; /* rowIndex delivered for UPVEC_INITIAL_VALUE_CP */
- int32_t errorValue; /* rowIndex delivered for UPVEC_ERROR_VALUE_CP */
- int32_t maxValue; /* rowIndex delivered with UPVEC_START_REAL_VALUES_CP */
-};
-typedef struct UPVecToUTrie2Context UPVecToUTrie2Context;
-
-/* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */
-U_CAPI void U_CALLCONV
-upvec_compactToUTrie2Handler(void *context,
- UChar32 start, UChar32 end,
- int32_t rowIndex, uint32_t *row, int32_t columns,
- UErrorCode *pErrorCode);
-
-U_CDECL_END
-
-#endif
diff --git a/contrib/libs/icu/common/punycode.cpp b/contrib/libs/icu/common/punycode.cpp
deleted file mode 100644
index 90fe1ec3c80..00000000000
--- a/contrib/libs/icu/common/punycode.cpp
+++ /dev/null
@@ -1,589 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2002-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: punycode.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002jan31
-* created by: Markus W. Scherer
-*/
-
-
-/* This ICU code derived from: */
-/*
-punycode.c 0.4.0 (2001-Nov-17-Sat)
-http://www.cs.berkeley.edu/~amc/idn/
-Adam M. Costello
-http://www.nicemice.net/amc/
-
-Disclaimer and license
-
- Regarding this entire document or any portion of it (including
- the pseudocode and C code), the author makes no guarantees and
- is not responsible for any damage resulting from its use. The
- author grants irrevocable permission to anyone to use, modify,
- and distribute it in any way that does not diminish the rights
- of anyone else to use, modify, and distribute it, provided that
- redistributed derivative works do not contain misleading author or
- version information. Derivative works need not be licensed under
- similar terms.
-*/
-/*
- * ICU modifications:
- * - ICU data types and coding conventions
- * - ICU string buffer handling with implicit source lengths
- * and destination preflighting
- * - UTF-16 handling
- */
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_IDNA
-
-#include "unicode/ustring.h"
-#include "unicode/utf.h"
-#include "unicode/utf16.h"
-#include "ustr_imp.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "punycode.h"
-#include "uassert.h"
-
-
-/* Punycode ----------------------------------------------------------------- */
-
-/* Punycode parameters for Bootstring */
-#define BASE 36 /* number of digit values: 0..25 are letters, 26..35 are decimal digits */
-#define TMIN 1 /* lower clamp of the per-digit threshold t */
-#define TMAX 26 /* upper clamp of the per-digit threshold t */
-#define SKEW 38 /* used in the final term of adaptBias() */
-#define DAMP 700 /* divisor applied to delta on the first bias adaptation */
-#define INITIAL_BIAS 72 /* starting value of bias in the encoder and the decoder */
-#define INITIAL_N 0x80 /* starting value of n: the first non-basic code point */
-
-/* "Basic" Unicode/ASCII code points */
-#define _HYPHEN 0X2d /* '-' */
-#define DELIMITER _HYPHEN /* separates the basic code points from the encoded deltas */
-
-#define _ZERO_ 0X30 /* '0' */
-#define _NINE 0x39 /* '9' */
-
-#define _SMALL_A 0X61 /* 'a' */
-#define _SMALL_Z 0X7a /* 'z' */
-
-#define _CAPITAL_A 0X41 /* 'A' */
-#define _CAPITAL_Z 0X5a /* 'Z' */
-
-/* Both macros evaluate c more than once in the worst case: pass side-effect-free arguments. */
-#define IS_BASIC(c) ((c)<0x80)
-#define IS_BASIC_UPPERCASE(c) (_CAPITAL_A<=(c) && (c)<=_CAPITAL_Z)
-
-/**
- * Map a Bootstring digit value to its basic (ASCII) code point.
- * Values 0..25 become letters - uppercase when the uppercase flag is
- * nonzero, lowercase otherwise - and values 26..35 become '0'..'9'.
- * The caller must pass a digit in the range 0 to BASE-1; it is not validated.
- */
-static inline char
-digitToBasic(int32_t digit, UBool uppercase) {
-    if(digit>=26) {
-        /* 26..35 map to ASCII 0..9 */
-        return (char)(digit+(_ZERO_-26));
-    }
-    /* 0..25 map to ASCII A..Z or a..z */
-    return (char)(digit+(uppercase ? _CAPITAL_A : _SMALL_A));
-}
-
-/**
- * basicToDigit[b] contains the numeric value of the basic code
- * point b (for use in representing integers) in the range 0 to
- * BASE-1, or -1 if b does not represent a value.
- * Uppercase and lowercase letters map to the same values.
- * The table has 256 entries; every entry from 0x80 upward is -1.
- */
-static const int8_t
-basicToDigit[256]={
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1, /* 0x30: '0'..'9' */
-
- -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 0x40: 'A'..'O' */
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 0x50: 'P'..'Z' */
-
- -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 0x60: 'a'..'o' */
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 0x70: 'p'..'z' */
-
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
-};
-
-/**
- * Case-map a single ASCII character.
- * With a nonzero uppercase flag, a..z is converted to A..Z; with a zero
- * flag, A..Z is converted to a..z. Any other character is returned unchanged.
- */
-static inline char
-asciiCaseMap(char b, UBool uppercase) {
-    if(uppercase) {
-        return (_SMALL_A<=b && b<=_SMALL_Z) ? (char)(b-(_SMALL_A-_CAPITAL_A)) : b;
-    }
-    return (_CAPITAL_A<=b && b<=_CAPITAL_Z) ? (char)(b+(_SMALL_A-_CAPITAL_A)) : b;
-}
-
-/* Punycode-specific Bootstring code ---------------------------------------- */
-
-/*
- * The following code omits the {parts} of the pseudo-algorithm in the spec
- * that are not used with the Punycode parameter set.
- */
-
-/*
- * Bias adaptation function.
- * delta is first scaled down (by DAMP on the first call, by 2 afterwards)
- * and increased by delta/length; it is then repeatedly divided by BASE-TMIN
- * while it exceeds ((BASE-TMIN)*TMAX)/2, adding BASE to the result for each
- * division. length is used as a divisor and must not be 0.
- */
-static int32_t
-adaptBias(int32_t delta, int32_t length, UBool firstTime) {
-    int32_t steps=0;
-
-    delta/=(firstTime ? DAMP : 2);
-    delta+=delta/length;
-
-    while(delta>((BASE-TMIN)*TMAX)/2) {
-        delta/=(BASE-TMIN);
-        steps+=BASE;
-    }
-
-    return steps+(((BASE-TMIN+1)*delta)/(delta+SKEW));
-}
-
-/* Capacity of the encoder's code point buffer: the maximum number of input code points. */
-#define MAX_CP_COUNT 200
-
-/**
- * Encode a UTF-16 string as Punycode.
- *
- * Every input code point gets one cpBuffer entry: 0 for a basic (ASCII) one,
- * the code point itself (with the case flag in the sign bit) for any other.
- * Basic code points are copied to dest right away, case-mapped according to
- * caseFlags if that is not NULL, and are followed by a DELIMITER if there was
- * at least one. The non-basic code points are then emitted in increasing
- * code point order as generalized variable-length integers.
- *
- * Output beyond destCapacity is counted but not written.
- *
- * Sets U_ILLEGAL_ARGUMENT_ERROR for bad arguments, U_INDEX_OUTOFBOUNDS_ERROR
- * when src has more than MAX_CP_COUNT code points, U_INVALID_CHAR_FOUND for
- * an unmatched surrogate, and U_INTERNAL_PROGRAM_ERROR when delta would
- * overflow. Returns 0 in all of those cases, otherwise the result of
- * u_terminateUChars() for the computed output length.
- */
-U_CFUNC int32_t
-u_strToPunycode(const UChar *src, int32_t srcLength,
- UChar *dest, int32_t destCapacity,
- const UBool *caseFlags,
- UErrorCode *pErrorCode) {
-
- int32_t cpBuffer[MAX_CP_COUNT];
- int32_t n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
- UChar c, c2;
-
- /* argument checking */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /*
- * Handle the basic code points and
- * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
- */
- srcCPCount=destLength=0;
- if(srcLength==-1) {
- /* NUL-terminated input */
- for(j=0; /* no condition */; ++j) {
- if((c=src[j])==0) {
- break;
- }
- if(srcCPCount==MAX_CP_COUNT) {
- /* too many input code points */
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- if(IS_BASIC(c)) {
- cpBuffer[srcCPCount++]=0;
- if(destLength<destCapacity) {
- dest[destLength]=
- caseFlags!=NULL ?
- asciiCaseMap((char)c, caseFlags[j]) :
- (char)c;
- }
- ++destLength;
- } else {
- n=(caseFlags!=NULL && caseFlags[j])<<31L;
- if(U16_IS_SINGLE(c)) {
- n|=c;
- /* c is not NUL here, so src[j+1] is at worst the terminator and may be read */
- } else if(U16_IS_LEAD(c) && U16_IS_TRAIL(c2=src[j+1])) {
- ++j;
- n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2);
- } else {
- /* error: unmatched surrogate */
- *pErrorCode=U_INVALID_CHAR_FOUND;
- return 0;
- }
- cpBuffer[srcCPCount++]=n;
- }
- }
- } else {
- /* length-specified input */
- for(j=0; j<srcLength; ++j) {
- if(srcCPCount==MAX_CP_COUNT) {
- /* too many input code points */
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- c=src[j];
- if(IS_BASIC(c)) {
- cpBuffer[srcCPCount++]=0;
- if(destLength<destCapacity) {
- dest[destLength]=
- caseFlags!=NULL ?
- asciiCaseMap((char)c, caseFlags[j]) :
- (char)c;
- }
- ++destLength;
- } else {
- n=(caseFlags!=NULL && caseFlags[j])<<31L;
- if(U16_IS_SINGLE(c)) {
- n|=c;
- /* the (j+1)<srcLength test keeps the trail surrogate read inside the input */
- } else if(U16_IS_LEAD(c) && (j+1)<srcLength && U16_IS_TRAIL(c2=src[j+1])) {
- ++j;
- n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2);
- } else {
- /* error: unmatched surrogate */
- *pErrorCode=U_INVALID_CHAR_FOUND;
- return 0;
- }
- cpBuffer[srcCPCount++]=n;
- }
- }
- }
-
- /* Finish the basic string - if it is not empty - with a delimiter. */
- basicLength=destLength;
- if(basicLength>0) {
- if(destLength<destCapacity) {
- dest[destLength]=DELIMITER;
- }
- ++destLength;
- }
-
- /*
- * handledCPCount is the number of code points that have been handled
- * basicLength is the number of basic code points
- * destLength is the number of chars that have been output
- */
-
- /* Initialize the state: */
- n=INITIAL_N;
- delta=0;
- bias=INITIAL_BIAS;
-
- /* Main encoding loop: */
- for(handledCPCount=basicLength; handledCPCount<srcCPCount; /* no op */) {
- /*
- * All non-basic code points < n have been handled already.
- * Find the next larger one:
- */
- for(m=0x7fffffff, j=0; j<srcCPCount; ++j) {
- q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
- if(n<=q && q<m) {
- m=q;
- }
- }
-
- /*
- * Increase delta enough to advance the decoder's
- * <n,i> state to <m,0>, but guard against overflow:
- */
- if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
- return 0;
- }
- delta+=(m-n)*(handledCPCount+1);
- n=m;
-
- /* Encode a sequence of same code points n */
- for(j=0; j<srcCPCount; ++j) {
- q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
- if(q<n) {
- ++delta;
- } else if(q==n) {
- /* Represent delta as a generalized variable-length integer: */
- for(q=delta, k=BASE; /* no condition */; k+=BASE) {
-
- /** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt
-
- t=k-bias;
- if(t<TMIN) {
- t=TMIN;
- } else if(t>TMAX) {
- t=TMAX;
- }
- */
-
- t=k-bias;
- if(t<TMIN) {
- t=TMIN;
- } else if(k>=(bias+TMAX)) {
- t=TMAX;
- }
-
- if(q<t) {
- break;
- }
-
- if(destLength<destCapacity) {
- dest[destLength]=digitToBasic(t+(q-t)%(BASE-t), 0);
- }
- ++destLength;
- q=(q-t)/(BASE-t);
- }
-
- /* the last digit carries the case flag stored in the sign bit of cpBuffer[j] */
- if(destLength<destCapacity) {
- dest[destLength]=digitToBasic(q, (UBool)(cpBuffer[j]<0));
- }
- ++destLength;
- bias=adaptBias(delta, handledCPCount+1, (UBool)(handledCPCount==basicLength));
- delta=0;
- ++handledCPCount;
- }
- }
-
- ++delta;
- ++n;
- }
-
- return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
-}
-
-/**
- * Decode a Punycode string into UTF-16.
- *
- * The code units before the last DELIMITER are copied to dest unchanged and
- * must all be basic (ASCII). The code units after it are decoded as a
- * sequence of generalized variable-length integers, each of which inserts
- * one code point into the output.
- *
- * Output that does not fit into destCapacity is counted but not written.
- * If caseFlags is not NULL, one flag is written per written output code unit.
- *
- * Sets U_ILLEGAL_ARGUMENT_ERROR for bad arguments, U_INVALID_CHAR_FOUND for a
- * non-basic code unit before the last delimiter or a code unit after it that
- * is not a Punycode digit (a-z, A-Z, 0-9), and U_ILLEGAL_CHAR_FOUND for an
- * ill-formed delta sequence (truncated input, integer overflow, or a result
- * that is a surrogate or above U+10FFFF). Returns 0 in all of those cases,
- * otherwise the result of u_terminateUChars() for the computed output length.
- */
-U_CFUNC int32_t
-u_strFromPunycode(const UChar *src, int32_t srcLength,
-                  UChar *dest, int32_t destCapacity,
-                  UBool *caseFlags,
-                  UErrorCode *pErrorCode) {
-    int32_t n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
-            destCPCount, firstSupplementaryIndex, cpLength;
-    UChar b;
-
-    /* argument checking */
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-
-    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    if(srcLength==-1) {
-        srcLength=u_strlen(src);
-    }
-
-    /*
-     * Handle the basic code points:
-     * Let basicLength be the number of input code points
-     * before the last delimiter, or 0 if there is none,
-     * then copy the first basicLength code points to the output.
-     *
-     * The two following loops iterate backward.
-     */
-    for(j=srcLength; j>0;) {
-        if(src[--j]==DELIMITER) {
-            break;
-        }
-    }
-    destLength=basicLength=destCPCount=j;
-    U_ASSERT(destLength>=0);
-
-    while(j>0) {
-        b=src[--j];
-        if(!IS_BASIC(b)) {
-            *pErrorCode=U_INVALID_CHAR_FOUND;
-            return 0;
-        }
-
-        if(j<destCapacity) {
-            dest[j]=(UChar)b;
-
-            if(caseFlags!=NULL) {
-                caseFlags[j]=IS_BASIC_UPPERCASE(b);
-            }
-        }
-    }
-
-    /* Initialize the state: */
-    n=INITIAL_N;
-    i=0;
-    bias=INITIAL_BIAS;
-    firstSupplementaryIndex=1000000000;
-
-    /*
-     * Main decoding loop:
-     * Start just after the last delimiter if any
-     * basic code points were copied; start at the beginning otherwise.
-     */
-    for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) {
-        /*
-         * in is the index of the next character to be consumed, and
-         * destCPCount is the number of code points in the output array.
-         *
-         * Decode a generalized variable-length integer into delta,
-         * which gets added to i. The overflow checking is easier
-         * if we increase i as we go, then subtract off its starting
-         * value at the end to obtain delta.
-         */
-        for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) {
-            if(in>=srcLength) {
-                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-                return 0;
-            }
-
-            /*
-             * Only basic (ASCII) code units can be digits.
-             * Do not index basicToDigit[] with a UChar truncated to uint8_t:
-             * that would make e.g. U+0161 alias 'a' (0x61) and be accepted.
-             */
-            b=src[in++];
-            digit=IS_BASIC(b) ? basicToDigit[b] : -1;
-            if(digit<0) {
-                *pErrorCode=U_INVALID_CHAR_FOUND;
-                return 0;
-            }
-            if(digit>(0x7fffffff-i)/w) {
-                /* integer overflow */
-                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-                return 0;
-            }
-
-            i+=digit*w;
-            /*
-             * Threshold clamp: the upper bound is tested as k>=bias+TMAX
-             * (rather than t>TMAX) for conformance with
-             * draft-ietf-idn-punycode-03.txt.
-             */
-            t=k-bias;
-            if(t<TMIN) {
-                t=TMIN;
-            } else if(k>=(bias+TMAX)) {
-                t=TMAX;
-            }
-            if(digit<t) {
-                break;
-            }
-
-            if(w>0x7fffffff/(BASE-t)) {
-                /* integer overflow */
-                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-                return 0;
-            }
-            w*=BASE-t;
-        }
-
-        /*
-         * Modification from sample code:
-         * Increments destCPCount here,
-         * where needed instead of in for() loop tail.
-         */
-        ++destCPCount;
-        bias=adaptBias(i-oldi, destCPCount, (UBool)(oldi==0));
-
-        /*
-         * i was supposed to wrap around from (incremented) destCPCount to 0,
-         * incrementing n each time, so we'll fix that now:
-         */
-        if(i/destCPCount>(0x7fffffff-n)) {
-            /* integer overflow */
-            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-            return 0;
-        }
-
-        n+=i/destCPCount;
-        i%=destCPCount;
-        /* not needed for Punycode: */
-        /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */
-
-        if(n>0x10ffff || U_IS_SURROGATE(n)) {
-            /* Unicode code point overflow */
-            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-            return 0;
-        }
-
-        /* Insert n at position i of the output: */
-        cpLength=U16_LENGTH(n);
-        if(dest!=NULL && ((destLength+cpLength)<=destCapacity)) {
-            int32_t codeUnitIndex;
-
-            /*
-             * Handle indexes when supplementary code points are present.
-             *
-             * In almost all cases, there will be only BMP code points before i
-             * and even in the entire string.
-             * This is handled with the same efficiency as with UTF-32.
-             *
-             * Only the rare cases with supplementary code points are handled
-             * more slowly - but not too bad since this is an insertion anyway.
-             */
-            if(i<=firstSupplementaryIndex) {
-                codeUnitIndex=i;
-                if(cpLength>1) {
-                    firstSupplementaryIndex=codeUnitIndex;
-                } else {
-                    ++firstSupplementaryIndex;
-                }
-            } else {
-                codeUnitIndex=firstSupplementaryIndex;
-                U16_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex);
-            }
-
-            /* use the UChar index codeUnitIndex instead of the code point index i */
-            if(codeUnitIndex<destLength) {
-                /* open a gap of cpLength code units; the regions overlap, hence memmove */
-                uprv_memmove(dest+codeUnitIndex+cpLength,
-                             dest+codeUnitIndex,
-                             (destLength-codeUnitIndex)*U_SIZEOF_UCHAR);
-                if(caseFlags!=NULL) {
-                    uprv_memmove(caseFlags+codeUnitIndex+cpLength,
-                                 caseFlags+codeUnitIndex,
-                                 destLength-codeUnitIndex);
-                }
-            }
-            if(cpLength==1) {
-                /* BMP, insert one code unit */
-                dest[codeUnitIndex]=(UChar)n;
-            } else {
-                /* supplementary character, insert two code units */
-                dest[codeUnitIndex]=U16_LEAD(n);
-                dest[codeUnitIndex+1]=U16_TRAIL(n);
-            }
-            if(caseFlags!=NULL) {
-                /* Case of last character determines uppercase flag: */
-                caseFlags[codeUnitIndex]=IS_BASIC_UPPERCASE(src[in-1]);
-                if(cpLength==2) {
-                    caseFlags[codeUnitIndex+1]=FALSE;
-                }
-            }
-        }
-        destLength+=cpLength;
-        U_ASSERT(destLength>=0);
-        ++i;
-    }
-
-    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
-}
-
-/* ### check notes on overflow handling - only necessary if not IDNA? are these Punycode functions to be public? */
-
-#endif /* #if !UCONFIG_NO_IDNA */
diff --git a/contrib/libs/icu/common/punycode.h b/contrib/libs/icu/common/punycode.h
deleted file mode 100644
index 5d8a243175c..00000000000
--- a/contrib/libs/icu/common/punycode.h
+++ /dev/null
@@ -1,120 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2002-2003, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: punycode.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002jan31
-* created by: Markus W. Scherer
-*/
-
-/* This ICU code derived from: */
-/*
-punycode.c 0.4.0 (2001-Nov-17-Sat)
-http://www.cs.berkeley.edu/~amc/idn/
-Adam M. Costello
-http://www.nicemice.net/amc/
-*/
-
-#ifndef __PUNYCODE_H__
-#define __PUNYCODE_H__
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_IDNA
-
-/**
- * u_strToPunycode() converts Unicode to Punycode.
- *
- * The input string must not contain single, unpaired surrogates.
- * The output will be represented as an array of ASCII code points.
- *
- * The output string is NUL-terminated according to normal ICU
- * string output rules.
- *
- * @param src Input Unicode string.
- * This function handles a limited amount of code points
- * (the limit is >=64).
- * U_INDEX_OUTOFBOUNDS_ERROR is set if the limit is exceeded.
- * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
- * @param dest Output Punycode array.
- * @param destCapacity Size of dest.
- * @param caseFlags Vector of boolean values, one per input UChar,
- * indicating that the corresponding character is to be
- * marked for the decoder optionally
- * uppercasing (TRUE) or lowercasing (FALSE)
- * the character.
- * ASCII characters are output directly in the case as marked.
- * Flags corresponding to trail surrogates are ignored.
- * If caseFlags==NULL then input characters are not
- * case-mapped.
- * @param pErrorCode ICU in/out error code parameter.
- * U_INVALID_CHAR_FOUND if src contains
- * unmatched single surrogates.
- * U_INDEX_OUTOFBOUNDS_ERROR if src contains
- * too many code points.
- * @return Number of ASCII characters in puny.
- *
- * @see u_strFromPunycode
- */
-U_CFUNC int32_t
-u_strToPunycode(const UChar *src, int32_t srcLength,
- UChar *dest, int32_t destCapacity,
- const UBool *caseFlags,
- UErrorCode *pErrorCode);
-
-/**
- * u_strFromPunycode() converts Punycode to Unicode.
- * The Unicode string will be at most as long (in UChars)
- * as the Punycode string (in chars).
- *
- * @param src Input Punycode string.
- * @param srcLength Length of puny, or -1 if NUL-terminated
- * @param dest Output Unicode string buffer.
- * @param destCapacity Size of dest in number of UChars,
- * and of caseFlags in numbers of UBools.
- * @param caseFlags Output array for case flags as
- * defined by the Punycode string.
- * The caller should uppercase (TRUE) or lowercase (FALSE)
- * the corresponding character in dest.
- * For supplementary characters, only the lead surrogate
- * is marked, and FALSE is stored for the trail surrogate.
- * This is redundant and not necessary for ASCII characters
- * because they are already in the case indicated.
- * Can be NULL if the case flags are not needed.
- * @param pErrorCode ICU in/out error code parameter.
- * U_INVALID_CHAR_FOUND if a non-ASCII character
- * precedes the last delimiter ('-'),
- * or if an invalid character (not a-zA-Z0-9) is found
- * after the last delimiter.
- * U_ILLEGAL_CHAR_FOUND if the delta sequence is ill-formed.
- * @return Number of UChars written to dest.
- *
- * @see u_strToPunycode
- */
-U_CFUNC int32_t
-u_strFromPunycode(const UChar *src, int32_t srcLength,
- UChar *dest, int32_t destCapacity,
- UBool *caseFlags,
- UErrorCode *pErrorCode);
-
-#endif /* #if !UCONFIG_NO_IDNA */
-
-#endif
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/contrib/libs/icu/common/putil.cpp b/contrib/libs/icu/common/putil.cpp
deleted file mode 100644
index 0f3c795336c..00000000000
--- a/contrib/libs/icu/common/putil.cpp
+++ /dev/null
@@ -1,2430 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1997-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
-*
-* Date Name Description
-* 04/14/97 aliu Creation.
-* 04/24/97 aliu Added getDefaultDataDirectory() and
-* getDefaultLocaleID().
-* 04/28/97 aliu Rewritten to assume Unix and apply general methods
-* for assumed case. Non-UNIX platforms must be
-* special-cased. Rewrote numeric methods dealing
-* with NaN and Infinity to be platform independent
-* over all IEEE 754 platforms.
-* 05/13/97 aliu Restored sign of timezone
-* (semantics are hours West of GMT)
-* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
-* nextDouble..
-* 07/22/98 stephen Added remainder, max, min, trunc
-* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
-* 08/24/98 stephen Added longBitsFromDouble
-* 09/08/98 stephen Minor changes for Mac Port
-* 03/02/99 stephen Removed openFile(). Added AS400 support.
-* Fixed EBCDIC tables
-* 04/15/99 stephen Converted to C.
-* 06/28/99 stephen Removed mutex locking in u_isBigEndian().
-* 08/04/99 jeffrey R. Added OS/2 changes
-* 11/15/99 helena Integrated S/390 IEEE support.
-* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
-* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
-* 01/03/08 Steven L. Fake Time Support
-******************************************************************************
-*/
-
-// Defines _XOPEN_SOURCE for access to POSIX functions.
-// Must be before any other #includes.
-#include "uposixdefs.h"
-
-// First, the platform type. Need this for U_PLATFORM.
-#include "unicode/platform.h"
-
-#if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
-/* tzset isn't defined in strict ANSI on MinGW. */
-#undef __STRICT_ANSI__
-#endif
-
-/*
- * Cygwin with GCC requires time.h to be included after the statement above that disables strict ANSI mode.
- */
-#include <time.h>
-
-#if !U_PLATFORM_USES_ONLY_WIN32_API
-#include <sys/time.h>
-#endif
-
-/* include the rest of the ICU headers */
-#include "unicode/putil.h"
-#include "unicode/ustring.h"
-#include "putilimp.h"
-#include "uassert.h"
-#include "umutex.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "locmap.h"
-#include "ucln_cmn.h"
-#include "charstr.h"
-
-/* Include standard headers. */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include <locale.h>
-#include <float.h>
-
-#ifndef U_COMMON_IMPLEMENTATION
-#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
-#endif
-
-
-/* include system headers */
-#if U_PLATFORM_USES_ONLY_WIN32_API
- /*
- * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
- * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
- * to use native APIs as much as possible?
- */
-#ifndef WIN32_LEAN_AND_MEAN
-# define WIN32_LEAN_AND_MEAN
-#endif
-# define VC_EXTRALEAN
-# define NOUSER
-# define NOSERVICE
-# define NOIME
-# define NOMCX
-# include <windows.h>
-# include "unicode/uloc.h"
-# include "wintz.h"
-#elif U_PLATFORM == U_PF_OS400
-# include <float.h>
-# include <qusec.h> /* error code structure */
-# include <qusrjobi.h>
-# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
-# include <mih/testptr.h> /* For uprv_maximumPtr */
-#elif U_PLATFORM == U_PF_OS390
-# include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
-#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
-# include <limits.h>
-# include <unistd.h>
-# if U_PLATFORM == U_PF_SOLARIS
-# ifndef _XPG4_2
-# define _XPG4_2
-# endif
-# endif
-#elif U_PLATFORM == U_PF_QNX
-# include <sys/neutrino.h>
-#endif
-
-/*
- * Only include langinfo.h if we have a way to get the codeset. If we later
- * depend on more feature, we can test on U_HAVE_NL_LANGINFO.
- *
- */
-
-#if U_HAVE_NL_LANGINFO_CODESET
-#include <langinfo.h>
-#endif
-
-/**
- * Simple things (presence of functions, etc) should just go in configure.in and be added to
- * icucfg.h via autoheader.
- */
-#if U_PLATFORM_IMPLEMENTS_POSIX
-# if U_PLATFORM == U_PF_OS400
-# define HAVE_DLFCN_H 0
-# define HAVE_DLOPEN 0
-# else
-# ifndef HAVE_DLFCN_H
-# define HAVE_DLFCN_H 1
-# endif
-# ifndef HAVE_DLOPEN
-# define HAVE_DLOPEN 1
-# endif
-# endif
-# ifndef HAVE_GETTIMEOFDAY
-# define HAVE_GETTIMEOFDAY 1
-# endif
-#else
-# define HAVE_DLFCN_H 0
-# define HAVE_DLOPEN 0
-# define HAVE_GETTIMEOFDAY 0
-#endif
-
-U_NAMESPACE_USE
-
-/* Define the extension for data files, again... */
-#define DATA_TYPE "dat"
-
-/* Leave this copyright notice here! */
-static const char copyright[] = U_COPYRIGHT_STRING;
-
-/* floating point implementations ------------------------------------------- */
-
-/* We return QNAN rather than SNAN*/
-#define SIGN 0x80000000U
-
-/*
- * Make it easy to define certain types of constants:
- * a double constant is written as its 64-bit pattern through the i64 member
- * and read back as a double through the d64 member.
- */
-typedef union {
- int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
- double d64;
-} BitPatternConversion;
-/* Bit pattern returned by uprv_getNaN() on IEEE 754 and OS/390 builds. */
-static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
-/* Bit pattern of positive infinity; uprv_isNaN() and uprv_isInfinite() compare against it. */
-static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
-
-/*---------------------------------------------------------------------------
- Platform utilities
- Our general strategy is to assume we're on a POSIX platform. Platforms which
- are non-POSIX must declare themselves so. The default POSIX implementation
- will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
- functions).
- ---------------------------------------------------------------------------*/
-
-#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
-# undef U_POSIX_LOCALE
-#else
-# define U_POSIX_LOCALE 1
-#endif
-
-/*
- WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
- can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
-*/
-#if !IEEE_754
-/*
- * Return a pointer to n bytes of *d: the first n bytes of the object when
- * U_IS_BIG_ENDIAN is set, the last n bytes otherwise.
- * Only compiled when IEEE_754 is not set.
- */
-static char*
-u_topNBytesOfDouble(double* d, int n)
-{
-#if U_IS_BIG_ENDIAN
- return (char*)d;
-#else
- return (char*)(d + 1) - n;
-#endif
-}
-
-/*
- * Return a pointer to n bytes of *d: the last n bytes of the object when
- * U_IS_BIG_ENDIAN is set, the first n bytes otherwise (the mirror image of
- * u_topNBytesOfDouble()). Only compiled when IEEE_754 is not set.
- */
-static char*
-u_bottomNBytesOfDouble(double* d, int n)
-{
-#if U_IS_BIG_ENDIAN
- return (char*)(d + 1) - n;
-#else
- return (char*)d;
-#endif
-}
-#endif /* !IEEE_754 */
-
-#if IEEE_754
-/*
- * Report whether bit 0x80 is set in one end byte of d's object
- * representation: the first byte when U_IS_BIG_ENDIAN is set, the last byte
- * otherwise. uprv_fmax() and uprv_fmin() use this to tell -0.0 from +0.0.
- */
-static UBool
-u_signBit(double d) {
-    const uint8_t *bytes = (const uint8_t *)&d;
-#if U_IS_BIG_ENDIAN
-    const uint8_t topByte = bytes[0];
-#else
-    const uint8_t topByte = bytes[sizeof(double) - 1];
-#endif
-    return (topByte & 0x80) != 0;
-}
-#endif
-
-
-
-#if defined (U_DEBUG_FAKETIME)
-/* Override the clock to test things without having to move the system clock.
- * Assumes POSIX gettimeofday() will function
- */
-UDate fakeClock_t0 = 0; /** Time to start the clock from **/
-UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
-UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
-
-/*
- * Read the system clock with gettimeofday() and return it in milliseconds:
- * tv_sec * U_MILLIS_PER_SECOND plus tv_usec/1000 (integer division).
- * The return value of gettimeofday() is not checked.
- */
-static UDate getUTCtime_real() {
- struct timeval posixTime;
- gettimeofday(&posixTime, NULL);
- return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
-}
-
-/*
- * Return the real time shifted by fakeClock_dt.
- * On the first call (guarded by fakeClockMutex and fakeClock_set) the offset
- * is computed from the environment variable U_FAKETIME_START, which is parsed
- * as a double; if the variable is unset or empty the offset is 0.
- * Diagnostics are printed to stderr in both cases.
- */
-static UDate getUTCtime_fake() {
- static UMutex fakeClockMutex;
- umtx_lock(&fakeClockMutex);
- if(!fakeClock_set) {
- UDate real = getUTCtime_real();
- const char *fake_start = getenv("U_FAKETIME_START");
- if((fake_start!=NULL) && (fake_start[0]!=0)) {
- /* NOTE(review): the sscanf() result is not checked; on a parse failure fakeClock_t0 keeps its previous value */
- sscanf(fake_start,"%lf",&fakeClock_t0);
- fakeClock_dt = fakeClock_t0 - real;
- fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
- "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
- fakeClock_t0, fake_start, fakeClock_dt, real);
- } else {
- fakeClock_dt = 0;
- fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
- "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
- }
- fakeClock_set = TRUE;
- }
- umtx_unlock(&fakeClockMutex);
-
- /* fakeClock_dt is read after the mutex has been released */
- return getUTCtime_real() + fakeClock_dt;
-}
-#endif
-
-#if U_PLATFORM_USES_ONLY_WIN32_API
-/*
- * Lets a FILETIME be read as one 64-bit integer;
- * uprv_getRawUTCtime() fills fileTime and then reads int64.
- */
-typedef union {
- int64_t int64;
- FILETIME fileTime;
-} FileTimeConversion; /* This is like a ULARGE_INTEGER */
-
-/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
-#define EPOCH_BIAS INT64_C(116444736000000000)
-/* Number of 100-nanosecond units in one millisecond */
-#define HECTONANOSECOND_PER_MILLISECOND 10000
-
-#endif
-
-/*---------------------------------------------------------------------------
- Universal Implementations
- These are designed to work on all platforms. Try these, and if they
- don't work on your platform, then special case your platform with new
- implementations.
----------------------------------------------------------------------------*/
-
-/*
- * Return the current time: uprv_getRawUTCtime(), or the result of
- * getUTCtime_fake() when U_DEBUG_FAKETIME is defined at compile time.
- */
-U_CAPI UDate U_EXPORT2
-uprv_getUTCtime()
-{
-#if defined(U_DEBUG_FAKETIME)
- return getUTCtime_fake(); /* Hook for overriding the clock */
-#else
- return uprv_getRawUTCtime();
-#endif
-}
-
-/*
- * Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.
- * Three implementations are selected at compile time:
- * - Win32-only platforms: GetSystemTimeAsFileTime(), rebased by EPOCH_BIAS
- *   and converted from 100-nanosecond units;
- * - HAVE_GETTIMEOFDAY: gettimeofday(), seconds plus microseconds/1000;
- * - otherwise: time(), which only has whole-second resolution.
- */
-U_CAPI UDate U_EXPORT2
-uprv_getRawUTCtime()
-{
-#if U_PLATFORM_USES_ONLY_WIN32_API
-
- FileTimeConversion winTime;
- GetSystemTimeAsFileTime(&winTime.fileTime);
- return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
-#else
-
-#if HAVE_GETTIMEOFDAY
- struct timeval posixTime;
- gettimeofday(&posixTime, NULL);
- return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
-#else
- time_t epochtime;
- time(&epochtime);
- return (UDate)epochtime * U_MILLIS_PER_SECOND;
-#endif
-
-#endif
-}
-
-/*-----------------------------------------------------------------------------
- IEEE 754
- These methods detect and return NaN and infinity values for doubles
- conforming to IEEE 754. Platforms which support this standard include X86,
- Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
- If this doesn't work on your platform, you have non-IEEE floating-point, and
- will need to code your own versions. A naive implementation is to return 0.0
- for getNaN and getInfinity, and false for isNaN and isInfinite.
- ---------------------------------------------------------------------------*/
-
-/*
- * Report whether number is a NaN.
- * IEEE 754 builds compare the bit pattern, masked with U_INT64_MAX, against
- * the pattern of infinity; OS/390 builds test the high and low 32-bit words;
- * all other builds fall back to number != number.
- */
-U_CAPI UBool U_EXPORT2
-uprv_isNaN(double number)
-{
-#if IEEE_754
- BitPatternConversion convertedNumber;
- convertedNumber.d64 = number;
- /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
- /* presumably the U_INT64_MAX mask is what removes the sign bit - it is defined elsewhere */
- return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
-
-#elif U_PLATFORM == U_PF_OS390
- uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
- sizeof(uint32_t));
- uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
- sizeof(uint32_t));
-
- return ((highBits & 0x7F080000L) == 0x7F080000L) &&
- (lowBits == 0x00000000L);
-
-#else
- /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
- /* you'll need to replace this default implementation with what's correct*/
- /* for your platform.*/
- return number != number;
-#endif
-}
-
-/*
- * Report whether number is an infinity.
- * IEEE 754 builds compare the bit pattern, masked with U_INT64_MAX, for
- * equality with the pattern of infinity; OS/390 builds test the high word
- * with the SIGN bit cleared plus the low word; all other builds fall back to
- * number == 2.0 * number.
- */
-U_CAPI UBool U_EXPORT2
-uprv_isInfinite(double number)
-{
-#if IEEE_754
- BitPatternConversion convertedNumber;
- convertedNumber.d64 = number;
- /* Infinity is exactly 0x7FF0000000000000U. */
- return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
-#elif U_PLATFORM == U_PF_OS390
- uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
- sizeof(uint32_t));
- uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
- sizeof(uint32_t));
-
- return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
-
-#else
- /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
- /* value, you'll need to replace this default implementation with what's*/
- /* correct for your platform.*/
- /* NOTE(review): this comparison is also true for 0.0 */
- return number == (2.0 * number);
-#endif
-}
-
-/*
- * Report whether number is positive infinity.
- * On builds that are neither IEEE 754 nor OS/390 the sign is not examined:
- * the result is simply uprv_isInfinite(number).
- */
-U_CAPI UBool U_EXPORT2
-uprv_isPositiveInfinity(double number)
-{
-#if IEEE_754 || U_PLATFORM == U_PF_OS390
- return (UBool)(number > 0 && uprv_isInfinite(number));
-#else
- return uprv_isInfinite(number);
-#endif
-}
-
-/*
- * Report whether number is negative infinity.
- * On builds that are neither IEEE 754 nor OS/390 the sign is taken from the
- * SIGN bit of the 32-bit word returned by u_topNBytesOfDouble().
- */
-U_CAPI UBool U_EXPORT2
-uprv_isNegativeInfinity(double number)
-{
-#if IEEE_754 || U_PLATFORM == U_PF_OS390
- return (UBool)(number < 0 && uprv_isInfinite(number));
-
-#else
- uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
- sizeof(uint32_t));
- return((highBits & SIGN) && uprv_isInfinite(number));
-
-#endif
-}
-
-/*
- * Return a NaN value: the constant gNan on IEEE 754 and OS/390 builds.
- * All other builds return 0.0 as a placeholder.
- */
-U_CAPI double U_EXPORT2
-uprv_getNaN()
-{
-#if IEEE_754 || U_PLATFORM == U_PF_OS390
- return gNan.d64;
-#else
- /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
- /* you'll need to replace this default implementation with what's correct*/
- /* for your platform.*/
- return 0.0;
-#endif
-}
-
-/**
- * Returns the infinity value held in gInf on IEEE 754 and OS/390 builds.
- * Other platforms get 0.0 as a placeholder.
- */
-U_CAPI double U_EXPORT2
-uprv_getInfinity()
-{
-#if !(IEEE_754 || U_PLATFORM == U_PF_OS390)
-    /* If your platform doesn't support IEEE 754 but *does* have an infinity
-       value, replace this default implementation with what's correct
-       for your platform. */
-    return 0.0;
-#else
-    return gInf.d64;
-#endif
-}
-
-/** Thin wrapper around the C library floor(). */
-U_CAPI double U_EXPORT2
-uprv_floor(double x)
-{
-    const double rounded = floor(x);
-    return rounded;
-}
-
-/** Thin wrapper around the C library ceil(). */
-U_CAPI double U_EXPORT2
-uprv_ceil(double x)
-{
-    const double rounded = ceil(x);
-    return rounded;
-}
-
-/**
- * Rounds to the nearest integral value, with ties going toward
- * positive infinity (so 2.5 -> 3.0 and -2.5 -> -2.0).
- *
- * The result is derived from floor(x) and the exact fractional part
- * instead of floor(x + 0.5): the addition itself can round, which made
- * 0.49999999999999994 come out as 1.0 and bumped large odd integers
- * (at or above 2^52) by one.
- *
- * NaN and infinities are returned unchanged. An input of -0.0 now yields
- * -0.0 (previously +0.0); the two compare equal.
- *
- * @param x the value to round
- * @return x rounded half-up to an integral double
- */
-U_CAPI double U_EXPORT2
-uprv_round(double x)
-{
-    const double below = uprv_floor(x);
-    /* x - floor(x) lies in [0, 1) and is computed without rounding error. */
-    if (x - below >= 0.5) {
-        return below + 1.0;
-    }
-    return below;
-}
-
-/** Thin wrapper around the C library fabs(). */
-U_CAPI double U_EXPORT2
-uprv_fabs(double x)
-{
-    const double magnitude = fabs(x);
-    return magnitude;
-}
-
-/**
- * Thin wrapper around the C library modf().
- *
- * @param x the value to split
- * @param y receives the integral part, as stored by modf()
- * @return the value returned by modf(x, y)
- */
-U_CAPI double U_EXPORT2
-uprv_modf(double x, double* y)
-{
-    const double fraction = modf(x, y);
-    return fraction;
-}
-
-/** Thin wrapper around the C library fmod(x, y). */
-U_CAPI double U_EXPORT2
-uprv_fmod(double x, double y)
-{
-    const double remainder = fmod(x, y);
-    return remainder;
-}
-
-/** Thin wrapper around the C library pow(x, y), declared as double pow(double, double). */
-U_CAPI double U_EXPORT2
-uprv_pow(double x, double y)
-{
-    const double power = pow(x, y);
-    return power;
-}
-
-/**
- * Computes ten raised to an integer exponent via pow().
- *
- * @param x the exponent
- * @return pow(10.0, x)
- */
-U_CAPI double U_EXPORT2
-uprv_pow10(int32_t x)
-{
-    const double exponent = (double)x;
-    return pow(10.0, exponent);
-}
-
-/**
- * Returns the larger of two doubles.
- *
- * On IEEE 754 builds a NaN operand makes the result NaN, and the two
- * zeros are ordered so that +0.0 is treated as larger than -0.0.
- *
- * @param x first operand
- * @param y second operand
- * @return the larger operand, or NaN if either operand is NaN (IEEE 754 builds)
- */
-U_CAPI double U_EXPORT2
-uprv_fmax(double x, double y)
-{
-#if IEEE_754
-    /* first handle NaN */
-    if (uprv_isNaN(x) || uprv_isNaN(y)) {
-        return uprv_getNaN();
-    }
-
-    /* -0 and 0 compare equal, so "x > y" cannot order them. Checking only
-       for x == -0 used to let fmax(+0, -0) fall through and return -0;
-       pick by sign in both directions instead. */
-    if (x == 0.0 && y == 0.0) {
-        return u_signBit(x) ? y : x;
-    }
-
-#endif
-
-    /* this should work for all flt point w/o NaN and Inf special cases */
-    return (x > y ? x : y);
-}
-
-/**
- * Returns the smaller of two doubles.
- *
- * On IEEE 754 builds a NaN operand makes the result NaN, and a negative
- * zero in y wins when both operands are zero.
- *
- * @param x first operand
- * @param y second operand
- * @return the smaller operand, or NaN if either operand is NaN (IEEE 754 builds)
- */
-U_CAPI double U_EXPORT2
-uprv_fmin(double x, double y)
-{
-#if IEEE_754
-    /* NaN in, NaN out. */
-    if (uprv_isNaN(x) || uprv_isNaN(y)) {
-        return uprv_getNaN();
-    }
-
-    /* Both zero and y carries the sign bit: y is the -0 we want. */
-    if (x == 0.0 && y == 0.0 && u_signBit(y)) {
-        return y;
-    }
-
-#endif
-
-    /* Plain comparison covers every remaining case. */
-    if (x > y) {
-        return y;
-    }
-    return x;
-}
-
-/**
- * Adds two 32-bit integers and reports whether the sum fits in 32 bits.
- *
- * @param a   first addend
- * @param b   second addend
- * @param res receives the 64-bit sum converted to int32_t
- * @return nonzero if the stored value differs from the exact sum
- */
-U_CAPI UBool U_EXPORT2
-uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
-    // NOTE: GCC and Clang offer __builtin_add_overflow, which could replace
-    // this widening approach.
-    const int64_t exact = static_cast<int64_t>(a) + static_cast<int64_t>(b);
-    *res = static_cast<int32_t>(exact);
-    return exact != *res;
-}
-
-/**
- * Multiplies two 32-bit integers and reports whether the product fits in 32 bits.
- *
- * @param a   first factor
- * @param b   second factor
- * @param res receives the 64-bit product converted to int32_t
- * @return nonzero if the stored value differs from the exact product
- */
-U_CAPI UBool U_EXPORT2
-uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) {
-    // NOTE: GCC and Clang offer __builtin_mul_overflow, which could replace
-    // this widening approach.
-    const int64_t exact = static_cast<int64_t>(a) * static_cast<int64_t>(b);
-    *res = static_cast<int32_t>(exact);
-    return exact != *res;
-}
-
-/**
- * Truncates the given double toward zero.
- * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
- * This is different than calling floor() or ceil():
- * floor(3.3) = 3, floor(-3.3) = -4
- * ceil(3.3) = 4, ceil(-3.3) = -3
- *
- * NaN yields NaN. An infinite input is returned unchanged, so that
- * negative infinity stays negative (it used to come back as +infinity).
- *
- * @param d the value to truncate
- * @return d with its fractional part removed
- */
-U_CAPI double U_EXPORT2
-uprv_trunc(double d)
-{
-#if IEEE_754
-    /* handle error cases */
-    if (uprv_isNaN(d)) {
-        return uprv_getNaN();
-    }
-    if (uprv_isInfinite(d)) {
-        /* Preserve the sign of the infinity. */
-        return d;
-    }
-
-    if (u_signBit(d)) { /* Signbit() picks up -0.0; d<0 does not. */
-        return ceil(d);
-    }
-    return floor(d);
-
-#else
-    return d >= 0 ? floor(d) : ceil(d);
-
-#endif
-}
-
-/**
- * Returns pow(2, DBL_MANT_DIG + 1) - 1 as a double, described by the
- * original authors as the largest positive number that can be represented
- * by an integer type of arbitrary bit length.
- *
- * NOTE(review): with a DBL_MANT_DIG-bit significand the subtraction of 1.0
- * is presumably lost to rounding, so the value returned would be the power
- * of two itself - confirm whether callers rely on that.
- */
-U_CAPI double U_EXPORT2
-uprv_maxMantissa(void)
-{
-    const double powerOfTwo = pow(2.0, DBL_MANT_DIG + 1.0);
-    return powerOfTwo - 1.0;
-}
-
-/** Thin wrapper around the C library log(). */
-U_CAPI double U_EXPORT2
-uprv_log(double d)
-{
-    const double result = log(d);
-    return result;
-}
-
-/**
- * Returns the highest address that may be used together with base.
- * Everywhere except OS/400 this is whatever U_MAX_PTR(base) yields.
- *
- * @param base a pointer into the region of interest (may be NULL)
- * @return the upper bound pointer for that region
- */
-U_CAPI void * U_EXPORT2
-uprv_maximumPtr(void * base)
-{
-#if U_PLATFORM == U_PF_OS400
-    /*
-     * A traditional segment uses 5 bytes for the id and 3 bytes for the
-     * offset. Casting the pointer to an integer retrieves only the offset
-     * portion minus x1000, so the smallest offset seen in a program
-     * (x001000) casts to 0. That is why only 0xffefff can be added without
-     * leaving the segment.
-     *
-     * The addressing limit on i5/OS is 16MB when the activation is
-     * non-TERASPACE and 2GB - 4k (header information) when it is TERASPACE.
-     * Unlike other operating systems, the pointer model is not fixed at
-     * compile time, so the limit is chosen from the pointer that was
-     * passed in.
-     */
-    const UBool isTeraspace =
-        (base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK));
-    /* 16MB applies to NULL (not checkable) and to non-TERASPACE pointers. */
-    const uint32_t highestOffset =
-        isTeraspace ? ((uint32_t)0x7fffefff) : ((uint32_t)0xffefff);
-    return ((void *)(((char *)base)-((uint32_t)(base))+highestOffset));
-
-#else
-    return U_MAX_PTR(base);
-#endif
-}
-
-/*---------------------------------------------------------------------------
- Platform-specific Implementations
- Try these, and if they don't work on your platform, then special case your
- platform with new implementations.
- ---------------------------------------------------------------------------*/
-
-/* Generic time zone layer -------------------------------------------------- */
-
-/* Time zone utilities */
-/**
- * Runs the platform's time zone initialisation hook, U_TZSET(), when the
- * build defines one; otherwise does nothing.
- */
-U_CAPI void U_EXPORT2
-uprv_tzset()
-{
-#if !defined(U_TZSET)
-    /* no initialization */
-#else
-    U_TZSET();
-#endif
-}
-
-/**
- * Returns the offset between UTC and local time, in seconds.
- *
- * When the build provides U_TIMEZONE that value is returned directly.
- * Otherwise the offset is derived by converting the current time through
- * both localtime() and gmtime() and subtracting the mktime() results
- * (UTC minus local).
- *
- * @return the time zone offset in seconds
- */
-U_CAPI int32_t U_EXPORT2
-uprv_timezone()
-{
-#ifdef U_TIMEZONE
-    return U_TIMEZONE;
-#else
-    time_t now, localSeconds, utcSeconds;
-    struct tm fields;
-    int32_t offset = 0;
-
-    time(&now);
-    uprv_memcpy( &fields, localtime(&now), sizeof(fields) );
-#if U_PLATFORM != U_PF_IPHONE
-    /* Remember whether the local conversion reported daylight saving time. */
-    UBool dst_checked = (fields.tm_isdst != 0);
-#endif
-    localSeconds = mktime(&fields); /* local time in seconds */
-    uprv_memcpy( &fields, gmtime(&now), sizeof(fields) );
-    utcSeconds = mktime(&fields);   /* GMT (or UTC) in seconds */
-    offset = utcSeconds - localSeconds;
-
-#if U_PLATFORM != U_PF_IPHONE
-    /* Imitate NT behaviour, which returns the same offset to GMT in winter
-       and summer. This does not work on all platforms: with glibc on Linux
-       and on Mac OS 10.5 the offset computed above is the same whether or
-       not DST is in effect, and iOS is another platform where it does not
-       work. Linux + glibc and Mac OS 10.5 define U_TIMEZONE, so they never
-       reach this code. */
-    if (dst_checked) {
-        offset += 3600;
-    }
-#endif
-    return offset;
-#endif
-}
-
-/* Note that U_TZNAME does *not* have to be tzname, but if it is,
- some platforms need to have it declared here. The declaration below
- is emitted only for IRIX and Darwin-based builds that define U_TZNAME. */
-
-#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
-/* RS6000 and others reject char **tzname, so the array form is used. */
-extern U_IMPORT char *U_TZNAME[];
-#endif
-
-#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
-/* These platforms are likely to use Olson timezone IDs. */
-/* Common targets of the symbolic link at TZDEFAULT are:
- * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
- * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
- * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
- * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
- * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
- * To avoid checking lots of paths, just check that the target path
- * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
- */
-
-#define CHECK_LOCALTIME_LINK 1 /* Enables the readlink()-based lookup in uprv_tzname(). */
-#if U_PLATFORM_IS_DARWIN_BASED
-#include <tzfile.h> /* TZDEFAULT and TZDIR are not defined in this branch; presumably supplied by this header - confirm. */
-#define TZZONEINFO (TZDIR "/")
-#elif U_PLATFORM == U_PF_SOLARIS
-#define TZDEFAULT "/etc/localtime"
-#define TZZONEINFO "/usr/share/lib/zoneinfo/"
-#define TZ_ENV_CHECK "localtime" /* TZ value for which uprv_tzname() ignores TZ and inspects TZDEFAULT instead. */
-#else
-#define TZDEFAULT "/etc/localtime"
-#define TZZONEINFO "/usr/share/zoneinfo/"
-#endif
-#define TZZONEINFOTAIL "/zoneinfo/" /* Marker searched for inside the symlink target; the Olson ID follows it. */
-#if U_HAVE_DIRENT_H
-#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
-/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
- symlinked to /etc/localtime, which makes searchForTZFile return
- 'localtime' when it's the first match. */
-#define TZFILE_SKIP2 "localtime"
-#define SEARCH_TZFILE /* Enables the directory-scanning fallback (searchForTZFile). */
-#include <dirent.h> /* Needed to search through system timezone files */
-#endif
-static char gTimeZoneBuffer[PATH_MAX]; /* Receives the readlink() target of TZDEFAULT in uprv_tzname(). */
-static char *gTimeZoneBufferPtr = NULL; /* Cached answer of uprv_tzname(); reset to NULL by uprv_tzname_clear_cache(). */
-#endif
-
-#if !U_PLATFORM_USES_ONLY_WIN32_API
-#define isNonDigit(ch) (ch < '0' || '9' < ch)
-/**
- * Heuristically decides whether a time zone string could be an Olson ID.
- *
- * A string is accepted when it contains neither a digit nor a comma, or
- * when it is one of four POSIX-looking names that are accepted as
- * exceptions.
- *
- * @param id NUL-terminated candidate ID
- * @return TRUE if id may be an Olson ID, FALSE otherwise
- */
-static UBool isValidOlsonID(const char *id) {
-    /* Accepted even though they contain digits. */
-    static const char * const ACCEPTED_WITH_DIGITS[] = {
-        "PST8PDT", "MST7MDT", "CST6CDT", "EST5EDT"
-    };
-    const char *cursor = id;
-
-    /* Tell something like Iceland (Olson ID) apart from AST4ADT
-       (non-Olson ID) by stopping at the first digit or comma. */
-    while (*cursor != 0 && isNonDigit(*cursor) && *cursor != ',') {
-        ++cursor;
-    }
-
-    /* Reaching the terminator means the string might be okay. TZ is
-       sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
-       "GRNLNDST3GRNLNDDT" or similar, which cannot be used. */
-    if (*cursor == 0) {
-        return TRUE;
-    }
-    for (const char *accepted : ACCEPTED_WITH_DIGITS) {
-        if (uprv_strcmp(id, accepted) == 0) {
-            return TRUE;
-        }
-    }
-    return FALSE;
-}
-
-/*
- * Advances *id past a leading "posix/" or "right/".
- *
- * On some Unix-like systems /usr/share/zoneinfo has a 'posix'
- * subdirectory that replicates the top-level contents and a 'right'
- * subdirectory with the same file names but different contents ('right'
- * holds TAI - International Atomic Time - files while 'posix' holds UTC
- * files). When the first match for /etc/localtime is found in one of
- * them (usually 'posix'), or TZ points into one of them, createTimeZone
- * fails because e.g. 'posix/America/New_York' is not an Olson time zone
- * id while 'America/New_York' is. Hence the prefix is dropped.
- */
-static void skipZoneIDPrefix(const char** id) {
-    const char *current = *id;
-    if (uprv_strncmp(current, "posix/", 6) != 0
-        && uprv_strncmp(current, "right/", 6) != 0)
-    {
-        return;
-    }
-    *id = current + 6;
-}
-#endif
-
-#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
-
-#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) /* NOTE(review): no use of this macro is visible nearby - confirm it is still needed. */
-typedef struct OffsetZoneMapping {
- int32_t offsetSeconds; /* Compared against the offset passed to remapShortTimeZone(). */
- int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June=U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER */
- const char *stdID; /* Abbreviation compared against the standard-time name. */
- const char *dstID; /* Abbreviation compared against the daylight-time name. */
- const char *olsonID; /* Olson ID returned when the other four fields match. */
-} OffsetZoneMapping;
-
-enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 }; /* Values stored in OffsetZoneMapping.daylightType. */
-
-/*
-This list tries to disambiguate a set of abbreviated timezone IDs and offsets
-and maps it to an Olson ID.
-Before adding anything to this list, take a look at
-icu/source/tools/tzcode/tz.alias
-Sometimes no daylight savings (0) is important to define due to aliases.
-This list can be tested with icu/source/test/compat/tzone.pl
-More values could be added to daylightType to increase precision.
-
-Each row is {offsetSeconds, daylightType, stdID, dstID, olsonID}; the second
-column uses the numeric values of U_DAYLIGHT_NONE (0), U_DAYLIGHT_JUNE (1)
-and U_DAYLIGHT_DECEMBER (2). remapShortTimeZone() scans the rows in order
-and returns the olsonID of the first row whose other four fields all match.
-NOTE(review): offsets look like seconds west of UTC (e.g. 28800 for
-US/Pacific, -45900 for Pacific/Chatham) - confirm against uprv_timezone().
-*/
-static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
- {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
- {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
- {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
- {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
- {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
- {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
- {-36000, 2, "EST", "EST", "Australia/Sydney"},
- {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
- {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
- {-34200, 2, "CST", "CST", "Australia/South"},
- {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
- {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
- {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
- {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
- {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
- {-28800, 2, "WST", "WST", "Australia/West"},
- {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
- {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
- {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
- {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
- {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
- {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
- {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
- {-14400, 1, "AZT", "AZST", "Asia/Baku"},
- {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
- {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
- {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
- {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
- {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
- {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
- {-3600, 0, "CET", "WEST", "Africa/Algiers"},
- {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
- {0, 1, "GMT", "IST", "Europe/Dublin"},
- {0, 1, "GMT", "BST", "Europe/London"},
- {0, 0, "WET", "WEST", "Africa/Casablanca"},
- {0, 0, "WET", "WET", "Africa/El_Aaiun"},
- {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
- {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
- {10800, 1, "PMST", "PMDT", "America/Miquelon"},
- {10800, 2, "UYT", "UYST", "America/Montevideo"},
- {10800, 1, "WGT", "WGST", "America/Godthab"},
- {10800, 2, "BRT", "BRST", "Brazil/East"},
- {12600, 1, "NST", "NDT", "America/St_Johns"},
- {14400, 1, "AST", "ADT", "Canada/Atlantic"},
- {14400, 2, "AMT", "AMST", "America/Cuiaba"},
- {14400, 2, "CLT", "CLST", "Chile/Continental"},
- {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
- {14400, 2, "PYT", "PYST", "America/Asuncion"},
- {18000, 1, "CST", "CDT", "America/Havana"},
- {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
- {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
- {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
- {21600, 0, "CST", "CDT", "America/Guatemala"},
- {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
- {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
- {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
- {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
- {32400, 1, "AKST", "AKDT", "US/Alaska"},
- {36000, 1, "HAST", "HADT", "US/Aleutian"}
-};
-
-/*#define DEBUG_TZNAME*/
-
-/**
- * Looks up an Olson ID for a pair of time zone abbreviations.
- *
- * @param stdID        standard-time abbreviation
- * @param dstID        daylight-time abbreviation
- * @param daylightType one of U_DAYLIGHT_NONE, U_DAYLIGHT_JUNE, U_DAYLIGHT_DECEMBER
- * @param offset       offset in seconds, as stored in OFFSET_ZONE_MAPPINGS
- * @return the olsonID of the first table row matching all four values,
- *         or NULL when no row matches
- */
-static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
-{
-#ifdef DEBUG_TZNAME
-    fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
-#endif
-    for (const OffsetZoneMapping &row : OFFSET_ZONE_MAPPINGS) {
-        /* Cheap integer comparisons first, string comparisons last. */
-        if (offset != row.offsetSeconds || daylightType != row.daylightType) {
-            continue;
-        }
-        if (strcmp(row.stdID, stdID) != 0) {
-            continue;
-        }
-        if (strcmp(row.dstID, dstID) != 0) {
-            continue;
-        }
-        return row.olsonID;
-    }
-    return NULL;
-}
-#endif
-
-#ifdef SEARCH_TZFILE
-#define MAX_READ_SIZE 512 /* Size of the chunk buffer used by compareBinaryFiles(). */
-
-typedef struct DefaultTZInfo {
- char* defaultTZBuffer; /* Contents of the default TZ file, loaded on first use by compareBinaryFiles(). */
- int64_t defaultTZFileSize; /* Size of the default TZ file; 0 is treated as "not measured yet". */
- FILE* defaultTZFilePtr; /* Handle to the default TZ file; opened by compareBinaryFiles(), closed in uprv_tzname(). */
- UBool defaultTZstatus; /* Initialised to FALSE in uprv_tzname(); not read in the code shown here. */
- int32_t defaultTZPosition; /* Offset into defaultTZBuffer during a comparison; reset on every call. */
-} DefaultTZInfo;
-
-/*
- * Compares the contents of two files byte for byte.
- * It is currently used to compare two TZ files.
- *
- * The reference file (defaultTZFileName) is opened, measured and read into
- * memory only once; handle, size and buffer are cached in tzInfo so that
- * repeated calls only have to read the candidate file. The caller owns
- * those cached resources and must release them.
- *
- * @param defaultTZFileName path of the reference file
- * @param TZFileName        path of the candidate file
- * @param tzInfo            cache for the reference file; updated in place
- * @return TRUE if both files could be read and have identical contents,
- *         FALSE otherwise (including any open, size, allocation or read failure)
- */
-static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
-    FILE* file;
-    int64_t sizeFile;
-    int64_t sizeFileLeft;
-    int32_t sizeFileRead;
-    int32_t sizeFileToRead;
-    char bufferFile[MAX_READ_SIZE];
-    UBool result = TRUE;
-
-    if (tzInfo->defaultTZFilePtr == NULL) {
-        tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
-    }
-    file = fopen(TZFileName, "r");
-
-    tzInfo->defaultTZPosition = 0; /* reset position to begin search */
-
-    if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
-        /* First check that the file sizes are equal. */
-        if (tzInfo->defaultTZFileSize == 0) {
-            fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
-            tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
-        }
-        fseek(file, 0, SEEK_END);
-        sizeFile = ftell(file);
-        sizeFileLeft = sizeFile;
-
-        if (sizeFile < 0 || sizeFile != tzInfo->defaultTZFileSize) {
-            /* ftell() failed, or the sizes differ. */
-            result = FALSE;
-        } else if (sizeFile > 0) {
-            /* Store the reference data in a separate buffer and
-             * compare each byte to determine equality.
-             */
-            if (tzInfo->defaultTZBuffer == NULL) {
-                char* loaded = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
-                if (loaded != NULL) {
-                    rewind(tzInfo->defaultTZFilePtr);
-                    /* Only cache a buffer that was read completely; comparing
-                       against partially filled memory would be meaningless. */
-                    if ((int64_t)fread(loaded, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr)
-                            != tzInfo->defaultTZFileSize) {
-                        uprv_free(loaded);
-                        loaded = NULL;
-                    }
-                }
-                tzInfo->defaultTZBuffer = loaded;
-            }
-
-            if (tzInfo->defaultTZBuffer == NULL) {
-                /* Out of memory or unreadable reference file. */
-                result = FALSE;
-            } else {
-                rewind(file);
-                while(sizeFileLeft > 0) {
-                    uprv_memset(bufferFile, 0, MAX_READ_SIZE);
-                    sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
-
-                    sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
-                    /* A read that returns nothing would never advance
-                       sizeFileLeft, so treat it as a mismatch and stop. */
-                    if (sizeFileRead <= 0
-                        || memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
-                        result = FALSE;
-                        break;
-                    }
-                    sizeFileLeft -= sizeFileRead;
-                    tzInfo->defaultTZPosition += sizeFileRead;
-                }
-            }
-        }
-        /* sizeFile == 0: both files are empty and therefore equal. */
-    } else {
-        result = FALSE;
-    }
-
-    if (file != NULL) {
-        fclose(file);
-    }
-
-    return result;
-}
-
-
-/* dirent also lists two entries: "." and ".." that we can safely ignore. */
-#define SKIP1 "."
-#define SKIP2 ".."
-static UBool U_CALLCONV putil_cleanup(void); /* Forward declaration: searchForTZFile() registers it before its definition appears. */
-static CharString *gSearchTZFileResult = NULL; /* Holds the zone ID returned by searchForTZFile(); deleted in putil_cleanup(). */
-
-/*
- * Walks the directory tree rooted at path, depth first, looking for a file
- * whose contents equal those of TZDEFAULT, and returns the zone ID of the
- * first match.
- * This function is not thread safe - it uses a global, gSearchTZFileResult,
- * to hold its result.
- *
- * path   - directory to scan; entry names are appended to it directly
- * tzInfo - cache for the TZDEFAULT file shared across comparisons
- * Returns a pointer into gSearchTZFileResult, or NULL when nothing matched
- * or an error occurred.
- */
-static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
-    DIR* dirHandle = NULL;
-    struct dirent* entry = NULL;
-    char* found = NULL;
-    UErrorCode status = U_ZERO_ERROR;
-
-    /* Private copy of the directory prefix that entry names get appended to. */
-    CharString prefix(path, -1, status);
-    if (U_FAILURE(status)) {
-        goto cleanupAndReturn;
-    }
-
-    dirHandle = opendir(path);
-    if (dirHandle == NULL) {
-        goto cleanupAndReturn;
-    }
-
-    /* Lazily create the global result holder and arrange for its cleanup. */
-    if (gSearchTZFileResult == NULL) {
-        gSearchTZFileResult = new CharString;
-        if (gSearchTZFileResult == NULL) {
-            goto cleanupAndReturn;
-        }
-        ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
-    }
-
-    /* Examine every directory entry in turn. */
-    for (entry = readdir(dirHandle); entry != NULL; entry = readdir(dirHandle)) {
-        const char* name = entry->d_name;
-        if (uprv_strcmp(name, SKIP1) == 0 || uprv_strcmp(name, SKIP2) == 0
-            || uprv_strcmp(TZFILE_SKIP, name) == 0 || uprv_strcmp(TZFILE_SKIP2, name) == 0) {
-            continue;
-        }
-
-        /* Full path of the entry under test. */
-        CharString candidate(prefix, status);
-        candidate.append(name, -1, status);
-        if (U_FAILURE(status)) {
-            break;
-        }
-
-        DIR* probe = opendir(candidate.data());
-        if (probe != NULL) {
-            /* The entry is itself a directory: descend into it. */
-            closedir(probe);
-            candidate.append('/', status);
-            if (U_FAILURE(status)) {
-                break;
-            }
-            found = searchForTZFile(candidate.data(), tzInfo);
-            /*
-             A match from the subtree has to end the scan here. Otherwise
-             the loop would keep going and return the first match in the
-             top-level directory if there is one; if not, this function
-             would return NULL and set gTimeZoneBufferPtr to NULL in
-             initDefault(). That worked in most cases only because of the
-             fallback of calling localtime_r to work out the default
-             timezone.
-            */
-            if (found != NULL) {
-                break;
-            }
-        } else if (compareBinaryFiles(TZDEFAULT, candidate.data(), tzInfo)) {
-            /* Strip the zoneinfo root (at most the whole path) to get the ID. */
-            int32_t skipLength = sizeof(TZZONEINFO) - 1;
-            if (skipLength > candidate.length()) {
-                skipLength = candidate.length();
-            }
-            const char* zoneid = candidate.data() + skipLength;
-            skipZoneIDPrefix(&zoneid);
-            gSearchTZFileResult->clear();
-            gSearchTZFileResult->append(zoneid, -1, status);
-            if (U_FAILURE(status)) {
-                break;
-            }
-            found = gSearchTZFileResult->data();
-            /* The first match wins. */
-            break;
-        }
-    }
-
-  cleanupAndReturn:
-    if (dirHandle) {
-        closedir(dirHandle);
-    }
-    return found;
-}
-#endif
-
-/**
- * Forgets the time zone name cached by uprv_tzname(), so that the next
- * call resolves it again. Has no effect in builds that do not use the
- * localtime-link lookup.
- */
-U_CAPI void U_EXPORT2
-uprv_tzname_clear_cache()
-{
-#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
-    /* Only the pointer is reset; the buffer contents are left alone. */
-    gTimeZoneBufferPtr = NULL;
-#endif
-}
-
-U_CAPI const char* U_EXPORT2
-uprv_tzname(int n) /* Best-effort lookup of the host time zone name; n is only used to index U_TZNAME in the final fallback. */
-{
- (void)n; // Avoid unreferenced parameter warning.
- const char *tzid = NULL;
-#if U_PLATFORM_USES_ONLY_WIN32_API
- tzid = uprv_detectWindowsTimeZone();
-
- if (tzid != NULL) {
- return tzid;
- }
-
-#ifndef U_TZNAME
- // The return value is free'd in timezone.cpp on Windows because
- // the other code path returns a pointer to a heap location.
- // If we don't have a name already, then tzname wouldn't be any
- // better, so just fall back.
- return uprv_strdup("");
-#endif // !U_TZNAME
-
-#else
-
-/*#if U_PLATFORM_IS_DARWIN_BASED
- int ret;
-
- tzid = getenv("TZFILE");
- if (tzid != NULL) {
- return tzid;
- }
-#endif*/
-
-/* This code can be temporarily disabled to test tzname resolution later on. */
-#ifndef DEBUG_TZNAME
- tzid = getenv("TZ"); /* First choice on non-Windows builds: the TZ environment variable, if it passes isValidOlsonID(). */
- if (tzid != NULL && isValidOlsonID(tzid)
-#if U_PLATFORM == U_PF_SOLARIS
- /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
- && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
-#endif
- ) {
- /* The colon forces tzset() to treat the remainder as zoneinfo path */
- if (tzid[0] == ':') {
- tzid++;
- }
- /* This might be a good Olson ID. */
- skipZoneIDPrefix(&tzid);
- return tzid;
- }
- /* else U_TZNAME will give a better result. */
-#endif
-
-#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
- /* Caller must handle threading issues */
- if (gTimeZoneBufferPtr == NULL) {
- /*
- This is a trick to look at the name of the link to get the Olson ID
- because the tzfile contents is underspecified.
- This isn't guaranteed to work because it may not be a symlink.
- */
- int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
- if (0 < ret) {
- int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
- gTimeZoneBuffer[ret] = 0; /* readlink() does not NUL-terminate; one byte was reserved for this above. */
- char * tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
-
- if (tzZoneInfoTailPtr != NULL
- && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen))
- {
- return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen);
- }
- } else { /* readlink() failed or returned nothing, so the link name cannot be used. */
-#if defined(SEARCH_TZFILE)
- DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
- if (tzInfo != NULL) {
- tzInfo->defaultTZBuffer = NULL;
- tzInfo->defaultTZFileSize = 0;
- tzInfo->defaultTZFilePtr = NULL;
- tzInfo->defaultTZstatus = FALSE;
- tzInfo->defaultTZPosition = 0;
-
- gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
-
- /* Free previously allocated memory */
- if (tzInfo->defaultTZBuffer != NULL) {
- uprv_free(tzInfo->defaultTZBuffer);
- }
- if (tzInfo->defaultTZFilePtr != NULL) {
- fclose(tzInfo->defaultTZFilePtr);
- }
- uprv_free(tzInfo);
- }
-
- if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
- return gTimeZoneBufferPtr;
- }
-#endif
- }
- }
- else { /* A previous call already cached an answer. */
- return gTimeZoneBufferPtr;
- }
-#endif
-#endif
-
-#ifdef U_TZNAME
-#if U_PLATFORM_USES_ONLY_WIN32_API
- /* The return value is free'd in timezone.cpp on Windows because
- * the other code path returns a pointer to a heap location. */
- return uprv_strdup(U_TZNAME[n]);
-#else
- /*
- U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
- So we remap the abbreviation to an olson ID.
-
- Since Windows exposes a little more timezone information,
- we normally don't use this code on Windows because
- uprv_detectWindowsTimeZone should have already given the correct answer.
- */
- {
- struct tm juneSol, decemberSol;
- int daylightType;
- static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
- static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
-
- /* This probing will tell us when daylight savings occurs. */
- localtime_r(&juneSolstice, &juneSol);
- localtime_r(&decemberSolstice, &decemberSol);
- if(decemberSol.tm_isdst > 0) { /* December is tested first, so it wins if both report DST. */
- daylightType = U_DAYLIGHT_DECEMBER;
- } else if(juneSol.tm_isdst > 0) {
- daylightType = U_DAYLIGHT_JUNE;
- } else {
- daylightType = U_DAYLIGHT_NONE;
- }
- tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
- if (tzid != NULL) {
- return tzid;
- }
- }
- return U_TZNAME[n]; /* No table match: hand back the raw abbreviation. */
-#endif
-#else
- return ""; /* No U_TZNAME in this build: nothing further to try. */
-#endif
-}
-
-/* Get and set the ICU data directory --------------------------------------- */
-
-static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER; /* Guards dataDirectoryInitFn(); reset in putil_cleanup(). */
-static char *gDataDirectory = NULL; /* Either a uprv_malloc'ed copy or the literal ""; see u_setDataDirectory(). */
-
-UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER; /* Guards TimeZoneDataDirInitFn(). NOTE(review): not static, unlike gDataDirInitOnce - confirm whether other files reference it. */
-static CharString *gTimeZoneFilesDirectory = NULL; /* Allocated in TimeZoneDataDirInitFn(); deleted in putil_cleanup(). */
-
-#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
- static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */
- static bool gCorrectedPOSIXLocaleHeapAllocated = false; /* putil_cleanup() frees gCorrectedPOSIXLocale only when this is true. */
-#endif
-
-/**
- * Cleanup callback registered with ucln_common_registerCleanup().
- * Releases the data directory string, the time zone files directory, the
- * TZ search result and the corrected POSIX locale, and resets the
- * associated init-once guards.
- *
- * @return always TRUE
- */
-static UBool U_CALLCONV putil_cleanup(void)
-{
-    /* An empty gDataDirectory is the "" literal, which must not be freed. */
-    if (gDataDirectory != NULL && gDataDirectory[0] != 0) {
-        uprv_free(gDataDirectory);
-    }
-    gDataDirectory = NULL;
-    gDataDirInitOnce.reset();
-
-    delete gTimeZoneFilesDirectory;
-    gTimeZoneFilesDirectory = NULL;
-    gTimeZoneFilesInitOnce.reset();
-
-#ifdef SEARCH_TZFILE
-    delete gSearchTZFileResult;
-    gSearchTZFileResult = NULL;
-#endif
-
-#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
-    /* Only a heap-allocated locale string is released and forgotten. */
-    if (gCorrectedPOSIXLocaleHeapAllocated && gCorrectedPOSIXLocale) {
-        uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
-        gCorrectedPOSIXLocale = NULL;
-        gCorrectedPOSIXLocaleHeapAllocated = false;
-    }
-#endif
-    return TRUE;
-}
-
-/*
- * Sets the ICU data directory.
- * A private copy of the passed string becomes the global data directory,
- * with alternate file separators converted to the primary one. NULL or
- * an empty string selects the shared "" literal, so the global is never
- * left NULL. If the copy cannot be allocated the previous setting is
- * kept unchanged.
- */
-U_CAPI void U_EXPORT2
-u_setDataDirectory(const char *directory) {
-    char *replacement;
-
-    if (directory != NULL && *directory != 0) {
-        const int32_t directoryLength = (int32_t)uprv_strlen(directory);
-        replacement = (char *)uprv_malloc(directoryLength + 2);
-        /* Give up quietly if the copy could not be created. */
-        if (replacement == NULL) {
-            return;
-        }
-        uprv_strcpy(replacement, directory);
-
-#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
-        /* Normalise every alternate separator to the primary one. */
-        for (char *cursor = replacement; *cursor != 0; ++cursor) {
-            if (*cursor == U_FILE_ALT_SEP_CHAR) {
-                *cursor = U_FILE_SEP_CHAR;
-            }
-        }
-#endif
-    }
-    else {
-        /* A small optimization that avoids the malloc and copy when the
-           shared library is used, and a way to make sure that NULL is
-           never returned. */
-        replacement = (char *)"";
-    }
-
-    /* The "" literal is not heap memory, so only non-empty values are freed. */
-    if (gDataDirectory != NULL && gDataDirectory[0] != 0) {
-        uprv_free(gDataDirectory);
-    }
-    gDataDirectory = replacement;
-    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
-}
-
-/**
- * Decides whether a path is absolute.
- *
- * A path is absolute when it starts with a file separator (primary or
- * alternate) or, on Windows-only builds, with a drive letter followed by
- * a colon.
- *
- * @param path the path to examine; may be NULL
- * @return TRUE if path is absolute, FALSE if not or if it is NULL/empty
- */
-U_CAPI UBool U_EXPORT2
-uprv_pathIsAbsolute(const char *path)
-{
-    if (path == NULL || path[0] == 0) {
-        return FALSE;
-    }
-
-    const char lead = path[0];
-
-    if (lead == U_FILE_SEP_CHAR) {
-        return TRUE;
-    }
-
-#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
-    if (lead == U_FILE_ALT_SEP_CHAR) {
-        return TRUE;
-    }
-#endif
-
-#if U_PLATFORM_USES_ONLY_WIN32_API
-    /* Drive specifier such as "C:" or "c:". */
-    const bool isDriveLetter = ((lead >= 'A') && (lead <= 'Z')) ||
-                               ((lead >= 'a') && (lead <= 'z'));
-    if (isDriveLetter && path[1] == ':') {
-        return TRUE;
-    }
-#endif
-
-    return FALSE;
-}
-
-/* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
- (needed for some Darwin ICU build environments).
- Applies only to Darwin simulator builds that have not already defined
- the macro; dataDirectoryInitFn() reads the named environment variable. */
-#if U_PLATFORM_IS_DARWIN_BASED && TARGET_OS_SIMULATOR
-# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
-# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
-# endif
-#endif
-
-#if defined(ICU_DATA_DIR_WINDOWS)
-// Builds "<Windows directory><separator>" followed by ICU_DATA_DIR_WINDOWS
-// in directoryBuffer.
-//
-// directoryBuffer - receives the NUL-terminated UTF-8 path on success
-// bufferLength    - capacity of directoryBuffer
-// Returns TRUE when the path was written, FALSE when the Windows directory
-// could not be obtained or converted, or when the result would not fit.
-static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
-{
-    wchar_t widePath[MAX_PATH];
-    char utf8Path[MAX_PATH];
-
-    UINT wideLength = GetSystemWindowsDirectoryW(widePath, UPRV_LENGTHOF(widePath));
-    if (!((wideLength > 0) && (wideLength < (UPRV_LENGTHOF(widePath) - 1)))) {
-        return FALSE;
-    }
-
-    // Convert UTF-16 to a UTF-8 string.
-    UErrorCode status = U_ZERO_ERROR;
-    int32_t utf8Length = 0;
-    u_strToUTF8(utf8Path, static_cast<int32_t>(UPRV_LENGTHOF(utf8Path)),
-        &utf8Length, reinterpret_cast<const UChar*>(widePath), -1, &status);
-
-    if (!(U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) &&
-        (utf8Length < (UPRV_LENGTHOF(utf8Path) - 1)))) {
-        return FALSE;
-    }
-
-    // Make sure the path ends with a separator before the ICU data path
-    // is appended.
-    if (utf8Path[utf8Length - 1] != U_FILE_SEP_CHAR) {
-        utf8Path[utf8Length++] = U_FILE_SEP_CHAR;
-        utf8Path[utf8Length] = '\0';
-    }
-
-    // Refuse to write anything when the concatenation would not fit.
-    if (!((utf8Length + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength)) {
-        return FALSE;
-    }
-
-    uprv_strcpy(directoryBuffer, utf8Path);
-    uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
-    return TRUE;
-}
-#endif
-
-static void U_CALLCONV dataDirectoryInitFn() { /* One-time initializer, run through gDataDirInitOnce by u_getDataDirectory(). */
- /* Chooses the initial ICU data directory and stores it with
- * u_setDataDirectory().
- * If we already have the directory, then return immediately. Will happen if user called
- * u_setDataDirectory().
- */
- if (gDataDirectory) {
- return;
- }
-
- const char *path = NULL;
-#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
- char datadir_path_buffer[PATH_MAX];
-#endif
-
- /*
- When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
- override ICU's data with the ICU_DATA environment variable. This prevents
- problems where multiple custom copies of ICU's specific version of data
- are installed on a system. Either the application must define the data
- directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
- ICU, set the data with udata_setCommonData or trust that all of the
- required data is contained in ICU's data library that contains
- the entry point defined by U_ICUDATA_ENTRY_POINT.
-
- There may also be some platforms where environment variables
- are not allowed.
- */
-# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
- /* First try to get the environment variable */
-# if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv
- path=getenv("ICU_DATA");
-# endif
-# endif
-
- /* ICU_DATA_DIR may be set as a compile option.
- * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
- * and is used only when data is built in archive mode eliminating the need
- * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
- * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
- * set their own path.
- */
-#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
- if(path==NULL || *path==0) { /* The compiled-in default is used only when ICU_DATA gave nothing. */
-# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
- const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
-# endif
-# ifdef ICU_DATA_DIR
- path=ICU_DATA_DIR;
-# else
- path=U_ICU_DATA_DEFAULT_DIR;
-# endif
-# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
- if (prefix != NULL) {
- snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
- path=datadir_path_buffer;
- }
-# endif
- }
-#endif
-
-#if defined(ICU_DATA_DIR_WINDOWS)
- char datadir_path_buffer[MAX_PATH]; /* NOTE(review): same name as the PATH_MAX buffer above; the two declarations would collide if ICU_DATA_DIR_PREFIX_ENV_VAR and ICU_DATA_DIR_WINDOWS were both defined - confirm they are mutually exclusive. */
- if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
- path = datadir_path_buffer; /* Replaces any path chosen above whenever the lookup succeeds. */
- }
-#endif
-
- if(path==NULL) {
- /* It looks really bad, set it to something. */
- path = "";
- }
-
- u_setDataDirectory(path);
- return;
-}
-
-/**
- * Returns the ICU data directory, running the one-time initialisation
- * (dataDirectoryInitFn) first if it has not happened yet.
- *
- * @return the current value of the global data directory string
- */
-U_CAPI const char * U_EXPORT2
-u_getDataDirectory(void) {
-    umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
-    const char *current = gDataDirectory;
-    return current;
-}
-
-/**
- * Replaces the contents of gTimeZoneFilesDirectory with path, converting
- * alternate file separators to the primary one.
- *
- * @param path   the new directory
- * @param status in/out error code; nothing is done if it already indicates failure
- */
-static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
-    if (U_FAILURE(status)) {
-        return;
-    }
-    gTimeZoneFilesDirectory->clear();
-    gTimeZoneFilesDirectory->append(path, status);
-#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
-    /* Hop from one alternate separator to the next, rewriting each. */
-    for (char *cursor = uprv_strchr(gTimeZoneFilesDirectory->data(), U_FILE_ALT_SEP_CHAR);
-         cursor != NULL;
-         cursor = uprv_strchr(cursor, U_FILE_ALT_SEP_CHAR)) {
-        *cursor = U_FILE_SEP_CHAR;
-    }
-#endif
-}
-
-#define TO_STRING(x) TO_STRING_2(x) /* Two-step form so that x is macro-expanded before being stringized. */
-#define TO_STRING_2(x) #x
-
-static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) { /* One-time initializer, run through gTimeZoneFilesInitOnce by the two accessors below. */
- U_ASSERT(gTimeZoneFilesDirectory == NULL);
- ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
- gTimeZoneFilesDirectory = new CharString();
- if (gTimeZoneFilesDirectory == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- const char *dir = "";
-
-#if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
- char timezonefilesdir_path_buffer[PATH_MAX];
- const char *prefix = getenv(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR);
-#endif
-
-#if U_PLATFORM_HAS_WINUWP_API == 1
-// The UWP version does not support the environment variable setting.
-
-# if defined(ICU_DATA_DIR_WINDOWS)
- // When using the Windows system data, we can possibly pick up time zone data from the Windows directory.
- char datadir_path_buffer[MAX_PATH];
- if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
- dir = datadir_path_buffer;
- }
-# endif
-
-#else
- dir = getenv("ICU_TIMEZONE_FILES_DIR"); /* May be NULL when the variable is unset; handled below. */
-#endif // U_PLATFORM_HAS_WINUWP_API
-
-#if defined(U_TIMEZONE_FILES_DIR)
- if (dir == NULL) {
- // Build time configuration setting.
- dir = TO_STRING(U_TIMEZONE_FILES_DIR);
- }
-#endif
-
- if (dir == NULL) {
- dir = "";
- }
-
-#if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
- if (prefix != NULL) {
- snprintf(timezonefilesdir_path_buffer, PATH_MAX, "%s%s", prefix, dir);
- dir = timezonefilesdir_path_buffer;
- }
-#endif
-
- setTimeZoneFilesDir(dir, status); /* Copies dir into gTimeZoneFilesDirectory, so the local buffers above may go out of scope. */
-}
-
-
-U_CAPI const char * U_EXPORT2
-u_getTimeZoneFilesDirectory(UErrorCode *status) {
- umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
- return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
-}
-
-U_CAPI void U_EXPORT2
-u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
- umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
- setTimeZoneFilesDir(path, *status);
-
- // Note: this function does some extra churn, first setting based on the
- // environment, then immediately replacing with the value passed in.
- // The logic is simpler that way, and performance shouldn't be an issue.
-}
-
-
-#if U_POSIX_LOCALE
-/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
- * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
- * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
- */
-static const char *uprv_getPOSIXIDForCategory(int category)
-{
- const char* posixID = NULL;
- if (category == LC_MESSAGES || category == LC_CTYPE) {
- /*
- * On Solaris two different calls to setlocale can result in
- * different values. Only get this value once.
- *
- * We must check this first because an application can set this.
- *
- * LC_ALL can't be used because it's platform dependent. The LANG
- * environment variable seems to affect LC_CTYPE variable by default.
- * Here is what setlocale(LC_ALL, NULL) can return.
- * HPUX can return 'C C C C C C C'
- * Solaris can return /en_US/C/C/C/C/C on the second try.
- * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
- *
- * The default codepage detection also needs to use LC_CTYPE.
- *
- * Do not call setlocale(LC_*, "")! Using an empty string instead
- * of NULL, will modify the libc behavior.
- */
- posixID = setlocale(category, NULL);
- if ((posixID == 0)
- || (uprv_strcmp("C", posixID) == 0)
- || (uprv_strcmp("POSIX", posixID) == 0))
- {
- /* Maybe we got some garbage. Try something more reasonable */
- posixID = getenv("LC_ALL");
- /* Solaris speaks POSIX - See IEEE Std 1003.1-2008
- * This is needed to properly handle empty env. variables
- */
-#if U_PLATFORM == U_PF_SOLARIS
- if ((posixID == 0) || (posixID[0] == '\0')) {
- posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
- if ((posixID == 0) || (posixID[0] == '\0')) {
-#else
- if (posixID == 0) {
- posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
- if (posixID == 0) {
-#endif
- posixID = getenv("LANG");
- }
- }
- }
- }
- if ((posixID==0)
- || (uprv_strcmp("C", posixID) == 0)
- || (uprv_strcmp("POSIX", posixID) == 0))
- {
- /* Nothing worked. Give it a nice POSIX default value. */
- posixID = "en_US_POSIX";
- // Note: this test will not catch 'C.UTF-8',
- // that will be handled in uprv_getDefaultLocaleID().
- // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
- // caller which expects to see "en_US_POSIX" in many branches.
- }
- return posixID;
-}
-
-/* Return just the POSIX id for the default locale, whatever happens to be in
- * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
- */
-static const char *uprv_getPOSIXIDForDefaultLocale(void)
-{
- static const char* posixID = NULL;
- if (posixID == 0) {
- posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
- }
- return posixID;
-}
-
-#if !U_CHARSET_IS_UTF8
-/* Return just the POSIX id for the default codepage, whatever happens to be in
- * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
- */
-static const char *uprv_getPOSIXIDForDefaultCodepage(void)
-{
- static const char* posixID = NULL;
- if (posixID == 0) {
- posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
- }
- return posixID;
-}
-#endif
-#endif
-
-/* NOTE: The caller should handle thread safety */
-U_CAPI const char* U_EXPORT2
-uprv_getDefaultLocaleID()
-{
-#if U_POSIX_LOCALE
-/*
- Note that: (a '!' means the ID is improper somehow)
- LC_ALL ----> default_loc codepage
---------------------------------------------------------
- ab.CD ab CD
- ab@CD ab__CD -
- [email protected] ab__CD EF
-
- ab_CD.EF@GH ab_CD_GH EF
-
-Some 'improper' ways to do the same as above:
- ! [email protected] ab_CD_GH EF
- ! [email protected] ab_CD_GH EF
- ! [email protected]@GH.IJ ab_CD_GH EF
-
- _CD@GH _CD_GH -
- _CD.EF@GH _CD_GH EF
-
-The variant cannot have dots in it.
-The 'rightmost' variant (@xxx) wins.
-The leftmost codepage (.xxx) wins.
-*/
- const char* posixID = uprv_getPOSIXIDForDefaultLocale();
-
- /* Format: (no spaces)
- ll [ _CC ] [ . MM ] [ @ VV]
-
- l = lang, C = ctry, M = charmap, V = variant
- */
-
- if (gCorrectedPOSIXLocale != nullptr) {
- return gCorrectedPOSIXLocale;
- }
-
- // Copy the ID into owned memory.
- // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination
- char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1));
- if (correctedPOSIXLocale == nullptr) {
- return nullptr;
- }
- uprv_strcpy(correctedPOSIXLocale, posixID);
-
- char *limit;
- if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
- *limit = 0;
- }
- if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
- *limit = 0;
- }
-
- if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
- || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
- // Raw input was C.* or POSIX.*, Give it a nice POSIX default value.
- // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
- uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
- }
-
- /* Note that we scan the *uncorrected* ID. */
- const char *p;
- if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
- p++;
-
- /* Take care of any special cases here.. */
- if (!uprv_strcmp(p, "nynorsk")) {
- p = "NY";
- /* Don't worry about no__NY. In practice, it won't appear. */
- }
-
- if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) {
- uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
- }
- else {
- uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
- }
-
- const char *q;
- if ((q = uprv_strchr(p, '.')) != nullptr) {
- /* How big will the resulting string be? */
- int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
- uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
- correctedPOSIXLocale[len] = 0;
- }
- else {
- /* Anything following the @ sign */
- uprv_strcat(correctedPOSIXLocale, p);
- }
-
- /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
- * How about 'russian' -> 'ru'?
- * Many of the other locales using ISO codes will be handled by the
- * canonicalization functions in uloc_getDefault.
- */
- }
-
- if (gCorrectedPOSIXLocale == nullptr) {
- gCorrectedPOSIXLocale = correctedPOSIXLocale;
- gCorrectedPOSIXLocaleHeapAllocated = true;
- ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
- correctedPOSIXLocale = nullptr;
- }
- posixID = gCorrectedPOSIXLocale;
-
- if (correctedPOSIXLocale != nullptr) { /* Was already set - clean up. */
- uprv_free(correctedPOSIXLocale);
- }
-
- return posixID;
-
-#elif U_PLATFORM_USES_ONLY_WIN32_API
-#define POSIX_LOCALE_CAPACITY 64
- UErrorCode status = U_ZERO_ERROR;
- char *correctedPOSIXLocale = nullptr;
-
- // If we have already figured this out just use the cached value
- if (gCorrectedPOSIXLocale != nullptr) {
- return gCorrectedPOSIXLocale;
- }
-
- // No cached value, need to determine the current value
- static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
- int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH);
-
- // Now we should have a Windows locale name that needs converted to the POSIX style.
- if (length > 0) // If length is 0, then the GetLocaleInfoEx failed.
- {
- // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
- char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
-
- int32_t i;
- for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
- {
- if (windowsLocale[i] == '_')
- {
- modifiedWindowsLocale[i] = '-';
- }
- else
- {
- modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]);
- }
-
- if (modifiedWindowsLocale[i] == '\0')
- {
- break;
- }
- }
-
- if (i >= UPRV_LENGTHOF(modifiedWindowsLocale))
- {
- // Ran out of room, can't really happen, maybe we'll be lucky about a matching
- // locale when tags are dropped
- modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0';
- }
-
- // Now normalize the resulting name
- correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
- /* TODO: Should we just exit on memory allocation failure? */
- if (correctedPOSIXLocale)
- {
- int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
- if (U_SUCCESS(status))
- {
- *(correctedPOSIXLocale + posixLen) = 0;
- gCorrectedPOSIXLocale = correctedPOSIXLocale;
- gCorrectedPOSIXLocaleHeapAllocated = true;
- ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
- }
- else
- {
- uprv_free(correctedPOSIXLocale);
- }
- }
- }
-
- // If unable to find a locale we can agree upon, use en-US by default
- if (gCorrectedPOSIXLocale == nullptr) {
- gCorrectedPOSIXLocale = "en_US";
- }
- return gCorrectedPOSIXLocale;
-
-#elif U_PLATFORM == U_PF_OS400
- /* locales are process scoped and are by definition thread safe */
- static char correctedLocale[64];
- const char *localeID = getenv("LC_ALL");
- char *p;
-
- if (localeID == NULL)
- localeID = getenv("LANG");
- if (localeID == NULL)
- localeID = setlocale(LC_ALL, NULL);
- /* Make sure we have something... */
- if (localeID == NULL)
- return "en_US_POSIX";
-
- /* Extract the locale name from the path. */
- if((p = uprv_strrchr(localeID, '/')) != NULL)
- {
- /* Increment p to start of locale name. */
- p++;
- localeID = p;
- }
-
- /* Copy to work location. */
- uprv_strcpy(correctedLocale, localeID);
-
- /* Strip off the '.locale' extension. */
- if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
- *p = 0;
- }
-
- /* Upper case the locale name. */
- T_CString_toUpperCase(correctedLocale);
-
- /* See if we are using the POSIX locale. Any of the
- * following are equivalent and use the same QLGPGCMA
- * (POSIX) locale.
- * QLGPGCMA2 means UCS2
- * QLGPGCMA_4 means UTF-32
- * QLGPGCMA_8 means UTF-8
- */
- if ((uprv_strcmp("C", correctedLocale) == 0) ||
- (uprv_strcmp("POSIX", correctedLocale) == 0) ||
- (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
- {
- uprv_strcpy(correctedLocale, "en_US_POSIX");
- }
- else
- {
- int16_t LocaleLen;
-
- /* Lower case the lang portion. */
- for(p = correctedLocale; *p != 0 && *p != '_'; p++)
- {
- *p = uprv_tolower(*p);
- }
-
- /* Adjust for Euro. After '_E' add 'URO'. */
- LocaleLen = uprv_strlen(correctedLocale);
- if (correctedLocale[LocaleLen - 2] == '_' &&
- correctedLocale[LocaleLen - 1] == 'E')
- {
- uprv_strcat(correctedLocale, "URO");
- }
-
- /* If using Lotus-based locale then convert to
- * equivalent non Lotus.
- */
- else if (correctedLocale[LocaleLen - 2] == '_' &&
- correctedLocale[LocaleLen - 1] == 'L')
- {
- correctedLocale[LocaleLen - 2] = 0;
- }
-
- /* There are separate simplified and traditional
- * locales called zh_HK_S and zh_HK_T.
- */
- else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
- {
- uprv_strcpy(correctedLocale, "zh_HK");
- }
-
- /* A special zh_CN_GBK locale...
- */
- else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
- {
- uprv_strcpy(correctedLocale, "zh_CN");
- }
-
- }
-
- return correctedLocale;
-#endif
-
-}
-
-#if !U_CHARSET_IS_UTF8
-#if U_POSIX_LOCALE
-/*
-Due to various platform differences, one platform may specify a charset,
-when they really mean a different charset. Remap the names so that they are
-compatible with ICU. Only conflicting/ambiguous aliases should be resolved
-here. Before adding anything to this function, please consider adding unique
-names to the ICU alias table in the data directory.
-*/
-static const char*
-remapPlatformDependentCodepage(const char *locale, const char *name) {
- if (locale != NULL && *locale == 0) {
- /* Make sure that an empty locale is handled the same way. */
- locale = NULL;
- }
- if (name == NULL) {
- return NULL;
- }
-#if U_PLATFORM == U_PF_AIX
- if (uprv_strcmp(name, "IBM-943") == 0) {
- /* Use the ASCII compatible ibm-943 */
- name = "Shift-JIS";
- }
- else if (uprv_strcmp(name, "IBM-1252") == 0) {
- /* Use the windows-1252 that contains the Euro */
- name = "IBM-5348";
- }
-#elif U_PLATFORM == U_PF_SOLARIS
- if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
- /* Solaris underspecifies the "EUC" name. */
- if (uprv_strcmp(locale, "zh_CN") == 0) {
- name = "EUC-CN";
- }
- else if (uprv_strcmp(locale, "zh_TW") == 0) {
- name = "EUC-TW";
- }
- else if (uprv_strcmp(locale, "ko_KR") == 0) {
- name = "EUC-KR";
- }
- }
- else if (uprv_strcmp(name, "eucJP") == 0) {
- /*
- ibm-954 is the best match.
- ibm-33722 is the default for eucJP (similar to Windows).
- */
- name = "eucjis";
- }
- else if (uprv_strcmp(name, "646") == 0) {
- /*
- * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
- * ISO-8859-1 instead of US-ASCII(646).
- */
- name = "ISO-8859-1";
- }
-#elif U_PLATFORM_IS_DARWIN_BASED
- if (locale == NULL && *name == 0) {
- /*
- No locale was specified, and an empty name was passed in.
- This usually indicates that nl_langinfo didn't return valid information.
- Mac OS X uses UTF-8 by default (especially the locale data and console).
- */
- name = "UTF-8";
- }
- else if (uprv_strcmp(name, "CP949") == 0) {
- /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
- name = "EUC-KR";
- }
- else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
- /*
- * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
- */
- name = "UTF-8";
- }
-#elif U_PLATFORM == U_PF_BSD
- if (uprv_strcmp(name, "CP949") == 0) {
- /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
- name = "EUC-KR";
- }
-#elif U_PLATFORM == U_PF_HPUX
- if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
- /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
- /* zh_TW.big5 is not the same charset as zh_HK.big5! */
- name = "hkbig5";
- }
- else if (uprv_strcmp(name, "eucJP") == 0) {
- /*
- ibm-1350 is the best match, but unavailable.
- ibm-954 is mostly a superset of ibm-1350.
- ibm-33722 is the default for eucJP (similar to Windows).
- */
- name = "eucjis";
- }
-#elif U_PLATFORM == U_PF_LINUX
- if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
- /* Linux underspecifies the "EUC" name. */
- if (uprv_strcmp(locale, "korean") == 0) {
- name = "EUC-KR";
- }
- else if (uprv_strcmp(locale, "japanese") == 0) {
- /* See comment below about eucJP */
- name = "eucjis";
- }
- }
- else if (uprv_strcmp(name, "eucjp") == 0) {
- /*
- ibm-1350 is the best match, but unavailable.
- ibm-954 is mostly a superset of ibm-1350.
- ibm-33722 is the default for eucJP (similar to Windows).
- */
- name = "eucjis";
- }
- else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
- (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
- /*
- * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
- */
- name = "UTF-8";
- }
- /*
- * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
- * it by falling back to 'US-ASCII' when NULL is returned from this
- * function. So, we don't have to worry about it here.
- */
-#endif
- /* return NULL when "" is passed in */
- if (*name == 0) {
- name = NULL;
- }
- return name;
-}
-
-static const char*
-getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
-{
- char localeBuf[100];
- const char *name = NULL;
- char *variant = NULL;
-
- if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
- size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
- uprv_strncpy(localeBuf, localeName, localeCapacity);
- localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
- name = uprv_strncpy(buffer, name+1, buffCapacity);
- buffer[buffCapacity-1] = 0; /* ensure NULL termination */
- if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
- *variant = 0;
- }
- name = remapPlatformDependentCodepage(localeBuf, name);
- }
- return name;
-}
-#endif
-
-static const char*
-int_getDefaultCodepage()
-{
-#if U_PLATFORM == U_PF_OS400
- uint32_t ccsid = 37; /* Default to ibm-37 */
- static char codepage[64];
- Qwc_JOBI0400_t jobinfo;
- Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
-
- EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
- "* ", " ", &error);
-
- if (error.Bytes_Available == 0) {
- if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
- ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
- }
- else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
- ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
- }
- /* else use the default */
- }
- sprintf(codepage,"ibm-%d", ccsid);
- return codepage;
-
-#elif U_PLATFORM == U_PF_OS390
- static char codepage[64];
-
- strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
- strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
- codepage[63] = 0; /* NULL terminate */
-
- return codepage;
-
-#elif U_PLATFORM_USES_ONLY_WIN32_API
- static char codepage[64];
- DWORD codepageNumber = 0;
-
-#if U_PLATFORM_HAS_WINUWP_API == 1
- // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
- // have folks use Unicode than a "system" code page, however this is the same
- // codepage as the system default locale codepage. (FWIW, the system locale is
- // ONLY used for codepage, it should never be used for anything else)
- GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
- (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR));
-#else
- // Win32 apps can call GetACP
- codepageNumber = GetACP();
-#endif
- // Special case for UTF-8
- if (codepageNumber == 65001)
- {
- return "UTF-8";
- }
- // Windows codepages can look like windows-1252, so format the found number
- // the numbers are eclectic, however all valid system code pages, besides UTF-8
- // are between 3 and 19999
- if (codepageNumber > 0 && codepageNumber < 20000)
- {
- sprintf(codepage, "windows-%ld", codepageNumber);
- return codepage;
- }
- // If the codepage number call failed then return UTF-8
- return "UTF-8";
-
-#elif U_POSIX_LOCALE
- static char codesetName[100];
- const char *localeName = NULL;
- const char *name = NULL;
-
- localeName = uprv_getPOSIXIDForDefaultCodepage();
- uprv_memset(codesetName, 0, sizeof(codesetName));
- /* On Solaris nl_langinfo returns C locale values unless setlocale
- * was called earlier.
- */
-#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
- /* When available, check nl_langinfo first because it usually gives more
- useful names. It depends on LC_CTYPE.
- nl_langinfo may use the same buffer as setlocale. */
- {
- const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
-#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
- /*
- * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
- * instead of ASCII.
- */
- if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
- codeset = remapPlatformDependentCodepage(localeName, codeset);
- } else
-#endif
- {
- codeset = remapPlatformDependentCodepage(NULL, codeset);
- }
-
- if (codeset != NULL) {
- uprv_strncpy(codesetName, codeset, sizeof(codesetName));
- codesetName[sizeof(codesetName)-1] = 0;
- return codesetName;
- }
- }
-#endif
-
- /* Use setlocale in a nice way, and then check some environment variables.
- Maybe the application used setlocale already.
- */
- uprv_memset(codesetName, 0, sizeof(codesetName));
- name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
- if (name) {
- /* if we can find the codeset name from setlocale, return that. */
- return name;
- }
-
- if (*codesetName == 0)
- {
- /* Everything failed. Return US ASCII (ISO 646). */
- (void)uprv_strcpy(codesetName, "US-ASCII");
- }
- return codesetName;
-#else
- return "US-ASCII";
-#endif
-}
-
-
-U_CAPI const char* U_EXPORT2
-uprv_getDefaultCodepage()
-{
- static char const *name = NULL;
- umtx_lock(NULL);
- if (name == NULL) {
- name = int_getDefaultCodepage();
- }
- umtx_unlock(NULL);
- return name;
-}
-#endif /* !U_CHARSET_IS_UTF8 */
-
-
-/* end of platform-specific implementation -------------- */
-
-/* version handling --------------------------------------------------------- */
-
-U_CAPI void U_EXPORT2
-u_versionFromString(UVersionInfo versionArray, const char *versionString) {
- char *end;
- uint16_t part=0;
-
- if(versionArray==NULL) {
- return;
- }
-
- if(versionString!=NULL) {
- for(;;) {
- versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
- if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
- break;
- }
- versionString=end+1;
- }
- }
-
- while(part<U_MAX_VERSION_LENGTH) {
- versionArray[part++]=0;
- }
-}
-
-U_CAPI void U_EXPORT2
-u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
- if(versionArray!=NULL && versionString!=NULL) {
- char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
- int32_t len = u_strlen(versionString);
- if(len>U_MAX_VERSION_STRING_LENGTH) {
- len = U_MAX_VERSION_STRING_LENGTH;
- }
- u_UCharsToChars(versionString, versionChars, len);
- versionChars[len]=0;
- u_versionFromString(versionArray, versionChars);
- }
-}
-
-U_CAPI void U_EXPORT2
-u_versionToString(const UVersionInfo versionArray, char *versionString) {
- uint16_t count, part;
- uint8_t field;
-
- if(versionString==NULL) {
- return;
- }
-
- if(versionArray==NULL) {
- versionString[0]=0;
- return;
- }
-
- /* count how many fields need to be written */
- for(count=4; count>0 && versionArray[count-1]==0; --count) {
- }
-
- if(count <= 1) {
- count = 2;
- }
-
- /* write the first part */
- /* write the decimal field value */
- field=versionArray[0];
- if(field>=100) {
- *versionString++=(char)('0'+field/100);
- field%=100;
- }
- if(field>=10) {
- *versionString++=(char)('0'+field/10);
- field%=10;
- }
- *versionString++=(char)('0'+field);
-
- /* write the following parts */
- for(part=1; part<count; ++part) {
- /* write a dot first */
- *versionString++=U_VERSION_DELIMITER;
-
- /* write the decimal field value */
- field=versionArray[part];
- if(field>=100) {
- *versionString++=(char)('0'+field/100);
- field%=100;
- }
- if(field>=10) {
- *versionString++=(char)('0'+field/10);
- field%=10;
- }
- *versionString++=(char)('0'+field);
- }
-
- /* NUL-terminate */
- *versionString=0;
-}
-
-U_CAPI void U_EXPORT2
-u_getVersion(UVersionInfo versionArray) {
- (void)copyright; // Suppress unused variable warning from clang.
- u_versionFromString(versionArray, U_ICU_VERSION);
-}
-
-/**
- * icucfg.h dependent code
- */
-
-#if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
-
-#if HAVE_DLFCN_H
-#ifdef __MVS__
-#ifndef __SUSV3
-#define __SUSV3 1
-#endif
-#endif
-#include <dlfcn.h>
-#endif /* HAVE_DLFCN_H */
-
-U_INTERNAL void * U_EXPORT2
-uprv_dl_open(const char *libName, UErrorCode *status) {
- void *ret = NULL;
- if(U_FAILURE(*status)) return ret;
- ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
- if(ret==NULL) {
-#ifdef U_TRACE_DYLOAD
- printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
-#endif
- *status = U_MISSING_RESOURCE_ERROR;
- }
- return ret;
-}
-
-U_INTERNAL void U_EXPORT2
-uprv_dl_close(void *lib, UErrorCode *status) {
- if(U_FAILURE(*status)) return;
- dlclose(lib);
-}
-
-U_INTERNAL UVoidFunction* U_EXPORT2
-uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
- union {
- UVoidFunction *fp;
- void *vp;
- } uret;
- uret.fp = NULL;
- if(U_FAILURE(*status)) return uret.fp;
- uret.vp = dlsym(lib, sym);
- if(uret.vp == NULL) {
-#ifdef U_TRACE_DYLOAD
- printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
-#endif
- *status = U_MISSING_RESOURCE_ERROR;
- }
- return uret.fp;
-}
-
-#elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
-
-/* Windows API implementation. */
-// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation. */
-
-U_INTERNAL void * U_EXPORT2
-uprv_dl_open(const char *libName, UErrorCode *status) {
- HMODULE lib = NULL;
-
- if(U_FAILURE(*status)) return NULL;
-
- lib = LoadLibraryA(libName);
-
- if(lib==NULL) {
- *status = U_MISSING_RESOURCE_ERROR;
- }
-
- return (void*)lib;
-}
-
-U_INTERNAL void U_EXPORT2
-uprv_dl_close(void *lib, UErrorCode *status) {
- HMODULE handle = (HMODULE)lib;
- if(U_FAILURE(*status)) return;
-
- FreeLibrary(handle);
-
- return;
-}
-
-U_INTERNAL UVoidFunction* U_EXPORT2
-uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
- HMODULE handle = (HMODULE)lib;
- UVoidFunction* addr = NULL;
-
- if(U_FAILURE(*status) || lib==NULL) return NULL;
-
- addr = (UVoidFunction*)GetProcAddress(handle, sym);
-
- if(addr==NULL) {
- DWORD lastError = GetLastError();
- if(lastError == ERROR_PROC_NOT_FOUND) {
- *status = U_MISSING_RESOURCE_ERROR;
- } else {
- *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
- }
- }
-
- return addr;
-}
-
-#else
-
-/* No dynamic loading, null (nonexistent) implementation. */
-
-U_INTERNAL void * U_EXPORT2
-uprv_dl_open(const char *libName, UErrorCode *status) {
- (void)libName;
- if(U_FAILURE(*status)) return NULL;
- *status = U_UNSUPPORTED_ERROR;
- return NULL;
-}
-
-U_INTERNAL void U_EXPORT2
-uprv_dl_close(void *lib, UErrorCode *status) {
- (void)lib;
- if(U_FAILURE(*status)) return;
- *status = U_UNSUPPORTED_ERROR;
- return;
-}
-
-U_INTERNAL UVoidFunction* U_EXPORT2
-uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
- (void)lib;
- (void)sym;
- if(U_SUCCESS(*status)) {
- *status = U_UNSUPPORTED_ERROR;
- }
- return (UVoidFunction*)NULL;
-}
-
-#endif
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/contrib/libs/icu/common/putilimp.h b/contrib/libs/icu/common/putilimp.h
deleted file mode 100644
index d9c90cf4e75..00000000000
--- a/contrib/libs/icu/common/putilimp.h
+++ /dev/null
@@ -1,615 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1997-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* FILE NAME : putilimp.h
-*
-* Date Name Description
-* 10/17/04 grhoten Move internal functions from putil.h to this file.
-******************************************************************************
-*/
-
-#ifndef PUTILIMP_H
-#define PUTILIMP_H
-
-#include "unicode/utypes.h"
-#include "unicode/putil.h"
-
-/**
- * \def U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
- * Nearly all CPUs and compilers implement a right-shift of a signed integer
- * as an Arithmetic Shift Right which copies the sign bit (the Most Significant Bit (MSB))
- * into the vacated bits (sign extension).
- * For example, (int32_t)0xfff5fff3>>4 becomes 0xffff5fff and -1>>1=-1.
- *
- * This can be useful for storing a signed value in the upper bits
- * and another bit field in the lower bits.
- * The signed value can be retrieved by simple right-shifting.
- *
- * This is consistent with the Java language.
- *
- * However, the C standard allows compilers to implement a right-shift of a signed integer
- * as a Logical Shift Right which copies a 0 into the vacated bits.
- * For example, (int32_t)0xfff5fff3>>4 becomes 0x0fff5fff and -1>>1=0x7fffffff.
- *
- * Code that depends on the natural behavior should be guarded with this macro,
- * with an alternate path for unusual platforms.
- * @internal
- */
-#ifdef U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
- /* Use the predefined value. */
-#else
- /*
- * Nearly all CPUs & compilers implement a right-shift of a signed integer
- * as an Arithmetic Shift Right (with sign extension).
- */
-# define U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC 1
-#endif
-
-/** Define this to 1 if your platform supports IEEE 754 floating point,
- to 0 if it does not. */
-#ifndef IEEE_754
-# define IEEE_754 1
-#endif
-
-/**
- * uintptr_t is an optional part of the standard definitions in stdint.h.
- * The opengroup.org documentation for stdint.h says
- * "On XSI-conformant systems, the intptr_t and uintptr_t types are required;
- * otherwise, they are optional."
- * We assume that when uintptr_t is defined, UINTPTR_MAX is defined as well.
- *
- * Do not use ptrdiff_t since it is signed. size_t is unsigned.
- */
-/* TODO: This check fails on some z environments. Filed a ticket #9357 for this. */
-#if !defined(__intptr_t_defined) && !defined(UINTPTR_MAX) && (U_PLATFORM != U_PF_OS390)
-typedef size_t uintptr_t;
-#endif
-
-/*===========================================================================*/
-/** @{ Information about POSIX support */
-/*===========================================================================*/
-
-#ifdef U_HAVE_NL_LANGINFO_CODESET
- /* Use the predefined value. */
-#elif U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_ANDROID || U_PLATFORM == U_PF_QNX
-# define U_HAVE_NL_LANGINFO_CODESET 0
-#else
-# define U_HAVE_NL_LANGINFO_CODESET 1
-#endif
-
-#ifdef U_NL_LANGINFO_CODESET
- /* Use the predefined value. */
-#elif !U_HAVE_NL_LANGINFO_CODESET
-# define U_NL_LANGINFO_CODESET -1
-#elif U_PLATFORM == U_PF_OS400
- /* not defined */
-#else
-# define U_NL_LANGINFO_CODESET CODESET
-#endif
-
-#if defined(U_TZSET) || defined(U_HAVE_TZSET)
- /* Use the predefined value. */
-#elif U_PLATFORM_USES_ONLY_WIN32_API
- // UWP doesn't support tzset or environment variables for tz
-#if U_PLATFORM_HAS_WINUWP_API == 0
-# define U_TZSET _tzset
-#endif
-#elif U_PLATFORM == U_PF_OS400
- /* not defined */
-#else
-# define U_TZSET tzset
-#endif
-
-#if defined(U_TIMEZONE) || defined(U_HAVE_TIMEZONE)
- /* Use the predefined value. */
-#elif U_PLATFORM == U_PF_ANDROID
-# define U_TIMEZONE timezone
-#elif defined(__UCLIBC__)
- // uClibc does not have __timezone or _timezone.
-#elif defined(_NEWLIB_VERSION)
-# define U_TIMEZONE _timezone
-#elif defined(__GLIBC__)
- // glibc
-# define U_TIMEZONE __timezone
-#elif U_PLATFORM_IS_LINUX_BASED
- // not defined
-#elif U_PLATFORM_USES_ONLY_WIN32_API
-# define U_TIMEZONE _timezone
-#elif U_PLATFORM == U_PF_BSD && !defined(__NetBSD__)
- /* not defined */
-#elif U_PLATFORM == U_PF_OS400
- /* not defined */
-#elif U_PLATFORM == U_PF_IPHONE
- /* not defined */
-#else
-# define U_TIMEZONE timezone
-#endif
-
-#if defined(U_TZNAME) || defined(U_HAVE_TZNAME)
- /* Use the predefined value. */
-#elif U_PLATFORM_USES_ONLY_WIN32_API
- /* not usable on all windows platforms */
-#if U_PLATFORM_HAS_WINUWP_API == 0
-# define U_TZNAME _tzname
-#endif
-#elif U_PLATFORM == U_PF_OS400
- /* not defined */
-#else
-# define U_TZNAME tzname
-#endif
-
-#ifdef U_HAVE_MMAP
- /* Use the predefined value. */
-#elif U_PLATFORM_USES_ONLY_WIN32_API
-# define U_HAVE_MMAP 0
-#else
-# define U_HAVE_MMAP 1
-#endif
-
-#ifdef U_HAVE_POPEN
- /* Use the predefined value. */
-#elif U_PLATFORM_USES_ONLY_WIN32_API
-# define U_HAVE_POPEN 0
-#elif U_PLATFORM == U_PF_OS400
-# define U_HAVE_POPEN 0
-#else
-# define U_HAVE_POPEN 1
-#endif
-
-/**
- * \def U_HAVE_DIRENT_H
- * Defines whether dirent.h is available.
- * @internal
- */
-#ifdef U_HAVE_DIRENT_H
- /* Use the predefined value. */
-#elif U_PLATFORM_USES_ONLY_WIN32_API
-# define U_HAVE_DIRENT_H 0
-#else
-# define U_HAVE_DIRENT_H 1
-#endif
-
-/** @} */
-
-/*===========================================================================*/
-/** @{ Programs used by ICU code */
-/*===========================================================================*/
-
-/**
- * \def U_MAKE_IS_NMAKE
- * Defines whether the "make" program is Windows nmake.
- */
-#ifdef U_MAKE_IS_NMAKE
- /* Use the predefined value. */
-#elif U_PLATFORM == U_PF_WINDOWS
-# define U_MAKE_IS_NMAKE 1
-#else
-# define U_MAKE_IS_NMAKE 0
-#endif
-
-/** @} */
-
-/*==========================================================================*/
-/* Platform utilities */
-/*==========================================================================*/
-
-/**
- * Platform utilities isolates the platform dependencies of the
- * library. For each platform which this code is ported to, these
- * functions may have to be re-implemented.
- */
-
-/**
- * Floating point utility to determine if a double is Not a Number (NaN).
- * @internal
- */
-U_INTERNAL UBool U_EXPORT2 uprv_isNaN(double d);
-/**
- * Floating point utility to determine if a double has an infinite value.
- * @internal
- */
-U_INTERNAL UBool U_EXPORT2 uprv_isInfinite(double d);
-/**
- * Floating point utility to determine if a double has a positive infinite value.
- * @internal
- */
-U_INTERNAL UBool U_EXPORT2 uprv_isPositiveInfinity(double d);
-/**
- * Floating point utility to determine if a double has a negative infinite value.
- * @internal
- */
-U_INTERNAL UBool U_EXPORT2 uprv_isNegativeInfinity(double d);
-/**
- * Floating point utility that returns a Not a Number (NaN) value.
- * @internal
- */
-U_INTERNAL double U_EXPORT2 uprv_getNaN(void);
-/**
- * Floating point utility that returns an infinite value.
- * @internal
- */
-U_INTERNAL double U_EXPORT2 uprv_getInfinity(void);
-
-/**
- * Floating point utility to truncate a double.
- * @internal
- */
-U_INTERNAL double U_EXPORT2 uprv_trunc(double d);
-/**
- * Floating point utility to calculate the floor of a double.
- * @internal
- */
-U_INTERNAL double U_EXPORT2 uprv_floor(double d);
-/**
- * Floating point utility to calculate the ceiling of a double.
- * @internal
- */
-U_INTERNAL double U_EXPORT2 uprv_ceil(double d);
-/**
- * Floating point utility to calculate the absolute value of a double.
- * @internal
- */
-U_INTERNAL double U_EXPORT2 uprv_fabs(double d);
-/**
- * Floating point utility to calculate the fractional and integer parts of a double.
- * @internal
- */
-U_INTERNAL double U_EXPORT2 uprv_modf(double d, double* pinteger);
-/**
- * Floating point utility to calculate the remainder of a double divided by another double.
- * @internal
- */
-U_INTERNAL double U_EXPORT2 uprv_fmod(double d, double y);
-/**
- * Floating point utility to calculate d to the power of exponent (d^exponent).
- * @internal
- */
-U_INTERNAL double U_EXPORT2 uprv_pow(double d, double exponent);
-/**
- * Floating point utility to calculate 10 to the power of exponent (10^exponent).
- * @internal
- */
-U_INTERNAL double U_EXPORT2 uprv_pow10(int32_t exponent);
-/**
- * Floating point utility to calculate the maximum value of two doubles.
- * @internal
- */
-U_INTERNAL double U_EXPORT2 uprv_fmax(double d, double y);
-/**
- * Floating point utility to calculate the minimum value of two doubles.
- * @internal
- */
-U_INTERNAL double U_EXPORT2 uprv_fmin(double d, double y);
-/**
- * Private utility to calculate the maximum value of two integers.
- * @internal
- */
-U_INTERNAL int32_t U_EXPORT2 uprv_max(int32_t d, int32_t y);
-/**
- * Private utility to calculate the minimum value of two integers.
- * @internal
- */
-U_INTERNAL int32_t U_EXPORT2 uprv_min(int32_t d, int32_t y);
-
-#if U_IS_BIG_ENDIAN
-# define uprv_isNegative(number) (*((signed char *)&(number))<0)
-#else
-# define uprv_isNegative(number) (*((signed char *)&(number)+sizeof(number)-1)<0)
-#endif
-
-/**
- * Return the largest positive number that can be represented by an integer
- * type of arbitrary bit length.
- * @internal
- */
-U_INTERNAL double U_EXPORT2 uprv_maxMantissa(void);
-
-/**
- * Floating point utility to calculate the logarithm of a double.
- * @internal
- */
-U_INTERNAL double U_EXPORT2 uprv_log(double d);
-
-/**
- * Does common notion of rounding e.g. uprv_floor(x + 0.5);
- * @param x the double number
- * @return the rounded double
- * @internal
- */
-U_INTERNAL double U_EXPORT2 uprv_round(double x);
-
-/**
- * Adds the signed integers a and b, storing the result in res.
- * Checks for signed integer overflow.
- * Similar to the GCC/Clang extension __builtin_add_overflow
- *
- * @param a The first operand.
- * @param b The second operand.
- * @param res a + b
- * @return true if overflow occurred; false if no overflow occurred.
- * @internal
- */
-U_INTERNAL UBool U_EXPORT2 uprv_add32_overflow(int32_t a, int32_t b, int32_t* res);
-
-/**
- * Multiplies the signed integers a and b, storing the result in res.
- * Checks for signed integer overflow.
- * Similar to the GCC/Clang extension __builtin_mul_overflow
- *
- * @param a The first multiplicand.
- * @param b The second multiplicand.
- * @param res a * b
- * @return true if overflow occurred; false if no overflow occurred.
- * @internal
- */
-U_INTERNAL UBool U_EXPORT2 uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res);
-
-#if 0
-/**
- * Returns the number of digits after the decimal point in a double number x.
- *
- * @param x the double number
- * @return the number of digits after the decimal point in a double number x.
- * @internal
- */
-/*U_INTERNAL int32_t U_EXPORT2 uprv_digitsAfterDecimal(double x);*/
-#endif
-
-#if !U_CHARSET_IS_UTF8
-/**
- * Please use ucnv_getDefaultName() instead.
- * Return the default codepage for this platform and locale.
- * This function can call setlocale() on Unix platforms. Please read the
- * platform documentation on setlocale() before calling this function.
- * @return the default codepage for this platform
- * @internal
- */
-U_INTERNAL const char* U_EXPORT2 uprv_getDefaultCodepage(void);
-#endif
-
-/**
- * Please use uloc_getDefault() instead.
- * Return the default locale ID string by querying the system, or
- * zero if one cannot be found.
- * This function can call setlocale() on Unix platforms. Please read the
- * platform documentation on setlocale() before calling this function.
- * @return the default locale ID string
- * @internal
- */
-U_INTERNAL const char* U_EXPORT2 uprv_getDefaultLocaleID(void);
-
-/**
- * Time zone utilities
- *
- * Wrappers for C runtime library functions relating to timezones.
- * The t_tzset() function (similar to tzset) uses the current setting
- * of the environment variable TZ to assign values to three global
- * variables: daylight, timezone, and tzname. These variables have the
- * following meanings, and are declared in &lt;time.h&gt;.
- *
- * daylight Nonzero if daylight-saving-time zone (DST) is specified
- * in TZ; otherwise, 0. Default value is 1.
- * timezone Difference in seconds between coordinated universal
- * time and local time. E.g., -28,800 for PST (GMT-8hrs)
- * tzname(0) Three-letter time-zone name derived from TZ environment
- * variable. E.g., "PST".
- * tzname(1) Three-letter DST zone name derived from TZ environment
- * variable. E.g., "PDT". If DST zone is omitted from TZ,
- * tzname(1) is an empty string.
- *
- * Notes: For example, to set the TZ environment variable to correspond
- * to the current time zone in Germany, you can use one of the
- * following statements:
- *
- * set TZ=GST1GDT
- * set TZ=GST+1GDT
- *
- * If the TZ value is not set, t_tzset() attempts to use the time zone
- * information specified by the operating system. Under Windows NT
- * and Windows 95, this information is specified in the Control Panel's
- * Date/Time application.
- * @internal
- */
-U_INTERNAL void U_EXPORT2 uprv_tzset(void);
-
-/**
- * Difference in seconds between coordinated universal
- * time and local time. E.g., -28,800 for PST (GMT-8hrs)
- * @return the difference in seconds between coordinated universal time and local time.
- * @internal
- */
-U_INTERNAL int32_t U_EXPORT2 uprv_timezone(void);
-
-/**
- * tzname(0) Three-letter time-zone name derived from TZ environment
- * variable. E.g., "PST".
- * tzname(1) Three-letter DST zone name derived from TZ environment
- * variable. E.g., "PDT". If DST zone is omitted from TZ,
- * tzname(1) is an empty string.
- * @internal
- */
-U_INTERNAL const char* U_EXPORT2 uprv_tzname(int n);
-
-/**
- * Reset the global tzname cache.
- * @internal
- */
-U_INTERNAL void uprv_tzname_clear_cache();
-
-/**
- * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
- * This function is affected by 'faketime' and should be the bottleneck for all user-visible ICU time functions.
- * @return the UTC time measured in milliseconds
- * @internal
- */
-U_INTERNAL UDate U_EXPORT2 uprv_getUTCtime(void);
-
-/**
- * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
- * This function is not affected by 'faketime', so it should only be used by low level test functions- not by anything that
- * exposes time to the end user.
- * @return the UTC time measured in milliseconds
- * @internal
- */
-U_INTERNAL UDate U_EXPORT2 uprv_getRawUTCtime(void);
-
-/**
- * Determine whether a pathname is absolute or not, as defined by the platform.
- * @param path Pathname to test
- * @return TRUE if the path is absolute
- * @internal (ICU 3.0)
- */
-U_INTERNAL UBool U_EXPORT2 uprv_pathIsAbsolute(const char *path);
-
-/**
- * Use U_MAX_PTR instead of this function.
- * @param base pointer to test
- * @return the largest possible pointer greater than the base
- * @internal (ICU 3.8)
- */
-U_INTERNAL void * U_EXPORT2 uprv_maximumPtr(void *base);
-
-/**
- * Maximum value of a (void*) - use to indicate the limit of an 'infinite' buffer.
- * In fact, buffer sizes must not exceed 2GB so that the difference between
- * the buffer limit and the buffer start can be expressed in an int32_t.
- *
- * The definition of U_MAX_PTR must fulfill the following conditions:
- * - return the largest possible pointer greater than base
- * - return a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
- * - avoid wrapping around at high addresses
- * - make sure that the returned pointer is not farther from base than 0x7fffffff bytes
- *
- * @param base The beginning of a buffer to find the maximum offset from
- * @internal
- */
-#ifndef U_MAX_PTR
-# if U_PLATFORM == U_PF_OS390 && !defined(_LP64)
-    /* We have 31-bit pointers. */
-# define U_MAX_PTR(base) ((void *)0x7fffffff)
-# elif U_PLATFORM == U_PF_OS400
-    /*
-     * The macro argument is parenthesized so that an expression argument
-     * (for example p + n) is cast to void* as a whole rather than only its
-     * first operand.
-     */
-# define U_MAX_PTR(base) uprv_maximumPtr((void *)(base))
-# elif 0
-    /*
-     * For platforms where pointers are scalar values (which is normal, but unlike i5/OS)
-     * but that do not define uintptr_t.
-     *
-     * However, this does not work on modern compilers:
-     * The C++ standard does not define pointer overflow, and allows compilers to
-     * assume that p+u>p for any pointer p and any integer u>0.
-     * Thus, modern compilers optimize away the ">" comparison.
-     * (See ICU tickets #7187 and #8096.)
-     */
-# define U_MAX_PTR(base) \
-    ((void *)(((char *)(base)+0x7fffffffu) > (char *)(base) \
-        ? ((char *)(base)+0x7fffffffu) \
-        : (char *)-1))
-# else
-    /* Default version. C++ standard compliant for scalar pointers. */
-# define U_MAX_PTR(base) \
-    ((void *)(((uintptr_t)(base)+0x7fffffffu) > (uintptr_t)(base) \
-        ? ((uintptr_t)(base)+0x7fffffffu) \
-        : (uintptr_t)-1))
-# endif
-#endif
-
-
-#ifdef __cplusplus
-/**
- * Pin a buffer capacity such that doing pointer arithmetic
- * on the destination pointer and capacity cannot overflow.
- *
- * The pinned capacity must fulfill the following conditions (for positive capacities):
- * - dest + capacity is a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
- * - (dest + capacity) >= dest
- * - The size (in bytes) of T[capacity] does not exceed 0x7fffffff
- *
- * @param dest the destination buffer pointer.
- * @param capacity the requested buffer capacity, in units of type T.
- * @return the pinned capacity.
- * @internal
- */
-template <typename T>
-inline int32_t pinCapacity(T *dest, int32_t capacity) {
-    // Zero and negative capacities are handed back unchanged.
-    if (capacity <= 0) { return capacity; }
-
-    const uintptr_t start = (uintptr_t)dest;
-
-    // Highest address the buffer may reach, computed per platform.
-# if U_PLATFORM == U_PF_OS390 && !defined(_LP64)
-    // We have 31-bit pointers.
-    uintptr_t limit = 0x7fffffff;
-# elif U_PLATFORM == U_PF_OS400
-    uintptr_t limit = (uintptr_t)uprv_maximumPtr((void *)dest);
-# else
-    uintptr_t limit = start + 0x7fffffffu;
-    if (limit < start) {
-        // The addition wrapped: less than 2GB remain before the end of the
-        // address space, so pin to the largest representable address.
-        limit = (uintptr_t)-1;
-    }
-# endif
-
-    // Convert the available byte span (max. 2GB) into whole elements of T.
-    const uintptr_t spanBytes = limit - start;
-    const int32_t ceiling = (int32_t)(spanBytes / sizeof(T));
-    return capacity > ceiling ? ceiling : capacity;
-}
-#endif // __cplusplus
-
-/* Dynamic Library Functions */
-
-typedef void (UVoidFunction)(void);
-
-#if U_ENABLE_DYLOAD
-/**
- * Load a library
- * @internal (ICU 4.4)
- */
-U_INTERNAL void * U_EXPORT2 uprv_dl_open(const char *libName, UErrorCode *status);
-
-/**
- * Close a library
- * @internal (ICU 4.4)
- */
-U_INTERNAL void U_EXPORT2 uprv_dl_close( void *lib, UErrorCode *status);
-
-/**
- * Extract a symbol from a library (function)
- * @internal (ICU 4.8)
- */
-U_INTERNAL UVoidFunction* U_EXPORT2 uprv_dlsym_func( void *lib, const char *symbolName, UErrorCode *status);
-
-/**
- * Extract a symbol from a library (function)
- * Not implemented, no clients.
- * @internal
- */
-/* U_INTERNAL void * U_EXPORT2 uprv_dlsym_data( void *lib, const char *symbolName, UErrorCode *status); */
-
-#endif
-
-/**
- * Define malloc and related functions
- * @internal
- */
-#if U_PLATFORM == U_PF_OS400
-# define uprv_default_malloc(x) _C_TS_malloc(x)
-# define uprv_default_realloc(x,y) _C_TS_realloc(x,y)
-# define uprv_default_free(x) _C_TS_free(x)
-/* also _C_TS_calloc(x) */
-#else
-/* C defaults */
-# define uprv_default_malloc(x) malloc(x)
-# define uprv_default_realloc(x,y) realloc(x,y)
-# define uprv_default_free(x) free(x)
-#endif
-
-
-#endif
diff --git a/contrib/libs/icu/common/rbbi.cpp b/contrib/libs/icu/common/rbbi.cpp
deleted file mode 100644
index 43ba58ba9e6..00000000000
--- a/contrib/libs/icu/common/rbbi.cpp
+++ /dev/null
@@ -1,1272 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-***************************************************************************
-* Copyright (C) 1999-2016 International Business Machines Corporation
-* and others. All rights reserved.
-***************************************************************************
-*/
-//
-// file: rbbi.cpp Contains the implementation of the rule based break iterator
-// runtime engine and the API implementation for
-// class RuleBasedBreakIterator
-//
-
-#include "utypeinfo.h" // for 'typeid' to work
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include <cinttypes>
-
-#include "unicode/rbbi.h"
-#include "unicode/schriter.h"
-#include "unicode/uchriter.h"
-#include "unicode/uclean.h"
-#include "unicode/udata.h"
-
-#include "brkeng.h"
-#include "ucln_cmn.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "localsvc.h"
-#include "rbbidata.h"
-#include "rbbi_cache.h"
-#include "rbbirb.h"
-#include "uassert.h"
-#include "umutex.h"
-#include "uvectr32.h"
-
-#ifdef RBBI_DEBUG
-static UBool gTrace = FALSE;
-#endif
-
-U_NAMESPACE_BEGIN
-
-// The state number of the starting state
-constexpr int32_t START_STATE = 1;
-
-// The state-transition value indicating "stop"
-constexpr int32_t STOP_STATE = 0;
-
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator)
-
-
-//=======================================================================
-// constructors
-//=======================================================================
-
-/**
- * Constructs a RuleBasedBreakIterator that uses the already-created
- * tables object that is passed in as a parameter.
- */
-RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status)
-    : fSCharIter(UnicodeString())
-{
-    // Put every field into its initial state before attaching the rule data.
-    init(status);
-
-    // status checked in constructor
-    fData = new RBBIDataWrapper(data, status);
-
-    // Report an allocation failure only when no earlier error is pending.
-    if (U_SUCCESS(status) && fData == NULL) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-    }
-}
-
-//
-// Construct from precompiled binary rules (tables). This constructor is public API,
-// taking the rules as a (const uint8_t *) to match the type produced by getBinaryRules().
-//
-RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
-                                               uint32_t       ruleLength,
-                                               UErrorCode     &status)
-    : fSCharIter(UnicodeString())
-{
-    init(status);
-    if (U_FAILURE(status)) {
-        return;
-    }
-
-    // Validate the caller's buffer: it must exist, be large enough to hold a
-    // header, and the length recorded in that header must fit in ruleLength.
-    // The header field is only read after the first two checks have passed.
-    const RBBIDataHeader *header = (const RBBIDataHeader *)compiledRules;
-    if (header == NULL ||
-        ruleLength < sizeof(RBBIDataHeader) ||
-        header->fLength > ruleLength) {
-        status = U_ILLEGAL_ARGUMENT_ERROR;
-        return;
-    }
-
-    // The wrapper is created with kDontAdopt.
-    fData = new RBBIDataWrapper(header, RBBIDataWrapper::kDontAdopt, status);
-    if (U_SUCCESS(status) && fData == NULL) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-    }
-}
-
-
-//-------------------------------------------------------------------------------
-//
-// Constructor from a UDataMemory handle to precompiled break rules
-// stored in an ICU data file.
-//
-//-------------------------------------------------------------------------------
-RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status)
-    : fSCharIter(UnicodeString())
-{
-    // Put every field into its initial state before attaching the rule data.
-    init(status);
-
-    // status checked in constructor
-    fData = new RBBIDataWrapper(udm, status);
-
-    // Report an allocation failure only when no earlier error is pending.
-    if (U_SUCCESS(status) && fData == NULL) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-    }
-}
-
-
-
-//-------------------------------------------------------------------------------
-//
-// Constructor from a set of rules supplied as a string.
-//
-//-------------------------------------------------------------------------------
-RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString  &rules,
-                                                UParseError          &parseError,
-                                                UErrorCode           &status)
-    : fSCharIter(UnicodeString())
-{
-    init(status);
-    if (U_FAILURE(status)) {
-        return;
-    }
-
-    // Note: This is a bit awkward. The RBBI ruleBuilder has a factory method that
-    //       creates and returns a complete RBBI. From here, in a constructor, we
-    //       can't just return the object created by the builder factory, hence
-    //       the assignment of the factory created object to "this".
-    RuleBasedBreakIterator *built = (RuleBasedBreakIterator *)
-        RBBIRuleBuilder::createRuleBasedBreakIterator(rules, &parseError, status);
-    if (U_FAILURE(status)) {
-        return;
-    }
-
-    // Copy the factory-built iterator into this object, then discard it.
-    *this = *built;
-    delete built;
-}
-
-
-//-------------------------------------------------------------------------------
-//
-// Default Constructor. Create an empty shell that can be set up later.
-// Used when creating a RuleBasedBreakIterator from a set
-// of rules.
-//-------------------------------------------------------------------------------
-RuleBasedBreakIterator::RuleBasedBreakIterator()
-    : fSCharIter(UnicodeString())
-{
-    // This constructor has no status parameter, so the outcome of init()
-    // stays in a local variable and is not reported to the caller.
-    UErrorCode localStatus = U_ZERO_ERROR;
-    init(localStatus);
-}
-
-
-//-------------------------------------------------------------------------------
-//
-// Copy constructor. Will produce a break iterator with the same behavior,
-// and which iterates over the same text, as the one passed in.
-//
-//-------------------------------------------------------------------------------
-RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other)
-    : BreakIterator(other),
-      fSCharIter(UnicodeString())
-{
-    // First bring every field to its initial state, then let the assignment
-    // operator copy the state of "other". The status of init() is local
-    // and is not reported to the caller.
-    UErrorCode localStatus = U_ZERO_ERROR;
-    init(localStatus);
-    *this = other;
-}
-
-
-/**
- * Destructor
- */
-RuleBasedBreakIterator::~RuleBasedBreakIterator() {
-    // fCharIter either points at our own fSCharIter member, which must not be
-    // deleted, or at an iterator that was adopted from the outside.
-    const bool charIterIsAdopted = (fCharIter != &fSCharIter);
-    if (charIterIsAdopted) {
-        delete fCharIter;
-    }
-    fCharIter = NULL;
-
-    utext_close(&fText);
-
-    // The rule data is reference counted: drop our reference instead of deleting.
-    if (fData != NULL) {
-        fData->removeReference();
-        fData = NULL;
-    }
-
-    // Remaining helper objects are deleted directly and their pointers cleared.
-    delete fBreakCache;
-    fBreakCache = NULL;
-
-    delete fDictionaryCache;
-    fDictionaryCache = NULL;
-
-    delete fLanguageBreakEngines;
-    fLanguageBreakEngines = NULL;
-
-    delete fUnhandledBreakEngine;
-    fUnhandledBreakEngine = NULL;
-}
-
-/**
- * Assignment operator. Sets this iterator to have the same behavior,
- * and iterate over the same text, as the one passed in.
- */
-RuleBasedBreakIterator&
-RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
-    // Self-assignment leaves the object untouched.
-    if (this == &that) {
-        return *this;
-    }
-    BreakIterator::operator=(that);
-
-    // The language break engines are not copied; drop ours. Just rebuild for now.
-    // TODO: clone fLanguageBreakEngines from "that"
-    delete fLanguageBreakEngines;
-    fLanguageBreakEngines = NULL;
-
-    // The status of the text clone is local and is not reported to the caller.
-    UErrorCode status = U_ZERO_ERROR;
-    utext_clone(&fText, &that.fText, FALSE, TRUE, &status);
-
-    // Release a previously adopted character iterator and fall back to our
-    // own fSCharIter member.
-    if (fCharIter != &fSCharIter) {
-        delete fCharIter;
-    }
-    fCharIter = &fSCharIter;
-
-    if (that.fCharIter != NULL && that.fCharIter != &that.fSCharIter) {
-        // This is a little bit tricky - it will initially appear that
-        // this->fCharIter is adopted, even if that->fCharIter was
-        // not adopted. That's ok.
-        fCharIter = that.fCharIter->clone();
-    }
-    fSCharIter = that.fSCharIter;
-    if (fCharIter == NULL) {
-        // clone() produced no iterator; keep using our own member.
-        fCharIter = &fSCharIter;
-    }
-
-    // Swap the reference-counted rule data: release ours, share "that"'s.
-    if (fData != NULL) {
-        fData->removeReference();
-        fData = NULL;
-    }
-    if (that.fData != NULL) {
-        fData = that.fData->addReference();
-    }
-
-    fPosition = that.fPosition;
-    fRuleStatusIndex = that.fRuleStatusIndex;
-    fDone = that.fDone;
-
-    // TODO: both the dictionary and the main cache need to be copied.
-    //       Current position could be within a dictionary range. Trying to continue
-    //       the iteration without the caches present would go to the rules, with
-    //       the assumption that the current position is on a rule boundary.
-    //
-    // init() can leave either cache NULL (it returns early on a pending error
-    // and allocation may fail), and the copy constructor assigns right after
-    // init(), so the caches are only reset when they exist.
-    if (fBreakCache != NULL) {
-        fBreakCache->reset(fPosition, fRuleStatusIndex);
-    }
-    if (fDictionaryCache != NULL) {
-        fDictionaryCache->reset();
-    }
-
-    return *this;
-}
-
-
-
-//-----------------------------------------------------------------------------
-//
-// init() Shared initialization routine. Used by all the constructors.
-// Initializes all fields, leaving the object in a consistent state.
-//
-//-----------------------------------------------------------------------------
-void RuleBasedBreakIterator::init(UErrorCode &status) {
-    // Iteration state.
-    fPosition = 0;
-    fRuleStatusIndex = 0;
-    fDone = false;
-    fDictionaryCharCount = 0;
-
-    // Nothing is attached or allocated yet.
-    fCharIter = NULL;
-    fData = NULL;
-    fLanguageBreakEngines = NULL;
-    fUnhandledBreakEngine = NULL;
-    fBreakCache = NULL;
-    fDictionaryCache = NULL;
-
-    // Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER.
-    // fText = UTEXT_INITIALIZER;
-    // Instead, copy the bytes of a statically initialized UText into fText.
-    static const UText pristineUText = UTEXT_INITIALIZER;
-    uprv_memcpy(&fText, &pristineUText, sizeof(UText));
-
-    // The fields above are reset even when the caller arrives with an error;
-    // the remaining setup is skipped in that case.
-    if (U_FAILURE(status)) {
-        return;
-    }
-
-    utext_openUChars(&fText, NULL, 0, &status);
-    fDictionaryCache = new DictionaryCache(this, status);
-    fBreakCache = new BreakCache(this, status);
-    if (U_SUCCESS(status)) {
-        if (fDictionaryCache == NULL || fBreakCache == NULL) {
-            status = U_MEMORY_ALLOCATION_ERROR;
-        }
-    }
-
-#ifdef RBBI_DEBUG
-    // One-time check of the U_RBBIDEBUG environment variable: tracing is
-    // switched on when its value contains "trace".
-    static UBool debugInitDone = FALSE;
-    if (!debugInitDone) {
-        char *envValue = getenv("U_RBBIDEBUG");
-        if (envValue != NULL && uprv_strstr(envValue, "trace") != NULL) {
-            gTrace = TRUE;
-        }
-        debugInitDone = TRUE;
-    }
-#endif
-}
-
-
-
-//-----------------------------------------------------------------------------
-//
-// clone - Returns a newly-constructed RuleBasedBreakIterator with the same
-// behavior, and iterating over the same text, as this one.
-// Virtual function: does the right thing with subclasses.
-//
-//-----------------------------------------------------------------------------
-RuleBasedBreakIterator*
-RuleBasedBreakIterator::clone() const {
-    // The copy constructor does all the work; the result of new is returned as is.
-    RuleBasedBreakIterator *copy = new RuleBasedBreakIterator(*this);
-    return copy;
-}
-
-/**
- * Equality operator. Returns TRUE if both BreakIterators are of the
- * same class, have the same behavior, and iterate over the same text.
- */
-UBool
-RuleBasedBreakIterator::operator==(const BreakIterator& that) const {
-    // Objects of different dynamic types are never equal.
-    if (typeid(*this) != typeid(that)) {
-        return FALSE;
-    }
-    // An object is always equal to itself.
-    if (this == &that) {
-        return TRUE;
-    }
-
-    // The base class BreakIterator carries no state that participates in equality,
-    // and does not implement an equality function that would otherwise be
-    // checked at this point.
-
-    const RuleBasedBreakIterator& other = (const RuleBasedBreakIterator&) that;
-
-    // The two break iterators must be operating on the same text and have the
-    // same iteration position.
-    // Note that fText's position is always the same as the break iterator's position.
-    if (!utext_equals(&fText, &other.fText)) {
-        return FALSE;
-    }
-
-    // Any difference in the iteration state makes the iterators unequal.
-    if (fPosition != other.fPosition ||
-        fRuleStatusIndex != other.fRuleStatusIndex ||
-        fDone != other.fDone) {
-        return FALSE;
-    }
-
-    // Finally compare the rules: identical data pointers (including both NULL)
-    // match right away; otherwise both must be present and compare equal.
-    if (other.fData == fData) {
-        return TRUE;
-    }
-    if (fData == NULL || other.fData == NULL) {
-        return FALSE;
-    }
-    if (*other.fData == *fData) {
-        return TRUE;
-    }
-    return FALSE;
-}
-
-/**
- * Compute a hash code for this BreakIterator
- * @return A hash code
- */
-int32_t
-RuleBasedBreakIterator::hashCode(void) const {
-    // The hash comes from the rule data alone; without rule data it is 0.
-    if (fData == NULL) {
-        return 0;
-    }
-    int32_t dataHash = fData->hashCode();
-    return dataHash;
-}
-
-
-void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) {
-    // Nothing is changed when the caller arrives with a pending error.
-    if (U_FAILURE(status)) {
-        return;
-    }
-
-    // Results cached for the previous text are discarded.
-    fBreakCache->reset();
-    fDictionaryCache->reset();
-    utext_clone(&fText, ut, FALSE, TRUE, &status);
-
-    // Set up a dummy CharacterIterator to be returned if anyone
-    //   calls getText().  With input from UText, there is no reasonable
-    //   way to return a characterIterator over the actual input text.
-    //   Return one over an empty string instead - this is the closest
-    //   we can come to signaling a failure.
-    //   (GetText() is obsolete, this failure is sort of OK)
-    fSCharIter.setText(UnicodeString());
-
-    // An existing fCharIter that is not our own member was adopted from the
-    // outside; delete it now.
-    const bool charIterIsAdopted = (fCharIter != &fSCharIter);
-    if (charIterIsAdopted) {
-        delete fCharIter;
-    }
-    fCharIter = &fSCharIter;
-
-    first();
-}
-
-
-UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const {
-    // Hand back whatever utext_clone() produces for our text; any error is
-    // reported through status.
-    return utext_clone(fillIn, &fText, FALSE, TRUE, &status);
-}
-
-
-//=======================================================================
-// BreakIterator overrides
-//=======================================================================
-
-/**
- * Return a CharacterIterator over the text being analyzed.
- */
-CharacterIterator&
-RuleBasedBreakIterator::getText() const {
-    // The iterator is returned by reference; no copy is made.
-    CharacterIterator &current = *fCharIter;
-    return current;
-}
-
-/**
- * Set the iterator to analyze a new piece of text. This function resets
- * the current iteration position to the beginning of the text.
- * @param newText An iterator over the text to analyze.
- */
-void
-RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
- // If we are holding a CharacterIterator adopted from a
- // previous call to this function, delete it now.
- if (fCharIter != &fSCharIter) {
- delete fCharIter;
- }
-
- fCharIter = newText;
- UErrorCode status = U_ZERO_ERROR;
- fBreakCache->reset();
- fDictionaryCache->reset();
- if (newText==NULL || newText->startIndex() != 0) {
- // startIndex !=0 wants to be an error, but there's no way to report it.
- // Make the iterator text be an empty string.
- utext_openUChars(&fText, NULL, 0, &status);
- } else {
- utext_openCharacterIterator(&fText, newText, &status);
- }
- this->first();
-}
-
-/**
- * Set the iterator to analyze a new piece of text. This function resets
- * the current iteration position to the beginning of the text.
- * @param newText An iterator over the text to analyze.
- */
-void
-RuleBasedBreakIterator::setText(const UnicodeString& newText) {
- UErrorCode status = U_ZERO_ERROR;
- fBreakCache->reset();
- fDictionaryCache->reset();
- utext_openConstUnicodeString(&fText, &newText, &status);
-
- // Set up a character iterator on the string.
- // Needed in case someone calls getText().
- // Can not, unfortunately, do this lazily on the (probably never)
- // call to getText(), because getText is const.
- fSCharIter.setText(newText);
-
- if (fCharIter != &fSCharIter) {
- // old fCharIter was adopted from the outside. Delete it.
- delete fCharIter;
- }
- fCharIter = &fSCharIter;
-
- this->first();
-}
-
-
-/**
- * Provide a new UText for the input text. Must reference text with contents identical
- * to the original.
- * Intended for use with text data originating in Java (garbage collected) environments
- * where the data may be moved in memory at arbitrary times.
- */
-RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, UErrorCode &status) {
- if (U_FAILURE(status)) {
- return *this;
- }
- if (input == NULL) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return *this;
- }
- int64_t pos = utext_getNativeIndex(&fText);
- // Shallow read-only clone of the new UText into the existing input UText
- utext_clone(&fText, input, FALSE, TRUE, &status);
- if (U_FAILURE(status)) {
- return *this;
- }
- utext_setNativeIndex(&fText, pos);
- if (utext_getNativeIndex(&fText) != pos) {
- // Sanity check. The new input utext is supposed to have the exact same
- // contents as the old. If we can't set to the same position, it doesn't.
- // The contents underlying the old utext might be invalid at this point,
- // so it's not safe to check directly.
- status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return *this;
-}
-
-
-/**
- * Sets the current iteration position to the beginning of the text, position zero.
- * @return The new iterator position, which is zero.
- */
-int32_t RuleBasedBreakIterator::first(void) {
- UErrorCode status = U_ZERO_ERROR;
- if (!fBreakCache->seek(0)) {
- fBreakCache->populateNear(0, status);
- }
- fBreakCache->current();
- U_ASSERT(fPosition == 0);
- return 0;
-}
-
-/**
- * Sets the current iteration position to the end of the text.
- * @return The text's past-the-end offset.
- */
-int32_t RuleBasedBreakIterator::last(void) {
- int32_t endPos = (int32_t)utext_nativeLength(&fText);
- UBool endShouldBeBoundary = isBoundary(endPos); // Has side effect of setting iterator position.
- (void)endShouldBeBoundary;
- U_ASSERT(endShouldBeBoundary);
- U_ASSERT(fPosition == endPos);
- return endPos;
-}
-
-/**
- * Advances the iterator either forward or backward the specified number of steps.
- * Negative values move backward, and positive values move forward. This is
- * equivalent to repeatedly calling next() or previous().
- * @param n The number of steps to move. The sign indicates the direction
- * (negative is backwards, and positive is forwards).
- * @return The character offset of the boundary position n boundaries away from
- * the current one.
- */
-int32_t RuleBasedBreakIterator::next(int32_t n) {
- int32_t result = 0;
- if (n > 0) {
- for (; n > 0 && result != UBRK_DONE; --n) {
- result = next();
- }
- } else if (n < 0) {
- for (; n < 0 && result != UBRK_DONE; ++n) {
- result = previous();
- }
- } else {
- result = current();
- }
- return result;
-}
-
-/**
- * Advances the iterator to the next boundary position.
- * @return The position of the first boundary after this one.
- */
-int32_t RuleBasedBreakIterator::next(void) {
- fBreakCache->next();
- return fDone ? UBRK_DONE : fPosition;
-}
-
-/**
- * Move the iterator backwards, to the boundary preceding the current one.
- *
- * Starts from the current position within fText.
- * Starting position need not be on a boundary.
- *
- * @return The position of the boundary position immediately preceding the starting position.
- */
-int32_t RuleBasedBreakIterator::previous(void) {
- UErrorCode status = U_ZERO_ERROR;
- fBreakCache->previous(status);
- return fDone ? UBRK_DONE : fPosition;
-}
-
-/**
- * Sets the iterator to refer to the first boundary position following
- * the specified position.
- * @param startPos The position from which to begin searching for a break position.
- * @return The position of the first break after the current position.
- */
-int32_t RuleBasedBreakIterator::following(int32_t startPos) {
- // if the supplied position is before the beginning, return the
- // text's starting offset
- if (startPos < 0) {
- return first();
- }
-
- // Move requested offset to a code point start. It might be on a trail surrogate,
- // or on a trail byte if the input is UTF-8. Or it may be beyond the end of the text.
- utext_setNativeIndex(&fText, startPos);
- startPos = (int32_t)utext_getNativeIndex(&fText);
-
- UErrorCode status = U_ZERO_ERROR;
- fBreakCache->following(startPos, status);
- return fDone ? UBRK_DONE : fPosition;
-}
-
-/**
- * Sets the iterator to refer to the last boundary position before the
- * specified position.
- * @param offset The position to begin searching for a break from.
- * @return The position of the last boundary before the starting position.
- */
-int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
- if (offset > utext_nativeLength(&fText)) {
- return last();
- }
-
- // Move requested offset to a code point start. It might be on a trail surrogate,
- // or on a trail byte if the input is UTF-8.
-
- utext_setNativeIndex(&fText, offset);
- int32_t adjustedOffset = static_cast<int32_t>(utext_getNativeIndex(&fText));
-
- UErrorCode status = U_ZERO_ERROR;
- fBreakCache->preceding(adjustedOffset, status);
- return fDone ? UBRK_DONE : fPosition;
-}
-
-/**
- * Returns true if the specfied position is a boundary position. As a side
- * effect, leaves the iterator pointing to the first boundary position at
- * or after "offset".
- *
- * @param offset the offset to check.
- * @return True if "offset" is a boundary position.
- */
-UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
- // out-of-range indexes are never boundary positions
- if (offset < 0) {
- first(); // For side effects on current position, tag values.
- return FALSE;
- }
-
- // Adjust offset to be on a code point boundary and not beyond the end of the text.
- // Note that isBoundary() is always false for offsets that are not on code point boundaries.
- // But we still need the side effect of leaving iteration at the following boundary.
-
- utext_setNativeIndex(&fText, offset);
- int32_t adjustedOffset = static_cast<int32_t>(utext_getNativeIndex(&fText));
-
- bool result = false;
- UErrorCode status = U_ZERO_ERROR;
- if (fBreakCache->seek(adjustedOffset) || fBreakCache->populateNear(adjustedOffset, status)) {
- result = (fBreakCache->current() == offset);
- }
-
- if (result && adjustedOffset < offset && utext_char32At(&fText, offset) == U_SENTINEL) {
- // Original offset is beyond the end of the text. Return FALSE, it's not a boundary,
- // but the iteration position remains set to the end of the text, which is a boundary.
- return FALSE;
- }
- if (!result) {
- // Not on a boundary. isBoundary() must leave iterator on the following boundary.
- // Cache->seek(), above, left us on the preceding boundary, so advance one.
- next();
- }
- return result;
-}
-
-
-/**
- * Returns the current iteration position.
- * @return The current iteration position.
- */
-int32_t RuleBasedBreakIterator::current(void) const {
- return fPosition;
-}
-
-
-//=======================================================================
-// implementation
-//=======================================================================
-
-//
-// RBBIRunMode - the state machine runs an extra iteration at the beginning and end
-// of user text. A variable with this enum type keeps track of where we
-// are. The state machine only fetches user input while in the RUN mode.
-//
-enum RBBIRunMode {
- RBBI_START, // state machine processing is before first char of input
- RBBI_RUN, // state machine processing is in the user text
- RBBI_END // state machine processing is after end of user text.
-};
-
-
-// Map from look-ahead break states (corresponds to rules) to boundary positions.
-// Allows multiple lookahead break rules to be in flight at the same time.
-//
-// This is a temporary approach for ICU 57. A better fix is to make the look-ahead numbers
-// in the state table be sequential, then we can just index an array. And the
-// table could also tell us in advance how big that array needs to be.
-//
-// Before ICU 57 there was just a single simple variable for a look-ahead match that
-// was in progress. Two rules at once did not work.
-
-static const int32_t kMaxLookaheads = 8;
-struct LookAheadResults {
- int32_t fUsedSlotLimit;
- int32_t fPositions[8];
- int16_t fKeys[8];
-
- LookAheadResults() : fUsedSlotLimit(0), fPositions(), fKeys() {}
-
- int32_t getPosition(int16_t key) {
- for (int32_t i=0; i<fUsedSlotLimit; ++i) {
- if (fKeys[i] == key) {
- return fPositions[i];
- }
- }
- UPRV_UNREACHABLE;
- }
-
- void setPosition(int16_t key, int32_t position) {
- int32_t i;
- for (i=0; i<fUsedSlotLimit; ++i) {
- if (fKeys[i] == key) {
- fPositions[i] = position;
- return;
- }
- }
- if (i >= kMaxLookaheads) {
- UPRV_UNREACHABLE;
- }
- fKeys[i] = key;
- fPositions[i] = position;
- U_ASSERT(fUsedSlotLimit == i);
- fUsedSlotLimit = i + 1;
- }
-};
-
-
-//-----------------------------------------------------------------------------------
-//
-// handleNext()
-// Run the state machine to find a boundary
-//
-//-----------------------------------------------------------------------------------
-int32_t RuleBasedBreakIterator::handleNext() {
- int32_t state;
- uint16_t category = 0;
- RBBIRunMode mode;
-
- RBBIStateTableRow *row;
- UChar32 c;
- LookAheadResults lookAheadMatches;
- int32_t result = 0;
- int32_t initialPosition = 0;
- const RBBIStateTable *statetable = fData->fForwardTable;
- const char *tableData = statetable->fTableData;
- uint32_t tableRowLen = statetable->fRowLen;
- #ifdef RBBI_DEBUG
- if (gTrace) {
- RBBIDebugPuts("Handle Next pos char state category");
- }
- #endif
-
- // handleNext alway sets the break tag value.
- // Set the default for it.
- fRuleStatusIndex = 0;
-
- fDictionaryCharCount = 0;
-
- // if we're already at the end of the text, return DONE.
- initialPosition = fPosition;
- UTEXT_SETNATIVEINDEX(&fText, initialPosition);
- result = initialPosition;
- c = UTEXT_NEXT32(&fText);
- if (c==U_SENTINEL) {
- fDone = TRUE;
- return UBRK_DONE;
- }
-
- // Set the initial state for the state machine
- state = START_STATE;
- row = (RBBIStateTableRow *)
- //(statetable->fTableData + (statetable->fRowLen * state));
- (tableData + tableRowLen * state);
-
-
- mode = RBBI_RUN;
- if (statetable->fFlags & RBBI_BOF_REQUIRED) {
- category = 2;
- mode = RBBI_START;
- }
-
-
- // loop until we reach the end of the text or transition to state 0
- //
- for (;;) {
- if (c == U_SENTINEL) {
- // Reached end of input string.
- if (mode == RBBI_END) {
- // We have already run the loop one last time with the
- // character set to the psueudo {eof} value. Now it is time
- // to unconditionally bail out.
- break;
- }
- // Run the loop one last time with the fake end-of-input character category.
- mode = RBBI_END;
- category = 1;
- }
-
- //
- // Get the char category. An incoming category of 1 or 2 means that
- // we are preset for doing the beginning or end of input, and
- // that we shouldn't get a category from an actual text input character.
- //
- if (mode == RBBI_RUN) {
- // look up the current character's character category, which tells us
- // which column in the state table to look at.
- // Note: the 16 in UTRIE_GET16 refers to the size of the data being returned,
- // not the size of the character going in, which is a UChar32.
- //
- category = UTRIE2_GET16(fData->fTrie, c);
-
- // Check the dictionary bit in the character's category.
- // Counter is only used by dictionary based iteration.
- // Chars that need to be handled by a dictionary have a flag bit set
- // in their category values.
- //
- if ((category & 0x4000) != 0) {
- fDictionaryCharCount++;
- // And off the dictionary flag bit.
- category &= ~0x4000;
- }
- }
-
- #ifdef RBBI_DEBUG
- if (gTrace) {
- RBBIDebugPrintf(" %4" PRId64 " ", utext_getNativeIndex(&fText));
- if (0x20<=c && c<0x7f) {
- RBBIDebugPrintf("\"%c\" ", c);
- } else {
- RBBIDebugPrintf("%5x ", c);
- }
- RBBIDebugPrintf("%3d %3d\n", state, category);
- }
- #endif
-
- // State Transition - move machine to its next state
- //
-
- // fNextState is a variable-length array.
- U_ASSERT(category<fData->fHeader->fCatCount);
- state = row->fNextState[category]; /*Not accessing beyond memory*/
- row = (RBBIStateTableRow *)
- // (statetable->fTableData + (statetable->fRowLen * state));
- (tableData + tableRowLen * state);
-
-
- if (row->fAccepting == -1) {
- // Match found, common case.
- if (mode != RBBI_START) {
- result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
- }
- fRuleStatusIndex = row->fTagIdx; // Remember the break status (tag) values.
- }
-
- int16_t completedRule = row->fAccepting;
- if (completedRule > 0) {
- // Lookahead match is completed.
- int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule);
- if (lookaheadResult >= 0) {
- fRuleStatusIndex = row->fTagIdx;
- fPosition = lookaheadResult;
- return lookaheadResult;
- }
- }
-
- // If we are at the position of the '/' in a look-ahead (hard break) rule;
- // record the current position, to be returned later, if the full rule matches.
- // TODO: Move this check before the previous check of fAccepting.
- // This would enable hard-break rules with no following context.
- // But there are line break test failures when trying this. Investigate.
- // Issue ICU-20837
- int16_t rule = row->fLookAhead;
- if (rule != 0) {
- int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
- lookAheadMatches.setPosition(rule, pos);
- }
-
- if (state == STOP_STATE) {
- // This is the normal exit from the lookup state machine.
- // We have advanced through the string until it is certain that no
- // longer match is possible, no matter what characters follow.
- break;
- }
-
- // Advance to the next character.
- // If this is a beginning-of-input loop iteration, don't advance
- // the input position. The next iteration will be processing the
- // first real input character.
- if (mode == RBBI_RUN) {
- c = UTEXT_NEXT32(&fText);
- } else {
- if (mode == RBBI_START) {
- mode = RBBI_RUN;
- }
- }
- }
-
- // The state machine is done. Check whether it found a match...
-
- // If the iterator failed to advance in the match engine, force it ahead by one.
- // (This really indicates a defect in the break rules. They should always match
- // at least one character.)
- if (result == initialPosition) {
- utext_setNativeIndex(&fText, initialPosition);
- utext_next32(&fText);
- result = (int32_t)utext_getNativeIndex(&fText);
- fRuleStatusIndex = 0;
- }
-
- // Leave the iterator at our result position.
- fPosition = result;
- #ifdef RBBI_DEBUG
- if (gTrace) {
- RBBIDebugPrintf("result = %d\n\n", result);
- }
- #endif
- return result;
-}
-
-
-//-----------------------------------------------------------------------------------
-//
-// handleSafePrevious()
-//
-// Iterate backwards using the safe reverse rules.
-// The logic of this function is similar to handleNext(), but simpler
-// because the safe table does not require as many options.
-//
-//-----------------------------------------------------------------------------------
-int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) {
- int32_t state;
- uint16_t category = 0;
- RBBIStateTableRow *row;
- UChar32 c;
- int32_t result = 0;
-
- const RBBIStateTable *stateTable = fData->fReverseTable;
- UTEXT_SETNATIVEINDEX(&fText, fromPosition);
- #ifdef RBBI_DEBUG
- if (gTrace) {
- RBBIDebugPuts("Handle Previous pos char state category");
- }
- #endif
-
- // if we're already at the start of the text, return DONE.
- if (fData == NULL || UTEXT_GETNATIVEINDEX(&fText)==0) {
- return BreakIterator::DONE;
- }
-
- // Set the initial state for the state machine
- c = UTEXT_PREVIOUS32(&fText);
- state = START_STATE;
- row = (RBBIStateTableRow *)
- (stateTable->fTableData + (stateTable->fRowLen * state));
-
- // loop until we reach the start of the text or transition to state 0
- //
- for (; c != U_SENTINEL; c = UTEXT_PREVIOUS32(&fText)) {
-
- // look up the current character's character category, which tells us
- // which column in the state table to look at.
- // Note: the 16 in UTRIE_GET16 refers to the size of the data being returned,
- // not the size of the character going in, which is a UChar32.
- //
- // And off the dictionary flag bit. For reverse iteration it is not used.
- category = UTRIE2_GET16(fData->fTrie, c);
- category &= ~0x4000;
-
- #ifdef RBBI_DEBUG
- if (gTrace) {
- RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(&fText));
- if (0x20<=c && c<0x7f) {
- RBBIDebugPrintf("\"%c\" ", c);
- } else {
- RBBIDebugPrintf("%5x ", c);
- }
- RBBIDebugPrintf("%3d %3d\n", state, category);
- }
- #endif
-
- // State Transition - move machine to its next state
- //
- // fNextState is a variable-length array.
- U_ASSERT(category<fData->fHeader->fCatCount);
- state = row->fNextState[category]; /*Not accessing beyond memory*/
- row = (RBBIStateTableRow *)
- (stateTable->fTableData + (stateTable->fRowLen * state));
-
- if (state == STOP_STATE) {
- // This is the normal exit from the lookup state machine.
- // Transistion to state zero means we have found a safe point.
- break;
- }
- }
-
- // The state machine is done. Check whether it found a match...
- result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
- #ifdef RBBI_DEBUG
- if (gTrace) {
- RBBIDebugPrintf("result = %d\n\n", result);
- }
- #endif
- return result;
-}
-
-//-------------------------------------------------------------------------------
-//
-// getRuleStatus() Return the break rule tag associated with the current
-// iterator position. If the iterator arrived at its current
-// position by iterating forwards, the value will have been
-// cached by the handleNext() function.
-//
-//-------------------------------------------------------------------------------
-
-int32_t RuleBasedBreakIterator::getRuleStatus() const {
-
- // fLastRuleStatusIndex indexes to the start of the appropriate status record
- // (the number of status values.)
- // This function returns the last (largest) of the array of status values.
- int32_t idx = fRuleStatusIndex + fData->fRuleStatusTable[fRuleStatusIndex];
- int32_t tagVal = fData->fRuleStatusTable[idx];
-
- return tagVal;
-}
-
-
-int32_t RuleBasedBreakIterator::getRuleStatusVec(
- int32_t *fillInVec, int32_t capacity, UErrorCode &status) {
- if (U_FAILURE(status)) {
- return 0;
- }
-
- int32_t numVals = fData->fRuleStatusTable[fRuleStatusIndex];
- int32_t numValsToCopy = numVals;
- if (numVals > capacity) {
- status = U_BUFFER_OVERFLOW_ERROR;
- numValsToCopy = capacity;
- }
- int i;
- for (i=0; i<numValsToCopy; i++) {
- fillInVec[i] = fData->fRuleStatusTable[fRuleStatusIndex + i + 1];
- }
- return numVals;
-}
-
-
-
-//-------------------------------------------------------------------------------
-//
-// getBinaryRules Access to the compiled form of the rules,
-// for use by build system tools that save the data
-// for standard iterator types.
-//
-//-------------------------------------------------------------------------------
-const uint8_t *RuleBasedBreakIterator::getBinaryRules(uint32_t &length) {
- const uint8_t *retPtr = NULL;
- length = 0;
-
- if (fData != NULL) {
- retPtr = (const uint8_t *)fData->fHeader;
- length = fData->fHeader->fLength;
- }
- return retPtr;
-}
-
-
-RuleBasedBreakIterator *RuleBasedBreakIterator::createBufferClone(
- void * /*stackBuffer*/, int32_t &bufferSize, UErrorCode &status) {
- if (U_FAILURE(status)){
- return NULL;
- }
-
- if (bufferSize == 0) {
- bufferSize = 1; // preflighting for deprecated functionality
- return NULL;
- }
-
- BreakIterator *clonedBI = clone();
- if (clonedBI == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- } else {
- status = U_SAFECLONE_ALLOCATED_WARNING;
- }
- return (RuleBasedBreakIterator *)clonedBI;
-}
-
-U_NAMESPACE_END
-
-
-static icu::UStack *gLanguageBreakFactories = nullptr;
-static const icu::UnicodeString *gEmptyString = nullptr;
-static icu::UInitOnce gLanguageBreakFactoriesInitOnce = U_INITONCE_INITIALIZER;
-static icu::UInitOnce gRBBIInitOnce = U_INITONCE_INITIALIZER;
-
-/**
- * Release all static memory held by breakiterator.
- */
-U_CDECL_BEGIN
-UBool U_CALLCONV rbbi_cleanup(void) {
- delete gLanguageBreakFactories;
- gLanguageBreakFactories = nullptr;
- delete gEmptyString;
- gEmptyString = nullptr;
- gLanguageBreakFactoriesInitOnce.reset();
- gRBBIInitOnce.reset();
- return TRUE;
-}
-U_CDECL_END
-
-U_CDECL_BEGIN
-static void U_CALLCONV _deleteFactory(void *obj) {
- delete (icu::LanguageBreakFactory *) obj;
-}
-U_CDECL_END
-U_NAMESPACE_BEGIN
-
-static void U_CALLCONV rbbiInit() {
- gEmptyString = new UnicodeString();
- ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup);
-}
-
-static void U_CALLCONV initLanguageFactories() {
- UErrorCode status = U_ZERO_ERROR;
- U_ASSERT(gLanguageBreakFactories == NULL);
- gLanguageBreakFactories = new UStack(_deleteFactory, NULL, status);
- if (gLanguageBreakFactories != NULL && U_SUCCESS(status)) {
- ICULanguageBreakFactory *builtIn = new ICULanguageBreakFactory(status);
- gLanguageBreakFactories->push(builtIn, status);
-#ifdef U_LOCAL_SERVICE_HOOK
- LanguageBreakFactory *extra = (LanguageBreakFactory *)uprv_svc_hook("languageBreakFactory", &status);
- if (extra != NULL) {
- gLanguageBreakFactories->push(extra, status);
- }
-#endif
- }
- ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup);
-}
-
-
-static const LanguageBreakEngine*
-getLanguageBreakEngineFromFactory(UChar32 c)
-{
- umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories);
- if (gLanguageBreakFactories == NULL) {
- return NULL;
- }
-
- int32_t i = gLanguageBreakFactories->size();
- const LanguageBreakEngine *lbe = NULL;
- while (--i >= 0) {
- LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i));
- lbe = factory->getEngineFor(c);
- if (lbe != NULL) {
- break;
- }
- }
- return lbe;
-}
-
-
-//-------------------------------------------------------------------------------
-//
-// getLanguageBreakEngine Find an appropriate LanguageBreakEngine for the
-// the character c.
-//
-//-------------------------------------------------------------------------------
-const LanguageBreakEngine *
-RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
- const LanguageBreakEngine *lbe = NULL;
- UErrorCode status = U_ZERO_ERROR;
-
- if (fLanguageBreakEngines == NULL) {
- fLanguageBreakEngines = new UStack(status);
- if (fLanguageBreakEngines == NULL || U_FAILURE(status)) {
- delete fLanguageBreakEngines;
- fLanguageBreakEngines = 0;
- return NULL;
- }
- }
-
- int32_t i = fLanguageBreakEngines->size();
- while (--i >= 0) {
- lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i));
- if (lbe->handles(c)) {
- return lbe;
- }
- }
-
- // No existing dictionary took the character. See if a factory wants to
- // give us a new LanguageBreakEngine for this character.
- lbe = getLanguageBreakEngineFromFactory(c);
-
- // If we got one, use it and push it on our stack.
- if (lbe != NULL) {
- fLanguageBreakEngines->push((void *)lbe, status);
- // Even if we can't remember it, we can keep looking it up, so
- // return it even if the push fails.
- return lbe;
- }
-
- // No engine is forthcoming for this character. Add it to the
- // reject set. Create the reject break engine if needed.
- if (fUnhandledBreakEngine == NULL) {
- fUnhandledBreakEngine = new UnhandledEngine(status);
- if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
- // Put it last so that scripts for which we have an engine get tried
- // first.
- fLanguageBreakEngines->insertElementAt(fUnhandledBreakEngine, 0, status);
- // If we can't insert it, or creation failed, get rid of it
- if (U_FAILURE(status)) {
- delete fUnhandledBreakEngine;
- fUnhandledBreakEngine = 0;
- return NULL;
- }
- }
-
- // Tell the reject engine about the character; at its discretion, it may
- // add more than just the one character.
- fUnhandledBreakEngine->handleCharacter(c);
-
- return fUnhandledBreakEngine;
-}
-
-void RuleBasedBreakIterator::dumpCache() {
- fBreakCache->dumpCache();
-}
-
-void RuleBasedBreakIterator::dumpTables() {
- fData->printData();
-}
-
-/**
- * Returns the description used to create this iterator
- */
-
-const UnicodeString&
-RuleBasedBreakIterator::getRules() const {
- if (fData != NULL) {
- return fData->getRuleSourceString();
- } else {
- umtx_initOnce(gRBBIInitOnce, &rbbiInit);
- return *gEmptyString;
- }
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/contrib/libs/icu/common/rbbi_cache.cpp b/contrib/libs/icu/common/rbbi_cache.cpp
deleted file mode 100644
index 4f9e83360a2..00000000000
--- a/contrib/libs/icu/common/rbbi_cache.cpp
+++ /dev/null
@@ -1,653 +0,0 @@
-// Copyright (C) 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-// file: rbbi_cache.cpp
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/ubrk.h"
-#include "unicode/rbbi.h"
-
-#include "rbbi_cache.h"
-
-#include "brkeng.h"
-#include "cmemory.h"
-#include "rbbidata.h"
-#include "rbbirb.h"
-#include "uassert.h"
-#include "uvectr32.h"
-
-U_NAMESPACE_BEGIN
-
-/*
- * DictionaryCache implementation
- */
-
-RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
- fBI(bi), fBreaks(status), fPositionInCache(-1),
- fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
-}
-
-RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
-}
-
-void RuleBasedBreakIterator::DictionaryCache::reset() {
- fPositionInCache = -1;
- fStart = 0;
- fLimit = 0;
- fFirstRuleStatusIndex = 0;
- fOtherRuleStatusIndex = 0;
- fBreaks.removeAllElements();
-}
-
-UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
- if (fromPos >= fLimit || fromPos < fStart) {
- fPositionInCache = -1;
- return FALSE;
- }
-
- // Sequential iteration, move from previous boundary to the following
-
- int32_t r = 0;
- if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
- ++fPositionInCache;
- if (fPositionInCache >= fBreaks.size()) {
- fPositionInCache = -1;
- return FALSE;
- }
- r = fBreaks.elementAti(fPositionInCache);
- U_ASSERT(r > fromPos);
- *result = r;
- *statusIndex = fOtherRuleStatusIndex;
- return TRUE;
- }
-
- // Random indexing. Linear search for the boundary following the given position.
-
- for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
- r= fBreaks.elementAti(fPositionInCache);
- if (r > fromPos) {
- *result = r;
- *statusIndex = fOtherRuleStatusIndex;
- return TRUE;
- }
- }
- UPRV_UNREACHABLE;
-}
-
-
-UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
- if (fromPos <= fStart || fromPos > fLimit) {
- fPositionInCache = -1;
- return FALSE;
- }
-
- if (fromPos == fLimit) {
- fPositionInCache = fBreaks.size() - 1;
- if (fPositionInCache >= 0) {
- U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
- }
- }
-
- int32_t r;
- if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
- --fPositionInCache;
- r = fBreaks.elementAti(fPositionInCache);
- U_ASSERT(r < fromPos);
- *result = r;
- *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
- return TRUE;
- }
-
- if (fPositionInCache == 0) {
- fPositionInCache = -1;
- return FALSE;
- }
-
- for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
- r = fBreaks.elementAti(fPositionInCache);
- if (r < fromPos) {
- *result = r;
- *statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
- return TRUE;
- }
- }
- UPRV_UNREACHABLE;
-}
-
-void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
- int32_t firstRuleStatus, int32_t otherRuleStatus) {
- if ((endPos - startPos) <= 1) {
- return;
- }
-
- reset();
- fFirstRuleStatusIndex = firstRuleStatus;
- fOtherRuleStatusIndex = otherRuleStatus;
-
- int32_t rangeStart = startPos;
- int32_t rangeEnd = endPos;
-
- uint16_t category;
- int32_t current;
- UErrorCode status = U_ZERO_ERROR;
- int32_t foundBreakCount = 0;
- UText *text = &fBI->fText;
-
- // Loop through the text, looking for ranges of dictionary characters.
- // For each span, find the appropriate break engine, and ask it to find
- // any breaks within the span.
-
- utext_setNativeIndex(text, rangeStart);
- UChar32 c = utext_current32(text);
- category = UTRIE2_GET16(fBI->fData->fTrie, c);
-
- while(U_SUCCESS(status)) {
- while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd && (category & 0x4000) == 0) {
- utext_next32(text); // TODO: cleaner loop structure.
- c = utext_current32(text);
- category = UTRIE2_GET16(fBI->fData->fTrie, c);
- }
- if (current >= rangeEnd) {
- break;
- }
-
- // We now have a dictionary character. Get the appropriate language object
- // to deal with it.
- const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);
-
- // Ask the language object if there are any breaks. It will add them to the cache and
- // leave the text pointer on the other side of its range, ready to search for the next one.
- if (lbe != NULL) {
- foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
- }
-
- // Reload the loop variables for the next go-round
- c = utext_current32(text);
- category = UTRIE2_GET16(fBI->fData->fTrie, c);
- }
-
- // If we found breaks, ensure that the first and last entries are
- // the original starting and ending position. And initialize the
- // cache iteration position to the first entry.
-
- // printf("foundBreakCount = %d\n", foundBreakCount);
- if (foundBreakCount > 0) {
- U_ASSERT(foundBreakCount == fBreaks.size());
- if (startPos < fBreaks.elementAti(0)) {
- // The dictionary did not place a boundary at the start of the segment of text.
- // Add one now. This should not commonly happen, but it would be easy for interactions
- // of the rules for dictionary segments and the break engine implementations to
- // inadvertently cause it. Cover it here, just in case.
- fBreaks.insertElementAt(startPos, 0, status);
- }
- if (endPos > fBreaks.peeki()) {
- fBreaks.push(endPos, status);
- }
- fPositionInCache = 0;
- // Note: Dictionary matching may extend beyond the original limit.
- fStart = fBreaks.elementAti(0);
- fLimit = fBreaks.peeki();
- } else {
- // there were no language-based breaks, even though the segment contained
- // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
- // for this range will fail, and the calling code will fall back to the rule based boundaries.
- }
-}
-
-
-/*
- * BreakCache implemetation
- */
-
-RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
- fBI(bi), fSideBuffer(status) {
- reset();
-}
-
-
-RuleBasedBreakIterator::BreakCache::~BreakCache() {
-}
-
-
-void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) {
- fStartBufIdx = 0;
- fEndBufIdx = 0;
- fTextIdx = pos;
- fBufIdx = 0;
- fBoundaries[0] = pos;
- fStatuses[0] = (uint16_t)ruleStatus;
-}
-
-
-int32_t RuleBasedBreakIterator::BreakCache::current() {
- fBI->fPosition = fTextIdx;
- fBI->fRuleStatusIndex = fStatuses[fBufIdx];
- fBI->fDone = FALSE;
- return fTextIdx;
-}
-
-
-void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) {
- if (U_FAILURE(status)) {
- return;
- }
- if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
- // startPos is in the cache. Do a next() from that position.
- // TODO: an awkward set of interactions with bi->fDone
- // seek() does not clear it; it can't because of interactions with populateNear().
- // next() does not clear it in the fast-path case, where everything matters. Maybe it should.
- // So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end.
- fBI->fDone = false;
- next();
- }
- return;
-}
-
-
-void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) {
- if (U_FAILURE(status)) {
- return;
- }
- if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
- if (startPos == fTextIdx) {
- previous(status);
- } else {
- // seek() leaves the BreakCache positioned at the preceding boundary
- // if the requested position is between two bounaries.
- // current() pushes the BreakCache position out to the BreakIterator itself.
- U_ASSERT(startPos > fTextIdx);
- current();
- }
- }
- return;
-}
-
-
-/*
- * Out-of-line code for BreakCache::next().
- * Cache does not already contain the boundary
- */
-void RuleBasedBreakIterator::BreakCache::nextOL() {
- fBI->fDone = !populateFollowing();
- fBI->fPosition = fTextIdx;
- fBI->fRuleStatusIndex = fStatuses[fBufIdx];
- return;
-}
-
-
-void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
- if (U_FAILURE(status)) {
- return;
- }
- int32_t initialBufIdx = fBufIdx;
- if (fBufIdx == fStartBufIdx) {
- // At start of cache. Prepend to it.
- populatePreceding(status);
- } else {
- // Cache already holds the next boundary
- fBufIdx = modChunkSize(fBufIdx - 1);
- fTextIdx = fBoundaries[fBufIdx];
- }
- fBI->fDone = (fBufIdx == initialBufIdx);
- fBI->fPosition = fTextIdx;
- fBI->fRuleStatusIndex = fStatuses[fBufIdx];
- return;
-}
-
-
-UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) {
- if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) {
- return FALSE;
- }
- if (pos == fBoundaries[fStartBufIdx]) {
- // Common case: seek(0), from BreakIterator::first()
- fBufIdx = fStartBufIdx;
- fTextIdx = fBoundaries[fBufIdx];
- return TRUE;
- }
- if (pos == fBoundaries[fEndBufIdx]) {
- fBufIdx = fEndBufIdx;
- fTextIdx = fBoundaries[fBufIdx];
- return TRUE;
- }
-
- int32_t min = fStartBufIdx;
- int32_t max = fEndBufIdx;
- while (min != max) {
- int32_t probe = (min + max + (min>max ? CACHE_SIZE : 0)) / 2;
- probe = modChunkSize(probe);
- if (fBoundaries[probe] > pos) {
- max = probe;
- } else {
- min = modChunkSize(probe + 1);
- }
- }
- U_ASSERT(fBoundaries[max] > pos);
- fBufIdx = modChunkSize(max - 1);
- fTextIdx = fBoundaries[fBufIdx];
- U_ASSERT(fTextIdx <= pos);
- return TRUE;
-}
-
-
-UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
- if (U_FAILURE(status)) {
- return FALSE;
- }
- U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);
-
- // Find a boundary somewhere in the vicinity of the requested position.
- // Depending on the safe rules and the text data, it could be either before, at, or after
- // the requested position.
-
-
- // If the requested position is not near already cached positions, clear the existing cache,
- // find a near-by boundary and begin new cache contents there.
-
- if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
- int32_t aBoundary = 0;
- int32_t ruleStatusIndex = 0;
- if (position > 20) {
- int32_t backupPos = fBI->handleSafePrevious(position);
-
- if (backupPos > 0) {
- // Advance to the boundary following the backup position.
- // There is a complication: the safe reverse rules identify pairs of code points
- // that are safe. If advancing from the safe point moves forwards by less than
- // two code points, we need to advance one more time to ensure that the boundary
- // is good, including a correct rules status value.
- //
- fBI->fPosition = backupPos;
- aBoundary = fBI->handleNext();
- if (aBoundary <= backupPos + 4) {
- // +4 is a quick test for possibly having advanced only one codepoint.
- // Four being the length of the longest potential code point, a supplementary in UTF-8
- utext_setNativeIndex(&fBI->fText, aBoundary);
- if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) {
- // The initial handleNext() only advanced by a single code point. Go again.
- aBoundary = fBI->handleNext(); // Safe rules identify safe pairs.
- }
- }
- ruleStatusIndex = fBI->fRuleStatusIndex;
- }
- }
- reset(aBoundary, ruleStatusIndex); // Reset cache to hold aBoundary as a single starting point.
- }
-
- // Fill in boundaries between existing cache content and the new requested position.
-
- if (fBoundaries[fEndBufIdx] < position) {
- // The last position in the cache precedes the requested position.
- // Add following position(s) to the cache.
- while (fBoundaries[fEndBufIdx] < position) {
- if (!populateFollowing()) {
- UPRV_UNREACHABLE;
- }
- }
- fBufIdx = fEndBufIdx; // Set iterator position to the end of the buffer.
- fTextIdx = fBoundaries[fBufIdx]; // Required because populateFollowing may add extra boundaries.
- while (fTextIdx > position) { // Move backwards to a position at or preceding the requested pos.
- previous(status);
- }
- return true;
- }
-
- if (fBoundaries[fStartBufIdx] > position) {
- // The first position in the cache is beyond the requested position.
- // back up more until we get a boundary <= the requested position.
- while (fBoundaries[fStartBufIdx] > position) {
- populatePreceding(status);
- }
- fBufIdx = fStartBufIdx; // Set iterator position to the start of the buffer.
- fTextIdx = fBoundaries[fBufIdx]; // Required because populatePreceding may add extra boundaries.
- while (fTextIdx < position) { // Move forwards to a position at or following the requested pos.
- next();
- }
- if (fTextIdx > position) {
- // If position is not itself a boundary, the next() loop above will overshoot.
- // Back up one, leaving cache position at the boundary preceding the requested position.
- previous(status);
- }
- return true;
- }
-
- U_ASSERT(fTextIdx == position);
- return true;
-}
-
-
-
-UBool RuleBasedBreakIterator::BreakCache::populateFollowing() {
- int32_t fromPosition = fBoundaries[fEndBufIdx];
- int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx];
- int32_t pos = 0;
- int32_t ruleStatusIdx = 0;
-
- if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
- addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
- return TRUE;
- }
-
- fBI->fPosition = fromPosition;
- pos = fBI->handleNext();
- if (pos == UBRK_DONE) {
- return FALSE;
- }
-
- ruleStatusIdx = fBI->fRuleStatusIndex;
- if (fBI->fDictionaryCharCount > 0) {
- // The text segment obtained from the rules includes dictionary characters.
- // Subdivide it, with subdivided results going into the dictionary cache.
- fBI->fDictionaryCache->populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx);
- if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
- addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
- return TRUE;
- // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
- // But be careful with interactions with populateNear().
- }
- }
-
- // Rule based segment did not include dictionary characters.
- // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them,
- // meaning that we didn't take the return, above.
- // Add its end point to the cache.
- addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
-
- // Add several non-dictionary boundaries at this point, to optimize straight forward iteration.
- // (subsequent calls to BreakIterator::next() will take the fast path, getting cached results.
- //
- for (int count=0; count<6; ++count) {
- pos = fBI->handleNext();
- if (pos == UBRK_DONE || fBI->fDictionaryCharCount > 0) {
- break;
- }
- addFollowing(pos, fBI->fRuleStatusIndex, RetainCachePosition);
- }
-
- return TRUE;
-}
-
-
-UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) {
- if (U_FAILURE(status)) {
- return FALSE;
- }
-
- int32_t fromPosition = fBoundaries[fStartBufIdx];
- if (fromPosition == 0) {
- return FALSE;
- }
-
- int32_t position = 0;
- int32_t positionStatusIdx = 0;
-
- if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
- addPreceding(position, positionStatusIdx, UpdateCachePosition);
- return TRUE;
- }
-
- int32_t backupPosition = fromPosition;
-
- // Find a boundary somewhere preceding the first already-cached boundary
- do {
- backupPosition = backupPosition - 30;
- if (backupPosition <= 0) {
- backupPosition = 0;
- } else {
- backupPosition = fBI->handleSafePrevious(backupPosition);
- }
- if (backupPosition == UBRK_DONE || backupPosition == 0) {
- position = 0;
- positionStatusIdx = 0;
- } else {
- // Advance to the boundary following the backup position.
- // There is a complication: the safe reverse rules identify pairs of code points
- // that are safe. If advancing from the safe point moves forwards by less than
- // two code points, we need to advance one more time to ensure that the boundary
- // is good, including a correct rules status value.
- //
- fBI->fPosition = backupPosition;
- position = fBI->handleNext();
- if (position <= backupPosition + 4) {
- // +4 is a quick test for possibly having advanced only one codepoint.
- // Four being the length of the longest potential code point, a supplementary in UTF-8
- utext_setNativeIndex(&fBI->fText, position);
- if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) {
- // The initial handleNext() only advanced by a single code point. Go again.
- position = fBI->handleNext(); // Safe rules identify safe pairs.
- }
- }
- positionStatusIdx = fBI->fRuleStatusIndex;
- }
- } while (position >= fromPosition);
-
- // Find boundaries between the one we just located and the first already-cached boundary
- // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer..
-
- fSideBuffer.removeAllElements();
- fSideBuffer.addElement(position, status);
- fSideBuffer.addElement(positionStatusIdx, status);
-
- do {
- int32_t prevPosition = fBI->fPosition = position;
- int32_t prevStatusIdx = positionStatusIdx;
- position = fBI->handleNext();
- positionStatusIdx = fBI->fRuleStatusIndex;
- if (position == UBRK_DONE) {
- break;
- }
-
- UBool segmentHandledByDictionary = FALSE;
- if (fBI->fDictionaryCharCount != 0) {
- // Segment from the rules includes dictionary characters.
- // Subdivide it, with subdivided results going into the dictionary cache.
- int32_t dictSegEndPosition = position;
- fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
- while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
- segmentHandledByDictionary = true;
- U_ASSERT(position > prevPosition);
- if (position >= fromPosition) {
- break;
- }
- U_ASSERT(position <= dictSegEndPosition);
- fSideBuffer.addElement(position, status);
- fSideBuffer.addElement(positionStatusIdx, status);
- prevPosition = position;
- }
- U_ASSERT(position==dictSegEndPosition || position>=fromPosition);
- }
-
- if (!segmentHandledByDictionary && position < fromPosition) {
- fSideBuffer.addElement(position, status);
- fSideBuffer.addElement(positionStatusIdx, status);
- }
- } while (position < fromPosition);
-
- // Move boundaries from the side buffer to the main circular buffer.
- UBool success = FALSE;
- if (!fSideBuffer.isEmpty()) {
- positionStatusIdx = fSideBuffer.popi();
- position = fSideBuffer.popi();
- addPreceding(position, positionStatusIdx, UpdateCachePosition);
- success = TRUE;
- }
-
- while (!fSideBuffer.isEmpty()) {
- positionStatusIdx = fSideBuffer.popi();
- position = fSideBuffer.popi();
- if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
- // No space in circular buffer to hold a new preceding result while
- // also retaining the current cache (iteration) position.
- // Bailing out is safe; the cache will refill again if needed.
- break;
- }
- }
-
- return success;
-}
-
-
-void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
- U_ASSERT(position > fBoundaries[fEndBufIdx]);
- U_ASSERT(ruleStatusIdx <= UINT16_MAX);
- int32_t nextIdx = modChunkSize(fEndBufIdx + 1);
- if (nextIdx == fStartBufIdx) {
- fStartBufIdx = modChunkSize(fStartBufIdx + 6); // TODO: experiment. Probably revert to 1.
- }
- fBoundaries[nextIdx] = position;
- fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
- fEndBufIdx = nextIdx;
- if (update == UpdateCachePosition) {
- // Set current position to the newly added boundary.
- fBufIdx = nextIdx;
- fTextIdx = position;
- } else {
- // Retaining the original cache position.
- // Check if the added boundary wraps around the buffer, and would over-write the original position.
- // It's the responsibility of callers of this function to not add too many.
- U_ASSERT(nextIdx != fBufIdx);
- }
-}
-
-bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
- U_ASSERT(position < fBoundaries[fStartBufIdx]);
- U_ASSERT(ruleStatusIdx <= UINT16_MAX);
- int32_t nextIdx = modChunkSize(fStartBufIdx - 1);
- if (nextIdx == fEndBufIdx) {
- if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
- // Failure. The insertion of the new boundary would claim the buffer position that is the
- // current iteration position. And we also want to retain the current iteration position.
- // (The buffer is already completely full of entries that precede the iteration position.)
- return false;
- }
- fEndBufIdx = modChunkSize(fEndBufIdx - 1);
- }
- fBoundaries[nextIdx] = position;
- fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
- fStartBufIdx = nextIdx;
- if (update == UpdateCachePosition) {
- fBufIdx = nextIdx;
- fTextIdx = position;
- }
- return true;
-}
-
-
-void RuleBasedBreakIterator::BreakCache::dumpCache() {
-#ifdef RBBI_DEBUG
- RBBIDebugPrintf("fTextIdx:%d fBufIdx:%d\n", fTextIdx, fBufIdx);
- for (int32_t i=fStartBufIdx; ; i=modChunkSize(i+1)) {
- RBBIDebugPrintf("%d %d\n", i, fBoundaries[i]);
- if (i == fEndBufIdx) {
- break;
- }
- }
-#endif
-}
-
-U_NAMESPACE_END
-
-#endif // #if !UCONFIG_NO_BREAK_ITERATION
diff --git a/contrib/libs/icu/common/rbbi_cache.h b/contrib/libs/icu/common/rbbi_cache.h
deleted file mode 100644
index 7991d6c0c7b..00000000000
--- a/contrib/libs/icu/common/rbbi_cache.h
+++ /dev/null
@@ -1,203 +0,0 @@
-// Copyright (C) 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-// file: rbbi_cache.h
-//
-#ifndef RBBI_CACHE_H
-#define RBBI_CACHE_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/rbbi.h"
-#include "unicode/uobject.h"
-
-#include "uvectr32.h"
-
-U_NAMESPACE_BEGIN
-
-/* DictionaryCache stores the boundaries obtained from a run of dictionary characters.
- * Dictionary boundaries are moved first to this cache, then from here
- * to the main BreakCache, where they may inter-leave with non-dictionary
- * boundaries. The public BreakIterator API always fetches directly
- * from the main BreakCache, not from here.
- *
- * In common situations, the number of boundaries in a single dictionary run
- * should be quite small, it will be terminated by punctuation, spaces,
- * or any other non-dictionary characters. The main BreakCache may end
- * up with boundaries from multiple dictionary based runs.
- *
- * The boundaries are stored in a simple ArrayList (vector), with the
- * assumption that they will be accessed sequentially.
- */
-class RuleBasedBreakIterator::DictionaryCache: public UMemory {
- public:
- DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status);
- ~DictionaryCache();
-
- void reset();
-
- UBool following(int32_t fromPos, int32_t *pos, int32_t *statusIndex);
- UBool preceding(int32_t fromPos, int32_t *pos, int32_t *statusIndex);
-
- /**
- * Populate the cache with the dictionary based boundaries within a region of text.
- * @param startPos The start position of a range of text
- * @param endPos The end position of a range of text
- * @param firstRuleStatus The rule status index that applies to the break at startPos
- * @param otherRuleStatus The rule status index that applies to boundaries other than startPos
- * @internal
- */
- void populateDictionary(int32_t startPos, int32_t endPos,
- int32_t firstRuleStatus, int32_t otherRuleStatus);
-
-
-
- RuleBasedBreakIterator *fBI;
-
- UVector32 fBreaks; // A vector containing the boundaries.
- int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following()
- // or preceding(). Optimizes sequential access.
- int32_t fStart; // Text position of first boundary in cache.
- int32_t fLimit; // Last boundary in cache. Which is the limit of the
- // text segment being handled by the dictionary.
- int32_t fFirstRuleStatusIndex; // Rule status info for first boundary.
- int32_t fOtherRuleStatusIndex; // Rule status info for 2nd through last boundaries.
-};
-
-
-/*
- * class BreakCache
- *
- * Cache of break boundary positions and rule status values.
- * Break iterator API functions, next(), previous(), etc., will use cached results
- * when possible, and otherwise cache new results as they are obtained.
- *
- * Uniformly caches both dictionary and rule based (non-dictionary) boundaries.
- *
- * The cache is implemented as a single circular buffer.
- */
-
-/*
- * size of the circular cache buffer.
- */
-
-class RuleBasedBreakIterator::BreakCache: public UMemory {
- public:
- BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status);
- virtual ~BreakCache();
- void reset(int32_t pos = 0, int32_t ruleStatus = 0);
- void next() { if (fBufIdx == fEndBufIdx) {
- nextOL();
- } else {
- fBufIdx = modChunkSize(fBufIdx + 1);
- fTextIdx = fBI->fPosition = fBoundaries[fBufIdx];
- fBI->fRuleStatusIndex = fStatuses[fBufIdx];
- }
- }
-
-
- void nextOL();
- void previous(UErrorCode &status);
-
- // Move the iteration state to the position following the startPosition.
- // Input position must be pinned to the input length.
- void following(int32_t startPosition, UErrorCode &status);
-
- void preceding(int32_t startPosition, UErrorCode &status);
-
- /*
- * Update the state of the public BreakIterator (fBI) to reflect the
- * current state of the break iterator cache (this).
- */
- int32_t current();
-
- /**
- * Add boundaries to the cache near the specified position.
- * The given position need not be a boundary itself.
- * The input position must be within the range of the text, and
- * on a code point boundary.
- * If the requested position is a break boundary, leave the iteration
- * position on it.
- * If the requested position is not a boundary, leave the iteration
- * position on the preceding boundary and include both the
- * preceding and following boundaries in the cache.
- * Additional boundaries, either preceding or following, may be added
- * to the cache as a side effect.
- *
- * Return FALSE if the operation failed.
- */
- UBool populateNear(int32_t position, UErrorCode &status);
-
- /**
- * Add boundary(s) to the cache following the current last boundary.
- * Return FALSE if at the end of the text, and no more boundaries can be added.
- * Leave iteration position at the first newly added boundary, or unchanged if no boundary was added.
- */
- UBool populateFollowing();
-
- /**
- * Add one or more boundaries to the cache preceding the first currently cached boundary.
- * Leave the iteration position on the first added boundary.
- * Return false if no boundaries could be added (if at the start of the text.)
- */
- UBool populatePreceding(UErrorCode &status);
-
- enum UpdatePositionValues {
- RetainCachePosition = 0,
- UpdateCachePosition = 1
- };
-
- /*
- * Add the boundary following the current position.
- * The current position can be left as it was, or changed to the newly added boundary,
- * as specified by the update parameter.
- */
- void addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
-
-
- /*
- * Add the boundary preceding the current position.
- * The current position can be left as it was, or changed to the newly added boundary,
- * as specified by the update parameter.
- */
- bool addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
-
- /**
- * Set the cache position to the specified position, or, if the position
- * falls between to cached boundaries, to the preceding boundary.
- * Fails if the requested position is outside of the range of boundaries currently held by the cache.
- * The startPosition must be on a code point boundary.
- *
- * Return TRUE if successful, FALSE if the specified position is after
- * the last cached boundary or before the first.
- */
- UBool seek(int32_t startPosition);
-
- void dumpCache();
-
- private:
- static inline int32_t modChunkSize(int index) { return index & (CACHE_SIZE - 1); }
-
- static constexpr int32_t CACHE_SIZE = 128;
- static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two.");
-
- RuleBasedBreakIterator *fBI;
- int32_t fStartBufIdx;
- int32_t fEndBufIdx; // inclusive
-
- int32_t fTextIdx;
- int32_t fBufIdx;
-
- int32_t fBoundaries[CACHE_SIZE];
- uint16_t fStatuses[CACHE_SIZE];
-
- UVector32 fSideBuffer;
-};
-
-U_NAMESPACE_END
-
-#endif // #if !UCONFIG_NO_BREAK_ITERATION
-
-#endif // RBBI_CACHE_H
diff --git a/contrib/libs/icu/common/rbbidata.cpp b/contrib/libs/icu/common/rbbidata.cpp
deleted file mode 100644
index 1d4c9e5895f..00000000000
--- a/contrib/libs/icu/common/rbbidata.cpp
+++ /dev/null
@@ -1,425 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-***************************************************************************
-* Copyright (C) 1999-2014 International Business Machines Corporation *
-* and others. All rights reserved. *
-***************************************************************************
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/utypes.h"
-#include "rbbidata.h"
-#include "rbbirb.h"
-#include "utrie2.h"
-#include "udatamem.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "umutex.h"
-
-#include "uassert.h"
-
-
-U_NAMESPACE_BEGIN
-
-//-----------------------------------------------------------------------------
-//
-// Constructors.
-//
-//-----------------------------------------------------------------------------
-RBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status) {
- init0();
- init(data, status);
-}
-
-RBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt, UErrorCode &status) {
- init0();
- init(data, status);
- fDontFreeData = TRUE;
-}
-
-RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
- init0();
- if (U_FAILURE(status)) {
- return;
- }
- const DataHeader *dh = udm->pHeader;
- int32_t headerSize = dh->dataHeader.headerSize;
- if ( !(headerSize >= 20 &&
- dh->info.isBigEndian == U_IS_BIG_ENDIAN &&
- dh->info.charsetFamily == U_CHARSET_FAMILY &&
- dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk "
- dh->info.dataFormat[1] == 0x72 &&
- dh->info.dataFormat[2] == 0x6b &&
- dh->info.dataFormat[3] == 0x20 &&
- isDataVersionAcceptable(dh->info.formatVersion))
- ) {
- status = U_INVALID_FORMAT_ERROR;
- return;
- }
- const char *dataAsBytes = reinterpret_cast<const char *>(dh);
- const RBBIDataHeader *rbbidh = reinterpret_cast<const RBBIDataHeader *>(dataAsBytes + headerSize);
- init(rbbidh, status);
- fUDataMem = udm;
-}
-
-UBool RBBIDataWrapper::isDataVersionAcceptable(const UVersionInfo version) {
- return RBBI_DATA_FORMAT_VERSION[0] == version[0];
-}
-
-
-//-----------------------------------------------------------------------------
-//
-// init(). Does most of the work of construction, shared between the
-// constructors.
-//
-//-----------------------------------------------------------------------------
-void RBBIDataWrapper::init0() {
- fHeader = NULL;
- fForwardTable = NULL;
- fReverseTable = NULL;
- fRuleSource = NULL;
- fRuleStatusTable = NULL;
- fTrie = NULL;
- fUDataMem = NULL;
- fRefCount = 0;
- fDontFreeData = TRUE;
-}
-
-void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
- if (U_FAILURE(status)) {
- return;
- }
- fHeader = data;
- if (fHeader->fMagic != 0xb1a0 || !isDataVersionAcceptable(fHeader->fFormatVersion)) {
- status = U_INVALID_FORMAT_ERROR;
- return;
- }
- // Note: in ICU version 3.2 and earlier, there was a formatVersion 1
- // that is no longer supported. At that time fFormatVersion was
- // an int32_t field, rather than an array of 4 bytes.
-
- fDontFreeData = FALSE;
- if (data->fFTableLen != 0) {
- fForwardTable = (RBBIStateTable *)((char *)data + fHeader->fFTable);
- }
- if (data->fRTableLen != 0) {
- fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable);
- }
-
- fTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
- (uint8_t *)data + fHeader->fTrie,
- fHeader->fTrieLen,
- NULL, // *actual length
- &status);
- if (U_FAILURE(status)) {
- return;
- }
-
- fRuleSource = (UChar *)((char *)data + fHeader->fRuleSource);
- fRuleString.setTo(TRUE, fRuleSource, -1);
- U_ASSERT(data->fRuleSourceLen > 0);
-
- fRuleStatusTable = (int32_t *)((char *)data + fHeader->fStatusTable);
- fStatusMaxIdx = data->fStatusTableLen / sizeof(int32_t);
-
- fRefCount = 1;
-
-#ifdef RBBI_DEBUG
- char *debugEnv = getenv("U_RBBIDEBUG");
- if (debugEnv && uprv_strstr(debugEnv, "data")) {this->printData();}
-#endif
-}
-
-
-//-----------------------------------------------------------------------------
-//
-// Destructor. Don't call this - use removeReference() instead.
-//
-//-----------------------------------------------------------------------------
-RBBIDataWrapper::~RBBIDataWrapper() {
- U_ASSERT(fRefCount == 0);
- utrie2_close(fTrie);
- fTrie = NULL;
- if (fUDataMem) {
- udata_close(fUDataMem);
- } else if (!fDontFreeData) {
- uprv_free((void *)fHeader);
- }
-}
-
-
-
-//-----------------------------------------------------------------------------
-//
-// Operator == Consider two RBBIDataWrappers to be equal if they
-// refer to the same underlying data. Although
-// the data wrappers are normally shared between
-// iterator instances, it's possible to independently
-// open the same data twice, and get two instances, which
-// should still be ==.
-//
-//-----------------------------------------------------------------------------
-UBool RBBIDataWrapper::operator ==(const RBBIDataWrapper &other) const {
- if (fHeader == other.fHeader) {
- return TRUE;
- }
- if (fHeader->fLength != other.fHeader->fLength) {
- return FALSE;
- }
- if (uprv_memcmp(fHeader, other.fHeader, fHeader->fLength) == 0) {
- return TRUE;
- }
- return FALSE;
-}
-
-int32_t RBBIDataWrapper::hashCode() {
- return fHeader->fFTableLen;
-}
-
-
-
-//-----------------------------------------------------------------------------
-//
-// Reference Counting. A single RBBIDataWrapper object is shared among
-// however many RuleBasedBreakIterator instances are
-// referencing the same data.
-//
-//-----------------------------------------------------------------------------
-void RBBIDataWrapper::removeReference() {
- if (umtx_atomic_dec(&fRefCount) == 0) {
- delete this;
- }
-}
-
-
-RBBIDataWrapper *RBBIDataWrapper::addReference() {
- umtx_atomic_inc(&fRefCount);
- return this;
-}
-
-
-
-//-----------------------------------------------------------------------------
-//
-// getRuleSourceString
-//
-//-----------------------------------------------------------------------------
-const UnicodeString &RBBIDataWrapper::getRuleSourceString() const {
- return fRuleString;
-}
-
-
-//-----------------------------------------------------------------------------
-//
-// print - debugging function to dump the runtime data tables.
-//
-//-----------------------------------------------------------------------------
-#ifdef RBBI_DEBUG
-void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *table) {
- uint32_t c;
- uint32_t s;
-
- RBBIDebugPrintf(" %s\n", heading);
-
- RBBIDebugPrintf("State | Acc LA TagIx");
- for (c=0; c<fHeader->fCatCount; c++) {RBBIDebugPrintf("%3d ", c);}
- RBBIDebugPrintf("\n------|---------------"); for (c=0;c<fHeader->fCatCount; c++) {
- RBBIDebugPrintf("----");
- }
- RBBIDebugPrintf("\n");
-
- if (table == NULL) {
- RBBIDebugPrintf(" N U L L T A B L E\n\n");
- return;
- }
- for (s=0; s<table->fNumStates; s++) {
- RBBIStateTableRow *row = (RBBIStateTableRow *)
- (table->fTableData + (table->fRowLen * s));
- RBBIDebugPrintf("%4d | %3d %3d %3d ", s, row->fAccepting, row->fLookAhead, row->fTagIdx);
- for (c=0; c<fHeader->fCatCount; c++) {
- RBBIDebugPrintf("%3d ", row->fNextState[c]);
- }
- RBBIDebugPrintf("\n");
- }
- RBBIDebugPrintf("\n");
-}
-#endif
-
-
-void RBBIDataWrapper::printData() {
-#ifdef RBBI_DEBUG
- RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader);
- RBBIDebugPrintf(" Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1],
- fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]);
- RBBIDebugPrintf(" total length of data = %d\n", fHeader->fLength);
- RBBIDebugPrintf(" number of character categories = %d\n\n", fHeader->fCatCount);
-
- printTable("Forward State Transition Table", fForwardTable);
- printTable("Reverse State Transition Table", fReverseTable);
-
- RBBIDebugPrintf("\nOrignal Rules source:\n");
- for (int32_t c=0; fRuleSource[c] != 0; c++) {
- RBBIDebugPrintf("%c", fRuleSource[c]);
- }
- RBBIDebugPrintf("\n\n");
-#endif
-}
-
-
-U_NAMESPACE_END
-U_NAMESPACE_USE
-
-//-----------------------------------------------------------------------------
-//
-// ubrk_swap - byte swap and char encoding swap of RBBI data
-//
-//-----------------------------------------------------------------------------
-
-U_CAPI int32_t U_EXPORT2
-ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
- UErrorCode *status) {
-
- if (status == NULL || U_FAILURE(*status)) {
- return 0;
- }
- if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
- *status=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- //
- // Check that the data header is for break data.
- // (Header contents are defined in genbrk.cpp)
- //
- const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4);
- if(!( pInfo->dataFormat[0]==0x42 && /* dataFormat="Brk " */
- pInfo->dataFormat[1]==0x72 &&
- pInfo->dataFormat[2]==0x6b &&
- pInfo->dataFormat[3]==0x20 &&
- RBBIDataWrapper::isDataVersionAcceptable(pInfo->formatVersion) )) {
- udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
- pInfo->dataFormat[0], pInfo->dataFormat[1],
- pInfo->dataFormat[2], pInfo->dataFormat[3],
- pInfo->formatVersion[0]);
- *status=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- //
- // Swap the data header. (This is the generic ICU Data Header, not the RBBI Specific
- // RBBIDataHeader). This swap also conveniently gets us
- // the size of the ICU d.h., which lets us locate the start
- // of the RBBI specific data.
- //
- int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
-
-
- //
- // Get the RBBI Data Header, and check that it appears to be OK.
- //
- const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
- RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
- if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
- !RBBIDataWrapper::isDataVersionAcceptable(rbbiDH->fFormatVersion) ||
- ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) {
- udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
- *status=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- //
- // Preflight operation? Just return the size
- //
- int32_t breakDataLength = ds->readUInt32(rbbiDH->fLength);
- int32_t totalSize = headerSize + breakDataLength;
- if (length < 0) {
- return totalSize;
- }
-
- //
- // Check that length passed in is consistent with length from RBBI data header.
- //
- if (length < totalSize) {
- udata_printError(ds, "ubrk_swap(): too few bytes (%d after ICU Data header) for break data.\n",
- breakDataLength);
- *status=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
-
- //
- // Swap the Data. Do the data itself first, then the RBBI Data Header, because
- // we need to reference the header to locate the data, and an
- // inplace swap of the header leaves it unusable.
- //
- uint8_t *outBytes = (uint8_t *)outData + headerSize;
- RBBIDataHeader *outputDH = (RBBIDataHeader *)outBytes;
-
- int32_t tableStartOffset;
- int32_t tableLength;
-
- //
- // If not swapping in place, zero out the output buffer before starting.
- // Individual tables and other data items within are aligned to 8 byte boundaries
- // when originally created. Any unused space between items needs to be zero.
- //
- if (inBytes != outBytes) {
- uprv_memset(outBytes, 0, breakDataLength);
- }
-
- //
- // Each state table begins with several 32 bit fields. Calculate the size
- // in bytes of these.
- //
- int32_t topSize = offsetof(RBBIStateTable, fTableData);
-
- // Forward state table.
- tableStartOffset = ds->readUInt32(rbbiDH->fFTable);
- tableLength = ds->readUInt32(rbbiDH->fFTableLen);
-
- if (tableLength > 0) {
- ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
- outBytes+tableStartOffset, status);
- ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
- outBytes+tableStartOffset+topSize, status);
- }
-
- // Reverse state table. Same layout as forward table, above.
- tableStartOffset = ds->readUInt32(rbbiDH->fRTable);
- tableLength = ds->readUInt32(rbbiDH->fRTableLen);
-
- if (tableLength > 0) {
- ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
- outBytes+tableStartOffset, status);
- ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
- outBytes+tableStartOffset+topSize, status);
- }
-
- // Trie table for character categories
- utrie2_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
- outBytes+ds->readUInt32(rbbiDH->fTrie), status);
-
- // Source Rules Text. It's UChar data
- ds->swapArray16(ds, inBytes+ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen),
- outBytes+ds->readUInt32(rbbiDH->fRuleSource), status);
-
- // Table of rule status values. It's all int32_t values
- ds->swapArray32(ds, inBytes+ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen),
- outBytes+ds->readUInt32(rbbiDH->fStatusTable), status);
-
- // And, last, the header.
- // It is all int32_t values except for fFormatVersion, which is an array of four bytes.
- // Swap the whole thing as int32_t, then re-swap the one field.
- //
- ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status);
- ds->swapArray32(ds, outputDH->fFormatVersion, 4, outputDH->fFormatVersion, status);
-
- return totalSize;
-}
-
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/contrib/libs/icu/common/rbbidata.h b/contrib/libs/icu/common/rbbidata.h
deleted file mode 100644
index 7b9b8d82526..00000000000
--- a/contrib/libs/icu/common/rbbidata.h
+++ /dev/null
@@ -1,199 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1999-2014 International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: rbbidata.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* RBBI data formats Includes
-*
-* Structs that describe the format of the Binary RBBI data,
-* as it is stored in ICU's data file.
-*
-* RBBIDataWrapper - Instances of this class sit between the
-* raw data structs and the RuleBasedBreakIterator objects
-* that are created by applications. The wrapper class
-* provides reference counting for the underlying data,
-* and direct pointers to data that would not otherwise
-* be accessible without ugly pointer arithmetic. The
-* wrapper does not attempt to provide any higher level
-* abstractions for the data itself.
-*
-* There will be only one instance of RBBIDataWrapper for any
-* set of RBBI run time data being shared by instances
-* (clones) of RuleBasedBreakIterator.
-*/
-
-#ifndef __RBBIDATA_H__
-#define __RBBIDATA_H__
-
-#include "unicode/utypes.h"
-#include "unicode/udata.h"
-#include "udataswp.h"
-
-/**
- * Swap RBBI data. See udataswp.h.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-ubrk_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-#ifdef __cplusplus
-
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-#include "unicode/uversion.h"
-#include "umutex.h"
-#include "utrie2.h"
-
-U_NAMESPACE_BEGIN
-
-// The current RBBI data format version.
-static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {5, 0, 0, 0};
-
-/*
- * The following structs map exactly onto the raw data from ICU common data file.
- */
-struct RBBIDataHeader {
- uint32_t fMagic; /* == 0xb1a0 */
- UVersionInfo fFormatVersion; /* Data Format. Same as the value in struct UDataInfo */
- /* if there is one associated with this data. */
- /* (version originates in rbbi, is copied to UDataInfo) */
- uint32_t fLength; /* Total length in bytes of this RBBI Data, */
- /* including all sections, not just the header. */
- uint32_t fCatCount; /* Number of character categories. */
-
- /* */
- /* Offsets and sizes of each of the subsections within the RBBI data. */
- /* All offsets are bytes from the start of the RBBIDataHeader. */
- /* All sizes are in bytes. */
- /* */
- uint32_t fFTable; /* Offset to the forward state transition table. */
- uint32_t fFTableLen;
- uint32_t fRTable; /* Offset to the reverse state transition table. */
- uint32_t fRTableLen;
- uint32_t fTrie; /* Offset to Trie data for character categories */
- uint32_t fTrieLen;
- uint32_t fRuleSource; /* Offset to the source for the break */
- uint32_t fRuleSourceLen; /* rules. Stored UChar *. */
- uint32_t fStatusTable; /* Offset to the table of rule status values */
- uint32_t fStatusTableLen;
-
- uint32_t fReserved[6]; /* Reserved for expansion */
-
-};
-
-
-
-struct RBBIStateTableRow {
- int16_t fAccepting; /* Non-zero if this row is for an accepting state. */
- /* Value 0: not an accepting state. */
- /* -1: Unconditional Accepting state. */
- /* positive: Look-ahead match has completed. */
- /* Actual boundary position happened earlier */
- /* Value here == fLookAhead in earlier */
- /* state, at actual boundary pos. */
- int16_t fLookAhead; /* Non-zero if this row is for a state that */
- /* corresponds to a '/' in the rule source. */
- /* Value is the same as the fAccepting */
- /* value for the rule (which will appear */
- /* in a different state.) */
- int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */
- /* from a rule. Value is the index in the */
- /* StatusTable of the set of matching */
- /* tags (rule status values) */
- int16_t fReserved;
- uint16_t fNextState[1]; /* Next State, indexed by char category. */
- /* Variable-length array declared with length 1 */
- /* to disable bounds checkers. */
- /* Array Size is actually fData->fHeader->fCatCount*/
- /* CAUTION: see RBBITableBuilder::getTableSize() */
- /* before changing anything here. */
-};
-
-
-struct RBBIStateTable {
- uint32_t fNumStates; /* Number of states. */
- uint32_t fRowLen; /* Length of a state table row, in bytes. */
- uint32_t fFlags; /* Option Flags for this state table */
- uint32_t fReserved; /* reserved */
- char fTableData[1]; /* First RBBIStateTableRow begins here. */
- /* Variable-length array declared with length 1 */
- /* to disable bounds checkers. */
- /* (making it char[] simplifies ugly address */
- /* arithmetic for indexing variable length rows.) */
-};
-
-typedef enum {
- RBBI_LOOKAHEAD_HARD_BREAK = 1,
- RBBI_BOF_REQUIRED = 2
-} RBBIStateTableFlags;
-
-
-/* */
-/* The reference counting wrapper class */
-/* */
-class RBBIDataWrapper : public UMemory {
-public:
- enum EDontAdopt {
- kDontAdopt
- };
- RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
- RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status);
- RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
- ~RBBIDataWrapper();
-
- static UBool isDataVersionAcceptable(const UVersionInfo version);
-
- void init0();
- void init(const RBBIDataHeader *data, UErrorCode &status);
- RBBIDataWrapper *addReference();
- void removeReference();
- UBool operator ==(const RBBIDataWrapper &other) const;
- int32_t hashCode();
- const UnicodeString &getRuleSourceString() const;
- void printData();
- void printTable(const char *heading, const RBBIStateTable *table);
-
- /* */
- /* Pointers to items within the data */
- /* */
- const RBBIDataHeader *fHeader;
- const RBBIStateTable *fForwardTable;
- const RBBIStateTable *fReverseTable;
- const UChar *fRuleSource;
- const int32_t *fRuleStatusTable;
-
- /* number of int32_t values in the rule status table. Used to sanity check indexing */
- int32_t fStatusMaxIdx;
-
- UTrie2 *fTrie;
-
-private:
- u_atomic_int32_t fRefCount;
- UDataMemory *fUDataMem;
- UnicodeString fRuleString;
- UBool fDontFreeData;
-
- RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */
- RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */
-};
-
-
-
-U_NAMESPACE_END
-
-U_CFUNC UBool rbbi_cleanup(void);
-
-#endif /* C++ */
-
-#endif
diff --git a/contrib/libs/icu/common/rbbinode.cpp b/contrib/libs/icu/common/rbbinode.cpp
deleted file mode 100644
index 69d84151fe8..00000000000
--- a/contrib/libs/icu/common/rbbinode.cpp
+++ /dev/null
@@ -1,372 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-***************************************************************************
-* Copyright (C) 2002-2016 International Business Machines Corporation *
-* and others. All rights reserved. *
-***************************************************************************
-*/
-
-//
-// File: rbbinode.cpp
-//
-// Implementation of class RBBINode, which represents a node in the
-// tree generated when parsing the Rules Based Break Iterator rules.
-//
-// This "Class" is actually closer to a struct.
-// Code using it is expected to directly access fields much of the time.
-//
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/unistr.h"
-#include "unicode/uniset.h"
-#include "unicode/uchar.h"
-#include "unicode/parsepos.h"
-
-#include "cstr.h"
-#include "uvector.h"
-
-#include "rbbirb.h"
-#include "rbbinode.h"
-
-#include "uassert.h"
-
-
-U_NAMESPACE_BEGIN
-
-#ifdef RBBI_DEBUG
-static int gLastSerial = 0;
-#endif
-
-
-//-------------------------------------------------------------------------
-//
-// Constructor. Just set the fields to reasonable default values.
-//
-//-------------------------------------------------------------------------
-RBBINode::RBBINode(NodeType t) : UMemory() {
-#ifdef RBBI_DEBUG
- fSerialNum = ++gLastSerial;
-#endif
- fType = t;
- fParent = NULL;
- fLeftChild = NULL;
- fRightChild = NULL;
- fInputSet = NULL;
- fFirstPos = 0;
- fLastPos = 0;
- fNullable = FALSE;
- fLookAheadEnd = FALSE;
- fRuleRoot = FALSE;
- fChainIn = FALSE;
- fVal = 0;
- fPrecedence = precZero;
-
- UErrorCode status = U_ZERO_ERROR;
- fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
- fLastPosSet = new UVector(status);
- fFollowPos = new UVector(status);
- if (t==opCat) {fPrecedence = precOpCat;}
- else if (t==opOr) {fPrecedence = precOpOr;}
- else if (t==opStart) {fPrecedence = precStart;}
- else if (t==opLParen) {fPrecedence = precLParen;}
-
-}
-
-
-RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
-#ifdef RBBI_DEBUG
- fSerialNum = ++gLastSerial;
-#endif
- fType = other.fType;
- fParent = NULL;
- fLeftChild = NULL;
- fRightChild = NULL;
- fInputSet = other.fInputSet;
- fPrecedence = other.fPrecedence;
- fText = other.fText;
- fFirstPos = other.fFirstPos;
- fLastPos = other.fLastPos;
- fNullable = other.fNullable;
- fVal = other.fVal;
- fRuleRoot = FALSE;
- fChainIn = other.fChainIn;
- UErrorCode status = U_ZERO_ERROR;
- fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
- fLastPosSet = new UVector(status);
- fFollowPos = new UVector(status);
-}
-
-
-//-------------------------------------------------------------------------
-//
-// Destructor. Deletes both this node AND any child nodes,
-// except in the case of variable reference nodes. For
-// these, the l. child points back to the definition, which
-// is common for all references to the variable, meaning
-// it can't be deleted here.
-//
-//-------------------------------------------------------------------------
-RBBINode::~RBBINode() {
- // printf("deleting node %8x serial %4d\n", this, this->fSerialNum);
- delete fInputSet;
- fInputSet = NULL;
-
- switch (this->fType) {
- case varRef:
- case setRef:
- // for these node types, multiple instances point to the same "children"
- // Storage ownership of children handled elsewhere. Don't delete here.
- break;
-
- default:
- delete fLeftChild;
- fLeftChild = NULL;
- delete fRightChild;
- fRightChild = NULL;
- }
-
-
- delete fFirstPosSet;
- delete fLastPosSet;
- delete fFollowPos;
-
-}
-
-
-//-------------------------------------------------------------------------
-//
-// cloneTree Make a copy of the subtree rooted at this node.
-// Discard any variable references encountered along the way,
-// and replace with copies of the variable's definitions.
-// Used to replicate the expression underneath variable
-// references in preparation for generating the DFA tables.
-//
-//-------------------------------------------------------------------------
-RBBINode *RBBINode::cloneTree() {
- RBBINode *n;
-
- if (fType == RBBINode::varRef) {
- // If the current node is a variable reference, skip over it
- // and clone the definition of the variable instead.
- n = fLeftChild->cloneTree();
- } else if (fType == RBBINode::uset) {
- n = this;
- } else {
- n = new RBBINode(*this);
- // Check for null pointer.
- if (n != NULL) {
- if (fLeftChild != NULL) {
- n->fLeftChild = fLeftChild->cloneTree();
- n->fLeftChild->fParent = n;
- }
- if (fRightChild != NULL) {
- n->fRightChild = fRightChild->cloneTree();
- n->fRightChild->fParent = n;
- }
- }
- }
- return n;
-}
-
-
-
-//-------------------------------------------------------------------------
-//
-// flattenVariables Walk a parse tree, replacing any variable
-// references with a copy of the variable's definition.
-// Aside from variables, the tree is not changed.
-//
-// Return the root of the tree. If the root was not a variable
-// reference, it remains unchanged - the root we started with
-// is the root we return. If, however, the root was a variable
-// reference, the root of the newly cloned replacement tree will
-// be returned, and the original tree deleted.
-//
-// This function works by recursively walking the tree
-// without doing anything until a variable reference is
-// found, then calling cloneTree() at that point. Any
-// nested references are handled by cloneTree(), not here.
-//
-//-------------------------------------------------------------------------
-RBBINode *RBBINode::flattenVariables() {
- if (fType == varRef) {
- RBBINode *retNode = fLeftChild->cloneTree();
- if (retNode != NULL) {
- retNode->fRuleRoot = this->fRuleRoot;
- retNode->fChainIn = this->fChainIn;
- }
- delete this; // TODO: undefined behavior. Fix.
- return retNode;
- }
-
- if (fLeftChild != NULL) {
- fLeftChild = fLeftChild->flattenVariables();
- fLeftChild->fParent = this;
- }
- if (fRightChild != NULL) {
- fRightChild = fRightChild->flattenVariables();
- fRightChild->fParent = this;
- }
- return this;
-}
-
-
-//-------------------------------------------------------------------------
-//
-// flattenSets Walk the parse tree, replacing any nodes of type setRef
-// with a copy of the expression tree for the set. A set's
-// equivalent expression tree is precomputed and saved as
-// the left child of the uset node.
-//
-//-------------------------------------------------------------------------
-void RBBINode::flattenSets() {
- U_ASSERT(fType != setRef);
-
- if (fLeftChild != NULL) {
- if (fLeftChild->fType==setRef) {
- RBBINode *setRefNode = fLeftChild;
- RBBINode *usetNode = setRefNode->fLeftChild;
- RBBINode *replTree = usetNode->fLeftChild;
- fLeftChild = replTree->cloneTree();
- fLeftChild->fParent = this;
- delete setRefNode;
- } else {
- fLeftChild->flattenSets();
- }
- }
-
- if (fRightChild != NULL) {
- if (fRightChild->fType==setRef) {
- RBBINode *setRefNode = fRightChild;
- RBBINode *usetNode = setRefNode->fLeftChild;
- RBBINode *replTree = usetNode->fLeftChild;
- fRightChild = replTree->cloneTree();
- fRightChild->fParent = this;
- delete setRefNode;
- } else {
- fRightChild->flattenSets();
- }
- }
-}
-
-
-
-//-------------------------------------------------------------------------
-//
-// findNodes() Locate all the nodes of the specified type, starting
-// at the specified root.
-//
-//-------------------------------------------------------------------------
-void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
- /* do nothing if an error has already been reported */
- if (U_FAILURE(status)) {
- return;
- }
- if (fType == kind) {
- dest->addElement(this, status);
- }
- if (fLeftChild != NULL) {
- fLeftChild->findNodes(dest, kind, status);
- }
- if (fRightChild != NULL) {
- fRightChild->findNodes(dest, kind, status);
- }
-}
-
-
-//-------------------------------------------------------------------------
-//
-// print. Print out a single node, for debugging.
-//
-//-------------------------------------------------------------------------
-#ifdef RBBI_DEBUG
-
-static int32_t serial(const RBBINode *node) {
- return (node == NULL? -1 : node->fSerialNum);
-}
-
-
-void RBBINode::printNode(const RBBINode *node) {
- static const char * const nodeTypeNames[] = {
- "setRef",
- "uset",
- "varRef",
- "leafChar",
- "lookAhead",
- "tag",
- "endMark",
- "opStart",
- "opCat",
- "opOr",
- "opStar",
- "opPlus",
- "opQuestion",
- "opBreak",
- "opReverse",
- "opLParen"
- };
-
- if (node==NULL) {
- RBBIDebugPrintf("%10p", (void *)node);
- } else {
- RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ",
- (void *)node, node->fSerialNum, nodeTypeNames[node->fType],
- node->fRuleRoot?'R':' ', node->fChainIn?'C':' ',
- serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent),
- node->fFirstPos, node->fVal);
- if (node->fType == varRef) {
- RBBI_DEBUG_printUnicodeString(node->fText);
- }
- }
- RBBIDebugPrintf("\n");
-}
-#endif
-
-
-#ifdef RBBI_DEBUG
-U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) {
- RBBIDebugPrintf("%*s", minWidth, CStr(s)());
-}
-#endif
-
-
-//-------------------------------------------------------------------------
-//
-// print. Print out the tree of nodes rooted at "this"
-//
-//-------------------------------------------------------------------------
-#ifdef RBBI_DEBUG
-void RBBINode::printNodeHeader() {
- RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n");
-}
-
-void RBBINode::printTree(const RBBINode *node, UBool printHeading) {
- if (printHeading) {
- printNodeHeader();
- }
- printNode(node);
- if (node != NULL) {
- // Only dump the definition under a variable reference if asked to.
- // Unconditionally dump children of all other node types.
- if (node->fType != varRef) {
- if (node->fLeftChild != NULL) {
- printTree(node->fLeftChild, FALSE);
- }
-
- if (node->fRightChild != NULL) {
- printTree(node->fRightChild, FALSE);
- }
- }
- }
-}
-#endif
-
-
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/contrib/libs/icu/common/rbbinode.h b/contrib/libs/icu/common/rbbinode.h
deleted file mode 100644
index f3525960225..00000000000
--- a/contrib/libs/icu/common/rbbinode.h
+++ /dev/null
@@ -1,127 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/********************************************************************
- * COPYRIGHT:
- * Copyright (c) 2001-2016, International Business Machines Corporation and
- * others. All Rights Reserved.
- ********************************************************************/
-
-#ifndef RBBINODE_H
-#define RBBINODE_H
-
-#include "unicode/utypes.h"
-#include "unicode/unistr.h"
-#include "unicode/uobject.h"
-
-//
-// class RBBINode
-//
-// Represents a node in the parse tree generated when reading
-// a rule file.
-//
-
-U_NAMESPACE_BEGIN
-
-class UnicodeSet;
-class UVector;
-
-class RBBINode : public UMemory {
- public:
- enum NodeType {
- setRef,
- uset,
- varRef,
- leafChar,
- lookAhead,
- tag,
- endMark,
- opStart,
- opCat,
- opOr,
- opStar,
- opPlus,
- opQuestion,
- opBreak,
- opReverse,
- opLParen
- };
-
- enum OpPrecedence {
- precZero,
- precStart,
- precLParen,
- precOpOr,
- precOpCat
- };
-
- NodeType fType;
- RBBINode *fParent;
- RBBINode *fLeftChild;
- RBBINode *fRightChild;
- UnicodeSet *fInputSet; // For uset nodes only.
- OpPrecedence fPrecedence; // For binary ops only.
-
- UnicodeString fText; // Text corresponding to this node.
- // May be lazily evaluated when (if) needed
- // for some node types.
- int fFirstPos; // Position in the rule source string of the
- // first text associated with the node.
- // If there's a left child, this will be the same
- // as that child's left pos.
- int fLastPos; // Last position in the rule source string
- // of any text associated with this node.
- // If there's a right child, this will be the same
- // as that child's last position.
-
- UBool fNullable; // See Aho.
- int32_t fVal; // For leafChar nodes, the value.
- // Values are the character category,
- // corresponds to columns in the final
- // state transition table.
-
- UBool fLookAheadEnd; // For endMark nodes, set TRUE if
- // marking the end of a look-ahead rule.
-
- UBool fRuleRoot; // True if this node is the root of a rule.
- UBool fChainIn; // True if chaining into this rule is allowed
- // (no '^' present).
-
- UVector *fFirstPosSet;
- UVector *fLastPosSet; // TODO: rename fFirstPos & fLastPos to avoid confusion.
- UVector *fFollowPos;
-
-
- RBBINode(NodeType t);
- RBBINode(const RBBINode &other);
- ~RBBINode();
-
- RBBINode *cloneTree();
- RBBINode *flattenVariables();
- void flattenSets();
- void findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status);
-
-#ifdef RBBI_DEBUG
- static void printNodeHeader();
- static void printNode(const RBBINode *n);
- static void printTree(const RBBINode *n, UBool withHeading);
-#endif
-
- private:
- RBBINode &operator = (const RBBINode &other); // No defs.
- UBool operator == (const RBBINode &other); // Private, so these functions won't accidentally be used.
-
-#ifdef RBBI_DEBUG
- public:
- int fSerialNum; // Debugging aids.
-#endif
-};
-
-#ifdef RBBI_DEBUG
-U_CFUNC void
-RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth=0);
-#endif
-
-U_NAMESPACE_END
-
-#endif
-
diff --git a/contrib/libs/icu/common/rbbirb.cpp b/contrib/libs/icu/common/rbbirb.cpp
deleted file mode 100644
index 68ded32e1d0..00000000000
--- a/contrib/libs/icu/common/rbbirb.cpp
+++ /dev/null
@@ -1,341 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//
-// file: rbbirb.cpp
-//
-// Copyright (C) 2002-2011, International Business Machines Corporation and others.
-// All Rights Reserved.
-//
-// This file contains the RBBIRuleBuilder class implementation. This is the main class for
-// building (compiling) break rules into the tables required by the runtime
-// RBBI engine.
-//
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/brkiter.h"
-#include "unicode/rbbi.h"
-#include "unicode/ubrk.h"
-#include "unicode/unistr.h"
-#include "unicode/uniset.h"
-#include "unicode/uchar.h"
-#include "unicode/uchriter.h"
-#include "unicode/parsepos.h"
-#include "unicode/parseerr.h"
-
-#include "cmemory.h"
-#include "cstring.h"
-#include "rbbirb.h"
-#include "rbbinode.h"
-#include "rbbiscan.h"
-#include "rbbisetb.h"
-#include "rbbitblb.h"
-#include "rbbidata.h"
-#include "uassert.h"
-
-
-U_NAMESPACE_BEGIN
-
-
-//----------------------------------------------------------------------------------------
-//
-// Constructor.
-//
-//----------------------------------------------------------------------------------------
-RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
- UParseError *parseErr,
- UErrorCode &status)
- : fRules(rules), fStrippedRules(rules)
-{
- fStatus = &status; // status is checked below
- fParseError = parseErr;
- fDebugEnv = NULL;
-#ifdef RBBI_DEBUG
- fDebugEnv = getenv("U_RBBIDEBUG");
-#endif
-
-
- fForwardTree = NULL;
- fReverseTree = NULL;
- fSafeFwdTree = NULL;
- fSafeRevTree = NULL;
- fDefaultTree = &fForwardTree;
- fForwardTable = NULL;
- fRuleStatusVals = NULL;
- fChainRules = FALSE;
- fLBCMNoChain = FALSE;
- fLookAheadHardBreak = FALSE;
- fUSetNodes = NULL;
- fRuleStatusVals = NULL;
- fScanner = NULL;
- fSetBuilder = NULL;
- if (parseErr) {
- uprv_memset(parseErr, 0, sizeof(UParseError));
- }
-
- if (U_FAILURE(status)) {
- return;
- }
-
- fUSetNodes = new UVector(status); // bcos status gets overwritten here
- fRuleStatusVals = new UVector(status);
- fScanner = new RBBIRuleScanner(this);
- fSetBuilder = new RBBISetBuilder(this);
- if (U_FAILURE(status)) {
- return;
- }
- if(fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0 || fRuleStatusVals == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
-}
-
-
-
-//----------------------------------------------------------------------------------------
-//
-// Destructor
-//
-//----------------------------------------------------------------------------------------
-RBBIRuleBuilder::~RBBIRuleBuilder() {
-
- int i;
- for (i=0; ; i++) {
- RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i);
- if (n==NULL) {
- break;
- }
- delete n;
- }
-
- delete fUSetNodes;
- delete fSetBuilder;
- delete fForwardTable;
- delete fForwardTree;
- delete fReverseTree;
- delete fSafeFwdTree;
- delete fSafeRevTree;
- delete fScanner;
- delete fRuleStatusVals;
-}
-
-
-
-
-
-//----------------------------------------------------------------------------------------
-//
-// flattenData() - Collect up the compiled RBBI rule data and put it into
-// the format for saving in ICU data files,
-// which is also the format needed by the RBBI runtime engine.
-//
-//----------------------------------------------------------------------------------------
-static int32_t align8(int32_t i) {return (i+7) & 0xfffffff8;}
-
-RBBIDataHeader *RBBIRuleBuilder::flattenData() {
- int32_t i;
-
- if (U_FAILURE(*fStatus)) {
- return NULL;
- }
-
- // Remove whitespace from the rules to make it smaller.
- // The rule parser has already removed comments.
- fStrippedRules = fScanner->stripRules(fStrippedRules);
-
- // Calculate the size of each section in the data.
- // Sizes here are padded up to a multiple of 8 for better memory alignment.
- // Sections sizes actually stored in the header are for the actual data
- // without the padding.
- //
- int32_t headerSize = align8(sizeof(RBBIDataHeader));
- int32_t forwardTableSize = align8(fForwardTable->getTableSize());
- int32_t reverseTableSize = align8(fForwardTable->getSafeTableSize());
- int32_t trieSize = align8(fSetBuilder->getTrieSize());
- int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
- int32_t rulesSize = align8((fStrippedRules.length()+1) * sizeof(UChar));
-
- int32_t totalSize = headerSize
- + forwardTableSize
- + reverseTableSize
- + statusTableSize + trieSize + rulesSize;
-
-#ifdef RBBI_DEBUG
- if (fDebugEnv && uprv_strstr(fDebugEnv, "size")) {
- RBBIDebugPrintf("Header Size: %8d\n", headerSize);
- RBBIDebugPrintf("Forward Table Size: %8d\n", forwardTableSize);
- RBBIDebugPrintf("Reverse Table Size: %8d\n", reverseTableSize);
- RBBIDebugPrintf("Trie Size: %8d\n", trieSize);
- RBBIDebugPrintf("Status Table Size: %8d\n", statusTableSize);
- RBBIDebugPrintf("Rules Size: %8d\n", rulesSize);
- RBBIDebugPrintf("-----------------------------\n");
- RBBIDebugPrintf("Total Size: %8d\n", totalSize);
- }
-#endif
-
- RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize);
- if (data == NULL) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- uprv_memset(data, 0, totalSize);
-
-
- data->fMagic = 0xb1a0;
- data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0];
- data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1];
- data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2];
- data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3];
- data->fLength = totalSize;
- data->fCatCount = fSetBuilder->getNumCharCategories();
-
- data->fFTable = headerSize;
- data->fFTableLen = forwardTableSize;
-
- data->fRTable = data->fFTable + data->fFTableLen;
- data->fRTableLen = reverseTableSize;
-
- data->fTrie = data->fRTable + data->fRTableLen;
- data->fTrieLen = fSetBuilder->getTrieSize();
- data->fStatusTable = data->fTrie + trieSize;
- data->fStatusTableLen= statusTableSize;
- data->fRuleSource = data->fStatusTable + statusTableSize;
- data->fRuleSourceLen = fStrippedRules.length() * sizeof(UChar);
-
- uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
-
- fForwardTable->exportTable((uint8_t *)data + data->fFTable);
- fForwardTable->exportSafeTable((uint8_t *)data + data->fRTable);
- fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);
-
- int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
- for (i=0; i<fRuleStatusVals->size(); i++) {
- ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
- }
-
- fStrippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
-
- return data;
-}
-
-
-//----------------------------------------------------------------------------------------
-//
-// createRuleBasedBreakIterator construct from source rules that are passed in
-// in a UnicodeString
-//
-//----------------------------------------------------------------------------------------
-BreakIterator *
-RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
- UParseError *parseError,
- UErrorCode &status)
-{
- //
- // Read the input rules, generate a parse tree, symbol table,
- // and list of all Unicode Sets referenced by the rules.
- //
- RBBIRuleBuilder builder(rules, parseError, status);
- if (U_FAILURE(status)) { // status checked here bcos build below doesn't
- return NULL;
- }
-
- RBBIDataHeader *data = builder.build(status);
-
- if (U_FAILURE(status)) {
- return nullptr;
- }
-
- //
- // Create a break iterator from the compiled rules.
- // (Identical to creation from stored pre-compiled rules)
- //
- // status is checked after init in construction.
- RuleBasedBreakIterator *This = new RuleBasedBreakIterator(data, status);
- if (U_FAILURE(status)) {
- delete This;
- This = NULL;
- }
- else if(This == NULL) { // test for NULL
- status = U_MEMORY_ALLOCATION_ERROR;
- }
- return This;
-}
-
-RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) {
- if (U_FAILURE(status)) {
- return nullptr;
- }
-
- fScanner->parse();
- if (U_FAILURE(status)) {
- return nullptr;
- }
-
- //
- // UnicodeSet processing.
- // Munge the Unicode Sets to create a set of character categories.
- // Generate the mapping tables (TRIE) from input code points to
- // the character categories.
- //
- fSetBuilder->buildRanges();
-
- //
- // Generate the DFA state transition table.
- //
- fForwardTable = new RBBITableBuilder(this, &fForwardTree, status);
- if (fForwardTable == nullptr) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
-
- fForwardTable->buildForwardTable();
- optimizeTables();
- fForwardTable->buildSafeReverseTable(status);
-
-
-#ifdef RBBI_DEBUG
- if (fDebugEnv && uprv_strstr(fDebugEnv, "states")) {
- fForwardTable->printStates();
- fForwardTable->printRuleStatusTable();
- fForwardTable->printReverseTable();
- }
-#endif
-
- fSetBuilder->buildTrie();
-
- //
- // Package up the compiled data into a memory image
- // in the run-time format.
- //
- RBBIDataHeader *data = flattenData(); // returns NULL if error
- if (U_FAILURE(status)) {
- return nullptr;
- }
- return data;
-}
-
-void RBBIRuleBuilder::optimizeTables() {
- bool didSomething;
- do {
- didSomething = false;
-
- // Begin looking for duplicates with char class 3.
- // Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively,
- // and should not have other categories merged into them.
- IntPair duplPair = {3, 0};
- while (fForwardTable->findDuplCharClassFrom(&duplPair)) {
- fSetBuilder->mergeCategories(duplPair);
- fForwardTable->removeColumn(duplPair.second);
- didSomething = true;
- }
-
- while (fForwardTable->removeDuplicateStates() > 0) {
- didSomething = true;
- }
- } while (didSomething);
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/contrib/libs/icu/common/rbbirb.h b/contrib/libs/icu/common/rbbirb.h
deleted file mode 100644
index 037c1dc2ce8..00000000000
--- a/contrib/libs/icu/common/rbbirb.h
+++ /dev/null
@@ -1,237 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//
-// rbbirb.h
-//
-// Copyright (C) 2002-2008, International Business Machines Corporation and others.
-// All Rights Reserved.
-//
-// This file contains declarations for several classes from the
-// Rule Based Break Iterator rule builder.
-//
-
-
-#ifndef RBBIRB_H
-#define RBBIRB_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include <utility>
-
-#include "unicode/uobject.h"
-#include "unicode/rbbi.h"
-#include "unicode/uniset.h"
-#include "unicode/parseerr.h"
-#include "uhash.h"
-#include "uvector.h"
-#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
- // looks up references to $variables within a set.
-
-
-U_NAMESPACE_BEGIN
-
-class RBBIRuleScanner;
-struct RBBIRuleTableEl;
-class RBBISetBuilder;
-class RBBINode;
-class RBBITableBuilder;
-
-
-
-//--------------------------------------------------------------------------------
-//
-// RBBISymbolTable. Implements SymbolTable interface that is used by the
-// UnicodeSet parser to resolve references to $variables.
-//
-//--------------------------------------------------------------------------------
-class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one
-public: // of these structs for each entry.
- RBBISymbolTableEntry();
- UnicodeString key;
- RBBINode *val;
- ~RBBISymbolTableEntry();
-
-private:
- RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class
- RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class
-};
-
-
-class RBBISymbolTable : public UMemory, public SymbolTable {
-private:
- const UnicodeString &fRules;
- UHashtable *fHashTable;
- RBBIRuleScanner *fRuleScanner;
-
- // These next two fields are part of the mechanism for passing references to
- // already-constructed UnicodeSets back to the UnicodeSet constructor
- // when the pattern includes $variable references.
- const UnicodeString ffffString; // = "/uffff"
- UnicodeSet *fCachedSetLookup;
-
-public:
- // API inherited from class SymbolTable
- virtual const UnicodeString* lookup(const UnicodeString& s) const;
- virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
- virtual UnicodeString parseReference(const UnicodeString& text,
- ParsePosition& pos, int32_t limit) const;
-
- // Additional Functions
- RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status);
- virtual ~RBBISymbolTable();
-
- virtual RBBINode *lookupNode(const UnicodeString &key) const;
- virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err);
-
-#ifdef RBBI_DEBUG
- virtual void rbbiSymtablePrint() const;
-#else
- // A do-nothing inline function for non-debug builds. Member funcs can't be empty
- // or the call sites won't compile.
- int32_t fFakeField;
- #define rbbiSymtablePrint() fFakeField=0;
-#endif
-
-private:
- RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class
- RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class
-};
-
-
-//--------------------------------------------------------------------------------
-//
-// class RBBIRuleBuilder The top-level class handling RBBI rule compiling.
-//
-//--------------------------------------------------------------------------------
-class RBBIRuleBuilder : public UMemory {
-public:
-
- // Create a rule based break iterator from a set of rules.
- // This function is the main entry point into the rule builder. The
- // public ICU API for creating RBBIs uses this function to do the actual work.
- //
- static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules,
- UParseError *parseError,
- UErrorCode &status);
-
-public:
- // The "public" functions and data members that appear below are accessed
- // (and shared) by the various parts that make up the rule builder. They
- // are NOT intended to be accessed by anything outside of the
- // rule builder implementation.
- RBBIRuleBuilder(const UnicodeString &rules,
- UParseError *parseErr,
- UErrorCode &status
- );
-
- virtual ~RBBIRuleBuilder();
-
- /**
- * Build the state tables and char class Trie from the source rules.
- */
- RBBIDataHeader *build(UErrorCode &status);
-
-
- /**
- * Fold together redundant character classes (table columns) and
- * redundant states (table rows). Done after initial table generation,
- * before serializing the result.
- */
- void optimizeTables();
-
- char *fDebugEnv; // controls debug trace output
- UErrorCode *fStatus; // Error reporting. Keeping status
- UParseError *fParseError; // here avoids passing it everywhere.
- const UnicodeString &fRules; // The rule string that we are compiling
- UnicodeString fStrippedRules; // The rule string, with comments stripped.
-
- RBBIRuleScanner *fScanner; // The scanner.
- RBBINode *fForwardTree; // The parse trees, generated by the scanner,
- RBBINode *fReverseTree; // then manipulated by subsequent steps.
- RBBINode *fSafeFwdTree;
- RBBINode *fSafeRevTree;
-
- RBBINode **fDefaultTree; // For rules not qualified with a !
- // the tree to which they belong to.
-
- UBool fChainRules; // True for chained Unicode TR style rules.
- // False for traditional regexp rules.
-
- UBool fLBCMNoChain; // True: suppress chaining of rules on
- // chars with LineBreak property == CM.
-
- UBool fLookAheadHardBreak; // True: Look ahead matches cause an
- // immediate break, no continuing for the
- // longest match.
-
- RBBISetBuilder *fSetBuilder; // Set and Character Category builder.
- UVector *fUSetNodes; // Vector of all uset nodes.
-
- RBBITableBuilder *fForwardTable; // State transition table, build time form.
-
- UVector *fRuleStatusVals; // The values that can be returned
- // from getRuleStatus().
-
- RBBIDataHeader *flattenData(); // Create the flattened (runtime format)
- // data tables..
-private:
- RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class
- RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class
-};
-
-
-
-
-//----------------------------------------------------------------------------
-//
-// RBBISetTableEl is an entry in the hash table of UnicodeSets that have
-// been encountered. The val Node will be of nodetype uset
-// and contain pointers to the actual UnicodeSets.
-// The Key is the source string for initializing the set.
-//
-// The hash table is used to avoid creating duplicate
-// unnamed (not $var references) UnicodeSets.
-//
-// Memory Management:
-// The Hash Table owns these RBBISetTableEl structs and
-// the key strings. It does NOT own the val nodes.
-//
-//----------------------------------------------------------------------------
-struct RBBISetTableEl {
- UnicodeString *key;
- RBBINode *val;
-};
-
-/**
- * A pair of ints, used to bundle pairs of states or pairs of character classes.
- */
-typedef std::pair<int32_t, int32_t> IntPair;
-
-
-//----------------------------------------------------------------------------
-//
-// RBBIDebugPrintf Printf equivalent, for debugging output.
-// Conditional compilation of the implementation lets us
-// get rid of the stdio dependency in environments where it
-// is unavailable.
-//
-//----------------------------------------------------------------------------
-#ifdef RBBI_DEBUG
-#include <stdio.h>
-#define RBBIDebugPrintf printf
-#define RBBIDebugPuts puts
-#else
-#undef RBBIDebugPrintf
-#define RBBIDebugPuts(arg)
-#endif
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
-
-#endif
-
-
-
diff --git a/contrib/libs/icu/common/rbbirpt.h b/contrib/libs/icu/common/rbbirpt.h
deleted file mode 100644
index 586953c90c6..00000000000
--- a/contrib/libs/icu/common/rbbirpt.h
+++ /dev/null
@@ -1,296 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//---------------------------------------------------------------------------------
-//
-// Generated Header File. Do not edit by hand.
-// This file contains the state table for the ICU Rule Based Break Iterator
-// rule parser.
-// It is generated by the Perl script "rbbicst.pl" from
-// the rule parser state definitions file "rbbirpt.txt".
-//
-// Copyright (C) 2002-2016 International Business Machines Corporation
-// and others. All rights reserved.
-//
-//---------------------------------------------------------------------------------
-#ifndef RBBIRPT_H
-#define RBBIRPT_H
-
-#include "unicode/utypes.h"
-
-U_NAMESPACE_BEGIN
-//
-// Character classes for RBBI rule scanning.
-//
- static const uint8_t kRuleSet_digit_char = 128;
- static const uint8_t kRuleSet_name_char = 129;
- static const uint8_t kRuleSet_name_start_char = 130;
- static const uint8_t kRuleSet_rule_char = 131;
- static const uint8_t kRuleSet_white_space = 132;
-
-
-enum RBBI_RuleParseAction {
- doCheckVarDef,
- doDotAny,
- doEndAssign,
- doEndOfRule,
- doEndVariableName,
- doExit,
- doExprCatOperator,
- doExprFinished,
- doExprOrOperator,
- doExprRParen,
- doExprStart,
- doLParen,
- doNOP,
- doNoChain,
- doOptionEnd,
- doOptionStart,
- doReverseDir,
- doRuleChar,
- doRuleError,
- doRuleErrorAssignExpr,
- doScanUnicodeSet,
- doSlash,
- doStartAssign,
- doStartTagValue,
- doStartVariableName,
- doTagDigit,
- doTagExpectedError,
- doTagValue,
- doUnaryOpPlus,
- doUnaryOpQuestion,
- doUnaryOpStar,
- doVariableNameExpectedErr,
- rbbiLastAction};
-
-//-------------------------------------------------------------------------------
-//
-// RBBIRuleTableEl represents the structure of a row in the transition table
-// for the rule parser state machine.
-//-------------------------------------------------------------------------------
-struct RBBIRuleTableEl {
- RBBI_RuleParseAction fAction;
- uint8_t fCharClass; // 0-127: an individual ASCII character
- // 128-255: character class index
- uint8_t fNextState; // 0-250: normal next-stat numbers
- // 255: pop next-state from stack.
- uint8_t fPushState;
- UBool fNextChar;
-};
-
-static const struct RBBIRuleTableEl gRuleParseStateTable[] = {
- {doNOP, 0, 0, 0, TRUE}
- , {doExprStart, 254, 29, 9, FALSE} // 1 start
- , {doNOP, 132, 1,0, TRUE} // 2
- , {doNoChain, 94 /* ^ */, 12, 9, TRUE} // 3
- , {doExprStart, 36 /* $ */, 88, 98, FALSE} // 4
- , {doNOP, 33 /* ! */, 19,0, TRUE} // 5
- , {doNOP, 59 /* ; */, 1,0, TRUE} // 6
- , {doNOP, 252, 0,0, FALSE} // 7
- , {doExprStart, 255, 29, 9, FALSE} // 8
- , {doEndOfRule, 59 /* ; */, 1,0, TRUE} // 9 break-rule-end
- , {doNOP, 132, 9,0, TRUE} // 10
- , {doRuleError, 255, 103,0, FALSE} // 11
- , {doExprStart, 254, 29,0, FALSE} // 12 start-after-caret
- , {doNOP, 132, 12,0, TRUE} // 13
- , {doRuleError, 94 /* ^ */, 103,0, FALSE} // 14
- , {doExprStart, 36 /* $ */, 88, 37, FALSE} // 15
- , {doRuleError, 59 /* ; */, 103,0, FALSE} // 16
- , {doRuleError, 252, 103,0, FALSE} // 17
- , {doExprStart, 255, 29,0, FALSE} // 18
- , {doNOP, 33 /* ! */, 21,0, TRUE} // 19 rev-option
- , {doReverseDir, 255, 28, 9, FALSE} // 20
- , {doOptionStart, 130, 23,0, TRUE} // 21 option-scan1
- , {doRuleError, 255, 103,0, FALSE} // 22
- , {doNOP, 129, 23,0, TRUE} // 23 option-scan2
- , {doOptionEnd, 255, 25,0, FALSE} // 24
- , {doNOP, 59 /* ; */, 1,0, TRUE} // 25 option-scan3
- , {doNOP, 132, 25,0, TRUE} // 26
- , {doRuleError, 255, 103,0, FALSE} // 27
- , {doExprStart, 255, 29, 9, FALSE} // 28 reverse-rule
- , {doRuleChar, 254, 38,0, TRUE} // 29 term
- , {doNOP, 132, 29,0, TRUE} // 30
- , {doRuleChar, 131, 38,0, TRUE} // 31
- , {doNOP, 91 /* [ */, 94, 38, FALSE} // 32
- , {doLParen, 40 /* ( */, 29, 38, TRUE} // 33
- , {doNOP, 36 /* $ */, 88, 37, FALSE} // 34
- , {doDotAny, 46 /* . */, 38,0, TRUE} // 35
- , {doRuleError, 255, 103,0, FALSE} // 36
- , {doCheckVarDef, 255, 38,0, FALSE} // 37 term-var-ref
- , {doNOP, 132, 38,0, TRUE} // 38 expr-mod
- , {doUnaryOpStar, 42 /* * */, 43,0, TRUE} // 39
- , {doUnaryOpPlus, 43 /* + */, 43,0, TRUE} // 40
- , {doUnaryOpQuestion, 63 /* ? */, 43,0, TRUE} // 41
- , {doNOP, 255, 43,0, FALSE} // 42
- , {doExprCatOperator, 254, 29,0, FALSE} // 43 expr-cont
- , {doNOP, 132, 43,0, TRUE} // 44
- , {doExprCatOperator, 131, 29,0, FALSE} // 45
- , {doExprCatOperator, 91 /* [ */, 29,0, FALSE} // 46
- , {doExprCatOperator, 40 /* ( */, 29,0, FALSE} // 47
- , {doExprCatOperator, 36 /* $ */, 29,0, FALSE} // 48
- , {doExprCatOperator, 46 /* . */, 29,0, FALSE} // 49
- , {doExprCatOperator, 47 /* / */, 55,0, FALSE} // 50
- , {doExprCatOperator, 123 /* { */, 67,0, TRUE} // 51
- , {doExprOrOperator, 124 /* | */, 29,0, TRUE} // 52
- , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 53
- , {doExprFinished, 255, 255,0, FALSE} // 54
- , {doSlash, 47 /* / */, 57,0, TRUE} // 55 look-ahead
- , {doNOP, 255, 103,0, FALSE} // 56
- , {doExprCatOperator, 254, 29,0, FALSE} // 57 expr-cont-no-slash
- , {doNOP, 132, 43,0, TRUE} // 58
- , {doExprCatOperator, 131, 29,0, FALSE} // 59
- , {doExprCatOperator, 91 /* [ */, 29,0, FALSE} // 60
- , {doExprCatOperator, 40 /* ( */, 29,0, FALSE} // 61
- , {doExprCatOperator, 36 /* $ */, 29,0, FALSE} // 62
- , {doExprCatOperator, 46 /* . */, 29,0, FALSE} // 63
- , {doExprOrOperator, 124 /* | */, 29,0, TRUE} // 64
- , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 65
- , {doExprFinished, 255, 255,0, FALSE} // 66
- , {doNOP, 132, 67,0, TRUE} // 67 tag-open
- , {doStartTagValue, 128, 70,0, FALSE} // 68
- , {doTagExpectedError, 255, 103,0, FALSE} // 69
- , {doNOP, 132, 74,0, TRUE} // 70 tag-value
- , {doNOP, 125 /* } */, 74,0, FALSE} // 71
- , {doTagDigit, 128, 70,0, TRUE} // 72
- , {doTagExpectedError, 255, 103,0, FALSE} // 73
- , {doNOP, 132, 74,0, TRUE} // 74 tag-close
- , {doTagValue, 125 /* } */, 77,0, TRUE} // 75
- , {doTagExpectedError, 255, 103,0, FALSE} // 76
- , {doExprCatOperator, 254, 29,0, FALSE} // 77 expr-cont-no-tag
- , {doNOP, 132, 77,0, TRUE} // 78
- , {doExprCatOperator, 131, 29,0, FALSE} // 79
- , {doExprCatOperator, 91 /* [ */, 29,0, FALSE} // 80
- , {doExprCatOperator, 40 /* ( */, 29,0, FALSE} // 81
- , {doExprCatOperator, 36 /* $ */, 29,0, FALSE} // 82
- , {doExprCatOperator, 46 /* . */, 29,0, FALSE} // 83
- , {doExprCatOperator, 47 /* / */, 55,0, FALSE} // 84
- , {doExprOrOperator, 124 /* | */, 29,0, TRUE} // 85
- , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 86
- , {doExprFinished, 255, 255,0, FALSE} // 87
- , {doStartVariableName, 36 /* $ */, 90,0, TRUE} // 88 scan-var-name
- , {doNOP, 255, 103,0, FALSE} // 89
- , {doNOP, 130, 92,0, TRUE} // 90 scan-var-start
- , {doVariableNameExpectedErr, 255, 103,0, FALSE} // 91
- , {doNOP, 129, 92,0, TRUE} // 92 scan-var-body
- , {doEndVariableName, 255, 255,0, FALSE} // 93
- , {doScanUnicodeSet, 91 /* [ */, 255,0, TRUE} // 94 scan-unicode-set
- , {doScanUnicodeSet, 112 /* p */, 255,0, TRUE} // 95
- , {doScanUnicodeSet, 80 /* P */, 255,0, TRUE} // 96
- , {doNOP, 255, 103,0, FALSE} // 97
- , {doNOP, 132, 98,0, TRUE} // 98 assign-or-rule
- , {doStartAssign, 61 /* = */, 29, 101, TRUE} // 99
- , {doNOP, 255, 37, 9, FALSE} // 100
- , {doEndAssign, 59 /* ; */, 1,0, TRUE} // 101 assign-end
- , {doRuleErrorAssignExpr, 255, 103,0, FALSE} // 102
- , {doExit, 255, 103,0, TRUE} // 103 errorDeath
- };
-#ifdef RBBI_DEBUG
-static const char * const RBBIRuleStateNames[] = { 0,
- "start",
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- "break-rule-end",
- 0,
- 0,
- "start-after-caret",
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- "rev-option",
- 0,
- "option-scan1",
- 0,
- "option-scan2",
- 0,
- "option-scan3",
- 0,
- 0,
- "reverse-rule",
- "term",
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- "term-var-ref",
- "expr-mod",
- 0,
- 0,
- 0,
- 0,
- "expr-cont",
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- "look-ahead",
- 0,
- "expr-cont-no-slash",
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- "tag-open",
- 0,
- 0,
- "tag-value",
- 0,
- 0,
- 0,
- "tag-close",
- 0,
- 0,
- "expr-cont-no-tag",
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- "scan-var-name",
- 0,
- "scan-var-start",
- 0,
- "scan-var-body",
- 0,
- "scan-unicode-set",
- 0,
- 0,
- 0,
- "assign-or-rule",
- 0,
- 0,
- "assign-end",
- 0,
- "errorDeath",
- 0};
-#endif
-
-U_NAMESPACE_END
-#endif
diff --git a/contrib/libs/icu/common/rbbiscan.cpp b/contrib/libs/icu/common/rbbiscan.cpp
deleted file mode 100644
index 4eb324b4b90..00000000000
--- a/contrib/libs/icu/common/rbbiscan.cpp
+++ /dev/null
@@ -1,1283 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//
-// file: rbbiscan.cpp
-//
-// Copyright (C) 2002-2016, International Business Machines Corporation and others.
-// All Rights Reserved.
-//
-// This file contains the Rule Based Break Iterator Rule Builder functions for
-// scanning the rules and assembling a parse tree. This is the first phase
-// of compiling the rules.
-//
-// The overall of the rules is managed by class RBBIRuleBuilder, which will
-// create and use an instance of this class as part of the process.
-//
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/unistr.h"
-#include "unicode/uniset.h"
-#include "unicode/uchar.h"
-#include "unicode/uchriter.h"
-#include "unicode/parsepos.h"
-#include "unicode/parseerr.h"
-#include "cmemory.h"
-#include "cstring.h"
-
-#include "rbbirpt.h" // Contains state table for the rbbi rules parser.
- // generated by a Perl script.
-#include "rbbirb.h"
-#include "rbbinode.h"
-#include "rbbiscan.h"
-#include "rbbitblb.h"
-
-#include "uassert.h"
-
-//------------------------------------------------------------------------------
-//
-// Unicode Set init strings for each of the character classes needed for parsing a rule file.
-// (Initialized with hex values for portability to EBCDIC based machines.
-// Really ugly, but there's no good way to avoid it.)
-//
-// The sets are referred to by name in the rbbirpt.txt, which is the
-// source form of the state transition table for the RBBI rule parser.
-//
-//------------------------------------------------------------------------------
-static const UChar gRuleSet_rule_char_pattern[] = {
- // Characters that may appear as literals in patterns without escaping or quoting.
- // [ ^ [ \ p { Z } \ u 0 0 2 0
- 0x5b, 0x5e, 0x5b, 0x5c, 0x70, 0x7b, 0x5a, 0x7d, 0x5c, 0x75, 0x30, 0x30, 0x32, 0x30,
- // - \ u 0 0 7 f ] - [ \ p
- 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x37, 0x66, 0x5d, 0x2d, 0x5b, 0x5c, 0x70,
- // { L } ] - [ \ p { N } ] ]
- 0x7b, 0x4c, 0x7d, 0x5d, 0x2d, 0x5b, 0x5c, 0x70, 0x7b, 0x4e, 0x7d, 0x5d, 0x5d, 0};
-
-static const UChar gRuleSet_name_char_pattern[] = {
-// [ _ \ p { L } \ p { N } ]
- 0x5b, 0x5f, 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x5c, 0x70, 0x7b, 0x4e, 0x7d, 0x5d, 0};
-
-static const UChar gRuleSet_digit_char_pattern[] = {
-// [ 0 - 9 ]
- 0x5b, 0x30, 0x2d, 0x39, 0x5d, 0};
-
-static const UChar gRuleSet_name_start_char_pattern[] = {
-// [ _ \ p { L } ]
- 0x5b, 0x5f, 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x5d, 0 };
-
-static const UChar kAny[] = {0x61, 0x6e, 0x79, 0x00}; // "any"
-
-
-U_CDECL_BEGIN
-static void U_CALLCONV RBBISetTable_deleter(void *p) {
- icu::RBBISetTableEl *px = (icu::RBBISetTableEl *)p;
- delete px->key;
- // Note: px->val is owned by the linked list "fSetsListHead" in scanner.
- // Don't delete the value nodes here.
- uprv_free(px);
-}
-U_CDECL_END
-
-U_NAMESPACE_BEGIN
-
-//------------------------------------------------------------------------------
-//
-// Constructor.
-//
-//------------------------------------------------------------------------------
-RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb)
-{
- fRB = rb;
- fScanIndex = 0;
- fNextIndex = 0;
- fQuoteMode = FALSE;
- fLineNum = 1;
- fCharNum = 0;
- fLastChar = 0;
-
- fStateTable = NULL;
- fStack[0] = 0;
- fStackPtr = 0;
- fNodeStack[0] = NULL;
- fNodeStackPtr = 0;
-
- fReverseRule = FALSE;
- fLookAheadRule = FALSE;
- fNoChainInRule = FALSE;
-
- fSymbolTable = NULL;
- fSetTable = NULL;
- fRuleNum = 0;
- fOptionStart = 0;
-
- // Do not check status until after all critical fields are sufficiently initialized
- // that the destructor can run cleanly.
- if (U_FAILURE(*rb->fStatus)) {
- return;
- }
-
- //
- // Set up the constant Unicode Sets.
- // Note: These could be made static, lazily initialized, and shared among
- // all instances of RBBIRuleScanners. BUT this is quite a bit simpler,
- // and the time to build these few sets should be small compared to a
- // full break iterator build.
- fRuleSets[kRuleSet_rule_char-128]
- = UnicodeSet(UnicodeString(gRuleSet_rule_char_pattern), *rb->fStatus);
- // fRuleSets[kRuleSet_white_space-128] = [:Pattern_White_Space:]
- fRuleSets[kRuleSet_white_space-128].
- add(9, 0xd).add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
- fRuleSets[kRuleSet_name_char-128]
- = UnicodeSet(UnicodeString(gRuleSet_name_char_pattern), *rb->fStatus);
- fRuleSets[kRuleSet_name_start_char-128]
- = UnicodeSet(UnicodeString(gRuleSet_name_start_char_pattern), *rb->fStatus);
- fRuleSets[kRuleSet_digit_char-128]
- = UnicodeSet(UnicodeString(gRuleSet_digit_char_pattern), *rb->fStatus);
- if (*rb->fStatus == U_ILLEGAL_ARGUMENT_ERROR) {
- // This case happens if ICU's data is missing. UnicodeSet tries to look up property
- // names from the init string, can't find them, and claims an illegal argument.
- // Change the error so that the actual problem will be clearer to users.
- *rb->fStatus = U_BRK_INIT_ERROR;
- }
- if (U_FAILURE(*rb->fStatus)) {
- return;
- }
-
- fSymbolTable = new RBBISymbolTable(this, rb->fRules, *rb->fStatus);
- if (fSymbolTable == NULL) {
- *rb->fStatus = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- fSetTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, rb->fStatus);
- if (U_FAILURE(*rb->fStatus)) {
- return;
- }
- uhash_setValueDeleter(fSetTable, RBBISetTable_deleter);
-}
-
-
-
-//------------------------------------------------------------------------------
-//
-// Destructor
-//
-//------------------------------------------------------------------------------
-RBBIRuleScanner::~RBBIRuleScanner() {
- delete fSymbolTable;
- if (fSetTable != NULL) {
- uhash_close(fSetTable);
- fSetTable = NULL;
-
- }
-
-
- // Node Stack.
- // Normally has one entry, which is the entire parse tree for the rules.
- // If errors occured, there may be additional subtrees left on the stack.
- while (fNodeStackPtr > 0) {
- delete fNodeStack[fNodeStackPtr];
- fNodeStackPtr--;
- }
-
-}
-
-//------------------------------------------------------------------------------
-//
-// doParseAction Do some action during rule parsing.
-// Called by the parse state machine.
-// Actions build the parse tree and Unicode Sets,
-// and maintain the parse stack for nested expressions.
-//
-// TODO: unify EParseAction and RBBI_RuleParseAction enum types.
-// They represent exactly the same thing. They're separate
-// only to work around enum forward declaration restrictions
-// in some compilers, while at the same time avoiding multiple
-// definitions problems. I'm sure that there's a better way.
-//
-//------------------------------------------------------------------------------
-UBool RBBIRuleScanner::doParseActions(int32_t action)
-{
- RBBINode *n = NULL;
-
- UBool returnVal = TRUE;
-
- switch (action) {
-
- case doExprStart:
- pushNewNode(RBBINode::opStart);
- fRuleNum++;
- break;
-
-
- case doNoChain:
- // Scanned a '^' while on the rule start state.
- fNoChainInRule = TRUE;
- break;
-
-
- case doExprOrOperator:
- {
- fixOpStack(RBBINode::precOpCat);
- RBBINode *operandNode = fNodeStack[fNodeStackPtr--];
- RBBINode *orNode = pushNewNode(RBBINode::opOr);
- if (U_FAILURE(*fRB->fStatus)) {
- break;
- }
- orNode->fLeftChild = operandNode;
- operandNode->fParent = orNode;
- }
- break;
-
- case doExprCatOperator:
- // concatenation operator.
- // For the implicit concatenation of adjacent terms in an expression that are
- // not separated by any other operator. Action is invoked between the
- // actions for the two terms.
- {
- fixOpStack(RBBINode::precOpCat);
- RBBINode *operandNode = fNodeStack[fNodeStackPtr--];
- RBBINode *catNode = pushNewNode(RBBINode::opCat);
- if (U_FAILURE(*fRB->fStatus)) {
- break;
- }
- catNode->fLeftChild = operandNode;
- operandNode->fParent = catNode;
- }
- break;
-
- case doLParen:
- // Open Paren.
- // The openParen node is a dummy operation type with a low precedence,
- // which has the affect of ensuring that any real binary op that
- // follows within the parens binds more tightly to the operands than
- // stuff outside of the parens.
- pushNewNode(RBBINode::opLParen);
- break;
-
- case doExprRParen:
- fixOpStack(RBBINode::precLParen);
- break;
-
- case doNOP:
- break;
-
- case doStartAssign:
- // We've just scanned "$variable = "
- // The top of the node stack has the $variable ref node.
-
- // Save the start position of the RHS text in the StartExpression node
- // that precedes the $variableReference node on the stack.
- // This will eventually be used when saving the full $variable replacement
- // text as a string.
- n = fNodeStack[fNodeStackPtr-1];
- n->fFirstPos = fNextIndex; // move past the '='
-
- // Push a new start-of-expression node; needed to keep parse of the
- // RHS expression happy.
- pushNewNode(RBBINode::opStart);
- break;
-
-
-
-
- case doEndAssign:
- {
- // We have reached the end of an assignement statement.
- // Current scan char is the ';' that terminates the assignment.
-
- // Terminate expression, leaves expression parse tree rooted in TOS node.
- fixOpStack(RBBINode::precStart);
-
- RBBINode *startExprNode = fNodeStack[fNodeStackPtr-2];
- RBBINode *varRefNode = fNodeStack[fNodeStackPtr-1];
- RBBINode *RHSExprNode = fNodeStack[fNodeStackPtr];
-
- // Save original text of right side of assignment, excluding the terminating ';'
- // in the root of the node for the right-hand-side expression.
- RHSExprNode->fFirstPos = startExprNode->fFirstPos;
- RHSExprNode->fLastPos = fScanIndex;
- fRB->fRules.extractBetween(RHSExprNode->fFirstPos, RHSExprNode->fLastPos, RHSExprNode->fText);
-
- // Expression parse tree becomes l. child of the $variable reference node.
- varRefNode->fLeftChild = RHSExprNode;
- RHSExprNode->fParent = varRefNode;
-
- // Make a symbol table entry for the $variableRef node.
- fSymbolTable->addEntry(varRefNode->fText, varRefNode, *fRB->fStatus);
- if (U_FAILURE(*fRB->fStatus)) {
- // This is a round-about way to get the parse position set
- // so that duplicate symbols error messages include a line number.
- UErrorCode t = *fRB->fStatus;
- *fRB->fStatus = U_ZERO_ERROR;
- error(t);
- }
-
- // Clean up the stack.
- delete startExprNode;
- fNodeStackPtr-=3;
- break;
- }
-
- case doEndOfRule:
- {
- fixOpStack(RBBINode::precStart); // Terminate expression, leaves expression
- if (U_FAILURE(*fRB->fStatus)) { // parse tree rooted in TOS node.
- break;
- }
-#ifdef RBBI_DEBUG
- if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {printNodeStack("end of rule");}
-#endif
- U_ASSERT(fNodeStackPtr == 1);
- RBBINode *thisRule = fNodeStack[fNodeStackPtr];
-
- // If this rule includes a look-ahead '/', add a endMark node to the
- // expression tree.
- if (fLookAheadRule) {
- RBBINode *endNode = pushNewNode(RBBINode::endMark);
- RBBINode *catNode = pushNewNode(RBBINode::opCat);
- if (U_FAILURE(*fRB->fStatus)) {
- break;
- }
- fNodeStackPtr -= 2;
- catNode->fLeftChild = thisRule;
- catNode->fRightChild = endNode;
- fNodeStack[fNodeStackPtr] = catNode;
- endNode->fVal = fRuleNum;
- endNode->fLookAheadEnd = TRUE;
- thisRule = catNode;
-
- // TODO: Disable chaining out of look-ahead (hard break) rules.
- // The break on rule match is forced, so there is no point in building up
- // the state table to chain into another rule for a longer match.
- }
-
- // Mark this node as being the root of a rule.
- thisRule->fRuleRoot = TRUE;
-
- // Flag if chaining into this rule is wanted.
- //
- if (fRB->fChainRules && // If rule chaining is enabled globally via !!chain
- !fNoChainInRule) { // and no '^' chain-in inhibit was on this rule
- thisRule->fChainIn = TRUE;
- }
-
-
- // All rule expressions are ORed together.
- // The ';' that terminates an expression really just functions as a '|' with
- // a low operator prededence.
- //
- // Each of the four sets of rules are collected separately.
- // (forward, reverse, safe_forward, safe_reverse)
- // OR this rule into the appropriate group of them.
- //
- RBBINode **destRules = (fReverseRule? &fRB->fSafeRevTree : fRB->fDefaultTree);
-
- if (*destRules != NULL) {
- // This is not the first rule encounted.
- // OR previous stuff (from *destRules)
- // with the current rule expression (on the Node Stack)
- // with the resulting OR expression going to *destRules
- //
- thisRule = fNodeStack[fNodeStackPtr];
- RBBINode *prevRules = *destRules;
- RBBINode *orNode = pushNewNode(RBBINode::opOr);
- if (U_FAILURE(*fRB->fStatus)) {
- break;
- }
- orNode->fLeftChild = prevRules;
- prevRules->fParent = orNode;
- orNode->fRightChild = thisRule;
- thisRule->fParent = orNode;
- *destRules = orNode;
- }
- else
- {
- // This is the first rule encountered (for this direction).
- // Just move its parse tree from the stack to *destRules.
- *destRules = fNodeStack[fNodeStackPtr];
- }
- fReverseRule = FALSE; // in preparation for the next rule.
- fLookAheadRule = FALSE;
- fNoChainInRule = FALSE;
- fNodeStackPtr = 0;
- }
- break;
-
-
- case doRuleError:
- error(U_BRK_RULE_SYNTAX);
- returnVal = FALSE;
- break;
-
-
- case doVariableNameExpectedErr:
- error(U_BRK_RULE_SYNTAX);
- break;
-
-
- //
- // Unary operands + ? *
- // These all appear after the operand to which they apply.
- // When we hit one, the operand (may be a whole sub expression)
- // will be on the top of the stack.
- // Unary Operator becomes TOS, with the old TOS as its one child.
- case doUnaryOpPlus:
- {
- RBBINode *operandNode = fNodeStack[fNodeStackPtr--];
- RBBINode *plusNode = pushNewNode(RBBINode::opPlus);
- if (U_FAILURE(*fRB->fStatus)) {
- break;
- }
- plusNode->fLeftChild = operandNode;
- operandNode->fParent = plusNode;
- }
- break;
-
- case doUnaryOpQuestion:
- {
- RBBINode *operandNode = fNodeStack[fNodeStackPtr--];
- RBBINode *qNode = pushNewNode(RBBINode::opQuestion);
- if (U_FAILURE(*fRB->fStatus)) {
- break;
- }
- qNode->fLeftChild = operandNode;
- operandNode->fParent = qNode;
- }
- break;
-
- case doUnaryOpStar:
- {
- RBBINode *operandNode = fNodeStack[fNodeStackPtr--];
- RBBINode *starNode = pushNewNode(RBBINode::opStar);
- if (U_FAILURE(*fRB->fStatus)) {
- break;
- }
- starNode->fLeftChild = operandNode;
- operandNode->fParent = starNode;
- }
- break;
-
- case doRuleChar:
- // A "Rule Character" is any single character that is a literal part
- // of the regular expression. Like a, b and c in the expression "(abc*) | [:L:]"
- // These are pretty uncommon in break rules; the terms are more commonly
- // sets. To keep things uniform, treat these characters like as
- // sets that just happen to contain only one character.
- {
- n = pushNewNode(RBBINode::setRef);
- if (U_FAILURE(*fRB->fStatus)) {
- break;
- }
- findSetFor(UnicodeString(fC.fChar), n);
- n->fFirstPos = fScanIndex;
- n->fLastPos = fNextIndex;
- fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
- break;
- }
-
- case doDotAny:
- // scanned a ".", meaning match any single character.
- {
- n = pushNewNode(RBBINode::setRef);
- if (U_FAILURE(*fRB->fStatus)) {
- break;
- }
- findSetFor(UnicodeString(TRUE, kAny, 3), n);
- n->fFirstPos = fScanIndex;
- n->fLastPos = fNextIndex;
- fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
- break;
- }
-
- case doSlash:
- // Scanned a '/', which identifies a look-ahead break position in a rule.
- n = pushNewNode(RBBINode::lookAhead);
- if (U_FAILURE(*fRB->fStatus)) {
- break;
- }
- n->fVal = fRuleNum;
- n->fFirstPos = fScanIndex;
- n->fLastPos = fNextIndex;
- fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
- fLookAheadRule = TRUE;
- break;
-
-
- case doStartTagValue:
- // Scanned a '{', the opening delimiter for a tag value within a rule.
- n = pushNewNode(RBBINode::tag);
- if (U_FAILURE(*fRB->fStatus)) {
- break;
- }
- n->fVal = 0;
- n->fFirstPos = fScanIndex;
- n->fLastPos = fNextIndex;
- break;
-
- case doTagDigit:
- // Just scanned a decimal digit that's part of a tag value
- {
- n = fNodeStack[fNodeStackPtr];
- uint32_t v = u_charDigitValue(fC.fChar);
- U_ASSERT(v < 10);
- n->fVal = n->fVal*10 + v;
- break;
- }
-
- case doTagValue:
- n = fNodeStack[fNodeStackPtr];
- n->fLastPos = fNextIndex;
- fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
- break;
-
- case doTagExpectedError:
- error(U_BRK_MALFORMED_RULE_TAG);
- returnVal = FALSE;
- break;
-
- case doOptionStart:
- // Scanning a !!option. At the start of string.
- fOptionStart = fScanIndex;
- break;
-
- case doOptionEnd:
- {
- UnicodeString opt(fRB->fRules, fOptionStart, fScanIndex-fOptionStart);
- if (opt == UNICODE_STRING("chain", 5)) {
- fRB->fChainRules = TRUE;
- } else if (opt == UNICODE_STRING("LBCMNoChain", 11)) {
- fRB->fLBCMNoChain = TRUE;
- } else if (opt == UNICODE_STRING("forward", 7)) {
- fRB->fDefaultTree = &fRB->fForwardTree;
- } else if (opt == UNICODE_STRING("reverse", 7)) {
- fRB->fDefaultTree = &fRB->fReverseTree;
- } else if (opt == UNICODE_STRING("safe_forward", 12)) {
- fRB->fDefaultTree = &fRB->fSafeFwdTree;
- } else if (opt == UNICODE_STRING("safe_reverse", 12)) {
- fRB->fDefaultTree = &fRB->fSafeRevTree;
- } else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) {
- fRB->fLookAheadHardBreak = TRUE;
- } else if (opt == UNICODE_STRING("quoted_literals_only", 20)) {
- fRuleSets[kRuleSet_rule_char-128].clear();
- } else if (opt == UNICODE_STRING("unquoted_literals", 17)) {
- fRuleSets[kRuleSet_rule_char-128].applyPattern(UnicodeString(gRuleSet_rule_char_pattern), *fRB->fStatus);
- } else {
- error(U_BRK_UNRECOGNIZED_OPTION);
- }
- }
- break;
-
- case doReverseDir:
- fReverseRule = TRUE;
- break;
-
- case doStartVariableName:
- n = pushNewNode(RBBINode::varRef);
- if (U_FAILURE(*fRB->fStatus)) {
- break;
- }
- n->fFirstPos = fScanIndex;
- break;
-
- case doEndVariableName:
- n = fNodeStack[fNodeStackPtr];
- if (n==NULL || n->fType != RBBINode::varRef) {
- error(U_BRK_INTERNAL_ERROR);
- break;
- }
- n->fLastPos = fScanIndex;
- fRB->fRules.extractBetween(n->fFirstPos+1, n->fLastPos, n->fText);
- // Look the newly scanned name up in the symbol table
- // If there's an entry, set the l. child of the var ref to the replacement expression.
- // (We also pass through here when scanning assignments, but no harm is done, other
- // than a slight wasted effort that seems hard to avoid. Lookup will be null)
- n->fLeftChild = fSymbolTable->lookupNode(n->fText);
- break;
-
- case doCheckVarDef:
- n = fNodeStack[fNodeStackPtr];
- if (n->fLeftChild == NULL) {
- error(U_BRK_UNDEFINED_VARIABLE);
- returnVal = FALSE;
- }
- break;
-
- case doExprFinished:
- break;
-
- case doRuleErrorAssignExpr:
- error(U_BRK_ASSIGN_ERROR);
- returnVal = FALSE;
- break;
-
- case doExit:
- returnVal = FALSE;
- break;
-
- case doScanUnicodeSet:
- scanSet();
- break;
-
- default:
- error(U_BRK_INTERNAL_ERROR);
- returnVal = FALSE;
- break;
- }
- return returnVal && U_SUCCESS(*fRB->fStatus);
-}
-
-
-
-
-//------------------------------------------------------------------------------
-//
-// Error Report a rule parse error.
-// Only report it if no previous error has been recorded.
-//
-//------------------------------------------------------------------------------
-void RBBIRuleScanner::error(UErrorCode e) {
- if (U_SUCCESS(*fRB->fStatus)) {
- *fRB->fStatus = e;
- if (fRB->fParseError) {
- fRB->fParseError->line = fLineNum;
- fRB->fParseError->offset = fCharNum;
- fRB->fParseError->preContext[0] = 0;
- fRB->fParseError->postContext[0] = 0;
- }
- }
-}
-
-
-
-
-//------------------------------------------------------------------------------
-//
-// fixOpStack The parse stack holds partially assembled chunks of the parse tree.
-// An entry on the stack may be as small as a single setRef node,
-// or as large as the parse tree
-// for an entire expression (this will be the one item left on the stack
-// when the parsing of an RBBI rule completes.
-//
-// This function is called when a binary operator is encountered.
-// It looks back up the stack for operators that are not yet associated
-// with a right operand, and if the precedence of the stacked operator >=
-// the precedence of the current operator, binds the operand left,
-// to the previously encountered operator.
-//
-//------------------------------------------------------------------------------
-void RBBIRuleScanner::fixOpStack(RBBINode::OpPrecedence p) {
- RBBINode *n;
- // printNodeStack("entering fixOpStack()");
- for (;;) {
- n = fNodeStack[fNodeStackPtr-1]; // an operator node
- if (n->fPrecedence == 0) {
- RBBIDebugPuts("RBBIRuleScanner::fixOpStack, bad operator node");
- error(U_BRK_INTERNAL_ERROR);
- return;
- }
-
- if (n->fPrecedence < p || n->fPrecedence <= RBBINode::precLParen) {
- // The most recent operand goes with the current operator,
- // not with the previously stacked one.
- break;
- }
- // Stack operator is a binary op ( '|' or concatenation)
- // TOS operand becomes right child of this operator.
- // Resulting subexpression becomes the TOS operand.
- n->fRightChild = fNodeStack[fNodeStackPtr];
- fNodeStack[fNodeStackPtr]->fParent = n;
- fNodeStackPtr--;
- // printNodeStack("looping in fixOpStack() ");
- }
-
- if (p <= RBBINode::precLParen) {
- // Scan is at a right paren or end of expression.
- // The scanned item must match the stack, or else there was an error.
- // Discard the left paren (or start expr) node from the stack,
- // leaving the completed (sub)expression as TOS.
- if (n->fPrecedence != p) {
- // Right paren encountered matched start of expression node, or
- // end of expression matched with a left paren node.
- error(U_BRK_MISMATCHED_PAREN);
- }
- fNodeStack[fNodeStackPtr-1] = fNodeStack[fNodeStackPtr];
- fNodeStackPtr--;
- // Delete the now-discarded LParen or Start node.
- delete n;
- }
- // printNodeStack("leaving fixOpStack()");
-}
-
-
-
-
-//------------------------------------------------------------------------------
-//
-// findSetFor given a UnicodeString,
-// - find the corresponding Unicode Set (uset node)
-// (create one if necessary)
-// - Set fLeftChild of the caller's node (should be a setRef node)
-// to the uset node
-// Maintain a hash table of uset nodes, so the same one is always used
-// for the same string.
-// If a "to adopt" set is provided and we haven't seen this key before,
-// add the provided set to the hash table.
-// If the string is one (32 bit) char in length, the set contains
-// just one element which is the char in question.
-// If the string is "any", return a set containing all chars.
-//
-//------------------------------------------------------------------------------
-void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt) {
-
- RBBISetTableEl *el;
-
- // First check whether we've already cached a set for this string.
- // If so, just use the cached set in the new node.
- // delete any set provided by the caller, since we own it.
- el = (RBBISetTableEl *)uhash_get(fSetTable, &s);
- if (el != NULL) {
- delete setToAdopt;
- node->fLeftChild = el->val;
- U_ASSERT(node->fLeftChild->fType == RBBINode::uset);
- return;
- }
-
- // Haven't seen this set before.
- // If the caller didn't provide us with a prebuilt set,
- // create a new UnicodeSet now.
- if (setToAdopt == NULL) {
- if (s.compare(kAny, -1) == 0) {
- setToAdopt = new UnicodeSet(0x000000, 0x10ffff);
- } else {
- UChar32 c;
- c = s.char32At(0);
- setToAdopt = new UnicodeSet(c, c);
- }
- }
-
- //
- // Make a new uset node to refer to this UnicodeSet
- // This new uset node becomes the child of the caller's setReference node.
- //
- RBBINode *usetNode = new RBBINode(RBBINode::uset);
- if (usetNode == NULL) {
- error(U_MEMORY_ALLOCATION_ERROR);
- return;
- }
- usetNode->fInputSet = setToAdopt;
- usetNode->fParent = node;
- node->fLeftChild = usetNode;
- usetNode->fText = s;
-
-
- //
- // Add the new uset node to the list of all uset nodes.
- //
- fRB->fUSetNodes->addElement(usetNode, *fRB->fStatus);
-
-
- //
- // Add the new set to the set hash table.
- //
- el = (RBBISetTableEl *)uprv_malloc(sizeof(RBBISetTableEl));
- UnicodeString *tkey = new UnicodeString(s);
- if (tkey == NULL || el == NULL || setToAdopt == NULL) {
- // Delete to avoid memory leak
- delete tkey;
- tkey = NULL;
- uprv_free(el);
- el = NULL;
- delete setToAdopt;
- setToAdopt = NULL;
-
- error(U_MEMORY_ALLOCATION_ERROR);
- return;
- }
- el->key = tkey;
- el->val = usetNode;
- uhash_put(fSetTable, el->key, el, fRB->fStatus);
-
- return;
-}
-
-
-
-//
-// Assorted Unicode character constants.
-// Numeric because there is no portable way to enter them as literals.
-// (Think EBCDIC).
-//
-static const UChar chCR = 0x0d; // New lines, for terminating comments.
-static const UChar chLF = 0x0a;
-static const UChar chNEL = 0x85; // NEL newline variant
-static const UChar chLS = 0x2028; // Unicode Line Separator
-static const UChar chApos = 0x27; // single quote, for quoted chars.
-static const UChar chPound = 0x23; // '#', introduces a comment.
-static const UChar chBackSlash = 0x5c; // '\' introduces a char escape
-static const UChar chLParen = 0x28;
-static const UChar chRParen = 0x29;
-
-
-//------------------------------------------------------------------------------
-//
-// stripRules Return a rules string without extra spaces.
-// (Comments are removed separately, during rule parsing.)
-//
-//------------------------------------------------------------------------------
-UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) {
- UnicodeString strippedRules;
- int32_t rulesLength = rules.length();
- bool skippingSpaces = false;
-
- for (int32_t idx=0; idx<rulesLength; idx = rules.moveIndex32(idx, 1)) {
- UChar32 cp = rules.char32At(idx);
- bool whiteSpace = u_hasBinaryProperty(cp, UCHAR_PATTERN_WHITE_SPACE);
- if (skippingSpaces && whiteSpace) {
- continue;
- }
- strippedRules.append(cp);
- skippingSpaces = whiteSpace;
- }
- return strippedRules;
-}
-
-
-//------------------------------------------------------------------------------
-//
-// nextCharLL Low Level Next Char from rule input source.
-// Get a char from the input character iterator,
-// keep track of input position for error reporting.
-//
-//------------------------------------------------------------------------------
-UChar32 RBBIRuleScanner::nextCharLL() {
- UChar32 ch;
-
- if (fNextIndex >= fRB->fRules.length()) {
- return (UChar32)-1;
- }
- ch = fRB->fRules.char32At(fNextIndex);
- fNextIndex = fRB->fRules.moveIndex32(fNextIndex, 1);
-
- if (ch == chCR ||
- ch == chNEL ||
- ch == chLS ||
- (ch == chLF && fLastChar != chCR)) {
- // Character is starting a new line. Bump up the line number, and
- // reset the column to 0.
- fLineNum++;
- fCharNum=0;
- if (fQuoteMode) {
- error(U_BRK_NEW_LINE_IN_QUOTED_STRING);
- fQuoteMode = FALSE;
- }
- }
- else {
- // Character is not starting a new line. Except in the case of a
- // LF following a CR, increment the column position.
- if (ch != chLF) {
- fCharNum++;
- }
- }
- fLastChar = ch;
- return ch;
-}
-
-
-//------------------------------------------------------------------------------
-//
-// nextChar for rules scanning. At this level, we handle stripping
-// out comments and processing backslash character escapes.
-// The rest of the rules grammar is handled at the next level up.
-//
-//------------------------------------------------------------------------------
-void RBBIRuleScanner::nextChar(RBBIRuleChar &c) {
-
- // Unicode Character constants needed for the processing done by nextChar(),
- // in hex because literals wont work on EBCDIC machines.
-
- fScanIndex = fNextIndex;
- c.fChar = nextCharLL();
- c.fEscaped = FALSE;
-
- //
- // check for '' sequence.
- // These are recognized in all contexts, whether in quoted text or not.
- //
- if (c.fChar == chApos) {
- if (fRB->fRules.char32At(fNextIndex) == chApos) {
- c.fChar = nextCharLL(); // get nextChar officially so character counts
- c.fEscaped = TRUE; // stay correct.
- }
- else
- {
- // Single quote, by itself.
- // Toggle quoting mode.
- // Return either '(' or ')', because quotes cause a grouping of the quoted text.
- fQuoteMode = !fQuoteMode;
- if (fQuoteMode == TRUE) {
- c.fChar = chLParen;
- } else {
- c.fChar = chRParen;
- }
- c.fEscaped = FALSE; // The paren that we return is not escaped.
- return;
- }
- }
-
- if (fQuoteMode) {
- c.fEscaped = TRUE;
- }
- else
- {
- // We are not in a 'quoted region' of the source.
- //
- if (c.fChar == chPound) {
- // Start of a comment. Consume the rest of it.
- // The new-line char that terminates the comment is always returned.
- // It will be treated as white-space, and serves to break up anything
- // that might otherwise incorrectly clump together with a comment in
- // the middle (a variable name, for example.)
- int32_t commentStart = fScanIndex;
- for (;;) {
- c.fChar = nextCharLL();
- if (c.fChar == (UChar32)-1 || // EOF
- c.fChar == chCR ||
- c.fChar == chLF ||
- c.fChar == chNEL ||
- c.fChar == chLS) {break;}
- }
- for (int32_t i=commentStart; i<fNextIndex-1; ++i) {
- fRB->fStrippedRules.setCharAt(i, u' ');
- }
- }
- if (c.fChar == (UChar32)-1) {
- return;
- }
-
- //
- // check for backslash escaped characters.
- // Use UnicodeString::unescapeAt() to handle them.
- //
- if (c.fChar == chBackSlash) {
- c.fEscaped = TRUE;
- int32_t startX = fNextIndex;
- c.fChar = fRB->fRules.unescapeAt(fNextIndex);
- if (fNextIndex == startX) {
- error(U_BRK_HEX_DIGITS_EXPECTED);
- }
- fCharNum += fNextIndex-startX;
- }
- }
- // putc(c.fChar, stdout);
-}
-
-//------------------------------------------------------------------------------
-//
-// Parse RBBI rules. The state machine for rules parsing is here.
-// The state tables are hand-written in the file rbbirpt.txt,
-// and converted to the form used here by a perl
-// script rbbicst.pl
-//
-//------------------------------------------------------------------------------
-void RBBIRuleScanner::parse() {
- uint16_t state;
- const RBBIRuleTableEl *tableEl;
-
- if (U_FAILURE(*fRB->fStatus)) {
- return;
- }
-
- state = 1;
- nextChar(fC);
- //
- // Main loop for the rule parsing state machine.
- // Runs once per state transition.
- // Each time through optionally performs, depending on the state table,
- // - an advance to the the next input char
- // - an action to be performed.
- // - pushing or popping a state to/from the local state return stack.
- //
- for (;;) {
- // Bail out if anything has gone wrong.
- // RBBI rule file parsing stops on the first error encountered.
- if (U_FAILURE(*fRB->fStatus)) {
- break;
- }
-
- // Quit if state == 0. This is the normal way to exit the state machine.
- //
- if (state == 0) {
- break;
- }
-
- // Find the state table element that matches the input char from the rule, or the
- // class of the input character. Start with the first table row for this
- // state, then linearly scan forward until we find a row that matches the
- // character. The last row for each state always matches all characters, so
- // the search will stop there, if not before.
- //
- tableEl = &gRuleParseStateTable[state];
- #ifdef RBBI_DEBUG
- if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) {
- RBBIDebugPrintf("char, line, col = (\'%c\', %d, %d) state=%s ",
- fC.fChar, fLineNum, fCharNum, RBBIRuleStateNames[state]);
- }
- #endif
-
- for (;;) {
- #ifdef RBBI_DEBUG
- if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPrintf("."); fflush(stdout);}
- #endif
- if (tableEl->fCharClass < 127 && fC.fEscaped == FALSE && tableEl->fCharClass == fC.fChar) {
- // Table row specified an individual character, not a set, and
- // the input character is not escaped, and
- // the input character matched it.
- break;
- }
- if (tableEl->fCharClass == 255) {
- // Table row specified default, match anything character class.
- break;
- }
- if (tableEl->fCharClass == 254 && fC.fEscaped) {
- // Table row specified "escaped" and the char was escaped.
- break;
- }
- if (tableEl->fCharClass == 253 && fC.fEscaped &&
- (fC.fChar == 0x50 || fC.fChar == 0x70 )) {
- // Table row specified "escaped P" and the char is either 'p' or 'P'.
- break;
- }
- if (tableEl->fCharClass == 252 && fC.fChar == (UChar32)-1) {
- // Table row specified eof and we hit eof on the input.
- break;
- }
-
- if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 && // Table specs a char class &&
- fC.fEscaped == FALSE && // char is not escaped &&
- fC.fChar != (UChar32)-1) { // char is not EOF
- U_ASSERT((tableEl->fCharClass-128) < UPRV_LENGTHOF(fRuleSets));
- if (fRuleSets[tableEl->fCharClass-128].contains(fC.fChar)) {
- // Table row specified a character class, or set of characters,
- // and the current char matches it.
- break;
- }
- }
-
- // No match on this row, advance to the next row for this state,
- tableEl++;
- }
- if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPuts("");}
-
- //
- // We've found the row of the state table that matches the current input
- // character from the rules string.
- // Perform any action specified by this row in the state table.
- if (doParseActions((int32_t)tableEl->fAction) == FALSE) {
- // Break out of the state machine loop if the
- // the action signalled some kind of error, or
- // the action was to exit, occurs on normal end-of-rules-input.
- break;
- }
-
- if (tableEl->fPushState != 0) {
- fStackPtr++;
- if (fStackPtr >= kStackSize) {
- error(U_BRK_INTERNAL_ERROR);
- RBBIDebugPuts("RBBIRuleScanner::parse() - state stack overflow.");
- fStackPtr--;
- }
- fStack[fStackPtr] = tableEl->fPushState;
- }
-
- if (tableEl->fNextChar) {
- nextChar(fC);
- }
-
- // Get the next state from the table entry, or from the
- // state stack if the next state was specified as "pop".
- if (tableEl->fNextState != 255) {
- state = tableEl->fNextState;
- } else {
- state = fStack[fStackPtr];
- fStackPtr--;
- if (fStackPtr < 0) {
- error(U_BRK_INTERNAL_ERROR);
- RBBIDebugPuts("RBBIRuleScanner::parse() - state stack underflow.");
- fStackPtr++;
- }
- }
-
- }
-
- if (U_FAILURE(*fRB->fStatus)) {
- return;
- }
-
- // If there are no forward rules set an error.
- //
- if (fRB->fForwardTree == NULL) {
- error(U_BRK_RULE_SYNTAX);
- return;
- }
-
- //
- // Parsing of the input RBBI rules is complete.
- // We now have a parse tree for the rule expressions
- // and a list of all UnicodeSets that are referenced.
- //
-#ifdef RBBI_DEBUG
- if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "symbols")) {fSymbolTable->rbbiSymtablePrint();}
- if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ptree")) {
- RBBIDebugPrintf("Completed Forward Rules Parse Tree...\n");
- RBBINode::printTree(fRB->fForwardTree, TRUE);
- RBBIDebugPrintf("\nCompleted Reverse Rules Parse Tree...\n");
- RBBINode::printTree(fRB->fReverseTree, TRUE);
- RBBIDebugPrintf("\nCompleted Safe Point Forward Rules Parse Tree...\n");
- RBBINode::printTree(fRB->fSafeFwdTree, TRUE);
- RBBIDebugPrintf("\nCompleted Safe Point Reverse Rules Parse Tree...\n");
- RBBINode::printTree(fRB->fSafeRevTree, TRUE);
- }
-#endif
-}
-
-
-//------------------------------------------------------------------------------
-//
-// printNodeStack for debugging...
-//
-//------------------------------------------------------------------------------
-#ifdef RBBI_DEBUG
-void RBBIRuleScanner::printNodeStack(const char *title) {
- int i;
- RBBIDebugPrintf("%s. Dumping node stack...\n", title);
- for (i=fNodeStackPtr; i>0; i--) {RBBINode::printTree(fNodeStack[i], TRUE);}
-}
-#endif
-
-
-
-
-//------------------------------------------------------------------------------
-//
-// pushNewNode create a new RBBINode of the specified type and push it
-// onto the stack of nodes.
-//
-//------------------------------------------------------------------------------
-RBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) {
- if (U_FAILURE(*fRB->fStatus)) {
- return NULL;
- }
- if (fNodeStackPtr >= kStackSize - 1) {
- error(U_BRK_RULE_SYNTAX);
- RBBIDebugPuts("RBBIRuleScanner::pushNewNode - stack overflow.");
- return NULL;
- }
- fNodeStackPtr++;
- fNodeStack[fNodeStackPtr] = new RBBINode(t);
- if (fNodeStack[fNodeStackPtr] == NULL) {
- *fRB->fStatus = U_MEMORY_ALLOCATION_ERROR;
- }
- return fNodeStack[fNodeStackPtr];
-}
-
-
-
-//------------------------------------------------------------------------------
-//
-// scanSet Construct a UnicodeSet from the text at the current scan
-// position. Advance the scan position to the first character
-// after the set.
-//
-// A new RBBI setref node referring to the set is pushed onto the node
-// stack.
-//
-// The scan position is normally under the control of the state machine
-// that controls rule parsing. UnicodeSets, however, are parsed by
-// the UnicodeSet constructor, not by the RBBI rule parser.
-//
-//------------------------------------------------------------------------------
-void RBBIRuleScanner::scanSet() {
- UnicodeSet *uset;
- ParsePosition pos;
- int startPos;
- int i;
-
- if (U_FAILURE(*fRB->fStatus)) {
- return;
- }
-
- pos.setIndex(fScanIndex);
- startPos = fScanIndex;
- UErrorCode localStatus = U_ZERO_ERROR;
- uset = new UnicodeSet();
- if (uset == NULL) {
- localStatus = U_MEMORY_ALLOCATION_ERROR;
- } else {
- uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus);
- }
- if (U_FAILURE(localStatus)) {
- // TODO: Get more accurate position of the error from UnicodeSet's return info.
- // UnicodeSet appears to not be reporting correctly at this time.
- #ifdef RBBI_DEBUG
- RBBIDebugPrintf("UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex());
- #endif
- error(localStatus);
- delete uset;
- return;
- }
-
- // Verify that the set contains at least one code point.
- //
- U_ASSERT(uset!=NULL);
- if (uset->isEmpty()) {
- // This set is empty.
- // Make it an error, because it almost certainly is not what the user wanted.
- // Also, avoids having to think about corner cases in the tree manipulation code
- // that occurs later on.
- error(U_BRK_RULE_EMPTY_SET);
- delete uset;
- return;
- }
-
-
- // Advance the RBBI parse postion over the UnicodeSet pattern.
- // Don't just set fScanIndex because the line/char positions maintained
- // for error reporting would be thrown off.
- i = pos.getIndex();
- for (;;) {
- if (fNextIndex >= i) {
- break;
- }
- nextCharLL();
- }
-
- if (U_SUCCESS(*fRB->fStatus)) {
- RBBINode *n;
-
- n = pushNewNode(RBBINode::setRef);
- if (U_FAILURE(*fRB->fStatus)) {
- return;
- }
- n->fFirstPos = startPos;
- n->fLastPos = fNextIndex;
- fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
- // findSetFor() serves several purposes here:
- // - Adopts storage for the UnicodeSet, will be responsible for deleting.
- // - Mantains collection of all sets in use, needed later for establishing
- // character categories for run time engine.
- // - Eliminates mulitiple instances of the same set.
- // - Creates a new uset node if necessary (if this isn't a duplicate.)
- findSetFor(n->fText, n, uset);
- }
-
-}
-
-int32_t RBBIRuleScanner::numRules() {
- return fRuleNum;
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/contrib/libs/icu/common/rbbiscan.h b/contrib/libs/icu/common/rbbiscan.h
deleted file mode 100644
index 6828ba39345..00000000000
--- a/contrib/libs/icu/common/rbbiscan.h
+++ /dev/null
@@ -1,167 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//
-// rbbiscan.h
-//
-// Copyright (C) 2002-2016, International Business Machines Corporation and others.
-// All Rights Reserved.
-//
-// This file contains declarations for class RBBIRuleScanner
-//
-
-
-#ifndef RBBISCAN_H
-#define RBBISCAN_H
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-#include "unicode/rbbi.h"
-#include "unicode/uniset.h"
-#include "unicode/parseerr.h"
-#include "uhash.h"
-#include "uvector.h"
-#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
- // looks up references to $variables within a set.
-#include "rbbinode.h"
-#include "rbbirpt.h"
-
-U_NAMESPACE_BEGIN
-
-class RBBIRuleBuilder;
-class RBBISymbolTable;
-
-
-//--------------------------------------------------------------------------------
-//
-// class RBBIRuleScanner does the lowest level, character-at-a-time
-// scanning of break iterator rules.
-//
-// The output of the scanner is parse trees for
-// the rule expressions and a list of all Unicode Sets
-// encountered.
-//
-//--------------------------------------------------------------------------------
-
-class RBBIRuleScanner : public UMemory {
-public:
-
- enum {
- kStackSize = 100 // The size of the state stack for
- }; // rules parsing. Corresponds roughly
- // to the depth of parentheses nesting
- // that is allowed in the rules.
-
- struct RBBIRuleChar {
- UChar32 fChar;
- UBool fEscaped;
- RBBIRuleChar() : fChar(0), fEscaped(FALSE) {}
- };
-
- RBBIRuleScanner(RBBIRuleBuilder *rb);
-
-
- virtual ~RBBIRuleScanner();
-
- void nextChar(RBBIRuleChar &c); // Get the next char from the input stream.
- // Return false if at end.
-
- UBool push(const RBBIRuleChar &c); // Push (unget) one character.
- // Only a single character may be pushed.
-
- void parse(); // Parse the rules, generating two parse
- // trees, one each for the forward and
- // reverse rules,
- // and a list of UnicodeSets encountered.
-
- int32_t numRules(); // Return the number of rules that have been seen.
-
- /**
- * Return a rules string without unnecessary
- * characters.
- */
- static UnicodeString stripRules(const UnicodeString &rules);
-private:
-
- UBool doParseActions(int32_t a);
- void error(UErrorCode e); // error reporting convenience function.
- void fixOpStack(RBBINode::OpPrecedence p);
- // a character.
- void findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL);
-
- UChar32 nextCharLL();
-#ifdef RBBI_DEBUG
- void printNodeStack(const char *title);
-#endif
- RBBINode *pushNewNode(RBBINode::NodeType t);
- void scanSet();
-
-
- RBBIRuleBuilder *fRB; // The rule builder that we are part of.
-
- int32_t fScanIndex; // Index of current character being processed
- // in the rule input string.
- int32_t fNextIndex; // Index of the next character, which
- // is the first character not yet scanned.
- UBool fQuoteMode; // Scan is in a 'quoted region'
- int32_t fLineNum; // Line number in input file.
- int32_t fCharNum; // Char position within the line.
- UChar32 fLastChar; // Previous char, needed to count CR-LF
- // as a single line, not two.
-
- RBBIRuleChar fC; // Current char for parse state machine
- // processing.
- UnicodeString fVarName; // $variableName, valid when we've just
- // scanned one.
-
- RBBIRuleTableEl **fStateTable; // State Transition Table for RBBI Rule
- // parsing. index by p[state][char-class]
-
- uint16_t fStack[kStackSize]; // State stack, holds state pushes
- int32_t fStackPtr; // and pops as specified in the state
- // transition rules.
-
- RBBINode *fNodeStack[kStackSize]; // Node stack, holds nodes created
- // during the parse of a rule
- int32_t fNodeStackPtr;
-
-
- UBool fReverseRule; // True if the rule currently being scanned
- // is a reverse direction rule (if it
- // starts with a '!')
-
- UBool fLookAheadRule; // True if the rule includes a '/'
- // somewhere within it.
-
- UBool fNoChainInRule; // True if the current rule starts with a '^'.
-
- RBBISymbolTable *fSymbolTable; // symbol table, holds definitions of
- // $variable symbols.
-
- UHashtable *fSetTable; // UnicocodeSet hash table, holds indexes to
- // the sets created while parsing rules.
- // The key is the string used for creating
- // the set.
-
- UnicodeSet fRuleSets[10]; // Unicode Sets that are needed during
- // the scanning of RBBI rules. The
- // indicies for these are assigned by the
- // perl script that builds the state tables.
- // See rbbirpt.h.
-
- int32_t fRuleNum; // Counts each rule as it is scanned.
-
- int32_t fOptionStart; // Input index of start of a !!option
- // keyword, while being scanned.
-
- UnicodeSet *gRuleSet_rule_char;
- UnicodeSet *gRuleSet_white_space;
- UnicodeSet *gRuleSet_name_char;
- UnicodeSet *gRuleSet_name_start_char;
-
- RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class
- RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class
-};
-
-U_NAMESPACE_END
-
-#endif
diff --git a/contrib/libs/icu/common/rbbisetb.cpp b/contrib/libs/icu/common/rbbisetb.cpp
deleted file mode 100644
index 36e2e07e9c6..00000000000
--- a/contrib/libs/icu/common/rbbisetb.cpp
+++ /dev/null
@@ -1,684 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//
-// rbbisetb.cpp
-//
-/*
-***************************************************************************
-* Copyright (C) 2002-2008 International Business Machines Corporation *
-* and others. All rights reserved. *
-***************************************************************************
-*/
-//
-// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules
-// (part of the rule building process.)
-//
-// Starting with the rules parse tree from the scanner,
-//
-// - Enumerate the set of UnicodeSets that are referenced
-// by the RBBI rules.
-// - compute a set of non-overlapping character ranges
-// with all characters within a range belonging to the same
-// set of input uniocde sets.
-// - Derive a set of non-overlapping UnicodeSet (like things)
-// that will correspond to columns in the state table for
-// the RBBI execution engine. All characters within one
-// of these sets belong to the same set of the original
-// UnicodeSets from the user's rules.
-// - construct the trie table that maps input characters
-// to the index of the matching non-overlapping set of set from
-// the previous step.
-//
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/uniset.h"
-#include "utrie2.h"
-#include "uvector.h"
-#include "uassert.h"
-#include "cmemory.h"
-#include "cstring.h"
-
-#include "rbbisetb.h"
-#include "rbbinode.h"
-
-U_NAMESPACE_BEGIN
-
-//------------------------------------------------------------------------
-//
-// Constructor
-//
-//------------------------------------------------------------------------
-RBBISetBuilder::RBBISetBuilder(RBBIRuleBuilder *rb)
-{
- fRB = rb;
- fStatus = rb->fStatus;
- fRangeList = 0;
- fTrie = 0;
- fTrieSize = 0;
- fGroupCount = 0;
- fSawBOF = FALSE;
-}
-
-
-//------------------------------------------------------------------------
-//
-// Destructor
-//
-//------------------------------------------------------------------------
-RBBISetBuilder::~RBBISetBuilder()
-{
- RangeDescriptor *nextRangeDesc;
-
- // Walk through & delete the linked list of RangeDescriptors
- for (nextRangeDesc = fRangeList; nextRangeDesc!=NULL;) {
- RangeDescriptor *r = nextRangeDesc;
- nextRangeDesc = r->fNext;
- delete r;
- }
-
- utrie2_close(fTrie);
-}
-
-
-
-
-//------------------------------------------------------------------------
-//
-// build Build the list of non-overlapping character ranges
-// from the Unicode Sets.
-//
-//------------------------------------------------------------------------
-void RBBISetBuilder::buildRanges() {
- RBBINode *usetNode;
- RangeDescriptor *rlRange;
-
- if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "usets")) {printSets();}
-
- //
- // Initialize the process by creating a single range encompassing all characters
- // that is in no sets.
- //
- fRangeList = new RangeDescriptor(*fStatus); // will check for status here
- if (fRangeList == NULL) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- fRangeList->fStartChar = 0;
- fRangeList->fEndChar = 0x10ffff;
-
- if (U_FAILURE(*fStatus)) {
- return;
- }
-
- //
- // Find the set of non-overlapping ranges of characters
- //
- int ni;
- for (ni=0; ; ni++) { // Loop over each of the UnicodeSets encountered in the input rules
- usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
- if (usetNode==NULL) {
- break;
- }
-
- UnicodeSet *inputSet = usetNode->fInputSet;
- int32_t inputSetRangeCount = inputSet->getRangeCount();
- int inputSetRangeIndex = 0;
- rlRange = fRangeList;
-
- for (;;) {
- if (inputSetRangeIndex >= inputSetRangeCount) {
- break;
- }
- UChar32 inputSetRangeBegin = inputSet->getRangeStart(inputSetRangeIndex);
- UChar32 inputSetRangeEnd = inputSet->getRangeEnd(inputSetRangeIndex);
-
- // skip over ranges from the range list that are completely
- // below the current range from the input unicode set.
- while (rlRange->fEndChar < inputSetRangeBegin) {
- rlRange = rlRange->fNext;
- }
-
- // If the start of the range from the range list is before with
- // the start of the range from the unicode set, split the range list range
- // in two, with one part being before (wholly outside of) the unicode set
- // and the other containing the rest.
- // Then continue the loop; the post-split current range will then be skipped
- // over
- if (rlRange->fStartChar < inputSetRangeBegin) {
- rlRange->split(inputSetRangeBegin, *fStatus);
- if (U_FAILURE(*fStatus)) {
- return;
- }
- continue;
- }
-
- // Same thing at the end of the ranges...
- // If the end of the range from the range list doesn't coincide with
- // the end of the range from the unicode set, split the range list
- // range in two. The first part of the split range will be
- // wholly inside the Unicode set.
- if (rlRange->fEndChar > inputSetRangeEnd) {
- rlRange->split(inputSetRangeEnd+1, *fStatus);
- if (U_FAILURE(*fStatus)) {
- return;
- }
- }
-
- // The current rlRange is now entirely within the UnicodeSet range.
- // Add this unicode set to the list of sets for this rlRange
- if (rlRange->fIncludesSets->indexOf(usetNode) == -1) {
- rlRange->fIncludesSets->addElement(usetNode, *fStatus);
- if (U_FAILURE(*fStatus)) {
- return;
- }
- }
-
- // Advance over ranges that we are finished with.
- if (inputSetRangeEnd == rlRange->fEndChar) {
- inputSetRangeIndex++;
- }
- rlRange = rlRange->fNext;
- }
- }
-
- if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();}
-
- //
- // Group the above ranges, with each group consisting of one or more
- // ranges that are in exactly the same set of original UnicodeSets.
- // The groups are numbered, and these group numbers are the set of
- // input symbols recognized by the run-time state machine.
- //
- // Numbering: # 0 (state table column 0) is unused.
- // # 1 is reserved - table column 1 is for end-of-input
- // # 2 is reserved - table column 2 is for beginning-in-input
- // # 3 is the first range list.
- //
- RangeDescriptor *rlSearchRange;
- for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
- for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) {
- if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) {
- rlRange->fNum = rlSearchRange->fNum;
- break;
- }
- }
- if (rlRange->fNum == 0) {
- fGroupCount ++;
- rlRange->fNum = fGroupCount+2;
- rlRange->setDictionaryFlag();
- addValToSets(rlRange->fIncludesSets, fGroupCount+2);
- }
- }
-
- // Handle input sets that contain the special string {eof}.
- // Column 1 of the state table is reserved for EOF on input.
- // Column 2 is reserved for before-the-start-input.
- // (This column can be optimized away later if there are no rule
- // references to {bof}.)
- // Add this column value (1 or 2) to the equivalent expression
- // subtree for each UnicodeSet that contains the string {eof}
- // Because {bof} and {eof} are not a characters in the normal sense,
- // they doesn't affect the computation of ranges or TRIE.
- static const UChar eofUString[] = {0x65, 0x6f, 0x66, 0};
- static const UChar bofUString[] = {0x62, 0x6f, 0x66, 0};
-
- UnicodeString eofString(eofUString);
- UnicodeString bofString(bofUString);
- for (ni=0; ; ni++) { // Loop over each of the UnicodeSets encountered in the input rules
- usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
- if (usetNode==NULL) {
- break;
- }
- UnicodeSet *inputSet = usetNode->fInputSet;
- if (inputSet->contains(eofString)) {
- addValToSet(usetNode, 1);
- }
- if (inputSet->contains(bofString)) {
- addValToSet(usetNode, 2);
- fSawBOF = TRUE;
- }
- }
-
-
- if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
- if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}
-}
-
-
-//
-// Build the Trie table for mapping UChar32 values to the corresponding
-// range group number.
-//
-void RBBISetBuilder::buildTrie() {
- RangeDescriptor *rlRange;
-
- fTrie = utrie2_open(0, // Initial value for all code points.
- 0, // Error value for out-of-range input.
- fStatus);
-
- for (rlRange = fRangeList; rlRange!=0 && U_SUCCESS(*fStatus); rlRange=rlRange->fNext) {
- utrie2_setRange32(fTrie,
- rlRange->fStartChar, // Range start
- rlRange->fEndChar, // Range end (inclusive)
- rlRange->fNum, // value for range
- TRUE, // Overwrite previously written values
- fStatus);
- }
-}
-
-
-void RBBISetBuilder::mergeCategories(IntPair categories) {
- U_ASSERT(categories.first >= 1);
- U_ASSERT(categories.second > categories.first);
- for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
- int32_t rangeNum = rd->fNum & ~DICT_BIT;
- int32_t rangeDict = rd->fNum & DICT_BIT;
- if (rangeNum == categories.second) {
- rd->fNum = categories.first | rangeDict;
- } else if (rangeNum > categories.second) {
- rd->fNum--;
- }
- }
- --fGroupCount;
-}
-
-
-//-----------------------------------------------------------------------------------
-//
-// getTrieSize() Return the size that will be required to serialize the Trie.
-//
-//-----------------------------------------------------------------------------------
-int32_t RBBISetBuilder::getTrieSize() {
- if (U_FAILURE(*fStatus)) {
- return 0;
- }
- utrie2_freeze(fTrie, UTRIE2_16_VALUE_BITS, fStatus);
- fTrieSize = utrie2_serialize(fTrie,
- NULL, // Buffer
- 0, // Capacity
- fStatus);
- if (*fStatus == U_BUFFER_OVERFLOW_ERROR) {
- *fStatus = U_ZERO_ERROR;
- }
- // RBBIDebugPrintf("Trie table size is %d\n", trieSize);
- return fTrieSize;
-}
-
-
-//-----------------------------------------------------------------------------------
-//
-// serializeTrie() Put the serialized trie at the specified address.
-// Trust the caller to have given us enough memory.
-// getTrieSize() MUST be called first.
-//
-//-----------------------------------------------------------------------------------
-void RBBISetBuilder::serializeTrie(uint8_t *where) {
- utrie2_serialize(fTrie,
- where, // Buffer
- fTrieSize, // Capacity
- fStatus);
-}
-
-//------------------------------------------------------------------------
-//
-// addValToSets Add a runtime-mapped input value to each uset from a
-// list of uset nodes. (val corresponds to a state table column.)
-// For each of the original Unicode sets - which correspond
-// directly to uset nodes - a logically equivalent expression
-// is constructed in terms of the remapped runtime input
-// symbol set. This function adds one runtime input symbol to
-// a list of sets.
-//
-// The "logically equivalent expression" is the tree for an
-// or-ing together of all of the symbols that go into the set.
-//
-//------------------------------------------------------------------------
-void RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) {
- int32_t ix;
-
- for (ix=0; ix<sets->size(); ix++) {
- RBBINode *usetNode = (RBBINode *)sets->elementAt(ix);
- addValToSet(usetNode, val);
- }
-}
-
-void RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) {
- RBBINode *leafNode = new RBBINode(RBBINode::leafChar);
- if (leafNode == NULL) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- leafNode->fVal = (unsigned short)val;
- if (usetNode->fLeftChild == NULL) {
- usetNode->fLeftChild = leafNode;
- leafNode->fParent = usetNode;
- } else {
- // There are already input symbols present for this set.
- // Set up an OR node, with the previous stuff as the left child
- // and the new value as the right child.
- RBBINode *orNode = new RBBINode(RBBINode::opOr);
- if (orNode == NULL) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- orNode->fLeftChild = usetNode->fLeftChild;
- orNode->fRightChild = leafNode;
- orNode->fLeftChild->fParent = orNode;
- orNode->fRightChild->fParent = orNode;
- usetNode->fLeftChild = orNode;
- orNode->fParent = usetNode;
- }
-}
-
-
-//------------------------------------------------------------------------
-//
-// getNumCharCategories
-//
-//------------------------------------------------------------------------
-int32_t RBBISetBuilder::getNumCharCategories() const {
- return fGroupCount + 3;
-}
-
-
-//------------------------------------------------------------------------
-//
-// sawBOF
-//
-//------------------------------------------------------------------------
-UBool RBBISetBuilder::sawBOF() const {
- return fSawBOF;
-}
-
-
-//------------------------------------------------------------------------
-//
-// getFirstChar Given a runtime RBBI character category, find
-// the first UChar32 that is in the set of chars
-// in the category.
-//------------------------------------------------------------------------
-UChar32 RBBISetBuilder::getFirstChar(int32_t category) const {
- RangeDescriptor *rlRange;
- UChar32 retVal = (UChar32)-1;
- for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
- if (rlRange->fNum == category) {
- retVal = rlRange->fStartChar;
- break;
- }
- }
- return retVal;
-}
-
-
-
-//------------------------------------------------------------------------
-//
-// printRanges A debugging function.
-// dump out all of the range definitions.
-//
-//------------------------------------------------------------------------
-#ifdef RBBI_DEBUG
-void RBBISetBuilder::printRanges() {
- RangeDescriptor *rlRange;
- int i;
-
- RBBIDebugPrintf("\n\n Nonoverlapping Ranges ...\n");
- for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
- RBBIDebugPrintf("%2i %4x-%4x ", rlRange->fNum, rlRange->fStartChar, rlRange->fEndChar);
-
- for (i=0; i<rlRange->fIncludesSets->size(); i++) {
- RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
- UnicodeString setName = UNICODE_STRING("anon", 4);
- RBBINode *setRef = usetNode->fParent;
- if (setRef != NULL) {
- RBBINode *varRef = setRef->fParent;
- if (varRef != NULL && varRef->fType == RBBINode::varRef) {
- setName = varRef->fText;
- }
- }
- RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" ");
- }
- RBBIDebugPrintf("\n");
- }
-}
-#endif
-
-
-//------------------------------------------------------------------------
-//
-// printRangeGroups A debugging function.
-// dump out all of the range groups.
-//
-//------------------------------------------------------------------------
-#ifdef RBBI_DEBUG
-void RBBISetBuilder::printRangeGroups() {
- RangeDescriptor *rlRange;
- RangeDescriptor *tRange;
- int i;
- int lastPrintedGroupNum = 0;
-
- RBBIDebugPrintf("\nRanges grouped by Unicode Set Membership...\n");
- for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
- int groupNum = rlRange->fNum & 0xbfff;
- if (groupNum > lastPrintedGroupNum) {
- lastPrintedGroupNum = groupNum;
- RBBIDebugPrintf("%2i ", groupNum);
-
- if (rlRange->fNum & DICT_BIT) { RBBIDebugPrintf(" <DICT> ");}
-
- for (i=0; i<rlRange->fIncludesSets->size(); i++) {
- RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
- UnicodeString setName = UNICODE_STRING("anon", 4);
- RBBINode *setRef = usetNode->fParent;
- if (setRef != NULL) {
- RBBINode *varRef = setRef->fParent;
- if (varRef != NULL && varRef->fType == RBBINode::varRef) {
- setName = varRef->fText;
- }
- }
- RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" ");
- }
-
- i = 0;
- for (tRange = rlRange; tRange != 0; tRange = tRange->fNext) {
- if (tRange->fNum == rlRange->fNum) {
- if (i++ % 5 == 0) {
- RBBIDebugPrintf("\n ");
- }
- RBBIDebugPrintf(" %05x-%05x", tRange->fStartChar, tRange->fEndChar);
- }
- }
- RBBIDebugPrintf("\n");
- }
- }
- RBBIDebugPrintf("\n");
-}
-#endif
-
-
-//------------------------------------------------------------------------
-//
-// printSets A debugging function.
-// dump out all of the set definitions.
-//
-//------------------------------------------------------------------------
-#ifdef RBBI_DEBUG
-void RBBISetBuilder::printSets() {
- int i;
-
- RBBIDebugPrintf("\n\nUnicode Sets List\n------------------\n");
- for (i=0; ; i++) {
- RBBINode *usetNode;
- RBBINode *setRef;
- RBBINode *varRef;
- UnicodeString setName;
-
- usetNode = (RBBINode *)fRB->fUSetNodes->elementAt(i);
- if (usetNode == NULL) {
- break;
- }
-
- RBBIDebugPrintf("%3d ", i);
- setName = UNICODE_STRING("anonymous", 9);
- setRef = usetNode->fParent;
- if (setRef != NULL) {
- varRef = setRef->fParent;
- if (varRef != NULL && varRef->fType == RBBINode::varRef) {
- setName = varRef->fText;
- }
- }
- RBBI_DEBUG_printUnicodeString(setName);
- RBBIDebugPrintf(" ");
- RBBI_DEBUG_printUnicodeString(usetNode->fText);
- RBBIDebugPrintf("\n");
- if (usetNode->fLeftChild != NULL) {
- RBBINode::printTree(usetNode->fLeftChild, TRUE);
- }
- }
- RBBIDebugPrintf("\n");
-}
-#endif
-
-
-
-//-------------------------------------------------------------------------------------
-//
-// RangeDescriptor copy constructor
-//
-//-------------------------------------------------------------------------------------
-
-RangeDescriptor::RangeDescriptor(const RangeDescriptor &other, UErrorCode &status) {
- int i;
-
- this->fStartChar = other.fStartChar;
- this->fEndChar = other.fEndChar;
- this->fNum = other.fNum;
- this->fNext = NULL;
- UErrorCode oldstatus = status;
- this->fIncludesSets = new UVector(status);
- if (U_FAILURE(oldstatus)) {
- status = oldstatus;
- }
- if (U_FAILURE(status)) {
- return;
- }
- /* test for NULL */
- if (this->fIncludesSets == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- for (i=0; i<other.fIncludesSets->size(); i++) {
- this->fIncludesSets->addElement(other.fIncludesSets->elementAt(i), status);
- }
-}
-
-
-//-------------------------------------------------------------------------------------
-//
-// RangeDesriptor default constructor
-//
-//-------------------------------------------------------------------------------------
-RangeDescriptor::RangeDescriptor(UErrorCode &status) {
- this->fStartChar = 0;
- this->fEndChar = 0;
- this->fNum = 0;
- this->fNext = NULL;
- UErrorCode oldstatus = status;
- this->fIncludesSets = new UVector(status);
- if (U_FAILURE(oldstatus)) {
- status = oldstatus;
- }
- if (U_FAILURE(status)) {
- return;
- }
- /* test for NULL */
- if(this->fIncludesSets == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
-}
-
-
-//-------------------------------------------------------------------------------------
-//
-// RangeDesriptor Destructor
-//
-//-------------------------------------------------------------------------------------
-RangeDescriptor::~RangeDescriptor() {
- delete fIncludesSets;
- fIncludesSets = NULL;
-}
-
-//-------------------------------------------------------------------------------------
-//
-// RangeDesriptor::split()
-//
-//-------------------------------------------------------------------------------------
-void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
- U_ASSERT(where>fStartChar && where<=fEndChar);
- RangeDescriptor *nr = new RangeDescriptor(*this, status);
- if(nr == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- if (U_FAILURE(status)) {
- delete nr;
- return;
- }
- // RangeDescriptor copy constructor copies all fields.
- // Only need to update those that are different after the split.
- nr->fStartChar = where;
- this->fEndChar = where-1;
- nr->fNext = this->fNext;
- this->fNext = nr;
-}
-
-
-//-------------------------------------------------------------------------------------
-//
-// RangeDescriptor::setDictionaryFlag
-//
-// Character Category Numbers that include characters from
-// the original Unicode Set named "dictionary" have bit 14
-// set to 1. The RBBI runtime engine uses this to trigger
-// use of the word dictionary.
-//
-// This function looks through the Unicode Sets that it
-// (the range) includes, and sets the bit in fNum when
-// "dictionary" is among them.
-//
-// TODO: a faster way would be to find the set node for
-// "dictionary" just once, rather than looking it
-// up by name every time.
-//
-//-------------------------------------------------------------------------------------
-void RangeDescriptor::setDictionaryFlag() {
- int i;
-
- static const char16_t *dictionary = u"dictionary";
- for (i=0; i<fIncludesSets->size(); i++) {
- RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i);
- RBBINode *setRef = usetNode->fParent;
- if (setRef != nullptr) {
- RBBINode *varRef = setRef->fParent;
- if (varRef && varRef->fType == RBBINode::varRef) {
- const UnicodeString *setName = &varRef->fText;
- if (setName->compare(dictionary, -1) == 0) {
- fNum |= RBBISetBuilder::DICT_BIT;
- break;
- }
- }
- }
- }
-}
-
-
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/contrib/libs/icu/common/rbbisetb.h b/contrib/libs/icu/common/rbbisetb.h
deleted file mode 100644
index ed6a76b1214..00000000000
--- a/contrib/libs/icu/common/rbbisetb.h
+++ /dev/null
@@ -1,147 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//
-// rbbisetb.h
-/*
-**********************************************************************
-* Copyright (c) 2001-2005, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#ifndef RBBISETB_H
-#define RBBISETB_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/uobject.h"
-#include "rbbirb.h"
-#include "utrie2.h"
-#include "uvector.h"
-
-U_NAMESPACE_BEGIN
-
-//
-// RBBISetBuilder Derives the character categories used by the runtime RBBI engine
-// from the Unicode Sets appearing in the source RBBI rules, and
-// creates the TRIE table used to map from Unicode to the
-// character categories.
-//
-
-
-//
-// RangeDescriptor
-//
-// Each of the non-overlapping character ranges gets one of these descriptors.
-// All of them are strung together in a linked list, which is kept in order
-// (by character)
-//
-class RangeDescriptor : public UMemory {
-public:
- UChar32 fStartChar; // Start of range, unicode 32 bit value.
- UChar32 fEndChar; // End of range, unicode 32 bit value.
- int32_t fNum; // runtime-mapped input value for this range.
- UVector *fIncludesSets; // vector of the the original
- // Unicode sets that include this range.
- // (Contains ptrs to uset nodes)
- RangeDescriptor *fNext; // Next RangeDescriptor in the linked list.
-
- RangeDescriptor(UErrorCode &status);
- RangeDescriptor(const RangeDescriptor &other, UErrorCode &status);
- ~RangeDescriptor();
- void split(UChar32 where, UErrorCode &status); // Spit this range in two at "where", with
- // where appearing in the second (higher) part.
- void setDictionaryFlag(); // Check whether this range appears as part of
- // the Unicode set named "dictionary"
-
-private:
- RangeDescriptor(const RangeDescriptor &other); // forbid copying of this class
- RangeDescriptor &operator=(const RangeDescriptor &other); // forbid copying of this class
-};
-
-
-//
-// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules.
-//
-// Starting with the rules parse tree from the scanner,
-//
-// - Enumerate the set of UnicodeSets that are referenced
-// by the RBBI rules.
-// - compute a derived set of non-overlapping UnicodeSets
-// that will correspond to columns in the state table for
-// the RBBI execution engine.
-// - construct the trie table that maps input characters
-// to set numbers in the non-overlapping set of sets.
-//
-
-
-class RBBISetBuilder : public UMemory {
-public:
- RBBISetBuilder(RBBIRuleBuilder *rb);
- ~RBBISetBuilder();
-
- void buildRanges();
- void buildTrie();
- void addValToSets(UVector *sets, uint32_t val);
- void addValToSet (RBBINode *usetNode, uint32_t val);
- int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the
- // runtime state machine, which are the same as
- // columns in the DFA state table
- int32_t getTrieSize() /*const*/; // Size in bytes of the serialized Trie.
- void serializeTrie(uint8_t *where); // write out the serialized Trie.
- UChar32 getFirstChar(int32_t val) const;
- UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo
- // character were encountered.
- /**
- * Merge two character categories that have been identified as having equivalent behavior.
- * The ranges belonging to the second category (table column) will be added to the first.
- * @param categories the pair of categories to be merged.
- */
- void mergeCategories(IntPair categories);
-
- static constexpr int32_t DICT_BIT = 0x4000;
-
-#ifdef RBBI_DEBUG
- void printSets();
- void printRanges();
- void printRangeGroups();
-#else
- #define printSets()
- #define printRanges()
- #define printRangeGroups()
-#endif
-
-private:
- void numberSets();
-
- RBBIRuleBuilder *fRB; // The RBBI Rule Compiler that owns us.
- UErrorCode *fStatus;
-
- RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors
-
- UTrie2 *fTrie; // The mapping TRIE that is the end result of processing
- uint32_t fTrieSize; // the Unicode Sets.
-
- // Groups correspond to character categories -
- // groups of ranges that are in the same original UnicodeSets.
- // fGroupCount is the index of the last used group.
- // fGroupCount+1 is also the number of columns in the RBBI state table being compiled.
- // State table column 0 is not used. Column 1 is for end-of-input.
- // column 2 is for group 0. Funny counting.
- int32_t fGroupCount;
-
- UBool fSawBOF;
-
- RBBISetBuilder(const RBBISetBuilder &other); // forbid copying of this class
- RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class
-};
-
-
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
-
-#endif
diff --git a/contrib/libs/icu/common/rbbistbl.cpp b/contrib/libs/icu/common/rbbistbl.cpp
deleted file mode 100644
index 5303f760969..00000000000
--- a/contrib/libs/icu/common/rbbistbl.cpp
+++ /dev/null
@@ -1,270 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//
-// file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class
-//
-/*
-***************************************************************************
-* Copyright (C) 2002-2014 International Business Machines Corporation
-* and others. All rights reserved.
-***************************************************************************
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/unistr.h"
-#include "unicode/uniset.h"
-#include "unicode/uchar.h"
-#include "unicode/parsepos.h"
-
-#include "cstr.h"
-#include "rbbinode.h"
-#include "rbbirb.h"
-#include "umutex.h"
-
-
-//
-// RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents
-// when the hash table is deleted.
-//
-U_CDECL_BEGIN
-static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
- icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p;
- delete px;
-}
-U_CDECL_END
-
-
-
-U_NAMESPACE_BEGIN
-
-RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
- :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))
-{
- fHashTable = NULL;
- fCachedSetLookup = NULL;
-
- fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
- // uhash_open checks status
- if (U_FAILURE(status)) {
- return;
- }
- uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
-}
-
-
-
-RBBISymbolTable::~RBBISymbolTable()
-{
- uhash_close(fHashTable);
-}
-
-
-//
-// RBBISymbolTable::lookup This function from the abstract symbol table inteface
-// looks up a variable name and returns a UnicodeString
-// containing the substitution text.
-//
-// The variable name does NOT include the leading $.
-//
-const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const
-{
- RBBISymbolTableEntry *el;
- RBBINode *varRefNode;
- RBBINode *exprNode;
- RBBINode *usetNode;
- const UnicodeString *retString;
- RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const
-
- el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s);
- if (el == NULL) {
- return NULL;
- }
-
- varRefNode = el->val;
- exprNode = varRefNode->fLeftChild; // Root node of expression for variable
- if (exprNode->fType == RBBINode::setRef) {
- // The $variable refers to a single UnicodeSet
- // return the ffffString, which will subsequently be interpreted as a
- // stand-in character for the set by RBBISymbolTable::lookupMatcher()
- usetNode = exprNode->fLeftChild;
- This->fCachedSetLookup = usetNode->fInputSet;
- retString = &ffffString;
- }
- else
- {
- // The variable refers to something other than just a set.
- // return the original source string for the expression
- retString = &exprNode->fText;
- This->fCachedSetLookup = NULL;
- }
- return retString;
-}
-
-
-
-//
-// RBBISymbolTable::lookupMatcher This function from the abstract symbol table
-// interface maps a single stand-in character to a
-// pointer to a Unicode Set. The Unicode Set code uses this
-// mechanism to get all references to the same $variable
-// name to refer to a single common Unicode Set instance.
-//
-// This implementation cheats a little, and does not maintain a map of stand-in chars
-// to sets. Instead, it takes advantage of the fact that the UnicodeSet
-// constructor will always call this function right after calling lookup(),
-// and we just need to remember what set to return between these two calls.
-const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
-{
- UnicodeSet *retVal = NULL;
- RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const
- if (ch == 0xffff) {
- retVal = fCachedSetLookup;
- This->fCachedSetLookup = 0;
- }
- return retVal;
-}
-
-//
-// RBBISymbolTable::parseReference This function from the abstract symbol table interface
-// looks for a $variable name in the source text.
-// It does not look it up, only scans for it.
-// It is used by the UnicodeSet parser.
-//
-// This implementation is lifted pretty much verbatim
-// from the rules based transliterator implementation.
-// I didn't see an obvious way of sharing it.
-//
-UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text,
- ParsePosition& pos, int32_t limit) const
-{
- int32_t start = pos.getIndex();
- int32_t i = start;
- UnicodeString result;
- while (i < limit) {
- UChar c = text.charAt(i);
- if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
- break;
- }
- ++i;
- }
- if (i == start) { // No valid name chars
- return result; // Indicate failure with empty string
- }
- pos.setIndex(i);
- text.extractBetween(start, i, result);
- return result;
-}
-
-
-
-//
-// RBBISymbolTable::lookupNode Given a key (a variable name), return the
-// corresponding RBBI Node. If there is no entry
-// in the table for this name, return NULL.
-//
-RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
-
- RBBINode *retNode = NULL;
- RBBISymbolTableEntry *el;
-
- el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
- if (el != NULL) {
- retNode = el->val;
- }
- return retNode;
-}
-
-
-//
-// RBBISymbolTable::addEntry Add a new entry to the symbol table.
-// Indicate an error if the name already exists -
-// this will only occur in the case of duplicate
-// variable assignments.
-//
-void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
- RBBISymbolTableEntry *e;
- /* test for buffer overflows */
- if (U_FAILURE(err)) {
- return;
- }
- e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
- if (e != NULL) {
- err = U_BRK_VARIABLE_REDFINITION;
- return;
- }
-
- e = new RBBISymbolTableEntry;
- if (e == NULL) {
- err = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- e->key = key;
- e->val = val;
- uhash_put( fHashTable, &e->key, e, &err);
-}
-
-
-RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {}
-
-RBBISymbolTableEntry::~RBBISymbolTableEntry() {
- // The "val" of a symbol table entry is a variable reference node.
- // The l. child of the val is the rhs expression from the assignment.
- // Unlike other node types, children of variable reference nodes are not
- // automatically recursively deleted. We do it manually here.
- delete val->fLeftChild;
- val->fLeftChild = NULL;
-
- delete val;
-
- // Note: the key UnicodeString is destructed by virtue of being in the object by value.
-}
-
-
-//
-// RBBISymbolTable::print Debugging function, dump out the symbol table contents.
-//
-#ifdef RBBI_DEBUG
-void RBBISymbolTable::rbbiSymtablePrint() const {
- RBBIDebugPrintf("Variable Definitions Symbol Table\n"
- "Name Node serial String Val\n"
- "-------------------------------------------------------------------\n");
-
- int32_t pos = UHASH_FIRST;
- const UHashElement *e = NULL;
- for (;;) {
- e = uhash_nextElement(fHashTable, &pos);
- if (e == NULL ) {
- break;
- }
- RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
-
- RBBIDebugPrintf("%-19s %8p %7d ", CStr(s->key)(), (void *)s->val, s->val->fSerialNum);
- RBBIDebugPrintf(" %s\n", CStr(s->val->fLeftChild->fText)());
- }
-
- RBBIDebugPrintf("\nParsed Variable Definitions\n");
- pos = -1;
- for (;;) {
- e = uhash_nextElement(fHashTable, &pos);
- if (e == NULL ) {
- break;
- }
- RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
- RBBIDebugPrintf("%s\n", CStr(s->key)());
- RBBINode::printTree(s->val, TRUE);
- RBBINode::printTree(s->val->fLeftChild, FALSE);
- RBBIDebugPrintf("\n");
- }
-}
-#endif
-
-
-
-
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/contrib/libs/icu/common/rbbitblb.cpp b/contrib/libs/icu/common/rbbitblb.cpp
deleted file mode 100644
index 960ef7ec822..00000000000
--- a/contrib/libs/icu/common/rbbitblb.cpp
+++ /dev/null
@@ -1,1739 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2002-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-//
-// rbbitblb.cpp
-//
-
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/unistr.h"
-#include "rbbitblb.h"
-#include "rbbirb.h"
-#include "rbbiscan.h"
-#include "rbbisetb.h"
-#include "rbbidata.h"
-#include "cstring.h"
-#include "uassert.h"
-#include "uvectr32.h"
-#include "cmemory.h"
-
-U_NAMESPACE_BEGIN
-
-RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status) :
- fRB(rb),
- fTree(*rootNode),
- fStatus(&status),
- fDStates(nullptr),
- fSafeTable(nullptr) {
- if (U_FAILURE(status)) {
- return;
- }
- // fDStates is UVector<RBBIStateDescriptor *>
- fDStates = new UVector(status);
- if (U_SUCCESS(status) && fDStates == nullptr ) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
-}
-
-
-
-RBBITableBuilder::~RBBITableBuilder() {
- int i;
- for (i=0; i<fDStates->size(); i++) {
- delete (RBBIStateDescriptor *)fDStates->elementAt(i);
- }
- delete fDStates;
- delete fSafeTable;
- delete fLookAheadRuleMap;
-}
-
-
-//-----------------------------------------------------------------------------
-//
-// RBBITableBuilder::buildForwardTable - This is the main function for building
-// the DFA state transition table from the RBBI rules parse tree.
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::buildForwardTable() {
-
- if (U_FAILURE(*fStatus)) {
- return;
- }
-
- // If there were no rules, just return. This situation can easily arise
- // for the reverse rules.
- if (fTree==NULL) {
- return;
- }
-
- //
- // Walk through the tree, replacing any references to $variables with a copy of the
- // parse tree for the substition expression.
- //
- fTree = fTree->flattenVariables();
-#ifdef RBBI_DEBUG
- if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) {
- RBBIDebugPuts("\nParse tree after flattening variable references.");
- RBBINode::printTree(fTree, TRUE);
- }
-#endif
-
- //
- // If the rules contained any references to {bof}
- // add a {bof} <cat> <former root of tree> to the
- // tree. Means that all matches must start out with the
- // {bof} fake character.
- //
- if (fRB->fSetBuilder->sawBOF()) {
- RBBINode *bofTop = new RBBINode(RBBINode::opCat);
- RBBINode *bofLeaf = new RBBINode(RBBINode::leafChar);
- // Delete and exit if memory allocation failed.
- if (bofTop == NULL || bofLeaf == NULL) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- delete bofTop;
- delete bofLeaf;
- return;
- }
- bofTop->fLeftChild = bofLeaf;
- bofTop->fRightChild = fTree;
- bofLeaf->fParent = bofTop;
- bofLeaf->fVal = 2; // Reserved value for {bof}.
- fTree = bofTop;
- }
-
- //
- // Add a unique right-end marker to the expression.
- // Appears as a cat-node, left child being the original tree,
- // right child being the end marker.
- //
- RBBINode *cn = new RBBINode(RBBINode::opCat);
- // Exit if memory allocation failed.
- if (cn == NULL) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- cn->fLeftChild = fTree;
- fTree->fParent = cn;
- RBBINode *endMarkerNode = cn->fRightChild = new RBBINode(RBBINode::endMark);
- // Delete and exit if memory allocation failed.
- if (cn->fRightChild == NULL) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- delete cn;
- return;
- }
- cn->fRightChild->fParent = cn;
- fTree = cn;
-
- //
- // Replace all references to UnicodeSets with the tree for the equivalent
- // expression.
- //
- fTree->flattenSets();
-#ifdef RBBI_DEBUG
- if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) {
- RBBIDebugPuts("\nParse tree after flattening Unicode Set references.");
- RBBINode::printTree(fTree, TRUE);
- }
-#endif
-
-
- //
- // calculate the functions nullable, firstpos, lastpos and followpos on
- // nodes in the parse tree.
- // See the alogrithm description in Aho.
- // Understanding how this works by looking at the code alone will be
- // nearly impossible.
- //
- calcNullable(fTree);
- calcFirstPos(fTree);
- calcLastPos(fTree);
- calcFollowPos(fTree);
- if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) {
- RBBIDebugPuts("\n");
- printPosSets(fTree);
- }
-
- //
- // For "chained" rules, modify the followPos sets
- //
- if (fRB->fChainRules) {
- calcChainedFollowPos(fTree, endMarkerNode);
- }
-
- //
- // BOF (start of input) test fixup.
- //
- if (fRB->fSetBuilder->sawBOF()) {
- bofFixup();
- }
-
- //
- // Build the DFA state transition tables.
- //
- buildStateTable();
- mapLookAheadRules();
- flagAcceptingStates();
- flagLookAheadStates();
- flagTaggedStates();
-
- //
- // Update the global table of rule status {tag} values
- // The rule builder has a global vector of status values that are common
- // for all tables. Merge the ones from this table into the global set.
- //
- mergeRuleStatusVals();
-}
-
-
-
-//-----------------------------------------------------------------------------
-//
-// calcNullable. Impossible to explain succinctly. See Aho, section 3.9
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::calcNullable(RBBINode *n) {
- if (n == NULL) {
- return;
- }
- if (n->fType == RBBINode::setRef ||
- n->fType == RBBINode::endMark ) {
- // These are non-empty leaf node types.
- n->fNullable = FALSE;
- return;
- }
-
- if (n->fType == RBBINode::lookAhead || n->fType == RBBINode::tag) {
- // Lookahead marker node. It's a leaf, so no recursion on children.
- // It's nullable because it does not match any literal text from the input stream.
- n->fNullable = TRUE;
- return;
- }
-
-
- // The node is not a leaf.
- // Calculate nullable on its children.
- calcNullable(n->fLeftChild);
- calcNullable(n->fRightChild);
-
- // Apply functions from table 3.40 in Aho
- if (n->fType == RBBINode::opOr) {
- n->fNullable = n->fLeftChild->fNullable || n->fRightChild->fNullable;
- }
- else if (n->fType == RBBINode::opCat) {
- n->fNullable = n->fLeftChild->fNullable && n->fRightChild->fNullable;
- }
- else if (n->fType == RBBINode::opStar || n->fType == RBBINode::opQuestion) {
- n->fNullable = TRUE;
- }
- else {
- n->fNullable = FALSE;
- }
-}
-
-
-
-
-//-----------------------------------------------------------------------------
-//
-// calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::calcFirstPos(RBBINode *n) {
- if (n == NULL) {
- return;
- }
- if (n->fType == RBBINode::leafChar ||
- n->fType == RBBINode::endMark ||
- n->fType == RBBINode::lookAhead ||
- n->fType == RBBINode::tag) {
- // These are non-empty leaf node types.
- // Note: In order to maintain the sort invariant on the set,
- // this function should only be called on a node whose set is
- // empty to start with.
- n->fFirstPosSet->addElement(n, *fStatus);
- return;
- }
-
- // The node is not a leaf.
- // Calculate firstPos on its children.
- calcFirstPos(n->fLeftChild);
- calcFirstPos(n->fRightChild);
-
- // Apply functions from table 3.40 in Aho
- if (n->fType == RBBINode::opOr) {
- setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
- setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet);
- }
- else if (n->fType == RBBINode::opCat) {
- setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
- if (n->fLeftChild->fNullable) {
- setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet);
- }
- }
- else if (n->fType == RBBINode::opStar ||
- n->fType == RBBINode::opQuestion ||
- n->fType == RBBINode::opPlus) {
- setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
- }
-}
-
-
-
-//-----------------------------------------------------------------------------
-//
-// calcLastPos. Impossible to explain succinctly. See Aho, section 3.9
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::calcLastPos(RBBINode *n) {
- if (n == NULL) {
- return;
- }
- if (n->fType == RBBINode::leafChar ||
- n->fType == RBBINode::endMark ||
- n->fType == RBBINode::lookAhead ||
- n->fType == RBBINode::tag) {
- // These are non-empty leaf node types.
- // Note: In order to maintain the sort invariant on the set,
- // this function should only be called on a node whose set is
- // empty to start with.
- n->fLastPosSet->addElement(n, *fStatus);
- return;
- }
-
- // The node is not a leaf.
- // Calculate lastPos on its children.
- calcLastPos(n->fLeftChild);
- calcLastPos(n->fRightChild);
-
- // Apply functions from table 3.40 in Aho
- if (n->fType == RBBINode::opOr) {
- setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
- setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet);
- }
- else if (n->fType == RBBINode::opCat) {
- setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet);
- if (n->fRightChild->fNullable) {
- setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
- }
- }
- else if (n->fType == RBBINode::opStar ||
- n->fType == RBBINode::opQuestion ||
- n->fType == RBBINode::opPlus) {
- setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
- }
-}
-
-
-
-//-----------------------------------------------------------------------------
-//
-// calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::calcFollowPos(RBBINode *n) {
- if (n == NULL ||
- n->fType == RBBINode::leafChar ||
- n->fType == RBBINode::endMark) {
- return;
- }
-
- calcFollowPos(n->fLeftChild);
- calcFollowPos(n->fRightChild);
-
- // Aho rule #1
- if (n->fType == RBBINode::opCat) {
- RBBINode *i; // is 'i' in Aho's description
- uint32_t ix;
-
- UVector *LastPosOfLeftChild = n->fLeftChild->fLastPosSet;
-
- for (ix=0; ix<(uint32_t)LastPosOfLeftChild->size(); ix++) {
- i = (RBBINode *)LastPosOfLeftChild->elementAt(ix);
- setAdd(i->fFollowPos, n->fRightChild->fFirstPosSet);
- }
- }
-
- // Aho rule #2
- if (n->fType == RBBINode::opStar ||
- n->fType == RBBINode::opPlus) {
- RBBINode *i; // again, n and i are the names from Aho's description.
- uint32_t ix;
-
- for (ix=0; ix<(uint32_t)n->fLastPosSet->size(); ix++) {
- i = (RBBINode *)n->fLastPosSet->elementAt(ix);
- setAdd(i->fFollowPos, n->fFirstPosSet);
- }
- }
-
-
-
-}
-
-//-----------------------------------------------------------------------------
-//
-// addRuleRootNodes Recursively walk a parse tree, adding all nodes flagged
-// as roots of a rule to a destination vector.
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::addRuleRootNodes(UVector *dest, RBBINode *node) {
- if (node == NULL || U_FAILURE(*fStatus)) {
- return;
- }
- if (node->fRuleRoot) {
- dest->addElement(node, *fStatus);
- // Note: rules cannot nest. If we found a rule start node,
- // no child node can also be a start node.
- return;
- }
- addRuleRootNodes(dest, node->fLeftChild);
- addRuleRootNodes(dest, node->fRightChild);
-}
-
-//-----------------------------------------------------------------------------
-//
-// calcChainedFollowPos. Modify the previously calculated followPos sets
-// to implement rule chaining. NOT described by Aho
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree, RBBINode *endMarkNode) {
-
- UVector leafNodes(*fStatus);
- if (U_FAILURE(*fStatus)) {
- return;
- }
-
- // get a list all leaf nodes
- tree->findNodes(&leafNodes, RBBINode::leafChar, *fStatus);
- if (U_FAILURE(*fStatus)) {
- return;
- }
-
- // Collect all leaf nodes that can start matches for rules
- // with inbound chaining enabled, which is the union of the
- // firstPosition sets from each of the rule root nodes.
-
- UVector ruleRootNodes(*fStatus);
- addRuleRootNodes(&ruleRootNodes, tree);
-
- UVector matchStartNodes(*fStatus);
- for (int j=0; j<ruleRootNodes.size(); ++j) {
- RBBINode *node = static_cast<RBBINode *>(ruleRootNodes.elementAt(j));
- if (node->fChainIn) {
- setAdd(&matchStartNodes, node->fFirstPosSet);
- }
- }
- if (U_FAILURE(*fStatus)) {
- return;
- }
-
- int32_t endNodeIx;
- int32_t startNodeIx;
-
- for (endNodeIx=0; endNodeIx<leafNodes.size(); endNodeIx++) {
- RBBINode *endNode = (RBBINode *)leafNodes.elementAt(endNodeIx);
-
- // Identify leaf nodes that correspond to overall rule match positions.
- // These include the endMarkNode in their followPos sets.
- //
- // Note: do not consider other end marker nodes, those that are added to
- // look-ahead rules. These can't chain; a match immediately stops
- // further matching. This leaves exactly one end marker node, the one
- // at the end of the complete tree.
-
- if (!endNode->fFollowPos->contains(endMarkNode)) {
- continue;
- }
-
- // We've got a node that can end a match.
-
- // !!LBCMNoChain implementation: If this node's val correspond to
- // the Line Break $CM char class, don't chain from it.
- // TODO: Remove this. !!LBCMNoChain is deprecated, and is not used
- // by any of the standard ICU rules.
- if (fRB->fLBCMNoChain) {
- UChar32 c = this->fRB->fSetBuilder->getFirstChar(endNode->fVal);
- if (c != -1) {
- // c == -1 occurs with sets containing only the {eof} marker string.
- ULineBreak cLBProp = (ULineBreak)u_getIntPropertyValue(c, UCHAR_LINE_BREAK);
- if (cLBProp == U_LB_COMBINING_MARK) {
- continue;
- }
- }
- }
-
- // Now iterate over the nodes that can start a match, looking for ones
- // with the same char class as our ending node.
- RBBINode *startNode;
- for (startNodeIx = 0; startNodeIx<matchStartNodes.size(); startNodeIx++) {
- startNode = (RBBINode *)matchStartNodes.elementAt(startNodeIx);
- if (startNode->fType != RBBINode::leafChar) {
- continue;
- }
-
- if (endNode->fVal == startNode->fVal) {
- // The end val (character class) of one possible match is the
- // same as the start of another.
-
- // Add all nodes from the followPos of the start node to the
- // followPos set of the end node, which will have the effect of
- // letting matches transition from a match state at endNode
- // to the second char of a match starting with startNode.
- setAdd(endNode->fFollowPos, startNode->fFollowPos);
- }
- }
- }
-}
-
-
-//-----------------------------------------------------------------------------
-//
-// bofFixup. Fixup for state tables that include {bof} beginning of input testing.
-// Do an swizzle similar to chaining, modifying the followPos set of
-// the bofNode to include the followPos nodes from other {bot} nodes
-// scattered through the tree.
-//
-// This function has much in common with calcChainedFollowPos().
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::bofFixup() {
-
- if (U_FAILURE(*fStatus)) {
- return;
- }
-
- // The parse tree looks like this ...
- // fTree root ---> <cat>
- // / \ .
- // <cat> <#end node>
- // / \ .
- // <bofNode> rest
- // of tree
- //
- // We will be adding things to the followPos set of the <bofNode>
- //
- RBBINode *bofNode = fTree->fLeftChild->fLeftChild;
- U_ASSERT(bofNode->fType == RBBINode::leafChar);
- U_ASSERT(bofNode->fVal == 2);
-
- // Get all nodes that can be the start a match of the user-written rules
- // (excluding the fake bofNode)
- // We want the nodes that can start a match in the
- // part labeled "rest of tree"
- //
- UVector *matchStartNodes = fTree->fLeftChild->fRightChild->fFirstPosSet;
-
- RBBINode *startNode;
- int startNodeIx;
- for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++) {
- startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx);
- if (startNode->fType != RBBINode::leafChar) {
- continue;
- }
-
- if (startNode->fVal == bofNode->fVal) {
- // We found a leaf node corresponding to a {bof} that was
- // explicitly written into a rule.
- // Add everything from the followPos set of this node to the
- // followPos set of the fake bofNode at the start of the tree.
- //
- setAdd(bofNode->fFollowPos, startNode->fFollowPos);
- }
- }
-}
-
-//-----------------------------------------------------------------------------
-//
-// buildStateTable() Determine the set of runtime DFA states and the
-// transition tables for these states, by the algorithm
-// of fig. 3.44 in Aho.
-//
-// Most of the comments are quotes of Aho's psuedo-code.
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::buildStateTable() {
- if (U_FAILURE(*fStatus)) {
- return;
- }
- RBBIStateDescriptor *failState;
- // Set it to NULL to avoid uninitialized warning
- RBBIStateDescriptor *initialState = NULL;
- //
- // Add a dummy state 0 - the stop state. Not from Aho.
- int lastInputSymbol = fRB->fSetBuilder->getNumCharCategories() - 1;
- failState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
- if (failState == NULL) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- goto ExitBuildSTdeleteall;
- }
- failState->fPositions = new UVector(*fStatus);
- if (failState->fPositions == NULL) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- }
- if (failState->fPositions == NULL || U_FAILURE(*fStatus)) {
- goto ExitBuildSTdeleteall;
- }
- fDStates->addElement(failState, *fStatus);
- if (U_FAILURE(*fStatus)) {
- goto ExitBuildSTdeleteall;
- }
-
- // initially, the only unmarked state in Dstates is firstpos(root),
- // where toot is the root of the syntax tree for (r)#;
- initialState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
- if (initialState == NULL) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- }
- if (U_FAILURE(*fStatus)) {
- goto ExitBuildSTdeleteall;
- }
- initialState->fPositions = new UVector(*fStatus);
- if (initialState->fPositions == NULL) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- }
- if (U_FAILURE(*fStatus)) {
- goto ExitBuildSTdeleteall;
- }
- setAdd(initialState->fPositions, fTree->fFirstPosSet);
- fDStates->addElement(initialState, *fStatus);
- if (U_FAILURE(*fStatus)) {
- goto ExitBuildSTdeleteall;
- }
-
- // while there is an unmarked state T in Dstates do begin
- for (;;) {
- RBBIStateDescriptor *T = NULL;
- int32_t tx;
- for (tx=1; tx<fDStates->size(); tx++) {
- RBBIStateDescriptor *temp;
- temp = (RBBIStateDescriptor *)fDStates->elementAt(tx);
- if (temp->fMarked == FALSE) {
- T = temp;
- break;
- }
- }
- if (T == NULL) {
- break;
- }
-
- // mark T;
- T->fMarked = TRUE;
-
- // for each input symbol a do begin
- int32_t a;
- for (a = 1; a<=lastInputSymbol; a++) {
- // let U be the set of positions that are in followpos(p)
- // for some position p in T
- // such that the symbol at position p is a;
- UVector *U = NULL;
- RBBINode *p;
- int32_t px;
- for (px=0; px<T->fPositions->size(); px++) {
- p = (RBBINode *)T->fPositions->elementAt(px);
- if ((p->fType == RBBINode::leafChar) && (p->fVal == a)) {
- if (U == NULL) {
- U = new UVector(*fStatus);
- if (U == NULL) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- goto ExitBuildSTdeleteall;
- }
- }
- setAdd(U, p->fFollowPos);
- }
- }
-
- // if U is not empty and not in DStates then
- int32_t ux = 0;
- UBool UinDstates = FALSE;
- if (U != NULL) {
- U_ASSERT(U->size() > 0);
- int ix;
- for (ix=0; ix<fDStates->size(); ix++) {
- RBBIStateDescriptor *temp2;
- temp2 = (RBBIStateDescriptor *)fDStates->elementAt(ix);
- if (setEquals(U, temp2->fPositions)) {
- delete U;
- U = temp2->fPositions;
- ux = ix;
- UinDstates = TRUE;
- break;
- }
- }
-
- // Add U as an unmarked state to Dstates
- if (!UinDstates)
- {
- RBBIStateDescriptor *newState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
- if (newState == NULL) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- }
- if (U_FAILURE(*fStatus)) {
- goto ExitBuildSTdeleteall;
- }
- newState->fPositions = U;
- fDStates->addElement(newState, *fStatus);
- if (U_FAILURE(*fStatus)) {
- return;
- }
- ux = fDStates->size()-1;
- }
-
- // Dtran[T, a] := U;
- T->fDtran->setElementAt(ux, a);
- }
- }
- }
- return;
- // delete local pointers only if error occured.
-ExitBuildSTdeleteall:
- delete initialState;
- delete failState;
-}
-
-
-/**
- * mapLookAheadRules
- *
- */
-void RBBITableBuilder::mapLookAheadRules() {
- fLookAheadRuleMap = new UVector32(fRB->fScanner->numRules() + 1, *fStatus);
- if (fLookAheadRuleMap == nullptr) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- }
- if (U_FAILURE(*fStatus)) {
- return;
- }
- fLookAheadRuleMap->setSize(fRB->fScanner->numRules() + 1);
- int32_t laSlotsInUse = 0;
-
- for (int32_t n=0; n<fDStates->size(); n++) {
- RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
- int32_t laSlotForState = 0;
-
- // Establish the look-ahead slot for this state, if the state covers
- // any look-ahead nodes - corresponding to the '/' in look-ahead rules.
-
- // If any of the look-ahead nodes already have a slot assigned, use it,
- // otherwise assign a new one.
-
- bool sawLookAheadNode = false;
- for (int32_t ipos=0; ipos<sd->fPositions->size(); ++ipos) {
- RBBINode *node = static_cast<RBBINode *>(sd->fPositions->elementAt(ipos));
- if (node->fType != RBBINode::NodeType::lookAhead) {
- continue;
- }
- sawLookAheadNode = true;
- int32_t ruleNum = node->fVal; // Set when rule was originally parsed.
- U_ASSERT(ruleNum < fLookAheadRuleMap->size());
- U_ASSERT(ruleNum > 0);
- int32_t laSlot = fLookAheadRuleMap->elementAti(ruleNum);
- if (laSlot != 0) {
- if (laSlotForState == 0) {
- laSlotForState = laSlot;
- } else {
- // TODO: figure out if this can fail, change to setting an error code if so.
- U_ASSERT(laSlot == laSlotForState);
- }
- }
- }
- if (!sawLookAheadNode) {
- continue;
- }
-
- if (laSlotForState == 0) {
- laSlotForState = ++laSlotsInUse;
- }
-
- // For each look ahead node covered by this state,
- // set the mapping from the node's rule number to the look ahead slot.
- // There can be multiple nodes/rule numbers going to the same la slot.
-
- for (int32_t ipos=0; ipos<sd->fPositions->size(); ++ipos) {
- RBBINode *node = static_cast<RBBINode *>(sd->fPositions->elementAt(ipos));
- if (node->fType != RBBINode::NodeType::lookAhead) {
- continue;
- }
- int32_t ruleNum = node->fVal; // Set when rule was originally parsed.
- int32_t existingVal = fLookAheadRuleMap->elementAti(ruleNum);
- (void)existingVal;
- U_ASSERT(existingVal == 0 || existingVal == laSlotForState);
- fLookAheadRuleMap->setElementAt(laSlotForState, ruleNum);
- }
- }
-
-}
-
-//-----------------------------------------------------------------------------
-//
-// flagAcceptingStates Identify accepting states.
-// First get a list of all of the end marker nodes.
-// Then, for each state s,
-// if s contains one of the end marker nodes in its list of tree positions then
-// s is an accepting state.
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::flagAcceptingStates() {
- if (U_FAILURE(*fStatus)) {
- return;
- }
- UVector endMarkerNodes(*fStatus);
- RBBINode *endMarker;
- int32_t i;
- int32_t n;
-
- if (U_FAILURE(*fStatus)) {
- return;
- }
-
- fTree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus);
- if (U_FAILURE(*fStatus)) {
- return;
- }
-
- for (i=0; i<endMarkerNodes.size(); i++) {
- endMarker = (RBBINode *)endMarkerNodes.elementAt(i);
- for (n=0; n<fDStates->size(); n++) {
- RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
- if (sd->fPositions->indexOf(endMarker) >= 0) {
- // Any non-zero value for fAccepting means this is an accepting node.
- // The value is what will be returned to the user as the break status.
- // If no other value was specified, force it to -1.
-
- if (sd->fAccepting==0) {
- // State hasn't been marked as accepting yet. Do it now.
- sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);
- if (sd->fAccepting == 0) {
- sd->fAccepting = -1;
- }
- }
- if (sd->fAccepting==-1 && endMarker->fVal != 0) {
- // Both lookahead and non-lookahead accepting for this state.
- // Favor the look-ahead, because a look-ahead match needs to
- // immediately stop the run-time engine. First match, not longest.
- sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);
- }
- // implicit else:
- // if sd->fAccepting already had a value other than 0 or -1, leave it be.
- }
- }
- }
-}
-
-
-//-----------------------------------------------------------------------------
-//
-// flagLookAheadStates Very similar to flagAcceptingStates, above.
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::flagLookAheadStates() {
- if (U_FAILURE(*fStatus)) {
- return;
- }
- UVector lookAheadNodes(*fStatus);
- RBBINode *lookAheadNode;
- int32_t i;
- int32_t n;
-
- fTree->findNodes(&lookAheadNodes, RBBINode::lookAhead, *fStatus);
- if (U_FAILURE(*fStatus)) {
- return;
- }
- for (i=0; i<lookAheadNodes.size(); i++) {
- lookAheadNode = (RBBINode *)lookAheadNodes.elementAt(i);
- U_ASSERT(lookAheadNode->fType == RBBINode::NodeType::lookAhead);
-
- for (n=0; n<fDStates->size(); n++) {
- RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
- int32_t positionsIdx = sd->fPositions->indexOf(lookAheadNode);
- if (positionsIdx >= 0) {
- U_ASSERT(lookAheadNode == sd->fPositions->elementAt(positionsIdx));
- int32_t lookaheadSlot = fLookAheadRuleMap->elementAti(lookAheadNode->fVal);
- U_ASSERT(sd->fLookAhead == 0 || sd->fLookAhead == lookaheadSlot);
- // if (sd->fLookAhead != 0 && sd->fLookAhead != lookaheadSlot) {
- // printf("%s:%d Bingo. sd->fLookAhead:%d lookaheadSlot:%d\n",
- // __FILE__, __LINE__, sd->fLookAhead, lookaheadSlot);
- // }
- sd->fLookAhead = lookaheadSlot;
- }
- }
- }
-}
-
-
-
-
-//-----------------------------------------------------------------------------
-//
-// flagTaggedStates
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::flagTaggedStates() {
- if (U_FAILURE(*fStatus)) {
- return;
- }
- UVector tagNodes(*fStatus);
- RBBINode *tagNode;
- int32_t i;
- int32_t n;
-
- if (U_FAILURE(*fStatus)) {
- return;
- }
- fTree->findNodes(&tagNodes, RBBINode::tag, *fStatus);
- if (U_FAILURE(*fStatus)) {
- return;
- }
- for (i=0; i<tagNodes.size(); i++) { // For each tag node t (all of 'em)
- tagNode = (RBBINode *)tagNodes.elementAt(i);
-
- for (n=0; n<fDStates->size(); n++) { // For each state s (row in the state table)
- RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
- if (sd->fPositions->indexOf(tagNode) >= 0) { // if s include the tag node t
- sortedAdd(&sd->fTagVals, tagNode->fVal);
- }
- }
- }
-}
-
-
-
-
-//-----------------------------------------------------------------------------
-//
-// mergeRuleStatusVals
-//
-// Update the global table of rule status {tag} values
-// The rule builder has a global vector of status values that are common
-// for all tables. Merge the ones from this table into the global set.
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::mergeRuleStatusVals() {
- //
- // The basic outline of what happens here is this...
- //
- // for each state in this state table
- // if the status tag list for this state is in the global statuses list
- // record where and
- // continue with the next state
- // else
- // add the tag list for this state to the global list.
- //
- int i;
- int n;
-
- // Pre-set a single tag of {0} into the table.
- // We will need this as a default, for rule sets with no explicit tagging.
- if (fRB->fRuleStatusVals->size() == 0) {
- fRB->fRuleStatusVals->addElement(1, *fStatus); // Num of statuses in group
- fRB->fRuleStatusVals->addElement((int32_t)0, *fStatus); // and our single status of zero
- }
-
- // For each state
- for (n=0; n<fDStates->size(); n++) {
- RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
- UVector *thisStatesTagValues = sd->fTagVals;
- if (thisStatesTagValues == NULL) {
- // No tag values are explicitly associated with this state.
- // Set the default tag value.
- sd->fTagsIdx = 0;
- continue;
- }
-
- // There are tag(s) associated with this state.
- // fTagsIdx will be the index into the global tag list for this state's tag values.
- // Initial value of -1 flags that we haven't got it set yet.
- sd->fTagsIdx = -1;
- int32_t thisTagGroupStart = 0; // indexes into the global rule status vals list
- int32_t nextTagGroupStart = 0;
-
- // Loop runs once per group of tags in the global list
- while (nextTagGroupStart < fRB->fRuleStatusVals->size()) {
- thisTagGroupStart = nextTagGroupStart;
- nextTagGroupStart += fRB->fRuleStatusVals->elementAti(thisTagGroupStart) + 1;
- if (thisStatesTagValues->size() != fRB->fRuleStatusVals->elementAti(thisTagGroupStart)) {
- // The number of tags for this state is different from
- // the number of tags in this group from the global list.
- // Continue with the next group from the global list.
- continue;
- }
- // The lengths match, go ahead and compare the actual tag values
- // between this state and the group from the global list.
- for (i=0; i<thisStatesTagValues->size(); i++) {
- if (thisStatesTagValues->elementAti(i) !=
- fRB->fRuleStatusVals->elementAti(thisTagGroupStart + 1 + i) ) {
- // Mismatch.
- break;
- }
- }
-
- if (i == thisStatesTagValues->size()) {
- // We found a set of tag values in the global list that match
- // those for this state. Use them.
- sd->fTagsIdx = thisTagGroupStart;
- break;
- }
- }
-
- if (sd->fTagsIdx == -1) {
- // No suitable entry in the global tag list already. Add one
- sd->fTagsIdx = fRB->fRuleStatusVals->size();
- fRB->fRuleStatusVals->addElement(thisStatesTagValues->size(), *fStatus);
- for (i=0; i<thisStatesTagValues->size(); i++) {
- fRB->fRuleStatusVals->addElement(thisStatesTagValues->elementAti(i), *fStatus);
- }
- }
- }
-}
-
-
-
-
-
-
-
-//-----------------------------------------------------------------------------
-//
-// sortedAdd Add a value to a vector of sorted values (ints).
-// Do not replicate entries; if the value is already there, do not
-// add a second one.
-// Lazily create the vector if it does not already exist.
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::sortedAdd(UVector **vector, int32_t val) {
- int32_t i;
-
- if (*vector == NULL) {
- *vector = new UVector(*fStatus);
- }
- if (*vector == NULL || U_FAILURE(*fStatus)) {
- return;
- }
- UVector *vec = *vector;
- int32_t vSize = vec->size();
- for (i=0; i<vSize; i++) {
- int32_t valAtI = vec->elementAti(i);
- if (valAtI == val) {
- // The value is already in the vector. Don't add it again.
- return;
- }
- if (valAtI > val) {
- break;
- }
- }
- vec->insertElementAt(val, i, *fStatus);
-}
-
-
-
-//-----------------------------------------------------------------------------
-//
-// setAdd Set operation on UVector
-// dest = dest union source
-// Elements may only appear once and must be sorted.
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::setAdd(UVector *dest, UVector *source) {
- int32_t destOriginalSize = dest->size();
- int32_t sourceSize = source->size();
- int32_t di = 0;
- MaybeStackArray<void *, 16> destArray, sourceArray; // Handle small cases without malloc
- void **destPtr, **sourcePtr;
- void **destLim, **sourceLim;
-
- if (destOriginalSize > destArray.getCapacity()) {
- if (destArray.resize(destOriginalSize) == NULL) {
- return;
- }
- }
- destPtr = destArray.getAlias();
- destLim = destPtr + destOriginalSize; // destArray.getArrayLimit()?
-
- if (sourceSize > sourceArray.getCapacity()) {
- if (sourceArray.resize(sourceSize) == NULL) {
- return;
- }
- }
- sourcePtr = sourceArray.getAlias();
- sourceLim = sourcePtr + sourceSize; // sourceArray.getArrayLimit()?
-
- // Avoid multiple "get element" calls by getting the contents into arrays
- (void) dest->toArray(destPtr);
- (void) source->toArray(sourcePtr);
-
- dest->setSize(sourceSize+destOriginalSize, *fStatus);
-
- while (sourcePtr < sourceLim && destPtr < destLim) {
- if (*destPtr == *sourcePtr) {
- dest->setElementAt(*sourcePtr++, di++);
- destPtr++;
- }
- // This check is required for machines with segmented memory, like i5/OS.
- // Direct pointer comparison is not recommended.
- else if (uprv_memcmp(destPtr, sourcePtr, sizeof(void *)) < 0) {
- dest->setElementAt(*destPtr++, di++);
- }
- else { /* *sourcePtr < *destPtr */
- dest->setElementAt(*sourcePtr++, di++);
- }
- }
-
- // At most one of these two cleanup loops will execute
- while (destPtr < destLim) {
- dest->setElementAt(*destPtr++, di++);
- }
- while (sourcePtr < sourceLim) {
- dest->setElementAt(*sourcePtr++, di++);
- }
-
- dest->setSize(di, *fStatus);
-}
-
-
-
-//-----------------------------------------------------------------------------
-//
-// setEqual Set operation on UVector.
-// Compare for equality.
-// Elements must be sorted.
-//
-//-----------------------------------------------------------------------------
-UBool RBBITableBuilder::setEquals(UVector *a, UVector *b) {
- return a->equals(*b);
-}
-
-
-//-----------------------------------------------------------------------------
-//
-// printPosSets Debug function. Dump Nullable, firstpos, lastpos and followpos
-// for each node in the tree.
-//
-//-----------------------------------------------------------------------------
-#ifdef RBBI_DEBUG
-void RBBITableBuilder::printPosSets(RBBINode *n) {
- if (n==NULL) {
- return;
- }
- printf("\n");
- RBBINode::printNodeHeader();
- RBBINode::printNode(n);
- RBBIDebugPrintf(" Nullable: %s\n", n->fNullable?"TRUE":"FALSE");
-
- RBBIDebugPrintf(" firstpos: ");
- printSet(n->fFirstPosSet);
-
- RBBIDebugPrintf(" lastpos: ");
- printSet(n->fLastPosSet);
-
- RBBIDebugPrintf(" followpos: ");
- printSet(n->fFollowPos);
-
- printPosSets(n->fLeftChild);
- printPosSets(n->fRightChild);
-}
-#endif
-
-//
-// findDuplCharClassFrom()
-//
-bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) {
- int32_t numStates = fDStates->size();
- int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
-
- for (; categories->first < numCols-1; categories->first++) {
- for (categories->second=categories->first+1; categories->second < numCols; categories->second++) {
- // Initialized to different values to prevent returning true if numStates = 0 (implies no duplicates).
- uint16_t table_base = 0;
- uint16_t table_dupl = 1;
- for (int32_t state=0; state<numStates; state++) {
- RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
- table_base = (uint16_t)sd->fDtran->elementAti(categories->first);
- table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second);
- if (table_base != table_dupl) {
- break;
- }
- }
- if (table_base == table_dupl) {
- return true;
- }
- }
- }
- return false;
-}
-
-
-//
-// removeColumn()
-//
-void RBBITableBuilder::removeColumn(int32_t column) {
- int32_t numStates = fDStates->size();
- for (int32_t state=0; state<numStates; state++) {
- RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
- U_ASSERT(column < sd->fDtran->size());
- sd->fDtran->removeElementAt(column);
- }
-}
-
-/*
- * findDuplicateState
- */
-bool RBBITableBuilder::findDuplicateState(IntPair *states) {
- int32_t numStates = fDStates->size();
- int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
-
- for (; states->first<numStates-1; states->first++) {
- RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(states->first);
- for (states->second=states->first+1; states->second<numStates; states->second++) {
- RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(states->second);
- if (firstSD->fAccepting != duplSD->fAccepting ||
- firstSD->fLookAhead != duplSD->fLookAhead ||
- firstSD->fTagsIdx != duplSD->fTagsIdx) {
- continue;
- }
- bool rowsMatch = true;
- for (int32_t col=0; col < numCols; ++col) {
- int32_t firstVal = firstSD->fDtran->elementAti(col);
- int32_t duplVal = duplSD->fDtran->elementAti(col);
- if (!((firstVal == duplVal) ||
- ((firstVal == states->first || firstVal == states->second) &&
- (duplVal == states->first || duplVal == states->second)))) {
- rowsMatch = false;
- break;
- }
- }
- if (rowsMatch) {
- return true;
- }
- }
- }
- return false;
-}
-
-
-bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) {
- int32_t numStates = fSafeTable->size();
-
- for (; states->first<numStates-1; states->first++) {
- UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->first));
- for (states->second=states->first+1; states->second<numStates; states->second++) {
- UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->second));
- bool rowsMatch = true;
- int32_t numCols = firstRow->length();
- for (int32_t col=0; col < numCols; ++col) {
- int32_t firstVal = firstRow->charAt(col);
- int32_t duplVal = duplRow->charAt(col);
- if (!((firstVal == duplVal) ||
- ((firstVal == states->first || firstVal == states->second) &&
- (duplVal == states->first || duplVal == states->second)))) {
- rowsMatch = false;
- break;
- }
- }
- if (rowsMatch) {
- return true;
- }
- }
- }
- return false;
-}
-
-
-void RBBITableBuilder::removeState(IntPair duplStates) {
- const int32_t keepState = duplStates.first;
- const int32_t duplState = duplStates.second;
- U_ASSERT(keepState < duplState);
- U_ASSERT(duplState < fDStates->size());
-
- RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
- fDStates->removeElementAt(duplState);
- delete duplSD;
-
- int32_t numStates = fDStates->size();
- int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
- for (int32_t state=0; state<numStates; ++state) {
- RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
- for (int32_t col=0; col<numCols; col++) {
- int32_t existingVal = sd->fDtran->elementAti(col);
- int32_t newVal = existingVal;
- if (existingVal == duplState) {
- newVal = keepState;
- } else if (existingVal > duplState) {
- newVal = existingVal - 1;
- }
- sd->fDtran->setElementAt(newVal, col);
- }
- }
-}
-
-void RBBITableBuilder::removeSafeState(IntPair duplStates) {
- const int32_t keepState = duplStates.first;
- const int32_t duplState = duplStates.second;
- U_ASSERT(keepState < duplState);
- U_ASSERT(duplState < fSafeTable->size());
-
- fSafeTable->removeElementAt(duplState); // Note that fSafeTable has a deleter function
- // and will auto-delete the removed element.
- int32_t numStates = fSafeTable->size();
- for (int32_t state=0; state<numStates; ++state) {
- UnicodeString *sd = (UnicodeString *)fSafeTable->elementAt(state);
- int32_t numCols = sd->length();
- for (int32_t col=0; col<numCols; col++) {
- int32_t existingVal = sd->charAt(col);
- int32_t newVal = existingVal;
- if (existingVal == duplState) {
- newVal = keepState;
- } else if (existingVal > duplState) {
- newVal = existingVal - 1;
- }
- sd->setCharAt(col, static_cast<char16_t>(newVal));
- }
- }
-}
-
-
-/*
- * RemoveDuplicateStates
- */
-int32_t RBBITableBuilder::removeDuplicateStates() {
- IntPair dupls = {3, 0};
- int32_t numStatesRemoved = 0;
-
- while (findDuplicateState(&dupls)) {
- // printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
- removeState(dupls);
- ++numStatesRemoved;
- }
- return numStatesRemoved;
-}
-
-
-//-----------------------------------------------------------------------------
-//
-// getTableSize() Calculate the size of the runtime form of this
-// state transition table.
-//
-//-----------------------------------------------------------------------------
-int32_t RBBITableBuilder::getTableSize() const {
- int32_t size = 0;
- int32_t numRows;
- int32_t numCols;
- int32_t rowSize;
-
- if (fTree == NULL) {
- return 0;
- }
-
- size = offsetof(RBBIStateTable, fTableData); // The header, with no rows to the table.
-
- numRows = fDStates->size();
- numCols = fRB->fSetBuilder->getNumCharCategories();
-
- rowSize = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t)*numCols;
- size += numRows * rowSize;
- return size;
-}
-
-
-//-----------------------------------------------------------------------------
-//
-// exportTable() export the state transition table in the format required
-// by the runtime engine. getTableSize() bytes of memory
-// must be available at the output address "where".
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::exportTable(void *where) {
- RBBIStateTable *table = (RBBIStateTable *)where;
- uint32_t state;
- int col;
-
- if (U_FAILURE(*fStatus) || fTree == NULL) {
- return;
- }
-
- int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
- if (catCount > 0x7fff ||
- fDStates->size() > 0x7fff) {
- *fStatus = U_BRK_INTERNAL_ERROR;
- return;
- }
-
- table->fRowLen = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t) * catCount;
- table->fNumStates = fDStates->size();
- table->fFlags = 0;
- if (fRB->fLookAheadHardBreak) {
- table->fFlags |= RBBI_LOOKAHEAD_HARD_BREAK;
- }
- if (fRB->fSetBuilder->sawBOF()) {
- table->fFlags |= RBBI_BOF_REQUIRED;
- }
- table->fReserved = 0;
-
- for (state=0; state<table->fNumStates; state++) {
- RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
- RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen);
- U_ASSERT (-32768 < sd->fAccepting && sd->fAccepting <= 32767);
- U_ASSERT (-32768 < sd->fLookAhead && sd->fLookAhead <= 32767);
- row->fAccepting = (int16_t)sd->fAccepting;
- row->fLookAhead = (int16_t)sd->fLookAhead;
- row->fTagIdx = (int16_t)sd->fTagsIdx;
- for (col=0; col<catCount; col++) {
- row->fNextState[col] = (uint16_t)sd->fDtran->elementAti(col);
- }
- }
-}
-
-
-/**
- * Synthesize a safe state table from the main state table.
- */
-void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
- // The safe table creation has three steps:
-
- // 1. Identifiy pairs of character classes that are "safe." Safe means that boundaries
- // following the pair do not depend on context or state before the pair. To test
- // whether a pair is safe, run it through the main forward state table, starting
- // from each state. If the the final state is the same, no matter what the starting state,
- // the pair is safe.
- //
- // 2. Build a state table that recognizes the safe pairs. It's similar to their
- // forward table, with a column for each input character [class], and a row for
- // each state. Row 1 is the start state, and row 0 is the stop state. Initially
- // create an additional state for each input character category; being in
- // one of these states means that the character has been seen, and is potentially
- // the first of a pair. In each of these rows, the entry for the second character
- // of a safe pair is set to the stop state (0), indicating that a match was found.
- // All other table entries are set to the state corresponding the current input
- // character, allowing that charcter to be the of a start following pair.
- //
- // Because the safe rules are to be run in reverse, moving backwards in the text,
- // the first and second pair categories are swapped when building the table.
- //
- // 3. Compress the table. There are typically many rows (states) that are
- // equivalent - that have zeroes (match completed) in the same columns -
- // and can be folded together.
-
- // Each safe pair is stored as two UChars in the safePair string.
- UnicodeString safePairs;
-
- int32_t numCharClasses = fRB->fSetBuilder->getNumCharCategories();
- int32_t numStates = fDStates->size();
-
- for (int32_t c1=0; c1<numCharClasses; ++c1) {
- for (int32_t c2=0; c2 < numCharClasses; ++c2) {
- int32_t wantedEndState = -1;
- int32_t endState = 0;
- for (int32_t startState = 1; startState < numStates; ++startState) {
- RBBIStateDescriptor *startStateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(startState));
- int32_t s2 = startStateD->fDtran->elementAti(c1);
- RBBIStateDescriptor *s2StateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(s2));
- endState = s2StateD->fDtran->elementAti(c2);
- if (wantedEndState < 0) {
- wantedEndState = endState;
- } else {
- if (wantedEndState != endState) {
- break;
- }
- }
- }
- if (wantedEndState == endState) {
- safePairs.append((char16_t)c1);
- safePairs.append((char16_t)c2);
- // printf("(%d, %d) ", c1, c2);
- }
- }
- // printf("\n");
- }
-
- // Populate the initial safe table.
- // The table as a whole is UVector<UnicodeString>
- // Each row is represented by a UnicodeString, being used as a Vector<int16>.
- // Row 0 is the stop state.
- // Row 1 is the start sate.
- // Row 2 and beyond are other states, initially one per char class, but
- // after initial construction, many of the states will be combined, compacting the table.
- // The String holds the nextState data only. The four leading fields of a row, fAccepting,
- // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building.
-
- U_ASSERT(fSafeTable == nullptr);
- fSafeTable = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, numCharClasses + 2, status);
- for (int32_t row=0; row<numCharClasses + 2; ++row) {
- fSafeTable->addElement(new UnicodeString(numCharClasses, 0, numCharClasses+4), status);
- }
-
- // From the start state, each input char class transitions to the state for that input.
- UnicodeString &startState = *static_cast<UnicodeString *>(fSafeTable->elementAt(1));
- for (int32_t charClass=0; charClass < numCharClasses; ++charClass) {
- // Note: +2 for the start & stop state.
- startState.setCharAt(charClass, static_cast<char16_t>(charClass+2));
- }
-
- // Initially make every other state table row look like the start state row,
- for (int32_t row=2; row<numCharClasses+2; ++row) {
- UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(row));
- rowState = startState; // UnicodeString assignment, copies contents.
- }
-
- // Run through the safe pairs, set the next state to zero when pair has been seen.
- // Zero being the stop state, meaning we found a safe point.
- for (int32_t pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) {
- int32_t c1 = safePairs.charAt(pairIdx);
- int32_t c2 = safePairs.charAt(pairIdx + 1);
-
- UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(c2 + 2));
- rowState.setCharAt(c1, 0);
- }
-
- // Remove duplicate or redundant rows from the table.
- IntPair states = {1, 0};
- while (findDuplicateSafeState(&states)) {
- // printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
- removeSafeState(states);
- }
-}
-
-
-//-----------------------------------------------------------------------------
-//
-// getSafeTableSize() Calculate the size of the runtime form of this
-// safe state table.
-//
-//-----------------------------------------------------------------------------
-int32_t RBBITableBuilder::getSafeTableSize() const {
- int32_t size = 0;
- int32_t numRows;
- int32_t numCols;
- int32_t rowSize;
-
- if (fSafeTable == nullptr) {
- return 0;
- }
-
- size = offsetof(RBBIStateTable, fTableData); // The header, with no rows to the table.
-
- numRows = fSafeTable->size();
- numCols = fRB->fSetBuilder->getNumCharCategories();
-
- rowSize = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t)*numCols;
- size += numRows * rowSize;
- return size;
-}
-
-
-//-----------------------------------------------------------------------------
-//
-// exportSafeTable() export the state transition table in the format required
-// by the runtime engine. getTableSize() bytes of memory
-// must be available at the output address "where".
-//
-//-----------------------------------------------------------------------------
-void RBBITableBuilder::exportSafeTable(void *where) {
- RBBIStateTable *table = (RBBIStateTable *)where;
- uint32_t state;
- int col;
-
- if (U_FAILURE(*fStatus) || fSafeTable == nullptr) {
- return;
- }
-
- int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
- if (catCount > 0x7fff ||
- fSafeTable->size() > 0x7fff) {
- *fStatus = U_BRK_INTERNAL_ERROR;
- return;
- }
-
- table->fRowLen = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t) * catCount;
- table->fNumStates = fSafeTable->size();
- table->fFlags = 0;
- table->fReserved = 0;
-
- for (state=0; state<table->fNumStates; state++) {
- UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(state);
- RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen);
- row->fAccepting = 0;
- row->fLookAhead = 0;
- row->fTagIdx = 0;
- row->fReserved = 0;
- for (col=0; col<catCount; col++) {
- row->fNextState[col] = rowString->charAt(col);
- }
- }
-}
-
-
-
-
-//-----------------------------------------------------------------------------
-//
-// printSet Debug function. Print the contents of a UVector
-//
-//-----------------------------------------------------------------------------
-#ifdef RBBI_DEBUG
-void RBBITableBuilder::printSet(UVector *s) {
- int32_t i;
- for (i=0; i<s->size(); i++) {
- const RBBINode *v = static_cast<const RBBINode *>(s->elementAt(i));
- RBBIDebugPrintf("%5d", v==NULL? -1 : v->fSerialNum);
- }
- RBBIDebugPrintf("\n");
-}
-#endif
-
-
-//-----------------------------------------------------------------------------
-//
-// printStates Debug Function. Dump the fully constructed state transition table.
-//
-//-----------------------------------------------------------------------------
-#ifdef RBBI_DEBUG
-void RBBITableBuilder::printStates() {
- int c; // input "character"
- int n; // state number
-
- RBBIDebugPrintf("state | i n p u t s y m b o l s \n");
- RBBIDebugPrintf(" | Acc LA Tag");
- for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
- RBBIDebugPrintf(" %2d", c);
- }
- RBBIDebugPrintf("\n");
- RBBIDebugPrintf(" |---------------");
- for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
- RBBIDebugPrintf("---");
- }
- RBBIDebugPrintf("\n");
-
- for (n=0; n<fDStates->size(); n++) {
- RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
- RBBIDebugPrintf(" %3d | " , n);
- RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTagsIdx);
- for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
- RBBIDebugPrintf(" %2d", sd->fDtran->elementAti(c));
- }
- RBBIDebugPrintf("\n");
- }
- RBBIDebugPrintf("\n\n");
-}
-#endif
-
-
-//-----------------------------------------------------------------------------
-//
-// printSafeTable Debug Function. Dump the fully constructed safe table.
-//
-//-----------------------------------------------------------------------------
-#ifdef RBBI_DEBUG
-void RBBITableBuilder::printReverseTable() {
- int c; // input "character"
- int n; // state number
-
- RBBIDebugPrintf(" Safe Reverse Table \n");
- if (fSafeTable == nullptr) {
- RBBIDebugPrintf(" --- nullptr ---\n");
- return;
- }
- RBBIDebugPrintf("state | i n p u t s y m b o l s \n");
- RBBIDebugPrintf(" | Acc LA Tag");
- for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
- RBBIDebugPrintf(" %2d", c);
- }
- RBBIDebugPrintf("\n");
- RBBIDebugPrintf(" |---------------");
- for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
- RBBIDebugPrintf("---");
- }
- RBBIDebugPrintf("\n");
-
- for (n=0; n<fSafeTable->size(); n++) {
- UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(n);
- RBBIDebugPrintf(" %3d | " , n);
- RBBIDebugPrintf("%3d %3d %5d ", 0, 0, 0); // Accepting, LookAhead, Tags
- for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
- RBBIDebugPrintf(" %2d", rowString->charAt(c));
- }
- RBBIDebugPrintf("\n");
- }
- RBBIDebugPrintf("\n\n");
-}
-#endif
-
-
-
-//-----------------------------------------------------------------------------
-//
-// printRuleStatusTable Debug Function. Dump the common rule status table
-//
-//-----------------------------------------------------------------------------
-#ifdef RBBI_DEBUG
-void RBBITableBuilder::printRuleStatusTable() {
- int32_t thisRecord = 0;
- int32_t nextRecord = 0;
- int i;
- UVector *tbl = fRB->fRuleStatusVals;
-
- RBBIDebugPrintf("index | tags \n");
- RBBIDebugPrintf("-------------------\n");
-
- while (nextRecord < tbl->size()) {
- thisRecord = nextRecord;
- nextRecord = thisRecord + tbl->elementAti(thisRecord) + 1;
- RBBIDebugPrintf("%4d ", thisRecord);
- for (i=thisRecord+1; i<nextRecord; i++) {
- RBBIDebugPrintf(" %5d", tbl->elementAti(i));
- }
- RBBIDebugPrintf("\n");
- }
- RBBIDebugPrintf("\n\n");
-}
-#endif
-
-
-//-----------------------------------------------------------------------------
-//
-// RBBIStateDescriptor Methods. This is a very struct-like class
-// Most access is directly to the fields.
-//
-//-----------------------------------------------------------------------------
-
-RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatus) {
- fMarked = FALSE;
- fAccepting = 0;
- fLookAhead = 0;
- fTagsIdx = 0;
- fTagVals = NULL;
- fPositions = NULL;
- fDtran = NULL;
-
- fDtran = new UVector32(lastInputSymbol+1, *fStatus);
- if (U_FAILURE(*fStatus)) {
- return;
- }
- if (fDtran == NULL) {
- *fStatus = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- fDtran->setSize(lastInputSymbol+1); // fDtran needs to be pre-sized.
- // It is indexed by input symbols, and will
- // hold the next state number for each
- // symbol.
-}
-
-
-RBBIStateDescriptor::~RBBIStateDescriptor() {
- delete fPositions;
- delete fDtran;
- delete fTagVals;
- fPositions = NULL;
- fDtran = NULL;
- fTagVals = NULL;
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/contrib/libs/icu/common/rbbitblb.h b/contrib/libs/icu/common/rbbitblb.h
deleted file mode 100644
index c2b574fe1b8..00000000000
--- a/contrib/libs/icu/common/rbbitblb.h
+++ /dev/null
@@ -1,220 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//
-// rbbitblb.h
-//
-
-/*
-**********************************************************************
-* Copyright (c) 2002-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#ifndef RBBITBLB_H
-#define RBBITBLB_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/uobject.h"
-#include "unicode/rbbi.h"
-#include "rbbirb.h"
-#include "rbbinode.h"
-
-
-U_NAMESPACE_BEGIN
-
-class RBBIRuleScanner;
-class RBBIRuleBuilder;
-class UVector32;
-
-//
-// class RBBITableBuilder is part of the RBBI rule compiler.
-// It builds the state transition table used by the RBBI runtime
-// from the expression syntax tree generated by the rule scanner.
-//
-// This class is part of the RBBI implementation only.
-// There is no user-visible public API here.
-//
-
-class RBBITableBuilder : public UMemory {
-public:
- RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status);
- ~RBBITableBuilder();
-
- void buildForwardTable();
-
- /** Return the runtime size in bytes of the built state table. */
- int32_t getTableSize() const;
-
- /** Fill in the runtime state table. Sufficient memory must exist at the specified location.
- */
- void exportTable(void *where);
-
- /**
- * Find duplicate (redundant) character classes. Begin looking with categories.first.
- * Duplicate, if found are returned in the categories parameter.
- * This is an iterator-like function, used to identify character classes
- * (state table columns) that can be eliminated.
- * @param categories in/out parameter, specifies where to start looking for duplicates,
- * and returns the first pair of duplicates found, if any.
- * @return true if duplicate char classes were found, false otherwise.
- */
- bool findDuplCharClassFrom(IntPair *categories);
-
- /** Remove a column from the state table. Used when two character categories
- * have been found equivalent, and merged together, to eliminate the uneeded table column.
- */
- void removeColumn(int32_t column);
-
- /**
- * Check for, and remove dupicate states (table rows).
- * @return the number of states removed.
- */
- int32_t removeDuplicateStates();
-
- /** Build the safe reverse table from the already-constructed forward table. */
- void buildSafeReverseTable(UErrorCode &status);
-
- /** Return the runtime size in bytes of the built safe reverse state table. */
- int32_t getSafeTableSize() const;
-
- /** Fill in the runtime safe state table. Sufficient memory must exist at the specified location.
- */
- void exportSafeTable(void *where);
-
-
-private:
- void calcNullable(RBBINode *n);
- void calcFirstPos(RBBINode *n);
- void calcLastPos(RBBINode *n);
- void calcFollowPos(RBBINode *n);
- void calcChainedFollowPos(RBBINode *n, RBBINode *endMarkNode);
- void bofFixup();
- void buildStateTable();
- void mapLookAheadRules();
- void flagAcceptingStates();
- void flagLookAheadStates();
- void flagTaggedStates();
- void mergeRuleStatusVals();
-
- /**
- * Merge redundant state table columns, eliminating character classes with identical behavior.
- * Done after the state tables are generated, just before converting to their run-time format.
- */
- int32_t mergeColumns();
-
- void addRuleRootNodes(UVector *dest, RBBINode *node);
-
- /**
- * Find duplicate (redundant) states, beginning at the specified pair,
- * within this state table. This is an iterator-like function, used to
- * identify states (state table rows) that can be eliminated.
- * @param states in/out parameter, specifies where to start looking for duplicates,
- * and returns the first pair of duplicates found, if any.
- * @return true if duplicate states were found, false otherwise.
- */
- bool findDuplicateState(IntPair *states);
-
- /** Remove a duplicate state.
- * @param duplStates The duplicate states. The first is kept, the second is removed.
- * All references to the second in the state table are retargeted
- * to the first.
- */
- void removeState(IntPair duplStates);
-
- /** Find the next duplicate state in the safe reverse table. An iterator function.
- * @param states in/out parameter, specifies where to start looking for duplicates,
- * and returns the first pair of duplicates found, if any.
- * @return true if a duplicate pair of states was found.
- */
- bool findDuplicateSafeState(IntPair *states);
-
- /** Remove a duplicate state from the safe table.
- * @param duplStates The duplicate states. The first is kept, the second is removed.
- * All references to the second in the state table are retargeted
- * to the first.
- */
- void removeSafeState(IntPair duplStates);
-
- // Set functions for UVector.
- // TODO: make a USet subclass of UVector
-
- void setAdd(UVector *dest, UVector *source);
- UBool setEquals(UVector *a, UVector *b);
-
- void sortedAdd(UVector **dest, int32_t val);
-
-public:
-#ifdef RBBI_DEBUG
- void printSet(UVector *s);
- void printPosSets(RBBINode *n /* = NULL*/);
- void printStates();
- void printRuleStatusTable();
- void printReverseTable();
-#else
- #define printSet(s)
- #define printPosSets(n)
- #define printStates()
- #define printRuleStatusTable()
- #define printReverseTable()
-#endif
-
-private:
- RBBIRuleBuilder *fRB;
- RBBINode *&fTree; // The root node of the parse tree to build a
- // table for.
- UErrorCode *fStatus;
-
- /** State Descriptors, UVector<RBBIStateDescriptor> */
- UVector *fDStates; // D states (Aho's terminology)
- // Index is state number
- // Contents are RBBIStateDescriptor pointers.
-
- /** Synthesized safe table, UVector of UnicodeString, one string per table row. */
- UVector *fSafeTable;
-
- /** Map from rule number (fVal in look ahead nodes) to sequential lookahead index. */
- UVector32 *fLookAheadRuleMap = nullptr;
-
-
- RBBITableBuilder(const RBBITableBuilder &other); // forbid copying of this class
- RBBITableBuilder &operator=(const RBBITableBuilder &other); // forbid copying of this class
-};
-
-//
-// RBBIStateDescriptor - The DFA is constructed as a set of these descriptors,
-// one for each state.
-class RBBIStateDescriptor : public UMemory {
-public:
- UBool fMarked;
- int32_t fAccepting;
- int32_t fLookAhead;
- UVector *fTagVals;
- int32_t fTagsIdx;
- UVector *fPositions; // Set of parse tree positions associated
- // with this state. Unordered (it's a set).
- // UVector contents are RBBINode *
-
- UVector32 *fDtran; // Transitions out of this state.
- // indexed by input character
- // contents is int index of dest state
- // in RBBITableBuilder.fDStates
-
- RBBIStateDescriptor(int maxInputSymbol, UErrorCode *fStatus);
- ~RBBIStateDescriptor();
-
-private:
- RBBIStateDescriptor(const RBBIStateDescriptor &other); // forbid copying of this class
- RBBIStateDescriptor &operator=(const RBBIStateDescriptor &other); // forbid copying of this class
-};
-
-
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
-
-#endif
diff --git a/contrib/libs/icu/common/resbund.cpp b/contrib/libs/icu/common/resbund.cpp
deleted file mode 100644
index 7c5063b2114..00000000000
--- a/contrib/libs/icu/common/resbund.cpp
+++ /dev/null
@@ -1,399 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1997-2013, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*
-* File resbund.cpp
-*
-* Modification History:
-*
-* Date Name Description
-* 02/05/97 aliu Fixed bug in chopLocale. Added scanForLocaleInFile
-* based on code taken from scanForLocale. Added
-* constructor which attempts to read resource bundle
-* from a specific file, without searching other files.
-* 02/11/97 aliu Added UErrorCode return values to constructors. Fixed
-* infinite loops in scanForFile and scanForLocale.
-* Modified getRawResourceData to not delete storage in
-* localeData and resourceData which it doesn't own.
-* Added Mac compatibility #ifdefs for tellp() and
-* ios::nocreate.
-* 03/04/97 aliu Modified to use ExpandingDataSink objects instead of
-* the highly inefficient ostrstream objects.
-* 03/13/97 aliu Rewrote to load in entire resource bundle and store
-* it as a Hashtable of ResourceBundleData objects.
-* Added state table to govern parsing of files.
-* Modified to load locale index out of new file distinct
-* from default.txt.
-* 03/25/97 aliu Modified to support 2-d arrays, needed for timezone data.
-* Added support for custom file suffixes. Again, needed
-* to support timezone data. Improved error handling to
-* detect duplicate tags and subtags.
-* 04/07/97 aliu Fixed bug in getHashtableForLocale(). Fixed handling
-* of failing UErrorCode values on entry to API methods.
-* Fixed bugs in getArrayItem() for negative indices.
-* 04/29/97 aliu Update to use new Hashtable deletion protocol.
-* 05/06/97 aliu Flattened kTransitionTable for HP compiler.
-* Fixed usage of CharString.
-* 06/11/99 stephen Removed parsing of .txt files.
-* Reworked to use new binary format.
-* Cleaned up.
-* 06/14/99 stephen Removed methods taking a filename suffix.
-* 06/22/99 stephen Added missing T_FileStream_close in parse()
-* 11/09/99 weiv Added getLocale(), rewritten constructForLocale()
-* March 2000 weiv complete overhaul.
-******************************************************************************
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/resbund.h"
-
-#include "cmemory.h"
-#include "mutex.h"
-#include "uassert.h"
-#include "umutex.h"
-
-#include "uresimp.h"
-
-U_NAMESPACE_BEGIN
-
-/*-----------------------------------------------------------------------------
- * Implementation Notes
- *
- * Resource bundles are read in once, and thereafter cached.
- * ResourceBundle statically keeps track of which files have been
- * read, so we are guaranteed that each file is read at most once.
- * Resource bundles can be loaded from different data directories and
- * will be treated as distinct, even if they are for the same locale.
- *
- * Resource bundles are lightweight objects, which have pointers to
- * one or more shared Hashtable objects containing all the data.
- * Copying would be cheap, but there is no copy constructor, since
- * there wasn't one in the original API.
- *
- * The ResourceBundle parsing mechanism is implemented as a transition
- * network, for easy maintenance and modification. The network is
- * implemented as a matrix (instead of in code) to make this even
- * easier. The matrix contains Transition objects. Each Transition
- * object describes a destination node and an action to take before
- * moving to the destination node. The source node is encoded by the
- * index of the object in the array that contains it. The pieces
- * needed to understand the transition network are the enums for node
- * IDs and actions, the parse() method, which walks through the
- * network and implements the actions, and the network itself. The
- * network guarantees certain conditions, for example, that a new
- * resource will not be closed until one has been opened first; or
- * that data will not be stored into a TaggedList until a TaggedList
- * has been created. Nonetheless, the code in parse() does some
- * consistency checks as it runs the network, and fails with an
- * U_INTERNAL_PROGRAM_ERROR if one of these checks fails. If the input
- * data has a bad format, an U_INVALID_FORMAT_ERROR is returned. If you
- * see an U_INTERNAL_PROGRAM_ERROR the transition matrix has a bug in
- * it.
- *
- * Old functionality of multiple locales in a single file is still
- * supported. For this reason, LOCALE names override FILE names. If
- * data for en_US is located in the en.txt file, once it is loaded,
- * the code will not care where it came from (other than remembering
- * which directory it came from). However, if there is an en_US
- * resource in en_US.txt, that will take precedence. There is no
- * limit to the number or type of resources that can be stored in a
- * file, however, files are only searched in a specific way. If
- * en_US_CA is requested, then first en_US_CA.txt is searched, then
- * en_US.txt, then en.txt, then default.txt. So it only makes sense
- * to put certain locales in certain files. In this example, it would
- * be logical to put en_US_CA, en_US, and en into the en.txt file,
- * since they would be found there if asked for. The extreme example
- * is to place all locale resources into default.txt, which should
- * also work.
- *
- * Inheritance is implemented. For example, xx_YY_zz inherits as
- * follows: xx_YY_zz, xx_YY, xx, default. Inheritance is implemented
- * as an array of hashtables. There will be from 1 to 4 hashtables in
- * the array.
- *
- * Fallback files are implemented. The fallback pattern is Language
- * Country Variant (LCV) -> LC -> L. Fallback is first done for the
- * requested locale. Then it is done for the default locale, as
- * returned by Locale::getDefault(). Then the special file
- * default.txt is searched for the default locale. The overall FILE
- * fallback path is LCV -> LC -> L -> dLCV -> dLC -> dL -> default.
- *
- * Note that although file name searching includes the default locale,
- * once a ResourceBundle object is constructed, the inheritance path
- * no longer includes the default locale. The path is LCV -> LC -> L
- * -> default.
- *
- * File parsing is lazy. Nothing is parsed unless it is called for by
- * someone. So when a ResourceBundle for xx_YY_zz is constructed,
- * only that locale is parsed (along with anything else in the same
- * file). Later, if the FooBar tag is asked for, and if it isn't
- * found in xx_YY_zz, then xx_YY.txt will be parsed and checked, and
- * so forth, until the chain is exhausted or the tag is found.
- *
- * Thread-safety is implemented around caches, both the cache that
- * stores all the resouce data, and the cache that stores flags
- * indicating whether or not a file has been visited. These caches
- * delete their storage at static cleanup time, when the process
- * quits.
- *
- * ResourceBundle supports TableCollation as a special case. This
- * involves having special ResourceBundle objects which DO own their
- * data, since we don't want large collation rule strings in the
- * ResourceBundle cache (these are already cached in the
- * TableCollation cache). TableCollation files (.ctx files) have the
- * same format as normal resource data files, with a different
- * interpretation, from the standpoint of ResourceBundle. .ctx files
- * are loaded into otherwise ordinary ResourceBundle objects. They
- * don't inherit (that's implemented by TableCollation) and they own
- * their data (as mentioned above). However, they still support
- * possible multiple locales in a single .ctx file. (This is in
- * practice a bad idea, since you only want the one locale you're
- * looking for, and only one tag will be present
- * ("CollationElements"), so you don't need an inheritance chain of
- * multiple locales.) Up to 4 locale resources will be loaded from a
- * .ctx file; everything after the first 4 is ignored (parsed and
- * deleted). (Normal .txt files have no limit.) Instead of being
- * loaded into the cache, and then looked up as needed, the locale
- * resources are read straight into the ResourceBundle object.
- *
- * The Index, which used to reside in default.txt, has been moved to a
- * new file, index.txt. This file contains a slightly modified format
- * with the addition of the "InstalledLocales" tag; it looks like:
- *
- * Index {
- * InstalledLocales {
- * ar
- * ..
- * zh_TW
- * }
- * }
- */
-//-----------------------------------------------------------------------------
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ResourceBundle)
-
-ResourceBundle::ResourceBundle(UErrorCode &err)
- :UObject(), fLocale(NULL)
-{
- fResource = ures_open(0, Locale::getDefault().getName(), &err);
-}
-
-ResourceBundle::ResourceBundle(const ResourceBundle &other)
- :UObject(other), fLocale(NULL)
-{
- UErrorCode status = U_ZERO_ERROR;
-
- if (other.fResource) {
- fResource = ures_copyResb(0, other.fResource, &status);
- } else {
- /* Copying a bad resource bundle */
- fResource = NULL;
- }
-}
-
-ResourceBundle::ResourceBundle(UResourceBundle *res, UErrorCode& err)
- :UObject(), fLocale(NULL)
-{
- if (res) {
- fResource = ures_copyResb(0, res, &err);
- } else {
- /* Copying a bad resource bundle */
- fResource = NULL;
- }
-}
-
-ResourceBundle::ResourceBundle(const char* path, const Locale& locale, UErrorCode& err)
- :UObject(), fLocale(NULL)
-{
- fResource = ures_open(path, locale.getName(), &err);
-}
-
-
-ResourceBundle& ResourceBundle::operator=(const ResourceBundle& other)
-{
- if(this == &other) {
- return *this;
- }
- if(fResource != 0) {
- ures_close(fResource);
- fResource = NULL;
- }
- if (fLocale != NULL) {
- delete fLocale;
- fLocale = NULL;
- }
- UErrorCode status = U_ZERO_ERROR;
- if (other.fResource) {
- fResource = ures_copyResb(0, other.fResource, &status);
- } else {
- /* Copying a bad resource bundle */
- fResource = NULL;
- }
- return *this;
-}
-
-ResourceBundle::~ResourceBundle()
-{
- if(fResource != 0) {
- ures_close(fResource);
- }
- if(fLocale != NULL) {
- delete(fLocale);
- }
-}
-
-ResourceBundle *
-ResourceBundle::clone() const {
- return new ResourceBundle(*this);
-}
-
-UnicodeString ResourceBundle::getString(UErrorCode& status) const {
- int32_t len = 0;
- const UChar *r = ures_getString(fResource, &len, &status);
- return UnicodeString(TRUE, r, len);
-}
-
-const uint8_t *ResourceBundle::getBinary(int32_t& len, UErrorCode& status) const {
- return ures_getBinary(fResource, &len, &status);
-}
-
-const int32_t *ResourceBundle::getIntVector(int32_t& len, UErrorCode& status) const {
- return ures_getIntVector(fResource, &len, &status);
-}
-
-uint32_t ResourceBundle::getUInt(UErrorCode& status) const {
- return ures_getUInt(fResource, &status);
-}
-
-int32_t ResourceBundle::getInt(UErrorCode& status) const {
- return ures_getInt(fResource, &status);
-}
-
-const char *ResourceBundle::getName(void) const {
- return ures_getName(fResource);
-}
-
-const char *ResourceBundle::getKey(void) const {
- return ures_getKey(fResource);
-}
-
-UResType ResourceBundle::getType(void) const {
- return ures_getType(fResource);
-}
-
-int32_t ResourceBundle::getSize(void) const {
- return ures_getSize(fResource);
-}
-
-UBool ResourceBundle::hasNext(void) const {
- return ures_hasNext(fResource);
-}
-
-void ResourceBundle::resetIterator(void) {
- ures_resetIterator(fResource);
-}
-
-ResourceBundle ResourceBundle::getNext(UErrorCode& status) {
- UResourceBundle r;
-
- ures_initStackObject(&r);
- ures_getNextResource(fResource, &r, &status);
- ResourceBundle res(&r, status);
- if (U_SUCCESS(status)) {
- ures_close(&r);
- }
- return res;
-}
-
-UnicodeString ResourceBundle::getNextString(UErrorCode& status) {
- int32_t len = 0;
- const UChar* r = ures_getNextString(fResource, &len, 0, &status);
- return UnicodeString(TRUE, r, len);
-}
-
-UnicodeString ResourceBundle::getNextString(const char ** key, UErrorCode& status) {
- int32_t len = 0;
- const UChar* r = ures_getNextString(fResource, &len, key, &status);
- return UnicodeString(TRUE, r, len);
-}
-
-ResourceBundle ResourceBundle::get(int32_t indexR, UErrorCode& status) const {
- UResourceBundle r;
-
- ures_initStackObject(&r);
- ures_getByIndex(fResource, indexR, &r, &status);
- ResourceBundle res(&r, status);
- if (U_SUCCESS(status)) {
- ures_close(&r);
- }
- return res;
-}
-
-UnicodeString ResourceBundle::getStringEx(int32_t indexS, UErrorCode& status) const {
- int32_t len = 0;
- const UChar* r = ures_getStringByIndex(fResource, indexS, &len, &status);
- return UnicodeString(TRUE, r, len);
-}
-
-ResourceBundle ResourceBundle::get(const char* key, UErrorCode& status) const {
- UResourceBundle r;
-
- ures_initStackObject(&r);
- ures_getByKey(fResource, key, &r, &status);
- ResourceBundle res(&r, status);
- if (U_SUCCESS(status)) {
- ures_close(&r);
- }
- return res;
-}
-
-ResourceBundle ResourceBundle::getWithFallback(const char* key, UErrorCode& status){
- UResourceBundle r;
- ures_initStackObject(&r);
- ures_getByKeyWithFallback(fResource, key, &r, &status);
- ResourceBundle res(&r, status);
- if(U_SUCCESS(status)){
- ures_close(&r);
- }
- return res;
-}
-UnicodeString ResourceBundle::getStringEx(const char* key, UErrorCode& status) const {
- int32_t len = 0;
- const UChar* r = ures_getStringByKey(fResource, key, &len, &status);
- return UnicodeString(TRUE, r, len);
-}
-
-const char*
-ResourceBundle::getVersionNumber() const
-{
- return ures_getVersionNumberInternal(fResource);
-}
-
-void ResourceBundle::getVersion(UVersionInfo versionInfo) const {
- ures_getVersion(fResource, versionInfo);
-}
-
-const Locale &ResourceBundle::getLocale(void) const {
- static UMutex gLocaleLock;
- Mutex lock(&gLocaleLock);
- if (fLocale != NULL) {
- return *fLocale;
- }
- UErrorCode status = U_ZERO_ERROR;
- const char *localeName = ures_getLocaleInternal(fResource, &status);
- ResourceBundle *ncThis = const_cast<ResourceBundle *>(this);
- ncThis->fLocale = new Locale(localeName);
- return ncThis->fLocale != NULL ? *ncThis->fLocale : Locale::getDefault();
-}
-
-const Locale ResourceBundle::getLocale(ULocDataLocaleType type, UErrorCode &status) const
-{
- return ures_getLocaleByType(fResource, type, &status);
-}
-
-U_NAMESPACE_END
-//eof
diff --git a/contrib/libs/icu/common/resbund_cnv.cpp b/contrib/libs/icu/common/resbund_cnv.cpp
deleted file mode 100644
index 45c0b399bff..00000000000
--- a/contrib/libs/icu/common/resbund_cnv.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1997-2006, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: resbund_cnv.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004aug25
-* created by: Markus W. Scherer
-*
-* Character conversion functions moved here from resbund.cpp
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/resbund.h"
-#include "uinvchar.h"
-
-U_NAMESPACE_BEGIN
-
-ResourceBundle::ResourceBundle( const UnicodeString& path,
- const Locale& locale,
- UErrorCode& error)
- :UObject(), fLocale(NULL)
-{
- constructForLocale(path, locale, error);
-}
-
-ResourceBundle::ResourceBundle( const UnicodeString& path,
- UErrorCode& error)
- :UObject(), fLocale(NULL)
-{
- constructForLocale(path, Locale::getDefault(), error);
-}
-
-void
-ResourceBundle::constructForLocale(const UnicodeString& path,
- const Locale& locale,
- UErrorCode& error)
-{
- if (path.isEmpty()) {
- fResource = ures_open(NULL, locale.getName(), &error);
- }
- else {
- UnicodeString nullTerminatedPath(path);
- nullTerminatedPath.append((UChar)0);
- fResource = ures_openU(nullTerminatedPath.getBuffer(), locale.getName(), &error);
- }
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/resource.cpp b/contrib/libs/icu/common/resource.cpp
deleted file mode 100644
index 3d41a16029f..00000000000
--- a/contrib/libs/icu/common/resource.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2015-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* resource.cpp
-*
-* created on: 2015nov04
-* created by: Markus W. Scherer
-*/
-
-#include "resource.h"
-
-U_NAMESPACE_BEGIN
-
-ResourceValue::~ResourceValue() {}
-
-ResourceSink::~ResourceSink() {}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/resource.h b/contrib/libs/icu/common/resource.h
deleted file mode 100644
index 5199b858880..00000000000
--- a/contrib/libs/icu/common/resource.h
+++ /dev/null
@@ -1,293 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2015-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* resource.h
-*
-* created on: 2015nov04
-* created by: Markus W. Scherer
-*/
-
-#ifndef __URESOURCE_H__
-#define __URESOURCE_H__
-
-/**
- * \file
- * \brief ICU resource bundle key and value types.
- */
-
-// Note: Ported from ICU4J class UResource and its nested classes,
-// but the C++ classes are separate, not nested.
-
-// We use the Resource prefix for C++ classes, as usual.
-// The UResource prefix would be used for C types.
-
-#include "unicode/utypes.h"
-#include "unicode/unistr.h"
-#include "unicode/ures.h"
-#include "restrace.h"
-
-struct ResourceData;
-
-U_NAMESPACE_BEGIN
-
-class ResourceValue;
-
-// Note: In C++, we use const char * pointers for keys,
-// rather than an abstraction like Java UResource.Key.
-
-/**
- * Interface for iterating over a resource bundle array resource.
- */
-class U_COMMON_API ResourceArray {
-public:
- /** Constructs an empty array object. */
- ResourceArray() : items16(NULL), items32(NULL), length(0) {}
-
- /** Only for implementation use. @internal */
- ResourceArray(const uint16_t *i16, const uint32_t *i32, int32_t len,
- const ResourceTracer& traceInfo) :
- items16(i16), items32(i32), length(len),
- fTraceInfo(traceInfo) {}
-
- /**
- * @return The number of items in the array resource.
- */
- int32_t getSize() const { return length; }
- /**
- * @param i Array item index.
- * @param value Output-only, receives the value of the i'th item.
- * @return TRUE if i is non-negative and less than getSize().
- */
- UBool getValue(int32_t i, ResourceValue &value) const;
-
- /** Only for implementation use. @internal */
- uint32_t internalGetResource(const ResourceData *pResData, int32_t i) const;
-
-private:
- const uint16_t *items16;
- const uint32_t *items32;
- int32_t length;
- ResourceTracer fTraceInfo;
-};
-
-/**
- * Interface for iterating over a resource bundle table resource.
- */
-class U_COMMON_API ResourceTable {
-public:
- /** Constructs an empty table object. */
- ResourceTable() : keys16(NULL), keys32(NULL), items16(NULL), items32(NULL), length(0) {}
-
- /** Only for implementation use. @internal */
- ResourceTable(const uint16_t *k16, const int32_t *k32,
- const uint16_t *i16, const uint32_t *i32, int32_t len,
- const ResourceTracer& traceInfo) :
- keys16(k16), keys32(k32), items16(i16), items32(i32), length(len),
- fTraceInfo(traceInfo) {}
-
- /**
- * @return The number of items in the array resource.
- */
- int32_t getSize() const { return length; }
- /**
- * @param i Table item index.
- * @param key Output-only, receives the key of the i'th item.
- * @param value Output-only, receives the value of the i'th item.
- * @return TRUE if i is non-negative and less than getSize().
- */
- UBool getKeyAndValue(int32_t i, const char *&key, ResourceValue &value) const;
-
- /**
- * @param key Key string to find in the table.
- * @param value Output-only, receives the value of the item with that key.
- * @return TRUE if the table contains the key.
- */
- UBool findValue(const char *key, ResourceValue &value) const;
-
-private:
- const uint16_t *keys16;
- const int32_t *keys32;
- const uint16_t *items16;
- const uint32_t *items32;
- int32_t length;
- ResourceTracer fTraceInfo;
-};
-
-/**
- * Represents a resource bundle item's value.
- * Avoids object creations as much as possible.
- * Mutable, not thread-safe.
- */
-class U_COMMON_API ResourceValue : public UObject {
-public:
- virtual ~ResourceValue();
-
- /**
- * @return ICU resource type, for example, URES_STRING
- */
- virtual UResType getType() const = 0;
-
- /**
- * Sets U_RESOURCE_TYPE_MISMATCH if this is not a string resource.
- *
- * @see ures_getString()
- */
- virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const = 0;
-
- inline UnicodeString getUnicodeString(UErrorCode &errorCode) const {
- int32_t len = 0;
- const UChar *r = getString(len, errorCode);
- return UnicodeString(TRUE, r, len);
- }
-
- /**
- * Sets U_RESOURCE_TYPE_MISMATCH if this is not an alias resource.
- */
- virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const = 0;
-
- inline UnicodeString getAliasUnicodeString(UErrorCode &errorCode) const {
- int32_t len = 0;
- const UChar *r = getAliasString(len, errorCode);
- return UnicodeString(TRUE, r, len);
- }
-
- /**
- * Sets U_RESOURCE_TYPE_MISMATCH if this is not an integer resource.
- *
- * @see ures_getInt()
- */
- virtual int32_t getInt(UErrorCode &errorCode) const = 0;
-
- /**
- * Sets U_RESOURCE_TYPE_MISMATCH if this is not an integer resource.
- *
- * @see ures_getUInt()
- */
- virtual uint32_t getUInt(UErrorCode &errorCode) const = 0;
-
- /**
- * Sets U_RESOURCE_TYPE_MISMATCH if this is not an intvector resource.
- *
- * @see ures_getIntVector()
- */
- virtual const int32_t *getIntVector(int32_t &length, UErrorCode &errorCode) const = 0;
-
- /**
- * Sets U_RESOURCE_TYPE_MISMATCH if this is not a binary-blob resource.
- *
- * @see ures_getBinary()
- */
- virtual const uint8_t *getBinary(int32_t &length, UErrorCode &errorCode) const = 0;
-
- /**
- * Sets U_RESOURCE_TYPE_MISMATCH if this is not an array resource
- */
- virtual ResourceArray getArray(UErrorCode &errorCode) const = 0;
-
- /**
- * Sets U_RESOURCE_TYPE_MISMATCH if this is not a table resource
- */
- virtual ResourceTable getTable(UErrorCode &errorCode) const = 0;
-
- /**
- * Is this a no-fallback/no-inheritance marker string?
- * Such a marker is used for
- * CLDR no-fallback data values of (three empty-set symbols)=={2205, 2205, 2205}
- * when enumerating tables with fallback from the specific resource bundle to root.
- *
- * @return TRUE if this is a no-inheritance marker string
- */
- virtual UBool isNoInheritanceMarker() const = 0;
-
- /**
- * Sets the dest strings from the string values in this array resource.
- *
- * @return the number of strings in this array resource.
- * If greater than capacity, then an overflow error is set.
- *
- * Sets U_RESOURCE_TYPE_MISMATCH if this is not an array resource
- * or if any of the array items is not a string
- */
- virtual int32_t getStringArray(UnicodeString *dest, int32_t capacity,
- UErrorCode &errorCode) const = 0;
-
- /**
- * Same as
- * <pre>
- * if (getType() == URES_STRING) {
- * return new String[] { getString(); }
- * } else {
- * return getStringArray();
- * }
- * </pre>
- *
- * Sets U_RESOURCE_TYPE_MISMATCH if this is
- * neither a string resource nor an array resource containing strings
- * @see getString()
- * @see getStringArray()
- */
- virtual int32_t getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity,
- UErrorCode &errorCode) const = 0;
-
- /**
- * Same as
- * <pre>
- * if (getType() == URES_STRING) {
- * return getString();
- * } else {
- * return getStringArray()[0];
- * }
- * </pre>
- *
- * Sets U_RESOURCE_TYPE_MISMATCH if this is
- * neither a string resource nor an array resource containing strings
- * @see getString()
- * @see getStringArray()
- */
- virtual UnicodeString getStringOrFirstOfArray(UErrorCode &errorCode) const = 0;
-
-protected:
- ResourceValue() {}
-
-private:
- ResourceValue(const ResourceValue &); // no copy constructor
- ResourceValue &operator=(const ResourceValue &); // no assignment operator
-};
-
-/**
- * Sink for ICU resource bundle contents.
- */
-class U_COMMON_API ResourceSink : public UObject {
-public:
- ResourceSink() {}
- virtual ~ResourceSink();
-
- /**
- * Called once for each bundle (child-parent-...-root).
- * The value is normally an array or table resource,
- * and implementations of this method normally iterate over the
- * tree of resource items stored there.
- *
- * @param key The key string of the enumeration-start resource.
- * Empty if the enumeration starts at the top level of the bundle.
- * @param value Call getArray() or getTable() as appropriate.
- * Then reuse for output values from Array and Table getters.
- * @param noFallback true if the bundle has no parent;
- * that is, its top-level table has the nofallback attribute,
- * or it is the root bundle of a locale tree.
- */
- virtual void put(const char *key, ResourceValue &value, UBool noFallback,
- UErrorCode &errorCode) = 0;
-
-private:
- ResourceSink(const ResourceSink &); // no copy constructor
- ResourceSink &operator=(const ResourceSink &); // no assignment operator
-};
-
-U_NAMESPACE_END
-
-#endif
diff --git a/contrib/libs/icu/common/restrace.cpp b/contrib/libs/icu/common/restrace.cpp
deleted file mode 100644
index 5c6498850e2..00000000000
--- a/contrib/libs/icu/common/restrace.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-// © 2019 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-#include "unicode/utypes.h"
-
-#if U_ENABLE_TRACING
-
-#include "restrace.h"
-#include "charstr.h"
-#include "cstring.h"
-#include "utracimp.h"
-#include "uresimp.h"
-#include "uassert.h"
-#include "util.h"
-
-U_NAMESPACE_BEGIN
-
-ResourceTracer::~ResourceTracer() = default;
-
-void ResourceTracer::trace(const char* resType) const {
- U_ASSERT(fResB || fParent);
- UTRACE_ENTRY(UTRACE_UDATA_RESOURCE);
- UErrorCode status = U_ZERO_ERROR;
-
- CharString filePath;
- getFilePath(filePath, status);
-
- CharString resPath;
- getResPath(resPath, status);
-
- // The longest type ("intvector") is 9 chars
- const char kSpaces[] = " ";
- CharString format;
- format.append(kSpaces, sizeof(kSpaces) - 1 - uprv_strlen(resType), status);
- format.append("(%s) %s @ %s", status);
-
- UTRACE_DATA3(UTRACE_VERBOSE,
- format.data(),
- resType,
- filePath.data(),
- resPath.data());
- UTRACE_EXIT_STATUS(status);
-}
-
-void ResourceTracer::traceOpen() const {
- U_ASSERT(fResB);
- UTRACE_ENTRY(UTRACE_UDATA_BUNDLE);
- UErrorCode status = U_ZERO_ERROR;
-
- CharString filePath;
- UTRACE_DATA1(UTRACE_VERBOSE, "%s", getFilePath(filePath, status).data());
- UTRACE_EXIT_STATUS(status);
-}
-
-CharString& ResourceTracer::getFilePath(CharString& output, UErrorCode& status) const {
- if (fResB) {
- output.append(fResB->fData->fPath, status);
- output.append('/', status);
- output.append(fResB->fData->fName, status);
- output.append(".res", status);
- } else {
- fParent->getFilePath(output, status);
- }
- return output;
-}
-
-CharString& ResourceTracer::getResPath(CharString& output, UErrorCode& status) const {
- if (fResB) {
- output.append('/', status);
- output.append(fResB->fResPath, status);
- // removing the trailing /
- U_ASSERT(output[output.length()-1] == '/');
- output.truncate(output.length()-1);
- } else {
- fParent->getResPath(output, status);
- }
- if (fKey) {
- output.append('/', status);
- output.append(fKey, status);
- }
- if (fIndex != -1) {
- output.append('[', status);
- UnicodeString indexString;
- ICU_Utility::appendNumber(indexString, fIndex);
- output.appendInvariantChars(indexString, status);
- output.append(']', status);
- }
- return output;
-}
-
-void FileTracer::traceOpen(const char* path, const char* type, const char* name) {
- if (uprv_strcmp(type, "res") == 0) {
- traceOpenResFile(path, name);
- } else {
- traceOpenDataFile(path, type, name);
- }
-}
-
-void FileTracer::traceOpenDataFile(const char* path, const char* type, const char* name) {
- UTRACE_ENTRY(UTRACE_UDATA_DATA_FILE);
- UErrorCode status = U_ZERO_ERROR;
-
- CharString filePath;
- filePath.append(path, status);
- filePath.append('/', status);
- filePath.append(name, status);
- filePath.append('.', status);
- filePath.append(type, status);
-
- UTRACE_DATA1(UTRACE_VERBOSE, "%s", filePath.data());
- UTRACE_EXIT_STATUS(status);
-}
-
-void FileTracer::traceOpenResFile(const char* path, const char* name) {
- UTRACE_ENTRY(UTRACE_UDATA_RES_FILE);
- UErrorCode status = U_ZERO_ERROR;
-
- CharString filePath;
- filePath.append(path, status);
- filePath.append('/', status);
- filePath.append(name, status);
- filePath.append(".res", status);
-
- UTRACE_DATA1(UTRACE_VERBOSE, "%s", filePath.data());
- UTRACE_EXIT_STATUS(status);
-}
-
-U_NAMESPACE_END
-
-#endif // U_ENABLE_TRACING
diff --git a/contrib/libs/icu/common/restrace.h b/contrib/libs/icu/common/restrace.h
deleted file mode 100644
index ef29eaed578..00000000000
--- a/contrib/libs/icu/common/restrace.h
+++ /dev/null
@@ -1,147 +0,0 @@
-// © 2019 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-#ifndef __RESTRACE_H__
-#define __RESTRACE_H__
-
-#include "unicode/utypes.h"
-
-#if U_ENABLE_TRACING
-
-struct UResourceBundle;
-
-U_NAMESPACE_BEGIN
-
-class CharString;
-
-/**
- * Instances of this class store information used to trace reads from resource
- * bundles when ICU is built with --enable-tracing.
- *
- * All arguments of type const UResourceBundle*, const char*, and
- * const ResourceTracer& are stored as pointers. The caller must retain
- * ownership for the lifetime of this ResourceTracer.
- *
- * Exported as U_COMMON_API for Windows because it is a value field
- * in other exported types.
- */
-class U_COMMON_API ResourceTracer {
-public:
- ResourceTracer() :
- fResB(nullptr),
- fParent(nullptr),
- fKey(nullptr),
- fIndex(-1) {}
-
- ResourceTracer(const UResourceBundle* resB) :
- fResB(resB),
- fParent(nullptr),
- fKey(nullptr),
- fIndex(-1) {}
-
- ResourceTracer(const UResourceBundle* resB, const char* key) :
- fResB(resB),
- fParent(nullptr),
- fKey(key),
- fIndex(-1) {}
-
- ResourceTracer(const UResourceBundle* resB, int32_t index) :
- fResB(resB),
- fParent(nullptr),
- fKey(nullptr),
- fIndex(index) {}
-
- ResourceTracer(const ResourceTracer& parent, const char* key) :
- fResB(nullptr),
- fParent(&parent),
- fKey(key),
- fIndex(-1) {}
-
- ResourceTracer(const ResourceTracer& parent, int32_t index) :
- fResB(nullptr),
- fParent(&parent),
- fKey(nullptr),
- fIndex(index) {}
-
- ~ResourceTracer();
-
- void trace(const char* type) const;
- void traceOpen() const;
-
- /**
- * Calls trace() if the resB or parent provided to the constructor was
- * non-null; otherwise, does nothing.
- */
- void maybeTrace(const char* type) const {
- if (fResB || fParent) {
- trace(type);
- }
- }
-
-private:
- const UResourceBundle* fResB;
- const ResourceTracer* fParent;
- const char* fKey;
- int32_t fIndex;
-
- CharString& getFilePath(CharString& output, UErrorCode& status) const;
-
- CharString& getResPath(CharString& output, UErrorCode& status) const;
-};
-
-/**
- * This class provides methods to trace data file reads when ICU is built
- * with --enable-tracing.
- */
-class FileTracer {
-public:
- static void traceOpen(const char* path, const char* type, const char* name);
-
-private:
- static void traceOpenDataFile(const char* path, const char* type, const char* name);
- static void traceOpenResFile(const char* path, const char* name);
-};
-
-U_NAMESPACE_END
-
-#else // U_ENABLE_TRACING
-
-U_NAMESPACE_BEGIN
-
-/**
- * Default trivial implementation when --enable-tracing is not used.
- */
-class U_COMMON_API ResourceTracer {
-public:
- ResourceTracer() {}
-
- ResourceTracer(const void*) {}
-
- ResourceTracer(const void*, const char*) {}
-
- ResourceTracer(const void*, int32_t) {}
-
- ResourceTracer(const ResourceTracer&, const char*) {}
-
- ResourceTracer(const ResourceTracer&, int32_t) {}
-
- void trace(const char*) const {}
-
- void traceOpen() const {}
-
- void maybeTrace(const char*) const {}
-};
-
-/**
- * Default trivial implementation when --enable-tracing is not used.
- */
-class FileTracer {
-public:
- static void traceOpen(const char*, const char*, const char*) {}
-};
-
-U_NAMESPACE_END
-
-#endif // U_ENABLE_TRACING
-
-#endif //__RESTRACE_H__
diff --git a/contrib/libs/icu/common/ruleiter.cpp b/contrib/libs/icu/common/ruleiter.cpp
deleted file mode 100644
index 41eea23c0dc..00000000000
--- a/contrib/libs/icu/common/ruleiter.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2003-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Author: Alan Liu
-* Created: September 24 2003
-* Since: ICU 2.8
-**********************************************************************
-*/
-#include "ruleiter.h"
-#include "unicode/parsepos.h"
-#include "unicode/symtable.h"
-#include "unicode/unistr.h"
-#include "unicode/utf16.h"
-#include "patternprops.h"
-
-/* \U87654321 or \ud800\udc00 */
-#define MAX_U_NOTATION_LEN 12
-
-U_NAMESPACE_BEGIN
-
-RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
- ParsePosition& thePos) :
- text(theText),
- pos(thePos),
- sym(theSym),
- buf(0),
- bufPos(0)
-{}
-
-UBool RuleCharacterIterator::atEnd() const {
- return buf == 0 && pos.getIndex() == text.length();
-}
-
-UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
- if (U_FAILURE(ec)) return DONE;
-
- UChar32 c = DONE;
- isEscaped = FALSE;
-
- for (;;) {
- c = _current();
- _advance(U16_LENGTH(c));
-
- if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
- (options & PARSE_VARIABLES) != 0 && sym != 0) {
- UnicodeString name = sym->parseReference(text, pos, text.length());
- // If name is empty there was an isolated SYMBOL_REF;
- // return it. Caller must be prepared for this.
- if (name.length() == 0) {
- break;
- }
- bufPos = 0;
- buf = sym->lookup(name);
- if (buf == 0) {
- ec = U_UNDEFINED_VARIABLE;
- return DONE;
- }
- // Handle empty variable value
- if (buf->length() == 0) {
- buf = 0;
- }
- continue;
- }
-
- if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
- continue;
- }
-
- if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
- UnicodeString tempEscape;
- int32_t offset = 0;
- c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
- jumpahead(offset);
- isEscaped = TRUE;
- if (c < 0) {
- ec = U_MALFORMED_UNICODE_ESCAPE;
- return DONE;
- }
- }
-
- break;
- }
-
- return c;
-}
-
-void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
- p.buf = buf;
- p.pos = pos.getIndex();
- p.bufPos = bufPos;
-}
-
-void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
- buf = p.buf;
- pos.setIndex(p.pos);
- bufPos = p.bufPos;
-}
-
-void RuleCharacterIterator::skipIgnored(int32_t options) {
- if ((options & SKIP_WHITESPACE) != 0) {
- for (;;) {
- UChar32 a = _current();
- if (!PatternProps::isWhiteSpace(a)) break;
- _advance(U16_LENGTH(a));
- }
- }
-}
-
-UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
- if (maxLookAhead < 0) {
- maxLookAhead = 0x7FFFFFFF;
- }
- if (buf != 0) {
- buf->extract(bufPos, maxLookAhead, result);
- } else {
- text.extract(pos.getIndex(), maxLookAhead, result);
- }
- return result;
-}
-
-void RuleCharacterIterator::jumpahead(int32_t count) {
- _advance(count);
-}
-
-/*
-UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
- int32_t b = pos.getIndex();
- text.extract(0, b, result);
- return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
-}
-*/
-
-UChar32 RuleCharacterIterator::_current() const {
- if (buf != 0) {
- return buf->char32At(bufPos);
- } else {
- int i = pos.getIndex();
- return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
- }
-}
-
-void RuleCharacterIterator::_advance(int32_t count) {
- if (buf != 0) {
- bufPos += count;
- if (bufPos == buf->length()) {
- buf = 0;
- }
- } else {
- pos.setIndex(pos.getIndex() + count);
- if (pos.getIndex() > text.length()) {
- pos.setIndex(text.length());
- }
- }
-}
-
-U_NAMESPACE_END
-
-//eof
diff --git a/contrib/libs/icu/common/ruleiter.h b/contrib/libs/icu/common/ruleiter.h
deleted file mode 100644
index 4e1be53823d..00000000000
--- a/contrib/libs/icu/common/ruleiter.h
+++ /dev/null
@@ -1,233 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2003-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Author: Alan Liu
-* Created: September 24 2003
-* Since: ICU 2.8
-**********************************************************************
-*/
-#ifndef _RULEITER_H_
-#define _RULEITER_H_
-
-#include "unicode/uobject.h"
-
-U_NAMESPACE_BEGIN
-
-class UnicodeString;
-class ParsePosition;
-class SymbolTable;
-
-/**
- * An iterator that returns 32-bit code points. This class is deliberately
- * <em>not</em> related to any of the ICU character iterator classes
- * in order to minimize complexity.
- * @author Alan Liu
- * @since ICU 2.8
- */
-class RuleCharacterIterator : public UMemory {
-
- // TODO: Ideas for later. (Do not implement if not needed, lest the
- // code coverage numbers go down due to unused methods.)
- // 1. Add a copy constructor, operator==() method.
- // 2. Rather than return DONE, throw an exception if the end
- // is reached -- this is an alternate usage model, probably not useful.
-
-private:
- /**
- * Text being iterated.
- */
- const UnicodeString& text;
-
- /**
- * Position of iterator.
- */
- ParsePosition& pos;
-
- /**
- * Symbol table used to parse and dereference variables. May be 0.
- */
- const SymbolTable* sym;
-
- /**
- * Current variable expansion, or 0 if none.
- */
- const UnicodeString* buf;
-
- /**
- * Position within buf. Meaningless if buf == 0.
- */
- int32_t bufPos;
-
-public:
- /**
- * Value returned when there are no more characters to iterate.
- */
- enum { DONE = -1 };
-
- /**
- * Bitmask option to enable parsing of variable names. If (options &
- * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to
- * its value. Variables are parsed using the SymbolTable API.
- */
- enum { PARSE_VARIABLES = 1 };
-
- /**
- * Bitmask option to enable parsing of escape sequences. If (options &
- * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
- * to its value. Escapes are parsed using Utility.unescapeAt().
- */
- enum { PARSE_ESCAPES = 2 };
-
- /**
- * Bitmask option to enable skipping of whitespace. If (options &
- * SKIP_WHITESPACE) != 0, then Pattern_White_Space characters will be silently
- * skipped, as if they were not present in the input.
- */
- enum { SKIP_WHITESPACE = 4 };
-
- /**
- * Constructs an iterator over the given text, starting at the given
- * position.
- * @param text the text to be iterated
- * @param sym the symbol table, or null if there is none. If sym is null,
- * then variables will not be deferenced, even if the PARSE_VARIABLES
- * option is set.
- * @param pos upon input, the index of the next character to return. If a
- * variable has been dereferenced, then pos will <em>not</em> increment as
- * characters of the variable value are iterated.
- */
- RuleCharacterIterator(const UnicodeString& text, const SymbolTable* sym,
- ParsePosition& pos);
-
- /**
- * Returns true if this iterator has no more characters to return.
- */
- UBool atEnd() const;
-
- /**
- * Returns the next character using the given options, or DONE if there
- * are no more characters, and advance the position to the next
- * character.
- * @param options one or more of the following options, bitwise-OR-ed
- * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
- * @param isEscaped output parameter set to TRUE if the character
- * was escaped
- * @param ec input-output error code. An error will only be set by
- * this routing if options includes PARSE_VARIABLES and an unknown
- * variable name is seen, or if options includes PARSE_ESCAPES and
- * an invalid escape sequence is seen.
- * @return the current 32-bit code point, or DONE
- */
- UChar32 next(int32_t options, UBool& isEscaped, UErrorCode& ec);
-
- /**
- * Returns true if this iterator is currently within a variable expansion.
- */
- inline UBool inVariable() const;
-
- /**
- * An opaque object representing the position of a RuleCharacterIterator.
- */
- struct Pos : public UMemory {
- private:
- const UnicodeString* buf;
- int32_t pos;
- int32_t bufPos;
- friend class RuleCharacterIterator;
- };
-
- /**
- * Sets an object which, when later passed to setPos(), will
- * restore this iterator's position. Usage idiom:
- *
- * RuleCharacterIterator iterator = ...;
- * RuleCharacterIterator::Pos pos;
- * iterator.getPos(pos);
- * for (;;) {
- * iterator.getPos(pos);
- * int c = iterator.next(...);
- * ...
- * }
- * iterator.setPos(pos);
- *
- * @param p a position object to be set to this iterator's
- * current position.
- */
- void getPos(Pos& p) const;
-
- /**
- * Restores this iterator to the position it had when getPos()
- * set the given object.
- * @param p a position object previously set by getPos()
- */
- void setPos(const Pos& p);
-
- /**
- * Skips ahead past any ignored characters, as indicated by the given
- * options. This is useful in conjunction with the lookahead() method.
- *
- * Currently, this only has an effect for SKIP_WHITESPACE.
- * @param options one or more of the following options, bitwise-OR-ed
- * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
- */
- void skipIgnored(int32_t options);
-
- /**
- * Returns a string containing the remainder of the characters to be
- * returned by this iterator, without any option processing. If the
- * iterator is currently within a variable expansion, this will only
- * extend to the end of the variable expansion. This method is provided
- * so that iterators may interoperate with string-based APIs. The typical
- * sequence of calls is to call skipIgnored(), then call lookahead(), then
- * parse the string returned by lookahead(), then call jumpahead() to
- * resynchronize the iterator.
- * @param result a string to receive the characters to be returned
- * by future calls to next()
- * @param maxLookAhead The maximum to copy into the result.
- * @return a reference to result
- */
- UnicodeString& lookahead(UnicodeString& result, int32_t maxLookAhead = -1) const;
-
- /**
- * Advances the position by the given number of 16-bit code units.
- * This is useful in conjunction with the lookahead() method.
- * @param count the number of 16-bit code units to jump over
- */
- void jumpahead(int32_t count);
-
- /**
- * Returns a string representation of this object, consisting of the
- * characters being iterated, with a '|' marking the current position.
- * Position within an expanded variable is <em>not</em> indicated.
- * @param result output parameter to receive a string
- * representation of this object
- */
-// UnicodeString& toString(UnicodeString& result) const;
-
-private:
- /**
- * Returns the current 32-bit code point without parsing escapes, parsing
- * variables, or skipping whitespace.
- * @return the current 32-bit code point
- */
- UChar32 _current() const;
-
- /**
- * Advances the position by the given amount.
- * @param count the number of 16-bit code units to advance past
- */
- void _advance(int32_t count);
-};
-
-inline UBool RuleCharacterIterator::inVariable() const {
- return buf != 0;
-}
-
-U_NAMESPACE_END
-
-#endif // _RULEITER_H_
-//eof
diff --git a/contrib/libs/icu/common/schriter.cpp b/contrib/libs/icu/common/schriter.cpp
deleted file mode 100644
index 17b68aee9d9..00000000000
--- a/contrib/libs/icu/common/schriter.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 1998-2012, International Business Machines Corporation and
-* others. All Rights Reserved.
-******************************************************************************
-*
-* File schriter.cpp
-*
-* Modification History:
-*
-* Date Name Description
-* 05/05/99 stephen Cleaned up.
-******************************************************************************
-*/
-
-#include "utypeinfo.h" // for 'typeid' to work
-
-#include "unicode/chariter.h"
-#include "unicode/schriter.h"
-
-U_NAMESPACE_BEGIN
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringCharacterIterator)
-
-StringCharacterIterator::StringCharacterIterator()
- : UCharCharacterIterator(),
- text()
-{
- // NEVER DEFAULT CONSTRUCT!
-}
-
-StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr)
- : UCharCharacterIterator(textStr.getBuffer(), textStr.length()),
- text(textStr)
-{
- // we had set the input parameter's array, now we need to set our copy's array
- UCharCharacterIterator::text = this->text.getBuffer();
-}
-
-StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr,
- int32_t textPos)
- : UCharCharacterIterator(textStr.getBuffer(), textStr.length(), textPos),
- text(textStr)
-{
- // we had set the input parameter's array, now we need to set our copy's array
- UCharCharacterIterator::text = this->text.getBuffer();
-}
-
-StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr,
- int32_t textBegin,
- int32_t textEnd,
- int32_t textPos)
- : UCharCharacterIterator(textStr.getBuffer(), textStr.length(), textBegin, textEnd, textPos),
- text(textStr)
-{
- // we had set the input parameter's array, now we need to set our copy's array
- UCharCharacterIterator::text = this->text.getBuffer();
-}
-
-StringCharacterIterator::StringCharacterIterator(const StringCharacterIterator& that)
- : UCharCharacterIterator(that),
- text(that.text)
-{
- // we had set the input parameter's array, now we need to set our copy's array
- UCharCharacterIterator::text = this->text.getBuffer();
-}
-
-StringCharacterIterator::~StringCharacterIterator() {
-}
-
-StringCharacterIterator&
-StringCharacterIterator::operator=(const StringCharacterIterator& that) {
- UCharCharacterIterator::operator=(that);
- text = that.text;
- // we had set the input parameter's array, now we need to set our copy's array
- UCharCharacterIterator::text = this->text.getBuffer();
- return *this;
-}
-
-UBool
-StringCharacterIterator::operator==(const ForwardCharacterIterator& that) const {
- if (this == &that) {
- return TRUE;
- }
-
- // do not call UCharCharacterIterator::operator==()
- // because that checks for array pointer equality
- // while we compare UnicodeString objects
-
- if (typeid(*this) != typeid(that)) {
- return FALSE;
- }
-
- StringCharacterIterator& realThat = (StringCharacterIterator&)that;
-
- return text == realThat.text
- && pos == realThat.pos
- && begin == realThat.begin
- && end == realThat.end;
-}
-
-StringCharacterIterator*
-StringCharacterIterator::clone() const {
- return new StringCharacterIterator(*this);
-}
-
-void
-StringCharacterIterator::setText(const UnicodeString& newText) {
- text = newText;
- UCharCharacterIterator::setText(text.getBuffer(), text.length());
-}
-
-void
-StringCharacterIterator::getText(UnicodeString& result) {
- result = text;
-}
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/serv.cpp b/contrib/libs/icu/common/serv.cpp
deleted file mode 100644
index ce545b9dbd5..00000000000
--- a/contrib/libs/icu/common/serv.cpp
+++ /dev/null
@@ -1,982 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/**
-*******************************************************************************
-* Copyright (C) 2001-2014, International Business Machines Corporation.
-* All Rights Reserved.
-*******************************************************************************
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_SERVICE
-
-#include "serv.h"
-#include "umutex.h"
-
-#undef SERVICE_REFCOUNT
-
-// in case we use the refcount stuff
-
-U_NAMESPACE_BEGIN
-
-/*
-******************************************************************
-*/
-
-const UChar ICUServiceKey::PREFIX_DELIMITER = 0x002F; /* '/' */
-
-ICUServiceKey::ICUServiceKey(const UnicodeString& id)
-: _id(id) {
-}
-
-ICUServiceKey::~ICUServiceKey()
-{
-}
-
-const UnicodeString&
-ICUServiceKey::getID() const
-{
- return _id;
-}
-
-UnicodeString&
-ICUServiceKey::canonicalID(UnicodeString& result) const
-{
- return result.append(_id);
-}
-
-UnicodeString&
-ICUServiceKey::currentID(UnicodeString& result) const
-{
- return canonicalID(result);
-}
-
-UnicodeString&
-ICUServiceKey::currentDescriptor(UnicodeString& result) const
-{
- prefix(result);
- result.append(PREFIX_DELIMITER);
- return currentID(result);
-}
-
-UBool
-ICUServiceKey::fallback()
-{
- return FALSE;
-}
-
-UBool
-ICUServiceKey::isFallbackOf(const UnicodeString& id) const
-{
- return id == _id;
-}
-
-UnicodeString&
-ICUServiceKey::prefix(UnicodeString& result) const
-{
- return result;
-}
-
-UnicodeString&
-ICUServiceKey::parsePrefix(UnicodeString& result)
-{
- int32_t n = result.indexOf(PREFIX_DELIMITER);
- if (n < 0) {
- n = 0;
- }
- result.remove(n);
- return result;
-}
-
-UnicodeString&
-ICUServiceKey::parseSuffix(UnicodeString& result)
-{
- int32_t n = result.indexOf(PREFIX_DELIMITER);
- if (n >= 0) {
- result.remove(0, n+1);
- }
- return result;
-}
-
-#ifdef SERVICE_DEBUG
-UnicodeString&
-ICUServiceKey::debug(UnicodeString& result) const
-{
- debugClass(result);
- result.append((UnicodeString)" id: ");
- result.append(_id);
- return result;
-}
-
-UnicodeString&
-ICUServiceKey::debugClass(UnicodeString& result) const
-{
- return result.append((UnicodeString)"ICUServiceKey");
-}
-#endif
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ICUServiceKey)
-
-/*
-******************************************************************
-*/
-
-ICUServiceFactory::~ICUServiceFactory() {}
-
-SimpleFactory::SimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible)
-: _instance(instanceToAdopt), _id(id), _visible(visible)
-{
-}
-
-SimpleFactory::~SimpleFactory()
-{
- delete _instance;
-}
-
-UObject*
-SimpleFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const
-{
- if (U_SUCCESS(status)) {
- UnicodeString temp;
- if (_id == key.currentID(temp)) {
- return service->cloneInstance(_instance);
- }
- }
- return NULL;
-}
-
-void
-SimpleFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const
-{
- if (_visible) {
- result.put(_id, (void*)this, status); // cast away const
- } else {
- result.remove(_id);
- }
-}
-
-UnicodeString&
-SimpleFactory::getDisplayName(const UnicodeString& id, const Locale& /* locale */, UnicodeString& result) const
-{
- if (_visible && _id == id) {
- result = _id;
- } else {
- result.setToBogus();
- }
- return result;
-}
-
-#ifdef SERVICE_DEBUG
-UnicodeString&
-SimpleFactory::debug(UnicodeString& toAppendTo) const
-{
- debugClass(toAppendTo);
- toAppendTo.append((UnicodeString)" id: ");
- toAppendTo.append(_id);
- toAppendTo.append((UnicodeString)", visible: ");
- toAppendTo.append(_visible ? (UnicodeString)"T" : (UnicodeString)"F");
- return toAppendTo;
-}
-
-UnicodeString&
-SimpleFactory::debugClass(UnicodeString& toAppendTo) const
-{
- return toAppendTo.append((UnicodeString)"SimpleFactory");
-}
-#endif
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleFactory)
-
-/*
-******************************************************************
-*/
-
-ServiceListener::~ServiceListener() {}
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ServiceListener)
-
-/*
-******************************************************************
-*/
-
-// Record the actual id for this service in the cache, so we can return it
-// even if we succeed later with a different id.
-class CacheEntry : public UMemory {
-private:
- int32_t refcount;
-
-public:
- UnicodeString actualDescriptor;
- UObject* service;
-
- /**
- * Releases a reference to the shared resource.
- */
- ~CacheEntry() {
- delete service;
- }
-
- CacheEntry(const UnicodeString& _actualDescriptor, UObject* _service)
- : refcount(1), actualDescriptor(_actualDescriptor), service(_service) {
- }
-
- /**
- * Instantiation creates an initial reference, so don't call this
- * unless you're creating a new pointer to this. Management of
- * that pointer will have to know how to deal with refcounts.
- * Return true if the resource has not already been released.
- */
- CacheEntry* ref() {
- ++refcount;
- return this;
- }
-
- /**
- * Destructions removes a reference, so don't call this unless
- * you're removing pointer to this somewhere. Management of that
- * pointer will have to know how to deal with refcounts. Once
- * the refcount drops to zero, the resource is released. Return
- * false if the resouce has been released.
- */
- CacheEntry* unref() {
- if ((--refcount) == 0) {
- delete this;
- return NULL;
- }
- return this;
- }
-
- /**
- * Return TRUE if there is at least one reference to this and the
- * resource has not been released.
- */
- UBool isShared() const {
- return refcount > 1;
- }
-};
-
-// UObjectDeleter for serviceCache
-U_CDECL_BEGIN
-static void U_CALLCONV
-cacheDeleter(void* obj) {
- U_NAMESPACE_USE ((CacheEntry*)obj)->unref();
-}
-
-/**
-* Deleter for UObjects
-*/
-static void U_CALLCONV
-deleteUObject(void *obj) {
- U_NAMESPACE_USE delete (UObject*) obj;
-}
-U_CDECL_END
-
-/*
-******************************************************************
-*/
-
-class DNCache : public UMemory {
-public:
- Hashtable cache;
- const Locale locale;
-
- DNCache(const Locale& _locale)
- : cache(), locale(_locale)
- {
- // cache.setKeyDeleter(uprv_deleteUObject);
- }
-};
-
-
-/*
-******************************************************************
-*/
-
-StringPair*
-StringPair::create(const UnicodeString& displayName,
- const UnicodeString& id,
- UErrorCode& status)
-{
- if (U_SUCCESS(status)) {
- StringPair* sp = new StringPair(displayName, id);
- if (sp == NULL || sp->isBogus()) {
- status = U_MEMORY_ALLOCATION_ERROR;
- delete sp;
- return NULL;
- }
- return sp;
- }
- return NULL;
-}
-
-UBool
-StringPair::isBogus() const {
- return displayName.isBogus() || id.isBogus();
-}
-
-StringPair::StringPair(const UnicodeString& _displayName,
- const UnicodeString& _id)
-: displayName(_displayName)
-, id(_id)
-{
-}
-
-U_CDECL_BEGIN
-static void U_CALLCONV
-userv_deleteStringPair(void *obj) {
- U_NAMESPACE_USE delete (StringPair*) obj;
-}
-U_CDECL_END
-
-/*
-******************************************************************
-*/
-
-static UMutex lock;
-
-ICUService::ICUService()
-: name()
-, timestamp(0)
-, factories(NULL)
-, serviceCache(NULL)
-, idCache(NULL)
-, dnCache(NULL)
-{
-}
-
-ICUService::ICUService(const UnicodeString& newName)
-: name(newName)
-, timestamp(0)
-, factories(NULL)
-, serviceCache(NULL)
-, idCache(NULL)
-, dnCache(NULL)
-{
-}
-
-ICUService::~ICUService()
-{
- {
- Mutex mutex(&lock);
- clearCaches();
- delete factories;
- factories = NULL;
- }
-}
-
-UObject*
-ICUService::get(const UnicodeString& descriptor, UErrorCode& status) const
-{
- return get(descriptor, NULL, status);
-}
-
-UObject*
-ICUService::get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const
-{
- UObject* result = NULL;
- ICUServiceKey* key = createKey(&descriptor, status);
- if (key) {
- result = getKey(*key, actualReturn, status);
- delete key;
- }
- return result;
-}
-
-UObject*
-ICUService::getKey(ICUServiceKey& key, UErrorCode& status) const
-{
- return getKey(key, NULL, status);
-}
-
-// this is a vector that subclasses of ICUService can override to further customize the result object
-// before returning it. All other public get functions should call this one.
-
-UObject*
-ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const
-{
- return getKey(key, actualReturn, NULL, status);
-}
-
-// make it possible to call reentrantly on systems that don't have reentrant mutexes.
-// we can use this simple approach since we know the situation where we're calling
-// reentrantly even without knowing the thread.
-class XMutex : public UMemory {
-public:
- inline XMutex(UMutex *mutex, UBool reentering)
- : fMutex(mutex)
- , fActive(!reentering)
- {
- if (fActive) umtx_lock(fMutex);
- }
- inline ~XMutex() {
- if (fActive) umtx_unlock(fMutex);
- }
-
-private:
- UMutex *fMutex;
- UBool fActive;
-};
-
-struct UVectorDeleter {
- UVector* _obj;
- UVectorDeleter() : _obj(NULL) {}
- ~UVectorDeleter() { delete _obj; }
-};
-
-// called only by factories, treat as private
-UObject*
-ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUServiceFactory* factory, UErrorCode& status) const
-{
- if (U_FAILURE(status)) {
- return NULL;
- }
-
- if (isDefault()) {
- return handleDefault(key, actualReturn, status);
- }
-
- ICUService* ncthis = (ICUService*)this; // cast away semantic const
-
- CacheEntry* result = NULL;
- {
- // The factory list can't be modified until we're done,
- // otherwise we might update the cache with an invalid result.
- // The cache has to stay in synch with the factory list.
- // ICU doesn't have monitors so we can't use rw locks, so
- // we single-thread everything using this service, for now.
-
- // if factory is not null, we're calling from within the mutex,
- // and since some unix machines don't have reentrant mutexes we
- // need to make sure not to try to lock it again.
- XMutex mutex(&lock, factory != NULL);
-
- if (serviceCache == NULL) {
- ncthis->serviceCache = new Hashtable(status);
- if (ncthis->serviceCache == NULL) {
- return NULL;
- }
- if (U_FAILURE(status)) {
- delete serviceCache;
- return NULL;
- }
- serviceCache->setValueDeleter(cacheDeleter);
- }
-
- UnicodeString currentDescriptor;
- UVectorDeleter cacheDescriptorList;
- UBool putInCache = FALSE;
-
- int32_t startIndex = 0;
- int32_t limit = factories->size();
- UBool cacheResult = TRUE;
-
- if (factory != NULL) {
- for (int32_t i = 0; i < limit; ++i) {
- if (factory == (const ICUServiceFactory*)factories->elementAt(i)) {
- startIndex = i + 1;
- break;
- }
- }
- if (startIndex == 0) {
- // throw new InternalError("Factory " + factory + "not registered with service: " + this);
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- cacheResult = FALSE;
- }
-
- do {
- currentDescriptor.remove();
- key.currentDescriptor(currentDescriptor);
- result = (CacheEntry*)serviceCache->get(currentDescriptor);
- if (result != NULL) {
- break;
- }
-
- // first test of cache failed, so we'll have to update
- // the cache if we eventually succeed-- that is, if we're
- // going to update the cache at all.
- putInCache = TRUE;
-
- int32_t index = startIndex;
- while (index < limit) {
- ICUServiceFactory* f = (ICUServiceFactory*)factories->elementAt(index++);
- UObject* service = f->create(key, this, status);
- if (U_FAILURE(status)) {
- delete service;
- return NULL;
- }
- if (service != NULL) {
- result = new CacheEntry(currentDescriptor, service);
- if (result == NULL) {
- delete service;
- status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- goto outerEnd;
- }
- }
-
- // prepare to load the cache with all additional ids that
- // will resolve to result, assuming we'll succeed. We
- // don't want to keep querying on an id that's going to
- // fallback to the one that succeeded, we want to hit the
- // cache the first time next goaround.
- if (cacheDescriptorList._obj == NULL) {
- cacheDescriptorList._obj = new UVector(uprv_deleteUObject, NULL, 5, status);
- if (U_FAILURE(status)) {
- return NULL;
- }
- }
- UnicodeString* idToCache = new UnicodeString(currentDescriptor);
- if (idToCache == NULL || idToCache->isBogus()) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- cacheDescriptorList._obj->addElement(idToCache, status);
- if (U_FAILURE(status)) {
- return NULL;
- }
- } while (key.fallback());
-outerEnd:
-
- if (result != NULL) {
- if (putInCache && cacheResult) {
- serviceCache->put(result->actualDescriptor, result, status);
- if (U_FAILURE(status)) {
- return NULL;
- }
-
- if (cacheDescriptorList._obj != NULL) {
- for (int32_t i = cacheDescriptorList._obj->size(); --i >= 0;) {
- UnicodeString* desc = (UnicodeString*)cacheDescriptorList._obj->elementAt(i);
-
- serviceCache->put(*desc, result, status);
- if (U_FAILURE(status)) {
- return NULL;
- }
-
- result->ref();
- cacheDescriptorList._obj->removeElementAt(i);
- }
- }
- }
-
- if (actualReturn != NULL) {
- // strip null prefix
- if (result->actualDescriptor.indexOf((UChar)0x2f) == 0) { // U+002f=slash (/)
- actualReturn->remove();
- actualReturn->append(result->actualDescriptor,
- 1,
- result->actualDescriptor.length() - 1);
- } else {
- *actualReturn = result->actualDescriptor;
- }
-
- if (actualReturn->isBogus()) {
- status = U_MEMORY_ALLOCATION_ERROR;
- delete result;
- return NULL;
- }
- }
-
- UObject* service = cloneInstance(result->service);
- if (putInCache && !cacheResult) {
- delete result;
- }
- return service;
- }
- }
-
- return handleDefault(key, actualReturn, status);
-}
-
-UObject*
-ICUService::handleDefault(const ICUServiceKey& /* key */, UnicodeString* /* actualIDReturn */, UErrorCode& /* status */) const
-{
- return NULL;
-}
-
-UVector&
-ICUService::getVisibleIDs(UVector& result, UErrorCode& status) const {
- return getVisibleIDs(result, NULL, status);
-}
-
-UVector&
-ICUService::getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorCode& status) const
-{
- result.removeAllElements();
-
- if (U_FAILURE(status)) {
- return result;
- }
-
- {
- Mutex mutex(&lock);
- const Hashtable* map = getVisibleIDMap(status);
- if (map != NULL) {
- ICUServiceKey* fallbackKey = createKey(matchID, status);
-
- for (int32_t pos = UHASH_FIRST;;) {
- const UHashElement* e = map->nextElement(pos);
- if (e == NULL) {
- break;
- }
-
- const UnicodeString* id = (const UnicodeString*)e->key.pointer;
- if (fallbackKey != NULL) {
- if (!fallbackKey->isFallbackOf(*id)) {
- continue;
- }
- }
-
- UnicodeString* idClone = new UnicodeString(*id);
- if (idClone == NULL || idClone->isBogus()) {
- delete idClone;
- status = U_MEMORY_ALLOCATION_ERROR;
- break;
- }
- result.addElement(idClone, status);
- if (U_FAILURE(status)) {
- delete idClone;
- break;
- }
- }
- delete fallbackKey;
- }
- }
- if (U_FAILURE(status)) {
- result.removeAllElements();
- }
- return result;
-}
-
-const Hashtable*
-ICUService::getVisibleIDMap(UErrorCode& status) const {
- if (U_FAILURE(status)) return NULL;
-
- // must only be called when lock is already held
-
- ICUService* ncthis = (ICUService*)this; // cast away semantic const
- if (idCache == NULL) {
- ncthis->idCache = new Hashtable(status);
- if (idCache == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- } else if (factories != NULL) {
- for (int32_t pos = factories->size(); --pos >= 0;) {
- ICUServiceFactory* f = (ICUServiceFactory*)factories->elementAt(pos);
- f->updateVisibleIDs(*idCache, status);
- }
- if (U_FAILURE(status)) {
- delete idCache;
- ncthis->idCache = NULL;
- }
- }
- }
-
- return idCache;
-}
-
-
-UnicodeString&
-ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result) const
-{
- return getDisplayName(id, result, Locale::getDefault());
-}
-
-UnicodeString&
-ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result, const Locale& locale) const
-{
- {
- UErrorCode status = U_ZERO_ERROR;
- Mutex mutex(&lock);
- const Hashtable* map = getVisibleIDMap(status);
- if (map != NULL) {
- ICUServiceFactory* f = (ICUServiceFactory*)map->get(id);
- if (f != NULL) {
- f->getDisplayName(id, locale, result);
- return result;
- }
-
- // fallback
- status = U_ZERO_ERROR;
- ICUServiceKey* fallbackKey = createKey(&id, status);
- while (fallbackKey != NULL && fallbackKey->fallback()) {
- UnicodeString us;
- fallbackKey->currentID(us);
- f = (ICUServiceFactory*)map->get(us);
- if (f != NULL) {
- f->getDisplayName(id, locale, result);
- delete fallbackKey;
- return result;
- }
- }
- delete fallbackKey;
- }
- }
- result.setToBogus();
- return result;
-}
-
-UVector&
-ICUService::getDisplayNames(UVector& result, UErrorCode& status) const
-{
- return getDisplayNames(result, Locale::getDefault(), NULL, status);
-}
-
-
-UVector&
-ICUService::getDisplayNames(UVector& result, const Locale& locale, UErrorCode& status) const
-{
- return getDisplayNames(result, locale, NULL, status);
-}
-
-UVector&
-ICUService::getDisplayNames(UVector& result,
- const Locale& locale,
- const UnicodeString* matchID,
- UErrorCode& status) const
-{
- result.removeAllElements();
- result.setDeleter(userv_deleteStringPair);
- if (U_SUCCESS(status)) {
- ICUService* ncthis = (ICUService*)this; // cast away semantic const
- Mutex mutex(&lock);
-
- if (dnCache != NULL && dnCache->locale != locale) {
- delete dnCache;
- ncthis->dnCache = NULL;
- }
-
- if (dnCache == NULL) {
- const Hashtable* m = getVisibleIDMap(status);
- if (U_FAILURE(status)) {
- return result;
- }
- ncthis->dnCache = new DNCache(locale);
- if (dnCache == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return result;
- }
-
- int32_t pos = UHASH_FIRST;
- const UHashElement* entry = NULL;
- while ((entry = m->nextElement(pos)) != NULL) {
- const UnicodeString* id = (const UnicodeString*)entry->key.pointer;
- ICUServiceFactory* f = (ICUServiceFactory*)entry->value.pointer;
- UnicodeString dname;
- f->getDisplayName(*id, locale, dname);
- if (dname.isBogus()) {
- status = U_MEMORY_ALLOCATION_ERROR;
- } else {
- dnCache->cache.put(dname, (void*)id, status); // share pointer with visibleIDMap
- if (U_SUCCESS(status)) {
- continue;
- }
- }
- delete dnCache;
- ncthis->dnCache = NULL;
- return result;
- }
- }
- }
-
- ICUServiceKey* matchKey = createKey(matchID, status);
- /* To ensure that all elements in the hashtable are iterated, set pos to -1.
- * nextElement(pos) will skip the position at pos and begin the iteration
- * at the next position, which in this case will be 0.
- */
- int32_t pos = UHASH_FIRST;
- const UHashElement *entry = NULL;
- while ((entry = dnCache->cache.nextElement(pos)) != NULL) {
- const UnicodeString* id = (const UnicodeString*)entry->value.pointer;
- if (matchKey != NULL && !matchKey->isFallbackOf(*id)) {
- continue;
- }
- const UnicodeString* dn = (const UnicodeString*)entry->key.pointer;
- StringPair* sp = StringPair::create(*id, *dn, status);
- result.addElement(sp, status);
- if (U_FAILURE(status)) {
- result.removeAllElements();
- break;
- }
- }
- delete matchKey;
-
- return result;
-}
-
-URegistryKey
-ICUService::registerInstance(UObject* objToAdopt, const UnicodeString& id, UErrorCode& status)
-{
- return registerInstance(objToAdopt, id, TRUE, status);
-}
-
-URegistryKey
-ICUService::registerInstance(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status)
-{
- ICUServiceKey* key = createKey(&id, status);
- if (key != NULL) {
- UnicodeString canonicalID;
- key->canonicalID(canonicalID);
- delete key;
-
- ICUServiceFactory* f = createSimpleFactory(objToAdopt, canonicalID, visible, status);
- if (f != NULL) {
- return registerFactory(f, status);
- }
- }
- delete objToAdopt;
- return NULL;
-}
-
-ICUServiceFactory*
-ICUService::createSimpleFactory(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status)
-{
- if (U_SUCCESS(status)) {
- if ((objToAdopt != NULL) && (!id.isBogus())) {
- return new SimpleFactory(objToAdopt, id, visible);
- }
- status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return NULL;
-}
-
-URegistryKey
-ICUService::registerFactory(ICUServiceFactory* factoryToAdopt, UErrorCode& status)
-{
- if (U_SUCCESS(status) && factoryToAdopt != NULL) {
- Mutex mutex(&lock);
-
- if (factories == NULL) {
- factories = new UVector(deleteUObject, NULL, status);
- if (U_FAILURE(status)) {
- delete factories;
- return NULL;
- }
- }
- factories->insertElementAt(factoryToAdopt, 0, status);
- if (U_SUCCESS(status)) {
- clearCaches();
- } else {
- delete factoryToAdopt;
- factoryToAdopt = NULL;
- }
- }
-
- if (factoryToAdopt != NULL) {
- notifyChanged();
- }
-
- return (URegistryKey)factoryToAdopt;
-}
-
-UBool
-ICUService::unregister(URegistryKey rkey, UErrorCode& status)
-{
- ICUServiceFactory *factory = (ICUServiceFactory*)rkey;
- UBool result = FALSE;
- if (factory != NULL && factories != NULL) {
- Mutex mutex(&lock);
-
- if (factories->removeElement(factory)) {
- clearCaches();
- result = TRUE;
- } else {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- delete factory;
- }
- }
- if (result) {
- notifyChanged();
- }
- return result;
-}
-
-void
-ICUService::reset()
-{
- {
- Mutex mutex(&lock);
- reInitializeFactories();
- clearCaches();
- }
- notifyChanged();
-}
-
-void
-ICUService::reInitializeFactories()
-{
- if (factories != NULL) {
- factories->removeAllElements();
- }
-}
-
-UBool
-ICUService::isDefault() const
-{
- return countFactories() == 0;
-}
-
-ICUServiceKey*
-ICUService::createKey(const UnicodeString* id, UErrorCode& status) const
-{
- return (U_FAILURE(status) || id == NULL) ? NULL : new ICUServiceKey(*id);
-}
-
-void
-ICUService::clearCaches()
-{
- // callers synchronize before use
- ++timestamp;
- delete dnCache;
- dnCache = NULL;
- delete idCache;
- idCache = NULL;
- delete serviceCache; serviceCache = NULL;
-}
-
-void
-ICUService::clearServiceCache()
-{
- // callers synchronize before use
- delete serviceCache; serviceCache = NULL;
-}
-
-UBool
-ICUService::acceptsListener(const EventListener& l) const
-{
- return dynamic_cast<const ServiceListener*>(&l) != NULL;
-}
-
-void
-ICUService::notifyListener(EventListener& l) const
-{
- ((ServiceListener&)l).serviceChanged(*this);
-}
-
-UnicodeString&
-ICUService::getName(UnicodeString& result) const
-{
- return result.append(name);
-}
-
-int32_t
-ICUService::countFactories() const
-{
- return factories == NULL ? 0 : factories->size();
-}
-
-int32_t
-ICUService::getTimestamp() const
-{
- return timestamp;
-}
-
-U_NAMESPACE_END
-
-/* UCONFIG_NO_SERVICE */
-#endif
diff --git a/contrib/libs/icu/common/serv.h b/contrib/libs/icu/common/serv.h
deleted file mode 100644
index e1f69cd4119..00000000000
--- a/contrib/libs/icu/common/serv.h
+++ /dev/null
@@ -1,996 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/**
- *******************************************************************************
- * Copyright (C) 2001-2011, International Business Machines Corporation. *
- * All Rights Reserved. *
- *******************************************************************************
- */
-
-#ifndef ICUSERV_H
-#define ICUSERV_H
-
-#include "unicode/utypes.h"
-
-#if UCONFIG_NO_SERVICE
-
-U_NAMESPACE_BEGIN
-
-/*
- * Allow the declaration of APIs with pointers to ICUService
- * even when service is removed from the build.
- */
-class ICUService;
-
-U_NAMESPACE_END
-
-#else
-
-#include "unicode/unistr.h"
-#include "unicode/locid.h"
-#include "unicode/umisc.h"
-
-#include "hash.h"
-#include "uvector.h"
-#include "servnotf.h"
-
-class ICUServiceTest;
-
-U_NAMESPACE_BEGIN
-
-class ICUServiceKey;
-class ICUServiceFactory;
-class SimpleFactory;
-class ServiceListener;
-class ICUService;
-
-class DNCache;
-
-/*******************************************************************
- * ICUServiceKey
- */
-
-/**
- * <p>ICUServiceKeys are used to communicate with factories to
- * generate an instance of the service. ICUServiceKeys define how
- * ids are canonicalized, provide both a current id and a current
- * descriptor to use in querying the cache and factories, and
- * determine the fallback strategy.</p>
- *
- * <p>ICUServiceKeys provide both a currentDescriptor and a currentID.
- * The descriptor contains an optional prefix, followed by '/'
- * and the currentID. Factories that handle complex keys,
- * for example number format factories that generate multiple
- * kinds of formatters for the same locale, use the descriptor
- * to provide a fully unique identifier for the service object,
- * while using the currentID (in this case, the locale string),
- * as the visible IDs that can be localized.</p>
- *
- * <p>The default implementation of ICUServiceKey has no fallbacks and
- * has no custom descriptors.</p>
- */
-class U_COMMON_API ICUServiceKey : public UObject {
- private:
- const UnicodeString _id;
-
- protected:
- static const UChar PREFIX_DELIMITER;
-
- public:
-
- /**
- * <p>Construct a key from an id.</p>
- *
- * @param id the ID from which to construct the key.
- */
- ICUServiceKey(const UnicodeString& id);
-
- /**
- * <p>Virtual destructor.</p>
- */
- virtual ~ICUServiceKey();
-
- /**
- * <p>Return the original ID used to construct this key.</p>
- *
- * @return the ID used to construct this key.
- */
- virtual const UnicodeString& getID() const;
-
- /**
- * <p>Return the canonical version of the original ID. This implementation
- * appends the original ID to result. Result is returned as a convenience.</p>
- *
- * @param result the output parameter to which the id will be appended.
- * @return the modified result.
- */
- virtual UnicodeString& canonicalID(UnicodeString& result) const;
-
- /**
- * <p>Return the (canonical) current ID. This implementation appends
- * the canonical ID to result. Result is returned as a convenience.</p>
- *
- * @param result the output parameter to which the current id will be appended.
- * @return the modified result.
- */
- virtual UnicodeString& currentID(UnicodeString& result) const;
-
- /**
- * <p>Return the current descriptor. This implementation appends
- * the current descriptor to result. Result is returned as a convenience.</p>
- *
- * <p>The current descriptor is used to fully
- * identify an instance of the service in the cache. A
- * factory may handle all descriptors for an ID, or just a
- * particular descriptor. The factory can either parse the
- * descriptor or use custom API on the key in order to
- * instantiate the service.</p>
- *
- * @param result the output parameter to which the current id will be appended.
- * @return the modified result.
- */
- virtual UnicodeString& currentDescriptor(UnicodeString& result) const;
-
- /**
- * <p>If the key has a fallback, modify the key and return true,
- * otherwise return false. The current ID will change if there
- * is a fallback. No currentIDs should be repeated, and fallback
- * must eventually return false. This implementation has no fallbacks
- * and always returns false.</p>
- *
- * @return TRUE if the ICUServiceKey changed to a valid fallback value.
- */
- virtual UBool fallback();
-
- /**
- * <p>Return TRUE if a key created from id matches, or would eventually
- * fallback to match, the canonical ID of this ICUServiceKey.</p>
- *
- * @param id the id to test.
- * @return TRUE if this ICUServiceKey's canonical ID is a fallback of id.
- */
- virtual UBool isFallbackOf(const UnicodeString& id) const;
-
- /**
- * <p>Return the prefix. This implementation leaves result unchanged.
- * Result is returned as a convenience.</p>
- *
- * @param result the output parameter to which the prefix will be appended.
- * @return the modified result.
- */
- virtual UnicodeString& prefix(UnicodeString& result) const;
-
- /**
- * <p>A utility to parse the prefix out of a descriptor string. Only
- * the (undelimited) prefix, if any, remains in result. Result is returned as a
- * convenience.</p>
- *
- * @param result an input/output parameter that on entry is a descriptor, and
- * on exit is the prefix of that descriptor.
- * @return the modified result.
- */
- static UnicodeString& parsePrefix(UnicodeString& result);
-
- /**
- * <p>A utility to parse the suffix out of a descriptor string. Only
- * the (undelimited) suffix, if any, remains in result. Result is returned as a
- * convenience.</p>
- *
- * @param result an input/output parameter that on entry is a descriptor, and
- * on exit is the suffix of that descriptor.
- * @return the modified result.
- */
- static UnicodeString& parseSuffix(UnicodeString& result);
-
-public:
- /**
- * UObject RTTI boilerplate.
- */
- static UClassID U_EXPORT2 getStaticClassID();
-
- /**
- * UObject RTTI boilerplate.
- */
- virtual UClassID getDynamicClassID() const;
-
-#ifdef SERVICE_DEBUG
- public:
- virtual UnicodeString& debug(UnicodeString& result) const;
- virtual UnicodeString& debugClass(UnicodeString& result) const;
-#endif
-
-};
-
- /*******************************************************************
- * ICUServiceFactory
- */
-
- /**
- * <p>An implementing ICUServiceFactory generates the service objects maintained by the
- * service. A factory generates a service object from a key,
- * updates id->factory mappings, and returns the display name for
- * a supported id.</p>
- */
-class U_COMMON_API ICUServiceFactory : public UObject {
- public:
- virtual ~ICUServiceFactory();
-
- /**
- * <p>Create a service object from the key, if this factory
- * supports the key. Otherwise, return NULL.</p>
- *
- * <p>If the factory supports the key, then it can call
- * the service's getKey(ICUServiceKey, String[], ICUServiceFactory) method
- * passing itself as the factory to get the object that
- * the service would have created prior to the factory's
- * registration with the service. This can change the
- * key, so any information required from the key should
- * be extracted before making such a callback.</p>
- *
- * @param key the service key.
- * @param service the service with which this factory is registered.
- * @param status the error code status.
- * @return the service object, or NULL if the factory does not support the key.
- */
- virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const = 0;
-
- /**
- * <p>Update result to reflect the IDs (not descriptors) that this
- * factory publicly handles. Result contains mappings from ID to
- * factory. On entry it will contain all (visible) mappings from
- * previously-registered factories.</p>
- *
- * <p>This function, together with getDisplayName, are used to
- * support ICUService::getDisplayNames. The factory determines
- * which IDs (of those it supports) it will make visible, and of
- * those, which it will provide localized display names for. In
- * most cases it will register mappings from all IDs it supports
- * to itself.</p>
- *
- * @param result the mapping table to update.
- * @param status the error code status.
- */
- virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const = 0;
-
- /**
- * <p>Return, in result, the display name of the id in the provided locale.
- * This is an id, not a descriptor. If the id is
- * not visible, sets result to bogus. If the
- * incoming result is bogus, it remains bogus. Result is returned as a
- * convenience. Results are not defined if id is not one supported by this
- * factory.</p>
- *
- * @param id a visible id supported by this factory.
- * @param locale the locale for which to generate the corresponding localized display name.
- * @param result output parameter to hold the display name.
- * @return result.
- */
- virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const = 0;
-};
-
-/*
- ******************************************************************
- */
-
- /**
- * <p>A default implementation of factory. This provides default
- * implementations for subclasses, and implements a singleton
- * factory that matches a single ID and returns a single
- * (possibly deferred-initialized) instance. This implements
- * updateVisibleIDs to add a mapping from its ID to itself
- * if visible is true, or to remove any existing mapping
- * for its ID if visible is false. No localization of display
- * names is performed.</p>
- */
-class U_COMMON_API SimpleFactory : public ICUServiceFactory {
- protected:
- UObject* _instance;
- const UnicodeString _id;
- const UBool _visible;
-
- public:
- /**
- * <p>Construct a SimpleFactory that maps a single ID to a single
- * service instance. If visible is TRUE, the ID will be visible.
- * The instance must not be NULL. The SimpleFactory will adopt
- * the instance, which must not be changed subsequent to this call.</p>
- *
- * @param instanceToAdopt the service instance to adopt.
- * @param id the ID to assign to this service instance.
- * @param visible if TRUE, the ID will be visible.
- */
- SimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible = TRUE);
-
- /**
- * <p>Destructor.</p>
- */
- virtual ~SimpleFactory();
-
- /**
- * <p>This implementation returns a clone of the service instance if the factory's ID is equal to
- * the key's currentID. Service and prefix are ignored.</p>
- *
- * @param key the service key.
- * @param service the service with which this factory is registered.
- * @param status the error code status.
- * @return the service object, or NULL if the factory does not support the key.
- */
- virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
-
- /**
- * <p>This implementation adds a mapping from ID -> this to result if visible is TRUE,
- * otherwise it removes ID from result.</p>
- *
- * @param result the mapping table to update.
- * @param status the error code status.
- */
- virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const;
-
- /**
- * <p>This implementation returns the factory ID if it equals id and visible is TRUE,
- * otherwise it returns the empty string. (This implementation provides
- * no localized id information.)</p>
- *
- * @param id a visible id supported by this factory.
- * @param locale the locale for which to generate the corresponding localized display name.
- * @param result output parameter to hold the display name.
- * @return result.
- */
- virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
-
-public:
- /**
- * UObject RTTI boilerplate.
- */
- static UClassID U_EXPORT2 getStaticClassID();
-
- /**
- * UObject RTTI boilerplate.
- */
- virtual UClassID getDynamicClassID() const;
-
-#ifdef SERVICE_DEBUG
- public:
- virtual UnicodeString& debug(UnicodeString& toAppendTo) const;
- virtual UnicodeString& debugClass(UnicodeString& toAppendTo) const;
-#endif
-
-};
-
-/*
- ******************************************************************
- */
-
-/**
- * <p>ServiceListener is the listener that ICUService provides by default.
- * ICUService will notifiy this listener when factories are added to
- * or removed from the service. Subclasses can provide
- * different listener interfaces that extend EventListener, and modify
- * acceptsListener and notifyListener as appropriate.</p>
- */
-class U_COMMON_API ServiceListener : public EventListener {
-public:
- virtual ~ServiceListener();
-
- /**
- * <p>This method is called when the service changes. At the time of the
- * call this listener is registered with the service. It must
- * not modify the notifier in the context of this call.</p>
- *
- * @param service the service that changed.
- */
- virtual void serviceChanged(const ICUService& service) const = 0;
-
-public:
- /**
- * UObject RTTI boilerplate.
- */
- static UClassID U_EXPORT2 getStaticClassID();
-
- /**
- * UObject RTTI boilerplate.
- */
- virtual UClassID getDynamicClassID() const;
-
-};
-
-/*
- ******************************************************************
- */
-
-/**
- * <p>A StringPair holds a displayName/ID pair. ICUService uses it
- * as the array elements returned by getDisplayNames.
- */
-class U_COMMON_API StringPair : public UMemory {
-public:
- /**
- * <p>The display name of the pair.</p>
- */
- const UnicodeString displayName;
-
- /**
- * <p>The ID of the pair.</p>
- */
- const UnicodeString id;
-
- /**
- * <p>Creates a string pair from a displayName and an ID.</p>
- *
- * @param displayName the displayName.
- * @param id the ID.
- * @param status the error code status.
- * @return a StringPair if the creation was successful, otherwise NULL.
- */
- static StringPair* create(const UnicodeString& displayName,
- const UnicodeString& id,
- UErrorCode& status);
-
- /**
- * <p>Return TRUE if either string of the pair is bogus.</p>
- * @return TRUE if either string of the pair is bogus.
- */
- UBool isBogus() const;
-
-private:
- StringPair(const UnicodeString& displayName, const UnicodeString& id);
-};
-
-/*******************************************************************
- * ICUService
- */
-
- /**
- * <p>A Service provides access to service objects that implement a
- * particular service, e.g. transliterators. Users provide a String
- * id (for example, a locale string) to the service, and get back an
- * object for that id. Service objects can be any kind of object. A
- * new service object is returned for each query. The caller is
- * responsible for deleting it.</p>
- *
- * <p>Services 'canonicalize' the query ID and use the canonical ID to
- * query for the service. The service also defines a mechanism to
- * 'fallback' the ID multiple times. Clients can optionally request
- * the actual ID that was matched by a query when they use an ID to
- * retrieve a service object.</p>
- *
- * <p>Service objects are instantiated by ICUServiceFactory objects
- * registered with the service. The service queries each
- * ICUServiceFactory in turn, from most recently registered to
- * earliest registered, until one returns a service object. If none
- * responds with a service object, a fallback ID is generated, and the
- * process repeats until a service object is returned or until the ID
- * has no further fallbacks.</p>
- *
- * <p>In ICU 2.4, UObject (the base class of service instances) does
- * not define a polymorphic clone function. ICUService uses clones to
- * manage ownership. Thus, for now, ICUService defines an abstract
- * method, cloneInstance, that clients must implement to create clones
- * of the service instances. This may change in future releases of
- * ICU.</p>
- *
- * <p>ICUServiceFactories can be dynamically registered and
- * unregistered with the service. When registered, an
- * ICUServiceFactory is installed at the head of the factory list, and
- * so gets 'first crack' at any keys or fallback keys. When
- * unregistered, it is removed from the service and can no longer be
- * located through it. Service objects generated by this factory and
- * held by the client are unaffected.</p>
- *
- * <p>If a service has variants (e.g., the different variants of
- * BreakIterator) an ICUServiceFactory can use the prefix of the
- * ICUServiceKey to determine the variant of a service to generate.
- * If it does not support all variants, it can request
- * previously-registered factories to handle the ones it does not
- * support.</p>
- *
- * <p>ICUService uses ICUServiceKeys to query factories and perform
- * fallback. The ICUServiceKey defines the canonical form of the ID,
- * and implements the fallback strategy. Custom ICUServiceKeys can be
- * defined that parse complex IDs into components that
- * ICUServiceFactories can more easily use. The ICUServiceKey can
- * cache the results of this parsing to save repeated effort.
- * ICUService provides convenience APIs that take UnicodeStrings and
- * generate default ICUServiceKeys for use in querying.</p>
- *
- * <p>ICUService provides API to get the list of IDs publicly
- * supported by the service (although queries aren't restricted to
- * this list). This list contains only 'simple' IDs, and not fully
- * unique IDs. ICUServiceFactories are associated with each simple ID
- * and the responsible factory can also return a human-readable
- * localized version of the simple ID, for use in user interfaces.
- * ICUService can also provide an array of the all the localized
- * visible IDs and their corresponding internal IDs.</p>
- *
- * <p>ICUService implements ICUNotifier, so that clients can register
- * to receive notification when factories are added or removed from
- * the service. ICUService provides a default EventListener
- * subinterface, ServiceListener, which can be registered with the
- * service. When the service changes, the ServiceListener's
- * serviceChanged method is called with the service as the
- * argument.</p>
- *
- * <p>The ICUService API is both rich and generic, and it is expected
- * that most implementations will statically 'wrap' ICUService to
- * present a more appropriate API-- for example, to declare the type
- * of the objects returned from get, to limit the factories that can
- * be registered with the service, or to define their own listener
- * interface with a custom callback method. They might also customize
- * ICUService by overriding it, for example, to customize the
- * ICUServiceKey and fallback strategy. ICULocaleService is a
- * subclass of ICUService that uses Locale names as IDs and uses
- * ICUServiceKeys that implement the standard resource bundle fallback
- * strategy. Most clients will wish to subclass it instead of
- * ICUService.</p>
- */
-class U_COMMON_API ICUService : public ICUNotifier {
- protected:
- /**
- * Name useful for debugging.
- */
- const UnicodeString name;
-
- private:
-
- /**
- * Timestamp so iterators can be fail-fast.
- */
- uint32_t timestamp;
-
- /**
- * All the factories registered with this service.
- */
- UVector* factories;
-
- /**
- * The service cache.
- */
- Hashtable* serviceCache;
-
- /**
- * The ID cache.
- */
- Hashtable* idCache;
-
- /**
- * The name cache.
- */
- DNCache* dnCache;
-
- /**
- * Constructor.
- */
- public:
- /**
- * <p>Construct a new ICUService.</p>
- */
- ICUService();
-
- /**
- * <p>Construct with a name (useful for debugging).</p>
- *
- * @param name a name to use in debugging.
- */
- ICUService(const UnicodeString& name);
-
- /**
- * <p>Destructor.</p>
- */
- virtual ~ICUService();
-
- /**
- * <p>Return the name of this service. This will be the empty string if none was assigned.
- * Returns result as a convenience.</p>
- *
- * @param result an output parameter to contain the name of this service.
- * @return the name of this service.
- */
- UnicodeString& getName(UnicodeString& result) const;
-
- /**
- * <p>Convenience override for get(ICUServiceKey&, UnicodeString*). This uses
- * createKey to create a key for the provided descriptor.</p>
- *
- * @param descriptor the descriptor.
- * @param status the error code status.
- * @return the service instance, or NULL.
- */
- UObject* get(const UnicodeString& descriptor, UErrorCode& status) const;
-
- /**
- * <p>Convenience override for get(ICUServiceKey&, UnicodeString*). This uses
- * createKey to create a key from the provided descriptor.</p>
- *
- * @param descriptor the descriptor.
- * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
- * @param status the error code status.
- * @return the service instance, or NULL.
- */
- UObject* get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const;
-
- /**
- * <p>Convenience override for get(ICUServiceKey&, UnicodeString*).</p>
- *
- * @param key the key.
- * @param status the error code status.
- * @return the service instance, or NULL.
- */
- UObject* getKey(ICUServiceKey& key, UErrorCode& status) const;
-
- /**
- * <p>Given a key, return a service object, and, if actualReturn
- * is not NULL, the descriptor with which it was found in the
- * first element of actualReturn. If no service object matches
- * this key, returns NULL and leaves actualReturn unchanged.</p>
- *
- * <p>This queries the cache using the key's descriptor, and if no
- * object in the cache matches, tries the key on each
- * registered factory, in order. If none generates a service
- * object for the key, repeats the process with each fallback of
- * the key, until either a factory returns a service object, or the key
- * has no fallback. If no object is found, the result of handleDefault
- * is returned.</p>
- *
- * <p>Subclasses can override this method to further customize the
- * result before returning it.
- *
- * @param key the key.
- * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
- * @param status the error code status.
- * @return the service instance, or NULL.
- */
- virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const;
-
- /**
- * <p>This version of getKey is only called by ICUServiceFactories within the scope
- * of a previous getKey call, to determine what previously-registered factories would
- * have returned. For details, see getKey(ICUServiceKey&, UErrorCode&). Subclasses
- * should not call it directly, but call through one of the other get functions.</p>
- *
- * @param key the key.
- * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
- * @param factory the factory making the recursive call.
- * @param status the error code status.
- * @return the service instance, or NULL.
- */
- UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUServiceFactory* factory, UErrorCode& status) const;
-
- /**
- * <p>Convenience override for getVisibleIDs(String) that passes null
- * as the fallback, thus returning all visible IDs.</p>
- *
- * @param result a vector to hold the returned IDs.
- * @param status the error code status.
- * @return the result vector.
- */
- UVector& getVisibleIDs(UVector& result, UErrorCode& status) const;
-
- /**
- * <p>Return a snapshot of the visible IDs for this service. This
- * list will not change as ICUServiceFactories are added or removed, but the
- * supported IDs will, so there is no guarantee that all and only
- * the IDs in the returned list will be visible and supported by the
- * service in subsequent calls.</p>
- *
- * <p>The IDs are returned as pointers to UnicodeStrings. The
- * caller owns the IDs. Previous contents of result are discarded before
- * new elements, if any, are added.</p>
- *
- * <p>matchID is passed to createKey to create a key. If the key
- * is not NULL, its isFallbackOf method is used to filter out IDs
- * that don't match the key or have it as a fallback.</p>
- *
- * @param result a vector to hold the returned IDs.
- * @param matchID an ID used to filter the result, or NULL if all IDs are desired.
- * @param status the error code status.
- * @return the result vector.
- */
- UVector& getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorCode& status) const;
-
- /**
- * <p>Convenience override for getDisplayName(const UnicodeString&, const Locale&, UnicodeString&) that
- * uses the current default locale.</p>
- *
- * @param id the ID for which to retrieve the localized displayName.
- * @param result an output parameter to hold the display name.
- * @return the modified result.
- */
- UnicodeString& getDisplayName(const UnicodeString& id, UnicodeString& result) const;
-
- /**
- * <p>Given a visible ID, return the display name in the requested locale.
- * If there is no directly supported ID corresponding to this ID, result is
- * set to bogus.</p>
- *
- * @param id the ID for which to retrieve the localized displayName.
- * @param result an output parameter to hold the display name.
- * @param locale the locale in which to localize the ID.
- * @return the modified result.
- */
- UnicodeString& getDisplayName(const UnicodeString& id, UnicodeString& result, const Locale& locale) const;
-
- /**
- * <p>Convenience override of getDisplayNames(const Locale&, const UnicodeString*) that
- * uses the current default Locale as the locale and NULL for
- * the matchID.</p>
- *
- * @param result a vector to hold the returned displayName/id StringPairs.
- * @param status the error code status.
- * @return the modified result vector.
- */
- UVector& getDisplayNames(UVector& result, UErrorCode& status) const;
-
- /**
- * <p>Convenience override of getDisplayNames(const Locale&, const UnicodeString*) that
- * uses NULL for the matchID.</p>
- *
- * @param result a vector to hold the returned displayName/id StringPairs.
- * @param locale the locale in which to localize the ID.
- * @param status the error code status.
- * @return the modified result vector.
- */
- UVector& getDisplayNames(UVector& result, const Locale& locale, UErrorCode& status) const;
-
- /**
- * <p>Return a snapshot of the mapping from display names to visible
- * IDs for this service. This set will not change as factories
- * are added or removed, but the supported IDs will, so there is
- * no guarantee that all and only the IDs in the returned map will
- * be visible and supported by the service in subsequent calls,
- * nor is there any guarantee that the current display names match
- * those in the result.</p>
- *
- * <p>The names are returned as pointers to StringPairs, which
- * contain both the displayName and the corresponding ID. The
- * caller owns the StringPairs. Previous contents of result are
- * discarded before new elements, if any, are added.</p>
- *
- * <p>matchID is passed to createKey to create a key. If the key
- * is not NULL, its isFallbackOf method is used to filter out IDs
- * that don't match the key or have it as a fallback.</p>
- *
- * @param result a vector to hold the returned displayName/id StringPairs.
- * @param locale the locale in which to localize the ID.
- * @param matchID an ID used to filter the result, or NULL if all IDs are desired.
- * @param status the error code status.
- * @return the result vector. */
- UVector& getDisplayNames(UVector& result,
- const Locale& locale,
- const UnicodeString* matchID,
- UErrorCode& status) const;
-
- /**
- * <p>A convenience override of registerInstance(UObject*, const UnicodeString&, UBool)
- * that defaults visible to TRUE.</p>
- *
- * @param objToAdopt the object to register and adopt.
- * @param id the ID to assign to this object.
- * @param status the error code status.
- * @return a registry key that can be passed to unregister to unregister
- * (and discard) this instance.
- */
- URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& id, UErrorCode& status);
-
- /**
- * <p>Register a service instance with the provided ID. The ID will be
- * canonicalized. The canonicalized ID will be returned by
- * getVisibleIDs if visible is TRUE. The service instance will be adopted and
- * must not be modified subsequent to this call.</p>
- *
- * <p>This issues a serviceChanged notification to registered listeners.</p>
- *
- * <p>This implementation wraps the object using
- * createSimpleFactory, and calls registerFactory.</p>
- *
- * @param objToAdopt the object to register and adopt.
- * @param id the ID to assign to this object.
- * @param visible TRUE if getVisibleIDs is to return this ID.
- * @param status the error code status.
- * @return a registry key that can be passed to unregister() to unregister
- * (and discard) this instance.
- */
- virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status);
-
- /**
- * <p>Register an ICUServiceFactory. Returns a registry key that
- * can be used to unregister the factory. The factory
- * must not be modified subsequent to this call. The service owns
- * all registered factories. In case of an error, the factory is
- * deleted.</p>
- *
- * <p>This issues a serviceChanged notification to registered listeners.</p>
- *
- * <p>The default implementation accepts all factories.</p>
- *
- * @param factoryToAdopt the factory to register and adopt.
- * @param status the error code status.
- * @return a registry key that can be passed to unregister to unregister
- * (and discard) this factory.
- */
- virtual URegistryKey registerFactory(ICUServiceFactory* factoryToAdopt, UErrorCode& status);
-
- /**
- * <p>Unregister a factory using a registry key returned by
- * registerInstance or registerFactory. After a successful call,
- * the factory will be removed from the service factory list and
- * deleted, and the key becomes invalid.</p>
- *
- * <p>This issues a serviceChanged notification to registered
- * listeners.</p>
- *
- * @param rkey the registry key.
- * @param status the error code status.
- * @return TRUE if the call successfully unregistered the factory.
- */
- virtual UBool unregister(URegistryKey rkey, UErrorCode& status);
-
- /**
- * </p>Reset the service to the default factories. The factory
- * lock is acquired and then reInitializeFactories is called.</p>
- *
- * <p>This issues a serviceChanged notification to registered listeners.</p>
- */
- virtual void reset(void);
-
- /**
- * <p>Return TRUE if the service is in its default state.</p>
- *
- * <p>The default implementation returns TRUE if there are no
- * factories registered.</p>
- */
- virtual UBool isDefault(void) const;
-
- /**
- * <p>Create a key from an ID. If ID is NULL, returns NULL.</p>
- *
- * <p>The default implementation creates an ICUServiceKey instance.
- * Subclasses can override to define more useful keys appropriate
- * to the factories they accept.</p>
- *
- * @param a pointer to the ID for which to create a default ICUServiceKey.
- * @param status the error code status.
- * @return the ICUServiceKey corresponding to ID, or NULL.
- */
- virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const;
-
- /**
- * <p>Clone object so that caller can own the copy. In ICU2.4, UObject doesn't define
- * clone, so we need an instance-aware method that knows how to do this.
- * This is public so factories can call it, but should really be protected.</p>
- *
- * @param instance the service instance to clone.
- * @return a clone of the passed-in instance, or NULL if cloning was unsuccessful.
- */
- virtual UObject* cloneInstance(UObject* instance) const = 0;
-
-
- /************************************************************************
- * Subclassing API
- */
-
- protected:
-
- /**
- * <p>Create a factory that wraps a single service object. Called by registerInstance.</p>
- *
- * <p>The default implementation returns an instance of SimpleFactory.</p>
- *
- * @param instanceToAdopt the service instance to adopt.
- * @param id the ID to assign to this service instance.
- * @param visible if TRUE, the ID will be visible.
- * @param status the error code status.
- * @return an instance of ICUServiceFactory that maps this instance to the provided ID.
- */
- virtual ICUServiceFactory* createSimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status);
-
- /**
- * <p>Reinitialize the factory list to its default state. After this call, isDefault()
- * must return TRUE.</p>
- *
- * <p>This issues a serviceChanged notification to registered listeners.</p>
- *
- * <p>The default implementation clears the factory list.
- * Subclasses can override to provide other default initialization
- * of the factory list. Subclasses must not call this method
- * directly, since it must only be called while holding write
- * access to the factory list.</p>
- */
- virtual void reInitializeFactories(void);
-
- /**
- * <p>Default handler for this service if no factory in the factory list
- * handled the key passed to getKey.</p>
- *
- * <p>The default implementation returns NULL.</p>
- *
- * @param key the key.
- * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
- * @param status the error code status.
- * @return the service instance, or NULL.
- */
- virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const;
-
- /**
- * <p>Clear caches maintained by this service.</p>
- *
- * <p>Subclasses can override if they implement additional caches
- * that need to be cleared when the service changes. Subclasses
- * should generally not call this method directly, as it must only
- * be called while synchronized on the factory lock.</p>
- */
- virtual void clearCaches(void);
-
- /**
- * <p>Return true if the listener is accepted.</p>
- *
- * <p>The default implementation accepts the listener if it is
- * a ServiceListener. Subclasses can override this to accept
- * different listeners.</p>
- *
- * @param l the listener to test.
- * @return TRUE if the service accepts the listener.
- */
- virtual UBool acceptsListener(const EventListener& l) const;
-
- /**
- * <p>Notify the listener of a service change.</p>
- *
- * <p>The default implementation assumes a ServiceListener.
- * If acceptsListener has been overridden to accept different
- * listeners, this should be overridden as well.</p>
- *
- * @param l the listener to notify.
- */
- virtual void notifyListener(EventListener& l) const;
-
- /************************************************************************
- * Utilities for subclasses.
- */
-
- /**
- * <p>Clear only the service cache.</p>
- *
- * <p>This can be called by subclasses when a change affects the service
- * cache but not the ID caches, e.g., when the default locale changes
- * the resolution of IDs also changes, requiring the cache to be
- * flushed, but not the visible IDs themselves.</p>
- */
- void clearServiceCache(void);
-
- /**
- * <p>Return a map from visible IDs to factories.
- * This must only be called when the mutex is held.</p>
- *
- * @param status the error code status.
- * @return a Hashtable containing mappings from visible
- * IDs to factories.
- */
- const Hashtable* getVisibleIDMap(UErrorCode& status) const;
-
- /**
- * <p>Allow subclasses to read the time stamp.</p>
- *
- * @return the timestamp.
- */
- int32_t getTimestamp(void) const;
-
- /**
- * <p>Return the number of registered factories.</p>
- *
- * @return the number of factories registered at the time of the call.
- */
- int32_t countFactories(void) const;
-
-private:
-
- friend class ::ICUServiceTest; // give tests access to countFactories.
-};
-
-U_NAMESPACE_END
-
- /* UCONFIG_NO_SERVICE */
-#endif
-
- /* ICUSERV_H */
-#endif
-
diff --git a/contrib/libs/icu/common/servlk.cpp b/contrib/libs/icu/common/servlk.cpp
deleted file mode 100644
index 538982ca362..00000000000
--- a/contrib/libs/icu/common/servlk.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/**
- *******************************************************************************
- * Copyright (C) 2001-2014, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- *
- *******************************************************************************
- */
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_SERVICE
-
-#include "unicode/resbund.h"
-#include "uresimp.h"
-#include "cmemory.h"
-#include "servloc.h"
-#include "ustrfmt.h"
-#include "uhash.h"
-#include "charstr.h"
-#include "uassert.h"
-
-#define UNDERSCORE_CHAR ((UChar)0x005f)
-#define AT_SIGN_CHAR ((UChar)64)
-#define PERIOD_CHAR ((UChar)46)
-
-U_NAMESPACE_BEGIN
-
-LocaleKey*
-LocaleKey::createWithCanonicalFallback(const UnicodeString* primaryID,
- const UnicodeString* canonicalFallbackID,
- UErrorCode& status)
-{
- return LocaleKey::createWithCanonicalFallback(primaryID, canonicalFallbackID, KIND_ANY, status);
-}
-
-LocaleKey*
-LocaleKey::createWithCanonicalFallback(const UnicodeString* primaryID,
- const UnicodeString* canonicalFallbackID,
- int32_t kind,
- UErrorCode& status)
-{
- if (primaryID == NULL || U_FAILURE(status)) {
- return NULL;
- }
- UnicodeString canonicalPrimaryID;
- LocaleUtility::canonicalLocaleString(primaryID, canonicalPrimaryID);
- return new LocaleKey(*primaryID, canonicalPrimaryID, canonicalFallbackID, kind);
-}
-
-LocaleKey::LocaleKey(const UnicodeString& primaryID,
- const UnicodeString& canonicalPrimaryID,
- const UnicodeString* canonicalFallbackID,
- int32_t kind)
- : ICUServiceKey(primaryID)
- , _kind(kind)
- , _primaryID(canonicalPrimaryID)
- , _fallbackID()
- , _currentID()
-{
- _fallbackID.setToBogus();
- if (_primaryID.length() != 0) {
- if (canonicalFallbackID != NULL && _primaryID != *canonicalFallbackID) {
- _fallbackID = *canonicalFallbackID;
- }
- }
-
- _currentID = _primaryID;
-}
-
-LocaleKey::~LocaleKey() {}
-
-UnicodeString&
-LocaleKey::prefix(UnicodeString& result) const {
- if (_kind != KIND_ANY) {
- UChar buffer[64];
- uprv_itou(buffer, 64, _kind, 10, 0);
- UnicodeString temp(buffer);
- result.append(temp);
- }
- return result;
-}
-
-int32_t
-LocaleKey::kind() const {
- return _kind;
-}
-
-UnicodeString&
-LocaleKey::canonicalID(UnicodeString& result) const {
- return result.append(_primaryID);
-}
-
-UnicodeString&
-LocaleKey::currentID(UnicodeString& result) const {
- if (!_currentID.isBogus()) {
- result.append(_currentID);
- }
- return result;
-}
-
-UnicodeString&
-LocaleKey::currentDescriptor(UnicodeString& result) const {
- if (!_currentID.isBogus()) {
- prefix(result).append(PREFIX_DELIMITER).append(_currentID);
- } else {
- result.setToBogus();
- }
- return result;
-}
-
-Locale&
-LocaleKey::canonicalLocale(Locale& result) const {
- return LocaleUtility::initLocaleFromName(_primaryID, result);
-}
-
-Locale&
-LocaleKey::currentLocale(Locale& result) const {
- return LocaleUtility::initLocaleFromName(_currentID, result);
-}
-
-UBool
-LocaleKey::fallback() {
- if (!_currentID.isBogus()) {
- int x = _currentID.lastIndexOf(UNDERSCORE_CHAR);
- if (x != -1) {
- _currentID.remove(x); // truncate current or fallback, whichever we're pointing to
- return TRUE;
- }
-
- if (!_fallbackID.isBogus()) {
- _currentID = _fallbackID;
- _fallbackID.setToBogus();
- return TRUE;
- }
-
- if (_currentID.length() > 0) {
- _currentID.remove(0); // completely truncate
- return TRUE;
- }
-
- _currentID.setToBogus();
- }
-
- return FALSE;
-}
-
-UBool
-LocaleKey::isFallbackOf(const UnicodeString& id) const {
- UnicodeString temp(id);
- parseSuffix(temp);
- return temp.indexOf(_primaryID) == 0 &&
- (temp.length() == _primaryID.length() ||
- temp.charAt(_primaryID.length()) == UNDERSCORE_CHAR);
-}
-
-#ifdef SERVICE_DEBUG
-UnicodeString&
-LocaleKey::debug(UnicodeString& result) const
-{
- ICUServiceKey::debug(result);
- result.append((UnicodeString)" kind: ");
- result.append(_kind);
- result.append((UnicodeString)" primaryID: ");
- result.append(_primaryID);
- result.append((UnicodeString)" fallbackID: ");
- result.append(_fallbackID);
- result.append((UnicodeString)" currentID: ");
- result.append(_currentID);
- return result;
-}
-
-UnicodeString&
-LocaleKey::debugClass(UnicodeString& result) const
-{
- return result.append((UnicodeString)"LocaleKey ");
-}
-#endif
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(LocaleKey)
-
-U_NAMESPACE_END
-
-/* !UCONFIG_NO_SERVICE */
-#endif
-
-
diff --git a/contrib/libs/icu/common/servlkf.cpp b/contrib/libs/icu/common/servlkf.cpp
deleted file mode 100644
index 84f2347cdde..00000000000
--- a/contrib/libs/icu/common/servlkf.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/**
- *******************************************************************************
- * Copyright (C) 2001-2014, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- *
- *******************************************************************************
- */
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_SERVICE
-
-#include "unicode/resbund.h"
-#include "uresimp.h"
-#include "cmemory.h"
-#include "servloc.h"
-#include "ustrfmt.h"
-#include "uhash.h"
-#include "charstr.h"
-#include "ucln_cmn.h"
-#include "uassert.h"
-
-#define UNDERSCORE_CHAR ((UChar)0x005f)
-#define AT_SIGN_CHAR ((UChar)64)
-#define PERIOD_CHAR ((UChar)46)
-
-
-U_NAMESPACE_BEGIN
-
-LocaleKeyFactory::LocaleKeyFactory(int32_t coverage)
- : _name()
- , _coverage(coverage)
-{
-}
-
-LocaleKeyFactory::LocaleKeyFactory(int32_t coverage, const UnicodeString& name)
- : _name(name)
- , _coverage(coverage)
-{
-}
-
-LocaleKeyFactory::~LocaleKeyFactory() {
-}
-
-UObject*
-LocaleKeyFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const {
- if (handlesKey(key, status)) {
- const LocaleKey& lkey = (const LocaleKey&)key;
- int32_t kind = lkey.kind();
- Locale loc;
- lkey.currentLocale(loc);
-
- return handleCreate(loc, kind, service, status);
- }
- return NULL;
-}
-
-UBool
-LocaleKeyFactory::handlesKey(const ICUServiceKey& key, UErrorCode& status) const {
- const Hashtable* supported = getSupportedIDs(status);
- if (supported) {
- UnicodeString id;
- key.currentID(id);
- return supported->get(id) != NULL;
- }
- return FALSE;
-}
-
-void
-LocaleKeyFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const {
- const Hashtable* supported = getSupportedIDs(status);
- if (supported) {
- UBool visible = (_coverage & 0x1) == 0;
- const UHashElement* elem = NULL;
- int32_t pos = UHASH_FIRST;
- while ((elem = supported->nextElement(pos)) != NULL) {
- const UnicodeString& id = *((const UnicodeString*)elem->key.pointer);
- if (!visible) {
- result.remove(id);
- } else {
- result.put(id, (void*)this, status); // this is dummy non-void marker used for set semantics
- if (U_FAILURE(status)) {
- break;
- }
- }
- }
- }
-}
-
-UnicodeString&
-LocaleKeyFactory::getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const {
- if ((_coverage & 0x1) == 0) {
- //UErrorCode status = U_ZERO_ERROR;
- // assume if this is called on us, we support some fallback of this id
- // if (isSupportedID(id, status)) {
- Locale loc;
- LocaleUtility::initLocaleFromName(id, loc);
- return loc.getDisplayName(locale, result);
- // }
- }
- result.setToBogus();
- return result;
-}
-
-UObject*
-LocaleKeyFactory::handleCreate(const Locale& /* loc */,
- int32_t /* kind */,
- const ICUService* /* service */,
- UErrorCode& /* status */) const {
- return NULL;
-}
-
-//UBool
-//LocaleKeyFactory::isSupportedID(const UnicodeString& id, UErrorCode& status) const {
-// const Hashtable* ids = getSupportedIDs(status);
-// return ids && ids->get(id);
-//}
-
-const Hashtable*
-LocaleKeyFactory::getSupportedIDs(UErrorCode& /* status */) const {
- return NULL;
-}
-
-#ifdef SERVICE_DEBUG
-UnicodeString&
-LocaleKeyFactory::debug(UnicodeString& result) const
-{
- debugClass(result);
- result.append((UnicodeString)", name: ");
- result.append(_name);
- result.append((UnicodeString)", coverage: ");
- result.append(_coverage);
- return result;
-}
-
-UnicodeString&
-LocaleKeyFactory::debugClass(UnicodeString& result) const
-{
- return result.append((UnicodeString)"LocaleKeyFactory");
-}
-#endif
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(LocaleKeyFactory)
-
-U_NAMESPACE_END
-
-/* !UCONFIG_NO_SERVICE */
-#endif
-
-
diff --git a/contrib/libs/icu/common/servloc.h b/contrib/libs/icu/common/servloc.h
deleted file mode 100644
index ccf6433379d..00000000000
--- a/contrib/libs/icu/common/servloc.h
+++ /dev/null
@@ -1,551 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/**
- *******************************************************************************
- * Copyright (C) 2001-2011, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- *
- *******************************************************************************
- */
-#ifndef ICULSERV_H
-#define ICULSERV_H
-
-#include "unicode/utypes.h"
-
-#if UCONFIG_NO_SERVICE
-
-U_NAMESPACE_BEGIN
-
-/*
- * Allow the declaration of APIs with pointers to ICUService
- * even when service is removed from the build.
- */
-class ICULocaleService;
-
-U_NAMESPACE_END
-
-#else
-
-#include "unicode/unistr.h"
-#include "unicode/locid.h"
-#include "unicode/strenum.h"
-
-#include "hash.h"
-#include "uvector.h"
-
-#include "serv.h"
-#include "locutil.h"
-
-U_NAMESPACE_BEGIN
-
-class ICULocaleService;
-
-class LocaleKey;
-class LocaleKeyFactory;
-class SimpleLocaleKeyFactory;
-class ServiceListener;
-
-/*
- ******************************************************************
- */
-
-/**
- * A subclass of Key that implements a locale fallback mechanism.
- * The first locale to search for is the locale provided by the
- * client, and the fallback locale to search for is the current
- * default locale. If a prefix is present, the currentDescriptor
- * includes it before the locale proper, separated by "/". This
- * is the default key instantiated by ICULocaleService.</p>
- *
- * <p>Canonicalization adjusts the locale string so that the
- * section before the first understore is in lower case, and the rest
- * is in upper case, with no trailing underscores.</p>
- */
-
-class U_COMMON_API LocaleKey : public ICUServiceKey {
- private:
- int32_t _kind;
- UnicodeString _primaryID;
- UnicodeString _fallbackID;
- UnicodeString _currentID;
-
- public:
- enum {
- KIND_ANY = -1
- };
-
- /**
- * Create a LocaleKey with canonical primary and fallback IDs.
- */
- static LocaleKey* createWithCanonicalFallback(const UnicodeString* primaryID,
- const UnicodeString* canonicalFallbackID,
- UErrorCode& status);
-
- /**
- * Create a LocaleKey with canonical primary and fallback IDs.
- */
- static LocaleKey* createWithCanonicalFallback(const UnicodeString* primaryID,
- const UnicodeString* canonicalFallbackID,
- int32_t kind,
- UErrorCode& status);
-
- protected:
- /**
- * PrimaryID is the user's requested locale string,
- * canonicalPrimaryID is this string in canonical form,
- * fallbackID is the current default locale's string in
- * canonical form.
- */
- LocaleKey(const UnicodeString& primaryID,
- const UnicodeString& canonicalPrimaryID,
- const UnicodeString* canonicalFallbackID,
- int32_t kind);
-
- public:
- /**
- * Append the prefix associated with the kind, or nothing if the kind is KIND_ANY.
- */
- virtual UnicodeString& prefix(UnicodeString& result) const;
-
- /**
- * Return the kind code associated with this key.
- */
- virtual int32_t kind() const;
-
- /**
- * Return the canonicalID.
- */
- virtual UnicodeString& canonicalID(UnicodeString& result) const;
-
- /**
- * Return the currentID.
- */
- virtual UnicodeString& currentID(UnicodeString& result) const;
-
- /**
- * Return the (canonical) current descriptor, or null if no current id.
- */
- virtual UnicodeString& currentDescriptor(UnicodeString& result) const;
-
- /**
- * Convenience method to return the locale corresponding to the (canonical) original ID.
- */
- virtual Locale& canonicalLocale(Locale& result) const;
-
- /**
- * Convenience method to return the locale corresponding to the (canonical) current ID.
- */
- virtual Locale& currentLocale(Locale& result) const;
-
- /**
- * If the key has a fallback, modify the key and return true,
- * otherwise return false.</p>
- *
- * <p>First falls back through the primary ID, then through
- * the fallbackID. The final fallback is the empty string,
- * unless the primary id was the empty string, in which case
- * there is no fallback.
- */
- virtual UBool fallback();
-
- /**
- * Return true if a key created from id matches, or would eventually
- * fallback to match, the canonical ID of this key.
- */
- virtual UBool isFallbackOf(const UnicodeString& id) const;
-
- public:
- /**
- * UObject boilerplate.
- */
- static UClassID U_EXPORT2 getStaticClassID();
-
- virtual UClassID getDynamicClassID() const;
-
- /**
- * Destructor.
- */
- virtual ~LocaleKey();
-
-#ifdef SERVICE_DEBUG
- public:
- virtual UnicodeString& debug(UnicodeString& result) const;
- virtual UnicodeString& debugClass(UnicodeString& result) const;
-#endif
-
-};
-
-/*
- ******************************************************************
- */
-
-/**
- * A subclass of ICUServiceFactory that uses LocaleKeys, and is able to
- * 'cover' more specific locales with more general locales that it
- * supports.
- *
- * <p>Coverage may be either of the values VISIBLE or INVISIBLE.
- *
- * <p>'Visible' indicates that the specific locale(s) supported by
- * the factory are registered in getSupportedIDs, 'Invisible'
- * indicates that they are not.
- *
- * <p>Localization of visible ids is handled
- * by the handling factory, regardless of kind.
- */
-class U_COMMON_API LocaleKeyFactory : public ICUServiceFactory {
-protected:
- const UnicodeString _name;
- const int32_t _coverage;
-
-public:
- enum {
- /**
- * Coverage value indicating that the factory makes
- * its locales visible, and does not cover more specific
- * locales.
- */
- VISIBLE = 0,
-
- /**
- * Coverage value indicating that the factory does not make
- * its locales visible, and does not cover more specific
- * locales.
- */
- INVISIBLE = 1
- };
-
- /**
- * Destructor.
- */
- virtual ~LocaleKeyFactory();
-
-protected:
- /**
- * Constructor used by subclasses.
- */
- LocaleKeyFactory(int32_t coverage);
-
- /**
- * Constructor used by subclasses.
- */
- LocaleKeyFactory(int32_t coverage, const UnicodeString& name);
-
- /**
- * Implement superclass abstract method. This checks the currentID of
- * the key against the supported IDs, and passes the canonicalLocale and
- * kind off to handleCreate (which subclasses must implement).
- */
-public:
- virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
-
-protected:
- virtual UBool handlesKey(const ICUServiceKey& key, UErrorCode& status) const;
-
-public:
- /**
- * Override of superclass method. This adjusts the result based
- * on the coverage rule for this factory.
- */
- virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const;
-
- /**
- * Return a localized name for the locale represented by id.
- */
- virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
-
-protected:
- /**
- * Utility method used by create(ICUServiceKey, ICUService). Subclasses can implement
- * this instead of create. The default returns NULL.
- */
- virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* service, UErrorCode& status) const;
-
- /**
- * Return true if this id is one the factory supports (visible or
- * otherwise).
- */
- // virtual UBool isSupportedID(const UnicodeString& id, UErrorCode& status) const;
-
- /**
- * Return the set of ids that this factory supports (visible or
- * otherwise). This can be called often and might need to be
- * cached if it is expensive to create.
- */
- virtual const Hashtable* getSupportedIDs(UErrorCode& status) const;
-
-public:
- /**
- * UObject boilerplate.
- */
- static UClassID U_EXPORT2 getStaticClassID();
-
- virtual UClassID getDynamicClassID() const;
-
-#ifdef SERVICE_DEBUG
- public:
- virtual UnicodeString& debug(UnicodeString& result) const;
- virtual UnicodeString& debugClass(UnicodeString& result) const;
-#endif
-
-};
-
-/*
- ******************************************************************
- */
-
-/**
- * A LocaleKeyFactory that just returns a single object for a kind/locale.
- */
-
-class U_COMMON_API SimpleLocaleKeyFactory : public LocaleKeyFactory {
- private:
- UObject* _obj;
- UnicodeString _id;
- const int32_t _kind;
-
- public:
- SimpleLocaleKeyFactory(UObject* objToAdopt,
- const UnicodeString& locale,
- int32_t kind,
- int32_t coverage);
-
- SimpleLocaleKeyFactory(UObject* objToAdopt,
- const Locale& locale,
- int32_t kind,
- int32_t coverage);
-
- /**
- * Destructor.
- */
- virtual ~SimpleLocaleKeyFactory();
-
- /**
- * Override of superclass method. Returns the service object if kind/locale match. Service is not used.
- */
- virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
-
- /**
- * Override of superclass method. This adjusts the result based
- * on the coverage rule for this factory.
- */
- virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const;
-
- protected:
- /**
- * Return true if this id is equal to the locale name.
- */
- //virtual UBool isSupportedID(const UnicodeString& id, UErrorCode& status) const;
-
-
-public:
- /**
- * UObject boilerplate.
- */
- static UClassID U_EXPORT2 getStaticClassID();
-
- virtual UClassID getDynamicClassID() const;
-
-#ifdef SERVICE_DEBUG
- public:
- virtual UnicodeString& debug(UnicodeString& result) const;
- virtual UnicodeString& debugClass(UnicodeString& result) const;
-#endif
-
-};
-
-/*
- ******************************************************************
- */
-
-/**
- * A LocaleKeyFactory that creates a service based on the ICU locale data.
- * This is a base class for most ICU factories. Subclasses instantiate it
- * with a constructor that takes a bundle name, which determines the supported
- * IDs. Subclasses then override handleCreate to create the actual service
- * object. The default implementation returns a resource bundle.
- */
-class U_COMMON_API ICUResourceBundleFactory : public LocaleKeyFactory
-{
- protected:
- UnicodeString _bundleName;
-
- public:
- /**
- * Convenience constructor that uses the main ICU bundle name.
- */
- ICUResourceBundleFactory();
-
- /**
- * A service factory based on ICU resource data in resources with
- * the given name. This should be a 'path' that can be passed to
- * ures_openAvailableLocales, such as U_ICUDATA or U_ICUDATA_COLL.
- * The empty string is equivalent to U_ICUDATA.
- */
- ICUResourceBundleFactory(const UnicodeString& bundleName);
-
- /**
- * Destructor
- */
- virtual ~ICUResourceBundleFactory();
-
-protected:
- /**
- * Return the supported IDs. This is the set of all locale names in ICULocaleData.
- */
- virtual const Hashtable* getSupportedIDs(UErrorCode& status) const;
-
- /**
- * Create the service. The default implementation returns the resource bundle
- * for the locale, ignoring kind, and service.
- */
- virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* service, UErrorCode& status) const;
-
-public:
- /**
- * UObject boilerplate.
- */
- static UClassID U_EXPORT2 getStaticClassID();
- virtual UClassID getDynamicClassID() const;
-
-
-#ifdef SERVICE_DEBUG
- public:
- virtual UnicodeString& debug(UnicodeString& result) const;
- virtual UnicodeString& debugClass(UnicodeString& result) const;
-#endif
-
-};
-
-/*
- ******************************************************************
- */
-
-class U_COMMON_API ICULocaleService : public ICUService
-{
- private:
- Locale fallbackLocale;
- UnicodeString fallbackLocaleName;
-
- public:
- /**
- * Construct an ICULocaleService.
- */
- ICULocaleService();
-
- /**
- * Construct an ICULocaleService with a name (useful for debugging).
- */
- ICULocaleService(const UnicodeString& name);
-
- /**
- * Destructor.
- */
- virtual ~ICULocaleService();
-
-#if 0
- // redeclare because of overload resolution rules?
- // no, causes ambiguities since both UnicodeString and Locale have constructors that take a const char*
- // need some compiler flag to remove warnings
- UObject* get(const UnicodeString& descriptor, UErrorCode& status) const {
- return ICUService::get(descriptor, status);
- }
-
- UObject* get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const {
- return ICUService::get(descriptor, actualReturn, status);
- }
-#endif
-
- /**
- * Convenience override for callers using locales. This calls
- * get(Locale, int, Locale[]) with KIND_ANY for kind and null for
- * actualReturn.
- */
- UObject* get(const Locale& locale, UErrorCode& status) const;
-
- /**
- * Convenience override for callers using locales. This calls
- * get(Locale, int, Locale[]) with a null actualReturn.
- */
- UObject* get(const Locale& locale, int32_t kind, UErrorCode& status) const;
-
- /**
- * Convenience override for callers using locales. This calls
- * get(Locale, String, Locale[]) with a null kind.
- */
- UObject* get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const;
-
- /**
- * Convenience override for callers using locales. This uses
- * createKey(Locale.toString(), kind) to create a key, calls getKey, and then
- * if actualReturn is not null, returns the actualResult from
- * getKey (stripping any prefix) into a Locale.
- */
- UObject* get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const;
-
- /**
- * Convenience override for callers using locales. This calls
- * registerObject(Object, Locale, int32_t kind, int coverage)
- * passing KIND_ANY for the kind, and VISIBLE for the coverage.
- */
- virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status);
-
- /**
- * Convenience function for callers using locales. This calls
- * registerObject(Object, Locale, int kind, int coverage)
- * passing VISIBLE for the coverage.
- */
- virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status);
-
- /**
- * Convenience function for callers using locales. This instantiates
- * a SimpleLocaleKeyFactory, and registers the factory.
- */
- virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status);
-
-
- /**
- * (Stop compiler from complaining about hidden overrides.)
- * Since both UnicodeString and Locale have constructors that take const char*, adding a public
- * method that takes UnicodeString causes ambiguity at call sites that use const char*.
- * We really need a flag that is understood by all compilers that will suppress the warning about
- * hidden overrides.
- */
- virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status);
-
- /**
- * Convenience method for callers using locales. This returns the standard
- * service ID enumeration.
- */
- virtual StringEnumeration* getAvailableLocales(void) const;
-
- protected:
-
- /**
- * Return the name of the current fallback locale. If it has changed since this was
- * last accessed, the service cache is cleared.
- */
- const UnicodeString& validateFallbackLocale() const;
-
- /**
- * Override superclass createKey method.
- */
- virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const;
-
- /**
- * Additional createKey that takes a kind.
- */
- virtual ICUServiceKey* createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const;
-
- friend class ServiceEnumeration;
-};
-
-U_NAMESPACE_END
-
- /* UCONFIG_NO_SERVICE */
-#endif
-
- /* ICULSERV_H */
-#endif
-
diff --git a/contrib/libs/icu/common/servls.cpp b/contrib/libs/icu/common/servls.cpp
deleted file mode 100644
index 81dc4f750ea..00000000000
--- a/contrib/libs/icu/common/servls.cpp
+++ /dev/null
@@ -1,295 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/**
- *******************************************************************************
- * Copyright (C) 2001-2014, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- *
- *******************************************************************************
- */
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_SERVICE
-
-#include "unicode/resbund.h"
-#include "uresimp.h"
-#include "cmemory.h"
-#include "servloc.h"
-#include "ustrfmt.h"
-#include "charstr.h"
-#include "uassert.h"
-
-#define UNDERSCORE_CHAR ((UChar)0x005f)
-#define AT_SIGN_CHAR ((UChar)64)
-#define PERIOD_CHAR ((UChar)46)
-
-U_NAMESPACE_BEGIN
-
-ICULocaleService::ICULocaleService()
- : fallbackLocale(Locale::getDefault())
-{
-}
-
-ICULocaleService::ICULocaleService(const UnicodeString& dname)
- : ICUService(dname)
- , fallbackLocale(Locale::getDefault())
-{
-}
-
-ICULocaleService::~ICULocaleService()
-{
-}
-
-UObject*
-ICULocaleService::get(const Locale& locale, UErrorCode& status) const
-{
- return get(locale, LocaleKey::KIND_ANY, NULL, status);
-}
-
-UObject*
-ICULocaleService::get(const Locale& locale, int32_t kind, UErrorCode& status) const
-{
- return get(locale, kind, NULL, status);
-}
-
-UObject*
-ICULocaleService::get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const
-{
- return get(locale, LocaleKey::KIND_ANY, actualReturn, status);
-}
-
-UObject*
-ICULocaleService::get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const
-{
- UObject* result = NULL;
- if (U_FAILURE(status)) {
- return result;
- }
-
- UnicodeString locName(locale.getName(), -1, US_INV);
- if (locName.isBogus()) {
- status = U_MEMORY_ALLOCATION_ERROR;
- } else {
- ICUServiceKey* key = createKey(&locName, kind, status);
- if (key) {
- if (actualReturn == NULL) {
- result = getKey(*key, status);
- } else {
- UnicodeString temp;
- result = getKey(*key, &temp, status);
-
- if (result != NULL) {
- key->parseSuffix(temp);
- LocaleUtility::initLocaleFromName(temp, *actualReturn);
- }
- }
- delete key;
- }
- }
- return result;
-}
-
-
-URegistryKey
-ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale,
- UBool visible, UErrorCode& status)
-{
- Locale loc;
- LocaleUtility::initLocaleFromName(locale, loc);
- return registerInstance(objToAdopt, loc, LocaleKey::KIND_ANY,
- visible ? LocaleKeyFactory::VISIBLE : LocaleKeyFactory::INVISIBLE, status);
-}
-
-URegistryKey
-ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status)
-{
- return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY, LocaleKeyFactory::VISIBLE, status);
-}
-
-URegistryKey
-ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status)
-{
- return registerInstance(objToAdopt, locale, kind, LocaleKeyFactory::VISIBLE, status);
-}
-
-URegistryKey
-ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status)
-{
- ICUServiceFactory * factory = new SimpleLocaleKeyFactory(objToAdopt, locale, kind, coverage);
- if (factory != NULL) {
- return registerFactory(factory, status);
- }
- delete objToAdopt;
- return NULL;
-}
-
-#if 0
-URegistryKey
-ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, UErrorCode& status)
-{
- return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY, LocaleKeyFactory::VISIBLE, status);
-}
-
-URegistryKey
-ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status)
-{
- return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY,
- visible ? LocaleKeyFactory::VISIBLE : LocaleKeyFactory::INVISIBLE,
- status);
-}
-
-URegistryKey
-ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, int32_t kind, int32_t coverage, UErrorCode& status)
-{
- ICUServiceFactory * factory = new SimpleLocaleKeyFactory(objToAdopt, locale, kind, coverage);
- if (factory != NULL) {
- return registerFactory(factory, status);
- }
- delete objToAdopt;
- return NULL;
-}
-#endif
-
-class ServiceEnumeration : public StringEnumeration {
-private:
- const ICULocaleService* _service;
- int32_t _timestamp;
- UVector _ids;
- int32_t _pos;
-
-private:
- ServiceEnumeration(const ICULocaleService* service, UErrorCode &status)
- : _service(service)
- , _timestamp(service->getTimestamp())
- , _ids(uprv_deleteUObject, NULL, status)
- , _pos(0)
- {
- _service->getVisibleIDs(_ids, status);
- }
-
- ServiceEnumeration(const ServiceEnumeration &other, UErrorCode &status)
- : _service(other._service)
- , _timestamp(other._timestamp)
- , _ids(uprv_deleteUObject, NULL, status)
- , _pos(0)
- {
- if(U_SUCCESS(status)) {
- int32_t i, length;
-
- length = other._ids.size();
- for(i = 0; i < length; ++i) {
- _ids.addElement(((UnicodeString *)other._ids.elementAt(i))->clone(), status);
- }
-
- if(U_SUCCESS(status)) {
- _pos = other._pos;
- }
- }
- }
-
-public:
- static ServiceEnumeration* create(const ICULocaleService* service) {
- UErrorCode status = U_ZERO_ERROR;
- ServiceEnumeration* result = new ServiceEnumeration(service, status);
- if (U_SUCCESS(status)) {
- return result;
- }
- delete result;
- return NULL;
- }
-
- virtual ~ServiceEnumeration();
-
- virtual StringEnumeration *clone() const {
- UErrorCode status = U_ZERO_ERROR;
- ServiceEnumeration *cl = new ServiceEnumeration(*this, status);
- if(U_FAILURE(status)) {
- delete cl;
- cl = NULL;
- }
- return cl;
- }
-
- UBool upToDate(UErrorCode& status) const {
- if (U_SUCCESS(status)) {
- if (_timestamp == _service->getTimestamp()) {
- return TRUE;
- }
- status = U_ENUM_OUT_OF_SYNC_ERROR;
- }
- return FALSE;
- }
-
- virtual int32_t count(UErrorCode& status) const {
- return upToDate(status) ? _ids.size() : 0;
- }
-
- virtual const UnicodeString* snext(UErrorCode& status) {
- if (upToDate(status) && (_pos < _ids.size())) {
- return (const UnicodeString*)_ids[_pos++];
- }
- return NULL;
- }
-
- virtual void reset(UErrorCode& status) {
- if (status == U_ENUM_OUT_OF_SYNC_ERROR) {
- status = U_ZERO_ERROR;
- }
- if (U_SUCCESS(status)) {
- _timestamp = _service->getTimestamp();
- _pos = 0;
- _service->getVisibleIDs(_ids, status);
- }
- }
-
-public:
- static UClassID U_EXPORT2 getStaticClassID(void);
- virtual UClassID getDynamicClassID(void) const;
-};
-
-ServiceEnumeration::~ServiceEnumeration() {}
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ServiceEnumeration)
-
-StringEnumeration*
-ICULocaleService::getAvailableLocales(void) const
-{
- return ServiceEnumeration::create(this);
-}
-
-const UnicodeString&
-ICULocaleService::validateFallbackLocale() const
-{
- const Locale& loc = Locale::getDefault();
- ICULocaleService* ncThis = (ICULocaleService*)this;
- static UMutex llock;
- {
- Mutex mutex(&llock);
- if (loc != fallbackLocale) {
- ncThis->fallbackLocale = loc;
- LocaleUtility::initNameFromLocale(loc, ncThis->fallbackLocaleName);
- ncThis->clearServiceCache();
- }
- }
- return fallbackLocaleName;
-}
-
-ICUServiceKey*
-ICULocaleService::createKey(const UnicodeString* id, UErrorCode& status) const
-{
- return LocaleKey::createWithCanonicalFallback(id, &validateFallbackLocale(), status);
-}
-
-ICUServiceKey*
-ICULocaleService::createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const
-{
- return LocaleKey::createWithCanonicalFallback(id, &validateFallbackLocale(), kind, status);
-}
-
-U_NAMESPACE_END
-
-/* !UCONFIG_NO_SERVICE */
-#endif
-
-
diff --git a/contrib/libs/icu/common/servnotf.cpp b/contrib/libs/icu/common/servnotf.cpp
deleted file mode 100644
index f577795cae9..00000000000
--- a/contrib/libs/icu/common/servnotf.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/**
- *******************************************************************************
- * Copyright (C) 2001-2012, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- */
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_SERVICE
-
-#include "servnotf.h"
-#ifdef NOTIFIER_DEBUG
-#include <stdio.h>
-#endif
-
-U_NAMESPACE_BEGIN
-
-EventListener::~EventListener() {}
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EventListener)
-
-static UMutex notifyLock;
-
-ICUNotifier::ICUNotifier(void)
-: listeners(NULL)
-{
-}
-
-ICUNotifier::~ICUNotifier(void) {
- {
- Mutex lmx(&notifyLock);
- delete listeners;
- listeners = NULL;
- }
-}
-
-
-void
-ICUNotifier::addListener(const EventListener* l, UErrorCode& status)
-{
- if (U_SUCCESS(status)) {
- if (l == NULL) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- if (acceptsListener(*l)) {
- Mutex lmx(&notifyLock);
- if (listeners == NULL) {
- listeners = new UVector(5, status);
- } else {
- for (int i = 0, e = listeners->size(); i < e; ++i) {
- const EventListener* el = (const EventListener*)(listeners->elementAt(i));
- if (l == el) {
- return;
- }
- }
- }
-
- listeners->addElement((void*)l, status); // cast away const
- }
-#ifdef NOTIFIER_DEBUG
- else {
- fprintf(stderr, "Listener invalid for this notifier.");
- exit(1);
- }
-#endif
- }
-}
-
-void
-ICUNotifier::removeListener(const EventListener *l, UErrorCode& status)
-{
- if (U_SUCCESS(status)) {
- if (l == NULL) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- {
- Mutex lmx(&notifyLock);
- if (listeners != NULL) {
- // identity equality check
- for (int i = 0, e = listeners->size(); i < e; ++i) {
- const EventListener* el = (const EventListener*)listeners->elementAt(i);
- if (l == el) {
- listeners->removeElementAt(i);
- if (listeners->size() == 0) {
- delete listeners;
- listeners = NULL;
- }
- return;
- }
- }
- }
- }
- }
-}
-
-void
-ICUNotifier::notifyChanged(void)
-{
- if (listeners != NULL) {
- Mutex lmx(&notifyLock);
- if (listeners != NULL) {
- for (int i = 0, e = listeners->size(); i < e; ++i) {
- EventListener* el = (EventListener*)listeners->elementAt(i);
- notifyListener(*el);
- }
- }
- }
-}
-
-U_NAMESPACE_END
-
-/* UCONFIG_NO_SERVICE */
-#endif
-
diff --git a/contrib/libs/icu/common/servnotf.h b/contrib/libs/icu/common/servnotf.h
deleted file mode 100644
index dba7a0fea3b..00000000000
--- a/contrib/libs/icu/common/servnotf.h
+++ /dev/null
@@ -1,125 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/**
- *******************************************************************************
- * Copyright (C) 2001-2014, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- */
-#ifndef ICUNOTIF_H
-#define ICUNOTIF_H
-
-#include "unicode/utypes.h"
-
-#if UCONFIG_NO_SERVICE
-
-U_NAMESPACE_BEGIN
-
-/*
- * Allow the declaration of APIs with pointers to BreakIterator
- * even when break iteration is removed from the build.
- */
-class ICUNotifier;
-
-U_NAMESPACE_END
-
-#else
-
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-
-#include "mutex.h"
-#include "uvector.h"
-
-U_NAMESPACE_BEGIN
-
-class U_COMMON_API EventListener : public UObject {
-public:
- virtual ~EventListener();
-
-public:
- static UClassID U_EXPORT2 getStaticClassID();
-
- virtual UClassID getDynamicClassID() const;
-
-public:
-#ifdef SERVICE_DEBUG
- virtual UnicodeString& debug(UnicodeString& result) const {
- return debugClass(result);
- }
-
- virtual UnicodeString& debugClass(UnicodeString& result) const {
- return result.append((UnicodeString)"Key");
- }
-#endif
-};
-
-/**
- * <p>Abstract implementation of a notification facility. Clients add
- * EventListeners with addListener and remove them with removeListener.
- * Notifiers call notifyChanged when they wish to notify listeners.
- * This queues the listener list on the notification thread, which
- * eventually dequeues the list and calls notifyListener on each
- * listener in the list.</p>
- *
- * <p>Subclasses override acceptsListener and notifyListener
- * to add type-safe notification. AcceptsListener should return
- * true if the listener is of the appropriate type; ICUNotifier
- * itself will ensure the listener is non-null and that the
- * identical listener is not already registered with the Notifier.
- * NotifyListener should cast the listener to the appropriate
- * type and call the appropriate method on the listener.
- */
-
-class U_COMMON_API ICUNotifier : public UMemory {
-private: UVector* listeners;
-
-public:
- ICUNotifier(void);
-
- virtual ~ICUNotifier(void);
-
- /**
- * Add a listener to be notified when notifyChanged is called.
- * The listener must not be null. AcceptsListener must return
- * true for the listener. Attempts to concurrently
- * register the identical listener more than once will be
- * silently ignored.
- */
- virtual void addListener(const EventListener* l, UErrorCode& status);
-
- /**
- * Stop notifying this listener. The listener must
- * not be null. Attemps to remove a listener that is
- * not registered will be silently ignored.
- */
- virtual void removeListener(const EventListener* l, UErrorCode& status);
-
- /**
- * ICU doesn't spawn its own threads. All listeners are notified in
- * the thread of the caller. Misbehaved listeners can therefore
- * indefinitely block the calling thread. Callers should beware of
- * deadlock situations.
- */
- virtual void notifyChanged(void);
-
-protected:
- /**
- * Subclasses implement this to return TRUE if the listener is
- * of the appropriate type.
- */
- virtual UBool acceptsListener(const EventListener& l) const = 0;
-
- /**
- * Subclasses implement this to notify the listener.
- */
- virtual void notifyListener(EventListener& l) const = 0;
-};
-
-U_NAMESPACE_END
-
-/* UCONFIG_NO_SERVICE */
-#endif
-
-/* ICUNOTIF_H */
-#endif
diff --git a/contrib/libs/icu/common/servrbf.cpp b/contrib/libs/icu/common/servrbf.cpp
deleted file mode 100644
index 94279ab3a15..00000000000
--- a/contrib/libs/icu/common/servrbf.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/**
- *******************************************************************************
- * Copyright (C) 2001-2014, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- *
- *******************************************************************************
- */
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_SERVICE
-
-#include "unicode/resbund.h"
-#include "uresimp.h"
-#include "cmemory.h"
-#include "servloc.h"
-#include "ustrfmt.h"
-#include "uhash.h"
-#include "charstr.h"
-#include "ucln_cmn.h"
-#include "uassert.h"
-
-#define UNDERSCORE_CHAR ((UChar)0x005f)
-#define AT_SIGN_CHAR ((UChar)64)
-#define PERIOD_CHAR ((UChar)46)
-
-U_NAMESPACE_BEGIN
-
-ICUResourceBundleFactory::ICUResourceBundleFactory()
- : LocaleKeyFactory(VISIBLE)
- , _bundleName()
-{
-}
-
-ICUResourceBundleFactory::ICUResourceBundleFactory(const UnicodeString& bundleName)
- : LocaleKeyFactory(VISIBLE)
- , _bundleName(bundleName)
-{
-}
-
-ICUResourceBundleFactory::~ICUResourceBundleFactory() {}
-
-const Hashtable*
-ICUResourceBundleFactory::getSupportedIDs(UErrorCode& status) const
-{
- if (U_SUCCESS(status)) {
- return LocaleUtility::getAvailableLocaleNames(_bundleName);
- }
- return NULL;
-}
-
-UObject*
-ICUResourceBundleFactory::handleCreate(const Locale& loc, int32_t /* kind */, const ICUService* /* service */, UErrorCode& status) const
-{
- if (U_SUCCESS(status)) {
- // _bundleName is a package name
- // and should only contain invariant characters
- // ??? is it always true that the max length of the bundle name is 19?
- // who made this change? -- dlf
- char pkg[20];
- int32_t length;
- length=_bundleName.extract(0, INT32_MAX, pkg, (int32_t)sizeof(pkg), US_INV);
- if(length>=(int32_t)sizeof(pkg)) {
- return NULL;
- }
- return new ResourceBundle(pkg, loc, status);
- }
- return NULL;
-}
-
-#ifdef SERVICE_DEBUG
-UnicodeString&
-ICUResourceBundleFactory::debug(UnicodeString& result) const
-{
- LocaleKeyFactory::debug(result);
- result.append((UnicodeString)", bundle: ");
- return result.append(_bundleName);
-}
-
-UnicodeString&
-ICUResourceBundleFactory::debugClass(UnicodeString& result) const
-{
- return result.append((UnicodeString)"ICUResourceBundleFactory");
-}
-#endif
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ICUResourceBundleFactory)
-
-U_NAMESPACE_END
-
-/* !UCONFIG_NO_SERVICE */
-#endif
-
-
diff --git a/contrib/libs/icu/common/servslkf.cpp b/contrib/libs/icu/common/servslkf.cpp
deleted file mode 100644
index 09154d1b919..00000000000
--- a/contrib/libs/icu/common/servslkf.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/**
- *******************************************************************************
- * Copyright (C) 2001-2014, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- *
- *******************************************************************************
- */
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_SERVICE
-
-#include "unicode/resbund.h"
-#include "uresimp.h"
-#include "cmemory.h"
-#include "servloc.h"
-#include "ustrfmt.h"
-#include "uhash.h"
-#include "charstr.h"
-#include "uassert.h"
-
-#define UNDERSCORE_CHAR ((UChar)0x005f)
-#define AT_SIGN_CHAR ((UChar)64)
-#define PERIOD_CHAR ((UChar)46)
-
-U_NAMESPACE_BEGIN
-
-/*
- ******************************************************************
- */
-
-SimpleLocaleKeyFactory::SimpleLocaleKeyFactory(UObject* objToAdopt,
- const UnicodeString& locale,
- int32_t kind,
- int32_t coverage)
- : LocaleKeyFactory(coverage)
- , _obj(objToAdopt)
- , _id(locale)
- , _kind(kind)
-{
-}
-
-SimpleLocaleKeyFactory::SimpleLocaleKeyFactory(UObject* objToAdopt,
- const Locale& locale,
- int32_t kind,
- int32_t coverage)
- : LocaleKeyFactory(coverage)
- , _obj(objToAdopt)
- , _id()
- , _kind(kind)
-{
- LocaleUtility::initNameFromLocale(locale, _id);
-}
-
-SimpleLocaleKeyFactory::~SimpleLocaleKeyFactory()
-{
- delete _obj;
- _obj = NULL;
-}
-
-UObject*
-SimpleLocaleKeyFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const
-{
- if (U_SUCCESS(status)) {
- const LocaleKey& lkey = (const LocaleKey&)key;
- if (_kind == LocaleKey::KIND_ANY || _kind == lkey.kind()) {
- UnicodeString keyID;
- lkey.currentID(keyID);
- if (_id == keyID) {
- return service->cloneInstance(_obj);
- }
- }
- }
- return NULL;
-}
-
-//UBool
-//SimpleLocaleKeyFactory::isSupportedID(const UnicodeString& id, UErrorCode& /* status */) const
-//{
-// return id == _id;
-//}
-
-void
-SimpleLocaleKeyFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const
-{
- if (U_SUCCESS(status)) {
- if (_coverage & 0x1) {
- result.remove(_id);
- } else {
- result.put(_id, (void*)this, status);
- }
- }
-}
-
-#ifdef SERVICE_DEBUG
-UnicodeString&
-SimpleLocaleKeyFactory::debug(UnicodeString& result) const
-{
- LocaleKeyFactory::debug(result);
- result.append((UnicodeString)", id: ");
- result.append(_id);
- result.append((UnicodeString)", kind: ");
- result.append(_kind);
- return result;
-}
-
-UnicodeString&
-SimpleLocaleKeyFactory::debugClass(UnicodeString& result) const
-{
- return result.append((UnicodeString)"SimpleLocaleKeyFactory");
-}
-#endif
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleLocaleKeyFactory)
-
-U_NAMESPACE_END
-
-/* !UCONFIG_NO_SERVICE */
-#endif
-
-
diff --git a/contrib/libs/icu/common/sharedobject.cpp b/contrib/libs/icu/common/sharedobject.cpp
deleted file mode 100644
index 6eeca8605f0..00000000000
--- a/contrib/libs/icu/common/sharedobject.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-******************************************************************************
-* sharedobject.cpp
-*/
-#include "sharedobject.h"
-#include "mutex.h"
-#include "uassert.h"
-#include "umutex.h"
-#include "unifiedcache.h"
-
-U_NAMESPACE_BEGIN
-
-SharedObject::~SharedObject() {}
-
-UnifiedCacheBase::~UnifiedCacheBase() {}
-
-void
-SharedObject::addRef() const {
- umtx_atomic_inc(&hardRefCount);
-}
-
-// removeRef Decrement the reference count and delete if it is zero.
-// Note that SharedObjects with a non-null cachePtr are owned by the
-// unified cache, and the cache will be responsible for the actual deletion.
-// The deletion could be as soon as immediately following the
-// update to the reference count, if another thread is running
-// a cache eviction cycle concurrently.
-// NO ACCESS TO *this PERMITTED AFTER REFERENCE COUNT == 0 for cached objects.
-// THE OBJECT MAY ALREADY BE GONE.
-void
-SharedObject::removeRef() const {
- const UnifiedCacheBase *cache = this->cachePtr;
- int32_t updatedRefCount = umtx_atomic_dec(&hardRefCount);
- U_ASSERT(updatedRefCount >= 0);
- if (updatedRefCount == 0) {
- if (cache) {
- cache->handleUnreferencedObject();
- } else {
- delete this;
- }
- }
-}
-
-
-int32_t
-SharedObject::getRefCount() const {
- return umtx_loadAcquire(hardRefCount);
-}
-
-void
-SharedObject::deleteIfZeroRefCount() const {
- if (this->cachePtr == nullptr && getRefCount() == 0) {
- delete this;
- }
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/sharedobject.h b/contrib/libs/icu/common/sharedobject.h
deleted file mode 100644
index c0a5aba4782..00000000000
--- a/contrib/libs/icu/common/sharedobject.h
+++ /dev/null
@@ -1,184 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 2015-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-******************************************************************************
-* sharedobject.h
-*/
-
-#ifndef __SHAREDOBJECT_H__
-#define __SHAREDOBJECT_H__
-
-
-#include "unicode/uobject.h"
-#include "umutex.h"
-
-U_NAMESPACE_BEGIN
-
-class SharedObject;
-
-/**
- * Base class for unified cache exposing enough methods to SharedObject
- * instances to allow their addRef() and removeRef() methods to
- * update cache metrics. No other part of ICU, except for SharedObject,
- * should directly call the methods of this base class.
- */
-class U_COMMON_API UnifiedCacheBase : public UObject {
-public:
- UnifiedCacheBase() { }
-
- /**
- * Notify the cache implementation that an object was seen transitioning to
- * zero hard references. The cache may use this to keep track of the number of
- * unreferenced SharedObjects, and to trigger evictions.
- * Called by SharedObject::removeRef() for objects that have a cachePtr.
- */
- virtual void handleUnreferencedObject() const = 0;
-
- virtual ~UnifiedCacheBase();
-private:
- // Declared private and not defined in this header: instances are not
- // meant to be copied or assigned.
- UnifiedCacheBase(const UnifiedCacheBase &);
- UnifiedCacheBase &operator=(const UnifiedCacheBase &);
-};
-
-/**
- * Base class for shared, reference-counted, auto-deleted objects.
- * Subclasses can be immutable.
- * If they are mutable, then they must implement their copy constructor
- * so that copyOnWrite() works.
- *
- * Either stack-allocate, use LocalPointer, or use addRef()/removeRef().
- * Sharing requires reference-counting.
- */
-class U_COMMON_API SharedObject : public UObject {
-public:
- /** Initializes softRefCount and hardRefCount to 0 and cachePtr to NULL. */
- SharedObject() :
- softRefCount(0),
- hardRefCount(0),
- cachePtr(NULL) {}
-
- /**
- * Copies only the UObject base of other. The new object does not inherit
- * other's reference counts or cache ownership: softRefCount and
- * hardRefCount start at 0 and cachePtr starts as NULL.
- */
- SharedObject(const SharedObject &other) :
- UObject(other),
- softRefCount(0),
- hardRefCount(0),
- cachePtr(NULL) {}
-
- virtual ~SharedObject();
-
- /**
- * Increments the number of hard references to this object. Thread-safe.
- * Not for use from within the Unified Cache implementation.
- */
- void addRef() const;
-
- /**
- * Decrements the number of hard references to this object, and
- * arrange for possible cache-eviction and/or deletion if ref
- * count goes to zero. Thread-safe.
- *
- * Not for use from within the UnifiedCache implementation.
- */
- void removeRef() const;
-
- /**
- * Returns the number of hard references for this object.
- * Uses a memory barrier.
- */
- int32_t getRefCount() const;
-
- /**
- * If noHardReferences() == TRUE then this object has no hard references.
- * Must be called only from within the internals of UnifiedCache.
- */
- inline UBool noHardReferences() const { return getRefCount() == 0; }
-
- /**
- * If hasHardReferences() == TRUE then this object has hard references.
- * Must be called only from within the internals of UnifiedCache.
- */
- inline UBool hasHardReferences() const { return getRefCount() != 0; }
-
- /**
- * Deletes this object if it has no references.
- * Available for non-cached SharedObjects only. Ownership of cached objects
- * is with the UnifiedCache, which is solely responsible for eviction and deletion.
- */
- void deleteIfZeroRefCount() const;
-
-
- /**
- * Returns a writable version of ptr.
- * If there is exactly one owner, then ptr itself is returned as a
- * non-const pointer.
- * If there are multiple owners, then ptr is replaced with a
- * copy-constructed clone,
- * and that is returned.
- * Returns NULL if cloning failed; in that case ptr and the reference
- * counts are left unchanged.
- *
- * T must be a subclass of SharedObject.
- */
- template<typename T>
- static T *copyOnWrite(const T *&ptr) {
- const T *p = ptr;
- // A count of 0 or 1 means no other owner can observe a modification.
- if(p->getRefCount() <= 1) { return const_cast<T *>(p); }
- T *p2 = new T(*p);
- if(p2 == NULL) { return NULL; }
- // Move the caller's reference from the shared original to the clone.
- p->removeRef();
- ptr = p2;
- p2->addRef();
- return p2;
- }
-
- /**
- * Makes dest an owner of the object pointed to by src while adjusting
- * reference counts and deleting the previous object dest pointed to
- * if necessary. Before this call is made, dest must either be NULL or
- * be included in the reference count of the object it points to.
- * Does nothing when src and dest already point to the same object.
- *
- * T must be a subclass of SharedObject.
- */
- template<typename T>
- static void copyPtr(const T *src, const T *&dest) {
- if(src != dest) {
- if(dest != NULL) { dest->removeRef(); }
- dest = src;
- if(src != NULL) { src->addRef(); }
- }
- }
-
- /**
- * Equivalent to copyPtr(NULL, dest).
- */
- template<typename T>
- static void clearPtr(const T *&ptr) {
- if (ptr != NULL) {
- ptr->removeRef();
- ptr = NULL;
- }
- }
-
-private:
- /**
- * The number of references from the UnifiedCache, which is
- * the number of times that the sharedObject is stored as a hash table value.
- * For use by UnifiedCache implementation code only.
- * All access is synchronized by UnifiedCache's gCacheMutex
- */
- mutable int32_t softRefCount;
- friend class UnifiedCache;
-
- /**
- * Reference count, excluding references from within the UnifiedCache implementation.
- */
- mutable u_atomic_int32_t hardRefCount;
-
- /**
- * The cache that owns this object, or NULL for a non-cached object.
- * removeRef() notifies this cache instead of deleting the object itself.
- * Nothing in this class assigns it after construction; presumably set by
- * the UnifiedCache friend class - confirm in unifiedcache.cpp.
- */
- mutable const UnifiedCacheBase *cachePtr;
-
-};
-
-U_NAMESPACE_END
-
-#endif
diff --git a/contrib/libs/icu/common/simpleformatter.cpp b/contrib/libs/icu/common/simpleformatter.cpp
deleted file mode 100644
index 76d8f54efd4..00000000000
--- a/contrib/libs/icu/common/simpleformatter.cpp
+++ /dev/null
@@ -1,323 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 2014-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-******************************************************************************
-* simpleformatter.cpp
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/simpleformatter.h"
-#include "unicode/unistr.h"
-#include "uassert.h"
-
-U_NAMESPACE_BEGIN
-
-namespace {
-
-/**
- * Exclusive upper bound for argument numbers. Compiled-pattern units below
- * this value are argument numbers; text segment lengths are stored with this
- * value added. This is currently the only unused char value in compiled
- * patterns, except that it is the maximum value of the first unit
- * (max arg + 1).
- */
-const int32_t ARG_NUM_LIMIT = 0x100;
-/**
- * Unit written as a placeholder when a text segment is started; it is also
- * the largest unit a segment length can occupy.
- * Segment length units range from ARG_NUM_LIMIT+1 up to this value.
- * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
- */
-const UChar SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff;
-/**
- * Longest text segment that fits in one length unit.
- * Longer segments are split into shorter ones.
- */
-const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT;
-
-/** Pattern syntax characters, as UTF-16 code unit values. */
-enum {
-    APOS = 0x27,
-    DIGIT_ZERO = 0x30,
-    DIGIT_ONE = 0x31,
-    DIGIT_NINE = 0x39,
-    OPEN_BRACE = 0x7b,
-    CLOSE_BRACE = 0x7d
-};
-
-/**
- * Returns TRUE for an unusable (array, length) pair: a negative length, or a
- * NULL array with a non-zero length.
- */
-inline UBool isInvalidArray(const void *array, int32_t length) {
-    if (length < 0) {
-        return TRUE;
-    }
-    return array == NULL && length != 0;
-}
-
-}  // namespace
-
-// Copy assignment: takes over other's compiled pattern; self-assignment is a no-op.
-SimpleFormatter &SimpleFormatter::operator=(const SimpleFormatter& other) {
-    if (this != &other) {
-        compiledPattern = other.compiledPattern;
-    }
-    return *this;
-}
-
-// Nothing to release explicitly.
-SimpleFormatter::~SimpleFormatter() {}
-
-// Compiles pattern into compiledPattern and checks its argument count.
-//
-// Compiled layout, as built below:
-//   unit 0:  number of arguments (highest argument number + 1)
-//   then a sequence of either
-//     an argument number (a unit below ARG_NUM_LIMIT), or
-//     a text segment: one unit ARG_NUM_LIMIT + length, followed by that many
-//     literal units.
-//
-// Returns TRUE on success. Returns FALSE if errorCode already indicates a
-// failure on entry (nothing is modified), or sets errorCode to
-// U_ILLEGAL_ARGUMENT_ERROR and returns FALSE for a malformed {argument} or an
-// argument count outside [min, max]. On those error paths compiledPattern has
-// already been overwritten with partial output.
-UBool SimpleFormatter::applyPatternMinMaxArguments(
- const UnicodeString &pattern,
- int32_t min, int32_t max,
- UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) {
- return FALSE;
- }
- // Parse consistent with MessagePattern, but
- // - support only simple numbered arguments
- // - build a simple binary structure into the result string
- const UChar *patternBuffer = pattern.getBuffer();
- int32_t patternLength = pattern.length();
- // Reserve the first char for the number of arguments.
- compiledPattern.setTo((UChar)0);
- // Length of the text segment currently being appended (0 = none open).
- int32_t textLength = 0;
- // Highest argument number seen so far; -1 if there is none.
- int32_t maxArg = -1;
- UBool inQuote = FALSE;
- for (int32_t i = 0; i < patternLength;) {
- UChar c = patternBuffer[i++];
- if (c == APOS) {
- // Note: the condition below reassigns c to the character after the apostrophe.
- if (i < patternLength && (c = patternBuffer[i]) == APOS) {
- // double apostrophe, skip the second one
- ++i;
- } else if (inQuote) {
- // skip the quote-ending apostrophe
- inQuote = FALSE;
- continue;
- } else if (c == OPEN_BRACE || c == CLOSE_BRACE) {
- // Skip the quote-starting apostrophe, find the end of the quoted literal text.
- // c now holds the brace, which is appended as literal text below.
- ++i;
- inQuote = TRUE;
- } else {
- // The apostrophe is part of literal text.
- c = APOS;
- }
- } else if (!inQuote && c == OPEN_BRACE) {
- // An argument starts: close the open text segment by replacing its
- // placeholder unit with the real length.
- if (textLength > 0) {
- compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
- (UChar)(ARG_NUM_LIMIT + textLength));
- textLength = 0;
- }
- int32_t argNumber;
- // Fast path: a single digit immediately followed by '}'.
- if ((i + 1) < patternLength &&
- 0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 &&
- patternBuffer[i + 1] == CLOSE_BRACE) {
- i += 2;
- } else {
- // Multi-digit argument number (no leading zero) or syntax error.
- // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
- // around the number, but this class does not.
- argNumber = -1;
- if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
- argNumber = c - DIGIT_ZERO;
- while (i < patternLength &&
- DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
- argNumber = argNumber * 10 + (c - DIGIT_ZERO);
- // Too large: stop with c still a digit, so the '}' check below fails.
- if (argNumber >= ARG_NUM_LIMIT) {
- break;
- }
- }
- }
- // c is the last character consumed; it must be the closing brace.
- if (argNumber < 0 || c != CLOSE_BRACE) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- }
- if (argNumber > maxArg) {
- maxArg = argNumber;
- }
- compiledPattern.append((UChar)argNumber);
- continue;
- } // else: c is part of literal text
- // Append c and track the literal-text segment length.
- if (textLength == 0) {
- // Reserve a char for the length of a new text segment, preset the maximum length.
- compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR);
- }
- compiledPattern.append(c);
- // A full segment keeps its preset maximum-length unit; the next literal
- // character starts a new segment.
- if (++textLength == MAX_SEGMENT_LENGTH) {
- textLength = 0;
- }
- }
- // Close the trailing text segment, if any.
- if (textLength > 0) {
- compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
- (UChar)(ARG_NUM_LIMIT + textLength));
- }
- int32_t argCount = maxArg + 1;
- if (argCount < min || max < argCount) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- compiledPattern.setCharAt(0, (UChar)argCount);
- return TRUE;
-}
-
-// One-argument convenience overload: appends the formatted text to appendTo
-// by delegating to formatAndAppend() without offset tracking.
-UnicodeString& SimpleFormatter::format(
-        const UnicodeString &value0,
-        UnicodeString &appendTo, UErrorCode &errorCode) const {
-    const UnicodeString *const args[1] = { &value0 };
-    return formatAndAppend(args, 1, appendTo, NULL, 0, errorCode);
-}
-
-// Two-argument convenience overload: appends the formatted text to appendTo
-// by delegating to formatAndAppend() without offset tracking.
-UnicodeString& SimpleFormatter::format(
-        const UnicodeString &value0,
-        const UnicodeString &value1,
-        UnicodeString &appendTo, UErrorCode &errorCode) const {
-    const UnicodeString *const args[2] = { &value0, &value1 };
-    return formatAndAppend(args, 2, appendTo, NULL, 0, errorCode);
-}
-
-// Three-argument convenience overload: appends the formatted text to appendTo
-// by delegating to formatAndAppend() without offset tracking.
-UnicodeString& SimpleFormatter::format(
-        const UnicodeString &value0,
-        const UnicodeString &value1,
-        const UnicodeString &value2,
-        UnicodeString &appendTo, UErrorCode &errorCode) const {
-    const UnicodeString *const args[3] = { &value0, &value1, &value2 };
-    return formatAndAppend(args, 3, appendTo, NULL, 0, errorCode);
-}
-
-// Formats the given values with the compiled pattern and appends the output
-// to appendTo, which is also the return value.
-// Sets U_ILLEGAL_ARGUMENT_ERROR when either array/length pair is invalid or
-// fewer values are supplied than the pattern requires; appendTo itself must
-// not be one of the values (enforced by the static format() helper).
-UnicodeString& SimpleFormatter::formatAndAppend(
-        const UnicodeString *const *values, int32_t valuesLength,
-        UnicodeString &appendTo,
-        int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
-    if (U_FAILURE(errorCode)) {
-        return appendTo;
-    }
-    const UBool badArrays =
-        isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength);
-    if (badArrays || valuesLength < getArgumentLimit()) {
-        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
-        return appendTo;
-    }
-    const UChar *pattern = compiledPattern.getBuffer();
-    const int32_t patternLength = compiledPattern.length();
-    return format(pattern, patternLength, values,
-                  appendTo, NULL, TRUE,
-                  offsets, offsetsLength, errorCode);
-}
-
-// Formats the given values with the compiled pattern into result, replacing
-// its previous contents, and returns result. Unlike formatAndAppend(), result
-// may itself be one of the values.
-//
-// Sets U_ILLEGAL_ARGUMENT_ERROR (leaving result untouched) when either
-// array/length pair is invalid or fewer values are supplied than the pattern
-// requires. Does nothing if errorCode already indicates a failure.
-UnicodeString &SimpleFormatter::formatAndReplace(
- const UnicodeString *const *values, int32_t valuesLength,
- UnicodeString &result,
- int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
- if (U_FAILURE(errorCode)) {
- return result;
- }
- if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return result;
- }
- const UChar *cp = compiledPattern.getBuffer();
- int32_t cpLength = compiledPattern.length();
- if (valuesLength < getArgumentLimit(cp, cpLength)) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return result;
- }
-
- // If the pattern starts with an argument whose value is the same object
- // as the result, then we keep the result contents and append to it.
- // Otherwise we replace its contents.
- int32_t firstArg = -1;
- // If any non-initial argument value is the same object as the result,
- // then we first copy its contents and use that instead while formatting.
- UnicodeString resultCopy;
- if (getArgumentLimit(cp, cpLength) > 0) {
- // Walk the compiled pattern: unit 0 is the argument count, so start at 1.
- for (int32_t i = 1; i < cpLength;) {
- int32_t n = cp[i++];
- if (n < ARG_NUM_LIMIT) {
- // n is an argument number.
- if (values[n] == &result) {
- // i == 2 means this argument was the unit at index 1, i.e. the
- // very first item of the pattern.
- if (i == 2) {
- firstArg = n;
- } else if (resultCopy.isEmpty() && !result.isEmpty()) {
- resultCopy = result;
- }
- }
- } else {
- // n encodes a text segment length; skip over the literal units.
- i += n - ARG_NUM_LIMIT;
- }
- }
- }
- if (firstArg < 0) {
- result.remove();
- }
- return format(cp, cpLength, values,
- result, &resultCopy, FALSE,
- offsets, offsetsLength, errorCode);
-}
-
-// Returns the literal text of a compiled pattern with all arguments left out.
-// Every entry of offsets is first set to -1; for each argument number below
-// offsetsLength, offsets[number] then receives the position in the returned
-// text where that argument would have been inserted.
-UnicodeString SimpleFormatter::getTextWithNoArguments(
-        const UChar *compiledPattern,
-        int32_t compiledPatternLength,
-        int32_t* offsets,
-        int32_t offsetsLength) {
-    for (int32_t k = 0; k < offsetsLength; ++k) {
-        offsets[k] = -1;
-    }
-    const int32_t argLimit = getArgumentLimit(compiledPattern, compiledPatternLength);
-    UnicodeString text(compiledPatternLength - 1 - argLimit, 0, 0);  // Java: StringBuilder
-    // Unit 0 holds the argument count, so the scan starts at unit 1.
-    int32_t pos = 1;
-    while (pos < compiledPatternLength) {
-        const int32_t unit = compiledPattern[pos++];
-        if (unit > ARG_NUM_LIMIT) {
-            // Text segment: the unit encodes its length.
-            const int32_t segmentLength = unit - ARG_NUM_LIMIT;
-            text.append(compiledPattern + pos, segmentLength);
-            pos += segmentLength;
-        } else if (unit < offsetsLength) {
-            // Argument number the caller wants an offset for.
-            offsets[unit] = text.length();
-        }
-    }
-    return text;
-}
-
-// Shared implementation behind formatAndAppend() and formatAndReplace():
-// walks the compiled pattern and appends text segments and argument values
-// to result.
-//
-// values must provide an entry for every argument number in the pattern (the
-// callers in this file check this before calling). resultCopy holds the
-// text to use when an argument value is the result object itself and is not
-// the first item of the pattern; it is only dereferenced on that path, so it
-// may be NULL when forbidResultAsValue is TRUE.
-// Every entry of offsets is first set to -1; offsets[n] then receives the
-// position in result where argument n was written, for n < offsetsLength.
-//
-// Sets U_ILLEGAL_ARGUMENT_ERROR and returns early, possibly with partial
-// output already appended, when a needed value is NULL, or when a value is
-// the result object and forbidResultAsValue is TRUE.
-UnicodeString &SimpleFormatter::format(
- const UChar *compiledPattern, int32_t compiledPatternLength,
- const UnicodeString *const *values,
- UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
- int32_t *offsets, int32_t offsetsLength,
- UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) {
- return result;
- }
- for (int32_t i = 0; i < offsetsLength; i++) {
- offsets[i] = -1;
- }
- // Unit 0 holds the argument count, so the scan starts at unit 1.
- for (int32_t i = 1; i < compiledPatternLength;) {
- int32_t n = compiledPattern[i++];
- if (n < ARG_NUM_LIMIT) {
- // n is an argument number.
- const UnicodeString *value = values[n];
- if (value == NULL) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return result;
- }
- if (value == &result) {
- if (forbidResultAsValue) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return result;
- }
- // i == 2 means this argument is the first item of the pattern.
- if (i == 2) {
- // We are appending to result which is also the first value object.
- if (n < offsetsLength) {
- offsets[n] = 0;
- }
- } else {
- if (n < offsetsLength) {
- offsets[n] = result.length();
- }
- result.append(*resultCopy);
- }
- } else {
- if (n < offsetsLength) {
- offsets[n] = result.length();
- }
- result.append(*value);
- }
- } else {
- // n encodes the length of a text segment that follows directly.
- int32_t length = n - ARG_NUM_LIMIT;
- result.append(compiledPattern + i, length);
- i += length;
- }
- }
- return result;
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/sprpimpl.h b/contrib/libs/icu/common/sprpimpl.h
deleted file mode 100644
index ca0bcdb5169..00000000000
--- a/contrib/libs/icu/common/sprpimpl.h
+++ /dev/null
@@ -1,130 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- *******************************************************************************
- *
- * Copyright (C) 2003-2006, International Business Machines
- * Corporation and others. All Rights Reserved.
- *
- *******************************************************************************
- * file name: sprpimpl.h
- * encoding: UTF-8
- * tab size: 8 (not used)
- * indentation:4
- *
- * created on: 2003feb1
- * created by: Ram Viswanadha
- */
-
-#ifndef SPRPIMPL_H
-#define SPRPIMPL_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_IDNA
-
-#include "unicode/ustring.h"
-#include "unicode/parseerr.h"
-#include "unicode/usprep.h"
-#include "unicode/udata.h"
-#include "utrie.h"
-#include "udataswp.h"
-#include "ubidi_props.h"
-
-#define _SPREP_DATA_TYPE "spp"
-
-/* Classification values for StringPrep data; USPREP_TYPE_LIMIT is one past
- * the last real type. */
-enum UStringPrepType{
- USPREP_UNASSIGNED = 0x0000 ,
- USPREP_MAP = 0x0001 ,
- USPREP_PROHIBITED = 0x0002 ,
- USPREP_DELETE = 0x0003 ,
- USPREP_TYPE_LIMIT = 0x0004
-};
-
-typedef enum UStringPrepType UStringPrepType;
-
-/* Printable names listed in the same order as the UStringPrepType values.
- * Compiled in only when the including file defines USPREP_TYPE_NAMES_ARRAY;
- * being static, each such file gets its own copy. */
-#ifdef USPREP_TYPE_NAMES_ARRAY
-static const char* usprepTypeNames[] ={
- "UNASSIGNED" ,
- "MAP" ,
- "PROHIBITED" ,
- "DELETE",
- "TYPE_LIMIT"
-};
-#endif
-
-/* Single-bit option flags. NOTE(review): presumably tested against
- * indexes[_SPREP_OPTIONS] to derive doNFKC / checkBiDi - confirm in usprep.cpp. */
-enum{
- _SPREP_NORMALIZATION_ON = 0x0001,
- _SPREP_CHECK_BIDI_ON = 0x0002
-};
-
-/* Limits used when interpreting StringPrep data values.
- * NOTE(review): exact semantics are not visible in this header - confirm
- * against the data format description in usprep.cpp / gensprep. */
-enum{
- _SPREP_TYPE_THRESHOLD = 0xFFF0,
- _SPREP_MAX_INDEX_VALUE = 0x3FBF, /*16139*/
- _SPREP_MAX_INDEX_TOP_LENGTH = 0x0003
-};
-
-/* indexes[] value names */
-enum {
- _SPREP_INDEX_TRIE_SIZE = 0, /* number of bytes in StringPrep trie */
- _SPREP_INDEX_MAPPING_DATA_SIZE = 1, /* The array that contains the mapping */
- _SPREP_NORM_CORRECTNS_LAST_UNI_VERSION = 2, /* The index of Unicode version of last entry in NormalizationCorrections.txt */
- _SPREP_ONE_UCHAR_MAPPING_INDEX_START = 3, /* The starting index of 1 UChar mapping index in the mapping data array */
- _SPREP_TWO_UCHARS_MAPPING_INDEX_START = 4, /* The starting index of 2 UChars mapping index in the mapping data array */
- _SPREP_THREE_UCHARS_MAPPING_INDEX_START = 5, /* The starting index of 3 UChars mapping index in the mapping data array */
- _SPREP_FOUR_UCHARS_MAPPING_INDEX_START = 6, /* The starting index of 4 UChars mapping index in the mapping data array */
- _SPREP_OPTIONS = 7, /* Bit set of options to turn on in the profile */
- _SPREP_INDEX_TOP=16 /* changing this requires a new formatVersion */
-};
-
-typedef struct UStringPrepKey UStringPrepKey;
-
-
-/* Name/path pair identifying a profile.
- * NOTE(review): presumably the lookup key of a profile cache - confirm in usprep.cpp. */
-struct UStringPrepKey{
- char* name;
- char* path;
-};
-
-/* State of one StringPrep profile. */
-struct UStringPrepProfile{
- int32_t indexes[_SPREP_INDEX_TOP]; /* header values; see the "indexes[] value names" enum */
- UTrie sprepTrie;
- const uint16_t* mappingData;
- UDataMemory* sprepData;
- int32_t refCount;
- UBool isDataLoaded;
- UBool doNFKC; /* presumably mirrors _SPREP_NORMALIZATION_ON - TODO confirm */
- UBool checkBiDi; /* presumably mirrors _SPREP_CHECK_BIDI_ON - TODO confirm */
-};
-
-/**
- * Helper function for populating the UParseError struct
- * @internal
- */
-U_CAPI void U_EXPORT2
-uprv_syntaxError(const UChar* rules,
- int32_t pos,
- int32_t rulesLen,
- UParseError* parseError);
-
-
-/**
- * Swap StringPrep .spp profile data. See udataswp.h.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-usprep_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-#endif /* #if !UCONFIG_NO_IDNA */
-
-#endif
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/contrib/libs/icu/common/static_unicode_sets.cpp b/contrib/libs/icu/common/static_unicode_sets.cpp
deleted file mode 100644
index 5dab3931a70..00000000000
--- a/contrib/libs/icu/common/static_unicode_sets.cpp
+++ /dev/null
@@ -1,245 +0,0 @@
-// © 2018 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-// Allow implicit conversion from char16_t* to UnicodeString for this file:
-// Helpful in toString methods and elsewhere.
-#define UNISTR_FROM_STRING_EXPLICIT
-
-#include "static_unicode_sets.h"
-#include "umutex.h"
-#include "ucln_cmn.h"
-#include "unicode/uniset.h"
-#include "uresimp.h"
-#include "cstring.h"
-#include "uassert.h"
-
-using namespace icu;
-using namespace icu::unisets;
-
-
-namespace {
-
-UnicodeSet* gUnicodeSets[UNISETS_KEY_COUNT] = {};
-
-// Save the empty instance in static memory to have well-defined behavior if a
-// regular UnicodeSet cannot be allocated.
-alignas(UnicodeSet)
-char gEmptyUnicodeSet[sizeof(UnicodeSet)];
-
-// Whether the gEmptyUnicodeSet is initialized and ready to use.
-UBool gEmptyUnicodeSetInitialized = FALSE;
-
-// Returns the set stored for key, or the static empty set when nothing is
-// stored for it. Keys outside the table - in particular NONE (-1), which
-// chooseFrom() can return - also yield the empty set instead of indexing
-// gUnicodeSets out of bounds.
-// The empty set is only usable once initNumberParseUniSets() has constructed it.
-inline UnicodeSet* getImpl(Key key) {
-    UnicodeSet* const emptySet = reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
-    if (key < 0 || key >= UNISETS_KEY_COUNT) {
-        return emptySet;
-    }
-    UnicodeSet* candidate = gUnicodeSets[key];
-    if (candidate == nullptr) {
-        return emptySet;
-    }
-    return candidate;
-}
-
-// Builds a new frozen set holding the union of the sets for k1 and k2.
-// Returns nullptr if the set could not be allocated; the caller owns the result.
-UnicodeSet* computeUnion(Key k1, Key k2) {
-    UnicodeSet* merged = new UnicodeSet();
-    if (merged != nullptr) {
-        merged->addAll(*getImpl(k1));
-        merged->addAll(*getImpl(k2));
-        merged->freeze();
-    }
-    return merged;
-}
-
-// Builds a new frozen set holding the union of the sets for k1, k2 and k3.
-// Returns nullptr if the set could not be allocated; the caller owns the result.
-UnicodeSet* computeUnion(Key k1, Key k2, Key k3) {
-    UnicodeSet* merged = new UnicodeSet();
-    if (merged != nullptr) {
-        merged->addAll(*getImpl(k1));
-        merged->addAll(*getImpl(k2));
-        merged->addAll(*getImpl(k3));
-        merged->freeze();
-    }
-    return merged;
-}
-
-
-// Parses unicodeSetPattern and stores the resulting set in gUnicodeSets[key].
-// Does nothing if status already indicates a failure. A failed allocation is
-// reported as U_MEMORY_ALLOCATION_ERROR (via LocalPointer); on any failure the
-// slot is left unchanged and the partially built set is discarded.
-// A set already stored under key is deleted rather than leaked.
-void saveSet(Key key, const UnicodeString& unicodeSetPattern, UErrorCode& status) {
-    if (U_FAILURE(status)) { return; }
-    LocalPointer<UnicodeSet> parsed(new UnicodeSet(unicodeSetPattern, status), status);
-    if (U_FAILURE(status)) { return; }
-    // assert unicodeSets.get(key) == null;
-    delete gUnicodeSets[key];
-    gUnicodeSets[key] = parsed.orphan();
-}
-
-// Resource sink for the "parse" resource table (see initNumberParseUniSets()).
-// The data is walked as: table of contexts -> table of strictness levels ->
-// array of UnicodeSet pattern strings. Each pattern is classified by a
-// character it contains and stored under the matching Key with saveSet().
-class ParseDataSink : public ResourceSink {
- public:
- // Note: key and value are reused as cursors by the nested getKeyAndValue()
- // and getValue() calls, so they do not keep their incoming values.
- // Returns early, leaving status set, on the first failure.
- void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
- ResourceTable contextsTable = value.getTable(status);
- if (U_FAILURE(status)) { return; }
- for (int i = 0; contextsTable.getKeyAndValue(i, key, value); i++) {
- if (uprv_strcmp(key, "date") == 0) {
- // ignore
- } else {
- ResourceTable strictnessTable = value.getTable(status);
- if (U_FAILURE(status)) { return; }
- for (int j = 0; strictnessTable.getKeyAndValue(j, key, value); j++) {
- // Any strictness key other than "lenient" is treated as strict.
- bool isLenient = (uprv_strcmp(key, "lenient") == 0);
- ResourceArray array = value.getArray(status);
- if (U_FAILURE(status)) { return; }
- for (int k = 0; k < array.getSize(); k++) {
- array.getValue(k, value);
- UnicodeString str = value.getUnicodeString(status);
- if (U_FAILURE(status)) { return; }
- // There is both lenient and strict data for comma/period,
- // but not for any of the other symbols.
- // The first matching branch wins, so the order of the checks matters.
- if (str.indexOf(u'.') != -1) {
- saveSet(isLenient ? PERIOD : STRICT_PERIOD, str, status);
- } else if (str.indexOf(u',') != -1) {
- saveSet(isLenient ? COMMA : STRICT_COMMA, str, status);
- } else if (str.indexOf(u'+') != -1) {
- saveSet(PLUS_SIGN, str, status);
- } else if (str.indexOf(u'-') != -1) {
- saveSet(MINUS_SIGN, str, status);
- } else if (str.indexOf(u'$') != -1) {
- saveSet(DOLLAR_SIGN, str, status);
- } else if (str.indexOf(u'£') != -1) {
- saveSet(POUND_SIGN, str, status);
- } else if (str.indexOf(u'₹') != -1) {
- saveSet(RUPEE_SIGN, str, status);
- } else if (str.indexOf(u'¥') != -1) {
- saveSet(YEN_SIGN, str, status);
- } else if (str.indexOf(u'₩') != -1) {
- saveSet(WON_SIGN, str, status);
- } else if (str.indexOf(u'%') != -1) {
- saveSet(PERCENT_SIGN, str, status);
- } else if (str.indexOf(u'‰') != -1) {
- saveSet(PERMILLE_SIGN, str, status);
- } else if (str.indexOf(u'’') != -1) {
- saveSet(APOSTROPHE_SIGN, str, status);
- } else {
- // Unknown class of parse lenients
- // TODO(ICU-20428): Make ICU automatically accept new classes?
- U_ASSERT(FALSE);
- }
- if (U_FAILURE(status)) { return; }
- }
- }
- }
- }
- }
-};
-
-
-icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER;
-
-// Cleanup callback registered by initNumberParseUniSets(): destroys the
-// static empty set if it was constructed, deletes every stored set, and
-// resets the init-once flag so the sets can be built again. Always returns TRUE.
-UBool U_CALLCONV cleanupNumberParseUniSets() {
-    if (gEmptyUnicodeSetInitialized) {
-        // Placement-constructed in static storage, so destroy it explicitly.
-        UnicodeSet* emptySet = reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
-        emptySet->~UnicodeSet();
-        gEmptyUnicodeSetInitialized = FALSE;
-    }
-    for (UnicodeSet*& slot : gUnicodeSets) {
-        delete slot;
-        slot = nullptr;
-    }
-    gNumberParseUniSetsInitOnce.reset();
-    return TRUE;
-}
-
-// One-time initializer (run through umtx_initOnce from unisets::get()) that
-// fills gUnicodeSets: fixed pattern sets, the sets loaded from the "parse"
-// resource of the root bundle, and the unions derived from them. All stored
-// sets are frozen at the end.
-// On failure status is set and the function returns early, leaving the
-// remaining slots null; the registered cleanup function releases whatever
-// was created.
-void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
-    ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS, cleanupNumberParseUniSets);
-
-    // Initialize the empty instance for well-defined fallback behavior
-    new(gEmptyUnicodeSet) UnicodeSet();
-    reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->freeze();
-    gEmptyUnicodeSetInitialized = TRUE;
-
-    // These sets were decided after discussion with icu-design@. See tickets #13084 and #13309.
-    // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
-    gUnicodeSets[DEFAULT_IGNORABLES] = new UnicodeSet(
-            u"[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]", status);
-    gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(u"[[:Bidi_Control:]]", status);
-
-    LocalUResourceBundlePointer rb(ures_open(nullptr, "root", &status));
-    if (U_FAILURE(status)) { return; }
-    ParseDataSink sink;
-    ures_getAllItemsWithFallback(rb.getAlias(), "parse", sink, status);
-    if (U_FAILURE(status)) { return; }
-
-    // NOTE: It is OK for these assertions to fail if there was a no-data build.
-    U_ASSERT(gUnicodeSets[COMMA] != nullptr);
-    U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr);
-    U_ASSERT(gUnicodeSets[PERIOD] != nullptr);
-    U_ASSERT(gUnicodeSets[STRICT_PERIOD] != nullptr);
-    U_ASSERT(gUnicodeSets[APOSTROPHE_SIGN] != nullptr);
-
-    LocalPointer<UnicodeSet> otherGrouping(new UnicodeSet(
-        u"[٬‘\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]",
-        status
-    ), status);
-    if (U_FAILURE(status)) { return; }
-    // Go through getImpl() rather than dereferencing the slot directly: the
-    // slot may be null when the data is missing (see the note above), and
-    // getImpl() then substitutes the empty set.
-    otherGrouping->addAll(*getImpl(APOSTROPHE_SIGN));
-    gUnicodeSets[OTHER_GROUPING_SEPARATORS] = otherGrouping.orphan();
-    gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
-    gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
-            STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);
-
-    U_ASSERT(gUnicodeSets[MINUS_SIGN] != nullptr);
-    U_ASSERT(gUnicodeSets[PLUS_SIGN] != nullptr);
-    U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr);
-    U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr);
-
-    gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status);
-    if (U_FAILURE(status)) { return; }
-
-    U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr);
-    U_ASSERT(gUnicodeSets[POUND_SIGN] != nullptr);
-    U_ASSERT(gUnicodeSets[RUPEE_SIGN] != nullptr);
-    U_ASSERT(gUnicodeSets[YEN_SIGN] != nullptr);
-    U_ASSERT(gUnicodeSets[WON_SIGN] != nullptr);
-
-    gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status);
-    if (U_FAILURE(status)) { return; }
-    gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
-    gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);
-
-    for (auto* uniset : gUnicodeSets) {
-        if (uniset != nullptr) {
-            uniset->freeze();
-        }
-    }
-}
-
-}
-
-// Returns the static set for key, running the one-time initialization on
-// first use. If initialization reported a failure, the static empty set is
-// returned for every key.
-const UnicodeSet* unisets::get(Key key) {
-    UErrorCode initStatus = U_ZERO_ERROR;
-    umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, initStatus);
-    return U_SUCCESS(initStatus)
-        ? getImpl(key)
-        : reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
-}
-
-// Returns key1 if its set contains str, otherwise NONE.
-Key unisets::chooseFrom(UnicodeString str, Key key1) {
-    if (get(key1)->contains(str)) {
-        return key1;
-    }
-    return NONE;
-}
-
-// Returns the first of key1, key2 whose set contains str, or NONE if neither does.
-Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
-    if (get(key1)->contains(str)) {
-        return key1;
-    }
-    return chooseFrom(str, key2);
-}
-
-//Key unisets::chooseCurrency(UnicodeString str) {
-// if (get(DOLLAR_SIGN)->contains(str)) {
-// return DOLLAR_SIGN;
-// } else if (get(POUND_SIGN)->contains(str)) {
-// return POUND_SIGN;
-// } else if (get(RUPEE_SIGN)->contains(str)) {
-// return RUPEE_SIGN;
-// } else if (get(YEN_SIGN)->contains(str)) {
-// return YEN_SIGN;
-// } else {
-// return NONE;
-// }
-//}
-
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
diff --git a/contrib/libs/icu/common/static_unicode_sets.h b/contrib/libs/icu/common/static_unicode_sets.h
deleted file mode 100644
index 5d90ce5908d..00000000000
--- a/contrib/libs/icu/common/static_unicode_sets.h
+++ /dev/null
@@ -1,140 +0,0 @@
-// © 2018 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-// This file contains utilities to deal with static-allocated UnicodeSets.
-//
-// Common use case: you write a "private static final" UnicodeSet in Java, and
-// want something similarly easy in C++. Originally written for number
-// parsing, but this header can be used for other applications.
-//
-// Main entrypoint: `unisets::get(unisets::MY_SET_ID_HERE)`
-//
-// This file is in common instead of i18n because it is needed by ucurr.cpp.
-//
-// Author: sffc
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-#ifndef __STATIC_UNICODE_SETS_H__
-#define __STATIC_UNICODE_SETS_H__
-
-#include "unicode/uniset.h"
-#include "unicode/unistr.h"
-
-U_NAMESPACE_BEGIN
-namespace unisets {
-
-// Identifies one of the static UnicodeSets. Values from EMPTY up to (but not
-// including) UNISETS_KEY_COUNT can be passed to unisets::get().
-enum Key {
- // NONE is used to indicate null in chooseFrom().
- // EMPTY is used to get an empty UnicodeSet.
- NONE = -1,
- EMPTY = 0,
-
- // Ignorables
- DEFAULT_IGNORABLES,
- STRICT_IGNORABLES,
-
- // Separators
- // Notes:
- // - COMMA is a superset of STRICT_COMMA
- // - PERIOD is a superset of STRICT_PERIOD
- // - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS
- // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and
- //   OTHER_GROUPING_SEPARATORS
- COMMA,
- PERIOD,
- STRICT_COMMA,
- STRICT_PERIOD,
- APOSTROPHE_SIGN,
- OTHER_GROUPING_SEPARATORS,
- ALL_SEPARATORS,
- STRICT_ALL_SEPARATORS,
-
- // Symbols
- MINUS_SIGN,
- PLUS_SIGN,
- PERCENT_SIGN,
- PERMILLE_SIGN,
- INFINITY_SIGN,
-
- // Currency Symbols
- DOLLAR_SIGN,
- POUND_SIGN,
- RUPEE_SIGN,
- YEN_SIGN,
- WON_SIGN,
-
- // Other
- DIGITS,
-
- // Combined Separators with Digits (for lead code points)
- DIGITS_OR_ALL_SEPARATORS,
- DIGITS_OR_STRICT_ALL_SEPARATORS,
-
- // The number of elements in the enum.
- UNISETS_KEY_COUNT
-};
-
-/**
- * Gets the static-allocated UnicodeSet according to the provided key. The
- * pointer will be deleted during u_cleanup(); the caller should NOT delete it.
- *
- * Exported as U_COMMON_API for ucurr.cpp
- *
- * This method is always safe and OK to chain: in the case of a memory or other
- * error, it returns an empty set from static memory.
- *
- * Example:
- *
- * UBool hasIgnorables = unisets::get(unisets::DEFAULT_IGNORABLES)->contains(...);
- *
- * @param key The desired UnicodeSet according to the enum in this file.
- * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but
- * may be empty if an error occurred during data loading.
- */
-U_COMMON_API const UnicodeSet* get(Key key);
-
-/**
- * Checks if the UnicodeSet given by key1 contains the given string.
- *
- * Exported as U_COMMON_API for numparse_decimal.cpp
- *
- * @param str The string to check.
- * @param key1 The set to check.
- * @return key1 if the set contains str, or NONE if not.
- */
-U_COMMON_API Key chooseFrom(UnicodeString str, Key key1);
-
-/**
- * Checks if the UnicodeSet given by either key1 or key2 contains the string.
- *
- * Exported as U_COMMON_API for numparse_decimal.cpp
- *
- * @param str The string to check.
- * @param key1 The first set to check.
- * @param key2 The second set to check.
- * @return key1 if that set contains str; key2 if that set contains str; or
- * NONE if neither set contains str.
- */
-U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2);
-
-// TODO: Load these from data: ICU-20108
-// Unused in C++:
-// Key chooseCurrency(UnicodeString str);
-// Used instead:
-static const struct {
- Key key;
- UChar32 exemplar;
-} kCurrencyEntries[] = {
- {DOLLAR_SIGN, u'$'},
- {POUND_SIGN, u'£'},
- {RUPEE_SIGN, u'₹'},
- {YEN_SIGN, u'¥'},
- {WON_SIGN, u'₩'},
-};
-
-} // namespace unisets
-U_NAMESPACE_END
-
-#endif //__STATIC_UNICODE_SETS_H__
-#endif /* #if !UCONFIG_NO_FORMATTING */
diff --git a/contrib/libs/icu/common/stringpiece.cpp b/contrib/libs/icu/common/stringpiece.cpp
deleted file mode 100644
index 99089e08ef9..00000000000
--- a/contrib/libs/icu/common/stringpiece.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-// Copyright (C) 2009-2013, International Business Machines
-// Corporation and others. All Rights Reserved.
-//
-// Copyright 2004 and onwards Google Inc.
-//
-// Author: [email protected] (Wilson Hsieh)
-//
-
-#include "unicode/utypes.h"
-#include "unicode/stringpiece.h"
-#include "cstring.h"
-#include "cmemory.h"
-
-U_NAMESPACE_BEGIN
-
-StringPiece::StringPiece(const char* str)
- : ptr_(str), length_((str == NULL) ? 0 : static_cast<int32_t>(uprv_strlen(str))) { }
-
-StringPiece::StringPiece(const StringPiece& x, int32_t pos) {
- if (pos < 0) {
- pos = 0;
- } else if (pos > x.length_) {
- pos = x.length_;
- }
- ptr_ = x.ptr_ + pos;
- length_ = x.length_ - pos;
-}
-
-StringPiece::StringPiece(const StringPiece& x, int32_t pos, int32_t len) {
- if (pos < 0) {
- pos = 0;
- } else if (pos > x.length_) {
- pos = x.length_;
- }
- if (len < 0) {
- len = 0;
- } else if (len > x.length_ - pos) {
- len = x.length_ - pos;
- }
- ptr_ = x.ptr_ + pos;
- length_ = len;
-}
-
-void StringPiece::set(const char* str) {
- ptr_ = str;
- if (str != NULL)
- length_ = static_cast<int32_t>(uprv_strlen(str));
- else
- length_ = 0;
-}
-
-int32_t StringPiece::find(StringPiece needle, int32_t offset) {
- if (length() == 0 && needle.length() == 0) {
- return 0;
- }
- // TODO: Improve to be better than O(N^2)?
- for (int32_t i = offset; i < length(); i++) {
- int32_t j = 0;
- for (; j < needle.length(); i++, j++) {
- if (data()[i] != needle.data()[j]) {
- i -= j;
- goto outer_end;
- }
- }
- return i - j;
- outer_end: void();
- }
- return -1;
-}
-
-int32_t StringPiece::compare(StringPiece other) {
- int32_t i = 0;
- for (; i < length(); i++) {
- if (i == other.length()) {
- // this is longer
- return 1;
- }
- char a = data()[i];
- char b = other.data()[i];
- if (a < b) {
- return -1;
- } else if (a > b) {
- return 1;
- }
- }
- if (i < other.length()) {
- // other is longer
- return -1;
- }
- return 0;
-}
-
-U_EXPORT UBool U_EXPORT2
-operator==(const StringPiece& x, const StringPiece& y) {
- int32_t len = x.size();
- if (len != y.size()) {
- return false;
- }
- if (len == 0) {
- return true;
- }
- const char* p = x.data();
- const char* p2 = y.data();
- // Test last byte in case strings share large common prefix
- --len;
- if (p[len] != p2[len]) return false;
- // At this point we can, but don't have to, ignore the last byte.
- return uprv_memcmp(p, p2, len) == 0;
-}
-
-
-const int32_t StringPiece::npos = 0x7fffffff;
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/stringtriebuilder.cpp b/contrib/libs/icu/common/stringtriebuilder.cpp
deleted file mode 100644
index 6f9cc2e5c22..00000000000
--- a/contrib/libs/icu/common/stringtriebuilder.cpp
+++ /dev/null
@@ -1,618 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2010-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: stringtriebuilder.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010dec24
-* created by: Markus W. Scherer
-*/
-
-#include "utypeinfo.h" // for 'typeid' to work
-#include "unicode/utypes.h"
-#include "unicode/stringtriebuilder.h"
-#include "uassert.h"
-#include "uhash.h"
-
-U_CDECL_BEGIN
-
-static int32_t U_CALLCONV
-hashStringTrieNode(const UHashTok key) {
- return icu::StringTrieBuilder::hashNode(key.pointer);
-}
-
-static UBool U_CALLCONV
-equalStringTrieNodes(const UHashTok key1, const UHashTok key2) {
- return icu::StringTrieBuilder::equalNodes(key1.pointer, key2.pointer);
-}
-
-U_CDECL_END
-
-U_NAMESPACE_BEGIN
-
-StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {}
-
-StringTrieBuilder::~StringTrieBuilder() {
- deleteCompactBuilder();
-}
-
-void
-StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return;
- }
- nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL,
- sizeGuess, &errorCode);
- if(U_SUCCESS(errorCode)) {
- if(nodes==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- } else {
- uhash_setKeyDeleter(nodes, uprv_deleteUObject);
- }
- }
-}
-
-void
-StringTrieBuilder::deleteCompactBuilder() {
- uhash_close(nodes);
- nodes=NULL;
-}
-
-void
-StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength,
- UErrorCode &errorCode) {
- if(buildOption==USTRINGTRIE_BUILD_FAST) {
- writeNode(0, elementsLength, 0);
- } else /* USTRINGTRIE_BUILD_SMALL */ {
- createCompactBuilder(2*elementsLength, errorCode);
- Node *root=makeNode(0, elementsLength, 0, errorCode);
- if(U_SUCCESS(errorCode)) {
- root->markRightEdgesFirst(-1);
- root->write(*this);
- }
- deleteCompactBuilder();
- }
-}
-
-// Requires start<limit,
-// and all strings of the [start..limit[ elements must be sorted and
-// have a common prefix of length unitIndex.
-int32_t
-StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
- UBool hasValue=FALSE;
- int32_t value=0;
- int32_t type;
- if(unitIndex==getElementStringLength(start)) {
- // An intermediate or final value.
- value=getElementValue(start++);
- if(start==limit) {
- return writeValueAndFinal(value, TRUE); // final-value node
- }
- hasValue=TRUE;
- }
- // Now all [start..limit[ strings are longer than unitIndex.
- int32_t minUnit=getElementUnit(start, unitIndex);
- int32_t maxUnit=getElementUnit(limit-1, unitIndex);
- if(minUnit==maxUnit) {
- // Linear-match node: All strings have the same character at unitIndex.
- int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
- writeNode(start, limit, lastUnitIndex);
- // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
- int32_t length=lastUnitIndex-unitIndex;
- int32_t maxLinearMatchLength=getMaxLinearMatchLength();
- while(length>maxLinearMatchLength) {
- lastUnitIndex-=maxLinearMatchLength;
- length-=maxLinearMatchLength;
- writeElementUnits(start, lastUnitIndex, maxLinearMatchLength);
- write(getMinLinearMatch()+maxLinearMatchLength-1);
- }
- writeElementUnits(start, unitIndex, length);
- type=getMinLinearMatch()+length-1;
- } else {
- // Branch node.
- int32_t length=countElementUnits(start, limit, unitIndex);
- // length>=2 because minUnit!=maxUnit.
- writeBranchSubNode(start, limit, unitIndex, length);
- if(--length<getMinLinearMatch()) {
- type=length;
- } else {
- write(length);
- type=0;
- }
- }
- return writeValueAndType(hasValue, value, type);
-}
-
-// start<limit && all strings longer than unitIndex &&
-// length different units at unitIndex
-int32_t
-StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) {
- UChar middleUnits[kMaxSplitBranchLevels];
- int32_t lessThan[kMaxSplitBranchLevels];
- int32_t ltLength=0;
- while(length>getMaxBranchLinearSubNodeLength()) {
- // Branch on the middle unit.
- // First, find the middle unit.
- int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
- // Encode the less-than branch first.
- middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
- lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2);
- ++ltLength;
- // Continue for the greater-or-equal branch.
- start=i;
- length=length-length/2;
- }
- // For each unit, find its elements array start and whether it has a final value.
- int32_t starts[kMaxBranchLinearSubNodeLength];
- UBool isFinal[kMaxBranchLinearSubNodeLength-1];
- int32_t unitNumber=0;
- do {
- int32_t i=starts[unitNumber]=start;
- UChar unit=getElementUnit(i++, unitIndex);
- i=indexOfElementWithNextUnit(i, unitIndex, unit);
- isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start);
- start=i;
- } while(++unitNumber<length-1);
- // unitNumber==length-1, and the maxUnit elements range is [start..limit[
- starts[unitNumber]=start;
-
- // Write the sub-nodes in reverse order: The jump lengths are deltas from
- // after their own positions, so if we wrote the minUnit sub-node first,
- // then its jump delta would be larger.
- // Instead we write the minUnit sub-node last, for a shorter delta.
- int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1];
- do {
- --unitNumber;
- if(!isFinal[unitNumber]) {
- jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1);
- }
- } while(unitNumber>0);
- // The maxUnit sub-node is written as the very last one because we do
- // not jump for it at all.
- unitNumber=length-1;
- writeNode(start, limit, unitIndex+1);
- int32_t offset=write(getElementUnit(start, unitIndex));
- // Write the rest of this node's unit-value pairs.
- while(--unitNumber>=0) {
- start=starts[unitNumber];
- int32_t value;
- if(isFinal[unitNumber]) {
- // Write the final value for the one string ending with this unit.
- value=getElementValue(start);
- } else {
- // Write the delta to the start position of the sub-node.
- value=offset-jumpTargets[unitNumber];
- }
- writeValueAndFinal(value, isFinal[unitNumber]);
- offset=write(getElementUnit(start, unitIndex));
- }
- // Write the split-branch nodes.
- while(ltLength>0) {
- --ltLength;
- writeDeltaTo(lessThan[ltLength]);
- offset=write(middleUnits[ltLength]);
- }
- return offset;
-}
-
-// Requires start<limit,
-// and all strings of the [start..limit[ elements must be sorted and
-// have a common prefix of length unitIndex.
-StringTrieBuilder::Node *
-StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- UBool hasValue=FALSE;
- int32_t value=0;
- if(unitIndex==getElementStringLength(start)) {
- // An intermediate or final value.
- value=getElementValue(start++);
- if(start==limit) {
- return registerFinalValue(value, errorCode);
- }
- hasValue=TRUE;
- }
- Node *node;
- // Now all [start..limit[ strings are longer than unitIndex.
- int32_t minUnit=getElementUnit(start, unitIndex);
- int32_t maxUnit=getElementUnit(limit-1, unitIndex);
- if(minUnit==maxUnit) {
- // Linear-match node: All strings have the same character at unitIndex.
- int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
- Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode);
- // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
- int32_t length=lastUnitIndex-unitIndex;
- int32_t maxLinearMatchLength=getMaxLinearMatchLength();
- while(length>maxLinearMatchLength) {
- lastUnitIndex-=maxLinearMatchLength;
- length-=maxLinearMatchLength;
- node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode);
- nextNode=registerNode(node, errorCode);
- }
- node=createLinearMatchNode(start, unitIndex, length, nextNode);
- } else {
- // Branch node.
- int32_t length=countElementUnits(start, limit, unitIndex);
- // length>=2 because minUnit!=maxUnit.
- Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode);
- node=new BranchHeadNode(length, subNode);
- }
- if(hasValue && node!=NULL) {
- if(matchNodesCanHaveValues()) {
- ((ValueNode *)node)->setValue(value);
- } else {
- node=new IntermediateValueNode(value, registerNode(node, errorCode));
- }
- }
- return registerNode(node, errorCode);
-}
-
-// start<limit && all strings longer than unitIndex &&
-// length different units at unitIndex
-StringTrieBuilder::Node *
-StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
- int32_t length, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- UChar middleUnits[kMaxSplitBranchLevels];
- Node *lessThan[kMaxSplitBranchLevels];
- int32_t ltLength=0;
- while(length>getMaxBranchLinearSubNodeLength()) {
- // Branch on the middle unit.
- // First, find the middle unit.
- int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
- // Create the less-than branch.
- middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
- lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode);
- ++ltLength;
- // Continue for the greater-or-equal branch.
- start=i;
- length=length-length/2;
- }
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- ListBranchNode *listNode=new ListBranchNode();
- if(listNode==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- // For each unit, find its elements array start and whether it has a final value.
- int32_t unitNumber=0;
- do {
- int32_t i=start;
- UChar unit=getElementUnit(i++, unitIndex);
- i=indexOfElementWithNextUnit(i, unitIndex, unit);
- if(start==i-1 && unitIndex+1==getElementStringLength(start)) {
- listNode->add(unit, getElementValue(start));
- } else {
- listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode));
- }
- start=i;
- } while(++unitNumber<length-1);
- // unitNumber==length-1, and the maxUnit elements range is [start..limit[
- UChar unit=getElementUnit(start, unitIndex);
- if(start==limit-1 && unitIndex+1==getElementStringLength(start)) {
- listNode->add(unit, getElementValue(start));
- } else {
- listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode));
- }
- Node *node=registerNode(listNode, errorCode);
- // Create the split-branch nodes.
- while(ltLength>0) {
- --ltLength;
- node=registerNode(
- new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode);
- }
- return node;
-}
-
-StringTrieBuilder::Node *
-StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- delete newNode;
- return NULL;
- }
- if(newNode==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- const UHashElement *old=uhash_find(nodes, newNode);
- if(old!=NULL) {
- delete newNode;
- return (Node *)old->key.pointer;
- }
- // If uhash_puti() returns a non-zero value from an equivalent, previously
- // registered node, then uhash_find() failed to find that and we will leak newNode.
-#if U_DEBUG
- int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue.
-#endif
- uhash_puti(nodes, newNode, 1, &errorCode);
- U_ASSERT(oldValue==0);
- if(U_FAILURE(errorCode)) {
- delete newNode;
- return NULL;
- }
- return newNode;
-}
-
-StringTrieBuilder::Node *
-StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- FinalValueNode key(value);
- const UHashElement *old=uhash_find(nodes, &key);
- if(old!=NULL) {
- return (Node *)old->key.pointer;
- }
- Node *newNode=new FinalValueNode(value);
- if(newNode==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- // If uhash_puti() returns a non-zero value from an equivalent, previously
- // registered node, then uhash_find() failed to find that and we will leak newNode.
-#if U_DEBUG
- int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue.
-#endif
- uhash_puti(nodes, newNode, 1, &errorCode);
- U_ASSERT(oldValue==0);
- if(U_FAILURE(errorCode)) {
- delete newNode;
- return NULL;
- }
- return newNode;
-}
-
-int32_t
-StringTrieBuilder::hashNode(const void *node) {
- return ((const Node *)node)->hashCode();
-}
-
-UBool
-StringTrieBuilder::equalNodes(const void *left, const void *right) {
- return *(const Node *)left==*(const Node *)right;
-}
-
-UBool
-StringTrieBuilder::Node::operator==(const Node &other) const {
- return this==&other || (typeid(*this)==typeid(other) && hash==other.hash);
-}
-
-int32_t
-StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) {
- if(offset==0) {
- offset=edgeNumber;
- }
- return edgeNumber;
-}
-
-UBool
-StringTrieBuilder::FinalValueNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!Node::operator==(other)) {
- return FALSE;
- }
- const FinalValueNode &o=(const FinalValueNode &)other;
- return value==o.value;
-}
-
-void
-StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) {
- offset=builder.writeValueAndFinal(value, TRUE);
-}
-
-UBool
-StringTrieBuilder::ValueNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!Node::operator==(other)) {
- return FALSE;
- }
- const ValueNode &o=(const ValueNode &)other;
- return hasValue==o.hasValue && (!hasValue || value==o.value);
-}
-
-UBool
-StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!ValueNode::operator==(other)) {
- return FALSE;
- }
- const IntermediateValueNode &o=(const IntermediateValueNode &)other;
- return next==o.next;
-}
-
-int32_t
-StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) {
- if(offset==0) {
- offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
- }
- return edgeNumber;
-}
-
-void
-StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) {
- next->write(builder);
- offset=builder.writeValueAndFinal(value, FALSE);
-}
-
-UBool
-StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!ValueNode::operator==(other)) {
- return FALSE;
- }
- const LinearMatchNode &o=(const LinearMatchNode &)other;
- return length==o.length && next==o.next;
-}
-
-int32_t
-StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
- if(offset==0) {
- offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
- }
- return edgeNumber;
-}
-
-UBool
-StringTrieBuilder::ListBranchNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!Node::operator==(other)) {
- return FALSE;
- }
- const ListBranchNode &o=(const ListBranchNode &)other;
- for(int32_t i=0; i<length; ++i) {
- if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) {
- return FALSE;
- }
- }
- return TRUE;
-}
-
-int32_t
-StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
- if(offset==0) {
- firstEdgeNumber=edgeNumber;
- int32_t step=0;
- int32_t i=length;
- do {
- Node *edge=equal[--i];
- if(edge!=NULL) {
- edgeNumber=edge->markRightEdgesFirst(edgeNumber-step);
- }
- // For all but the rightmost edge, decrement the edge number.
- step=1;
- } while(i>0);
- offset=edgeNumber;
- }
- return edgeNumber;
-}
-
-void
-StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) {
- // Write the sub-nodes in reverse order: The jump lengths are deltas from
- // after their own positions, so if we wrote the minUnit sub-node first,
- // then its jump delta would be larger.
- // Instead we write the minUnit sub-node last, for a shorter delta.
- int32_t unitNumber=length-1;
- Node *rightEdge=equal[unitNumber];
- int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset();
- do {
- --unitNumber;
- if(equal[unitNumber]!=NULL) {
- equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder);
- }
- } while(unitNumber>0);
- // The maxUnit sub-node is written as the very last one because we do
- // not jump for it at all.
- unitNumber=length-1;
- if(rightEdge==NULL) {
- builder.writeValueAndFinal(values[unitNumber], TRUE);
- } else {
- rightEdge->write(builder);
- }
- offset=builder.write(units[unitNumber]);
- // Write the rest of this node's unit-value pairs.
- while(--unitNumber>=0) {
- int32_t value;
- UBool isFinal;
- if(equal[unitNumber]==NULL) {
- // Write the final value for the one string ending with this unit.
- value=values[unitNumber];
- isFinal=TRUE;
- } else {
- // Write the delta to the start position of the sub-node.
- U_ASSERT(equal[unitNumber]->getOffset()>0);
- value=offset-equal[unitNumber]->getOffset();
- isFinal=FALSE;
- }
- builder.writeValueAndFinal(value, isFinal);
- offset=builder.write(units[unitNumber]);
- }
-}
-
-UBool
-StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!Node::operator==(other)) {
- return FALSE;
- }
- const SplitBranchNode &o=(const SplitBranchNode &)other;
- return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual;
-}
-
-int32_t
-StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
- if(offset==0) {
- firstEdgeNumber=edgeNumber;
- edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber);
- offset=edgeNumber=lessThan->markRightEdgesFirst(edgeNumber-1);
- }
- return edgeNumber;
-}
-
-void
-StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) {
- // Encode the less-than branch first.
- lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder);
- // Encode the greater-or-equal branch last because we do not jump for it at all.
- greaterOrEqual->write(builder);
- // Write this node.
- U_ASSERT(lessThan->getOffset()>0);
- builder.writeDeltaTo(lessThan->getOffset()); // less-than
- offset=builder.write(unit);
-}
-
-UBool
-StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!ValueNode::operator==(other)) {
- return FALSE;
- }
- const BranchHeadNode &o=(const BranchHeadNode &)other;
- return length==o.length && next==o.next;
-}
-
-int32_t
-StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
- if(offset==0) {
- offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
- }
- return edgeNumber;
-}
-
-void
-StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) {
- next->write(builder);
- if(length<=builder.getMinLinearMatch()) {
- offset=builder.writeValueAndType(hasValue, value, length-1);
- } else {
- builder.write(length-1);
- offset=builder.writeValueAndType(hasValue, value, 0);
- }
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/uarrsort.cpp b/contrib/libs/icu/common/uarrsort.cpp
deleted file mode 100644
index c17dbb2e2b1..00000000000
--- a/contrib/libs/icu/common/uarrsort.cpp
+++ /dev/null
@@ -1,274 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2003-2013, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: uarrsort.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2003aug04
-* created by: Markus W. Scherer
-*
-* Internal function for sorting arrays.
-*/
-
-#include <cstddef>
-
-#include "unicode/utypes.h"
-#include "cmemory.h"
-#include "uarrsort.h"
-
-enum {
- /**
- * "from Knuth"
- *
- * A binary search over 8 items performs 4 comparisons:
- * log2(8)=3 to subdivide, +1 to check for equality.
- * A linear search over 8 items on average also performs 4 comparisons.
- */
- MIN_QSORT=9,
- STACK_ITEM_SIZE=200
-};
-
-static constexpr int32_t sizeInMaxAlignTs(int32_t sizeInBytes) {
- return (sizeInBytes + sizeof(std::max_align_t) - 1) / sizeof(std::max_align_t);
-}
-
-/* UComparator convenience implementations ---------------------------------- */
-
-U_CAPI int32_t U_EXPORT2
-uprv_uint16Comparator(const void *context, const void *left, const void *right) {
- (void)context;
- return (int32_t)*(const uint16_t *)left - (int32_t)*(const uint16_t *)right;
-}
-
-U_CAPI int32_t U_EXPORT2
-uprv_int32Comparator(const void *context, const void *left, const void *right) {
- (void)context;
- return *(const int32_t *)left - *(const int32_t *)right;
-}
-
-U_CAPI int32_t U_EXPORT2
-uprv_uint32Comparator(const void *context, const void *left, const void *right) {
- (void)context;
- uint32_t l=*(const uint32_t *)left, r=*(const uint32_t *)right;
-
- /* compare directly because (l-r) would overflow the int32_t result */
- if(l<r) {
- return -1;
- } else if(l==r) {
- return 0;
- } else /* l>r */ {
- return 1;
- }
-}
-
-/* Insertion sort using binary search --------------------------------------- */
-
-U_CAPI int32_t U_EXPORT2
-uprv_stableBinarySearch(char *array, int32_t limit, void *item, int32_t itemSize,
- UComparator *cmp, const void *context) {
- int32_t start=0;
- UBool found=FALSE;
-
- /* Binary search until we get down to a tiny sub-array. */
- while((limit-start)>=MIN_QSORT) {
- int32_t i=(start+limit)/2;
- int32_t diff=cmp(context, item, array+i*itemSize);
- if(diff==0) {
- /*
- * Found the item. We look for the *last* occurrence of such
- * an item, for stable sorting.
- * If we knew that there will be only few equal items,
- * we could break now and enter the linear search.
- * However, if there are many equal items, then it should be
- * faster to continue with the binary search.
- * It seems likely that we either have all unique items
- * (where found will never become TRUE in the insertion sort)
- * or potentially many duplicates.
- */
- found=TRUE;
- start=i+1;
- } else if(diff<0) {
- limit=i;
- } else {
- start=i;
- }
- }
-
- /* Linear search over the remaining tiny sub-array. */
- while(start<limit) {
- int32_t diff=cmp(context, item, array+start*itemSize);
- if(diff==0) {
- found=TRUE;
- } else if(diff<0) {
- break;
- }
- ++start;
- }
- return found ? (start-1) : ~start;
-}
-
-static void
-doInsertionSort(char *array, int32_t length, int32_t itemSize,
- UComparator *cmp, const void *context, void *pv) {
- int32_t j;
-
- for(j=1; j<length; ++j) {
- char *item=array+j*itemSize;
- int32_t insertionPoint=uprv_stableBinarySearch(array, j, item, itemSize, cmp, context);
- if(insertionPoint<0) {
- insertionPoint=~insertionPoint;
- } else {
- ++insertionPoint; /* one past the last equal item */
- }
- if(insertionPoint<j) {
- char *dest=array+insertionPoint*itemSize;
- uprv_memcpy(pv, item, itemSize); /* v=array[j] */
- uprv_memmove(dest+itemSize, dest, (j-insertionPoint)*(size_t)itemSize);
- uprv_memcpy(dest, pv, itemSize); /* array[insertionPoint]=v */
- }
- }
-}
-
-static void
-insertionSort(char *array, int32_t length, int32_t itemSize,
- UComparator *cmp, const void *context, UErrorCode *pErrorCode) {
-
- icu::MaybeStackArray<std::max_align_t, sizeInMaxAlignTs(STACK_ITEM_SIZE)> v;
- if (sizeInMaxAlignTs(itemSize) > v.getCapacity() &&
- v.resize(sizeInMaxAlignTs(itemSize)) == nullptr) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- doInsertionSort(array, length, itemSize, cmp, context, v.getAlias());
-}
-
-/* QuickSort ---------------------------------------------------------------- */
-
-/*
- * This implementation is semi-recursive:
- * It recurses for the smaller sub-array to shorten the recursion depth,
- * and loops for the larger sub-array.
- *
- * Loosely after QuickSort algorithms in
- * Niklaus Wirth
- * Algorithmen und Datenstrukturen mit Modula-2
- * B.G. Teubner Stuttgart
- * 4. Auflage 1986
- * ISBN 3-519-02260-5
- */
-static void
-subQuickSort(char *array, int32_t start, int32_t limit, int32_t itemSize,
- UComparator *cmp, const void *context,
- void *px, void *pw) {
- int32_t left, right;
-
- /* start and left are inclusive, limit and right are exclusive */
- do {
- if((start+MIN_QSORT)>=limit) {
- doInsertionSort(array+start*itemSize, limit-start, itemSize, cmp, context, px);
- break;
- }
-
- left=start;
- right=limit;
-
- /* x=array[middle] */
- uprv_memcpy(px, array+(size_t)((start+limit)/2)*itemSize, itemSize);
-
- do {
- while(/* array[left]<x */
- cmp(context, array+left*itemSize, px)<0
- ) {
- ++left;
- }
- while(/* x<array[right-1] */
- cmp(context, px, array+(right-1)*itemSize)<0
- ) {
- --right;
- }
-
- /* swap array[left] and array[right-1] via w; ++left; --right */
- if(left<right) {
- --right;
-
- if(left<right) {
- uprv_memcpy(pw, array+(size_t)left*itemSize, itemSize);
- uprv_memcpy(array+(size_t)left*itemSize, array+(size_t)right*itemSize, itemSize);
- uprv_memcpy(array+(size_t)right*itemSize, pw, itemSize);
- }
-
- ++left;
- }
- } while(left<right);
-
- /* sort sub-arrays */
- if((right-start)<(limit-left)) {
- /* sort [start..right[ */
- if(start<(right-1)) {
- subQuickSort(array, start, right, itemSize, cmp, context, px, pw);
- }
-
- /* sort [left..limit[ */
- start=left;
- } else {
- /* sort [left..limit[ */
- if(left<(limit-1)) {
- subQuickSort(array, left, limit, itemSize, cmp, context, px, pw);
- }
-
- /* sort [start..right[ */
- limit=right;
- }
- } while(start<(limit-1));
-}
-
-static void
-quickSort(char *array, int32_t length, int32_t itemSize,
- UComparator *cmp, const void *context, UErrorCode *pErrorCode) {
- /* allocate two intermediate item variables (x and w) */
- icu::MaybeStackArray<std::max_align_t, sizeInMaxAlignTs(STACK_ITEM_SIZE) * 2> xw;
- if(sizeInMaxAlignTs(itemSize)*2 > xw.getCapacity() &&
- xw.resize(sizeInMaxAlignTs(itemSize) * 2) == nullptr) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- subQuickSort(array, 0, length, itemSize, cmp, context,
- xw.getAlias(), xw.getAlias() + sizeInMaxAlignTs(itemSize));
-}
-
-/* uprv_sortArray() API ----------------------------------------------------- */
-
-/*
- * Check arguments, select an appropriate implementation,
- * cast the array to char * so that array+i*itemSize works.
- */
-U_CAPI void U_EXPORT2
-uprv_sortArray(void *array, int32_t length, int32_t itemSize,
- UComparator *cmp, const void *context,
- UBool sortStable, UErrorCode *pErrorCode) {
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
- if((length>0 && array==NULL) || length<0 || itemSize<=0 || cmp==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- if(length<=1) {
- return;
- } else if(length<MIN_QSORT || sortStable) {
- insertionSort((char *)array, length, itemSize, cmp, context, pErrorCode);
- } else {
- quickSort((char *)array, length, itemSize, cmp, context, pErrorCode);
- }
-}
diff --git a/contrib/libs/icu/common/uarrsort.h b/contrib/libs/icu/common/uarrsort.h
deleted file mode 100644
index a55dca5b9ea..00000000000
--- a/contrib/libs/icu/common/uarrsort.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2003-2013, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: uarrsort.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2003aug04
-* created by: Markus W. Scherer
-*
-* Internal function for sorting arrays.
-*/
-
-#ifndef __UARRSORT_H__
-#define __UARRSORT_H__
-
-#include "unicode/utypes.h"
-
-U_CDECL_BEGIN
-/**
- * Function type for comparing two items as part of sorting an array or similar.
- * Callback function for uprv_sortArray().
- *
- * @param context Application-specific pointer, passed through by uprv_sortArray().
- * @param left Pointer to the "left" item.
- * @param right Pointer to the "right" item.
- * @return 32-bit signed integer comparison result:
- * <0 if left<right
- * ==0 if left==right
- * >0 if left>right
- *
- * @internal
- */
-typedef int32_t U_CALLCONV
-UComparator(const void *context, const void *left, const void *right);
-U_CDECL_END
-
-/**
- * Array sorting function.
- * Uses a UComparator for comparing array items to each other, and simple
- * memory copying to move items.
- *
- * @param array The array to be sorted.
- * @param length The number of items in the array.
- * @param itemSize The size in bytes of each array item.
- * @param cmp UComparator function used to compare two items each.
- * @param context Application-specific pointer, passed through to the UComparator.
- * @param sortStable If true, a stable sorting algorithm must be used.
- * @param pErrorCode ICU in/out UErrorCode parameter.
- *
- * @internal
- */
-U_CAPI void U_EXPORT2
-uprv_sortArray(void *array, int32_t length, int32_t itemSize,
- UComparator *cmp, const void *context,
- UBool sortStable, UErrorCode *pErrorCode);
-
-/**
- * Convenience UComparator implementation for uint16_t arrays.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-uprv_uint16Comparator(const void *context, const void *left, const void *right);
-
-/**
- * Convenience UComparator implementation for int32_t arrays.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-uprv_int32Comparator(const void *context, const void *left, const void *right);
-
-/**
- * Convenience UComparator implementation for uint32_t arrays.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-uprv_uint32Comparator(const void *context, const void *left, const void *right);
-
-/**
- * Much like Java Collections.binarySearch(list, key, comparator).
- *
- * Except: Java documents "If the list contains multiple elements equal to
- * the specified object, there is no guarantee which one will be found."
- *
- * This version here will return the largest index of any equal item,
- * for use in stable sorting.
- *
- * @return the index>=0 where the item was found:
- * the largest such index, if multiple, for stable sorting;
- * or the index<0 for inserting the item at ~index in sorted order
- */
-U_CAPI int32_t U_EXPORT2
-uprv_stableBinarySearch(char *array, int32_t length, void *item, int32_t itemSize,
- UComparator *cmp, const void *context);
-
-#endif
diff --git a/contrib/libs/icu/common/uassert.h b/contrib/libs/icu/common/uassert.h
deleted file mode 100644
index 15cd55c8734..00000000000
--- a/contrib/libs/icu/common/uassert.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2002-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* File uassert.h
-*
-* Contains the U_ASSERT and UPRV_UNREACHABLE macros
-*
-******************************************************************************
-*/
-#ifndef U_ASSERT_H
-#define U_ASSERT_H
-
-/* utypes.h is included to get the proper define for uint8_t */
-#include "unicode/utypes.h"
-/* for abort */
-#include <stdlib.h>
-
-/**
- * \def U_ASSERT
- * When U_DEBUG is set, U_ASSERT wraps the C library assert macro.
- * Otherwise the expression is not evaluated: the macro expands to void()
- * when U_CPLUSPLUS_VERSION is set, and to nothing in all other builds.
- * By changing the definition here, the assert behavior for ICU can be changed
- * without affecting other non-ICU uses of the C library assert().
-*/
-#if U_DEBUG
-# include <assert.h>
-# define U_ASSERT(exp) assert(exp)
-#elif U_CPLUSPLUS_VERSION
-# define U_ASSERT(exp) void()
-#else
-# define U_ASSERT(exp)
-#endif
-
-/**
- * \def UPRV_UNREACHABLE
- * This macro is used to unconditionally abort if unreachable code is ever executed.
- * A definition that already exists when this header is processed is kept as is;
- * otherwise the macro expands to abort().
- * @internal
-*/
-#if defined(UPRV_UNREACHABLE)
- // Use the predefined value.
-#else
-# define UPRV_UNREACHABLE abort()
-#endif
-
-#endif
diff --git a/contrib/libs/icu/common/ubidi.cpp b/contrib/libs/icu/common/ubidi.cpp
deleted file mode 100644
index 3ddb45721e2..00000000000
--- a/contrib/libs/icu/common/ubidi.cpp
+++ /dev/null
@@ -1,3036 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1999-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: ubidi.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999jul27
-* created by: Markus W. Scherer, updated by Matitiahu Allouche
-*
-*/
-
-#include "cmemory.h"
-#include "unicode/utypes.h"
-#include "unicode/ustring.h"
-#include "unicode/uchar.h"
-#include "unicode/ubidi.h"
-#include "unicode/utf16.h"
-#include "ubidi_props.h"
-#include "ubidiimp.h"
-#include "uassert.h"
-
-/*
- * General implementation notes:
- *
- * Throughout the implementation, there are comments like (W2) that refer to
- * rules of the BiDi algorithm, in this example to the second rule of the
- * resolution of weak types.
- *
- * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
- * character according to UTF-16, the second UChar gets the directional property of
- * the entire character assigned, while the first one gets a BN, a boundary
- * neutral, type, which is ignored by most of the algorithm according to
- * rule (X9) and the implementation suggestions of the BiDi algorithm.
- *
- * Later, adjustWSLevels() will set the level for each BN to that of the
- * following character (UChar), which results in surrogate pairs getting the
- * same level on each of their surrogates.
- *
- * In a UTF-8 implementation, the same thing could be done: the last byte of
- * a multi-byte sequence would get the "real" property, while all previous
- * bytes of that sequence would get BN.
- *
- * It is not possible to assign all those parts of a character the same real
- * property because this would fail in the resolution of weak types with rules
- * that look at immediately surrounding types.
- *
- * As a related topic, this implementation does not remove Boundary Neutral
- * types from the input, but ignores them wherever this is relevant.
- * For example, the loop for the resolution of the weak types reads
- * types until it finds a non-BN.
- * Also, explicit embedding codes are neither changed into BN nor removed.
- * They are only treated the same way real BNs are.
- * As stated before, adjustWSLevels() takes care of them at the end.
- * For the purpose of conformance, the levels of all these codes
- * do not matter.
- *
- * Note that this implementation modifies the dirProps
- * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
- * X6, N0 (replace paired brackets by L or R).
- *
- * In this implementation, the resolution of weak types (W1 to W6),
- * neutrals (N1 and N2), and the assignment of the resolved level (In)
- * are all done in one single loop, in resolveImplicitLevels().
- * Changes of dirProp values are done on the fly, without writing
- * them back to the dirProps array.
- *
- *
- * This implementation contains code that makes it possible to bypass steps
- * of the algorithm that are not needed for the specific paragraph,
- * in order to speed up the most common cases considerably,
- * like text that is entirely LTR, or RTL text without numbers.
- *
- * Most of this is done by setting a bit for each directional property
- * in a flags variable and later checking for whether there are
- * any LTR characters or any RTL characters, or both, whether
- * there are any explicit embedding codes, etc.
- *
- * If the (Xn) steps are performed, then the flags are re-evaluated,
- * because they will then not contain the embedding codes any more
- * and will be adjusted for override codes, so that subsequently
- * more bypassing may be possible than what the initial flags suggested.
- *
- * If the text is not mixed-directional, then the
- * algorithm steps for the weak type resolution are not performed,
- * and all levels are set to the paragraph level.
- *
- * If there are no explicit embedding codes, then the (Xn) steps
- * are not performed.
- *
- * If embedding levels are supplied as a parameter, then all
- * explicit embedding codes are ignored, and the (Xn) steps
- * are not performed.
- *
- * White Space types could get the level of the run they belong to,
- * and are checked with a test of (flags&MASK_EMBEDDING) to
- * consider if the paragraph direction should be considered in
- * the flags variable.
- *
- * If there are no White Space types in the paragraph, then
- * (L1) is not necessary in adjustWSLevels().
- */
-
-/* to avoid some conditional statements, use tiny constant arrays */
-/* each table has two entries: [0] holds the L/LRE/LRO flag, [1] the R/RLE/RLO flag */
-static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
-static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
-static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
-
-/* select a table entry using only bit 0 of the level */
-#define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
-#define DIRPROP_FLAG_E(level) flagE[(level)&1]
-#define DIRPROP_FLAG_O(level) flagO[(level)&1]
-
-/* yields L when the argument is L, and R for every other value */
-#define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
-
-/* the level value with the UBIDI_LEVEL_OVERRIDE bit cleared */
-#define NO_OVERRIDE(level) ((level)&~UBIDI_LEVEL_OVERRIDE)
-
-/* UBiDi object management -------------------------------------------------- */
-
-/*
- * Create a UBiDi object without any preallocated arrays.
- * This is ubidi_openSized(0, 0, ...) with a private error code, so a failure
- * is only visible to the caller as a NULL result.
- */
-U_CAPI UBiDi * U_EXPORT2
-ubidi_open(void)
-{
-    UErrorCode status=U_ZERO_ERROR;
-    UBiDi *created=ubidi_openSized(0, 0, &status);
-    return created;
-}
-
-/*
- * Create a UBiDi object, optionally preallocating its internal arrays.
- *
- * maxLength>0 preallocates the dirProps and levels arrays; maxLength==0
- * lets them be allocated later (mayAllocateText).
- * maxRunCount>1 preallocates the runs array, maxRunCount==1 relies on the
- * built-in single run, and maxRunCount==0 lets runs be allocated later
- * (mayAllocateRuns).
- *
- * Returns NULL if pErrorCode is NULL or already indicates a failure, if an
- * argument is negative (U_ILLEGAL_ARGUMENT_ERROR), or if an allocation
- * fails (U_MEMORY_ALLOCATION_ERROR).
- */
-U_CAPI UBiDi * U_EXPORT2
-ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
-    /* validate the arguments */
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return NULL;
-    }
-    if(maxLength<0 || maxRunCount<0) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return NULL; /* invalid arguments */
-    }
-
-    /* allocate the object itself */
-    UBiDi *pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
-    if(pBiDi==NULL) {
-        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
-        return NULL;
-    }
-
-    /* start from a clean slate: all pointers NULL, all flags FALSE, all sizes 0 */
-    uprv_memset(pBiDi, 0, sizeof(UBiDi));
-
-    /* text-sized arrays: preallocate them now, or permit allocation later */
-    if(maxLength<=0) {
-        pBiDi->mayAllocateText=TRUE;
-    } else if(!(getInitialDirPropsMemory(pBiDi, maxLength) &&
-                getInitialLevelsMemory(pBiDi, maxLength))) {
-        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
-    }
-
-    /* runs array: same choice, with a shortcut for a single run */
-    if(maxRunCount<=0) {
-        pBiDi->mayAllocateRuns=TRUE;
-    } else if(maxRunCount==1) {
-        /* use simpleRuns[] */
-        pBiDi->runsSize=sizeof(Run);
-    } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
-        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
-    }
-
-    if(U_FAILURE(*pErrorCode)) {
-        /* release whatever was allocated before the failure */
-        ubidi_close(pBiDi);
-        return NULL;
-    }
-    return pBiDi;
-}
-
-/*
- * Make sure that the buffer behind bidiMem holds at least sizeNeeded bytes.
- *
- * Memory may be allocated or grown only when mayAllocate is TRUE.
- * On success, *pSize is updated whenever a new or larger block was obtained.
- *
- * Assume sizeNeeded>0.
- * If the buffer pointer is not NULL, then assume *pSize>0.
- *
- * Returns TRUE if the buffer is large enough afterwards, FALSE otherwise;
- * on failure the existing buffer and *pSize are left untouched.
- *
- * ### growing uses realloc(), which may unnecessarily copy the old data
- * that is usually not needed any more;
- * is this the best way to do this??
- */
-U_CFUNC UBool
-ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
-    void **pMemory = (void **)bidiMem;
-
-    /* an existing block that is already big enough needs no work */
-    if(*pMemory!=NULL && sizeNeeded<=*pSize) {
-        return TRUE;
-    }
-    /* everything below needs the allocator */
-    if(!mayAllocate) {
-        return FALSE;
-    }
-
-    void *block;
-    if(*pMemory==NULL) {
-        block=uprv_malloc(sizeNeeded);
-    } else {
-        /* in most cases, the copy-old-data part of realloc is not needed,
-         * but it is needed when adding runs using getRunsMemory()
-         * in setParaRunsOnly()
-         */
-        block=uprv_realloc(*pMemory, sizeNeeded);
-    }
-    if(block==NULL) {
-        /* allocation or growth failed */
-        return FALSE;
-    }
-    *pMemory=block;
-    *pSize=sizeNeeded;
-    return TRUE;
-}
-
-/*
- * Release a UBiDi object together with every array it owns.
- * A NULL argument is ignored.
- */
-U_CAPI void U_EXPORT2
-ubidi_close(UBiDi *pBiDi) {
-    if(pBiDi==NULL) {
-        return;
-    }
-
-    pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */
-
-    /* free each owned array that was actually allocated */
-    if(pBiDi->dirPropsMemory!=NULL) { uprv_free(pBiDi->dirPropsMemory); }
-    if(pBiDi->levelsMemory!=NULL) { uprv_free(pBiDi->levelsMemory); }
-    if(pBiDi->openingsMemory!=NULL) { uprv_free(pBiDi->openingsMemory); }
-    if(pBiDi->parasMemory!=NULL) { uprv_free(pBiDi->parasMemory); }
-    if(pBiDi->runsMemory!=NULL) { uprv_free(pBiDi->runsMemory); }
-    if(pBiDi->isolatesMemory!=NULL) { uprv_free(pBiDi->isolatesMemory); }
-    if(pBiDi->insertPoints.points!=NULL) { uprv_free(pBiDi->insertPoints.points); }
-
-    /* finally the object itself */
-    uprv_free(pBiDi);
-}
-
-/* set to approximate "inverse BiDi" ---------------------------------------- */
-
-/*
- * Switch the inverse flag on or off and keep the reordering mode in sync:
- * inverse selects UBIDI_REORDER_INVERSE_NUMBERS_AS_L, otherwise
- * UBIDI_REORDER_DEFAULT. A NULL object is ignored.
- */
-U_CAPI void U_EXPORT2
-ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
-    if(pBiDi==NULL) {
-        return;
-    }
-    pBiDi->isInverse=isInverse;
-    if(isInverse) {
-        pBiDi->reorderingMode = UBIDI_REORDER_INVERSE_NUMBERS_AS_L;
-    } else {
-        pBiDi->reorderingMode = UBIDI_REORDER_DEFAULT;
-    }
-}
-
-/* Report the inverse flag of the object; a NULL object yields FALSE. */
-U_CAPI UBool U_EXPORT2
-ubidi_isInverse(UBiDi *pBiDi) {
-    if(pBiDi==NULL) {
-        return FALSE;
-    }
-    return pBiDi->isInverse;
-}
-
-/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
- * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
- * concept of RUNS_ONLY which is a double operation.
- * It could be advantageous to divide this into 3 concepts:
- * a) Operation: direct / inverse / RUNS_ONLY
- * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
- * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
- * This would allow combinations not possible today like RUNS_ONLY with
- * NUMBERS_SPECIAL.
- * It would also allow setting INSERT_MARKS for the direct step of RUNS_ONLY
- * and REMOVE_CONTROLS for the inverse step.
- * Not all combinations would be supported, and probably not all do make sense.
- * This would need to document which ones are supported and what are the
- * fallbacks for unsupported combinations.
- */
-/*
- * Store a new reordering mode and derive the inverse flag from it.
- * The call is ignored for a NULL object or for a mode outside
- * [UBIDI_REORDER_DEFAULT, UBIDI_REORDER_COUNT).
- */
-U_CAPI void U_EXPORT2
-ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
-    if(pBiDi==NULL) {
-        return;
-    }
-    if(reorderingMode < UBIDI_REORDER_DEFAULT || reorderingMode >= UBIDI_REORDER_COUNT) {
-        return; /* not a known mode */
-    }
-    pBiDi->reorderingMode = reorderingMode;
-    /* only INVERSE_NUMBERS_AS_L counts as "inverse" for the legacy flag */
-    pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
-}
-
-/* Return the stored reordering mode; a NULL object yields UBIDI_REORDER_DEFAULT. */
-U_CAPI UBiDiReorderingMode U_EXPORT2
-ubidi_getReorderingMode(UBiDi *pBiDi) {
-    if(pBiDi==NULL) {
-        return UBIDI_REORDER_DEFAULT;
-    }
-    return pBiDi->reorderingMode;
-}
-
-/*
- * Store the reordering option bits in the object.
- * When UBIDI_OPTION_REMOVE_CONTROLS is requested, UBIDI_OPTION_INSERT_MARKS
- * is cleared before storing. A NULL object is ignored.
- */
-U_CAPI void U_EXPORT2
-ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
-    if(pBiDi==NULL) {
-        return;
-    }
-    if(reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
-        reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
-    }
-    pBiDi->reorderingOptions=reorderingOptions;
-}
-
-/* Return the stored reordering option bits; a NULL object yields 0. */
-U_CAPI uint32_t U_EXPORT2
-ubidi_getReorderingOptions(UBiDi *pBiDi) {
-    if(pBiDi==NULL) {
-        return 0;
-    }
-    return pBiDi->reorderingOptions;
-}
-
-/*
- * Determine the direction of the first strong character in the text.
- *
- * text   the UTF-16 text; NULL yields UBIDI_NEUTRAL
- * length number of code units, or -1 to use u_strlen(text);
- *        values below -1 yield UBIDI_NEUTRAL
- *
- * Returns UBIDI_LTR for a first strong U_LEFT_TO_RIGHT character, UBIDI_RTL
- * for U_RIGHT_TO_LEFT or U_RIGHT_TO_LEFT_ARABIC, and UBIDI_NEUTRAL when the
- * text contains none of these.
- */
-U_CAPI UBiDiDirection U_EXPORT2
-ubidi_getBaseDirection(const UChar *text,
-                       int32_t length){
-    if( text==NULL || length<-1 ){
-        return UBIDI_NEUTRAL;
-    }
-    if(length==-1) {
-        length=u_strlen(text);
-    }
-
-    int32_t pos=0;
-    while(pos<length) {
-        UChar32 cp;
-        /* U16_NEXT advances pos past the code point it reads */
-        U16_NEXT(text, pos, length, cp);
-        switch(u_charDirection(cp)) {
-        case U_LEFT_TO_RIGHT:
-            return UBIDI_LTR;
-        case U_RIGHT_TO_LEFT:
-        case U_RIGHT_TO_LEFT_ARABIC:
-            return UBIDI_RTL;
-        default:
-            break; /* not strong: keep looking */
-        }
-    }
-    return UBIDI_NEUTRAL;
-}
-
-/* perform (P2)..(P3) ------------------------------------------------------- */
-
-/**
- * Scans the prologue and returns the class (L, R or AL) of the first strong
- * character that follows the last B in it, or ON if there is none.
- * Requires prologue!=null.
- */
-static DirProp
-firstL_R_AL(UBiDi *pBiDi) {
-    const UChar *prologue=pBiDi->prologue;
-    const int32_t limit=pBiDi->proLength;
-    DirProp found=ON;
-    int32_t pos=0;
-    while(pos<limit) {
-        UChar32 cp;
-        /* U16_NEXT advances pos past the code point it reads */
-        U16_NEXT(prologue, pos, limit, cp);
-        DirProp cls=(DirProp)ubidi_getCustomizedClass(pBiDi, cp);
-        if(found!=ON) {
-            /* a strong character is already recorded: only a B resets the search */
-            if(cls==B) {
-                found=ON;
-            }
-        } else if(cls==L || cls==R || cls==AL) {
-            found=cls;
-        }
-    }
-    return found;
-}
-
-/*
- * Make sure that the array pointed to by pBiDi->paras has room for
- * pBiDi->paraCount entries.
- * While the built-in simpleParas array is in use and still large enough,
- * nothing is allocated; otherwise parasMemory is obtained (twice the needed
- * capacity) and, when switching away from simpleParas, the entries collected
- * so far are copied over.
- * Returns FALSE if the memory could not be obtained.
- */
-static UBool
-checkParaCount(UBiDi *pBiDi) {
-    const UBool usingSimple=(UBool)(pBiDi->paras==pBiDi->simpleParas);
-    if(usingSimple && pBiDi->paraCount<=SIMPLE_PARAS_COUNT) {
-        return TRUE;
-    }
-
-    const int32_t wanted=usingSimple ? SIMPLE_PARAS_COUNT * 2 : pBiDi->paraCount * 2;
-    if(!getInitialParasMemory(pBiDi, wanted)) {
-        return FALSE;
-    }
-    pBiDi->paras=pBiDi->parasMemory;
-    if(usingSimple) {
-        /* carry over what was stored in the built-in array */
-        uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para));
-    }
-    return TRUE;
-}
-
-/*
- * Get the directional properties for the text, calculate the flags bit-set, and
- * determine the paragraph level if necessary (in pBiDi->paras[i].level).
- * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
- * When encountering an FSI, it is initially replaced with an LRI, which is the
- * default. Only if a strong R or AL is found within its scope will the LRI be
- * replaced by an RLI.
- *
- * Besides the dirProps array, this function fills in pBiDi->paras[] (level and
- * limit of each paragraph), pBiDi->paraCount, pBiDi->flags,
- * pBiDi->lastArabicPos and pBiDi->controlCount; with UBIDI_OPTION_STREAMING
- * it also sets pBiDi->length.
- *
- * Returns FALSE only if checkParaCount() fails while adding a paragraph,
- * TRUE otherwise.
- */
-static UBool
-getDirProps(UBiDi *pBiDi) {
- const UChar *text=pBiDi->text;
- DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
-
- int32_t i=0, originalLength=pBiDi->originalLength;
- Flags flags=0; /* collect all directionalities in the text */
- UChar32 uchar;
- DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */
- UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
- /* for inverse BiDi, the default para level is set to RTL if there is a
- strong R or AL character at either end of the text */
- UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
- (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
- pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
- int32_t lastArabicPos=-1;
- int32_t controlCount=0;
- UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
- UBIDI_OPTION_REMOVE_CONTROLS);
-
- enum State {
- NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */
- SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */
- SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */
- LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */
- };
- State state;
- DirProp lastStrong=ON; /* for default level & inverse BiDi */
- /* The following stacks are used to manage isolate sequences. Those
- sequences may be nested, but obviously never more deeply than the
- maximum explicit embedding level.
- lastStack is the index of the last used entry in the stack. A value of -1
- means that there is no open isolate sequence.
- lastStack is reset to -1 on paragraph boundaries. */
- /* NOTE(review): the comment above says "lastStack", the variable below is named stackLast */
- /* The following stack contains the position of the initiator of
- each open isolate sequence */
- int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
- /* The following stack contains the last known state before
- encountering the initiator of an isolate sequence */
- State previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
- int32_t stackLast=-1;
-
- if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
- pBiDi->length=0;
- defaultParaLevel=pBiDi->paraLevel&1;
- /* set up the first paragraph's level and the initial state */
- if(isDefaultLevel) {
- pBiDi->paras[0].level=defaultParaLevel;
- lastStrong=defaultParaLevel;
- if(pBiDi->proLength>0 && /* there is a prologue */
- (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */
- if(dirProp==L)
- pBiDi->paras[0].level=0; /* set the default para level */
- else
- pBiDi->paras[0].level=1; /* set the default para level */
- state=NOT_SEEKING_STRONG;
- } else {
- state=SEEKING_STRONG_FOR_PARA;
- }
- } else {
- pBiDi->paras[0].level=pBiDi->paraLevel;
- state=NOT_SEEKING_STRONG;
- }
- /* count paragraphs and determine the paragraph level (P2..P3) */
- /*
- * see comment in ubidi.h:
- * the UBIDI_DEFAULT_XXX values are designed so that
- * their bit 0 alone yields the intended default
- */
- for( /* i=0 above */ ; i<originalLength; ) {
- /* i is incremented by U16_NEXT */
- U16_NEXT(text, i, originalLength, uchar);
- flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
- dirProps[i-1]=dirProp;
- if(uchar>0xffff) { /* set the lead surrogate's property to BN */
- flags|=DIRPROP_FLAG(BN);
- dirProps[i-2]=BN;
- }
- if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
- controlCount++;
- if(dirProp==L) {
- if(state==SEEKING_STRONG_FOR_PARA) {
- pBiDi->paras[pBiDi->paraCount-1].level=0;
- state=NOT_SEEKING_STRONG;
- }
- else if(state==SEEKING_STRONG_FOR_FSI) {
- if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
- /* no need for next statement, already set by default */
- /* dirProps[isolateStartStack[stackLast]]=LRI; */
- flags|=DIRPROP_FLAG(LRI);
- }
- state=LOOKING_FOR_PDI;
- }
- lastStrong=L;
- continue;
- }
- if(dirProp==R || dirProp==AL) {
- if(state==SEEKING_STRONG_FOR_PARA) {
- pBiDi->paras[pBiDi->paraCount-1].level=1;
- state=NOT_SEEKING_STRONG;
- }
- else if(state==SEEKING_STRONG_FOR_FSI) {
- if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
- dirProps[isolateStartStack[stackLast]]=RLI;
- flags|=DIRPROP_FLAG(RLI);
- }
- state=LOOKING_FOR_PDI;
- }
- lastStrong=R;
- if(dirProp==AL)
- lastArabicPos=i-1;
- continue;
- }
- if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */
- /* the depth is always counted, but entries beyond the stack capacity are not stored */
- stackLast++;
- if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
- isolateStartStack[stackLast]=i-1;
- previousStateStack[stackLast]=state;
- }
- if(dirProp==FSI) {
- dirProps[i-1]=LRI; /* default if no strong char */
- state=SEEKING_STRONG_FOR_FSI;
- }
- else
- state=LOOKING_FOR_PDI;
- continue;
- }
- if(dirProp==PDI) {
- if(state==SEEKING_STRONG_FOR_FSI) {
- if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
- /* no need for next statement, already set by default */
- /* dirProps[isolateStartStack[stackLast]]=LRI; */
- flags|=DIRPROP_FLAG(LRI);
- }
- }
- /* a PDI without an open isolate sequence (stackLast<0) changes nothing */
- if(stackLast>=0) {
- if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
- state=previousStateStack[stackLast];
- stackLast--;
- }
- continue;
- }
- if(dirProp==B) {
- if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
- continue;
- pBiDi->paras[pBiDi->paraCount-1].limit=i;
- if(isDefaultLevelInverse && lastStrong==R)
- pBiDi->paras[pBiDi->paraCount-1].level=1;
- if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
- /* When streaming, we only process whole paragraphs
- thus some updates are only done on paragraph boundaries */
- pBiDi->length=i; /* i is index to next character */
- pBiDi->controlCount=controlCount;
- }
- if(i<originalLength) { /* B not last char in text */
- pBiDi->paraCount++;
- if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */
- return FALSE;
- if(isDefaultLevel) {
- pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
- state=SEEKING_STRONG_FOR_PARA;
- lastStrong=defaultParaLevel;
- } else {
- pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
- state=NOT_SEEKING_STRONG;
- }
- stackLast=-1;
- }
- continue;
- }
- }
- /* Ignore still open isolate sequences with overflow */
- if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
- stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
- state=SEEKING_STRONG_FOR_FSI; /* to be on the safe side */
- }
- /* Resolve direction of still unresolved open FSI sequences */
- while(stackLast>=0) {
- if(state==SEEKING_STRONG_FOR_FSI) {
- /* no need for next statement, already set by default */
- /* dirProps[isolateStartStack[stackLast]]=LRI; */
- flags|=DIRPROP_FLAG(LRI);
- break;
- }
- state=previousStateStack[stackLast];
- stackLast--;
- }
- /* When streaming, ignore text after the last paragraph separator */
- if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
- if(pBiDi->length<originalLength)
- pBiDi->paraCount--;
- } else {
- pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
- pBiDi->controlCount=controlCount;
- }
- /* For inverse bidi, default para direction is RTL if there is
- a strong R or AL at either end of the paragraph */
- if(isDefaultLevelInverse && lastStrong==R) {
- pBiDi->paras[pBiDi->paraCount-1].level=1;
- }
- if(isDefaultLevel) {
- pBiDi->paraLevel=static_cast<UBiDiLevel>(pBiDi->paras[0].level);
- }
- /* The following is needed to resolve the text direction for default level
- paragraphs containing no strong character */
- for(i=0; i<pBiDi->paraCount; i++)
- flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
-
- if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
- flags|=DIRPROP_FLAG(L);
- }
- pBiDi->flags=flags;
- pBiDi->lastArabicPos=lastArabicPos;
- return TRUE;
-}
-
-/*
- * Determine the paragraph level at position pindex: the level of the first
- * paragraph whose limit lies beyond pindex, or of the last paragraph when
- * pindex is at or past every limit.
- */
-U_CFUNC UBiDiLevel
-ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
-    const int32_t lastPara=pBiDi->paraCount-1;
-    int32_t para=0;
-    while(para<=lastPara && pindex>=pBiDi->paras[para].limit) {
-        para++;
-    }
-    if(para>lastPara) {
-        para=lastPara; /* past the end: fall back to the last paragraph */
-    }
-    return (UBiDiLevel)(pBiDi->paras[para].level);
-}
-
-/* Functions for handling paired brackets ----------------------------------- */
-
-/* In the isoRuns array, the first entry is used for text outside of any
- isolate sequence. Higher entries are used for each more deeply nested
- isolate sequence. isoRunLast is the index of the last used entry. The
- openings array is used to note the data of opening brackets not yet
- matched by a closing bracket, or matched but still susceptible to change
- level.
- Each isoRun entry contains the index of the first and
- one-after-last openings entries for pending opening brackets it
- contains. The next openings entry to use is the one-after-last of the
- most deeply nested isoRun entry.
- isoRun entries also contain their current embedding level and the last
- encountered strong character, since these will be needed to resolve
- the level of paired brackets. */
-
-/*
- * Prepare the bracket data for a new text: reset the isolate run stack to
- * its single base entry (using the paragraph level at position 0), select
- * the openings buffer, and cache whether a NUMBERS_SPECIAL reordering mode
- * is active.
- */
-static void
-bracketInit(UBiDi *pBiDi, BracketData *bd) {
-    IsoRun *base=&bd->isoRuns[0];
-    /* direction (bit 0) of the first paragraph's level */
-    const UBiDiLevel paraDir = GET_PARALEVEL(pBiDi, 0) & 1;
-
-    bd->pBiDi=pBiDi;
-    bd->isoRunLast=0;
-    base->start=0;
-    base->limit=0;
-    base->level=GET_PARALEVEL(pBiDi, 0);
-    base->lastBase=paraDir;
-    base->lastStrong=base->lastBase;
-    base->contextDir=(UBiDiDirection)paraDir;
-    base->contextPos=0;
-
-    /* use the heap buffer when one exists, otherwise the built-in array */
-    if(pBiDi->openingsMemory==NULL) {
-        bd->openings=bd->simpleOpenings;
-        bd->openingsCount=SIMPLE_OPENINGS_COUNT;
-    } else {
-        bd->openings=pBiDi->openingsMemory;
-        bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
-    }
-
-    const UBiDiReorderingMode mode=pBiDi->reorderingMode;
-    bd->isNumbersSpecial=mode==UBIDI_REORDER_NUMBERS_SPECIAL ||
-                         mode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
-}
-
-/*
- * paragraph boundary:
- * drop all nested isolate runs and restart the base run with the given
- * level; start of the base run is left as it is
- */
-static void
-bracketProcessB(BracketData *bd, UBiDiLevel level) {
-    IsoRun *base=&bd->isoRuns[0];
-    bd->isoRunLast=0;
-    base->limit=0;
-    base->level=level;
-    base->lastBase=level&1;
-    base->lastStrong=base->lastBase;
-    base->contextDir=(UBiDiDirection)(level&1);
-    base->contextPos=0;
-}
-
-/* LRE, LRO, RLE, RLO, PDF */
-/*
- * Restart the bracket context of the current isolate run at an explicit
- * embedding boundary.
- *
- * lastCcPos      position whose dirProp is inspected and which becomes the
- *                new context position
- * contextLevel   level before the boundary
- * embeddingLevel level after the boundary; it replaces contextLevel as the
- *                source of the context direction when it is the higher of
- *                the two (override bits ignored)
- *
- * Nothing is done when dirProps[lastCcPos] is one of the MASK_ISO types.
- */
-static void
-bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
-                       UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
-    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
-    DirProp *dirProps=bd->pBiDi->dirProps;
-    if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */
-        return;
-    if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel)) /* not a PDF */
-        contextLevel=embeddingLevel;
-    /* limit==start leaves this run without pending openings */
-    pLastIsoRun->limit=pLastIsoRun->start;
-    pLastIsoRun->level=embeddingLevel;
-    pLastIsoRun->lastStrong=pLastIsoRun->lastBase=contextLevel&1;
-    pLastIsoRun->contextDir=(UBiDiDirection)(contextLevel&1);
-    /* contextPos is a text position, not a direction: store it directly
-       instead of converting it through the UBiDiDirection enum, which
-       cannot represent arbitrary positions */
-    pLastIsoRun->contextPos=lastCcPos;
-}
-
-/*
- * LRI or RLI:
- * push a new isolate run with the given level; its openings range starts
- * empty at the enclosing run's current limit
- */
-static void
-bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
-    IsoRun *outer=&bd->isoRuns[bd->isoRunLast];
-    outer->lastBase=ON;
-    /* NOTE(review): limit is written elsewhere via static_cast<uint16_t>, this local is signed - confirm */
-    const int16_t inheritedLimit=outer->limit;
-
-    IsoRun *inner=&bd->isoRuns[++bd->isoRunLast];
-    inner->limit=inheritedLimit;
-    inner->start=inner->limit;
-    inner->level=level;
-    inner->lastBase=level&1;
-    inner->lastStrong=inner->lastBase;
-    inner->contextDir=(UBiDiDirection)(level&1);
-    inner->contextPos=0;
-}
-
-/*
- * PDI:
- * pop the innermost isolate run and mark the last base of the run that
- * becomes current again as ON
- */
-static void
-bracketProcessPDI(BracketData *bd) {
-    bd->isoRunLast--;
-    bd->isoRuns[bd->isoRunLast].lastBase=ON;
-}
-
-/*
- * newly found opening bracket: append an openings entry to the current
- * isolate run, growing the openings buffer first when it is full
- *
- * match    the closing character that will pair with this opening
- * position text position of the opening bracket
- *
- * returns TRUE on success, FALSE if the buffer could not be grown
- */
-static UBool
-bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
-    IsoRun *run=&bd->isoRuns[bd->isoRunLast];
-
-    if(run->limit>=bd->openingsCount) { /* no available new entry */
-        UBiDi *owner=bd->pBiDi;
-        if(!getInitialOpeningsMemory(owner, run->limit * 2)) {
-            return FALSE;
-        }
-        if(bd->openings==bd->simpleOpenings) {
-            /* moving off the built-in array: keep its entries */
-            uprv_memcpy(owner->openingsMemory, bd->simpleOpenings,
-                        SIMPLE_OPENINGS_COUNT * sizeof(Opening));
-        }
-        bd->openings=owner->openingsMemory; /* may have changed */
-        bd->openingsCount=owner->openingsSize / sizeof(Opening);
-    }
-
-    Opening *slot=&bd->openings[run->limit];
-    slot->position=position;
-    slot->match=match;
-    slot->contextDir=run->contextDir;
-    slot->contextPos=run->contextPos;
-    slot->flags=0;
-    run->limit++;
-    return TRUE;
-}
-
-/* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
-/*
- * openingIndex    index in bd->openings after which the scan starts
- * newPropPosition text position whose property has just been set to newProp
- * newProp         the property that was assigned at newPropPosition
- *
- * Only entries of the current isolate run with a negative match (a recorded
- * N0c pair) are candidates. For each affected pair, both bracket positions
- * in dirProps are rewritten to newProp and the change is propagated further.
- */
-static void
-fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
- /* This function calls itself recursively */
- IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
- Opening *qOpening;
- DirProp *dirProps=bd->pBiDi->dirProps;
- int32_t k, openingPosition, closingPosition;
- for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
- if(qOpening->match>=0) /* not an N0c match */
- continue;
- if(newPropPosition<qOpening->contextPos)
- break;
- if(newPropPosition>=qOpening->position)
- continue;
- if(newProp==qOpening->contextDir)
- break;
- openingPosition=qOpening->position;
- dirProps[openingPosition]=newProp;
- closingPosition=-(qOpening->match); /* the closing position was stored negated */
- dirProps[closingPosition]=newProp;
- qOpening->match=0; /* prevent further changes */
- fixN0c(bd, k, openingPosition, newProp);
- fixN0c(bd, k, closingPosition, newProp);
- }
-}
-
-/* process closing bracket */
-/*
- * openIdx  index in bd->openings of the opening bracket being matched
- * position text position of the closing bracket
- *
- * When a direction is resolved (N0b or N0c), it is written to dirProps at
- * both bracket positions and returned. For N0d, dirProps is left alone, the
- * opening and everything nested within it are forgotten, and ON is returned.
- */
-static DirProp /* return L or R if N0b or N0c, ON if N0d */
-bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
- IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
- Opening *pOpening, *qOpening;
- UBiDiDirection direction;
- UBool stable;
- DirProp newProp;
- pOpening=&bd->openings[openIdx];
- direction=(UBiDiDirection)(pLastIsoRun->level&1);
- stable=TRUE; /* assume stable until proved otherwise */
-
- /* The stable flag is set when brackets are paired and their
- level is resolved and cannot be changed by what will be
- found later in the source string.
- An unstable match can occur only when applying N0c, where
- the resolved level depends on the preceding context, and
- this context may be affected by text occurring later.
- Example: RTL paragraph containing: abc[(latin) HEBREW]
- When the closing parenthesis is encountered, it appears
- that N0c1 must be applied since 'abc' sets an opposite
- direction context and both parentheses receive level 2.
- However, when the closing square bracket is processed,
- N0b applies because of 'HEBREW' being included within the
- brackets, thus the square brackets are treated like R and
- receive level 1. However, this changes the preceding
- context of the opening parenthesis, and it now appears
- that N0c2 must be applied to the parentheses rather than
- N0c1. */
-
- if((direction==0 && pOpening->flags&FOUND_L) ||
- (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
- newProp=static_cast<DirProp>(direction);
- }
- else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
- /* it is stable if there is no containing pair or in
- conditions too complicated and not worth checking */
- stable=(openIdx==pLastIsoRun->start);
- if(direction!=pOpening->contextDir)
- newProp= static_cast<DirProp>(pOpening->contextDir); /* N0c1 */
- else
- newProp= static_cast<DirProp>(direction); /* N0c2 */
- } else {
- /* forget this and any brackets nested within this pair */
- pLastIsoRun->limit= static_cast<uint16_t>(openIdx);
- return ON; /* N0d */
- }
- bd->pBiDi->dirProps[pOpening->position]=newProp;
- bd->pBiDi->dirProps[position]=newProp;
- /* Update nested N0c pairs that may be affected */
- fixN0c(bd, openIdx, pOpening->position, newProp);
- if(stable) {
- pLastIsoRun->limit= static_cast<uint16_t>(openIdx); /* forget any brackets nested within this pair */
- /* remove lower located synonyms if any */
- while(pLastIsoRun->limit>pLastIsoRun->start &&
- bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
- pLastIsoRun->limit--;
- } else {
- int32_t k;
- /* keep the entry, remembering the closing position as a negative match value */
- pOpening->match=-position;
- /* neutralize lower located synonyms if any */
- k=openIdx-1;
- while(k>=pLastIsoRun->start &&
- bd->openings[k].position==pOpening->position)
- bd->openings[k--].match=0;
- /* neutralize any unmatched opening between the current pair;
- this will also neutralize higher located synonyms if any */
- for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
- qOpening=&bd->openings[k];
- if(qOpening->position>=position)
- break;
- if(qOpening->match>0)
- qOpening->match=0;
- }
- }
- return newProp;
-}
-
-/*
- * Handle strong characters, digits and candidates for closing brackets
- * at index `position`, in the context of the current isolated run
- * bd->isoRuns[bd->isoRunLast].
- *
- * For an ON character the function first looks, from the most recently
- * added opening backwards, for an opening whose expected closing char
- * equals the current char; on a match the result of
- * bracketProcessClosing() decides what happens next. Failing that, if the
- * char is an opening bracket, an entry is recorded through
- * bracketAddOpening() (plus one extra entry for the angle-bracket synonym
- * when the closing char is U+232A or U+3009).
- * For every char that was not consumed as a matching closing bracket, the
- * run's lastBase/lastStrong/contextDir/contextPos are updated according to
- * its class, and for results that are <=R or AL the corresponding
- * direction flag is OR-ed into each opening located before `position`.
- *
- * Side effects: may rewrite dirProps[position] (ENL, ENR, AN, ON or the
- * override direction) and may clear UBIDI_LEVEL_OVERRIDE in levels[] for
- * both brackets of a matched pair.
- *
- * Returns TRUE on success, FALSE when bracketAddOpening() reports failure.
- */
-static UBool /* return TRUE if success */
-bracketProcessChar(BracketData *bd, int32_t position) {
- IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
- DirProp *dirProps, dirProp, newProp;
- UBiDiLevel level;
- dirProps=bd->pBiDi->dirProps;
- dirProp=dirProps[position];
- if(dirProp==ON) {
- UChar c, match;
- int32_t idx;
- /* First see if it is a matching closing bracket. Hopefully, this is
- more efficient than checking if it is a closing bracket at all */
- c=bd->pBiDi->text[position];
- for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) { /* newest opening first */
- if(bd->openings[idx].match!=c)
- continue;
- /* We have a match */
- newProp=bracketProcessClosing(bd, idx, position);
- if(newProp==ON) { /* N0d */
- c=0; /* prevent handling as an opening */
- break;
- }
- pLastIsoRun->lastBase=ON;
- pLastIsoRun->contextDir=(UBiDiDirection)newProp;
- pLastIsoRun->contextPos=position;
- level=bd->pBiDi->levels[position];
- if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
- uint16_t flag;
- int32_t i;
- newProp=level&1; /* direction implied by the parity of the level */
- pLastIsoRun->lastStrong=newProp;
- flag=DIRPROP_FLAG(newProp);
- for(i=pLastIsoRun->start; i<idx; i++) /* openings recorded before the matched one */
- bd->openings[i].flags|=flag;
- /* matching brackets are not overridden by LRO/RLO */
- bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
- }
- /* matching brackets are not overridden by LRO/RLO */
- bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
- return TRUE;
- }
- /* We get here only if the ON character is not a matching closing
- bracket or it is a case of N0d */
- /* Now see if it is an opening bracket */
- if(c)
- match= static_cast<UChar>(u_getBidiPairedBracket(c)); /* get the matching char */
- else
- match=0; /* c was zeroed above (N0d): match==c, so the opening test below fails */
- if(match!=c && /* has a matching char */
- ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */
- /* special case: process synonyms
- create an opening entry for each synonym */
- if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
- if(!bracketAddOpening(bd, 0x3009, position))
- return FALSE;
- }
- else if(match==0x3009) { /* RIGHT ANGLE BRACKET */
- if(!bracketAddOpening(bd, 0x232A, position))
- return FALSE;
- }
- if(!bracketAddOpening(bd, match, position))
- return FALSE;
- }
- }
- level=bd->pBiDi->levels[position];
- if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
- newProp=level&1;
- if(dirProp!=S && dirProp!=WS && dirProp!=ON) /* S, WS and ON keep their own class */
- dirProps[position]=newProp;
- pLastIsoRun->lastBase=newProp;
- pLastIsoRun->lastStrong=newProp;
- pLastIsoRun->contextDir=(UBiDiDirection)newProp;
- pLastIsoRun->contextPos=position;
- }
- else if(dirProp<=R || dirProp==AL) {
- newProp= static_cast<DirProp>(DIR_FROM_STRONG(dirProp));
- pLastIsoRun->lastBase=dirProp;
- pLastIsoRun->lastStrong=dirProp;
- pLastIsoRun->contextDir=(UBiDiDirection)newProp;
- pLastIsoRun->contextPos=position;
- }
- else if(dirProp==EN) {
- pLastIsoRun->lastBase=EN;
- if(pLastIsoRun->lastStrong==L) {
- newProp=L; /* W7 */
- if(!bd->isNumbersSpecial)
- dirProps[position]=ENL;
- pLastIsoRun->contextDir=(UBiDiDirection)L;
- pLastIsoRun->contextPos=position;
- }
- else {
- newProp=R; /* N0 */
- if(pLastIsoRun->lastStrong==AL)
- dirProps[position]=AN; /* W2 */
- else
- dirProps[position]=ENR;
- pLastIsoRun->contextDir=(UBiDiDirection)R;
- pLastIsoRun->contextPos=position;
- }
- }
- else if(dirProp==AN) {
- newProp=R; /* N0 */
- pLastIsoRun->lastBase=AN;
- pLastIsoRun->contextDir=(UBiDiDirection)R;
- pLastIsoRun->contextPos=position;
- }
- else if(dirProp==NSM) {
- /* if the last real char was ON, change NSM to ON so that it
- will stay ON even if the last real char is a bracket which
- may be changed to L or R */
- newProp=pLastIsoRun->lastBase;
- if(newProp==ON)
- dirProps[position]=newProp;
- }
- else {
- newProp=dirProp;
- pLastIsoRun->lastBase=dirProp;
- }
- if(newProp<=R || newProp==AL) { /* strong-like result: record it on the pending openings */
- int32_t i;
- uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
- for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
- if(position>bd->openings[i].position) /* only openings located before this char */
- bd->openings[i].flags|=flag;
- }
- return TRUE;
-}
-
-/* perform (X1)..(X9) ------------------------------------------------------- */
-
-/*
- * Classify the text as single-directional (UBIDI_LTR or UBIDI_RTL) or
- * mixed-directional (UBIDI_MIXED), looking only at pBiDi->flags.
- */
-static UBiDiDirection
-directionFromFlags(UBiDi *pBiDi) {
-    const Flags seen=pBiDi->flags;
-
-    /* No RTL class at all, and no combination of AN with neutrals
-       (which could turn some neutrals RTL): the text is purely LTR. */
-    if((seen&MASK_RTL)==0 &&
-       ((seen&DIRPROP_FLAG(AN))==0 || (seen&MASK_POSSIBLE_N)==0)) {
-        return UBIDI_LTR;
-    }
-    /* Something may resolve RTL; without any LTR class it is purely RTL. */
-    if((seen&MASK_LTR)==0) {
-        return UBIDI_RTL;
-    }
-    return UBIDI_MIXED;
-}
-
-/*
- * Resolve the explicit levels as specified by explicit embedding codes.
- * Recalculate the flags to have them reflect the real properties
- * after taking the explicit embeddings into account.
- *
- * The BiDi algorithm is designed to result in the same behavior whether embedding
- * levels are externally specified (from "styled text", supposedly the preferred
- * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
- * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
- * However, in a real implementation, the removal of these codes and their index
- * positions in the plain text is undesirable since it would result in
- * reallocated, reindexed text.
- * Instead, this implementation leaves the codes in there and just ignores them
- * in the subsequent processing.
- * In order to get the same reordering behavior, positions with a BN or a not-isolate
- * explicit embedding code just get the same level assigned as the last "real"
- * character.
- *
- * Some implementations, not this one, then overwrite some of these
- * directionality properties at "real" same-level-run boundaries by
- * L or R codes so that the resolution of weak types can be performed on the
- * entire paragraph at once instead of having to parse it once more and
- * perform that resolution on same-level-runs.
- * This limits the scope of the implicit rules in effectively
- * the same way as the run limits.
- *
- * Instead, this implementation does not modify these codes, except for
- * paired brackets whose properties (ON) may be replaced by L or R.
- * On one hand, the paragraph has to be scanned for same-level-runs, but
- * on the other hand, this saves another loop to reset these codes,
- * or saves making and modifying a copy of dirProps[].
- *
- *
- * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
- *
- *
- * Handling the stack of explicit levels (Xn):
- *
- * With the BiDi stack of explicit levels, as pushed with each
- * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
- * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
- *
- * In order to have a correct push-pop semantics even in the case of overflows,
- * overflow counters and a valid isolate counter are used as described in UAX#9
- * section 3.3.2 "Explicit Levels and Directions".
- *
- * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
- *
- * Parameters:
- *   pBiDi       the object whose dirProps[], levels[], flags and
- *               isolateCount are read and updated in place
- *   pErrorCode  in/out error code; if it already indicates a failure on
- *               entry the function returns UBIDI_LTR after resetting
- *               pBiDi->isolateCount only
- *
- * Returns normally the direction.
- * If bracketProcessChar() fails (memory shortage), *pErrorCode is set to
- * U_MEMORY_ALLOCATION_ERROR on every path; the return value is then
- * UBIDI_LTR on the "no embeddings" path and -1 on the (Xn) path.
- *
- */
-static UBiDiDirection
-resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
-    DirProp *dirProps=pBiDi->dirProps;
-    UBiDiLevel *levels=pBiDi->levels;
-    const UChar *text=pBiDi->text;
-
-    int32_t i=0, length=pBiDi->length;
-    Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
-    DirProp dirProp;
-    UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
-    UBiDiDirection direction;
-    pBiDi->isolateCount=0;
-
-    if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
-
-    /* determine if the text is mixed-directional or single-directional */
-    direction=directionFromFlags(pBiDi);
-
-    /* we may not need to resolve any explicit levels */
-    if(direction!=UBIDI_MIXED) {
-        /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
-        return direction;
-    }
-    if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
-        /* inverse BiDi: mixed, but all characters are at the same embedding level */
-        /* set all levels to the paragraph level */
-        int32_t paraIndex, start, limit;
-        for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
-            if(paraIndex==0)
-                start=0;
-            else
-                start=pBiDi->paras[paraIndex-1].limit;
-            limit=pBiDi->paras[paraIndex].limit;
-            level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
-            for(i=start; i<limit; i++)
-                levels[i]=level;
-        }
-        return direction;           /* no bracket matching for inverse BiDi */
-    }
-    if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
-        /* no embeddings, set all levels to the paragraph level */
-        /* we still have to perform bracket matching */
-        int32_t paraIndex, start, limit;
-        BracketData bracketData;
-        bracketInit(pBiDi, &bracketData);
-        for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
-            if(paraIndex==0)
-                start=0;
-            else
-                start=pBiDi->paras[paraIndex-1].limit;
-            limit=pBiDi->paras[paraIndex].limit;
-            level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
-            for(i=start; i<limit; i++) {
-                levels[i]=level;
-                dirProp=dirProps[i];
-                if(dirProp==BN)
-                    continue;
-                if(dirProp==B) {
-                    if((i+1)<length) {
-                        if(text[i]==CR && text[i+1]==LF)
-                            continue;   /* skip CR when followed by LF */
-                        bracketProcessB(&bracketData, level);
-                    }
-                    continue;
-                }
-                if(!bracketProcessChar(&bracketData, i)) {
-                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
-                    return UBIDI_LTR;
-                }
-            }
-        }
-        return direction;
-    }
-    {
-        /* continue to perform (Xn) */
-
-        /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
-        /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
-        UBiDiLevel embeddingLevel=level, newLevel;
-        UBiDiLevel previousLevel=level;     /* previous level for regular (not CC) characters */
-        int32_t lastCcPos=0;                /* index of last effective LRx,RLx, PDx */
-
-        /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
-           stackLast points to its current entry. */
-        uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2];   /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
-                                                         but we need one more entry as base */
-        uint32_t stackLast=0;
-        int32_t overflowIsolateCount=0;
-        int32_t overflowEmbeddingCount=0;
-        int32_t validIsolateCount=0;
-        BracketData bracketData;
-        bracketInit(pBiDi, &bracketData);
-        stack[0]=level;     /* initialize base entry to para level, no override, no isolate */
-
-        /* recalculate the flags */
-        flags=0;
-
-        for(i=0; i<length; ++i) {
-            dirProp=dirProps[i];
-            switch(dirProp) {
-            case LRE:
-            case RLE:
-            case LRO:
-            case RLO:
-                /* (X2, X3, X4, X5) */
-                flags|=DIRPROP_FLAG(BN);
-                levels[i]=previousLevel;
-                if (dirProp==LRE || dirProp==LRO)
-                    /* least greater even level */
-                    newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
-                else
-                    /* least greater odd level */
-                    newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
-                if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
-                                                         overflowEmbeddingCount==0) {
-                    lastCcPos=i;
-                    embeddingLevel=newLevel;
-                    if(dirProp==LRO || dirProp==RLO)
-                        embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
-                    stackLast++;
-                    stack[stackLast]=embeddingLevel;
-                    /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
-                       since this has already been done for newLevel which is
-                       the source for embeddingLevel.
-                     */
-                } else {
-                    if(overflowIsolateCount==0)
-                        overflowEmbeddingCount++;
-                }
-                break;
-            case PDF:
-                /* (X7) */
-                flags|=DIRPROP_FLAG(BN);
-                levels[i]=previousLevel;
-                /* handle all the overflow cases first */
-                if(overflowIsolateCount) {
-                    break;
-                }
-                if(overflowEmbeddingCount) {
-                    overflowEmbeddingCount--;
-                    break;
-                }
-                if(stackLast>0 && stack[stackLast]<ISOLATE) {   /* not an isolate entry */
-                    lastCcPos=i;
-                    stackLast--;
-                    embeddingLevel=(UBiDiLevel)stack[stackLast];
-                }
-                break;
-            case LRI:
-            case RLI:
-                flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
-                levels[i]=NO_OVERRIDE(embeddingLevel);
-                if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
-                    bracketProcessBoundary(&bracketData, lastCcPos,
-                                           previousLevel, embeddingLevel);
-                    flags|=DIRPROP_FLAG_MULTI_RUNS;
-                }
-                previousLevel=embeddingLevel;
-                /* (X5a, X5b) */
-                if(dirProp==LRI)
-                    /* least greater even level */
-                    newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
-                else
-                    /* least greater odd level */
-                    newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
-                if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
-                                                         overflowEmbeddingCount==0) {
-                    flags|=DIRPROP_FLAG(dirProp);
-                    lastCcPos=i;
-                    validIsolateCount++;
-                    if(validIsolateCount>pBiDi->isolateCount)
-                        pBiDi->isolateCount=validIsolateCount;
-                    embeddingLevel=newLevel;
-                    /* we can increment stackLast without checking because newLevel
-                       will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
-                    stackLast++;
-                    stack[stackLast]=embeddingLevel+ISOLATE;
-                    bracketProcessLRI_RLI(&bracketData, embeddingLevel);
-                } else {
-                    /* make it WS so that it is handled by adjustWSLevels() */
-                    dirProps[i]=WS;
-                    overflowIsolateCount++;
-                }
-                break;
-            case PDI:
-                if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
-                    bracketProcessBoundary(&bracketData, lastCcPos,
-                                           previousLevel, embeddingLevel);
-                    flags|=DIRPROP_FLAG_MULTI_RUNS;
-                }
-                /* (X6a) */
-                if(overflowIsolateCount) {
-                    overflowIsolateCount--;
-                    /* make it WS so that it is handled by adjustWSLevels() */
-                    dirProps[i]=WS;
-                }
-                else if(validIsolateCount) {
-                    flags|=DIRPROP_FLAG(PDI);
-                    lastCcPos=i;
-                    overflowEmbeddingCount=0;
-                    while(stack[stackLast]<ISOLATE) /* pop embedding entries */
-                        stackLast--;                /* until the last isolate entry */
-                    stackLast--;                    /* pop also the last isolate entry */
-                    validIsolateCount--;
-                    bracketProcessPDI(&bracketData);
-                } else {
-                    /* make it WS so that it is handled by adjustWSLevels() */
-                    dirProps[i]=WS;
-                }
-                embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
-                flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
-                previousLevel=embeddingLevel;
-                levels[i]=NO_OVERRIDE(embeddingLevel);
-                break;
-            case B:
-                flags|=DIRPROP_FLAG(B);
-                levels[i]=GET_PARALEVEL(pBiDi, i);
-                if((i+1)<length) {
-                    if(text[i]==CR && text[i+1]==LF)
-                        break;          /* skip CR when followed by LF */
-                    overflowEmbeddingCount=overflowIsolateCount=0;
-                    validIsolateCount=0;
-                    stackLast=0;
-                    previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
-                    stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
-                    bracketProcessB(&bracketData, embeddingLevel);
-                }
-                break;
-            case BN:
-                /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
-                /* they will get their levels set correctly in adjustWSLevels() */
-                levels[i]=previousLevel;
-                flags|=DIRPROP_FLAG(BN);
-                break;
-            default:
-                /* all other types are normal characters and get the "real" level */
-                if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
-                    bracketProcessBoundary(&bracketData, lastCcPos,
-                                           previousLevel, embeddingLevel);
-                    flags|=DIRPROP_FLAG_MULTI_RUNS;
-                    if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
-                        flags|=DIRPROP_FLAG_O(embeddingLevel);
-                    else
-                        flags|=DIRPROP_FLAG_E(embeddingLevel);
-                }
-                previousLevel=embeddingLevel;
-                levels[i]=embeddingLevel;
-                if(!bracketProcessChar(&bracketData, i)) {
-                    /* Report the failure through the error code as well, like
-                       the "no embeddings" path above does, so that callers
-                       which only test U_FAILURE() do not miss it. */
-                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
-                    return (UBiDiDirection)-1;
-                }
-                /* the dirProp may have been changed in bracketProcessChar() */
-                flags|=DIRPROP_FLAG(dirProps[i]);
-                break;
-            }
-        }
-        if(flags&MASK_EMBEDDING)
-            flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
-        if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
-            flags|=DIRPROP_FLAG(L);
-        /* again, determine if the text is mixed-directional or single-directional */
-        pBiDi->flags=flags;
-        direction=directionFromFlags(pBiDi);
-    }
-    return direction;
-}
-
-/*
- * Use a pre-specified embedding levels array:
- *
- * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
- * ignore all explicit codes (X9),
- * and check all the preset levels.
- *
- * Recalculate the flags to have them reflect the real properties
- * after taking the explicit embeddings into account.
- *
- * Parameters:
- *   pBiDi       the object whose levels[] are validated (and, for level 0
- *               on non-B characters, replaced by the paragraph level) and
- *               whose flags and isolateCount are recomputed
- *   pErrorCode  set to U_ILLEGAL_ARGUMENT_ERROR when a level is non-zero
- *               and below the paragraph level, or above
- *               UBIDI_MAX_EXPLICIT_LEVEL; UBIDI_LTR is returned in that case
- *
- * pBiDi->isolateCount receives the deepest nesting of LRI/RLI seen within
- * a paragraph.
- *
- * Returns the direction derived from the recomputed flags.
- */
-static UBiDiDirection
-checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
-    DirProp *dirProps=pBiDi->dirProps;
-    UBiDiLevel *levels=pBiDi->levels;
-    int32_t isolateCount=0;
-
-    int32_t length=pBiDi->length;
-    Flags flags=0;  /* collect all directionalities in the text */
-    pBiDi->isolateCount=0;
-
-    int32_t currentParaIndex = 0;
-    int32_t currentParaLimit = pBiDi->paras[0].limit;
-    int32_t currentParaLevel = pBiDi->paraLevel;
-
-    for(int32_t i=0; i<length; ++i) {
-        UBiDiLevel level=levels[i];
-        DirProp dirProp=dirProps[i];
-        if(dirProp==LRI || dirProp==RLI) {
-            isolateCount++;
-            if(isolateCount>pBiDi->isolateCount)
-                pBiDi->isolateCount=isolateCount;
-        }
-        else if(dirProp==PDI) {
-            // A PDI without a pending isolate initiator closes nothing.
-            // Do not let the depth go negative, otherwise the isolate
-            // initiators that follow would be undercounted in
-            // pBiDi->isolateCount.
-            if(isolateCount>0)
-                isolateCount--;
-        }
-        else if(dirProp==B)
-            isolateCount=0;
-
-        // optimized version of  int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i);
-        if (pBiDi->defaultParaLevel != 0 &&
-                i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) {
-            currentParaLevel = pBiDi->paras[++currentParaIndex].level;
-            currentParaLimit = pBiDi->paras[currentParaIndex].limit;
-        }
-
-        UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE;
-        level &= ~UBIDI_LEVEL_OVERRIDE;
-        if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) {
-            if (level == 0) {
-                if (dirProp == B) {
-                    // Paragraph separators are ok with explicit level 0.
-                    // Prevents reordering of paragraphs.
-                } else {
-                    // Treat explicit level 0 as a wildcard for the paragraph level.
-                    // Avoid making the caller guess what the paragraph level would be.
-                    level = (UBiDiLevel)currentParaLevel;
-                    levels[i] = level | overrideFlag;
-                }
-            } else {
-                // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level
-                /* level out of bounds */
-                *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-                return UBIDI_LTR;
-            }
-        }
-        if (overrideFlag != 0) {
-            /* keep the override flag in levels[i] but adjust the flags */
-            flags|=DIRPROP_FLAG_O(level);
-        } else {
-            /* set the flags */
-            flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
-        }
-    }
-    if(flags&MASK_EMBEDDING)
-        flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
-    /* determine if the text is mixed-directional or single-directional */
-    pBiDi->flags=flags;
-    return directionFromFlags(pBiDi);
-}
-
-/******************************************************************
- The Properties state machine table
-*******************************************************************
-
- All table cells are 8 bits:
- bits 0..4: next state
- bits 5..7: action to perform (if > 0)
-
- Cells may be of format "n" where n represents the next state
- (except for the rightmost column).
- Cells may also be of format "s(x,y)" where x represents an action
- to perform and y represents the next state.
-
- With 5 bits for the state and 3 bits for the action, a table in this
- format can address at most 32 states and 7 non-zero actions.
-
-*******************************************************************
- Definitions and type for properties state table
-*******************************************************************
-*/
-#define IMPTABPROPS_COLUMNS 16 /* 15 property columns plus the Res column */
-#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1) /* index of the rightmost (Res) column */
-#define GET_STATEPROPS(cell) ((cell)&0x1f) /* low 5 bits: next state */
-#define GET_ACTIONPROPS(cell) ((cell)>>5) /* high 3 bits: action */
-#define s(action, newState) ((uint8_t)(newState+(action<<5))) /* pack a cell; #undef'ed after the table */
-
-static const uint8_t groupProp[] = /* dirProp regrouped: one entry per DirProp, in the order listed below;
- the values coincide with the column order of impTabProps
- (presumably used as its column index - confirm at the use site) */
-{
-/* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */
- 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14
-};
-enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp: the values stored in the Res column of impTabProps */
-
-/******************************************************************
-
- PROPERTIES STATE TABLE
-
- In table impTabProps,
- - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
- - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
- - the Res column is the reduced property assigned to a run
-
- Action 1: process current run1, init new run1
- 2: init new run2
- 3: process run1, process run2, init new run1
- 4: process run1, set run1=run2, init new run2
-
- Notes:
- 1) This table is used in resolveImplicitLevels().
- 2) This table triggers actions when there is a change in the Bidi
- property of incoming characters (action 1).
- 3) Most such property sequences are processed immediately (in
- fact, passed to processPropertySeq()).
- 4) However, numbers are assembled as one sequence. This means
- that undefined situations (like CS following digits, until
- it is known if the next char will be a digit) are held until
- following chars define them.
- Example: digits followed by CS, then comes another CS or ON;
- the digits will be processed, then the CS assigned
- as the start of an ON sequence (action 3).
- 5) There are cases where more than one sequence must be
- processed, for instance digits followed by CS followed by L:
- the digits must be processed as one sequence, and the CS
- must be processed as an ON sequence, all this before starting
- assembling chars for the opening L sequence.
-
- Layout: one row per state (24 states, numbered 0..23 in the row labels),
- IMPTABPROPS_COLUMNS cells per row. A plain number is the next state;
- s(action, state) packs an action into bits 5..7 of the cell.
-
-
-*/
-static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
-{
-/* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */
-/* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON },
-/* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L },
-/* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R },
-/* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R },
-/* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN },
-/* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
-/* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN },
-/* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
-/* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
-/* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON },
-/*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN },
-/*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN },
-/*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
-/*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN },
-/*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
-/*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S },
-/*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S },
-/*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B },
-/*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L },
-/*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L },
-/*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L },
-/*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN },
-/*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN },
-/*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN }
-};
-
-/* We must #undef macro s here because the levels tables use a different
- * cell structure (4 bits for the action and 4 bits for the next state).
- */
-#undef s
-
-/******************************************************************
- The levels state machine tables
-*******************************************************************
-
- All table cells are 8 bits:
- bits 0..3: next state
- bits 4..7: action to perform (if > 0)
-
- Cells may be of format "n" where n represents the next state
- (except for the rightmost column).
- Cells may also be of format "s(x,y)" where x represents an action
- to perform and y represents the next state.
-
- This format limits each table to 16 states each and to 15 actions.
-
-*******************************************************************
- Definitions and type for levels state tables
-*******************************************************************
-*/
-#define IMPTABLEVELS_COLUMNS (DirProp_B + 2) /* one column per reduced dirProp (L..B) plus the Res column */
-#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1) /* index of the rightmost (Res) column */
-#define GET_STATE(cell) ((cell)&0x0f) /* low nibble: next state */
-#define GET_ACTION(cell) ((cell)>>4) /* high nibble: table-local action number */
-#define s(action, newState) ((uint8_t)(newState+(action<<4))) /* pack a cell; redefined here with a 4-bit shift */
-
-typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS]; /* a levels state table: rows of IMPTABLEVELS_COLUMNS cells */
-typedef uint8_t ImpAct[]; /* maps a table-local action number to a global action number */
-
-/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
- * instead of having a pair of ImpTab and a pair of ImpAct.
- *
- * In the initializers that follow, entry [0] of each array is given the
- * "L" table / its action map and entry [1] the "R" table / its action map.
- * The pointers are stored untyped (const void *).
- */
-typedef struct ImpTabPair {
- const void * pImpTab[2];
- const void * pImpAct[2];
-} ImpTabPair;
-
-/******************************************************************
-
- LEVELS STATE TABLES
-
- In all levels state tables,
- - state 0 is the initial state
- - the Res column is the increment to add to the text level
- for this property sequence.
-
- The impAct arrays for each table of a pair map the local action
- numbers of the table to the total list of actions. For instance,
- action 2 in a given table corresponds to the action number which
- appears in entry [2] of the impAct array for that table.
- The first entry of all impAct arrays must be 0.
-
- Action 1: init conditional sequence
- 2: prepend conditional sequence to current sequence
- 3: set ON sequence to new level - 1
- 4: init EN/AN/ON sequence
- 5: fix EN/AN/ON sequence followed by R
- 6: set previous level sequence to level 2
-
- Notes:
- 1) These tables are used in processPropertySeq(). The input
- is property sequences as determined by resolveImplicitLevels.
- 2) Most such property sequences are processed immediately
- (levels are assigned).
- 3) However, some sequences cannot be assigned a final level till
- one or more following sequences are received. For instance,
- ON following an R sequence within an even-level paragraph.
- If the following sequence is R, the ON sequence will be
- assigned basic run level+1, and so will the R sequence.
- 4) S is generally handled like ON, since its level will be fixed
- to paragraph level in adjustWSLevels().
-
-*/
-
-static const ImpTab impTabL_DEFAULT = /* Even paragraph level */
-/* In this table, conditional sequences receive the lower possible level
- until proven otherwise.
-
- One row per state. The first seven columns are selected by the reduced
- dirProp (DirProp_L .. DirProp_B) and hold the next state, optionally
- combined with a table-local action through s(action, state). The Res
- column holds the increment to add to the text level for the sequence.
-*/
-{
-/* L , R , EN , AN , ON , S , B , Res */
-/* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
-/* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
-/* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
-/* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
-/* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 },
-/* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 }
-};
-static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */
-/* In this table, conditional sequences receive the lower possible level
- until proven otherwise.
-
- Same layout as the other levels tables: rows are states, the first seven
- columns are selected by the reduced dirProp, the Res column is the level
- increment for the sequence.
-*/
-{
-/* L , R , EN , AN , ON , S , B , Res */
-/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
-/* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
-/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
-/* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
-/* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
-/* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
-};
-static const ImpAct impAct0 = {0,1,2,3,4}; /* identity map: local action n is global action n */
-static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
- &impTabR_DEFAULT},
- {&impAct0, &impAct0}}; /* [0]: even-level table, [1]: odd-level table; both use impAct0 */
-
-static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */
-/* In this table, conditional sequences receive the lower possible level
- until proven otherwise.
-
- Five states only; the EN and AN columns are identical in every row.
-*/
-{
-/* L , R , EN , AN , ON , S , B , Res */
-/* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
-/* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 },
-/* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 },
-/* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 },
-/* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
-};
-static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
- &impTabR_DEFAULT},
- {&impAct0, &impAct0}}; /* special table for even levels only; odd levels reuse impTabR_DEFAULT */
-
-static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
-/* In this table, EN/AN+ON sequences receive levels as if associated with R
- until proven that there is L or sor/eor on both sides. AN is handled like EN.
-
- (The EN and AN columns are identical in every row.)
-*/
-{
-/* L , R , EN , AN , ON , S , B , Res */
-/* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
-/* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 },
-/* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 },
-/* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 },
-/* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 },
-/* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 }
-};
-static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
-/* In this table, EN/AN+ON sequences receive levels as if associated with R
- until proven that there is L on both sides. AN is handled like EN.
-
- (The EN and AN columns are identical in every row.)
-*/
-{
-/* L , R , EN , AN , ON , S , B , Res */
-/* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
-/* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
-/* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 },
-/* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
-/* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
-};
-static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
- {&impTabL_GROUP_NUMBERS_WITH_R,
- &impTabR_GROUP_NUMBERS_WITH_R},
- {&impAct0, &impAct0}}; /* L table first, R table second; both use the identity action map */
-
-
-static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
-/* This table is identical to the Default LTR table except that EN and AN are
- handled like L.
-
- Concretely, the EN and AN columns repeat the L column in every row.
- No cell of this table leads to state 2 or 3; those rows are presumably
- kept so that the state numbering stays parallel to impTabL_DEFAULT.
-*/
-{
-/* L , R , EN , AN , ON , S , B , Res */
-/* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
-/* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 },
-/* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 },
-/* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 },
-/* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 },
-/* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 }
-};
-static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
-/* This table is identical to the Default RTL table except that EN and AN are
- handled like L.
-
- Concretely, the EN and AN columns repeat the L column in every row.
-*/
-{
-/* L , R , EN , AN , ON , S , B , Res */
-/* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
-/* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 },
-/* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
-/* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
-/* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 },
-/* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
-};
-static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
- {&impTabL_INVERSE_NUMBERS_AS_L,
- &impTabR_INVERSE_NUMBERS_AS_L},
- {&impAct0, &impAct0}}; /* L table first, R table second; both use the identity action map */
-
-static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */
-/* In this table, conditional sequences receive the lower possible level
- until proven otherwise.
-
- The action numbers in the cells are local to this table; it is paired
- below with impAct1, which translates them to global action numbers.
-*/
-{
-/* L , R , EN , AN , ON , S , B , Res */
-/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
-/* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 },
-/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
-/* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 },
-/* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 },
-/* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
-/* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
-};
-static const ImpAct impAct1 = {0,1,13,14}; /* local actions 2 and 3 map to global actions 13 and 14 */
-/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
- */
-static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
- {&impTabL_DEFAULT,
- &impTabR_INVERSE_LIKE_DIRECT},
- {&impAct0, &impAct1}}; /* even levels: default table with impAct0; odd levels: table above with impAct1 */
-
-static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
-/* The case handled in this table is (visually): R EN L
-
- The action numbers in the cells (1..6) are local to this table; the
- pair defined after the matching R table associates it with impAct2.
-*/
-{
-/* L , R , EN , AN , ON , S , B , Res */
-/* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
-/* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 },
-/* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 },
-/* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 },
-/* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 },
-/* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 },
-/* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 }
-};
-static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
-/* The cases handled in this table are (visually): R EN L
- R L AN L
-*/
-{
-/* L , R , EN , AN , ON , S , B , Res */
-/* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
-/* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 },
-/* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 },
-/* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 },
-/* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 },
-/* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
-/* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
-};
-static const ImpAct impAct2 = {0,1,2,5,6,7,8};
-static const ImpAct impAct3 = {0,1,9,10,11,12};
-static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
- {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
- &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
- {&impAct2, &impAct3}};
-
-static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
- {&impTabL_NUMBERS_SPECIAL,
- &impTabR_INVERSE_LIKE_DIRECT},
- {&impAct0, &impAct1}};
-
-static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
-/* The case handled in this table is (visually): R EN L
-*/
-{
-/* L , R , EN , AN , ON , S , B , Res */
-/* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
-/* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 },
-/* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 },
-/* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 },
-/* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 }
-};
-static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
- {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
- &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
- {&impAct2, &impAct3}};
-
-#undef s
-
-typedef struct {
- const ImpTab * pImpTab; /* level table pointer */
- const ImpAct * pImpAct; /* action map array */
- int32_t startON; /* start of ON sequence */
- int32_t startL2EN; /* start of level 2 sequence */
- int32_t lastStrongRTL; /* index of last found R or AL */
- int32_t state; /* current state */
- int32_t runStart; /* start position of the run */
- UBiDiLevel runLevel; /* run level before implicit solving */
-} LevState;
-
-/*------------------------------------------------------------------------*/
-
-static void
-addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
- /* param pos: position where to insert
- param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
- */
-{
-#define FIRSTALLOC 10
- Point point;
- InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
-
- if (pInsertPoints->capacity == 0)
- {
- pInsertPoints->points=static_cast<Point *>(uprv_malloc(sizeof(Point)*FIRSTALLOC));
- if (pInsertPoints->points == NULL)
- {
- pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- pInsertPoints->capacity=FIRSTALLOC;
- }
- if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
- {
- Point * savePoints=pInsertPoints->points;
- pInsertPoints->points=static_cast<Point *>(uprv_realloc(pInsertPoints->points,
- pInsertPoints->capacity*2*sizeof(Point)));
- if (pInsertPoints->points == NULL)
- {
- pInsertPoints->points=savePoints;
- pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- else pInsertPoints->capacity*=2;
- }
- point.pos=pos;
- point.flag=flag;
- pInsertPoints->points[pInsertPoints->size]=point;
- pInsertPoints->size++;
-#undef FIRSTALLOC
-}
-
-static void
-setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
-{
- DirProp *dirProps=pBiDi->dirProps, dirProp;
- UBiDiLevel *levels=pBiDi->levels;
- int32_t isolateCount=0, k;
- for(k=start; k<limit; k++) {
- dirProp=dirProps[k];
- if(dirProp==PDI)
- isolateCount--;
- if(isolateCount==0)
- levels[k]=level;
- if(dirProp==LRI || dirProp==RLI)
- isolateCount++;
- }
-}
-
-/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
-
-/*
- * This implementation of the (Wn) rules applies all rules in one pass.
- * In order to do so, it needs a look-ahead of typically 1 character
- * (except for W5: sequences of ET) and keeps track of changes
- * in a rule Wp that affect a later Wq (p<q).
- *
- * The (Nn) and (In) rules are also performed in that same single loop,
- * but effectively one iteration behind for white space.
- *
- * Since all implicit rules are performed in one step, it is not necessary
- * to actually store the intermediate directional properties in dirProps[].
- */
-
-static void
-processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
- int32_t start, int32_t limit) {
- uint8_t cell, oldStateSeq, actionSeq;
- const ImpTab * pImpTab=pLevState->pImpTab;
- const ImpAct * pImpAct=pLevState->pImpAct;
- UBiDiLevel * levels=pBiDi->levels;
- UBiDiLevel level, addLevel;
- InsertPoints * pInsertPoints;
- int32_t start0, k;
-
- start0=start; /* save original start position */
- oldStateSeq=(uint8_t)pLevState->state;
- cell=(*pImpTab)[oldStateSeq][_prop];
- pLevState->state=GET_STATE(cell); /* isolate the new state */
- actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
- addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
-
- if(actionSeq) {
- switch(actionSeq) {
- case 1: /* init ON seq */
- pLevState->startON=start0;
- break;
-
- case 2: /* prepend ON seq to current seq */
- start=pLevState->startON;
- break;
-
- case 3: /* EN/AN after R+ON */
- level=pLevState->runLevel+1;
- setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
- break;
-
- case 4: /* EN/AN before R for NUMBERS_SPECIAL */
- level=pLevState->runLevel+2;
- setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
- break;
-
- case 5: /* L or S after possible relevant EN/AN */
- /* check if we had EN after R/AL */
- if (pLevState->startL2EN >= 0) {
- addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
- }
- pLevState->startL2EN=-1; /* not within previous if since could also be -2 */
- /* check if we had any relevant EN/AN after R/AL */
- pInsertPoints=&(pBiDi->insertPoints);
- if ((pInsertPoints->capacity == 0) ||
- (pInsertPoints->size <= pInsertPoints->confirmed))
- {
- /* nothing, just clean up */
- pLevState->lastStrongRTL=-1;
- /* check if we have a pending conditional segment */
- level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
- if ((level & 1) && (pLevState->startON > 0)) { /* after ON */
- start=pLevState->startON; /* reset to basic run level */
- }
- if (_prop == DirProp_S) /* add LRM before S */
- {
- addPoint(pBiDi, start0, LRM_BEFORE);
- pInsertPoints->confirmed=pInsertPoints->size;
- }
- break;
- }
- /* reset previous RTL cont to level for LTR text */
- for (k=pLevState->lastStrongRTL+1; k<start0; k++)
- {
- /* reset odd level, leave runLevel+2 as is */
- levels[k]=(levels[k] - 2) & ~1;
- }
- /* mark insert points as confirmed */
- pInsertPoints->confirmed=pInsertPoints->size;
- pLevState->lastStrongRTL=-1;
- if (_prop == DirProp_S) /* add LRM before S */
- {
- addPoint(pBiDi, start0, LRM_BEFORE);
- pInsertPoints->confirmed=pInsertPoints->size;
- }
- break;
-
- case 6: /* R/AL after possible relevant EN/AN */
- /* just clean up */
- pInsertPoints=&(pBiDi->insertPoints);
- if (pInsertPoints->capacity > 0)
- /* remove all non confirmed insert points */
- pInsertPoints->size=pInsertPoints->confirmed;
- pLevState->startON=-1;
- pLevState->startL2EN=-1;
- pLevState->lastStrongRTL=limit - 1;
- break;
-
- case 7: /* EN/AN after R/AL + possible cont */
- /* check for real AN */
- if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
- (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
- {
- /* real AN */
- if (pLevState->startL2EN == -1) /* if no relevant EN already found */
- {
- /* just note the righmost digit as a strong RTL */
- pLevState->lastStrongRTL=limit - 1;
- break;
- }
- if (pLevState->startL2EN >= 0) /* after EN, no AN */
- {
- addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
- pLevState->startL2EN=-2;
- }
- /* note AN */
- addPoint(pBiDi, start0, LRM_BEFORE);
- break;
- }
- /* if first EN/AN after R/AL */
- if (pLevState->startL2EN == -1) {
- pLevState->startL2EN=start0;
- }
- break;
-
- case 8: /* note location of latest R/AL */
- pLevState->lastStrongRTL=limit - 1;
- pLevState->startON=-1;
- break;
-
- case 9: /* L after R+ON/EN/AN */
- /* include possible adjacent number on the left */
- for (k=start0-1; k>=0 && !(levels[k]&1); k--);
- if(k>=0) {
- addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */
- pInsertPoints=&(pBiDi->insertPoints);
- pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */
- }
- pLevState->startON=start0;
- break;
-
- case 10: /* AN after L */
- /* AN numbers between L text on both sides may be trouble. */
- /* tentatively bracket with LRMs; will be confirmed if followed by L */
- addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */
- addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */
- break;
-
- case 11: /* R after L+ON/EN/AN */
- /* false alert, infirm LRMs around previous AN */
- pInsertPoints=&(pBiDi->insertPoints);
- pInsertPoints->size=pInsertPoints->confirmed;
- if (_prop == DirProp_S) /* add RLM before S */
- {
- addPoint(pBiDi, start0, RLM_BEFORE);
- pInsertPoints->confirmed=pInsertPoints->size;
- }
- break;
-
- case 12: /* L after L+ON/AN */
- level=pLevState->runLevel + addLevel;
- for(k=pLevState->startON; k<start0; k++) {
- if (levels[k]<level)
- levels[k]=level;
- }
- pInsertPoints=&(pBiDi->insertPoints);
- pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */
- pLevState->startON=start0;
- break;
-
- case 13: /* L after L+ON+EN/AN/ON */
- level=pLevState->runLevel;
- for(k=start0-1; k>=pLevState->startON; k--) {
- if(levels[k]==level+3) {
- while(levels[k]==level+3) {
- levels[k--]-=2;
- }
- while(levels[k]==level) {
- k--;
- }
- }
- if(levels[k]==level+2) {
- levels[k]=level;
- continue;
- }
- levels[k]=level+1;
- }
- break;
-
- case 14: /* R after L+ON+EN/AN/ON */
- level=pLevState->runLevel+1;
- for(k=start0-1; k>=pLevState->startON; k--) {
- if(levels[k]>level) {
- levels[k]-=2;
- }
- }
- break;
-
- default: /* we should never get here */
- UPRV_UNREACHABLE;
- }
- }
- if((addLevel) || (start < start0)) {
- level=pLevState->runLevel + addLevel;
- if(start>=pLevState->runStart) {
- for(k=start; k<limit; k++) {
- levels[k]=level;
- }
- } else {
- setLevelsOutsideIsolates(pBiDi, start, limit, level);
- }
- }
-}
-
-/**
- * Returns the directionality of the last strong character at the end of the prologue, if any.
- * Requires prologue!=null.
- */
-static DirProp
-lastL_R_AL(UBiDi *pBiDi) {
- const UChar *text=pBiDi->prologue;
- int32_t length=pBiDi->proLength;
- int32_t i;
- UChar32 uchar;
- DirProp dirProp;
- for(i=length; i>0; ) {
- /* i is decremented by U16_PREV */
- U16_PREV(text, 0, i, uchar);
- dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
- if(dirProp==L) {
- return DirProp_L;
- }
- if(dirProp==R || dirProp==AL) {
- return DirProp_R;
- }
- if(dirProp==B) {
- return DirProp_ON;
- }
- }
- return DirProp_ON;
-}
-
-/**
- * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
- * Requires epilogue!=null.
- */
-static DirProp
-firstL_R_AL_EN_AN(UBiDi *pBiDi) {
- const UChar *text=pBiDi->epilogue;
- int32_t length=pBiDi->epiLength;
- int32_t i;
- UChar32 uchar;
- DirProp dirProp;
- for(i=0; i<length; ) {
- /* i is incremented by U16_NEXT */
- U16_NEXT(text, i, length, uchar);
- dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
- if(dirProp==L) {
- return DirProp_L;
- }
- if(dirProp==R || dirProp==AL) {
- return DirProp_R;
- }
- if(dirProp==EN) {
- return DirProp_EN;
- }
- if(dirProp==AN) {
- return DirProp_AN;
- }
- }
- return DirProp_ON;
-}
-
-static void
-resolveImplicitLevels(UBiDi *pBiDi,
- int32_t start, int32_t limit,
- DirProp sor, DirProp eor) {
- const DirProp *dirProps=pBiDi->dirProps;
- DirProp dirProp;
- LevState levState;
- int32_t i, start1, start2;
- uint16_t oldStateImp, stateImp, actionImp;
- uint8_t gprop, resProp, cell;
- UBool inverseRTL;
- DirProp nextStrongProp=R;
- int32_t nextStrongPos=-1;
-
- /* check for RTL inverse BiDi mode */
- /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
- * loop on the text characters from end to start.
- * This would need a different properties state table (at least different
- * actions) and different levels state tables (maybe very similar to the
- * LTR corresponding ones.
- */
- inverseRTL=(UBool)
- ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
- (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
- pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
-
- /* initialize for property and levels state tables */
- levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
- levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
- levState.runStart=start;
- levState.runLevel=pBiDi->levels[start];
- levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
- levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
- if(start==0 && pBiDi->proLength>0) {
- DirProp lastStrong=lastL_R_AL(pBiDi);
- if(lastStrong!=DirProp_ON) {
- sor=lastStrong;
- }
- }
- /* The isolates[] entries contain enough information to
- resume the bidi algorithm in the same state as it was
- when it was interrupted by an isolate sequence. */
- if(dirProps[start]==PDI && pBiDi->isolateCount >= 0) {
- levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
- start1=pBiDi->isolates[pBiDi->isolateCount].start1;
- stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
- levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
- pBiDi->isolateCount--;
- } else {
- levState.startON=-1;
- start1=start;
- if(dirProps[start]==NSM)
- stateImp = 1 + sor;
- else
- stateImp=0;
- levState.state=0;
- processPropertySeq(pBiDi, &levState, sor, start, start);
- }
- start2=start; /* to make Java compiler happy */
-
- for(i=start; i<=limit; i++) {
- if(i>=limit) {
- int32_t k;
- for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--);
- dirProp=dirProps[k];
- if(dirProp==LRI || dirProp==RLI)
- break; /* no forced closing for sequence ending with LRI/RLI */
- gprop=eor;
- } else {
- DirProp prop, prop1;
- prop=dirProps[i];
- if(prop==B) {
- pBiDi->isolateCount=-1; /* current isolates stack entry == none */
- }
- if(inverseRTL) {
- if(prop==AL) {
- /* AL before EN does not make it AN */
- prop=R;
- } else if(prop==EN) {
- if(nextStrongPos<=i) {
- /* look for next strong char (L/R/AL) */
- int32_t j;
- nextStrongProp=R; /* set default */
- nextStrongPos=limit;
- for(j=i+1; j<limit; j++) {
- prop1=dirProps[j];
- if(prop1==L || prop1==R || prop1==AL) {
- nextStrongProp=prop1;
- nextStrongPos=j;
- break;
- }
- }
- }
- if(nextStrongProp==AL) {
- prop=AN;
- }
- }
- }
- gprop=groupProp[prop];
- }
- oldStateImp=stateImp;
- cell=impTabProps[oldStateImp][gprop];
- stateImp=GET_STATEPROPS(cell); /* isolate the new state */
- actionImp=GET_ACTIONPROPS(cell); /* isolate the action */
- if((i==limit) && (actionImp==0)) {
- /* there is an unprocessed sequence if its property == eor */
- actionImp=1; /* process the last sequence */
- }
- if(actionImp) {
- resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
- switch(actionImp) {
- case 1: /* process current seq1, init new seq1 */
- processPropertySeq(pBiDi, &levState, resProp, start1, i);
- start1=i;
- break;
- case 2: /* init new seq2 */
- start2=i;
- break;
- case 3: /* process seq1, process seq2, init new seq1 */
- processPropertySeq(pBiDi, &levState, resProp, start1, start2);
- processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
- start1=i;
- break;
- case 4: /* process seq1, set seq1=seq2, init new seq2 */
- processPropertySeq(pBiDi, &levState, resProp, start1, start2);
- start1=start2;
- start2=i;
- break;
- default: /* we should never get here */
- UPRV_UNREACHABLE;
- }
- }
- }
-
- /* flush possible pending sequence, e.g. ON */
- if(limit==pBiDi->length && pBiDi->epiLength>0) {
- DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
- if(firstStrong!=DirProp_ON) {
- eor=firstStrong;
- }
- }
-
- /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
- for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--);
- dirProp=dirProps[i];
- if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
- pBiDi->isolateCount++;
- pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
- pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
- pBiDi->isolates[pBiDi->isolateCount].start1=start1;
- pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
- }
- else
- processPropertySeq(pBiDi, &levState, eor, limit, limit);
-}
-
-/* perform (L1) and (X9) ---------------------------------------------------- */
-
-/*
- * Reset the embedding levels for some non-graphic characters (L1).
- * This function also sets appropriate levels for BN, and
- * explicit embedding types that are supposed to have been removed
- * from the paragraph in (X9).
- */
-static void
-adjustWSLevels(UBiDi *pBiDi) {
- const DirProp *dirProps=pBiDi->dirProps;
- UBiDiLevel *levels=pBiDi->levels;
- int32_t i;
-
- if(pBiDi->flags&MASK_WS) {
- UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
- Flags flag;
-
- i=pBiDi->trailingWSStart;
- while(i>0) {
- /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
- while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) {
- if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
- levels[i]=0;
- } else {
- levels[i]=GET_PARALEVEL(pBiDi, i);
- }
- }
-
- /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
- /* here, i+1 is guaranteed to be <length */
- while(i>0) {
- flag=DIRPROP_FLAG(dirProps[--i]);
- if(flag&MASK_BN_EXPLICIT) {
- levels[i]=levels[i+1];
- } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
- levels[i]=0;
- break;
- } else if(flag&MASK_B_S) {
- levels[i]=GET_PARALEVEL(pBiDi, i);
- break;
- }
- }
- }
- }
-}
-
-U_CAPI void U_EXPORT2
-ubidi_setContext(UBiDi *pBiDi,
- const UChar *prologue, int32_t proLength,
- const UChar *epilogue, int32_t epiLength,
- UErrorCode *pErrorCode) {
- /* check the argument values */
- RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
- if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
- (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- if(proLength==-1) {
- pBiDi->proLength=u_strlen(prologue);
- } else {
- pBiDi->proLength=proLength;
- }
- if(epiLength==-1) {
- pBiDi->epiLength=u_strlen(epilogue);
- } else {
- pBiDi->epiLength=epiLength;
- }
- pBiDi->prologue=prologue;
- pBiDi->epilogue=epilogue;
-}
-
-static void
-setParaSuccess(UBiDi *pBiDi) {
- pBiDi->proLength=0; /* forget the last context */
- pBiDi->epiLength=0;
- pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
-}
-
/* Smaller of two values / absolute value. Arguments are evaluated more than
 * once, so they must be free of side effects. */
#define BIDI_MIN(x, y)   ((y)<=(x) ? (y) : (x))
#define BIDI_ABS(x)      ((x)<0 ? (-(x)) : (x))
-
-static void
-setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
- UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
- int32_t *runsOnlyMemory = NULL;
- int32_t *visualMap;
- UChar *visualText;
- int32_t saveLength, saveTrailingWSStart;
- const UBiDiLevel *levels;
- UBiDiLevel *saveLevels;
- UBiDiDirection saveDirection;
- UBool saveMayAllocateText;
- Run *runs;
- int32_t visualLength, i, j, visualStart, logicalStart,
- runCount, runLength, addedRuns, insertRemove,
- start, limit, step, indexOddBit, logicalPos,
- index0, index1;
- uint32_t saveOptions;
-
- pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
- if(length==0) {
- ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
- goto cleanup3;
- }
- /* obtain memory for mapping table and visual text */
- runsOnlyMemory=static_cast<int32_t *>(uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))));
- if(runsOnlyMemory==NULL) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- goto cleanup3;
- }
- visualMap=runsOnlyMemory;
- visualText=(UChar *)&visualMap[length];
- saveLevels=(UBiDiLevel *)&visualText[length];
- saveOptions=pBiDi->reorderingOptions;
- if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
- pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
- pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
- }
- paraLevel&=1; /* accept only 0 or 1 */
- ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- goto cleanup3;
- }
- /* we cannot access directly pBiDi->levels since it is not yet set if
- * direction is not MIXED
- */
- levels=ubidi_getLevels(pBiDi, pErrorCode);
- uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel));
- saveTrailingWSStart=pBiDi->trailingWSStart;
- saveLength=pBiDi->length;
- saveDirection=pBiDi->direction;
-
- /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
- * the visual map and the dirProps array to drive the second call
- * to ubidi_setPara (but must make provision for possible removal of
- * BiDi controls. Alternatively, only use the dirProps array via
- * customized classifier callback.
- */
- visualLength=ubidi_writeReordered(pBiDi, visualText, length,
- UBIDI_DO_MIRRORING, pErrorCode);
- ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- goto cleanup2;
- }
- pBiDi->reorderingOptions=saveOptions;
-
- pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
- paraLevel^=1;
- /* Because what we did with reorderingOptions, visualText may be shorter
- * than the original text. But we don't want the levels memory to be
- * reallocated shorter than the original length, since we need to restore
- * the levels as after the first call to ubidi_setpara() before returning.
- * We will force mayAllocateText to FALSE before the second call to
- * ubidi_setpara(), and will restore it afterwards.
- */
- saveMayAllocateText=pBiDi->mayAllocateText;
- pBiDi->mayAllocateText=FALSE;
- ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
- pBiDi->mayAllocateText=saveMayAllocateText;
- ubidi_getRuns(pBiDi, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- goto cleanup1;
- }
- /* check if some runs must be split, count how many splits */
- addedRuns=0;
- runCount=pBiDi->runCount;
- runs=pBiDi->runs;
- visualStart=0;
- for(i=0; i<runCount; i++, visualStart+=runLength) {
- runLength=runs[i].visualLimit-visualStart;
- if(runLength<2) {
- continue;
- }
- logicalStart=GET_INDEX(runs[i].logicalStart);
- for(j=logicalStart+1; j<logicalStart+runLength; j++) {
- index0=visualMap[j];
- index1=visualMap[j-1];
- if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
- addedRuns++;
- }
- }
- }
- if(addedRuns) {
- if(getRunsMemory(pBiDi, runCount+addedRuns)) {
- if(runCount==1) {
- /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
- pBiDi->runsMemory[0]=runs[0];
- }
- runs=pBiDi->runs=pBiDi->runsMemory;
- pBiDi->runCount+=addedRuns;
- } else {
- goto cleanup1;
- }
- }
- /* split runs which are not consecutive in source text */
- for(i=runCount-1; i>=0; i--) {
- runLength= i==0 ? runs[0].visualLimit :
- runs[i].visualLimit-runs[i-1].visualLimit;
- logicalStart=runs[i].logicalStart;
- indexOddBit=GET_ODD_BIT(logicalStart);
- logicalStart=GET_INDEX(logicalStart);
- if(runLength<2) {
- if(addedRuns) {
- runs[i+addedRuns]=runs[i];
- }
- logicalPos=visualMap[logicalStart];
- runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
- saveLevels[logicalPos]^indexOddBit);
- continue;
- }
- if(indexOddBit) {
- start=logicalStart;
- limit=logicalStart+runLength-1;
- step=1;
- } else {
- start=logicalStart+runLength-1;
- limit=logicalStart;
- step=-1;
- }
- for(j=start; j!=limit; j+=step) {
- index0=visualMap[j];
- index1=visualMap[j+step];
- if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
- logicalPos=BIDI_MIN(visualMap[start], index0);
- runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
- saveLevels[logicalPos]^indexOddBit);
- runs[i+addedRuns].visualLimit=runs[i].visualLimit;
- runs[i].visualLimit-=BIDI_ABS(j-start)+1;
- insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
- runs[i+addedRuns].insertRemove=insertRemove;
- runs[i].insertRemove&=~insertRemove;
- start=j+step;
- addedRuns--;
- }
- }
- if(addedRuns) {
- runs[i+addedRuns]=runs[i];
- }
- logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
- runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
- saveLevels[logicalPos]^indexOddBit);
- }
-
- cleanup1:
- /* restore initial paraLevel */
- pBiDi->paraLevel^=1;
- cleanup2:
- /* restore real text */
- pBiDi->text=text;
- pBiDi->length=saveLength;
- pBiDi->originalLength=length;
- pBiDi->direction=saveDirection;
- /* the saved levels should never excess levelsSize, but we check anyway */
- if(saveLength>pBiDi->levelsSize) {
- saveLength=pBiDi->levelsSize;
- }
- uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel));
- pBiDi->trailingWSStart=saveTrailingWSStart;
- if(pBiDi->runCount>1) {
- pBiDi->direction=UBIDI_MIXED;
- }
- cleanup3:
- /* free memory for mapping table and visual text */
- uprv_free(runsOnlyMemory);
-
- pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
-}
-
-/* ubidi_setPara ------------------------------------------------------------ */
-
-U_CAPI void U_EXPORT2
-ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
- UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
- UErrorCode *pErrorCode) {
- UBiDiDirection direction;
- DirProp *dirProps;
-
- /* check the argument values */
- RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
- if(pBiDi==NULL || text==NULL || length<-1 ||
- (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- if(length==-1) {
- length=u_strlen(text);
- }
-
- /* special treatment for RUNS_ONLY mode */
- if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
- setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
- return;
- }
-
- /* initialize the UBiDi structure */
- pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */
- pBiDi->text=text;
- pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
- pBiDi->paraLevel=paraLevel;
- pBiDi->direction=(UBiDiDirection)(paraLevel&1);
- pBiDi->paraCount=1;
-
- pBiDi->dirProps=NULL;
- pBiDi->levels=NULL;
- pBiDi->runs=NULL;
- pBiDi->insertPoints.size=0; /* clean up from last call */
- pBiDi->insertPoints.confirmed=0; /* clean up from last call */
-
- /*
- * Save the original paraLevel if contextual; otherwise, set to 0.
- */
- pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
-
- if(length==0) {
- /*
- * For an empty paragraph, create a UBiDi object with the paraLevel and
- * the flags and the direction set but without allocating zero-length arrays.
- * There is nothing more to do.
- */
- if(IS_DEFAULT_LEVEL(paraLevel)) {
- pBiDi->paraLevel&=1;
- pBiDi->defaultParaLevel=0;
- }
- pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
- pBiDi->runCount=0;
- pBiDi->paraCount=0;
- setParaSuccess(pBiDi); /* mark successful setPara */
- return;
- }
-
- pBiDi->runCount=-1;
-
- /* allocate paras memory */
- if(pBiDi->parasMemory)
- pBiDi->paras=pBiDi->parasMemory;
- else
- pBiDi->paras=pBiDi->simpleParas;
-
- /*
- * Get the directional properties,
- * the flags bit-set, and
- * determine the paragraph level if necessary.
- */
- if(getDirPropsMemory(pBiDi, length)) {
- pBiDi->dirProps=pBiDi->dirPropsMemory;
- if(!getDirProps(pBiDi)) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- } else {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- dirProps=pBiDi->dirProps;
- /* the processed length may have changed if UBIDI_OPTION_STREAMING */
- length= pBiDi->length;
- pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
-
- /* are explicit levels specified? */
- if(embeddingLevels==NULL) {
- /* no: determine explicit levels according to the (Xn) rules */\
- if(getLevelsMemory(pBiDi, length)) {
- pBiDi->levels=pBiDi->levelsMemory;
- direction=resolveExplicitLevels(pBiDi, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- } else {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- } else {
- /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
- pBiDi->levels=embeddingLevels;
- direction=checkExplicitLevels(pBiDi, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- }
-
- /* allocate isolate memory */
- if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT)
- pBiDi->isolates=pBiDi->simpleIsolates;
- else
- if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
- pBiDi->isolates=pBiDi->isolatesMemory;
- else {
- if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
- pBiDi->isolates=pBiDi->isolatesMemory;
- } else {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- }
- pBiDi->isolateCount=-1; /* current isolates stack entry == none */
-
- /*
- * The steps after (X9) in the UBiDi algorithm are performed only if
- * the paragraph text has mixed directionality!
- */
- pBiDi->direction=direction;
- switch(direction) {
- case UBIDI_LTR:
- /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
- pBiDi->trailingWSStart=0;
- break;
- case UBIDI_RTL:
- /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
- pBiDi->trailingWSStart=0;
- break;
- default:
- /*
- * Choose the right implicit state table
- */
- switch(pBiDi->reorderingMode) {
- case UBIDI_REORDER_DEFAULT:
- pBiDi->pImpTabPair=&impTab_DEFAULT;
- break;
- case UBIDI_REORDER_NUMBERS_SPECIAL:
- pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
- break;
- case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
- pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
- break;
- case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
- pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
- break;
- case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
- if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
- pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
- } else {
- pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
- }
- break;
- case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
- if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
- pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
- } else {
- pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
- }
- break;
- default:
- /* we should never get here */
- UPRV_UNREACHABLE;
- }
- /*
- * If there are no external levels specified and there
- * are no significant explicit level codes in the text,
- * then we can treat the entire paragraph as one run.
- * Otherwise, we need to perform the following rules on runs of
- * the text with the same embedding levels. (X10)
- * "Significant" explicit level codes are ones that actually
- * affect non-BN characters.
- * Examples for "insignificant" ones are empty embeddings
- * LRE-PDF, LRE-RLE-PDF-PDF, etc.
- */
- if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
- !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
- resolveImplicitLevels(pBiDi, 0, length,
- GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
- GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
- } else {
- /* sor, eor: start and end types of same-level-run */
- UBiDiLevel *levels=pBiDi->levels;
- int32_t start, limit=0;
- UBiDiLevel level, nextLevel;
- DirProp sor, eor;
-
- /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
- level=GET_PARALEVEL(pBiDi, 0);
- nextLevel=levels[0];
- if(level<nextLevel) {
- eor=GET_LR_FROM_LEVEL(nextLevel);
- } else {
- eor=GET_LR_FROM_LEVEL(level);
- }
-
- do {
- /* determine start and limit of the run (end points just behind the run) */
-
- /* the values for this run's start are the same as for the previous run's end */
- start=limit;
- level=nextLevel;
- if((start>0) && (dirProps[start-1]==B)) {
- /* except if this is a new paragraph, then set sor = para level */
- sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
- } else {
- sor=eor;
- }
-
- /* search for the limit of this run */
- while((++limit<length) &&
- ((levels[limit]==level) ||
- (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
-
- /* get the correct level of the next run */
- if(limit<length) {
- nextLevel=levels[limit];
- } else {
- nextLevel=GET_PARALEVEL(pBiDi, length-1);
- }
-
- /* determine eor from max(level, nextLevel); sor is last run's eor */
- if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
- eor=GET_LR_FROM_LEVEL(nextLevel);
- } else {
- eor=GET_LR_FROM_LEVEL(level);
- }
-
- /* if the run consists of overridden directional types, then there
- are no implicit types to be resolved */
- if(!(level&UBIDI_LEVEL_OVERRIDE)) {
- resolveImplicitLevels(pBiDi, start, limit, sor, eor);
- } else {
- /* remove the UBIDI_LEVEL_OVERRIDE flags */
- do {
- levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
- } while(start<limit);
- }
- } while(limit<length);
- }
- /* check if we got any memory shortage while adding insert points */
- if (U_FAILURE(pBiDi->insertPoints.errorCode))
- {
- *pErrorCode=pBiDi->insertPoints.errorCode;
- return;
- }
- /* reset the embedding levels for some non-graphic characters (L1), (X9) */
- adjustWSLevels(pBiDi);
- break;
- }
- /* add RLM for inverse Bidi with contextual orientation resolving
- * to RTL which would not round-trip otherwise
- */
- if((pBiDi->defaultParaLevel>0) &&
- (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
- ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
- (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
- int32_t i, j, start, last;
- UBiDiLevel level;
- DirProp dirProp;
- for(i=0; i<pBiDi->paraCount; i++) {
- last=(pBiDi->paras[i].limit)-1;
- level= static_cast<UBiDiLevel>(pBiDi->paras[i].level);
- if(level==0)
- continue; /* LTR paragraph */
- start= i==0 ? 0 : pBiDi->paras[i-1].limit;
- for(j=last; j>=start; j--) {
- dirProp=dirProps[j];
- if(dirProp==L) {
- if(j<last) {
- while(dirProps[last]==B) {
- last--;
- }
- }
- addPoint(pBiDi, last, RLM_BEFORE);
- break;
- }
- if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
- break;
- }
- }
- }
- }
-
- if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
- pBiDi->resultLength -= pBiDi->controlCount;
- } else {
- pBiDi->resultLength += pBiDi->insertPoints.size;
- }
- setParaSuccess(pBiDi); /* mark successful setPara */
-}
-
-U_CAPI void U_EXPORT2
-ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
- if(pBiDi!=NULL) {
- pBiDi->orderParagraphsLTR=orderParagraphsLTR;
- }
-}
-
-U_CAPI UBool U_EXPORT2
-ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
- if(pBiDi!=NULL) {
- return pBiDi->orderParagraphsLTR;
- } else {
- return FALSE;
- }
-}
-
-U_CAPI UBiDiDirection U_EXPORT2
-ubidi_getDirection(const UBiDi *pBiDi) {
- if(IS_VALID_PARA_OR_LINE(pBiDi)) {
- return pBiDi->direction;
- } else {
- return UBIDI_LTR;
- }
-}
-
-U_CAPI const UChar * U_EXPORT2
-ubidi_getText(const UBiDi *pBiDi) {
- if(IS_VALID_PARA_OR_LINE(pBiDi)) {
- return pBiDi->text;
- } else {
- return NULL;
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-ubidi_getLength(const UBiDi *pBiDi) {
- if(IS_VALID_PARA_OR_LINE(pBiDi)) {
- return pBiDi->originalLength;
- } else {
- return 0;
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-ubidi_getProcessedLength(const UBiDi *pBiDi) {
- if(IS_VALID_PARA_OR_LINE(pBiDi)) {
- return pBiDi->length;
- } else {
- return 0;
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-ubidi_getResultLength(const UBiDi *pBiDi) {
- if(IS_VALID_PARA_OR_LINE(pBiDi)) {
- return pBiDi->resultLength;
- } else {
- return 0;
- }
-}
-
-/* paragraphs API functions ------------------------------------------------- */
-
-U_CAPI UBiDiLevel U_EXPORT2
-ubidi_getParaLevel(const UBiDi *pBiDi) {
- if(IS_VALID_PARA_OR_LINE(pBiDi)) {
- return pBiDi->paraLevel;
- } else {
- return 0;
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-ubidi_countParagraphs(UBiDi *pBiDi) {
- if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
- return 0;
- } else {
- return pBiDi->paraCount;
- }
-}
-
-U_CAPI void U_EXPORT2
-ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
- int32_t *pParaStart, int32_t *pParaLimit,
- UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
- int32_t paraStart;
-
- /* check the argument values */
- RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
- RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
- RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
-
- pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
- if(paraIndex) {
- paraStart=pBiDi->paras[paraIndex-1].limit;
- } else {
- paraStart=0;
- }
- if(pParaStart!=NULL) {
- *pParaStart=paraStart;
- }
- if(pParaLimit!=NULL) {
- *pParaLimit=pBiDi->paras[paraIndex].limit;
- }
- if(pParaLevel!=NULL) {
- *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
- int32_t *pParaStart, int32_t *pParaLimit,
- UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
- int32_t paraIndex;
-
- /* check the argument values */
- /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
- RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
- RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
- pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
- RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
-
- for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
- ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
- return paraIndex;
-}
-
-U_CAPI void U_EXPORT2
-ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
- const void *newContext, UBiDiClassCallback **oldFn,
- const void **oldContext, UErrorCode *pErrorCode)
-{
- RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
- if(pBiDi==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- if( oldFn )
- {
- *oldFn = pBiDi->fnClassCallback;
- }
- if( oldContext )
- {
- *oldContext = pBiDi->coClassCallback;
- }
- pBiDi->fnClassCallback = newFn;
- pBiDi->coClassCallback = newContext;
-}
-
-U_CAPI void U_EXPORT2
-ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
-{
- if(pBiDi==NULL) {
- return;
- }
- if( fn )
- {
- *fn = pBiDi->fnClassCallback;
- }
- if( context )
- {
- *context = pBiDi->coClassCallback;
- }
-}
-
-U_CAPI UCharDirection U_EXPORT2
-ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
-{
- UCharDirection dir;
-
- if( pBiDi->fnClassCallback == NULL ||
- (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
- {
- dir = ubidi_getClass(c);
- }
- if(dir >= U_CHAR_DIRECTION_COUNT) {
- dir = (UCharDirection)ON;
- }
- return dir;
-}
diff --git a/contrib/libs/icu/common/ubidi_props.cpp b/contrib/libs/icu/common/ubidi_props.cpp
deleted file mode 100644
index 4141c21938a..00000000000
--- a/contrib/libs/icu/common/ubidi_props.cpp
+++ /dev/null
@@ -1,254 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2004-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: ubidi_props.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004dec30
-* created by: Markus W. Scherer
-*
-* Low-level Unicode bidi/shaping properties access.
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/uset.h"
-#include "unicode/udata.h" /* UDataInfo */
-#include "ucmndata.h" /* DataHeader */
-#include "udatamem.h"
-#include "uassert.h"
-#include "cmemory.h"
-#include "utrie2.h"
-#include "ubidi_props.h"
-#include "ucln_cmn.h"
-
-struct UBiDiProps {
- UDataMemory *mem;
- const int32_t *indexes;
- const uint32_t *mirrors;
- const uint8_t *jgArray;
- const uint8_t *jgArray2;
-
- UTrie2 trie;
- uint8_t formatVersion[4];
-};
-
-/* ubidi_props_data.h is machine-generated by genbidi --csource */
-#define INCLUDED_FROM_UBIDI_PROPS_C
-#include "ubidi_props_data.h"
-
-/* set of property starts for UnicodeSet ------------------------------------ */
-
-static UBool U_CALLCONV
-_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
- (void)end;
- (void)value;
- /* add the start code point to the USet */
- const USetAdder *sa=(const USetAdder *)context;
- sa->add(sa->set, start);
- return TRUE;
-}
-
-U_CFUNC void
-ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
- int32_t i, length;
- UChar32 c, start, limit;
-
- const uint8_t *jgArray;
- uint8_t prev, jg;
-
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
-
- /* add the start code point of each same-value range of the trie */
- utrie2_enum(&ubidi_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
-
- /* add the code points from the bidi mirroring table */
- length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH];
- for(i=0; i<length; ++i) {
- c=UBIDI_GET_MIRROR_CODE_POINT(ubidi_props_singleton.mirrors[i]);
- sa->addRange(sa->set, c, c+1);
- }
-
- /* add the code points from the Joining_Group array where the value changes */
- start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START];
- limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT];
- jgArray=ubidi_props_singleton.jgArray;
- for(;;) {
- prev=0;
- while(start<limit) {
- jg=*jgArray++;
- if(jg!=prev) {
- sa->add(sa->set, start);
- prev=jg;
- }
- ++start;
- }
- if(prev!=0) {
- /* add the limit code point if the last value was not 0 (it is now start==limit) */
- sa->add(sa->set, limit);
- }
- if(limit==ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]) {
- /* switch to the second Joining_Group range */
- start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2];
- limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2];
- jgArray=ubidi_props_singleton.jgArray2;
- } else {
- break;
- }
- }
-
- /* add code points with hardcoded properties, plus the ones following them */
-
- /* (none right now) */
-}
-
-/* property access functions ------------------------------------------------ */
-
-U_CFUNC int32_t
-ubidi_getMaxValue(UProperty which) {
- int32_t max=ubidi_props_singleton.indexes[UBIDI_MAX_VALUES_INDEX];
- switch(which) {
- case UCHAR_BIDI_CLASS:
- return (max&UBIDI_CLASS_MASK);
- case UCHAR_JOINING_GROUP:
- return (max&UBIDI_MAX_JG_MASK)>>UBIDI_MAX_JG_SHIFT;
- case UCHAR_JOINING_TYPE:
- return (max&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT;
- case UCHAR_BIDI_PAIRED_BRACKET_TYPE:
- return (max&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT;
- default:
- return -1; /* undefined */
- }
-}
-
-U_CAPI UCharDirection
-ubidi_getClass(UChar32 c) {
- uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
- return (UCharDirection)UBIDI_GET_CLASS(props);
-}
-
-U_CFUNC UBool
-ubidi_isMirrored(UChar32 c) {
- uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
- return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT);
-}
-
-static UChar32
-getMirror(UChar32 c, uint16_t props) {
- int32_t delta=UBIDI_GET_MIRROR_DELTA(props);
- if(delta!=UBIDI_ESC_MIRROR_DELTA) {
- return c+delta;
- } else {
- /* look for mirror code point in the mirrors[] table */
- const uint32_t *mirrors;
- uint32_t m;
- int32_t i, length;
- UChar32 c2;
-
- mirrors=ubidi_props_singleton.mirrors;
- length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH];
-
- /* linear search */
- for(i=0; i<length; ++i) {
- m=mirrors[i];
- c2=UBIDI_GET_MIRROR_CODE_POINT(m);
- if(c==c2) {
- /* found c, return its mirror code point using the index in m */
- return UBIDI_GET_MIRROR_CODE_POINT(mirrors[UBIDI_GET_MIRROR_INDEX(m)]);
- } else if(c<c2) {
- break;
- }
- }
-
- /* c not found, return it itself */
- return c;
- }
-}
-
-U_CFUNC UChar32
-ubidi_getMirror(UChar32 c) {
- uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
- return getMirror(c, props);
-}
-
-U_CFUNC UBool
-ubidi_isBidiControl(UChar32 c) {
- uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
- return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT);
-}
-
-U_CFUNC UBool
-ubidi_isJoinControl(UChar32 c) {
- uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
- return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT);
-}
-
-U_CFUNC UJoiningType
-ubidi_getJoiningType(UChar32 c) {
- uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
- return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT);
-}
-
-U_CFUNC UJoiningGroup
-ubidi_getJoiningGroup(UChar32 c) {
- UChar32 start, limit;
-
- start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START];
- limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT];
- if(start<=c && c<limit) {
- return (UJoiningGroup)ubidi_props_singleton.jgArray[c-start];
- }
- start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2];
- limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2];
- if(start<=c && c<limit) {
- return (UJoiningGroup)ubidi_props_singleton.jgArray2[c-start];
- }
- return U_JG_NO_JOINING_GROUP;
-}
-
-U_CFUNC UBidiPairedBracketType
-ubidi_getPairedBracketType(UChar32 c) {
- uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
- return (UBidiPairedBracketType)((props&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT);
-}
-
-U_CFUNC UChar32
-ubidi_getPairedBracket(UChar32 c) {
- uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
- if((props&UBIDI_BPT_MASK)==0) {
- return c;
- } else {
- return getMirror(c, props);
- }
-}
-
-/* public API (see uchar.h) ------------------------------------------------- */
-
-U_CFUNC UCharDirection
-u_charDirection(UChar32 c) {
- return ubidi_getClass(c);
-}
-
-U_CFUNC UBool
-u_isMirrored(UChar32 c) {
- return ubidi_isMirrored(c);
-}
-
-U_CFUNC UChar32
-u_charMirror(UChar32 c) {
- return ubidi_getMirror(c);
-}
-
-U_STABLE UChar32 U_EXPORT2
-u_getBidiPairedBracket(UChar32 c) {
- return ubidi_getPairedBracket(c);
-}
diff --git a/contrib/libs/icu/common/ubidi_props.h b/contrib/libs/icu/common/ubidi_props.h
deleted file mode 100644
index 698ee9c52bd..00000000000
--- a/contrib/libs/icu/common/ubidi_props.h
+++ /dev/null
@@ -1,148 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2004-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: ubidi_props.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004dec30
-* created by: Markus W. Scherer
-*
-* Low-level Unicode bidi/shaping properties access.
-*/
-
-#ifndef __UBIDI_PROPS_H__
-#define __UBIDI_PROPS_H__
-
-#include "unicode/utypes.h"
-#include "unicode/uset.h"
-#include "putilimp.h"
-#include "uset_imp.h"
-#include "udataswp.h"
-
-U_CDECL_BEGIN
-
-/* library API -------------------------------------------------------------- */
-
-U_CFUNC void
-ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
-
-/* property access functions */
-
-U_CFUNC int32_t
-ubidi_getMaxValue(UProperty which);
-
-U_CAPI UCharDirection
-ubidi_getClass(UChar32 c);
-
-U_CFUNC UBool
-ubidi_isMirrored(UChar32 c);
-
-U_CFUNC UChar32
-ubidi_getMirror(UChar32 c);
-
-U_CFUNC UBool
-ubidi_isBidiControl(UChar32 c);
-
-U_CFUNC UBool
-ubidi_isJoinControl(UChar32 c);
-
-U_CFUNC UJoiningType
-ubidi_getJoiningType(UChar32 c);
-
-U_CFUNC UJoiningGroup
-ubidi_getJoiningGroup(UChar32 c);
-
-U_CFUNC UBidiPairedBracketType
-ubidi_getPairedBracketType(UChar32 c);
-
-U_CFUNC UChar32
-ubidi_getPairedBracket(UChar32 c);
-
-/* file definitions --------------------------------------------------------- */
-
-#define UBIDI_DATA_NAME "ubidi"
-#define UBIDI_DATA_TYPE "icu"
-
-/* format "BiDi" */
-#define UBIDI_FMT_0 0x42
-#define UBIDI_FMT_1 0x69
-#define UBIDI_FMT_2 0x44
-#define UBIDI_FMT_3 0x69
-
-/* indexes into indexes[] */
-enum {
- UBIDI_IX_INDEX_TOP,
- UBIDI_IX_LENGTH,
- UBIDI_IX_TRIE_SIZE,
- UBIDI_IX_MIRROR_LENGTH,
-
- UBIDI_IX_JG_START,
- UBIDI_IX_JG_LIMIT,
- UBIDI_IX_JG_START2, /* new in format version 2.2, ICU 54 */
- UBIDI_IX_JG_LIMIT2,
-
- UBIDI_MAX_VALUES_INDEX=15,
- UBIDI_IX_TOP=16
-};
-
-/* definitions for 16-bit bidi/shaping properties word ---------------------- */
-
-enum {
- /* UBIDI_CLASS_SHIFT=0, */ /* bidi class: 5 bits (4..0) */
- UBIDI_JT_SHIFT=5, /* joining type: 3 bits (7..5) */
-
- UBIDI_BPT_SHIFT=8, /* Bidi_Paired_Bracket_Type(bpt): 2 bits (9..8) */
-
- UBIDI_JOIN_CONTROL_SHIFT=10,
- UBIDI_BIDI_CONTROL_SHIFT=11,
-
- UBIDI_IS_MIRRORED_SHIFT=12, /* 'is mirrored' */
- UBIDI_MIRROR_DELTA_SHIFT=13, /* bidi mirroring delta: 3 bits (15..13) */
-
- UBIDI_MAX_JG_SHIFT=16 /* max JG value in indexes[UBIDI_MAX_VALUES_INDEX] bits 23..16 */
-};
-
-#define UBIDI_CLASS_MASK 0x0000001f
-#define UBIDI_JT_MASK 0x000000e0
-#define UBIDI_BPT_MASK 0x00000300
-
-#define UBIDI_MAX_JG_MASK 0x00ff0000
-
-#define UBIDI_GET_CLASS(props) ((props)&UBIDI_CLASS_MASK)
-#define UBIDI_GET_FLAG(props, shift) (((props)>>(shift))&1)
-
-#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
-# define UBIDI_GET_MIRROR_DELTA(props) ((int16_t)(props)>>UBIDI_MIRROR_DELTA_SHIFT)
-#else
-# define UBIDI_GET_MIRROR_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UBIDI_MIRROR_DELTA_SHIFT)|0xe000) : ((props)>>UBIDI_MIRROR_DELTA_SHIFT))
-#endif
-
-enum {
- UBIDI_ESC_MIRROR_DELTA=-4,
- UBIDI_MIN_MIRROR_DELTA=-3,
- UBIDI_MAX_MIRROR_DELTA=3
-};
-
-/* definitions for 32-bit mirror table entry -------------------------------- */
-
-enum {
- /* the source Unicode code point takes 21 bits (20..0) */
- UBIDI_MIRROR_INDEX_SHIFT=21,
- UBIDI_MAX_MIRROR_INDEX=0x7ff
-};
-
-#define UBIDI_GET_MIRROR_CODE_POINT(m) (UChar32)((m)&0x1fffff)
-
-#define UBIDI_GET_MIRROR_INDEX(m) ((m)>>UBIDI_MIRROR_INDEX_SHIFT)
-
-U_CDECL_END
-
-#endif
diff --git a/contrib/libs/icu/common/ubidi_props_data.h b/contrib/libs/icu/common/ubidi_props_data.h
deleted file mode 100644
index 7a34870bd80..00000000000
--- a/contrib/libs/icu/common/ubidi_props_data.h
+++ /dev/null
@@ -1,922 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//
-// Copyright (C) 1999-2016, International Business Machines
-// Corporation and others. All Rights Reserved.
-//
-// file name: ubidi_props_data.h
-//
-// machine-generated by: icu/tools/unicode/c/genprops/bidipropsbuilder.cpp
-
-
-#ifdef INCLUDED_FROM_UBIDI_PROPS_C
-
-static const UVersionInfo ubidi_props_dataVersion={0xd,0,0,0};
-
-static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x67ec,0x6200,0x28,0x620,0x8c8,0x10ac0,0x10d24,0,0,0,0,0,0,0,0x6502b6};
-
-static const uint16_t ubidi_props_trieIndex[12536]={
-0x37c,0x384,0x38c,0x394,0x3ac,0x3b4,0x3bc,0x3c4,0x39c,0x3a4,0x39c,0x3a4,0x39c,0x3a4,0x39c,0x3a4,
-0x39c,0x3a4,0x39c,0x3a4,0x3ca,0x3d2,0x3da,0x3e2,0x3ea,0x3f2,0x3ee,0x3f6,0x3fe,0x406,0x401,0x409,
-0x39c,0x3a4,0x39c,0x3a4,0x411,0x419,0x39c,0x3a4,0x39c,0x3a4,0x39c,0x3a4,0x41f,0x427,0x42f,0x437,
-0x43f,0x447,0x44f,0x457,0x45d,0x465,0x46d,0x475,0x47d,0x485,0x48b,0x493,0x49b,0x4a3,0x4ab,0x4b3,
-0x4bf,0x4bb,0x4c7,0x4cf,0x431,0x4df,0x4e6,0x4d7,0x4ee,0x4f0,0x4f8,0x500,0x508,0x509,0x511,0x519,
-0x521,0x509,0x529,0x52e,0x521,0x509,0x536,0x53e,0x508,0x546,0x54e,0x500,0x556,0x39c,0x55e,0x562,
-0x56a,0x56c,0x574,0x57c,0x508,0x584,0x58c,0x500,0x413,0x590,0x511,0x500,0x508,0x39c,0x598,0x39c,
-0x39c,0x59e,0x5a6,0x39c,0x39c,0x5aa,0x5b2,0x39c,0x5b6,0x5bd,0x39c,0x5c5,0x5cd,0x5d4,0x555,0x39c,
-0x39c,0x5dc,0x5e4,0x5ec,0x5f4,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x5fc,0x39c,0x604,0x39c,0x39c,0x39c,
-0x60c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x614,0x39c,0x39c,0x39c,0x61c,0x61c,0x515,0x515,0x39c,0x622,0x62a,0x604,
-0x640,0x632,0x632,0x648,0x64f,0x638,0x39c,0x39c,0x39c,0x657,0x65f,0x39c,0x39c,0x39c,0x661,0x669,
-0x671,0x39c,0x678,0x680,0x39c,0x688,0x56b,0x39c,0x545,0x690,0x556,0x698,0x413,0x6a0,0x39c,0x6a7,
-0x39c,0x6ac,0x39c,0x39c,0x39c,0x39c,0x6b2,0x6ba,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x3ea,0x6c2,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x6ca,0x6d2,0x6d6,
-0x6ee,0x6f4,0x6de,0x6e6,0x6fc,0x704,0x708,0x5d7,0x710,0x718,0x720,0x39c,0x728,0x669,0x669,0x669,
-0x738,0x740,0x748,0x750,0x755,0x75d,0x765,0x730,0x76d,0x775,0x39c,0x77b,0x782,0x669,0x669,0x669,
-0x669,0x582,0x788,0x669,0x790,0x39c,0x39c,0x666,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,
-0x669,0x669,0x669,0x669,0x669,0x798,0x669,0x669,0x669,0x669,0x669,0x79e,0x669,0x669,0x7a6,0x7ae,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x669,0x669,0x669,0x669,0x7be,0x7c6,0x7ce,0x7b6,
-0x7de,0x7e6,0x7ee,0x7f5,0x7fc,0x804,0x808,0x7d6,0x669,0x669,0x669,0x810,0x816,0x669,0x669,0x81c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x824,0x39c,0x39c,0x39c,0x82c,0x39c,0x39c,0x39c,0x3ea,
-0x834,0x83c,0x840,0x39c,0x848,0x669,0x669,0x66c,0x669,0x669,0x669,0x669,0x669,0x669,0x84f,0x855,
-0x865,0x85d,0x39c,0x39c,0x86d,0x60c,0x39c,0x3c3,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x669,0x82b,
-0x3d1,0x39c,0x875,0x87d,0x39c,0x885,0x88d,0x39c,0x39c,0x39c,0x39c,0x891,0x39c,0x39c,0x661,0x3c2,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x669,0x669,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x875,0x669,0x582,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x898,0x39c,0x39c,0x89d,0x56c,0x39c,0x39c,0x5b8,0x669,0x660,0x39c,0x39c,0x8a5,0x39c,0x39c,0x39c,
-0x8ad,0x8b4,0x632,0x8bc,0x39c,0x39c,0x58e,0x8c4,0x39c,0x8cc,0x8d3,0x39c,0x4ee,0x8d8,0x39c,0x507,
-0x39c,0x8e0,0x8e8,0x509,0x39c,0x8ec,0x508,0x8f4,0x39c,0x39c,0x39c,0x8fa,0x39c,0x39c,0x39c,0x901,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x915,0x909,0x90d,0x49b,0x49b,0x49b,0x49b,0x49b,
-0x49b,0x49b,0x49b,0x49b,0x49b,0x49b,0x49b,0x49b,0x49b,0x91d,0x49b,0x49b,0x49b,0x49b,0x925,0x929,
-0x931,0x939,0x93d,0x945,0x49b,0x49b,0x49b,0x949,0x951,0x38c,0x959,0x961,0x39c,0x39c,0x39c,0x969,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0xe70,0xe70,0xeb0,0xef0,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xf28,0xf68,0xfa8,0xfb8,0xff8,0x1004,
-0xe70,0xe70,0x1044,0xe70,0xe70,0xe70,0x107c,0x10bc,0x10fc,0x113c,0x1174,0x11b4,0x11f4,0x122c,0x126c,0x12ac,
-0xa40,0xa80,0xac0,0xaff,0x1a0,0x1a0,0xb3f,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xb68,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xba8,0x1a0,0x1a0,0xbdd,0xc1d,0xc5d,0xc9d,0xcdd,0xd1d,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
-0xd9d,0xdad,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x971,0x39c,0x669,0x669,0x979,0x60c,0x39c,0x501,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x981,0x39c,0x39c,0x39c,0x988,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x990,0x431,0x431,0x431,0x431,0x431,0x431,0x431,
-0x998,0x99c,0x431,0x431,0x431,0x431,0x9ac,0x9a4,0x431,0x9b4,0x431,0x431,0x9bc,0x9c2,0x431,0x431,
-0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x9d2,0x9ca,0x431,0x431,0x431,0x431,0x431,0x431,
-0x431,0x431,0x431,0x9da,0x431,0x9e2,0x431,0x431,0x431,0x9e6,0x9ed,0x9f3,0x431,0x9f7,0x9ff,0x431,
-0x508,0xa07,0xa0e,0xa15,0x413,0xa18,0x39c,0x39c,0x4ee,0xa1f,0x39c,0xa25,0x413,0xa2a,0xa32,0x39c,
-0x39c,0xa37,0x39c,0x39c,0x39c,0x39c,0x82c,0xa3f,0x413,0x590,0x56b,0xa46,0x39c,0x39c,0x39c,0x39c,
-0x39c,0xa07,0xa4e,0x39c,0x39c,0xa56,0xa5e,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xa62,0xa6a,0x39c,
-0x39c,0xa72,0x56b,0xa7a,0x39c,0xa80,0x39c,0x39c,0x5fc,0xa88,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0xa8d,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xa94,0xa9c,0x39c,0x39c,0x39c,0xa9f,0x56b,0xaa7,
-0xaab,0xab3,0x39c,0xaba,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0xac1,0x39c,0x39c,0xacf,0xac9,0x39c,0x39c,0x39c,0xad7,0xadf,0x39c,0xae3,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x592,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xaf0,0xaeb,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0xaf8,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xaff,
-0x39c,0xb05,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0xa26,0x39c,0xb0b,0x39c,0x39c,0xb13,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x522,0xb1b,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xb22,0xb2a,0xb30,0x39c,0x39c,0x669,0x669,0xb38,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x669,0x669,0x83f,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0xb3a,0x39c,0xb41,0x39c,0xb3d,0x39c,0xb44,0x39c,0xb4c,0xb50,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x3ea,0xb58,0x3ea,
-0xb5f,0xb66,0xb6e,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xb76,0xb7e,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xb05,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0xb83,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x431,0x431,0x431,
-0x431,0x431,0x431,0xb8b,0x431,0xb93,0xb93,0xb9a,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,
-0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,
-0x90d,0x49b,0x49b,0x431,0x431,0x49b,0x49b,0x9f3,0x431,0x431,0x431,0x431,0x431,0x49b,0x49b,0x49b,
-0x49b,0x49b,0x49b,0x49b,0xba2,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x669,0xbaa,0x669,
-0x669,0x66c,0xbaf,0xbb3,0x84f,0xbbb,0x3be,0x39c,0xbc1,0x39c,0xbc6,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x779,0x39c,0x39c,0x39c,0x39c,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,
-0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,
-0x669,0x669,0x669,0x66b,0x979,0x669,0x669,0x669,0x66c,0x669,0x669,0xbce,0x66e,0xbaa,0x669,0xbd6,
-0x669,0xbde,0xbe3,0x39c,0x39c,0x669,0x669,0x669,0xbeb,0x669,0x669,0x798,0x669,0x669,0x669,0x66c,
-0xbf2,0xbfa,0xc00,0xc05,0x39c,0x669,0x669,0x669,0x669,0xc0d,0x669,0x788,0xc15,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xc1c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
-0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xc1c,0xc2c,0xc24,0xc24,
-0xc24,0xc2d,0xc2d,0xc2d,0xc2d,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0xc35,0xc2d,0xc2d,0xc2d,
-0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,
-0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,
-0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,
-0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0x37b,0x37b,0x37b,
-0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,8,7,8,9,7,0x12,0x12,
-0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,7,7,7,8,
-9,0xa,0xa,4,4,4,0xa,0xa,0x310a,0xf20a,0xa,3,6,3,6,6,
-2,2,2,2,2,2,2,2,2,2,6,0xa,0x500a,0xa,0xd00a,0xa,
-0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0x510a,0xa,0xd20a,0xa,0xa,
-0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0x510a,0xa,0xd20a,0xa,0x12,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x12,0x12,0x12,0x12,0x12,7,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
-0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
-6,0xa,4,4,4,4,0xa,0xa,0xa,0xa,0,0x900a,0xa,0xb2,0xa,0xa,
-4,4,2,2,0xa,0,0xa,0xa,0xa,2,0,0x900a,0xa,0xa,0xa,0xa,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0xa,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0xa,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0xa,0xa,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xa,0xa,0,0,
-0,0,0,0,0,0,0xa,0,0,0,0,0,0xa,0xa,0,0xa,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0xa,0,0,0,0,0,
-0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0xa,0,0,0xa,0xa,4,1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,1,0xb1,1,0xb1,0xb1,1,
-0xb1,0xb1,1,0xb1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,5,5,5,5,
-5,5,0xa,0xa,0xd,4,4,0xd,6,0xd,0xa,0xa,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xd,0x8ad,0xd,0xd,0xd,0x4d,0xd,0x8d,0x8d,
-0x8d,0x8d,0x4d,0x8d,0x4d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d,0x4d,
-0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x2d,0x4d,0x4d,0x4d,
-0x4d,0x4d,0x4d,0x4d,0x8d,0x4d,0x4d,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,5,5,5,5,
-5,5,5,5,5,5,4,5,5,0xd,0x4d,0x4d,0xb1,0x8d,0x8d,0x8d,
-0xd,0x8d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d,
-0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x4d,0x4d,
-0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,
-0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,
-0x4d,0x4d,0x4d,0x4d,0x8d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,
-0x4d,0x8d,0x4d,0x8d,0x4d,0x4d,0x8d,0x8d,0xd,0x8d,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,5,0xa,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xd,0xd,0xb1,0xb1,0xa,0xb1,0xb1,
-0xb1,0xb1,0x8d,0x8d,2,2,2,2,2,2,2,2,2,2,0x4d,0x4d,
-0x4d,0xd,0xd,0x4d,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
-0xd,0xd,0xd,0xad,0x8d,0xb1,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d,0x8d,0x4d,0x4d,
-0x4d,0x4d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x4d,0x8d,0x4d,
-0x8d,0x4d,0x4d,0x8d,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xd,0xd,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,
-0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,
-0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x8d,0x4d,0x8d,
-0x8d,0x4d,0x4d,0x4d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0xd,0xd,0xd,0xd,
-0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
-0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
-0xd,0xd,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xd,0xd,0xd,
-0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,1,1,1,1,
-1,1,1,1,1,1,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
-0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
-0x41,0x41,0x41,0x41,0x41,0x41,0x41,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-1,1,0xa,0xa,0xa,0xa,0x21,1,1,0xb1,1,1,0xb1,0xb1,0xb1,0xb1,
-1,0xb1,0xb1,0xb1,1,0xb1,0xb1,0xb1,0xb1,0xb1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xb1,0xb1,0xb1,0xb1,1,0xb1,0xb1,0xb1,0xb1,0xb1,0x81,0x41,0x41,0x41,
-0x41,0x41,0x81,0x81,0x41,0x81,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
-0x81,0x41,0x81,0x81,0x81,0xb1,0xb1,0xb1,1,1,1,1,0x4d,0xd,0x4d,0x4d,
-0x4d,0x4d,0xd,0x8d,0x4d,0x8d,0x8d,0xd,0xd,0xd,0xd,0xd,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,5,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x4d,0x4d,0x4d,0x4d,
-0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0xd,0x8d,0x4d,0x4d,0x8d,0x8d,0x4d,
-0x4d,0xd,0x4d,0x4d,0x4d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,
-0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,0,0,0,
-0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xb1,0,0,
-0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
-0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0xb1,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
-0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,4,4,0,0,0,0,0,0,0,4,
-0,0,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0,0,0xb1,
-0xb1,0xb1,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0xb1,0,0,
-0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,
-0xb1,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,
-0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0xb1,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0,
-0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0xb1,0xb1,0,
-0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,
-0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,
-0xa,4,0xa,0,0,0,0,0,0xb1,0,0,0,0xb1,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
-0xb1,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,
-0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
-0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0xa0,
-0,0,0,0,0,0,0xa0,0,0,0,0,0,0xb1,0xb1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,
-0,0,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0,0,0,0,4,0,0,0,0,0,0,0,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,0,0xb1,0x310a,0xf20a,
-0x310a,0xf20a,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,
-0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,
-0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0xb1,0xb1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0xb1,0xb1,
-0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0xb1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,
-0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0x310a,0xf20a,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0xb1,0,
-0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,
-0,0,0,4,0,0xb1,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
-0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
-0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xb1,0x40,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x4a,0xa,0xa,0x2a,0xb1,0xb1,0xb1,0x12,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
-0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0,0,0,0,0,0,0,
-0,0xb1,0xb1,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
-0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xb1,0xb1,0xb1,0,
-0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0xb1,0,
-0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0xa,0,0,0,
-0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0xb1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
-0xb1,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
-0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0xb1,0,0xb1,0xb1,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0,0,0,0,0xb1,0,0,0,0,0,0,0xb1,0,0,0,
-0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0,
-0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0xa,0xa,0,0xa,0xa,0xa,0xa,6,0x310a,0xf20a,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,0xb2,0xb2,0xb2,0xb2,0xb2,0x12,0x814,0x815,
-0x813,0x816,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,2,0,0,0,2,2,2,2,
-2,2,3,3,0xa,0x310a,0xf20a,0,9,9,9,9,9,9,9,9,
-9,9,9,0xb2,0x412,0x432,0x8a0,0x8a1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,7,0x8ab,0x8ae,0x8b0,0x8ac,0x8af,6,
-4,4,4,4,4,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,
-2,2,2,2,2,2,2,2,2,2,3,3,0xa,0x310a,0xf20a,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xa,0xa,0,0xa,0xa,0xa,0xa,0,0xa,0xa,0,0,0,0,0,0,
-0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,
-0xa,0xa,0xa,0xa,0,0xa,0,0xa,0,0xa,0,0,0,0,4,0,
-0,0,0,0,0,0,0,0,0,0,0xa,0xa,0,0,0,0,
-0x100a,0xa,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0,0,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,
-0x300a,0xf00a,0x900a,0x900a,0x900a,0x100a,0x900a,0x900a,0x100a,0x100a,0x900a,0x900a,0x900a,0x900a,0x900a,0x100a,
-0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0x700a,0x700a,0x700a,0xb00a,0xb00a,0xb00a,0xa,0xa,
-0xa,0x100a,3,4,0xa,0x900a,0x100a,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x900a,
-0x900a,0x900a,0x900a,0xa,0x900a,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,
-0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x300a,0xf00a,0x100a,0x100a,
-0x100a,0x100a,0x100a,0x900a,0x100a,0x900a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x900a,0xa,0xa,0xa,
-0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,
-0x100a,0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,
-0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
-0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x900a,0x100a,0x900a,0x900a,0x100a,0x900a,
-0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x900a,0xa,0xa,0xa,
-0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,
-0xf00a,0x900a,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
-0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,0xa,
-0x300a,0xf00a,0xa,0x500a,0x100a,0xd00a,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0x300a,0xf00a,0xa,
-0xa,0xa,0xa,0xa,0x900a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x310a,0xf20a,
-0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x100a,0x100a,0xa,0xa,
-0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x100a,0x100a,
-0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,
-0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x710a,0x320a,0xf10a,0xb20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,
-0xf20a,0xa,0xa,0x900a,0x100a,0x100a,0x100a,0x100a,0x900a,0xa,0x100a,0x900a,0x300a,0xf00a,0x100a,0x100a,
-0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,
-0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,
-0x310a,0xf20a,0x310a,0xf20a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x900a,0x900a,0x100a,0xa,
-0xa,0xa,0xa,0xa,0x300a,0xf00a,0x900a,0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0xa,0x300a,
-0xf00a,0x300a,0xf00a,0x100a,0xa,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
-0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0x100a,0xa,0x100a,0xa,
-0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,
-0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,
-0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x100a,0x100a,
-0x100a,0x100a,0xa,0x100a,0x100a,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,
-0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,
-0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,
-0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,
-0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,
-0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x900a,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,
-0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xb1,
-0xb1,0xb1,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
-0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,
-0x300a,0xf00a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
-0xb1,0xb1,0,0,0xa,0,0,0,0,0,0xa,0xa,0,0,0,0,
-0,0xa,0xa,0xa,9,0xa,0xa,0xa,0xa,0,0,0,0x310a,0xf20a,0x310a,0xf20a,
-0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,
-0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xa,
-0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
-0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0xb1,0xb1,0xb1,0xb1,0xa,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0,0,0xa,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0xb1,0,0,0,0xb1,0,0,0,0,0xb1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0xb1,0xb1,0,0xa,0xa,0xa,0xa,0xb1,0,0,0,
-0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,
-0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
-0x40,0x40,0x60,0,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
-0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,
-0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
-0,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
-0,0,0,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0xb1,0,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0xb1,0xb1,
-0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,
-0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,
-0,0,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0xb1,0,0,0xb1,0,0,0,
-0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,1,1,1,1,1,1,1,1,1,3,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
-0xd,0xd,0xd,0xd,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,1,0xb1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
-0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
-0xd,0xd,0xa,0xa,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
-0xd,0xd,0xd,0xd,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
-0x12,0x12,0x12,0x12,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
-0xd,0xa,0xd,0xd,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,
-0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,6,0xa,6,0,0xa,6,0xa,0xa,0xa,0x310a,0xf20a,0x310a,
-0xf20a,0x310a,0xf20a,4,0xa,0xa,3,3,0x300a,0xf00a,0xa,0,0xa,4,4,0xa,
-0,0,0,0,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
-0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
-0xd,0xd,0xd,0xb2,0,0xa,0xa,4,4,4,0xa,0xa,0x310a,0xf20a,0xa,3,
-6,3,6,6,2,2,2,2,2,2,2,2,2,2,6,0xa,
-0x500a,0xa,0xd00a,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x510a,
-0xa,0xd20a,0xa,0x310a,0xf20a,0xa,0x310a,0xf20a,0xa,0xa,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,4,4,0xa,0xa,0xa,4,4,0,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0xaa,0xaa,0xaa,
-0xa,0xa,0x12,0x12,0,0xa,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0,0,0,0xb1,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xa,
-1,0xb1,0xb1,0xb1,1,0xb1,0xb1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,1,1,1,1,0xb1,
-0x41,0x81,1,1,0x81,0xb1,0xb1,1,1,1,1,0x41,0x41,0x41,0x41,0x81,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-0x41,0x41,0x41,0x41,0x41,0x81,1,0x81,1,0x81,0x81,1,1,0x61,0x81,0x81,
-0x81,0x81,0x81,0x41,0x41,0x41,0x41,0x61,0x41,0x41,0x41,0x41,0x41,0x81,0x41,0x41,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0x41,0x81,0x41,0x81,0x81,0x81,0x41,0x41,0x41,0x81,0x41,0x41,0x81,0x41,0x81,0x81,
-0x41,0x81,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,0x81,0x81,0x81,0x81,0x41,0x41,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,0x4d,0x4d,0x8d,0x4d,0xb1,0xb1,0xb1,0xb1,
-0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,5,5,5,5,5,5,5,5,
-5,5,0xd,0xd,0xd,0xd,0xd,0xd,0x6d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,
-0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,
-0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,1,1,1,1,1,1,1,1,1,
-1,1,1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,0x4d,0x4d,0x4d,0x8d,0x4d,0x4d,0x4d,0x4d,
-0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0xd,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0x4d,0x4d,0x4d,0x8d,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
-0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,0x41,1,0x41,0x41,
-0x81,0x81,0x81,1,0x41,0x81,0x81,0x41,0x41,0x81,0x41,0x41,1,0x41,0x81,0x81,
-0x41,1,1,1,1,0x81,0x41,0x61,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
-0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,
-0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
-0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,
-0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0xb1,0,0,0,0,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0,0xb1,0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,
-0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,0,0,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,0,
-0,0,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0xb1,0,
-0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xa0,0xa0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
-0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
-0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
-0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0,0xa0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0,0,0,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,
-0,0xb1,0,0xb1,0,0,0,0,0,0,0,0,4,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,4,4,4,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
-0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0,
-0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0xb2,0xb2,0xb2,0xb2,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
-0xb1,0xb1,0,0,0,0,0,0,0,0,0,0xb2,0xb2,0xb2,0xb2,0xb2,
-0xb2,0xb2,0xb2,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0xa,0xa,0xb1,0xb1,0xb1,0xa,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0x100a,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0x100a,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0x100a,0,0,0,0,0,0,0,0,0,0,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
-0,0xb1,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,
-0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,4,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,0x41,0x41,0x41,0x41,
-0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
-0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xa1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
-0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xd,0xd,0xd,0xd,0xd,0xd,
-0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,2,2,2,2,
-2,2,2,2,2,2,2,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,
-0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0,0,0,
-0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,
-0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,
-0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
-0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,2,2,2,2,2,2,2,2,2,2,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x12,0x12,
-0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,
-0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,
-0x12,0xb2,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
-0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
-0x12,0x12,0x12,0x12,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
-0x12,0x12,0x12,0x12,0,0,0,0
-};
-
-static const uint32_t ubidi_props_mirrors[40]={
-0x2000ab,0xbb,0x4202215,0x4e0221f,0x3e02220,0x3a02221,0x3c02222,0x4c02224,0x2202243,0x1402245,0x120224c,0x4002298,0x44022a6,0x48022a8,0x46022a9,0x4a022ab,
-0x38022b8,0x10022cd,0x2e022f2,0x30022f3,0x32022f4,0x34022f6,0x36022f7,0x24022fa,0x26022fb,0x28022fc,0x2a022fd,0x2c022fe,0x20027dc,0xa0299b,0xc029a0,0x8029a3,
-0x16029b8,0x4029f5,0x1802ade,0x1c02ae3,0x1a02ae4,0x1e02ae5,0xe02aee,0x602bfe
-};
-
-static const uint8_t ubidi_props_jgArray[680]={
-0x2d,0,3,3,0x2c,3,0x2d,3,4,0x2a,4,4,0xd,0xd,0xd,6,
-6,0x1f,0x1f,0x23,0x23,0x21,0x21,0x28,0x28,1,1,0xb,0xb,0x37,0x37,0x37,
-0,9,0x1d,0x13,0x16,0x18,0x1a,0x10,0x2c,0x2d,0x2d,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0x1d,
-0,3,3,3,0,3,0x2c,0x2c,0x2d,4,4,4,4,4,4,4,
-4,0xd,0xd,0xd,0xd,0xd,0xd,0xd,6,6,6,6,6,6,6,6,
-6,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x23,0x23,0x23,0x21,0x21,0x28,
-1,9,9,9,9,9,9,0x1d,0x1d,0xb,0x26,0xb,0x13,0x13,0x13,0xb,
-0xb,0xb,0xb,0xb,0xb,0x16,0x16,0x16,0x16,0x1a,0x1a,0x1a,0x1a,0x38,0x15,0xd,
-0x2a,0x11,0x11,0xe,0x2c,0x2c,0x2c,0x2c,0x2c,0x2c,0x2c,0x2c,0x37,0x2f,0x37,0x2c,
-0x2d,0x2d,0x2e,0x2e,0,0x2a,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0x1f,
-0,0,0,0,0,0,0,0,0,0,0x23,0x21,1,0,0,0x15,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,0,5,0xc,0xc,7,7,0xf,0x27,0x32,0x12,0x2b,0x2b,0x30,0x31,0x14,
-0x17,0x19,0x1b,0x24,0xa,8,0x1c,0x20,0x22,0x1e,7,0x25,0x29,5,0xc,7,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0x35,0x34,0x33,
-4,4,4,4,4,4,4,0xd,0xd,6,6,0x1f,0x23,1,1,1,
-9,9,0xb,0xb,0xb,0x18,0x18,0x1a,0x1a,0x1a,0x16,0x1f,0x1f,0x23,0xd,0xd,
-0x23,0x1f,0xd,3,3,0x37,0x37,0x2d,0x2c,0x2c,0x36,0x36,0xd,0x23,0x23,0x13,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x5d,0x5a,0x60,0x63,0x5e,0x5f,0x59,0x61,0x5b,0x5c,0x62,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-4,4,0xd,0x28,9,0x1d,0x16,0x18,0x2d,0x2d,0x1f,0x2c,0x39,0,6,0x21,
-0xb,0x55,0x1f,1,0x13,0,4,4,4,0x1f,0x2d,0x56,0x58,0x57,4,4,
-4,0xd,0xb,1,0x58,0xd,0xd,0x16
-};
-
-static const uint8_t ubidi_props_jgArray2[612]={
-0x3a,0x3c,0x3c,0x40,0x40,0x3d,0,0x52,0,0x54,0x54,0,0,0x41,0x4f,0x53,
-0x43,0x43,0x43,0x44,0x3e,0x50,0x45,0x46,0x4c,0x3b,0x3b,0x48,0x48,0x4b,0x49,0x49,
-0x49,0x4a,0,0,0x4d,0,0,0,0,0,0,0x47,0x3f,0x4e,0x51,0x42,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0x65,0,0,0,0,0,0,0x65,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0x64,0,0,0x65,0,0x64,0,
-0x64,0,0,0x64
-};
-
-static const UBiDiProps ubidi_props_singleton={
- NULL,
- ubidi_props_indexes,
- ubidi_props_mirrors,
- ubidi_props_jgArray,
- ubidi_props_jgArray2,
- {
- ubidi_props_trieIndex,
- ubidi_props_trieIndex+3568,
- NULL,
- 3568,
- 8968,
- 0x1a0,
- 0xe70,
- 0x0,
- 0x0,
- 0x110000,
- 0x30f4,
- NULL, 0, FALSE, FALSE, 0, NULL
- },
- { 2,2,0,0 }
-};
-
-#endif // INCLUDED_FROM_UBIDI_PROPS_C
diff --git a/contrib/libs/icu/common/ubidiimp.h b/contrib/libs/icu/common/ubidiimp.h
deleted file mode 100644
index 9746b2bc103..00000000000
--- a/contrib/libs/icu/common/ubidiimp.h
+++ /dev/null
@@ -1,476 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1999-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: ubidiimp.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999aug06
-* created by: Markus W. Scherer, updated by Matitiahu Allouche
-*/
-
-#ifndef UBIDIIMP_H
-#define UBIDIIMP_H
-
-#include "unicode/utypes.h"
-#include "unicode/ubidi.h"
-#include "unicode/uchar.h"
-#include "ubidi_props.h"
-
-/* miscellaneous definitions ---------------------------------------------- */
-
-typedef uint8_t DirProp;
-typedef uint32_t Flags;
-
-/* Comparing the description of the BiDi algorithm with this implementation
- is easier with the same names for the BiDi types in the code as there.
- See UCharDirection in uchar.h .
-*/
-enum {
- L= U_LEFT_TO_RIGHT, /* 0 */
- R= U_RIGHT_TO_LEFT, /* 1 */
- EN= U_EUROPEAN_NUMBER, /* 2 */
- ES= U_EUROPEAN_NUMBER_SEPARATOR, /* 3 */
- ET= U_EUROPEAN_NUMBER_TERMINATOR, /* 4 */
- AN= U_ARABIC_NUMBER, /* 5 */
- CS= U_COMMON_NUMBER_SEPARATOR, /* 6 */
- B= U_BLOCK_SEPARATOR, /* 7 */
- S= U_SEGMENT_SEPARATOR, /* 8 */
- WS= U_WHITE_SPACE_NEUTRAL, /* 9 */
- ON= U_OTHER_NEUTRAL, /* 10 */
- LRE=U_LEFT_TO_RIGHT_EMBEDDING, /* 11 */
- LRO=U_LEFT_TO_RIGHT_OVERRIDE, /* 12 */
- AL= U_RIGHT_TO_LEFT_ARABIC, /* 13 */
- RLE=U_RIGHT_TO_LEFT_EMBEDDING, /* 14 */
- RLO=U_RIGHT_TO_LEFT_OVERRIDE, /* 15 */
- PDF=U_POP_DIRECTIONAL_FORMAT, /* 16 */
- NSM=U_DIR_NON_SPACING_MARK, /* 17 */
- BN= U_BOUNDARY_NEUTRAL, /* 18 */
- FSI=U_FIRST_STRONG_ISOLATE, /* 19 */
- LRI=U_LEFT_TO_RIGHT_ISOLATE, /* 20 */
- RLI=U_RIGHT_TO_LEFT_ISOLATE, /* 21 */
- PDI=U_POP_DIRECTIONAL_ISOLATE, /* 22 */
- ENL, /* EN after W7 */ /* 23 */
- ENR, /* EN not subject to W7 */ /* 24 */
- dirPropCount
-};
-
-/* Sometimes, bit values are more appropriate
- to deal with directionality properties.
- Abbreviations in these macro names refer to names
- used in the BiDi algorithm.
-*/
-#define DIRPROP_FLAG(dir) (1UL<<(dir))
-#define PURE_DIRPROP(prop) ((prop)&~0xE0) ?????????????????????????
-
-/* special flag for multiple runs from explicit embedding codes */
-#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)
-
-/* are there any characters that are LTR or RTL? */
-#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(ENL)|DIRPROP_FLAG(ENR)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(LRI))
-#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(RLI))
-#define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))
-#define MASK_STRONG_EN_AN (DIRPROP_FLAG(L)|DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN))
-
-/* explicit embedding codes */
-#define MASK_EXPLICIT (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(PDF))
-
-/* explicit isolate codes */
-#define MASK_ISO (DIRPROP_FLAG(LRI)|DIRPROP_FLAG(RLI)|DIRPROP_FLAG(FSI)|DIRPROP_FLAG(PDI))
-
-#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)
-
-/* paragraph and segment separators */
-#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))
-
-/* all types that are counted as White Space or Neutral in some steps */
-#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT|MASK_ISO)
-
-/* types that are neutrals or could become neutrals in (Wn) */
-#define MASK_POSSIBLE_N (DIRPROP_FLAG(ON)|DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_WS)
-
-/*
- * These types may be changed to "e",
- * the embedding type (L or R) of the run,
- * in the BiDi algorithm (N2)
- */
-#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
-
-/* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */
-#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))
-
-#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)
-
-/*
- * The following bit is used for the directional isolate status.
- * Stack entries corresponding to isolate sequences are greater than ISOLATE.
- */
-#define ISOLATE 0x0100
-/*
- * Paragraph level lookup for a text index.
- * GET_PARALEVEL() yields (ubidi)->paraLevel directly when defaultParaLevel is 0
- * or when index lies before paras[0].limit; in every other case it calls
- * ubidi_getParaLevelAtIndex(), which is only declared here.
- * Both macro arguments are evaluated more than once.
- */
-U_CFUNC UBiDiLevel
-ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t index);
-
-#define GET_PARALEVEL(ubidi, index) \
- ((UBiDiLevel)(!(ubidi)->defaultParaLevel || (index)<(ubidi)->paras[0].limit ? \
- (ubidi)->paraLevel : ubidi_getParaLevelAtIndex((ubidi), (index))))
-
-/* number of paras entries allocated initially without malloc */
-#define SIMPLE_PARAS_COUNT 10
-/* number of isolate entries allocated initially without malloc */
-#define SIMPLE_ISOLATES_COUNT 5
-/* number of isolate run entries for paired brackets allocated initially without malloc */
-#define SIMPLE_OPENINGS_COUNT 20
-
-#define CR 0x000D
-#define LF 0x000A
-
-/* Run structure for reordering --------------------------------------------- */
-enum {
- LRM_BEFORE=1,
- LRM_AFTER=2,
- RLM_BEFORE=4,
- RLM_AFTER=8
-};
-
-typedef struct Para { /* element type of UBiDi.paras and UBiDi.simpleParas */
- int32_t limit; /* compared against text indexes by GET_PARALEVEL; presumably the index just behind the paragraph - TODO confirm */
- int32_t level; /* presumably the level of this paragraph - TODO confirm against the code that fills paras */
-} Para;
-
-enum { /* flags for Opening.flags */
- FOUND_L=DIRPROP_FLAG(L),
- FOUND_R=DIRPROP_FLAG(R)
-};
-
-typedef struct Opening {
- int32_t position; /* position of opening bracket */
- int32_t match; /* matching char or -position of closing bracket */
- int32_t contextPos; /* position of last strong char found before opening */
- uint16_t flags; /* bits for L or R/AL found within the pair */
- UBiDiDirection contextDir; /* L or R according to last strong char before opening */
- uint8_t filler; /* to complete a nice multiple of 4 chars */
-} Opening;
-
-typedef struct IsoRun {
- int32_t contextPos; /* position of char determining context */
- uint16_t start; /* index of first opening entry for this run */
- uint16_t limit; /* index after last opening entry for this run */
- UBiDiLevel level; /* level of this run */
- DirProp lastStrong; /* bidi class of last strong char found in this run */
- DirProp lastBase; /* bidi class of last base char found in this run */
- UBiDiDirection contextDir; /* L or R to use as context for following openings */
-} IsoRun;
-
-typedef struct BracketData { /* bookkeeping built from Opening and IsoRun entries; refers back to its UBiDi object */
- UBiDi *pBiDi;
- /* array of opening entries which should be enough in most cases; no malloc() */
- Opening simpleOpenings[SIMPLE_OPENINGS_COUNT];
- Opening *openings; /* pointer to current array of entries */
- int32_t openingsCount; /* number of allocated entries */
- int32_t isoRunLast; /* index of last used entry */
- /* array of nested isolated sequence entries; can never exceed UBIDI_MAX_EXPLICIT_LEVEL
- + 1 for index 0, + 1 for before the first isolated sequence */
- IsoRun isoRuns[UBIDI_MAX_EXPLICIT_LEVEL+2];
- UBool isNumbersSpecial; /* reordering mode for NUMBERS_SPECIAL */
-} BracketData;
-
-typedef struct Isolate { /* element type of UBiDi.isolates and UBiDi.simpleIsolates (the isolates stack) */
- int32_t startON;
- int32_t start1;
- int32_t state;
- int16_t stateImp;
-} Isolate; /* NOTE(review): nothing in this header reads or writes the members; document them from the code that uses them */
-
-typedef struct Run {
- int32_t logicalStart, /* first character of the run; b31 indicates even/odd level */
- visualLimit, /* last visual position of the run +1 */
- insertRemove; /* if >0, flags for inserting LRM/RLM before/after run,
- if <0, count of bidi controls within run */
-} Run;
-
-/* in a Run, logicalStart will get this bit set if the run level is odd */
-#define INDEX_ODD_BIT (1UL<<31)
-
-#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)((level)&1)<<31))
-#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((int32_t)((level)&1)<<31))
-#define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT)
-
-#define GET_INDEX(x) ((x)&~INDEX_ODD_BIT)
-#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
-#define IS_ODD_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)!=0))
-#define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0))
-
-U_CFUNC UBool
-ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
-
-/**
- * BiDi control code points, in three contiguous groups:
- * 0x200c..0x200f (ZWNJ_CHAR..RLM_CHAR), 0x202a..0x202e (LRE_CHAR..RLO_CHAR)
- * and 0x2066..0x2069 (LRI_CHAR..PDI_CHAR).
- *
- * IS_BIDI_CONTROL_CHAR(c), defined after the enum, tests membership in exactly
- * these groups: clearing the two low bits maps 0x200c..0x200f onto ZWNJ_CHAR,
- * and each cast of a difference to uint32_t turns "c below the group start"
- * into a very large value, so a single < comparison checks both ends of the group.
- * The macro evaluates c three times.
- */
-enum {
- ZWNJ_CHAR=0x200c,
- ZWJ_CHAR, /* 0x200d */
- LRM_CHAR, /* 0x200e */
- RLM_CHAR, /* 0x200f */
- LRE_CHAR=0x202a,
- RLE_CHAR, /* 0x202b */
- PDF_CHAR, /* 0x202c */
- LRO_CHAR, /* 0x202d */
- RLO_CHAR, /* 0x202e */
- LRI_CHAR=0x2066,
- RLI_CHAR, /* 0x2067 */
- FSI_CHAR, /* 0x2068 */
- PDI_CHAR /* 0x2069 */
-};
-
-#define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5 || (uint32_t)((c)-LRI_CHAR)<4)
-
-/* InsertPoints structure for noting where to put BiDi marks ---------------- */
-
-typedef struct Point {
- int32_t pos; /* position in text */
- int32_t flag; /* flag for LRM/RLM, before/after */
-} Point;
-
-typedef struct InsertPoints {
- int32_t capacity; /* number of points allocated */
- int32_t size; /* number of points used */
- int32_t confirmed; /* number of points confirmed */
- UErrorCode errorCode; /* for eventual memory shortage */
- Point *points; /* pointer to array of points */
-} InsertPoints;
-
-
-/* UBiDi structure ----------------------------------------------------------- */
-
-struct UBiDi {
- /* pointer to parent paragraph object (pointer to self if this object is
- * a paragraph object); set to NULL in a newly opened object; set to a
- * real value after a successful execution of ubidi_setPara or ubidi_setLine
- */
- const UBiDi * pParaBiDi;
-
- /* alias pointer to the current text */
- const UChar *text;
-
- /* length of the current text */
- int32_t originalLength;
-
- /* if the UBIDI_OPTION_STREAMING option is set, this is the length
- * of text actually processed by ubidi_setPara, which may be shorter than
- * the original length.
- * Otherwise, it is identical to the original length.
- */
- int32_t length;
-
- /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or
- * marks are allowed to be inserted in one of the reordering mode, the
- * length of the result string may be different from the processed length.
- */
- int32_t resultLength;
-
- /* memory sizes in bytes */
- int32_t dirPropsSize, levelsSize, openingsSize, parasSize, runsSize, isolatesSize;
-
- /* allocated memory */
- DirProp *dirPropsMemory;
- UBiDiLevel *levelsMemory;
- Opening *openingsMemory;
- Para *parasMemory;
- Run *runsMemory;
- Isolate *isolatesMemory;
-
- /* indicators for whether memory may be allocated after ubidi_open() */
- UBool mayAllocateText, mayAllocateRuns;
-
- /* arrays with one value per text-character */
- DirProp *dirProps;
- UBiDiLevel *levels;
-
- /* are we performing an approximation of the "inverse BiDi" algorithm? */
- UBool isInverse;
-
- /* are we using the basic algorithm or its variation? */
- UBiDiReorderingMode reorderingMode;
-
- /* UBIDI_REORDER_xxx values must be ordered so that all the regular
- * logical to visual modes come first, and all inverse BiDi modes
- * come last.
- */
- #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL UBIDI_REORDER_NUMBERS_SPECIAL
-
- /* bitmask for reordering options */
- uint32_t reorderingOptions;
-
- /* must block separators receive level 0? */
- UBool orderParagraphsLTR;
-
- /* the paragraph level */
- UBiDiLevel paraLevel;
- /* original paraLevel when contextual */
- /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
- UBiDiLevel defaultParaLevel;
-
- /* context data */
- const UChar *prologue;
- int32_t proLength;
- const UChar *epilogue;
- int32_t epiLength;
-
- /* the following is set in ubidi_setPara, used in processPropertySeq */
- const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pair */
-
- /* the overall paragraph or line directionality - see UBiDiDirection */
- UBiDiDirection direction;
-
- /* flags is a bit set for which directional properties are in the text */
- Flags flags;
-
- /* lastArabicPos is index to the last AL in the text, -1 if none */
- int32_t lastArabicPos;
-
- /* characters after trailingWSStart are WS and are */
- /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
- int32_t trailingWSStart;
-
- /* fields for paragraph handling */
- int32_t paraCount; /* set in getDirProps() */
- /* filled in getDirProps() */
- Para *paras;
-
- /* for relatively short text, we only need a tiny array of paras (no malloc()) */
- Para simpleParas[SIMPLE_PARAS_COUNT];
-
- /* fields for line reordering */
- int32_t runCount; /* ==-1: runs not set up yet */
- Run *runs;
-
- /* for non-mixed text, we only need a tiny array of runs (no malloc()) */
- Run simpleRuns[1];
-
- /* maximum or current nesting depth of isolate sequences */
- /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal
- nesting encountered.
- Within resolveImplicitLevels(), this is the index of the current isolates
- stack entry. */
- int32_t isolateCount;
- Isolate *isolates;
-
- /* for simple text, have a small stack (no malloc()) */
- Isolate simpleIsolates[SIMPLE_ISOLATES_COUNT];
-
- /* for inverse Bidi with insertion of directional marks */
- InsertPoints insertPoints;
-
- /* for option UBIDI_OPTION_REMOVE_CONTROLS */
- int32_t controlCount;
-
- /* for Bidi class callback */
- UBiDiClassCallback *fnClassCallback; /* action pointer */
- const void *coClassCallback; /* context pointer */
-};
-/*
- * A paragraph object is recognized by its pParaBiDi pointing to the object itself.
- * A line object is accepted when its pParaBiDi is non-NULL and points to an
- * object that in turn passes the paragraph test.
- * Both macros yield false for a NULL x and evaluate x several times.
- */
-#define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x)))
-#define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))
-/*
- * Overlay of the pointer types of the UBiDi ...Memory members.
- * ubidi_getMemory() takes a pointer to this union; the get...Memory() macros
- * in this header cast the address of the matching UBiDi member to it.
- */
-typedef union {
- DirProp *dirPropsMemory;
- UBiDiLevel *levelsMemory;
- Opening *openingsMemory;
- Para *parasMemory;
- Run *runsMemory;
- Isolate *isolatesMemory;
-} BidiMemoryForAllocation;
-
-/*
- * Macros for initial checks at function entry.
- *
- * Each macro expands to a block that makes the calling function return when
- * its check fails: with retvalue for the RETURN_IF_ family, without a value
- * for the RETURN_VOID_IF_ family.
- * - ..._NULL_OR_FAILING_ERRCODE takes a pointer to the error code, returns if
- * the pointer is NULL or *pErrcode is a failure, and assigns nothing.
- * - ..._NOT_VALID_PARA and ..._NOT_VALID_PARA_OR_LINE assign
- * U_INVALID_STATE_ERROR to errcode, so errcode must be an assignable
- * expression such as *pErrorCode, not a pointer.
- * - ..._BAD_RANGE assigns U_ILLEGAL_ARGUMENT_ERROR to errcode unless
- * start<=arg<limit.
- */
-#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \
- if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return retvalue; \
-} UPRV_BLOCK_MACRO_END
-#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \
- if(!IS_VALID_PARA(bidi)) { \
- errcode=U_INVALID_STATE_ERROR; \
- return retvalue; \
- } \
-} UPRV_BLOCK_MACRO_END
-#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \
- if(!IS_VALID_PARA_OR_LINE(bidi)) { \
- errcode=U_INVALID_STATE_ERROR; \
- return retvalue; \
- } \
-} UPRV_BLOCK_MACRO_END
-#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \
- if((arg)<(start) || (arg)>=(limit)) { \
- (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \
- return retvalue; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode) UPRV_BLOCK_MACRO_BEGIN { \
- if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return; \
-} UPRV_BLOCK_MACRO_END
-#define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode) UPRV_BLOCK_MACRO_BEGIN { \
- if(!IS_VALID_PARA(bidi)) { \
- errcode=U_INVALID_STATE_ERROR; \
- return; \
- } \
-} UPRV_BLOCK_MACRO_END
-#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode) UPRV_BLOCK_MACRO_BEGIN { \
- if(!IS_VALID_PARA_OR_LINE(bidi)) { \
- errcode=U_INVALID_STATE_ERROR; \
- return; \
- } \
-} UPRV_BLOCK_MACRO_END
-#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode) UPRV_BLOCK_MACRO_BEGIN { \
- if((arg)<(start) || (arg)>=(limit)) { \
- (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \
- return; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/* helper function to (re)allocate memory if allowed */
-U_CFUNC UBool
-ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);
-
-/*
- * helper macros for each allocated array in UBiDi
- *
- * Every macro forwards to ubidi_getMemory() with the address of one UBiDi
- * ...Memory pointer, the address of the matching ...Size member, an
- * allocation flag and the size needed.
- * The dirProps and levels macros pass length unchanged as the size; all other
- * macros pass length multiplied by the sizeof of the element type.
- * The three macros in this first group use the object's mayAllocateText or
- * mayAllocateRuns flag as the allocation flag.
- */
-#define getDirPropsMemory(pBiDi, length) \
- ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
- (pBiDi)->mayAllocateText, (length))
-
-#define getLevelsMemory(pBiDi, length) \
- ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
- (pBiDi)->mayAllocateText, (length))
-
-#define getRunsMemory(pBiDi, length) \
- ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
- (pBiDi)->mayAllocateRuns, (length)*sizeof(Run))
-
-/* additional macros used by ubidi_open() - always allow allocation
- * (the allocation flag passed to ubidi_getMemory() is the constant TRUE) */
-#define getInitialDirPropsMemory(pBiDi, length) \
- ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
- TRUE, (length))
-
-#define getInitialLevelsMemory(pBiDi, length) \
- ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
- TRUE, (length))
-
-#define getInitialOpeningsMemory(pBiDi, length) \
- ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->openingsMemory, &(pBiDi)->openingsSize, \
- TRUE, (length)*sizeof(Opening))
-
-#define getInitialParasMemory(pBiDi, length) \
- ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
- TRUE, (length)*sizeof(Para))
-
-#define getInitialRunsMemory(pBiDi, length) \
- ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
- TRUE, (length)*sizeof(Run))
-
-#define getInitialIsolatesMemory(pBiDi, length) \
- ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->isolatesMemory, &(pBiDi)->isolatesSize, \
- TRUE, (length)*sizeof(Isolate))
-
-#endif
diff --git a/contrib/libs/icu/common/ubidiln.cpp b/contrib/libs/icu/common/ubidiln.cpp
deleted file mode 100644
index 3545f4e111c..00000000000
--- a/contrib/libs/icu/common/ubidiln.cpp
+++ /dev/null
@@ -1,1347 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1999-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: ubidiln.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999aug06
-* created by: Markus W. Scherer, updated by Matitiahu Allouche
-*/
-
-#include "cmemory.h"
-#include "unicode/utypes.h"
-#include "unicode/ustring.h"
-#include "unicode/uchar.h"
-#include "unicode/ubidi.h"
-#include "ubidiimp.h"
-#include "uassert.h"
-
-/*
- * General remarks about the functions in this file:
- *
- * These functions deal with the aspects of potentially mixed-directional
- * text in a single paragraph or in a line of a single paragraph
- * which has already been processed according to
- * the Unicode 6.3 BiDi algorithm as defined in
- * http://www.unicode.org/unicode/reports/tr9/ , version 28,
- * also described in The Unicode Standard, Version 6.3.0 .
- *
- * This means that there is a UBiDi object with a levels
- * and a dirProps array.
- * paraLevel and direction are also set.
- * Only if the length of the text is zero, then levels==dirProps==NULL.
- *
- * The overall directionality of the paragraph
- * or line is used to bypass the reordering steps if possible.
- * Even purely RTL text does not need reordering there because
- * the ubidi_getLogical/VisualIndex() functions can compute the
- * index on the fly in such a case.
- *
- * The implementation of the access to same-level-runs and of the reordering
- * do attempt to provide better performance and less memory usage compared to
- * a direct implementation of especially rule (L2) with an array of
- * one (32-bit) integer per text character.
- *
- * Here, the levels array is scanned as soon as necessary, and a vector of
- * same-level-runs is created. Reordering then is done on this vector.
- * For each run of text positions that were resolved to the same level,
- * only 8 bytes are stored: the first text position of the run and the visual
- * position behind the run after reordering.
- * One sign bit is used to hold the directionality of the run.
- * This is inefficient if there are many very short runs. If the average run
- * length is <2, then this uses more memory.
- *
- * In a further attempt to save memory, the levels array is never changed
- * after all the resolution rules (Xn, Wn, Nn, In).
- * Many functions have to consider the field trailingWSStart:
- * if it is less than length, then there is an implicit trailing run
- * at the paraLevel,
- * which is not reflected in the levels array.
- * This allows a line UBiDi object to use the same levels array as
- * its paragraph parent object.
- *
- * When a UBiDi object is created for a line of a paragraph, then the
- * paragraph's levels and dirProps arrays are reused by way of setting
- * a pointer into them, not by copying. This again saves memory and forbids to
- * change the now shared levels for (L1).
- */
-
-/* handle trailing WS (L1) -------------------------------------------------- */
-
-/*
- * setTrailingWSStart() sets the start index for a trailing
- * run of WS in the line. This is necessary because we do not modify
- * the paragraph's levels array that we just point into.
- * Using trailingWSStart is another form of performing (L1).
- *
- * To make subsequent operations easier, we also include the run
- * before the WS if it is at the paraLevel - we merge the two here.
- *
- * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is
- * set correctly for the line even when contextual multiple paragraphs.
- *
- * pBiDi is the line object being set up. Its dirProps, levels, length and
- * paraLevel must already be assigned, and length must be >0 because
- * dirProps[length-1] is read unconditionally.
- * The only field written is pBiDi->trailingWSStart.
- */
-static void
-setTrailingWSStart(UBiDi *pBiDi) {
- /* pBiDi->direction!=UBIDI_MIXED */
-
- const DirProp *dirProps=pBiDi->dirProps;
- UBiDiLevel *levels=pBiDi->levels;
- int32_t start=pBiDi->length;
- UBiDiLevel paraLevel=pBiDi->paraLevel;
-
- /* If the line is terminated by a block separator, all preceding WS etc...
- are already set to paragraph level.
- Setting trailingWSStart to pBidi->length will avoid changing the
- level of B chars from 0 to paraLevel in ubidi_getLevels when
- orderParagraphsLTR==TRUE.
- */
- if(dirProps[start-1]==B) {
- pBiDi->trailingWSStart=start; /* currently == pBiDi->length */
- return;
- }
- /* go backwards across all WS, BN, explicit codes
- (every class contained in MASK_WS, which also covers B, S and the isolate codes) */
- while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) {
- --start;
- }
-
- /* if the WS run can be merged with the previous run then do so here */
- while(start>0 && levels[start-1]==paraLevel) {
- --start;
- }
-
- pBiDi->trailingWSStart=start;
-}
-
-/* ubidi_setLine ------------------------------------------------------------ */
-/*
- * Set up pLineBiDi as a line object for the range [start, limit) of the
- * paragraph object pParaBiDi.
- *
- * Nothing happens if pErrorCode is NULL or already holds a failure.
- * *pErrorCode is set to U_INVALID_STATE_ERROR if pParaBiDi is not a valid
- * paragraph object, and to U_ILLEGAL_ARGUMENT_ERROR if
- * 0<=start<limit<=pParaBiDi->length does not hold, if pLineBiDi is NULL, or if
- * ubidi_getParagraph() returns different values for start and for limit-1.
- * All of these checks run before pLineBiDi is written to.
- *
- * The line object does not copy anything: its text, dirProps and levels
- * pointers are the parent's pointers advanced by start.
- * pLineBiDi->pParaBiDi is NULL while the line is being set up and is set to
- * pParaBiDi as the last step.
- */
-U_CAPI void U_EXPORT2
-ubidi_setLine(const UBiDi *pParaBiDi,
- int32_t start, int32_t limit,
- UBiDi *pLineBiDi,
- UErrorCode *pErrorCode) {
- int32_t length;
-
- /* check the argument values */
- RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
- RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi, *pErrorCode);
- RETURN_VOID_IF_BAD_RANGE(start, 0, limit, *pErrorCode);
- RETURN_VOID_IF_BAD_RANGE(limit, 0, pParaBiDi->length+1, *pErrorCode);
- if(pLineBiDi==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- if(ubidi_getParagraph(pParaBiDi, start, NULL, NULL, NULL, pErrorCode) !=
- ubidi_getParagraph(pParaBiDi, limit-1, NULL, NULL, NULL, pErrorCode)) {
- /* the line crosses a paragraph boundary */
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- /* set the values in pLineBiDi from its pParaBiDi parent */
- pLineBiDi->pParaBiDi=NULL; /* mark unfinished setLine */
- pLineBiDi->text=pParaBiDi->text+start;
- length=pLineBiDi->length=limit-start;
- pLineBiDi->resultLength=pLineBiDi->originalLength=length;
- pLineBiDi->paraLevel=GET_PARALEVEL(pParaBiDi, start);
- pLineBiDi->paraCount=pParaBiDi->paraCount;
- pLineBiDi->runs=NULL;
- pLineBiDi->flags=0;
- pLineBiDi->reorderingMode=pParaBiDi->reorderingMode;
- pLineBiDi->reorderingOptions=pParaBiDi->reorderingOptions;
- pLineBiDi->controlCount=0;
- if(pParaBiDi->controlCount>0) {
- /* the parent counted controls: count those inside the line and shorten resultLength by that count */
- int32_t j;
- for(j=start; j<limit; j++) {
- if(IS_BIDI_CONTROL_CHAR(pParaBiDi->text[j])) {
- pLineBiDi->controlCount++;
- }
- }
- pLineBiDi->resultLength-=pLineBiDi->controlCount;
- }
-
- pLineBiDi->dirProps=pParaBiDi->dirProps+start;
- pLineBiDi->levels=pParaBiDi->levels+start;
- pLineBiDi->runCount=-1; /* runs not set up yet */
-
- if(pParaBiDi->direction!=UBIDI_MIXED) {
- /* the parent is already trivial */
- pLineBiDi->direction=pParaBiDi->direction;
-
- /*
- * The parent's levels are all either
- * implicitly or explicitly ==paraLevel;
- * do the same here.
- * The parent's trailingWSStart is translated into line coordinates
- * and clamped to [0, length].
- */
- if(pParaBiDi->trailingWSStart<=start) {
- pLineBiDi->trailingWSStart=0;
- } else if(pParaBiDi->trailingWSStart<limit) {
- pLineBiDi->trailingWSStart=pParaBiDi->trailingWSStart-start;
- } else {
- pLineBiDi->trailingWSStart=length;
- }
- } else {
- const UBiDiLevel *levels=pLineBiDi->levels;
- int32_t i, trailingWSStart;
- UBiDiLevel level;
-
- setTrailingWSStart(pLineBiDi);
- trailingWSStart=pLineBiDi->trailingWSStart;
-
- /* recalculate pLineBiDi->direction */
- if(trailingWSStart==0) {
- /* all levels are at paraLevel */
- pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1);
- } else {
- /* get the level of the first character */
- level=(UBiDiLevel)(levels[0]&1);
-
- /* if there is anything of a different level, then the line is mixed */
- if(trailingWSStart<length && (pLineBiDi->paraLevel&1)!=level) {
- /* the trailing WS is at paraLevel, which differs from levels[0] */
- pLineBiDi->direction=UBIDI_MIXED;
- } else {
- /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
- i=1;
- for(;;) {
- if(i==trailingWSStart) {
- /* the direction values match those in level */
- pLineBiDi->direction=(UBiDiDirection)level;
- break;
- } else if((levels[i]&1)!=level) {
- pLineBiDi->direction=UBIDI_MIXED;
- break;
- }
- ++i;
- }
- }
- }
-
- switch(pLineBiDi->direction) {
- case UBIDI_LTR:
- /* make sure paraLevel is even */
- pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1);
-
- /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
- pLineBiDi->trailingWSStart=0;
- break;
- case UBIDI_RTL:
- /* make sure paraLevel is odd */
- pLineBiDi->paraLevel|=1;
-
- /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
- pLineBiDi->trailingWSStart=0;
- break;
- default:
- break;
- }
- }
- pLineBiDi->pParaBiDi=pParaBiDi; /* mark successful setLine */
- return;
-}
-/*
- * Return the level for the character at charIndex.
- *
- * The result is 0 when pBiDi is not a valid paragraph or line object or when
- * charIndex is outside [0, pBiDi->length). It is the paragraph level from
- * GET_PARALEVEL() when the object's direction is not UBIDI_MIXED or when
- * charIndex is at or behind trailingWSStart, and levels[charIndex] otherwise.
- */
-U_CAPI UBiDiLevel U_EXPORT2
-ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) {
- /* return paraLevel if in the trailing WS run, otherwise the real level */
- if(!IS_VALID_PARA_OR_LINE(pBiDi) || charIndex<0 || pBiDi->length<=charIndex) {
- return 0;
- } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) {
- return GET_PARALEVEL(pBiDi, charIndex);
- } else {
- return pBiDi->levels[charIndex];
- }
-}
-/*
- * Return an array with one level per character of the text of pBiDi.
- *
- * NULL is returned if pErrorCode is NULL or already holds a failure, and also
- * with *pErrorCode set to U_INVALID_STATE_ERROR (pBiDi is not a valid
- * paragraph or line object), U_ILLEGAL_ARGUMENT_ERROR (pBiDi->length<=0) or
- * U_MEMORY_ALLOCATION_ERROR (getLevelsMemory() failed).
- *
- * When trailingWSStart==length the existing pBiDi->levels pointer is returned.
- * Otherwise the object is modified: the levels are materialized in
- * pBiDi->levelsMemory, the part from trailingWSStart on is filled with
- * paraLevel, trailingWSStart becomes length and pBiDi->levels is redirected to
- * the new array, which is what gets returned.
- */
-U_CAPI const UBiDiLevel * U_EXPORT2
-ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
- int32_t start, length;
-
- RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, NULL);
- RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, NULL);
- if((length=pBiDi->length)<=0) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- if((start=pBiDi->trailingWSStart)==length) {
- /* the current levels array reflects the WS run */
- return pBiDi->levels;
- }
-
- /*
- * After the previous if(), we know that the levels array
- * has an implicit trailing WS run and therefore does not fully
- * reflect itself all the levels.
- * This must be a UBiDi object for a line, and
- * we need to create a new levels array.
- */
- if(getLevelsMemory(pBiDi, length)) {
- UBiDiLevel *levels=pBiDi->levelsMemory;
-
- /* nothing to copy if there is no explicit part or if levels already is the object's own buffer */
- if(start>0 && levels!=pBiDi->levels) {
- uprv_memcpy(levels, pBiDi->levels, start);
- }
- /* pBiDi->paraLevel is ok even if contextual multiple paragraphs,
- since pBidi is a line object */
- uprv_memset(levels+start, pBiDi->paraLevel, length-start);
-
- /* this new levels array is set for the line and reflects the WS run */
- pBiDi->trailingWSStart=length;
- return pBiDi->levels=levels;
- } else {
- /* out of memory */
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-}
-/*
- * Report the logical run that contains logicalPosition.
- *
- * *pLogicalLimit (if pLogicalLimit is non-NULL) receives the logical index
- * just behind the run found in pBiDi->runs. *pLevel (if pLevel is non-NULL)
- * receives the level at logicalPosition: the odd bit (0 or 1) of the run's
- * logicalStart in UBIDI_REORDER_RUNS_ONLY mode, otherwise the paragraph level
- * or levels[logicalPosition], selected by the same test as in ubidi_getLevelAt().
- *
- * Failures are silent: the error code is a local variable, and the function
- * returns without writing either output if logicalPosition is outside
- * [0, pBiDi->length) or if ubidi_countRuns() fails.
- * NOTE(review): pBiDi->length is read by the range check before
- * ubidi_countRuns() validates the object, so a NULL pBiDi is dereferenced here.
- */
-U_CAPI void U_EXPORT2
-ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
- int32_t *pLogicalLimit, UBiDiLevel *pLevel) {
- UErrorCode errorCode;
- int32_t runCount, visualStart, logicalLimit, logicalFirst, i;
- Run iRun;
-
- errorCode=U_ZERO_ERROR;
- RETURN_VOID_IF_BAD_RANGE(logicalPosition, 0, pBiDi->length, errorCode);
- /* ubidi_countRuns will check VALID_PARA_OR_LINE */
- runCount=ubidi_countRuns((UBiDi *)pBiDi, &errorCode);
- if(U_FAILURE(errorCode)) {
- return;
- }
- /* this is done based on runs rather than on levels since levels have
- a special interpretation when UBIDI_REORDER_RUNS_ONLY
- */
- visualStart=logicalLimit=0;
- iRun=pBiDi->runs[0];
-
- /* walk the runs in array order; a run's length is its visualLimit minus the previous run's visualLimit */
- for(i=0; i<runCount; i++) {
- iRun = pBiDi->runs[i];
- logicalFirst=GET_INDEX(iRun.logicalStart);
- logicalLimit=logicalFirst+iRun.visualLimit-visualStart;
- if((logicalPosition>=logicalFirst) &&
- (logicalPosition<logicalLimit)) {
- break;
- }
- visualStart = iRun.visualLimit;
- }
- if(pLogicalLimit) {
- *pLogicalLimit=logicalLimit;
- }
- if(pLevel) {
- if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
- *pLevel=(UBiDiLevel)GET_ODD_BIT(iRun.logicalStart);
- }
- else if(pBiDi->direction!=UBIDI_MIXED || logicalPosition>=pBiDi->trailingWSStart) {
- *pLevel=GET_PARALEVEL(pBiDi, logicalPosition);
- } else {
- *pLevel=pBiDi->levels[logicalPosition];
- }
- }
-}
-
-/* runs API functions ------------------------------------------------------- */
-/*
- * Return the number of runs of pBiDi, after having ubidi_getRuns() process the object.
- *
- * -1 is returned if pErrorCode is NULL or already holds a failure, if pBiDi is
- * not a valid paragraph or line object (*pErrorCode becomes
- * U_INVALID_STATE_ERROR), or if *pErrorCode holds a failure after the
- * ubidi_getRuns() call. Otherwise the result is pBiDi->runCount.
- */
-U_CAPI int32_t U_EXPORT2
-ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) {
- RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
- RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
- ubidi_getRuns(pBiDi, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return -1;
- }
- return pBiDi->runCount;
-}
-/*
- * Return the direction of the run at position runIndex of the runs array and,
- * optionally, its logical start and its length.
- *
- * *pLogicalStart (if pLogicalStart is non-NULL) is the run's logicalStart with
- * the odd bit removed. *pLength (if pLength is non-NULL) is the difference
- * between this run's visualLimit and the previous run's visualLimit, or the
- * visualLimit itself for run 0.
- * The return value is the odd bit of logicalStart cast to UBiDiDirection.
- *
- * UBIDI_LTR is returned, with the outputs untouched, when pBiDi is not a valid
- * paragraph or line object, when ubidi_getRuns() reports a failure, or when
- * runIndex is outside [0, runCount). The error code is a local variable, so
- * these cases are not reported to the caller in any other way.
- */
-U_CAPI UBiDiDirection U_EXPORT2
-ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
- int32_t *pLogicalStart, int32_t *pLength)
-{
- int32_t start;
- UErrorCode errorCode = U_ZERO_ERROR;
- RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, errorCode, UBIDI_LTR);
- ubidi_getRuns(pBiDi, &errorCode);
- if(U_FAILURE(errorCode)) {
- return UBIDI_LTR;
- }
- RETURN_IF_BAD_RANGE(runIndex, 0, pBiDi->runCount, errorCode, UBIDI_LTR);
-
- start=pBiDi->runs[runIndex].logicalStart;
- if(pLogicalStart!=NULL) {
- *pLogicalStart=GET_INDEX(start);
- }
- if(pLength!=NULL) {
- if(runIndex>0) {
- *pLength=pBiDi->runs[runIndex].visualLimit-
- pBiDi->runs[runIndex-1].visualLimit;
- } else {
- *pLength=pBiDi->runs[0].visualLimit;
- }
- }
- return (UBiDiDirection)GET_ODD_BIT(start);
-}
-
-/*
- * in trivial cases there is only one trivial run; called by ubidi_getRuns()
- *
- * Points pBiDi->runs at the embedded simpleRuns array, sets runCount to 1 and
- * describes the whole text as a single run: logical start 0 with bit 31 set
- * when level is odd, visualLimit equal to pBiDi->length, and insertRemove 0.
- */
-static void
-getSingleRun(UBiDi *pBiDi, UBiDiLevel level) {
- /* simple, single-run case */
- pBiDi->runs=pBiDi->simpleRuns;
- pBiDi->runCount=1;
-
- /* fill and reorder the single run */
- pBiDi->runs[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, level);
- pBiDi->runs[0].visualLimit=pBiDi->length;
- pBiDi->runs[0].insertRemove=0;
-}
-
-/* reorder the runs array (L2) ---------------------------------------------- */
-
-/*
- * Reorder the same-level runs in the runs array.
- * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
- * All the visualStart fields=logical start before reordering.
- * The "odd" bits are not set yet.
- *
- * Reordering with this data structure lends itself to some handy shortcuts:
- *
- * Since each run is moved but not modified, and since at the initial maxLevel
- * each sequence of same-level runs consists of only one run each, we
- * don't need to do anything there and can predecrement maxLevel.
- * In many simple cases, the reordering is thus done entirely in the
- * index mapping.
- * Also, reordering occurs only down to the lowest odd level that occurs,
- * which is minLevel|1. However, if the lowest level itself is odd, then
- * in the last reordering the sequence of the runs at this level or higher
- * will be all runs, and we don't need the elaborate loop to search for them.
- * This is covered by ++minLevel instead of minLevel|=1 followed
- * by an extra reorder-all after the reorder-some loop.
- * About a trailing WS run:
- * Such a run would need special treatment because its level is not
- * reflected in levels[] if this is not a paragraph object.
- * Instead, all characters from trailingWSStart on are implicitly at
- * paraLevel.
- * However, for all maxLevel>paraLevel, this run will never be reordered
- * and does not need to be taken into account. maxLevel==paraLevel is only reordered
- * if minLevel==paraLevel is odd, which is done in the extra segment.
- * This means that for the main reordering loop we don't need to consider
- * this run and can --runCount. If it is later part of the all-runs
- * reordering, then runCount is adjusted accordingly.
- */
-static void
-reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) {
- Run *runs, tempRun;
- UBiDiLevel *levels;
- int32_t firstRun, endRun, limitRun, runCount;
-
- /* nothing to do? */
- if(maxLevel<=(minLevel|1)) {
- return;
- }
-
- /*
- * Reorder only down to the lowest odd level
- * and reorder at an odd minLevel in a separate, simpler loop.
- * See comments above for why minLevel is always incremented.
- */
- ++minLevel;
-
- runs=pBiDi->runs;
- levels=pBiDi->levels;
- runCount=pBiDi->runCount;
-
- /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */
- if(pBiDi->trailingWSStart<pBiDi->length) {
- --runCount;
- }
-
- while(--maxLevel>=minLevel) {
- firstRun=0;
-
- /* loop for all sequences of runs */
- for(;;) {
- /* look for a sequence of runs that are all at >=maxLevel */
- /* look for the first run of such a sequence */
- while(firstRun<runCount && levels[runs[firstRun].logicalStart]<maxLevel) {
- ++firstRun;
- }
- if(firstRun>=runCount) {
- break; /* no more such runs */
- }
-
- /* look for the limit run of such a sequence (the run behind it) */
- for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=maxLevel;) {}
-
- /* Swap the entire sequence of runs from firstRun to limitRun-1. */
- endRun=limitRun-1;
- while(firstRun<endRun) {
- tempRun = runs[firstRun];
- runs[firstRun]=runs[endRun];
- runs[endRun]=tempRun;
- ++firstRun;
- --endRun;
- }
-
- if(limitRun==runCount) {
- break; /* no more such runs */
- } else {
- firstRun=limitRun+1;
- }
- }
- }
-
- /* now do maxLevel==old minLevel (==odd!), see above */
- if(!(minLevel&1)) {
- firstRun=0;
-
- /* include the trailing WS run in this complete reordering */
- if(pBiDi->trailingWSStart==pBiDi->length) {
- --runCount;
- }
-
- /* Swap the entire sequence of all runs. (endRun==runCount) */
- while(firstRun<runCount) {
- tempRun=runs[firstRun];
- runs[firstRun]=runs[runCount];
- runs[runCount]=tempRun;
- ++firstRun;
- --runCount;
- }
- }
-}
-
-/* compute the runs array --------------------------------------------------- */
-
-static int32_t getRunFromLogicalIndex(UBiDi *pBiDi, int32_t logicalIndex) {
- Run *runs=pBiDi->runs;
- int32_t runCount=pBiDi->runCount, visualStart=0, i, length, logicalStart;
-
- for(i=0; i<runCount; i++) {
- length=runs[i].visualLimit-visualStart;
- logicalStart=GET_INDEX(runs[i].logicalStart);
- if((logicalIndex>=logicalStart) && (logicalIndex<(logicalStart+length))) {
- return i;
- }
- visualStart+=length;
- }
- /* we should never get here */
- UPRV_UNREACHABLE;
-}
-
-/*
- * Compute the runs array from the levels array.
- * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0
- * and the runs are reordered.
- * Odd-level runs have visualStart on their visual right edge and
- * they progress visually to the left.
- * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the
- * sum of appropriate LRM/RLM_BEFORE/AFTER flags.
- * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the
- * negative number of BiDi control characters within this run.
- */
-U_CFUNC UBool
-ubidi_getRuns(UBiDi *pBiDi, UErrorCode*) {
- /*
- * This method returns immediately if the runs are already set. This
- * includes the case of length==0 (handled in setPara)..
- */
- if (pBiDi->runCount>=0) {
- return TRUE;
- }
-
- if(pBiDi->direction!=UBIDI_MIXED) {
- /* simple, single-run case - this covers length==0 */
- /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */
- getSingleRun(pBiDi, pBiDi->paraLevel);
- } else /* UBIDI_MIXED, length>0 */ {
- /* mixed directionality */
- int32_t length=pBiDi->length, limit;
- UBiDiLevel *levels=pBiDi->levels;
- int32_t i, runCount;
- UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */
- /*
- * If there are WS characters at the end of the line
- * and the run preceding them has a level different from
- * paraLevel, then they will form their own run at paraLevel (L1).
- * Count them separately.
- * We need some special treatment for this in order to not
- * modify the levels array which a line UBiDi object shares
- * with its paragraph parent and its other line siblings.
- * In other words, for the trailing WS, it may be
- * levels[]!=paraLevel but we have to treat it like it were so.
- */
- limit=pBiDi->trailingWSStart;
- /* count the runs, there is at least one non-WS run, and limit>0 */
- runCount=0;
- for(i=0; i<limit; ++i) {
- /* increment runCount at the start of each run */
- if(levels[i]!=level) {
- ++runCount;
- level=levels[i];
- }
- }
-
- /*
- * We don't need to see if the last run can be merged with a trailing
- * WS run because setTrailingWSStart() would have done that.
- */
- if(runCount==1 && limit==length) {
- /* There is only one non-WS run and no trailing WS-run. */
- getSingleRun(pBiDi, levels[0]);
- } else /* runCount>1 || limit<length */ {
- /* allocate and set the runs */
- Run *runs;
- int32_t runIndex, start;
- UBiDiLevel minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0;
-
- /* now, count a (non-mergeable) WS run */
- if(limit<length) {
- ++runCount;
- }
-
- /* runCount>1 */
- if(getRunsMemory(pBiDi, runCount)) {
- runs=pBiDi->runsMemory;
- } else {
- return FALSE;
- }
-
- /* set the runs */
- /* FOOD FOR THOUGHT: this could be optimized, e.g.:
- * 464->444, 484->444, 575->555, 595->555
- * However, that would take longer. Check also how it would
- * interact with BiDi control removal and inserting Marks.
- */
- runIndex=0;
-
- /* search for the run limits and initialize visualLimit values with the run lengths */
- i=0;
- do {
- /* prepare this run */
- start=i;
- level=levels[i];
- if(level<minLevel) {
- minLevel=level;
- }
- if(level>maxLevel) {
- maxLevel=level;
- }
-
- /* look for the run limit */
- while(++i<limit && levels[i]==level) {}
-
- /* i is another run limit */
- runs[runIndex].logicalStart=start;
- runs[runIndex].visualLimit=i-start;
- runs[runIndex].insertRemove=0;
- ++runIndex;
- } while(i<limit);
-
- if(limit<length) {
- /* there is a separate WS run */
- runs[runIndex].logicalStart=limit;
- runs[runIndex].visualLimit=length-limit;
- /* For the trailing WS run, pBiDi->paraLevel is ok even
- if contextual multiple paragraphs. */
- if(pBiDi->paraLevel<minLevel) {
- minLevel=pBiDi->paraLevel;
- }
- }
-
- /* set the object fields */
- pBiDi->runs=runs;
- pBiDi->runCount=runCount;
-
- reorderLine(pBiDi, minLevel, maxLevel);
-
- /* now add the direction flags and adjust the visualLimit's to be just that */
- /* this loop will also handle the trailing WS run */
- limit=0;
- for(i=0; i<runCount; ++i) {
- ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]);
- limit+=runs[i].visualLimit;
- runs[i].visualLimit=limit;
- }
-
- /* Set the "odd" bit for the trailing WS run. */
- /* For a RTL paragraph, it will be the *first* run in visual order. */
- /* For the trailing WS run, pBiDi->paraLevel is ok even if
- contextual multiple paragraphs. */
- if(runIndex<runCount) {
- int32_t trailingRun = ((pBiDi->paraLevel & 1) != 0)? 0 : runIndex;
-
- ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel);
- }
- }
- }
-
- /* handle insert LRM/RLM BEFORE/AFTER run */
- if(pBiDi->insertPoints.size>0) {
- Point *point, *start=pBiDi->insertPoints.points,
- *limit=start+pBiDi->insertPoints.size;
- int32_t runIndex;
- for(point=start; point<limit; point++) {
- runIndex=getRunFromLogicalIndex(pBiDi, point->pos);
- pBiDi->runs[runIndex].insertRemove|=point->flag;
- }
- }
-
- /* handle remove BiDi control characters */
- if(pBiDi->controlCount>0) {
- int32_t runIndex;
- const UChar *start=pBiDi->text, *limit=start+pBiDi->length, *pu;
- for(pu=start; pu<limit; pu++) {
- if(IS_BIDI_CONTROL_CHAR(*pu)) {
- runIndex=getRunFromLogicalIndex(pBiDi, (int32_t)(pu-start));
- pBiDi->runs[runIndex].insertRemove--;
- }
- }
- }
-
- return TRUE;
-}
-
-static UBool
-prepareReorder(const UBiDiLevel *levels, int32_t length,
- int32_t *indexMap,
- UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) {
- int32_t start;
- UBiDiLevel level, minLevel, maxLevel;
-
- if(levels==NULL || length<=0) {
- return FALSE;
- }
-
- /* determine minLevel and maxLevel */
- minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1;
- maxLevel=0;
- for(start=length; start>0;) {
- level=levels[--start];
- if(level>UBIDI_MAX_EXPLICIT_LEVEL+1) {
- return FALSE;
- }
- if(level<minLevel) {
- minLevel=level;
- }
- if(level>maxLevel) {
- maxLevel=level;
- }
- }
- *pMinLevel=minLevel;
- *pMaxLevel=maxLevel;
-
- /* initialize the index map */
- for(start=length; start>0;) {
- --start;
- indexMap[start]=start;
- }
-
- return TRUE;
-}
-
-/* reorder a line based on a levels array (L2) ------------------------------ */
-
-U_CAPI void U_EXPORT2
-ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) {
- int32_t start, limit, sumOfSosEos;
- UBiDiLevel minLevel = 0, maxLevel = 0;
-
- if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) {
- return;
- }
-
- /* nothing to do? */
- if(minLevel==maxLevel && (minLevel&1)==0) {
- return;
- }
-
- /* reorder only down to the lowest odd level */
- minLevel|=1;
-
- /* loop maxLevel..minLevel */
- do {
- start=0;
-
- /* loop for all sequences of levels to reorder at the current maxLevel */
- for(;;) {
- /* look for a sequence of levels that are all at >=maxLevel */
- /* look for the first index of such a sequence */
- while(start<length && levels[start]<maxLevel) {
- ++start;
- }
- if(start>=length) {
- break; /* no more such sequences */
- }
-
- /* look for the limit of such a sequence (the index behind it) */
- for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {}
-
- /*
- * sos=start of sequence, eos=end of sequence
- *
- * The closed (inclusive) interval from sos to eos includes all the logical
- * and visual indexes within this sequence. They are logically and
- * visually contiguous and in the same range.
- *
- * For each run, the new visual index=sos+eos-old visual index;
- * we pre-add sos+eos into sumOfSosEos ->
- * new visual index=sumOfSosEos-old visual index;
- */
- sumOfSosEos=start+limit-1;
-
- /* reorder each index in the sequence */
- do {
- indexMap[start]=sumOfSosEos-indexMap[start];
- } while(++start<limit);
-
- /* start==limit */
- if(limit==length) {
- break; /* no more such sequences */
- } else {
- start=limit+1;
- }
- }
- } while(--maxLevel>=minLevel);
-}
-
-U_CAPI void U_EXPORT2
-ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) {
- int32_t start, end, limit, temp;
- UBiDiLevel minLevel = 0, maxLevel = 0;
-
- if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) {
- return;
- }
-
- /* nothing to do? */
- if(minLevel==maxLevel && (minLevel&1)==0) {
- return;
- }
-
- /* reorder only down to the lowest odd level */
- minLevel|=1;
-
- /* loop maxLevel..minLevel */
- do {
- start=0;
-
- /* loop for all sequences of levels to reorder at the current maxLevel */
- for(;;) {
- /* look for a sequence of levels that are all at >=maxLevel */
- /* look for the first index of such a sequence */
- while(start<length && levels[start]<maxLevel) {
- ++start;
- }
- if(start>=length) {
- break; /* no more such runs */
- }
-
- /* look for the limit of such a sequence (the index behind it) */
- for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {}
-
- /*
- * Swap the entire interval of indexes from start to limit-1.
- * We don't need to swap the levels for the purpose of this
- * algorithm: the sequence of levels that we look at does not
- * move anyway.
- */
- end=limit-1;
- while(start<end) {
- temp=indexMap[start];
- indexMap[start]=indexMap[end];
- indexMap[end]=temp;
-
- ++start;
- --end;
- }
-
- if(limit==length) {
- break; /* no more such sequences */
- } else {
- start=limit+1;
- }
- }
- } while(--maxLevel>=minLevel);
-}
-
-/* API functions for logical<->visual mapping ------------------------------- */
-
-U_CAPI int32_t U_EXPORT2
-ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) {
- int32_t visualIndex=UBIDI_MAP_NOWHERE;
- RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
- RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
- RETURN_IF_BAD_RANGE(logicalIndex, 0, pBiDi->length, *pErrorCode, -1);
-
- /* we can do the trivial cases without the runs array */
- switch(pBiDi->direction) {
- case UBIDI_LTR:
- visualIndex=logicalIndex;
- break;
- case UBIDI_RTL:
- visualIndex=pBiDi->length-logicalIndex-1;
- break;
- default:
- if(!ubidi_getRuns(pBiDi, pErrorCode)) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return -1;
- } else {
- Run *runs=pBiDi->runs;
- int32_t i, visualStart=0, offset, length;
-
- /* linear search for the run, search on the visual runs */
- for(i=0; i<pBiDi->runCount; ++i) {
- length=runs[i].visualLimit-visualStart;
- offset=logicalIndex-GET_INDEX(runs[i].logicalStart);
- if(offset>=0 && offset<length) {
- if(IS_EVEN_RUN(runs[i].logicalStart)) {
- /* LTR */
- visualIndex=visualStart+offset;
- } else {
- /* RTL */
- visualIndex=visualStart+length-offset-1;
- }
- break; /* exit for loop */
- }
- visualStart+=length;
- }
- if(i>=pBiDi->runCount) {
- return UBIDI_MAP_NOWHERE;
- }
- }
- }
-
- if(pBiDi->insertPoints.size>0) {
- /* add the number of added marks until the calculated visual index */
- Run *runs=pBiDi->runs;
- int32_t i, length, insertRemove;
- int32_t visualStart=0, markFound=0;
- for(i=0; ; i++, visualStart+=length) {
- length=runs[i].visualLimit-visualStart;
- insertRemove=runs[i].insertRemove;
- if(insertRemove & (LRM_BEFORE|RLM_BEFORE)) {
- markFound++;
- }
- /* is it the run containing the visual index? */
- if(visualIndex<runs[i].visualLimit) {
- return visualIndex+markFound;
- }
- if(insertRemove & (LRM_AFTER|RLM_AFTER)) {
- markFound++;
- }
- }
- }
- else if(pBiDi->controlCount>0) {
- /* subtract the number of controls until the calculated visual index */
- Run *runs=pBiDi->runs;
- int32_t i, j, start, limit, length, insertRemove;
- int32_t visualStart=0, controlFound=0;
- UChar uchar=pBiDi->text[logicalIndex];
- /* is the logical index pointing to a control ? */
- if(IS_BIDI_CONTROL_CHAR(uchar)) {
- return UBIDI_MAP_NOWHERE;
- }
- /* loop on runs */
- for(i=0; ; i++, visualStart+=length) {
- length=runs[i].visualLimit-visualStart;
- insertRemove=runs[i].insertRemove;
- /* calculated visual index is beyond this run? */
- if(visualIndex>=runs[i].visualLimit) {
- controlFound-=insertRemove;
- continue;
- }
- /* calculated visual index must be within current run */
- if(insertRemove==0) {
- return visualIndex-controlFound;
- }
- if(IS_EVEN_RUN(runs[i].logicalStart)) {
- /* LTR: check from run start to logical index */
- start=runs[i].logicalStart;
- limit=logicalIndex;
- } else {
- /* RTL: check from logical index to run end */
- start=logicalIndex+1;
- limit=GET_INDEX(runs[i].logicalStart)+length;
- }
- for(j=start; j<limit; j++) {
- uchar=pBiDi->text[j];
- if(IS_BIDI_CONTROL_CHAR(uchar)) {
- controlFound++;
- }
- }
- return visualIndex-controlFound;
- }
- }
-
- return visualIndex;
-}
-
-U_CAPI int32_t U_EXPORT2
-ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) {
- Run *runs;
- int32_t i, runCount, start;
- RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
- RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
- RETURN_IF_BAD_RANGE(visualIndex, 0, pBiDi->resultLength, *pErrorCode, -1);
- /* we can do the trivial cases without the runs array */
- if(pBiDi->insertPoints.size==0 && pBiDi->controlCount==0) {
- if(pBiDi->direction==UBIDI_LTR) {
- return visualIndex;
- }
- else if(pBiDi->direction==UBIDI_RTL) {
- return pBiDi->length-visualIndex-1;
- }
- }
- if(!ubidi_getRuns(pBiDi, pErrorCode)) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return -1;
- }
-
- runs=pBiDi->runs;
- runCount=pBiDi->runCount;
- if(pBiDi->insertPoints.size>0) {
- /* handle inserted LRM/RLM */
- int32_t markFound=0, insertRemove;
- int32_t visualStart=0, length;
- runs=pBiDi->runs;
- /* subtract number of marks until visual index */
- for(i=0; ; i++, visualStart+=length) {
- length=runs[i].visualLimit-visualStart;
- insertRemove=runs[i].insertRemove;
- if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
- if(visualIndex<=(visualStart+markFound)) {
- return UBIDI_MAP_NOWHERE;
- }
- markFound++;
- }
- /* is adjusted visual index within this run? */
- if(visualIndex<(runs[i].visualLimit+markFound)) {
- visualIndex-=markFound;
- break;
- }
- if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
- if(visualIndex==(visualStart+length+markFound)) {
- return UBIDI_MAP_NOWHERE;
- }
- markFound++;
- }
- }
- }
- else if(pBiDi->controlCount>0) {
- /* handle removed BiDi control characters */
- int32_t controlFound=0, insertRemove, length;
- int32_t logicalStart, logicalEnd, visualStart=0, j, k;
- UChar uchar;
- UBool evenRun;
- /* add number of controls until visual index */
- for(i=0; ; i++, visualStart+=length) {
- length=runs[i].visualLimit-visualStart;
- insertRemove=runs[i].insertRemove;
- /* is adjusted visual index beyond current run? */
- if(visualIndex>=(runs[i].visualLimit-controlFound+insertRemove)) {
- controlFound-=insertRemove;
- continue;
- }
- /* adjusted visual index is within current run */
- if(insertRemove==0) {
- visualIndex+=controlFound;
- break;
- }
- /* count non-control chars until visualIndex */
- logicalStart=runs[i].logicalStart;
- evenRun=IS_EVEN_RUN(logicalStart);
- REMOVE_ODD_BIT(logicalStart);
- logicalEnd=logicalStart+length-1;
- for(j=0; j<length; j++) {
- k= evenRun ? logicalStart+j : logicalEnd-j;
- uchar=pBiDi->text[k];
- if(IS_BIDI_CONTROL_CHAR(uchar)) {
- controlFound++;
- }
- if((visualIndex+controlFound)==(visualStart+j)) {
- break;
- }
- }
- visualIndex+=controlFound;
- break;
- }
- }
- /* handle all cases */
- if(runCount<=10) {
- /* linear search for the run */
- for(i=0; visualIndex>=runs[i].visualLimit; ++i) {}
- } else {
- /* binary search for the run */
- int32_t begin=0, limit=runCount;
-
- /* the middle if() is guaranteed to find the run, we don't need a loop limit */
- for(;;) {
- i=(begin+limit)/2;
- if(visualIndex>=runs[i].visualLimit) {
- begin=i+1;
- } else if(i==0 || visualIndex>=runs[i-1].visualLimit) {
- break;
- } else {
- limit=i;
- }
- }
- }
-
- start=runs[i].logicalStart;
- if(IS_EVEN_RUN(start)) {
- /* LTR */
- /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */
- if(i>0) {
- visualIndex-=runs[i-1].visualLimit;
- }
- return start+visualIndex;
- } else {
- /* RTL */
- return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1;
- }
-}
-
-U_CAPI void U_EXPORT2
-ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) {
- RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
- /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
- ubidi_countRuns(pBiDi, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- /* no op */
- } else if(indexMap==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- } else {
- /* fill a logical-to-visual index map using the runs[] */
- int32_t visualStart, visualLimit, i, j, k;
- int32_t logicalStart, logicalLimit;
- Run *runs=pBiDi->runs;
- if (pBiDi->length<=0) {
- return;
- }
- if (pBiDi->length>pBiDi->resultLength) {
- uprv_memset(indexMap, 0xFF, pBiDi->length*sizeof(int32_t));
- }
-
- visualStart=0;
- for(j=0; j<pBiDi->runCount; ++j) {
- logicalStart=GET_INDEX(runs[j].logicalStart);
- visualLimit=runs[j].visualLimit;
- if(IS_EVEN_RUN(runs[j].logicalStart)) {
- do { /* LTR */
- indexMap[logicalStart++]=visualStart++;
- } while(visualStart<visualLimit);
- } else {
- logicalStart+=visualLimit-visualStart; /* logicalLimit */
- do { /* RTL */
- indexMap[--logicalStart]=visualStart++;
- } while(visualStart<visualLimit);
- }
- /* visualStart==visualLimit; */
- }
-
- if(pBiDi->insertPoints.size>0) {
- int32_t markFound=0, runCount=pBiDi->runCount;
- int32_t length, insertRemove;
- visualStart=0;
- /* add number of marks found until each index */
- for(i=0; i<runCount; i++, visualStart+=length) {
- length=runs[i].visualLimit-visualStart;
- insertRemove=runs[i].insertRemove;
- if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
- markFound++;
- }
- if(markFound>0) {
- logicalStart=GET_INDEX(runs[i].logicalStart);
- logicalLimit=logicalStart+length;
- for(j=logicalStart; j<logicalLimit; j++) {
- indexMap[j]+=markFound;
- }
- }
- if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
- markFound++;
- }
- }
- }
- else if(pBiDi->controlCount>0) {
- int32_t controlFound=0, runCount=pBiDi->runCount;
- int32_t length, insertRemove;
- UBool evenRun;
- UChar uchar;
- visualStart=0;
- /* subtract number of controls found until each index */
- for(i=0; i<runCount; i++, visualStart+=length) {
- length=runs[i].visualLimit-visualStart;
- insertRemove=runs[i].insertRemove;
- /* no control found within previous runs nor within this run */
- if((controlFound-insertRemove)==0) {
- continue;
- }
- logicalStart=runs[i].logicalStart;
- evenRun=IS_EVEN_RUN(logicalStart);
- REMOVE_ODD_BIT(logicalStart);
- logicalLimit=logicalStart+length;
- /* if no control within this run */
- if(insertRemove==0) {
- for(j=logicalStart; j<logicalLimit; j++) {
- indexMap[j]-=controlFound;
- }
- continue;
- }
- for(j=0; j<length; j++) {
- k= evenRun ? logicalStart+j : logicalLimit-j-1;
- uchar=pBiDi->text[k];
- if(IS_BIDI_CONTROL_CHAR(uchar)) {
- controlFound++;
- indexMap[k]=UBIDI_MAP_NOWHERE;
- continue;
- }
- indexMap[k]-=controlFound;
- }
- }
- }
- }
-}
-
-U_CAPI void U_EXPORT2
-ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) {
- RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
- if(indexMap==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
- ubidi_countRuns(pBiDi, pErrorCode);
- if(U_SUCCESS(*pErrorCode)) {
- /* fill a visual-to-logical index map using the runs[] */
- Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount;
- int32_t logicalStart, visualStart, visualLimit, *pi=indexMap;
-
- if (pBiDi->resultLength<=0) {
- return;
- }
- visualStart=0;
- for(; runs<runsLimit; ++runs) {
- logicalStart=runs->logicalStart;
- visualLimit=runs->visualLimit;
- if(IS_EVEN_RUN(logicalStart)) {
- do { /* LTR */
- *pi++ = logicalStart++;
- } while(++visualStart<visualLimit);
- } else {
- REMOVE_ODD_BIT(logicalStart);
- logicalStart+=visualLimit-visualStart; /* logicalLimit */
- do { /* RTL */
- *pi++ = --logicalStart;
- } while(++visualStart<visualLimit);
- }
- /* visualStart==visualLimit; */
- }
-
- if(pBiDi->insertPoints.size>0) {
- int32_t markFound=0, runCount=pBiDi->runCount;
- int32_t insertRemove, i, j, k;
- runs=pBiDi->runs;
- /* count all inserted marks */
- for(i=0; i<runCount; i++) {
- insertRemove=runs[i].insertRemove;
- if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
- markFound++;
- }
- if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
- markFound++;
- }
- }
- /* move back indexes by number of preceding marks */
- k=pBiDi->resultLength;
- for(i=runCount-1; i>=0 && markFound>0; i--) {
- insertRemove=runs[i].insertRemove;
- if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
- indexMap[--k]= UBIDI_MAP_NOWHERE;
- markFound--;
- }
- visualStart= i>0 ? runs[i-1].visualLimit : 0;
- for(j=runs[i].visualLimit-1; j>=visualStart && markFound>0; j--) {
- indexMap[--k]=indexMap[j];
- }
- if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
- indexMap[--k]= UBIDI_MAP_NOWHERE;
- markFound--;
- }
- }
- }
- else if(pBiDi->controlCount>0) {
- int32_t runCount=pBiDi->runCount, logicalEnd;
- int32_t insertRemove, length, i, j, k, m;
- UChar uchar;
- UBool evenRun;
- runs=pBiDi->runs;
- visualStart=0;
- /* move forward indexes by number of preceding controls */
- k=0;
- for(i=0; i<runCount; i++, visualStart+=length) {
- length=runs[i].visualLimit-visualStart;
- insertRemove=runs[i].insertRemove;
- /* if no control found yet, nothing to do in this run */
- if((insertRemove==0)&&(k==visualStart)) {
- k+=length;
- continue;
- }
- /* if no control in this run */
- if(insertRemove==0) {
- visualLimit=runs[i].visualLimit;
- for(j=visualStart; j<visualLimit; j++) {
- indexMap[k++]=indexMap[j];
- }
- continue;
- }
- logicalStart=runs[i].logicalStart;
- evenRun=IS_EVEN_RUN(logicalStart);
- REMOVE_ODD_BIT(logicalStart);
- logicalEnd=logicalStart+length-1;
- for(j=0; j<length; j++) {
- m= evenRun ? logicalStart+j : logicalEnd-j;
- uchar=pBiDi->text[m];
- if(!IS_BIDI_CONTROL_CHAR(uchar)) {
- indexMap[k++]=m;
- }
- }
- }
- }
- }
-}
-
-U_CAPI void U_EXPORT2
-ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) {
- if(srcMap!=NULL && destMap!=NULL && length>0) {
- const int32_t *pi;
- int32_t destLength=-1, count=0;
- /* find highest value and count positive indexes in srcMap */
- pi=srcMap+length;
- while(pi>srcMap) {
- if(*--pi>destLength) {
- destLength=*pi;
- }
- if(*pi>=0) {
- count++;
- }
- }
- destLength++; /* add 1 for origin 0 */
- if(count<destLength) {
- /* we must fill unmatched destMap entries with -1 */
- uprv_memset(destMap, 0xFF, destLength*sizeof(int32_t));
- }
- pi=srcMap+length;
- while(length>0) {
- if(*--pi>=0) {
- destMap[*pi]=--length;
- } else {
- --length;
- }
- }
- }
-}
diff --git a/contrib/libs/icu/common/ubiditransform.cpp b/contrib/libs/icu/common/ubiditransform.cpp
deleted file mode 100644
index 5b0d5cf96a4..00000000000
--- a/contrib/libs/icu/common/ubiditransform.cpp
+++ /dev/null
@@ -1,530 +0,0 @@
-/*
-******************************************************************************
-*
-* © 2016 and later: Unicode, Inc. and others.
-* License & terms of use: http://www.unicode.org/copyright.html
-*
-******************************************************************************
-* file name: ubiditransform.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2016jul24
-* created by: Lina Kemmel
-*
-*/
-
-#include "cmemory.h"
-#include "unicode/ubidi.h"
-#include "unicode/ustring.h"
-#include "unicode/ushape.h"
-#include "unicode/utf16.h"
-#include "ustr_imp.h"
-#include "unicode/ubiditransform.h"
-
-/* Some convenience defines */
-#define LTR UBIDI_LTR
-#define RTL UBIDI_RTL
-#define LOGICAL UBIDI_LOGICAL
-#define VISUAL UBIDI_VISUAL
-#define SHAPE_LOGICAL U_SHAPE_TEXT_DIRECTION_LOGICAL
-#define SHAPE_VISUAL U_SHAPE_TEXT_DIRECTION_VISUAL_LTR
-
-#define CHECK_LEN(STR, LEN, ERROR) UPRV_BLOCK_MACRO_BEGIN { \
- if (LEN == 0) return 0; \
- if (LEN < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \
- if (LEN == -1) LEN = u_strlen(STR); \
-} UPRV_BLOCK_MACRO_END
-
-#define MAX_ACTIONS 7
-
-/**
- * Typedef for a pointer to a function, which performs some operation (such as
- * reordering, setting "inverse" mode, character mirroring, etc.). Return value
- * indicates whether the text was changed in the course of this operation or
- * not.
- */
-typedef UBool (*UBiDiAction)(UBiDiTransform *, UErrorCode *);
-
-/**
- * Structure that holds a predefined reordering scheme, including the following
- * information:
- * <ul>
- * <li>an input base direction,</li>
- * <li>an input order,</li>
- * <li>an output base direction,</li>
- * <li>an output order,</li>
- * <li>a digit shaping direction,</li>
- * <li>a letter shaping direction,</li>
- * <li>a base direction that should be applied when the reordering engine is
- * invoked (which can not always be derived from the caller-defined
- * options),</li>
- * <li>an array of pointers to functions that accomplish the bidi layout
- * transformation.</li>
- * </ul>
- */
-typedef struct {
- UBiDiLevel inLevel; /* input level */
- UBiDiOrder inOrder; /* input order */
- UBiDiLevel outLevel; /* output level */
- UBiDiOrder outOrder; /* output order */
- uint32_t digitsDir; /* digit shaping direction */
- uint32_t lettersDir; /* letter shaping direction */
- UBiDiLevel baseLevel; /* paragraph level to be used with setPara */
- const UBiDiAction actions[MAX_ACTIONS]; /* array of pointers to functions carrying out the transformation */
-} ReorderingScheme;
-
-struct UBiDiTransform {
- UBiDi *pBidi; /* pointer to a UBiDi object */
- const ReorderingScheme *pActiveScheme; /* effective reordering scheme */
- UChar *src; /* input text */
- UChar *dest; /* output text */
- uint32_t srcLength; /* input text length - not really needed as we are zero-terminated and can u_strlen */
- uint32_t srcSize; /* input text capacity excluding the trailing zero */
- uint32_t destSize; /* output text capacity */
- uint32_t *pDestLength; /* number of UChars written to dest */
- uint32_t reorderingOptions; /* reordering options - currently only suppot DO_MIRRORING */
- uint32_t digits; /* digit option for ArabicShaping */
- uint32_t letters; /* letter option for ArabicShaping */
-};
-
-U_DRAFT UBiDiTransform* U_EXPORT2
-ubiditransform_open(UErrorCode *pErrorCode)
-{
- UBiDiTransform *pBiDiTransform = NULL;
- if (U_SUCCESS(*pErrorCode)) {
- pBiDiTransform = (UBiDiTransform*) uprv_calloc(1, sizeof(UBiDiTransform));
- if (pBiDiTransform == NULL) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- }
- }
- return pBiDiTransform;
-}
-
-U_DRAFT void U_EXPORT2
-ubiditransform_close(UBiDiTransform *pBiDiTransform)
-{
- if (pBiDiTransform != NULL) {
- if (pBiDiTransform->pBidi != NULL) {
- ubidi_close(pBiDiTransform->pBidi);
- }
- if (pBiDiTransform->src != NULL) {
- uprv_free(pBiDiTransform->src);
- }
- uprv_free(pBiDiTransform);
- }
-}
-
-/**
- * Performs Bidi resolution of text.
- *
- * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
- * @param pErrorCode Pointer to the error code value.
- *
- * @return Whether or not this function modifies the text. Besides the return
- * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
- */
-static UBool
-action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
-{
- ubidi_setPara(pTransform->pBidi, pTransform->src, pTransform->srcLength,
- pTransform->pActiveScheme->baseLevel, NULL, pErrorCode);
- return FALSE;
-}
-
-/**
- * Performs basic reordering of text (Logical -> Visual LTR).
- *
- * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
- * @param pErrorCode Pointer to the error code value.
- *
- * @return Whether or not this function modifies the text. Besides the return
- * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
- */
-static UBool
-action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
-{
- ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize,
- static_cast<uint16_t>(pTransform->reorderingOptions), pErrorCode);
-
- *pTransform->pDestLength = pTransform->srcLength;
- pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT;
- return TRUE;
-}
-
-/**
- * Sets "inverse" mode on the <code>UBiDi</code> object.
- *
- * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
- * @param pErrorCode Pointer to the error code value.
- *
- * @return Whether or not this function modifies the text. Besides the return
- * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
- */
-static UBool
-action_setInverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
-{
- (void)pErrorCode;
- ubidi_setInverse(pTransform->pBidi, TRUE);
- ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_INVERSE_LIKE_DIRECT);
- return FALSE;
-}
-
-/**
- * Sets "runs only" reordering mode indicating a Logical LTR <-> Logical RTL
- * transformation.
- *
- * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
- * @param pErrorCode Pointer to the error code value.
- *
- * @return Whether or not this function modifies the text. Besides the return
- * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
- */
-static UBool
-action_setRunsOnly(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
-{
- (void)pErrorCode;
- ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_RUNS_ONLY);
- return FALSE;
-}
-
-/**
- * Performs string reverse.
- *
- * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
- * @param pErrorCode Pointer to the error code value.
- *
- * @return Whether or not this function modifies the text. Besides the return
- * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
- */
-static UBool
-action_reverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
-{
- ubidi_writeReverse(pTransform->src, pTransform->srcLength,
- pTransform->dest, pTransform->destSize,
- UBIDI_REORDER_DEFAULT, pErrorCode);
- *pTransform->pDestLength = pTransform->srcLength;
- return TRUE;
-}
-
-/**
- * Applies a new value to the text that serves as input at the current
- * processing step. This value is identical to the original one when we begin
- * the processing, but usually changes as the transformation progresses.
- *
- * @param pTransform A pointer to the <code>UBiDiTransform</code> structure.
- * @param newSrc A pointer whose value is to be used as input text.
- * @param newLength A length of the new text in <code>UChar</code>s.
- * @param newSize A new source capacity in <code>UChar</code>s.
- * @param pErrorCode Pointer to the error code value.
- */
-static void
-updateSrc(UBiDiTransform *pTransform, const UChar *newSrc, uint32_t newLength,
- uint32_t newSize, UErrorCode *pErrorCode)
-{
- if (newSize < newLength) {
- *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
- return;
- }
- if (newSize > pTransform->srcSize) {
- newSize += 50; // allocate slightly more than needed right now
- if (pTransform->src != NULL) {
- uprv_free(pTransform->src);
- pTransform->src = NULL;
- }
- pTransform->src = (UChar *)uprv_malloc(newSize * sizeof(UChar));
- if (pTransform->src == NULL) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- //pTransform->srcLength = pTransform->srcSize = 0;
- return;
- }
- pTransform->srcSize = newSize;
- }
- u_strncpy(pTransform->src, newSrc, newLength);
- pTransform->srcLength = u_terminateUChars(pTransform->src,
- pTransform->srcSize, newLength, pErrorCode);
-}
-
-/**
- * Calls a lower level shaping function.
- *
- * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
- * @param options Shaping options.
- * @param pErrorCode Pointer to the error code value.
- */
-static void
-doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode)
-{
- *pTransform->pDestLength = u_shapeArabic(pTransform->src,
- pTransform->srcLength, pTransform->dest, pTransform->destSize,
- options, pErrorCode);
-}
-
-/**
- * Performs digit and letter shaping.
- *
- * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
- * @param pErrorCode Pointer to the error code value.
- *
- * @return Whether or not this function modifies the text. Besides the return
- * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
- */
-static UBool
-action_shapeArabic(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
-{
- if ((pTransform->letters | pTransform->digits) == 0) {
- return FALSE;
- }
- if (pTransform->pActiveScheme->lettersDir == pTransform->pActiveScheme->digitsDir) {
- doShape(pTransform, pTransform->letters | pTransform->digits | pTransform->pActiveScheme->lettersDir,
- pErrorCode);
- } else {
- doShape(pTransform, pTransform->digits | pTransform->pActiveScheme->digitsDir, pErrorCode);
- if (U_SUCCESS(*pErrorCode)) {
- updateSrc(pTransform, pTransform->dest, *pTransform->pDestLength,
- *pTransform->pDestLength, pErrorCode);
- doShape(pTransform, pTransform->letters | pTransform->pActiveScheme->lettersDir,
- pErrorCode);
- }
- }
- return TRUE;
-}
-
-/**
- * Performs character mirroring.
- *
- * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
- * @param pErrorCode Pointer to the error code value.
- *
- * @return Whether or not this function modifies the text. Besides the return
- * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
- */
-static UBool
-action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
-{
- UChar32 c;
- uint32_t i = 0, j = 0;
- if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) {
- return FALSE;
- }
- if (pTransform->destSize < pTransform->srcLength) {
- *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
- return FALSE;
- }
- do {
- UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1;
- U16_NEXT(pTransform->src, i, pTransform->srcLength, c);
- U16_APPEND_UNSAFE(pTransform->dest, j, isOdd ? u_charMirror(c) : c);
- } while (i < pTransform->srcLength);
-
- *pTransform->pDestLength = pTransform->srcLength;
- pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT;
- return TRUE;
-}
-
-/**
- * All possible reordering schemes.
- *
- */
-static const ReorderingScheme Schemes[] =
-{
- /* 0: Logical LTR => Visual LTR */
- {LTR, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
- {action_shapeArabic, action_resolve, action_reorder, NULL}},
- /* 1: Logical RTL => Visual LTR */
- {RTL, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
- {action_resolve, action_reorder, action_shapeArabic, NULL}},
- /* 2: Logical LTR => Visual RTL */
- {LTR, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
- {action_shapeArabic, action_resolve, action_reorder, action_reverse, NULL}},
- /* 3: Logical RTL => Visual RTL */
- {RTL, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
- {action_resolve, action_reorder, action_shapeArabic, action_reverse, NULL}},
- /* 4: Visual LTR => Logical RTL */
- {LTR, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
- {action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}},
- /* 5: Visual RTL => Logical RTL */
- {RTL, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
- {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}},
- /* 6: Visual LTR => Logical LTR */
- {LTR, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
- {action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}},
- /* 7: Visual RTL => Logical LTR */
- {RTL, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
- {action_reverse, action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}},
- /* 8: Logical LTR => Logical RTL */
- {LTR, LOGICAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
- {action_shapeArabic, action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, NULL}},
- /* 9: Logical RTL => Logical LTR */
- {RTL, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, RTL,
- {action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, action_shapeArabic, NULL}},
- /* 10: Visual LTR => Visual RTL */
- {LTR, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
- {action_shapeArabic, action_setInverse, action_resolve, action_mirror, action_reverse, NULL}},
- /* 11: Visual RTL => Visual LTR */
- {RTL, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
- {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_mirror, NULL}},
- /* 12: Logical LTR => Logical LTR */
- {LTR, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
- {action_resolve, action_mirror, action_shapeArabic, NULL}},
- /* 13: Logical RTL => Logical RTL */
- {RTL, LOGICAL, RTL, LOGICAL, SHAPE_VISUAL, SHAPE_LOGICAL, RTL,
- {action_resolve, action_mirror, action_shapeArabic, NULL}},
- /* 14: Visual LTR => Visual LTR */
- {LTR, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
- {action_resolve, action_mirror, action_shapeArabic, NULL}},
- /* 15: Visual RTL => Visual RTL */
- {RTL, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
- {action_reverse, action_resolve, action_mirror, action_shapeArabic, action_reverse, NULL}}
-};
-
-static const uint32_t nSchemes = sizeof(Schemes) / sizeof(*Schemes);
-
-/**
- * When the direction option is <code>UBIDI_DEFAULT_LTR</code> or
- * <code>UBIDI_DEFAULT_RTL</code>, resolve the base direction according to that
- * of the first strong bidi character.
- */
-static void
-resolveBaseDirection(const UChar *text, uint32_t length,
- UBiDiLevel *pInLevel, UBiDiLevel *pOutLevel)
-{
- switch (*pInLevel) {
- case UBIDI_DEFAULT_LTR:
- case UBIDI_DEFAULT_RTL: {
- UBiDiLevel level = static_cast<UBiDiLevel>(ubidi_getBaseDirection(text, length));
- *pInLevel = static_cast<UBiDiLevel>(level != UBIDI_NEUTRAL) ? level
- : *pInLevel == UBIDI_DEFAULT_RTL ? static_cast<UBiDiLevel>(RTL) : static_cast<UBiDiLevel>(LTR);
- break;
- }
- default:
- *pInLevel &= 1;
- break;
- }
- switch (*pOutLevel) {
- case UBIDI_DEFAULT_LTR:
- case UBIDI_DEFAULT_RTL:
- *pOutLevel = *pInLevel;
- break;
- default:
- *pOutLevel &= 1;
- break;
- }
-}
-
-/**
- * Finds a valid <code>ReorderingScheme</code> matching the
- * caller-defined scheme.
- *
- * @return A valid <code>ReorderingScheme</code> object or NULL
- */
-static const ReorderingScheme*
-findMatchingScheme(UBiDiLevel inLevel, UBiDiLevel outLevel,
- UBiDiOrder inOrder, UBiDiOrder outOrder)
-{
- uint32_t i;
- for (i = 0; i < nSchemes; i++) {
- const ReorderingScheme *pScheme = Schemes + i;
- if (inLevel == pScheme->inLevel && outLevel == pScheme->outLevel
- && inOrder == pScheme->inOrder && outOrder == pScheme->outOrder) {
- return pScheme;
- }
- }
- return NULL;
-}
-
-U_DRAFT uint32_t U_EXPORT2
-ubiditransform_transform(UBiDiTransform *pBiDiTransform,
- const UChar *src, int32_t srcLength,
- UChar *dest, int32_t destSize,
- UBiDiLevel inParaLevel, UBiDiOrder inOrder,
- UBiDiLevel outParaLevel, UBiDiOrder outOrder,
- UBiDiMirroring doMirroring, uint32_t shapingOptions,
- UErrorCode *pErrorCode)
-{
- uint32_t destLength = 0;
- UBool textChanged = FALSE;
- const UBiDiTransform *pOrigTransform = pBiDiTransform;
- const UBiDiAction *action = NULL;
-
- if (U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if (src == NULL || dest == NULL) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- CHECK_LEN(src, srcLength, pErrorCode);
- CHECK_LEN(dest, destSize, pErrorCode);
-
- if (pBiDiTransform == NULL) {
- pBiDiTransform = ubiditransform_open(pErrorCode);
- if (U_FAILURE(*pErrorCode)) {
- return 0;
- }
- }
- /* Current limitation: in multiple paragraphs will be resolved according
- to the 1st paragraph */
- resolveBaseDirection(src, srcLength, &inParaLevel, &outParaLevel);
-
- pBiDiTransform->pActiveScheme = findMatchingScheme(inParaLevel, outParaLevel,
- inOrder, outOrder);
- if (pBiDiTransform->pActiveScheme == NULL) {
- goto cleanup;
- }
- pBiDiTransform->reorderingOptions = doMirroring ? UBIDI_DO_MIRRORING
- : UBIDI_REORDER_DEFAULT;
-
- /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text
- scheme at the time shaping is invoked. */
- shapingOptions &= ~U_SHAPE_TEXT_DIRECTION_MASK;
- pBiDiTransform->digits = shapingOptions & ~U_SHAPE_LETTERS_MASK;
- pBiDiTransform->letters = shapingOptions & ~U_SHAPE_DIGITS_MASK;
-
- updateSrc(pBiDiTransform, src, srcLength, destSize > srcLength ? destSize : srcLength, pErrorCode);
- if (U_FAILURE(*pErrorCode)) {
- goto cleanup;
- }
- if (pBiDiTransform->pBidi == NULL) {
- pBiDiTransform->pBidi = ubidi_openSized(0, 0, pErrorCode);
- if (U_FAILURE(*pErrorCode)) {
- goto cleanup;
- }
- }
- pBiDiTransform->dest = dest;
- pBiDiTransform->destSize = destSize;
- pBiDiTransform->pDestLength = &destLength;
-
- /* Checking for U_SUCCESS() within the loop to bail out on first failure. */
- for (action = pBiDiTransform->pActiveScheme->actions; *action && U_SUCCESS(*pErrorCode); action++) {
- if ((*action)(pBiDiTransform, pErrorCode)) {
- if (action + 1) {
- updateSrc(pBiDiTransform, pBiDiTransform->dest, *pBiDiTransform->pDestLength,
- *pBiDiTransform->pDestLength, pErrorCode);
- }
- textChanged = TRUE;
- }
- }
- ubidi_setInverse(pBiDiTransform->pBidi, FALSE);
-
- if (!textChanged && U_SUCCESS(*pErrorCode)) {
- /* Text was not changed - just copy src to dest */
- if (destSize < srcLength) {
- *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
- } else {
- u_strncpy(dest, src, srcLength);
- destLength = srcLength;
- }
- }
-cleanup:
- if (pOrigTransform != pBiDiTransform) {
- ubiditransform_close(pBiDiTransform);
- } else {
- pBiDiTransform->dest = NULL;
- pBiDiTransform->pDestLength = NULL;
- pBiDiTransform->srcLength = 0;
- pBiDiTransform->destSize = 0;
- }
- return U_FAILURE(*pErrorCode) ? 0 : destLength;
-}
diff --git a/contrib/libs/icu/common/ubidiwrt.cpp b/contrib/libs/icu/common/ubidiwrt.cpp
deleted file mode 100644
index a69c0a4b8b1..00000000000
--- a/contrib/libs/icu/common/ubidiwrt.cpp
+++ /dev/null
@@ -1,650 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2000-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: ubidiwrt.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999aug06
-* created by: Markus W. Scherer, updated by Matitiahu Allouche
-*
-* This file contains implementations for BiDi functions that use
-* the core algorithm and core API to write reordered text.
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/ustring.h"
-#include "unicode/uchar.h"
-#include "unicode/ubidi.h"
-#include "unicode/utf16.h"
-#include "cmemory.h"
-#include "ustr_imp.h"
-#include "ubidiimp.h"
-
-/*
- * The function implementations in this file are designed
- * for UTF-16 and UTF-32, not for UTF-8.
- *
- * Assumptions that are not true for UTF-8:
- * - Any code point always needs the same number of code units
- * ("minimum-length-problem" of UTF-8)
- * - The BiDi control characters need only one code unit each
- *
- * Further assumptions for all UTFs:
- * - u_charMirror(c) needs the same number of code units as c
- */
-#if defined(UTF_SIZE) && UTF_SIZE==8
-# error reimplement ubidi_writeReordered() for UTF-8, see comment above
-#endif
-
-#define IS_COMBINING(type) ((1UL<<(type))&(1UL<<U_NON_SPACING_MARK|1UL<<U_COMBINING_SPACING_MARK|1UL<<U_ENCLOSING_MARK))
-
-/*
- * When we have UBIDI_OUTPUT_REVERSE set on ubidi_writeReordered(), then we
- * semantically write RTL runs in reverse and later reverse them again.
- * Instead, we actually write them in forward order to begin with.
- * However, if the RTL run was to be mirrored, we need to mirror here now
- * since the implicit second reversal must not do it.
- * It looks strange to do mirroring in LTR output, but it is only because
- * we are writing RTL output in reverse.
- */
-static int32_t
-doWriteForward(const UChar *src, int32_t srcLength,
- UChar *dest, int32_t destSize,
- uint16_t options,
- UErrorCode *pErrorCode) {
- /* optimize for several combinations of options */
- switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING)) {
- case 0: {
- /* simply copy the LTR run to the destination */
- int32_t length=srcLength;
- if(destSize<length) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- return srcLength;
- }
- do {
- *dest++=*src++;
- } while(--length>0);
- return srcLength;
- }
- case UBIDI_DO_MIRRORING: {
- /* do mirroring */
- int32_t i=0, j=0;
- UChar32 c;
-
- if(destSize<srcLength) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- return srcLength;
- }
- do {
- U16_NEXT(src, i, srcLength, c);
- c=u_charMirror(c);
- U16_APPEND_UNSAFE(dest, j, c);
- } while(i<srcLength);
- return srcLength;
- }
- case UBIDI_REMOVE_BIDI_CONTROLS: {
- /* copy the LTR run and remove any BiDi control characters */
- int32_t remaining=destSize;
- UChar c;
- do {
- c=*src++;
- if(!IS_BIDI_CONTROL_CHAR(c)) {
- if(--remaining<0) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-
- /* preflight the length */
- while(--srcLength>0) {
- c=*src++;
- if(!IS_BIDI_CONTROL_CHAR(c)) {
- --remaining;
- }
- }
- return destSize-remaining;
- }
- *dest++=c;
- }
- } while(--srcLength>0);
- return destSize-remaining;
- }
- default: {
- /* remove BiDi control characters and do mirroring */
- int32_t remaining=destSize;
- int32_t i, j=0;
- UChar32 c;
- do {
- i=0;
- U16_NEXT(src, i, srcLength, c);
- src+=i;
- srcLength-=i;
- if(!IS_BIDI_CONTROL_CHAR(c)) {
- remaining-=i;
- if(remaining<0) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-
- /* preflight the length */
- while(srcLength>0) {
- c=*src++;
- if(!IS_BIDI_CONTROL_CHAR(c)) {
- --remaining;
- }
- --srcLength;
- }
- return destSize-remaining;
- }
- c=u_charMirror(c);
- U16_APPEND_UNSAFE(dest, j, c);
- }
- } while(srcLength>0);
- return j;
- }
- } /* end of switch */
-}
-
-static int32_t
-doWriteReverse(const UChar *src, int32_t srcLength,
- UChar *dest, int32_t destSize,
- uint16_t options,
- UErrorCode *pErrorCode) {
- /*
- * RTL run -
- *
- * RTL runs need to be copied to the destination in reverse order
- * of code points, not code units, to keep Unicode characters intact.
- *
- * The general strategy for this is to read the source text
- * in backward order, collect all code units for a code point
- * (and optionally following combining characters, see below),
- * and copy all these code units in ascending order
- * to the destination for this run.
- *
- * Several options request whether combining characters
- * should be kept after their base characters,
- * whether BiDi control characters should be removed, and
- * whether characters should be replaced by their mirror-image
- * equivalent Unicode characters.
- */
- int32_t i, j;
- UChar32 c;
-
- /* optimize for several combinations of options */
- switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) {
- case 0:
- /*
- * With none of the "complicated" options set, the destination
- * run will have the same length as the source run,
- * and there is no mirroring and no keeping combining characters
- * with their base characters.
- */
- if(destSize<srcLength) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- return srcLength;
- }
- destSize=srcLength;
-
- /* preserve character integrity */
- do {
- /* i is always after the last code unit known to need to be kept in this segment */
- i=srcLength;
-
- /* collect code units for one base character */
- U16_BACK_1(src, 0, srcLength);
-
- /* copy this base character */
- j=srcLength;
- do {
- *dest++=src[j++];
- } while(j<i);
- } while(srcLength>0);
- break;
- case UBIDI_KEEP_BASE_COMBINING:
- /*
- * Here, too, the destination
- * run will have the same length as the source run,
- * and there is no mirroring.
- * We do need to keep combining characters with their base characters.
- */
- if(destSize<srcLength) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- return srcLength;
- }
- destSize=srcLength;
-
- /* preserve character integrity */
- do {
- /* i is always after the last code unit known to need to be kept in this segment */
- i=srcLength;
-
- /* collect code units and modifier letters for one base character */
- do {
- U16_PREV(src, 0, srcLength, c);
- } while(srcLength>0 && IS_COMBINING(u_charType(c)));
-
- /* copy this "user character" */
- j=srcLength;
- do {
- *dest++=src[j++];
- } while(j<i);
- } while(srcLength>0);
- break;
- default:
- /*
- * With several "complicated" options set, this is the most
- * general and the slowest copying of an RTL run.
- * We will do mirroring, remove BiDi controls, and
- * keep combining characters with their base characters
- * as requested.
- */
- if(!(options&UBIDI_REMOVE_BIDI_CONTROLS)) {
- i=srcLength;
- } else {
- /* we need to find out the destination length of the run,
- which will not include the BiDi control characters */
- int32_t length=srcLength;
- UChar ch;
-
- i=0;
- do {
- ch=*src++;
- if(!IS_BIDI_CONTROL_CHAR(ch)) {
- ++i;
- }
- } while(--length>0);
- src-=srcLength;
- }
-
- if(destSize<i) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- return i;
- }
- destSize=i;
-
- /* preserve character integrity */
- do {
- /* i is always after the last code unit known to need to be kept in this segment */
- i=srcLength;
-
- /* collect code units for one base character */
- U16_PREV(src, 0, srcLength, c);
- if(options&UBIDI_KEEP_BASE_COMBINING) {
- /* collect modifier letters for this base character */
- while(srcLength>0 && IS_COMBINING(u_charType(c))) {
- U16_PREV(src, 0, srcLength, c);
- }
- }
-
- if(options&UBIDI_REMOVE_BIDI_CONTROLS && IS_BIDI_CONTROL_CHAR(c)) {
- /* do not copy this BiDi control character */
- continue;
- }
-
- /* copy this "user character" */
- j=srcLength;
- if(options&UBIDI_DO_MIRRORING) {
- /* mirror only the base character */
- int32_t k=0;
- c=u_charMirror(c);
- U16_APPEND_UNSAFE(dest, k, c);
- dest+=k;
- j+=k;
- }
- while(j<i) {
- *dest++=src[j++];
- }
- } while(srcLength>0);
- break;
- } /* end of switch */
-
- return destSize;
-}
-
-U_CAPI int32_t U_EXPORT2
-ubidi_writeReverse(const UChar *src, int32_t srcLength,
- UChar *dest, int32_t destSize,
- uint16_t options,
- UErrorCode *pErrorCode) {
- int32_t destLength;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /* more error checking */
- if( src==NULL || srcLength<-1 ||
- destSize<0 || (destSize>0 && dest==NULL))
- {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* do input and output overlap? */
- if( dest!=NULL &&
- ((src>=dest && src<dest+destSize) ||
- (dest>=src && dest<src+srcLength)))
- {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- if(srcLength==-1) {
- srcLength=u_strlen(src);
- }
- if(srcLength>0) {
- destLength=doWriteReverse(src, srcLength, dest, destSize, options, pErrorCode);
- } else {
- /* nothing to do */
- destLength=0;
- }
-
- return u_terminateUChars(dest, destSize, destLength, pErrorCode);
-}
-
-// Ticket 20907 - The optimizer in MSVC/Visual Studio versions below 16.4 has trouble with this
-// function on Windows ARM64. As a work-around, we disable optimizations for this function.
-// This work-around could/should be removed once the following versions of Visual Studio are no
-// longer supported: All versions of VS2017, and versions of VS2019 below 16.4.
-#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
-#pragma optimize( "", off )
-#endif
-U_CAPI int32_t U_EXPORT2
-ubidi_writeReordered(UBiDi *pBiDi,
- UChar *dest, int32_t destSize,
- uint16_t options,
- UErrorCode *pErrorCode) {
- const UChar *text;
- UChar *saveDest;
- int32_t length, destCapacity;
- int32_t run, runCount, logicalStart, runLength;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /* more error checking */
- if( pBiDi==NULL ||
- (text=pBiDi->text)==NULL || (length=pBiDi->length)<0 ||
- destSize<0 || (destSize>0 && dest==NULL))
- {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* do input and output overlap? */
- if( dest!=NULL &&
- ((text>=dest && text<dest+destSize) ||
- (dest>=text && dest<text+pBiDi->originalLength)))
- {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- if(length==0) {
- /* nothing to do */
- return u_terminateUChars(dest, destSize, 0, pErrorCode);
- }
-
- runCount=ubidi_countRuns(pBiDi, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /* destSize shrinks, later destination length=destCapacity-destSize */
- saveDest=dest;
- destCapacity=destSize;
-
- /*
- * Option "insert marks" implies UBIDI_INSERT_LRM_FOR_NUMERIC if the
- * reordering mode (checked below) is appropriate.
- */
- if(pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
- options|=UBIDI_INSERT_LRM_FOR_NUMERIC;
- options&=~UBIDI_REMOVE_BIDI_CONTROLS;
- }
- /*
- * Option "remove controls" implies UBIDI_REMOVE_BIDI_CONTROLS
- * and cancels UBIDI_INSERT_LRM_FOR_NUMERIC.
- */
- if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
- options|=UBIDI_REMOVE_BIDI_CONTROLS;
- options&=~UBIDI_INSERT_LRM_FOR_NUMERIC;
- }
- /*
- * If we do not perform the "inverse BiDi" algorithm, then we
- * don't need to insert any LRMs, and don't need to test for it.
- */
- if((pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_NUMBERS_AS_L) &&
- (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_LIKE_DIRECT) &&
- (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
- (pBiDi->reorderingMode != UBIDI_REORDER_RUNS_ONLY)) {
- options&=~UBIDI_INSERT_LRM_FOR_NUMERIC;
- }
- /*
- * Iterate through all visual runs and copy the run text segments to
- * the destination, according to the options.
- *
- * The tests for where to insert LRMs ignore the fact that there may be
- * BN codes or non-BMP code points at the beginning and end of a run;
- * they may insert LRMs unnecessarily but the tests are faster this way
- * (this would have to be improved for UTF-8).
- *
- * Note that the only errors that are set by doWriteXY() are buffer overflow
- * errors. Ignore them until the end, and continue for preflighting.
- */
- if(!(options&UBIDI_OUTPUT_REVERSE)) {
- /* forward output */
- if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) {
- /* do not insert BiDi controls */
- for(run=0; run<runCount; ++run) {
- if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) {
- runLength=doWriteForward(text+logicalStart, runLength,
- dest, destSize,
- (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
- } else {
- runLength=doWriteReverse(text+logicalStart, runLength,
- dest, destSize,
- options, pErrorCode);
- }
- if(dest!=NULL) {
- dest+=runLength;
- }
- destSize-=runLength;
- }
- } else {
- /* insert BiDi controls for "inverse BiDi" */
- const DirProp *dirProps=pBiDi->dirProps;
- const UChar *src;
- UChar uc;
- UBiDiDirection dir;
- int32_t markFlag;
-
- for(run=0; run<runCount; ++run) {
- dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength);
- src=text+logicalStart;
- /* check if something relevant in insertPoints */
- markFlag=pBiDi->runs[run].insertRemove;
- if(markFlag<0) { /* BiDi controls count */
- markFlag=0;
- }
-
- if(UBIDI_LTR==dir) {
- if((pBiDi->isInverse) &&
- (/*run>0 &&*/ dirProps[logicalStart]!=L)) {
- markFlag |= LRM_BEFORE;
- }
- if (markFlag & LRM_BEFORE) {
- uc=LRM_CHAR;
- }
- else if (markFlag & RLM_BEFORE) {
- uc=RLM_CHAR;
- }
- else uc=0;
- if(uc) {
- if(destSize>0) {
- *dest++=uc;
- }
- --destSize;
- }
-
- runLength=doWriteForward(src, runLength,
- dest, destSize,
- (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
- if(dest!=NULL) {
- dest+=runLength;
- }
- destSize-=runLength;
-
- if((pBiDi->isInverse) &&
- (/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L)) {
- markFlag |= LRM_AFTER;
- }
- if (markFlag & LRM_AFTER) {
- uc=LRM_CHAR;
- }
- else if (markFlag & RLM_AFTER) {
- uc=RLM_CHAR;
- }
- else uc=0;
- if(uc) {
- if(destSize>0) {
- *dest++=uc;
- }
- --destSize;
- }
- } else { /* RTL run */
- if((pBiDi->isInverse) &&
- (/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1])))) {
- markFlag |= RLM_BEFORE;
- }
- if (markFlag & LRM_BEFORE) {
- uc=LRM_CHAR;
- }
- else if (markFlag & RLM_BEFORE) {
- uc=RLM_CHAR;
- }
- else uc=0;
- if(uc) {
- if(destSize>0) {
- *dest++=uc;
- }
- --destSize;
- }
-
- runLength=doWriteReverse(src, runLength,
- dest, destSize,
- options, pErrorCode);
- if(dest!=NULL) {
- dest+=runLength;
- }
- destSize-=runLength;
-
- if((pBiDi->isInverse) &&
- (/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart])))) {
- markFlag |= RLM_AFTER;
- }
- if (markFlag & LRM_AFTER) {
- uc=LRM_CHAR;
- }
- else if (markFlag & RLM_AFTER) {
- uc=RLM_CHAR;
- }
- else uc=0;
- if(uc) {
- if(destSize>0) {
- *dest++=uc;
- }
- --destSize;
- }
- }
- }
- }
- } else {
- /* reverse output */
- if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) {
- /* do not insert BiDi controls */
- for(run=runCount; --run>=0;) {
- if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) {
- runLength=doWriteReverse(text+logicalStart, runLength,
- dest, destSize,
- (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
- } else {
- runLength=doWriteForward(text+logicalStart, runLength,
- dest, destSize,
- options, pErrorCode);
- }
- if(dest!=NULL) {
- dest+=runLength;
- }
- destSize-=runLength;
- }
- } else {
- /* insert BiDi controls for "inverse BiDi" */
- const DirProp *dirProps=pBiDi->dirProps;
- const UChar *src;
- UBiDiDirection dir;
-
- for(run=runCount; --run>=0;) {
- /* reverse output */
- dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength);
- src=text+logicalStart;
-
- if(UBIDI_LTR==dir) {
- if(/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L) {
- if(destSize>0) {
- *dest++=LRM_CHAR;
- }
- --destSize;
- }
-
- runLength=doWriteReverse(src, runLength,
- dest, destSize,
- (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
- if(dest!=NULL) {
- dest+=runLength;
- }
- destSize-=runLength;
-
- if(/*run>0 &&*/ dirProps[logicalStart]!=L) {
- if(destSize>0) {
- *dest++=LRM_CHAR;
- }
- --destSize;
- }
- } else {
- if(/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart]))) {
- if(destSize>0) {
- *dest++=RLM_CHAR;
- }
- --destSize;
- }
-
- runLength=doWriteForward(src, runLength,
- dest, destSize,
- options, pErrorCode);
- if(dest!=NULL) {
- dest+=runLength;
- }
- destSize-=runLength;
-
- if(/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1]))) {
- if(destSize>0) {
- *dest++=RLM_CHAR;
- }
- --destSize;
- }
- }
- }
- }
- }
-
- return u_terminateUChars(saveDest, destCapacity, destCapacity-destSize, pErrorCode);
-}
-#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
-#pragma optimize( "", on )
-#endif
diff --git a/contrib/libs/icu/common/ubrk.cpp b/contrib/libs/icu/common/ubrk.cpp
deleted file mode 100644
index f8bdf5a6b65..00000000000
--- a/contrib/libs/icu/common/ubrk.cpp
+++ /dev/null
@@ -1,357 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-********************************************************************************
-* Copyright (C) 1996-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-********************************************************************************
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/ubrk.h"
-
-#include "unicode/brkiter.h"
-#include "unicode/uloc.h"
-#include "unicode/ustring.h"
-#include "unicode/uchriter.h"
-#include "unicode/rbbi.h"
-#include "rbbirb.h"
-#include "uassert.h"
-#include "cmemory.h"
-
-U_NAMESPACE_USE
-
-//------------------------------------------------------------------------------
-//
-// ubrk_open Create a canned type of break iterator based on type (word, line, etc.)
-// and locale.
-//
-//------------------------------------------------------------------------------
-U_CAPI UBreakIterator* U_EXPORT2
-ubrk_open(UBreakIteratorType type,
- const char *locale,
- const UChar *text,
- int32_t textLength,
- UErrorCode *status)
-{
-
- if(U_FAILURE(*status)) return 0;
-
- BreakIterator *result = 0;
-
- switch(type) {
-
- case UBRK_CHARACTER:
- result = BreakIterator::createCharacterInstance(Locale(locale), *status);
- break;
-
- case UBRK_WORD:
- result = BreakIterator::createWordInstance(Locale(locale), *status);
- break;
-
- case UBRK_LINE:
- result = BreakIterator::createLineInstance(Locale(locale), *status);
- break;
-
- case UBRK_SENTENCE:
- result = BreakIterator::createSentenceInstance(Locale(locale), *status);
- break;
-
- case UBRK_TITLE:
- result = BreakIterator::createTitleInstance(Locale(locale), *status);
- break;
-
- default:
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- }
-
- // check for allocation error
- if (U_FAILURE(*status)) {
- return 0;
- }
- if(result == 0) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
-
-
- UBreakIterator *uBI = (UBreakIterator *)result;
- if (text != NULL) {
- ubrk_setText(uBI, text, textLength, status);
- }
- return uBI;
-}
-
-
-
-//------------------------------------------------------------------------------
-//
-// ubrk_openRules open a break iterator from a set of break rules.
-// Invokes the rule builder.
-//
-//------------------------------------------------------------------------------
-U_CAPI UBreakIterator* U_EXPORT2
-ubrk_openRules( const UChar *rules,
- int32_t rulesLength,
- const UChar *text,
- int32_t textLength,
- UParseError *parseErr,
- UErrorCode *status) {
-
- if (status == NULL || U_FAILURE(*status)){
- return 0;
- }
-
- BreakIterator *result = 0;
- UnicodeString ruleString(rules, rulesLength);
- result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status);
- if(U_FAILURE(*status)) {
- return 0;
- }
-
- UBreakIterator *uBI = (UBreakIterator *)result;
- if (text != NULL) {
- ubrk_setText(uBI, text, textLength, status);
- }
- return uBI;
-}
-
-
-U_CAPI UBreakIterator* U_EXPORT2
-ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
- const UChar * text, int32_t textLength,
- UErrorCode * status)
-{
- if (U_FAILURE(*status)) {
- return NULL;
- }
- if (rulesLength < 0) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- LocalPointer<RuleBasedBreakIterator> lpRBBI(new RuleBasedBreakIterator(binaryRules, rulesLength, *status), *status);
- if (U_FAILURE(*status)) {
- return NULL;
- }
- UBreakIterator *uBI = reinterpret_cast<UBreakIterator *>(lpRBBI.orphan());
- if (text != NULL) {
- ubrk_setText(uBI, text, textLength, status);
- }
- return uBI;
-}
-
-
-U_CAPI UBreakIterator * U_EXPORT2
-ubrk_safeClone(
- const UBreakIterator *bi,
- void * /*stackBuffer*/,
- int32_t *pBufferSize,
- UErrorCode *status)
-{
- if (status == NULL || U_FAILURE(*status)){
- return NULL;
- }
- if (bi == NULL) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- if (pBufferSize != NULL) {
- int32_t inputSize = *pBufferSize;
- *pBufferSize = 1;
- if (inputSize == 0) {
- return NULL; // preflighting for deprecated functionality
- }
- }
- BreakIterator *newBI = ((BreakIterator *)bi)->clone();
- if (newBI == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- } else {
- *status = U_SAFECLONE_ALLOCATED_WARNING;
- }
- return (UBreakIterator *)newBI;
-}
-
-
-
-U_CAPI void U_EXPORT2
-ubrk_close(UBreakIterator *bi)
-{
- delete (BreakIterator *)bi;
-}
-
-U_CAPI void U_EXPORT2
-ubrk_setText(UBreakIterator* bi,
- const UChar* text,
- int32_t textLength,
- UErrorCode* status)
-{
- UText ut = UTEXT_INITIALIZER;
- utext_openUChars(&ut, text, textLength, status);
- ((BreakIterator*)bi)->setText(&ut, *status);
- // A stack allocated UText wrapping a UChar * string
- // can be dumped without explicitly closing it.
-}
-
-
-
-U_CAPI void U_EXPORT2
-ubrk_setUText(UBreakIterator *bi,
- UText *text,
- UErrorCode *status)
-{
- ((BreakIterator*)bi)->setText(text, *status);
-}
-
-
-
-
-
-U_CAPI int32_t U_EXPORT2
-ubrk_current(const UBreakIterator *bi)
-{
-
- return ((BreakIterator*)bi)->current();
-}
-
-U_CAPI int32_t U_EXPORT2
-ubrk_next(UBreakIterator *bi)
-{
-
- return ((BreakIterator*)bi)->next();
-}
-
-U_CAPI int32_t U_EXPORT2
-ubrk_previous(UBreakIterator *bi)
-{
-
- return ((BreakIterator*)bi)->previous();
-}
-
-U_CAPI int32_t U_EXPORT2
-ubrk_first(UBreakIterator *bi)
-{
-
- return ((BreakIterator*)bi)->first();
-}
-
-U_CAPI int32_t U_EXPORT2
-ubrk_last(UBreakIterator *bi)
-{
-
- return ((BreakIterator*)bi)->last();
-}
-
-U_CAPI int32_t U_EXPORT2
-ubrk_preceding(UBreakIterator *bi,
- int32_t offset)
-{
-
- return ((BreakIterator*)bi)->preceding(offset);
-}
-
-U_CAPI int32_t U_EXPORT2
-ubrk_following(UBreakIterator *bi,
- int32_t offset)
-{
-
- return ((BreakIterator*)bi)->following(offset);
-}
-
-U_CAPI const char* U_EXPORT2
-ubrk_getAvailable(int32_t index)
-{
-
- return uloc_getAvailable(index);
-}
-
-U_CAPI int32_t U_EXPORT2
-ubrk_countAvailable()
-{
-
- return uloc_countAvailable();
-}
-
-
-U_CAPI UBool U_EXPORT2
-ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
-{
- return ((BreakIterator*)bi)->isBoundary(offset);
-}
-
-
-U_CAPI int32_t U_EXPORT2
-ubrk_getRuleStatus(UBreakIterator *bi)
-{
- return ((BreakIterator*)bi)->getRuleStatus();
-}
-
-U_CAPI int32_t U_EXPORT2
-ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
-{
- return ((BreakIterator*)bi)->getRuleStatusVec(fillInVec, capacity, *status);
-}
-
-
-U_CAPI const char* U_EXPORT2
-ubrk_getLocaleByType(const UBreakIterator *bi,
- ULocDataLocaleType type,
- UErrorCode* status)
-{
- if (bi == NULL) {
- if (U_SUCCESS(*status)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return NULL;
- }
- return ((BreakIterator*)bi)->getLocaleID(type, *status);
-}
-
-
-U_CAPI void U_EXPORT2
-ubrk_refreshUText(UBreakIterator *bi,
- UText *text,
- UErrorCode *status)
-{
- BreakIterator *bii = reinterpret_cast<BreakIterator *>(bi);
- bii->refreshInputText(text, *status);
-}
-
-U_CAPI int32_t U_EXPORT2
-ubrk_getBinaryRules(UBreakIterator *bi,
- uint8_t * binaryRules, int32_t rulesCapacity,
- UErrorCode * status)
-{
- if (U_FAILURE(*status)) {
- return 0;
- }
- if ((binaryRules == NULL && rulesCapacity > 0) || rulesCapacity < 0) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- RuleBasedBreakIterator* rbbi;
- if ((rbbi = dynamic_cast<RuleBasedBreakIterator*>(reinterpret_cast<BreakIterator*>(bi))) == NULL) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- uint32_t rulesLength;
- const uint8_t * returnedRules = rbbi->getBinaryRules(rulesLength);
- if (rulesLength > INT32_MAX) {
- *status = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- if (binaryRules != NULL) { // if not preflighting
- // Here we know rulesLength <= INT32_MAX and rulesCapacity >= 0, can cast safely
- if ((int32_t)rulesLength > rulesCapacity) {
- *status = U_BUFFER_OVERFLOW_ERROR;
- } else {
- uprv_memcpy(binaryRules, returnedRules, rulesLength);
- }
- }
- return (int32_t)rulesLength;
-}
-
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/contrib/libs/icu/common/ubrkimpl.h b/contrib/libs/icu/common/ubrkimpl.h
deleted file mode 100644
index 8197f66339e..00000000000
--- a/contrib/libs/icu/common/ubrkimpl.h
+++ /dev/null
@@ -1,15 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2006, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#ifndef UBRKIMPL_H
-#define UBRKIMPL_H
-
-#define U_ICUDATA_BRKITR U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "brkitr"
-
-#endif /*UBRKIMPL_H*/
diff --git a/contrib/libs/icu/common/ucase.cpp b/contrib/libs/icu/common/ucase.cpp
deleted file mode 100644
index 4ec25a27a50..00000000000
--- a/contrib/libs/icu/common/ucase.cpp
+++ /dev/null
@@ -1,1572 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2004-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: ucase.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004aug30
-* created by: Markus W. Scherer
-*
-* Low-level Unicode character/string case mapping code.
-* Much code moved here (and modified) from uchar.c.
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/unistr.h"
-#include "unicode/uset.h"
-#include "unicode/udata.h" /* UDataInfo */
-#include "unicode/utf16.h"
-#include "ucmndata.h" /* DataHeader */
-#include "udatamem.h"
-#include "umutex.h"
-#include "uassert.h"
-#include "cmemory.h"
-#include "utrie2.h"
-#include "ucase.h"
-
-struct UCaseProps {
- UDataMemory *mem;
- const int32_t *indexes;
- const uint16_t *exceptions;
- const uint16_t *unfold;
-
- UTrie2 trie;
- uint8_t formatVersion[4];
-};
-
-/* ucase_props_data.h is machine-generated by gencase --csource */
-#define INCLUDED_FROM_UCASE_CPP
-#include "ucase_props_data.h"
-
-/* set of property starts for UnicodeSet ------------------------------------ */
-
-static UBool U_CALLCONV
-_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) {
- /* add the start code point to the USet */
- const USetAdder *sa=(const USetAdder *)context;
- sa->add(sa->set, start);
- return TRUE;
-}
-
-U_CFUNC void U_EXPORT2
-ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
-
- /* add the start code point of each same-value range of the trie */
- utrie2_enum(&ucase_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
-
- /* add code points with hardcoded properties, plus the ones following them */
-
- /* (none right now, see comment below) */
-
- /*
- * Omit code points with hardcoded specialcasing properties
- * because we do not build property UnicodeSets for them right now.
- */
-}
-
-/* data access primitives --------------------------------------------------- */
-
-U_CFUNC const UTrie2 * U_EXPORT2
-ucase_getTrie() {
- return &ucase_props_singleton.trie;
-}
-
-#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))
-
-/* number of bits in an 8-bit integer value */
-static const uint8_t flagsOffset[256]={
- 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
-};
-
-#define HAS_SLOT(flags, idx) ((flags)&(1<<(idx)))
-#define SLOT_OFFSET(flags, idx) flagsOffset[(flags)&((1<<(idx))-1)]
-
-/*
- * Get the value of an optional-value slot where HAS_SLOT(excWord, idx).
- *
- * @param excWord (in) initial exceptions word
- * @param idx (in) desired slot index
- * @param pExc16 (in/out) const uint16_t * after excWord=*pExc16++;
- * moved to the last uint16_t of the value, use +1 for beginning of next slot
- * @param value (out) int32_t or uint32_t output if hasSlot, otherwise not modified
- */
-#define GET_SLOT_VALUE(excWord, idx, pExc16, value) UPRV_BLOCK_MACRO_BEGIN { \
- if(((excWord)&UCASE_EXC_DOUBLE_SLOTS)==0) { \
- (pExc16)+=SLOT_OFFSET(excWord, idx); \
- (value)=*pExc16; \
- } else { \
- (pExc16)+=2*SLOT_OFFSET(excWord, idx); \
- (value)=*pExc16++; \
- (value)=((value)<<16)|*pExc16; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/* simple case mappings ----------------------------------------------------- */
-
-U_CAPI UChar32 U_EXPORT2
-ucase_tolower(UChar32 c) {
- uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!UCASE_HAS_EXCEPTION(props)) {
- if(UCASE_IS_UPPER_OR_TITLE(props)) {
- c+=UCASE_GET_DELTA(props);
- }
- } else {
- const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
- uint16_t excWord=*pe++;
- if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
- int32_t delta;
- GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
- return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
- }
- if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
- GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c);
- }
- }
- return c;
-}
-
-U_CAPI UChar32 U_EXPORT2
-ucase_toupper(UChar32 c) {
- uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!UCASE_HAS_EXCEPTION(props)) {
- if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
- c+=UCASE_GET_DELTA(props);
- }
- } else {
- const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
- uint16_t excWord=*pe++;
- if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
- int32_t delta;
- GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
- return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
- }
- if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
- GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c);
- }
- }
- return c;
-}
-
-U_CAPI UChar32 U_EXPORT2
-ucase_totitle(UChar32 c) {
- uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!UCASE_HAS_EXCEPTION(props)) {
- if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
- c+=UCASE_GET_DELTA(props);
- }
- } else {
- const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
- uint16_t excWord=*pe++;
- if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
- int32_t delta;
- GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
- return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
- }
- int32_t idx;
- if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
- idx=UCASE_EXC_TITLE;
- } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
- idx=UCASE_EXC_UPPER;
- } else {
- return c;
- }
- GET_SLOT_VALUE(excWord, idx, pe, c);
- }
- return c;
-}
-
-static const UChar iDot[2] = { 0x69, 0x307 };
-static const UChar jDot[2] = { 0x6a, 0x307 };
-static const UChar iOgonekDot[3] = { 0x12f, 0x307 };
-static const UChar iDotGrave[3] = { 0x69, 0x307, 0x300 };
-static const UChar iDotAcute[3] = { 0x69, 0x307, 0x301 };
-static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 };
-
-
-U_CFUNC void U_EXPORT2
-ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
- uint16_t props;
-
- /*
- * Hardcode the case closure of i and its relatives and ignore the
- * data file data for these characters.
- * The Turkic dotless i and dotted I with their case mapping conditions
- * and case folding option make the related characters behave specially.
- * This code matches their closure behavior to their case folding behavior.
- */
-
- switch(c) {
- case 0x49:
- /* regular i and I are in one equivalence class */
- sa->add(sa->set, 0x69);
- return;
- case 0x69:
- sa->add(sa->set, 0x49);
- return;
- case 0x130:
- /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
- sa->addString(sa->set, iDot, 2);
- return;
- case 0x131:
- /* dotless i is in a class by itself */
- return;
- default:
- /* otherwise use the data file data */
- break;
- }
-
- props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!UCASE_HAS_EXCEPTION(props)) {
- if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
- /* add the one simple case mapping, no matter what type it is */
- int32_t delta=UCASE_GET_DELTA(props);
- if(delta!=0) {
- sa->add(sa->set, c+delta);
- }
- }
- } else {
- /*
- * c has exceptions, so there may be multiple simple and/or
- * full case mappings. Add them all.
- */
- const uint16_t *pe0, *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
- const UChar *closure;
- uint16_t excWord=*pe++;
- int32_t idx, closureLength, fullLength, length;
-
- pe0=pe;
-
- /* add all simple case mappings */
- for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
- if(HAS_SLOT(excWord, idx)) {
- pe=pe0;
- GET_SLOT_VALUE(excWord, idx, pe, c);
- sa->add(sa->set, c);
- }
- }
- if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) {
- pe=pe0;
- int32_t delta;
- GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
- sa->add(sa->set, (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta);
- }
-
- /* get the closure string pointer & length */
- if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
- pe=pe0;
- GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
- closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */
- closure=(const UChar *)pe+1; /* behind this slot, unless there are full case mappings */
- } else {
- closureLength=0;
- closure=NULL;
- }
-
- /* add the full case folding */
- if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
- pe=pe0;
- GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
-
- /* start of full case mapping strings */
- ++pe;
-
- fullLength&=0xffff; /* bits 16 and higher are reserved */
-
- /* skip the lowercase result string */
- pe+=fullLength&UCASE_FULL_LOWER;
- fullLength>>=4;
-
- /* add the full case folding string */
- length=fullLength&0xf;
- if(length!=0) {
- sa->addString(sa->set, (const UChar *)pe, length);
- pe+=length;
- }
-
- /* skip the uppercase and titlecase strings */
- fullLength>>=4;
- pe+=fullLength&0xf;
- fullLength>>=4;
- pe+=fullLength;
-
- closure=(const UChar *)pe; /* behind full case mappings */
- }
-
- /* add each code point in the closure string */
- for(idx=0; idx<closureLength;) {
- U16_NEXT_UNSAFE(closure, idx, c);
- sa->add(sa->set, c);
- }
- }
-}
-
-/*
- * compare s, which has a length, with t, which has a maximum length or is NUL-terminated
- * must be length>0 and max>0 and length<=max
- */
-static inline int32_t
-strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) {
- int32_t c1, c2;
-
- max-=length; /* we require length<=max, so no need to decrement max in the loop */
- do {
- c1=*s++;
- c2=*t++;
- if(c2==0) {
- return 1; /* reached the end of t but not of s */
- }
- c1-=c2;
- if(c1!=0) {
- return c1; /* return difference result */
- }
- } while(--length>0);
- /* ends with length==0 */
-
- if(max==0 || *t==0) {
- return 0; /* equal to length of both strings */
- } else {
- return -max; /* return lengh difference */
- }
-}
-
-U_CFUNC UBool U_EXPORT2
-ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) {
- int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth;
-
- if(ucase_props_singleton.unfold==NULL || s==NULL) {
- return FALSE; /* no reverse case folding data, or no string */
- }
- if(length<=1) {
- /* the string is too short to find any match */
- /*
- * more precise would be:
- * if(!u_strHasMoreChar32Than(s, length, 1))
- * but this does not make much practical difference because
- * a single supplementary code point would just not be found
- */
- return FALSE;
- }
-
- const uint16_t *unfold=ucase_props_singleton.unfold;
- unfoldRows=unfold[UCASE_UNFOLD_ROWS];
- unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH];
- unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH];
- unfold+=unfoldRowWidth;
-
- if(length>unfoldStringWidth) {
- /* the string is too long to find any match */
- return FALSE;
- }
-
- /* do a binary search for the string */
- start=0;
- limit=unfoldRows;
- while(start<limit) {
- i=(start+limit)/2;
- const UChar *p=reinterpret_cast<const UChar *>(unfold+(i*unfoldRowWidth));
- result=strcmpMax(s, length, p, unfoldStringWidth);
-
- if(result==0) {
- /* found the string: add each code point, and its case closure */
- UChar32 c;
-
- for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) {
- U16_NEXT_UNSAFE(p, i, c);
- sa->add(sa->set, c);
- ucase_addCaseClosure(c, sa);
- }
- return TRUE;
- } else if(result<0) {
- limit=i;
- } else /* result>0 */ {
- start=i+1;
- }
- }
-
- return FALSE; /* string not found */
-}
-
-U_NAMESPACE_BEGIN
-
-FullCaseFoldingIterator::FullCaseFoldingIterator()
- : unfold(reinterpret_cast<const UChar *>(ucase_props_singleton.unfold)),
- unfoldRows(unfold[UCASE_UNFOLD_ROWS]),
- unfoldRowWidth(unfold[UCASE_UNFOLD_ROW_WIDTH]),
- unfoldStringWidth(unfold[UCASE_UNFOLD_STRING_WIDTH]),
- currentRow(0),
- rowCpIndex(unfoldStringWidth) {
- unfold+=unfoldRowWidth;
-}
-
-UChar32
-FullCaseFoldingIterator::next(UnicodeString &full) {
- // Advance past the last-delivered code point.
- const UChar *p=unfold+(currentRow*unfoldRowWidth);
- if(rowCpIndex>=unfoldRowWidth || p[rowCpIndex]==0) {
- ++currentRow;
- p+=unfoldRowWidth;
- rowCpIndex=unfoldStringWidth;
- }
- if(currentRow>=unfoldRows) { return U_SENTINEL; }
- // Set "full" to the NUL-terminated string in the first unfold column.
- int32_t length=unfoldStringWidth;
- while(length>0 && p[length-1]==0) { --length; }
- full.setTo(FALSE, p, length);
- // Return the code point.
- UChar32 c;
- U16_NEXT_UNSAFE(p, rowCpIndex, c);
- return c;
-}
-
-namespace LatinCase {
-
-const int8_t TO_LOWER_NORMAL[LIMIT] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
- 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
- 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
- EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
-
- 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
- 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
- 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
- 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
-};
-
-const int8_t TO_LOWER_TR_LT[LIMIT] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
- 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
- 1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0,
- EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
-
- 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
- 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
- 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
- 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
-};
-
-const int8_t TO_UPPER_NORMAL[LIMIT] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
- -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
- -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
- -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,
-
- 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
- 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
- 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
- 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,
-
- -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
- 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
- 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
- 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
-};
-
-const int8_t TO_UPPER_TR[LIMIT] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32,
- -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
- -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
- -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,
-
- 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
- 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
- 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
- 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,
-
- -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
- 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
- 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
- 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
-};
-
-} // namespace LatinCase
-
-U_NAMESPACE_END
-
-/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
-U_CAPI int32_t U_EXPORT2
-ucase_getType(UChar32 c) {
- uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- return UCASE_GET_TYPE(props);
-}
-
-/** @return same as ucase_getType() and set bit 2 if c is case-ignorable */
-U_CAPI int32_t U_EXPORT2
-ucase_getTypeOrIgnorable(UChar32 c) {
- uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- return UCASE_GET_TYPE_AND_IGNORABLE(props);
-}
-
-/** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */
-static inline int32_t
-getDotType(UChar32 c) {
- uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!UCASE_HAS_EXCEPTION(props)) {
- return props&UCASE_DOT_MASK;
- } else {
- const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
- return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK;
- }
-}
-
-U_CAPI UBool U_EXPORT2
-ucase_isSoftDotted(UChar32 c) {
- return (UBool)(getDotType(c)==UCASE_SOFT_DOTTED);
-}
-
-U_CAPI UBool U_EXPORT2
-ucase_isCaseSensitive(UChar32 c) {
- uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!UCASE_HAS_EXCEPTION(props)) {
- return (UBool)((props&UCASE_SENSITIVE)!=0);
- } else {
- const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
- return (UBool)((*pe&UCASE_EXC_SENSITIVE)!=0);
- }
-}
-
-/* string casing ------------------------------------------------------------ */
-
-/*
- * These internal functions form the core of string case mappings.
- * They map single code points to result code points or strings and take
- * all necessary conditions (context, locale ID, options) into account.
- *
- * They do not iterate over the source or write to the destination
- * so that the same functions are useful for non-standard string storage,
- * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc.
- * For the same reason, the "surrounding text" context is passed in as a
- * UCaseContextIterator which does not make any assumptions about
- * the underlying storage.
- *
- * This section contains helper functions that check for conditions
- * in the input text surrounding the current code point
- * according to SpecialCasing.txt.
- *
- * Each helper function gets the index
- * - after the current code point if it looks at following text
- * - before the current code point if it looks at preceding text
- *
- * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
- *
- * Final_Sigma
- * C is preceded by a sequence consisting of
- * a cased letter and a case-ignorable sequence,
- * and C is not followed by a sequence consisting of
- * an ignorable sequence and then a cased letter.
- *
- * More_Above
- * C is followed by one or more characters of combining class 230 (ABOVE)
- * in the combining character sequence.
- *
- * After_Soft_Dotted
- * The last preceding character with combining class of zero before C
- * was Soft_Dotted,
- * and there is no intervening combining character class 230 (ABOVE).
- *
- * Before_Dot
- * C is followed by combining dot above (U+0307).
- * Any sequence of characters with a combining class that is neither 0 nor 230
- * may intervene between the current character and the combining dot above.
- *
- * The erratum from 2002-10-31 adds the condition
- *
- * After_I
- * The last preceding base character was an uppercase I, and there is no
- * intervening combining character class 230 (ABOVE).
- *
- * (See Jitterbug 2344 and the comments on After_I below.)
- *
- * Helper definitions in Unicode 3.2 UAX 21:
- *
- * D1. A character C is defined to be cased
- * if it meets any of the following criteria:
- *
- * - The general category of C is Titlecase Letter (Lt)
- * - In [CoreProps], C has one of the properties Uppercase, or Lowercase
- * - Given D = NFD(C), then it is not the case that:
- * D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
- * (This third criterium does not add any characters to the list
- * for Unicode 3.2. Ignored.)
- *
- * D2. A character C is defined to be case-ignorable
- * if it meets either of the following criteria:
- *
- * - The general category of C is
- * Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
- * Letter Modifier (Lm), or Symbol Modifier (Sk)
- * - C is one of the following characters
- * U+0027 APOSTROPHE
- * U+00AD SOFT HYPHEN (SHY)
- * U+2019 RIGHT SINGLE QUOTATION MARK
- * (the preferred character for apostrophe)
- *
- * D3. A case-ignorable sequence is a sequence of
- * zero or more case-ignorable characters.
- */
-
-#define is_d(c) ((c)=='d' || (c)=='D')
-#define is_e(c) ((c)=='e' || (c)=='E')
-#define is_i(c) ((c)=='i' || (c)=='I')
-#define is_l(c) ((c)=='l' || (c)=='L')
-#define is_r(c) ((c)=='r' || (c)=='R')
-#define is_t(c) ((c)=='t' || (c)=='T')
-#define is_u(c) ((c)=='u' || (c)=='U')
-#define is_z(c) ((c)=='z' || (c)=='Z')
-
-/* separator? */
-#define is_sep(c) ((c)=='_' || (c)=='-' || (c)==0)
-
-/**
- * Requires non-NULL locale ID but otherwise does the equivalent of
- * checking for language codes as if uloc_getLanguage() were called:
- * Accepts both 2- and 3-letter codes and accepts case variants.
- */
-U_CFUNC int32_t
-ucase_getCaseLocale(const char *locale) {
- /*
- * This function used to use uloc_getLanguage(), but the current code
- * removes the dependency of this low-level code on uloc implementation code
- * and is faster because not the whole locale ID has to be
- * examined and copied/transformed.
- *
- * Because this code does not want to depend on uloc, the caller must
- * pass in a non-NULL locale, i.e., may need to call uloc_getDefault().
- */
- char c=*locale++;
- // Fastpath for English "en" which is often used for default (=root locale) case mappings,
- // and for Chinese "zh": Very common but no special case mapping behavior.
- // Then check lowercase vs. uppercase to reduce the number of comparisons
- // for other locales without special behavior.
- if(c=='e') {
- /* el or ell? */
- c=*locale++;
- if(is_l(c)) {
- c=*locale++;
- if(is_l(c)) {
- c=*locale;
- }
- if(is_sep(c)) {
- return UCASE_LOC_GREEK;
- }
- }
- // en, es, ... -> root
- } else if(c=='z') {
- return UCASE_LOC_ROOT;
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
- } else if(c>='a') { // ASCII a-z = 0x61..0x7a, after A-Z
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
- } else if(c<='z') { // EBCDIC a-z = 0x81..0xa9 with two gaps, before A-Z
-#else
-# error Unknown charset family!
-#endif
- // lowercase c
- if(c=='t') {
- /* tr or tur? */
- c=*locale++;
- if(is_u(c)) {
- c=*locale++;
- }
- if(is_r(c)) {
- c=*locale;
- if(is_sep(c)) {
- return UCASE_LOC_TURKISH;
- }
- }
- } else if(c=='a') {
- /* az or aze? */
- c=*locale++;
- if(is_z(c)) {
- c=*locale++;
- if(is_e(c)) {
- c=*locale;
- }
- if(is_sep(c)) {
- return UCASE_LOC_TURKISH;
- }
- }
- } else if(c=='l') {
- /* lt or lit? */
- c=*locale++;
- if(is_i(c)) {
- c=*locale++;
- }
- if(is_t(c)) {
- c=*locale;
- if(is_sep(c)) {
- return UCASE_LOC_LITHUANIAN;
- }
- }
- } else if(c=='n') {
- /* nl or nld? */
- c=*locale++;
- if(is_l(c)) {
- c=*locale++;
- if(is_d(c)) {
- c=*locale;
- }
- if(is_sep(c)) {
- return UCASE_LOC_DUTCH;
- }
- }
- }
- } else {
- // uppercase c
- // Same code as for lowercase c but also check for 'E'.
- if(c=='T') {
- /* tr or tur? */
- c=*locale++;
- if(is_u(c)) {
- c=*locale++;
- }
- if(is_r(c)) {
- c=*locale;
- if(is_sep(c)) {
- return UCASE_LOC_TURKISH;
- }
- }
- } else if(c=='A') {
- /* az or aze? */
- c=*locale++;
- if(is_z(c)) {
- c=*locale++;
- if(is_e(c)) {
- c=*locale;
- }
- if(is_sep(c)) {
- return UCASE_LOC_TURKISH;
- }
- }
- } else if(c=='L') {
- /* lt or lit? */
- c=*locale++;
- if(is_i(c)) {
- c=*locale++;
- }
- if(is_t(c)) {
- c=*locale;
- if(is_sep(c)) {
- return UCASE_LOC_LITHUANIAN;
- }
- }
- } else if(c=='E') {
- /* el or ell? */
- c=*locale++;
- if(is_l(c)) {
- c=*locale++;
- if(is_l(c)) {
- c=*locale;
- }
- if(is_sep(c)) {
- return UCASE_LOC_GREEK;
- }
- }
- } else if(c=='N') {
- /* nl or nld? */
- c=*locale++;
- if(is_l(c)) {
- c=*locale++;
- if(is_d(c)) {
- c=*locale;
- }
- if(is_sep(c)) {
- return UCASE_LOC_DUTCH;
- }
- }
- }
- }
- return UCASE_LOC_ROOT;
-}
-
-/*
- * Is followed by
- * {case-ignorable}* cased
- * ?
- * (dir determines looking forward/backward)
- * If a character is case-ignorable, it is skipped regardless of whether
- * it is also cased or not.
- */
-static UBool
-isFollowedByCasedLetter(UCaseContextIterator *iter, void *context, int8_t dir) {
- UChar32 c;
-
- if(iter==NULL) {
- return FALSE;
- }
-
- for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) {
- int32_t type=ucase_getTypeOrIgnorable(c);
- if(type&4) {
- /* case-ignorable, continue with the loop */
- } else if(type!=UCASE_NONE) {
- return TRUE; /* followed by cased letter */
- } else {
- return FALSE; /* uncased and not case-ignorable */
- }
- }
-
- return FALSE; /* not followed by cased letter */
-}
-
-/* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
-static UBool
-isPrecededBySoftDotted(UCaseContextIterator *iter, void *context) {
- UChar32 c;
- int32_t dotType;
- int8_t dir;
-
- if(iter==NULL) {
- return FALSE;
- }
-
- for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
- dotType=getDotType(c);
- if(dotType==UCASE_SOFT_DOTTED) {
- return TRUE; /* preceded by TYPE_i */
- } else if(dotType!=UCASE_OTHER_ACCENT) {
- return FALSE; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
- }
- }
-
- return FALSE; /* not preceded by TYPE_i */
-}
-
-/*
- * See Jitterbug 2344:
- * The condition After_I for Turkic-lowercasing of U+0307 combining dot above
- * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because
- * we made those releases compatible with Unicode 3.2 which had not fixed
- * a related bug in SpecialCasing.txt.
- *
- * From the Jitterbug 2344 text:
- * ... this bug is listed as a Unicode erratum
- * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html
- * <quote>
- * There are two errors in SpecialCasing.txt.
- * 1. Missing semicolons on two lines. ... [irrelevant for ICU]
- * 2. An incorrect context definition. Correct as follows:
- * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
- * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE
- * ---
- * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
- * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
- * where the context After_I is defined as:
- * The last preceding base character was an uppercase I, and there is no
- * intervening combining character class 230 (ABOVE).
- * </quote>
- *
- * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as:
- *
- * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
- * # This matches the behavior of the canonically equivalent I-dot_above
- *
- * See also the description in this place in older versions of uchar.c (revision 1.100).
- *
- * Markus W. Scherer 2003-feb-15
- */
-
-/* Is preceded by base character 'I' with no intervening cc=230 ? */
-static UBool
-isPrecededBy_I(UCaseContextIterator *iter, void *context) {
- UChar32 c;
- int32_t dotType;
- int8_t dir;
-
- if(iter==NULL) {
- return FALSE;
- }
-
- for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
- if(c==0x49) {
- return TRUE; /* preceded by I */
- }
- dotType=getDotType(c);
- if(dotType!=UCASE_OTHER_ACCENT) {
- return FALSE; /* preceded by different base character (not I), or intervening cc==230 */
- }
- }
-
- return FALSE; /* not preceded by I */
-}
-
-/* Is followed by one or more cc==230 ? */
-static UBool
-isFollowedByMoreAbove(UCaseContextIterator *iter, void *context) {
- UChar32 c;
- int32_t dotType;
- int8_t dir;
-
- if(iter==NULL) {
- return FALSE;
- }
-
- for(dir=1; (c=iter(context, dir))>=0; dir=0) {
- dotType=getDotType(c);
- if(dotType==UCASE_ABOVE) {
- return TRUE; /* at least one cc==230 following */
- } else if(dotType!=UCASE_OTHER_ACCENT) {
- return FALSE; /* next base character, no more cc==230 following */
- }
- }
-
- return FALSE; /* no more cc==230 following */
-}
-
-/* Is followed by a dot above (without cc==230 in between) ? */
-static UBool
-isFollowedByDotAbove(UCaseContextIterator *iter, void *context) {
- UChar32 c;
- int32_t dotType;
- int8_t dir;
-
- if(iter==NULL) {
- return FALSE;
- }
-
- for(dir=1; (c=iter(context, dir))>=0; dir=0) {
- if(c==0x307) {
- return TRUE;
- }
- dotType=getDotType(c);
- if(dotType!=UCASE_OTHER_ACCENT) {
- return FALSE; /* next base character or cc==230 in between */
- }
- }
-
- return FALSE; /* no dot above following */
-}
-
-U_CAPI int32_t U_EXPORT2
-ucase_toFullLower(UChar32 c,
- UCaseContextIterator *iter, void *context,
- const UChar **pString,
- int32_t loc) {
- // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
- U_ASSERT(c >= 0);
- UChar32 result=c;
- uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!UCASE_HAS_EXCEPTION(props)) {
- if(UCASE_IS_UPPER_OR_TITLE(props)) {
- result=c+UCASE_GET_DELTA(props);
- }
- } else {
- const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
- uint16_t excWord=*pe++;
- int32_t full;
-
- pe2=pe;
-
- if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
- /* use hardcoded conditions and mappings */
-
- /*
- * Test for conditional mappings first
- * (otherwise the unconditional default mappings are always taken),
- * then test for characters that have unconditional mappings in SpecialCasing.txt,
- * then get the UnicodeData.txt mappings.
- */
- if( loc==UCASE_LOC_LITHUANIAN &&
- /* base characters, find accents above */
- (((c==0x49 || c==0x4a || c==0x12e) &&
- isFollowedByMoreAbove(iter, context)) ||
- /* precomposed with accent above, no need to find one */
- (c==0xcc || c==0xcd || c==0x128))
- ) {
- /*
- # Lithuanian
-
- # Lithuanian retains the dot in a lowercase i when followed by accents.
-
- # Introduce an explicit dot above when lowercasing capital I's and J's
- # whenever there are more accents above.
- # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
-
- 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
- 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
- 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
- 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
- 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
- 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
- */
- switch(c) {
- case 0x49: /* LATIN CAPITAL LETTER I */
- *pString=iDot;
- return 2;
- case 0x4a: /* LATIN CAPITAL LETTER J */
- *pString=jDot;
- return 2;
- case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
- *pString=iOgonekDot;
- return 2;
- case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
- *pString=iDotGrave;
- return 3;
- case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
- *pString=iDotAcute;
- return 3;
- case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
- *pString=iDotTilde;
- return 3;
- default:
- return 0; /* will not occur */
- }
- /* # Turkish and Azeri */
- } else if(loc==UCASE_LOC_TURKISH && c==0x130) {
- /*
- # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
- # The following rules handle those cases.
-
- 0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
- 0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
- */
- return 0x69;
- } else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(iter, context)) {
- /*
- # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
- # This matches the behavior of the canonically equivalent I-dot_above
-
- 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
- 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
- */
- *pString=nullptr;
- return 0; /* remove the dot (continue without output) */
- } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) {
- /*
- # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
-
- 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
- 0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
- */
- return 0x131;
- } else if(c==0x130) {
- /*
- # Preserve canonical equivalence for I with dot. Turkic is handled below.
-
- 0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
- */
- *pString=iDot;
- return 2;
- } else if( c==0x3a3 &&
- !isFollowedByCasedLetter(iter, context, 1) &&
- isFollowedByCasedLetter(iter, context, -1) /* -1=preceded */
- ) {
- /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
- /*
- # Special case for final form of sigma
-
- 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
- */
- return 0x3c2; /* greek small final sigma */
- } else {
- /* no known conditional special case mapping, use a normal mapping */
- }
- } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
- GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
- full&=UCASE_FULL_LOWER;
- if(full!=0) {
- /* set the output pointer to the lowercase mapping */
- *pString=reinterpret_cast<const UChar *>(pe+1);
-
- /* return the string length */
- return full;
- }
- }
-
- if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
- int32_t delta;
- GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
- return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
- }
- if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
- GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe2, result);
- }
- }
-
- return (result==c) ? ~result : result;
-}
-
-/* internal */
-static int32_t
-toUpperOrTitle(UChar32 c,
- UCaseContextIterator *iter, void *context,
- const UChar **pString,
- int32_t loc,
- UBool upperNotTitle) {
- // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
- U_ASSERT(c >= 0);
- UChar32 result=c;
- uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!UCASE_HAS_EXCEPTION(props)) {
- if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
- result=c+UCASE_GET_DELTA(props);
- }
- } else {
- const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
- uint16_t excWord=*pe++;
- int32_t full, idx;
-
- pe2=pe;
-
- if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
- /* use hardcoded conditions and mappings */
- if(loc==UCASE_LOC_TURKISH && c==0x69) {
- /*
- # Turkish and Azeri
-
- # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
- # The following rules handle those cases.
-
- # When uppercasing, i turns into a dotted capital I
-
- 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
- 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
- */
- return 0x130;
- } else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter, context)) {
- /*
- # Lithuanian
-
- # Lithuanian retains the dot in a lowercase i when followed by accents.
-
- # Remove DOT ABOVE after "i" with upper or titlecase
-
- 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
- */
- *pString=nullptr;
- return 0; /* remove the dot (continue without output) */
- } else {
- /* no known conditional special case mapping, use a normal mapping */
- }
- } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
- GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
-
- /* start of full case mapping strings */
- ++pe;
-
- /* skip the lowercase and case-folding result strings */
- pe+=full&UCASE_FULL_LOWER;
- full>>=4;
- pe+=full&0xf;
- full>>=4;
-
- if(upperNotTitle) {
- full&=0xf;
- } else {
- /* skip the uppercase result string */
- pe+=full&0xf;
- full=(full>>4)&0xf;
- }
-
- if(full!=0) {
- /* set the output pointer to the result string */
- *pString=reinterpret_cast<const UChar *>(pe);
-
- /* return the string length */
- return full;
- }
- }
-
- if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
- int32_t delta;
- GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
- return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
- }
- if(!upperNotTitle && HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
- idx=UCASE_EXC_TITLE;
- } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
- /* here, titlecase is same as uppercase */
- idx=UCASE_EXC_UPPER;
- } else {
- return ~c;
- }
- GET_SLOT_VALUE(excWord, idx, pe2, result);
- }
-
- return (result==c) ? ~result : result;
-}
-
-U_CAPI int32_t U_EXPORT2
-ucase_toFullUpper(UChar32 c,
- UCaseContextIterator *iter, void *context,
- const UChar **pString,
- int32_t caseLocale) {
- return toUpperOrTitle(c, iter, context, pString, caseLocale, TRUE);
-}
-
-U_CAPI int32_t U_EXPORT2
-ucase_toFullTitle(UChar32 c,
- UCaseContextIterator *iter, void *context,
- const UChar **pString,
- int32_t caseLocale) {
- return toUpperOrTitle(c, iter, context, pString, caseLocale, FALSE);
-}
-
-/* case folding ------------------------------------------------------------- */
-
-/*
- * Case folding is similar to lowercasing.
- * The result may be a simple mapping, i.e., a single code point, or
- * a full mapping, i.e., a string.
- * If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
- * then only the lowercase mapping is stored.
- *
- * Some special cases are hardcoded because their conditions cannot be
- * parsed and processed from CaseFolding.txt.
- *
- * Unicode 3.2 CaseFolding.txt specifies for its status field:
-
-# C: common case folding, common mappings shared by both simple and full mappings.
-# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
-# S: simple case folding, mappings to single characters where different from F.
-# T: special case for uppercase I and dotted uppercase I
-# - For non-Turkic languages, this mapping is normally not used.
-# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
-#
-# Usage:
-# A. To do a simple case folding, use the mappings with status C + S.
-# B. To do a full case folding, use the mappings with status C + F.
-#
-# The mappings with status T can be used or omitted depending on the desired case-folding
-# behavior. (The default option is to exclude them.)
-
- * Unicode 3.2 has 'T' mappings as follows:
-
-0049; T; 0131; # LATIN CAPITAL LETTER I
-0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
-
- * while the default mappings for these code points are:
-
-0049; C; 0069; # LATIN CAPITAL LETTER I
-0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
-
- * U+0130 has no simple case folding (simple-case-folds to itself).
- */
-
-/* return the simple case folding mapping for c */
-U_CAPI UChar32 U_EXPORT2
-ucase_fold(UChar32 c, uint32_t options) {
- uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!UCASE_HAS_EXCEPTION(props)) {
- if(UCASE_IS_UPPER_OR_TITLE(props)) {
- c+=UCASE_GET_DELTA(props);
- }
- } else {
- const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
- uint16_t excWord=*pe++;
- int32_t idx;
- if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
- /* special case folding mappings, hardcoded */
- if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
- /* default mappings */
- if(c==0x49) {
- /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
- return 0x69;
- } else if(c==0x130) {
- /* no simple case folding for U+0130 */
- return c;
- }
- } else {
- /* Turkic mappings */
- if(c==0x49) {
- /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
- return 0x131;
- } else if(c==0x130) {
- /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
- return 0x69;
- }
- }
- }
- if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
- return c;
- }
- if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
- int32_t delta;
- GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
- return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
- }
- if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
- idx=UCASE_EXC_FOLD;
- } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
- idx=UCASE_EXC_LOWER;
- } else {
- return c;
- }
- GET_SLOT_VALUE(excWord, idx, pe, c);
- }
- return c;
-}
-
-/*
- * Issue for canonical caseless match (UAX #21):
- * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
- * canonical equivalence, unlike default-option casefolding.
- * For example, I-grave and I + grave fold to strings that are not canonically
- * equivalent.
- * For more details, see the comment in unorm_compare() in unorm.cpp
- * and the intermediate prototype changes for Jitterbug 2021.
- * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
- *
- * This did not get fixed because it appears that it is not possible to fix
- * it for uppercase and lowercase characters (I-grave vs. i-grave)
- * together in a way that they still fold to common result strings.
- */
-
-U_CAPI int32_t U_EXPORT2
-ucase_toFullFolding(UChar32 c,
- const UChar **pString,
- uint32_t options) {
- // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
- U_ASSERT(c >= 0);
- UChar32 result=c;
- uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!UCASE_HAS_EXCEPTION(props)) {
- if(UCASE_IS_UPPER_OR_TITLE(props)) {
- result=c+UCASE_GET_DELTA(props);
- }
- } else {
- const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
- uint16_t excWord=*pe++;
- int32_t full, idx;
-
- pe2=pe;
-
- if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
- /* use hardcoded conditions and mappings */
- if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
- /* default mappings */
- if(c==0x49) {
- /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
- return 0x69;
- } else if(c==0x130) {
- /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
- *pString=iDot;
- return 2;
- }
- } else {
- /* Turkic mappings */
- if(c==0x49) {
- /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
- return 0x131;
- } else if(c==0x130) {
- /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
- return 0x69;
- }
- }
- } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
- GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
-
- /* start of full case mapping strings */
- ++pe;
-
- /* skip the lowercase result string */
- pe+=full&UCASE_FULL_LOWER;
- full=(full>>4)&0xf;
-
- if(full!=0) {
- /* set the output pointer to the result string */
- *pString=reinterpret_cast<const UChar *>(pe);
-
- /* return the string length */
- return full;
- }
- }
-
- if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
- return ~c;
- }
- if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
- int32_t delta;
- GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
- return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
- }
- if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
- idx=UCASE_EXC_FOLD;
- } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
- idx=UCASE_EXC_LOWER;
- } else {
- return ~c;
- }
- GET_SLOT_VALUE(excWord, idx, pe2, result);
- }
-
- return (result==c) ? ~result : result;
-}
-
-/* case mapping properties API ---------------------------------------------- */
-
-/* public API (see uchar.h) */
-
-U_CAPI UBool U_EXPORT2
-u_isULowercase(UChar32 c) {
- return (UBool)(UCASE_LOWER==ucase_getType(c));
-}
-
-U_CAPI UBool U_EXPORT2
-u_isUUppercase(UChar32 c) {
- return (UBool)(UCASE_UPPER==ucase_getType(c));
-}
-
-/* Transforms the Unicode character to its lower case equivalent.*/
-U_CAPI UChar32 U_EXPORT2
-u_tolower(UChar32 c) {
- return ucase_tolower(c);
-}
-
-/* Transforms the Unicode character to its upper case equivalent.*/
-U_CAPI UChar32 U_EXPORT2
-u_toupper(UChar32 c) {
- return ucase_toupper(c);
-}
-
-/* Transforms the Unicode character to its title case equivalent.*/
-U_CAPI UChar32 U_EXPORT2
-u_totitle(UChar32 c) {
- return ucase_totitle(c);
-}
-
-/* return the simple case folding mapping for c */
-U_CAPI UChar32 U_EXPORT2
-u_foldCase(UChar32 c, uint32_t options) {
- return ucase_fold(c, options);
-}
-
-U_CFUNC int32_t U_EXPORT2
-ucase_hasBinaryProperty(UChar32 c, UProperty which) {
- /* case mapping properties */
- const UChar *resultString;
- switch(which) {
- case UCHAR_LOWERCASE:
- return (UBool)(UCASE_LOWER==ucase_getType(c));
- case UCHAR_UPPERCASE:
- return (UBool)(UCASE_UPPER==ucase_getType(c));
- case UCHAR_SOFT_DOTTED:
- return ucase_isSoftDotted(c);
- case UCHAR_CASE_SENSITIVE:
- return ucase_isCaseSensitive(c);
- case UCHAR_CASED:
- return (UBool)(UCASE_NONE!=ucase_getType(c));
- case UCHAR_CASE_IGNORABLE:
- return (UBool)(ucase_getTypeOrIgnorable(c)>>2);
- /*
- * Note: The following Changes_When_Xyz are defined as testing whether
- * the NFD form of the input changes when Xyz-case-mapped.
- * However, this simpler implementation of these properties,
- * ignoring NFD, passes the tests.
- * The implementation needs to be changed if the tests start failing.
- * When that happens, optimizations should be used to work with the
- * per-single-code point ucase_toFullXyz() functions unless
- * the NFD form has more than one code point,
- * and the property starts set needs to be the union of the
- * start sets for normalization and case mappings.
- */
- case UCHAR_CHANGES_WHEN_LOWERCASED:
- return (UBool)(ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
- case UCHAR_CHANGES_WHEN_UPPERCASED:
- return (UBool)(ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
- case UCHAR_CHANGES_WHEN_TITLECASED:
- return (UBool)(ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
- /* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */
- case UCHAR_CHANGES_WHEN_CASEMAPPED:
- return (UBool)(
- ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 ||
- ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 ||
- ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
- default:
- return FALSE;
- }
-}
diff --git a/contrib/libs/icu/common/ucase.h b/contrib/libs/icu/common/ucase.h
deleted file mode 100644
index b0a453b87e8..00000000000
--- a/contrib/libs/icu/common/ucase.h
+++ /dev/null
@@ -1,444 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2004-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: ucase.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004aug30
-* created by: Markus W. Scherer
-*
-* Low-level Unicode character/string case mapping code.
-*/
-
-#ifndef __UCASE_H__
-#define __UCASE_H__
-
-#include "unicode/utypes.h"
-#include "unicode/uset.h"
-#include "putilimp.h"
-#include "uset_imp.h"
-#include "udataswp.h"
-#include "utrie2.h"
-
-#ifdef __cplusplus
-U_NAMESPACE_BEGIN
-
-class UnicodeString;
-
-U_NAMESPACE_END
-#endif
-
-/* library API -------------------------------------------------------------- */
-
-U_CFUNC void U_EXPORT2
-ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
-
-/**
- * Requires non-NULL locale ID but otherwise does the equivalent of
- * checking for language codes as if uloc_getLanguage() were called:
- * Accepts both 2- and 3-letter codes and accepts case variants.
- */
-U_CFUNC int32_t
-ucase_getCaseLocale(const char *locale);
-
-/* Casing locale types for ucase_getCaseLocale */
-enum {
- UCASE_LOC_UNKNOWN,
- UCASE_LOC_ROOT,
- UCASE_LOC_TURKISH,
- UCASE_LOC_LITHUANIAN,
- UCASE_LOC_GREEK,
- UCASE_LOC_DUTCH
-};
-
-/**
- * Bit mask for getting just the options from a string compare options word
- * that are relevant for case-insensitive string comparison.
- * See stringoptions.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
- * @internal
- */
-#define _STRCASECMP_OPTIONS_MASK 0xffff
-
-/**
- * Bit mask for getting just the options from a string compare options word
- * that are relevant for case folding (of a single string or code point).
- *
- * Currently only bit 0 for U_FOLD_CASE_EXCLUDE_SPECIAL_I.
- * It is conceivable that at some point we might use one more bit for using uppercase sharp s.
- * It is conceivable that at some point we might want the option to use only simple case foldings
- * when operating on strings.
- *
- * See stringoptions.h.
- * @internal
- */
-#define _FOLD_CASE_OPTIONS_MASK 7
-
-/* single-code point functions */
-
-U_CAPI UChar32 U_EXPORT2
-ucase_tolower(UChar32 c);
-
-U_CAPI UChar32 U_EXPORT2
-ucase_toupper(UChar32 c);
-
-U_CAPI UChar32 U_EXPORT2
-ucase_totitle(UChar32 c);
-
-U_CAPI UChar32 U_EXPORT2
-ucase_fold(UChar32 c, uint32_t options);
-
-/**
- * Adds all simple case mappings and the full case folding for c to sa,
- * and also adds special case closure mappings.
- * c itself is not added.
- * For example, the mappings
- * - for s include long s
- * - for sharp s include ss
- * - for k include the Kelvin sign
- */
-U_CFUNC void U_EXPORT2
-ucase_addCaseClosure(UChar32 c, const USetAdder *sa);
-
-/**
- * Maps the string to single code points and adds the associated case closure
- * mappings.
- * The string is mapped to code points if it is their full case folding string.
- * In other words, this performs a reverse full case folding and then
- * adds the case closure items of the resulting code points.
- * If the string is found and its closure applied, then
- * the string itself is added as well as part of its code points' closure.
- * It must be length>=0.
- *
- * @return TRUE if the string was found
- */
-U_CFUNC UBool U_EXPORT2
-ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa);
-
-#ifdef __cplusplus
-U_NAMESPACE_BEGIN
-
-/**
- * Iterator over characters with more than one code point in the full default Case_Folding.
- */
-class U_COMMON_API FullCaseFoldingIterator {
-public:
- /** Constructor. */
- FullCaseFoldingIterator();
- /**
- * Returns the next (cp, full) pair where "full" is cp's full default Case_Folding.
- * Returns a negative cp value at the end of the iteration.
- */
- UChar32 next(UnicodeString &full);
-private:
- FullCaseFoldingIterator(const FullCaseFoldingIterator &); // no copy
- FullCaseFoldingIterator &operator=(const FullCaseFoldingIterator &); // no assignment
-
- const UChar *unfold;
- int32_t unfoldRows;
- int32_t unfoldRowWidth;
- int32_t unfoldStringWidth;
- int32_t currentRow;
- int32_t rowCpIndex;
-};
-
-/**
- * Fast case mapping data for ASCII/Latin.
- * Linear arrays of delta bytes: 0=no mapping; EXC=exception.
- * Deltas must not cross the ASCII boundary, or else they cannot be easily used
- * in simple UTF-8 code.
- */
-namespace LatinCase {
-
-/** Case mapping/folding data for code points up to U+017F. */
-constexpr UChar LIMIT = 0x180;
-/** U+017F case-folds and uppercases crossing the ASCII boundary. */
-constexpr UChar LONG_S = 0x17f;
-/** Exception: Complex mapping, or too-large delta. */
-constexpr int8_t EXC = -0x80;
-
-/** Deltas for lowercasing for most locales, and default case folding. */
-extern const int8_t TO_LOWER_NORMAL[LIMIT];
-/** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */
-extern const int8_t TO_LOWER_TR_LT[LIMIT];
-
-/** Deltas for uppercasing for most locales. */
-extern const int8_t TO_UPPER_NORMAL[LIMIT];
-/** Deltas for uppercasing for tr/az. */
-extern const int8_t TO_UPPER_TR[LIMIT];
-
-} // namespace LatinCase
-
-U_NAMESPACE_END
-#endif
-
-/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
-U_CAPI int32_t U_EXPORT2
-ucase_getType(UChar32 c);
-
-/** @return like ucase_getType() but also sets UCASE_IGNORABLE if c is case-ignorable */
-U_CAPI int32_t U_EXPORT2
-ucase_getTypeOrIgnorable(UChar32 c);
-
-U_CAPI UBool U_EXPORT2
-ucase_isSoftDotted(UChar32 c);
-
-U_CAPI UBool U_EXPORT2
-ucase_isCaseSensitive(UChar32 c);
-
-/* string case mapping functions */
-
-U_CDECL_BEGIN
-
-/**
- * Iterator function for string case mappings, which need to look at the
- * context (surrounding text) of a given character for conditional mappings.
- *
- * The iterator only needs to go backward or forward away from the
- * character in question. It does not use any indexes on this interface.
- * It does not support random access or an arbitrary change of
- * iteration direction.
- *
- * The code point being case-mapped itself is never returned by
- * this iterator.
- *
- * @param context A pointer to the iterator's working data.
- * @param dir If <0 then start iterating backward from the character;
- * if >0 then start iterating forward from the character;
- * if 0 then continue iterating in the current direction.
- * @return Next code point, or <0 when the iteration is done.
- */
-typedef UChar32 U_CALLCONV
-UCaseContextIterator(void *context, int8_t dir);
-
-/**
- * Sample struct which may be used by some implementations of
- * UCaseContextIterator.
- */
-struct UCaseContext {
- void *p;
- int32_t start, index, limit;
- int32_t cpStart, cpLimit;
- int8_t dir;
- int8_t b1, b2, b3;
-};
-typedef struct UCaseContext UCaseContext;
-
-U_CDECL_END
-
-#define UCASECONTEXT_INITIALIZER { NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
-
-enum {
- /**
- * For string case mappings, a single character (a code point) is mapped
- * either to itself (in which case in-place mapping functions do nothing),
- * or to another single code point, or to a string.
- * Aside from the string contents, these are indicated with a single int32_t
- * value as follows:
- *
- * Mapping to self: Negative values (~self instead of -self to support U+0000)
- *
- * Mapping to another code point: Positive values >UCASE_MAX_STRING_LENGTH
- *
- * Mapping to a string: The string length (0..UCASE_MAX_STRING_LENGTH) is
- * returned. Note that the string result may indeed have zero length.
- */
- UCASE_MAX_STRING_LENGTH=0x1f
-};
-
-/**
- * Get the full lowercase mapping for c.
- *
- * @param csp Case mapping properties.
- * @param c Character to be mapped.
- * @param iter Character iterator, used for context-sensitive mappings.
- * See UCaseContextIterator for details.
- * If iter==NULL then a context-independent result is returned.
- * @param context Pointer to be passed into iter.
- * @param pString If the mapping result is a string, then the pointer is
- * written to *pString.
- * @param caseLocale Case locale value from ucase_getCaseLocale().
- * @return Output code point or string length, see UCASE_MAX_STRING_LENGTH.
- *
- * @see UCaseContextIterator
- * @see UCASE_MAX_STRING_LENGTH
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-ucase_toFullLower(UChar32 c,
- UCaseContextIterator *iter, void *context,
- const UChar **pString,
- int32_t caseLocale);
-
-U_CAPI int32_t U_EXPORT2
-ucase_toFullUpper(UChar32 c,
- UCaseContextIterator *iter, void *context,
- const UChar **pString,
- int32_t caseLocale);
-
-U_CAPI int32_t U_EXPORT2
-ucase_toFullTitle(UChar32 c,
- UCaseContextIterator *iter, void *context,
- const UChar **pString,
- int32_t caseLocale);
-
-U_CAPI int32_t U_EXPORT2
-ucase_toFullFolding(UChar32 c,
- const UChar **pString,
- uint32_t options);
-
-U_CFUNC int32_t U_EXPORT2
-ucase_hasBinaryProperty(UChar32 c, UProperty which);
-
-
-U_CDECL_BEGIN
-
-/**
- * @internal
- */
-typedef int32_t U_CALLCONV
-UCaseMapFull(UChar32 c,
- UCaseContextIterator *iter, void *context,
- const UChar **pString,
- int32_t caseLocale);
-
-U_CDECL_END
-
-/* file definitions --------------------------------------------------------- */
-
-#define UCASE_DATA_NAME "ucase"
-#define UCASE_DATA_TYPE "icu"
-
-/* format "cAsE" */
-#define UCASE_FMT_0 0x63
-#define UCASE_FMT_1 0x41
-#define UCASE_FMT_2 0x53
-#define UCASE_FMT_3 0x45
-
-/* indexes into indexes[] */
-enum {
- UCASE_IX_INDEX_TOP,
- UCASE_IX_LENGTH,
- UCASE_IX_TRIE_SIZE,
- UCASE_IX_EXC_LENGTH,
- UCASE_IX_UNFOLD_LENGTH,
-
- UCASE_IX_MAX_FULL_LENGTH=15,
- UCASE_IX_TOP=16
-};
-
-/* definitions for 16-bit case properties word ------------------------------ */
-
-U_CFUNC const UTrie2 * U_EXPORT2
-ucase_getTrie();
-
-/* 2-bit constants for types of cased characters */
-#define UCASE_TYPE_MASK 3
-enum {
- UCASE_NONE,
- UCASE_LOWER,
- UCASE_UPPER,
- UCASE_TITLE
-};
-
-#define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK)
-#define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7)
-
-#define UCASE_IS_UPPER_OR_TITLE(props) ((props)&2)
-
-#define UCASE_IGNORABLE 4
-#define UCASE_EXCEPTION 8
-#define UCASE_SENSITIVE 0x10
-
-#define UCASE_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
-
-#define UCASE_DOT_MASK 0x60
-enum {
- UCASE_NO_DOT=0, /* normal characters with cc=0 */
- UCASE_SOFT_DOTTED=0x20, /* soft-dotted characters with cc=0 */
- UCASE_ABOVE=0x40, /* "above" accents with cc=230 */
- UCASE_OTHER_ACCENT=0x60 /* other accent character (0<cc!=230) */
-};
-
-/* no exception: bits 15..7 are a 9-bit signed case mapping delta */
-#define UCASE_DELTA_SHIFT 7
-#define UCASE_DELTA_MASK 0xff80
-#define UCASE_MAX_DELTA 0xff
-#define UCASE_MIN_DELTA (-UCASE_MAX_DELTA-1)
-
-#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
-# define UCASE_GET_DELTA(props) ((int16_t)(props)>>UCASE_DELTA_SHIFT)
-#else
-# define UCASE_GET_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UCASE_DELTA_SHIFT)|0xfe00) : ((uint16_t)(props)>>UCASE_DELTA_SHIFT))
-#endif
-
-/* exception: bits 15..4 are an unsigned 12-bit index into the exceptions array */
-#define UCASE_EXC_SHIFT 4
-#define UCASE_EXC_MASK 0xfff0
-#define UCASE_MAX_EXCEPTIONS ((UCASE_EXC_MASK>>UCASE_EXC_SHIFT)+1)
-
-/* definitions for 16-bit main exceptions word ------------------------------ */
-
-/* first 8 bits indicate values in optional slots */
-enum {
- UCASE_EXC_LOWER,
- UCASE_EXC_FOLD,
- UCASE_EXC_UPPER,
- UCASE_EXC_TITLE,
- UCASE_EXC_DELTA,
- UCASE_EXC_5, /* reserved */
- UCASE_EXC_CLOSURE,
- UCASE_EXC_FULL_MAPPINGS,
- UCASE_EXC_ALL_SLOTS /* one past the last slot */
-};
-
-/* each slot is 2 uint16_t instead of 1 */
-#define UCASE_EXC_DOUBLE_SLOTS 0x100
-
-enum {
- UCASE_EXC_NO_SIMPLE_CASE_FOLDING=0x200,
- UCASE_EXC_DELTA_IS_NEGATIVE=0x400,
- UCASE_EXC_SENSITIVE=0x800
-};
-
-/* UCASE_EXC_DOT_MASK=UCASE_DOT_MASK<<UCASE_EXC_DOT_SHIFT */
-#define UCASE_EXC_DOT_SHIFT 7
-
-/* normally stored in the main word, but pushed out for larger exception indexes */
-#define UCASE_EXC_DOT_MASK 0x3000
-enum {
- UCASE_EXC_NO_DOT=0,
- UCASE_EXC_SOFT_DOTTED=0x1000,
- UCASE_EXC_ABOVE=0x2000, /* "above" accents with cc=230 */
- UCASE_EXC_OTHER_ACCENT=0x3000 /* other character (0<cc!=230) */
-};
-
-/* complex/conditional mappings */
-#define UCASE_EXC_CONDITIONAL_SPECIAL 0x4000
-#define UCASE_EXC_CONDITIONAL_FOLD 0x8000
-
-/* definitions for lengths word for full case mappings */
-#define UCASE_FULL_LOWER 0xf
-#define UCASE_FULL_FOLDING 0xf0
-#define UCASE_FULL_UPPER 0xf00
-#define UCASE_FULL_TITLE 0xf000
-
-/* maximum lengths */
-#define UCASE_FULL_MAPPINGS_MAX_LENGTH (4*0xf)
-#define UCASE_CLOSURE_MAX_LENGTH 0xf
-
-/* constants for reverse case folding ("unfold") data */
-enum {
- UCASE_UNFOLD_ROWS,
- UCASE_UNFOLD_ROW_WIDTH,
- UCASE_UNFOLD_STRING_WIDTH
-};
-
-#endif
diff --git a/contrib/libs/icu/common/ucase_props_data.h b/contrib/libs/icu/common/ucase_props_data.h
deleted file mode 100644
index 7c972309570..00000000000
--- a/contrib/libs/icu/common/ucase_props_data.h
+++ /dev/null
@@ -1,951 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//
-// Copyright (C) 1999-2016, International Business Machines
-// Corporation and others. All Rights Reserved.
-//
-// file name: ucase_props_data.h
-//
-// machine-generated by: icu/tools/unicode/c/genprops/casepropsbuilder.cpp
-
-
-#ifdef INCLUDED_FROM_UCASE_CPP
-
-static const UVersionInfo ucase_props_dataVersion={0xd,0,0,0};
-
-static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x70ca,0x6098,0x687,0x172,0,0,0,0,0,0,0,0,0,0,3};
-
-static const uint16_t ucase_props_trieIndex[12356]={
-0x336,0x33e,0x346,0x34e,0x35c,0x364,0x36c,0x374,0x37c,0x384,0x38b,0x393,0x39b,0x3a3,0x3ab,0x3b3,
-0x3b9,0x3c1,0x3c9,0x3d1,0x3d9,0x3e1,0x3e9,0x3f1,0x3f9,0x401,0x409,0x411,0x419,0x421,0x429,0x431,
-0x439,0x441,0x449,0x451,0x459,0x461,0x469,0x471,0x46d,0x475,0x47a,0x482,0x489,0x491,0x499,0x4a1,
-0x4a9,0x4b1,0x4b9,0x4c1,0x355,0x35d,0x4c6,0x4ce,0x4d3,0x4db,0x4e3,0x4eb,0x4ea,0x4f2,0x4f7,0x4ff,
-0x507,0x50e,0x512,0x355,0x355,0x355,0x519,0x521,0x529,0x52b,0x533,0x53b,0x53f,0x540,0x548,0x550,
-0x558,0x540,0x560,0x565,0x558,0x540,0x56d,0x575,0x53f,0x57d,0x585,0x58d,0x595,0x355,0x59d,0x355,
-0x5a5,0x4ec,0x5ad,0x58d,0x53f,0x57d,0x5b4,0x58d,0x5bc,0x5be,0x548,0x58d,0x53f,0x355,0x5c6,0x355,
-0x355,0x5cc,0x5d3,0x355,0x355,0x5d7,0x5df,0x355,0x5e3,0x5ea,0x355,0x5f1,0x5f9,0x600,0x608,0x355,
-0x355,0x60d,0x615,0x61d,0x625,0x62d,0x634,0x63c,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x644,0x355,0x355,0x654,0x654,0x64c,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x65c,0x65c,0x54c,0x54c,0x355,0x662,0x66a,0x355,
-0x672,0x355,0x67a,0x355,0x681,0x687,0x355,0x355,0x355,0x68f,0x355,0x355,0x355,0x355,0x355,0x355,
-0x696,0x355,0x69d,0x6a5,0x355,0x6ad,0x6b5,0x355,0x57c,0x6b8,0x6c0,0x6c6,0x5bc,0x6ce,0x355,0x6d5,
-0x355,0x6da,0x355,0x6e0,0x6e8,0x6ec,0x6f4,0x6fc,0x704,0x709,0x70c,0x714,0x724,0x71c,0x734,0x72c,
-0x37c,0x73c,0x37c,0x744,0x747,0x37c,0x74f,0x37c,0x757,0x75f,0x767,0x76f,0x777,0x77f,0x787,0x78f,
-0x797,0x79e,0x355,0x7a6,0x7ae,0x355,0x7b6,0x7be,0x7c6,0x7ce,0x7d6,0x7de,0x7e6,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x7e9,0x7ef,0x7f5,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x7fd,0x802,0x806,0x80e,0x37c,0x37c,0x37c,0x816,0x81e,0x825,0x355,0x82a,0x355,0x355,0x355,0x832,
-0x355,0x677,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x53e,0x83a,0x355,0x355,0x841,0x355,0x355,0x849,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x851,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x6e0,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x857,0x355,0x85f,0x864,0x86c,0x355,0x355,0x874,0x87c,0x884,0x37c,0x889,0x891,0x897,0x89f,0x8a2,
-0x8aa,0x8b1,0x355,0x355,0x355,0x355,0x8b8,0x8c0,0x355,0x8c8,0x8cf,0x355,0x529,0x8d4,0x8dc,0x681,
-0x355,0x8e2,0x8ea,0x8ee,0x355,0x8f6,0x8fe,0x906,0x355,0x90c,0x910,0x918,0x928,0x920,0x355,0x930,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x938,0x355,0x355,0x355,0x355,0x940,0x5bc,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x945,0x94d,0x951,0x355,0x355,0x355,0x355,0x338,0x33e,0x959,0x961,0x968,0x4ec,0x355,0x355,0x970,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0xd58,0xd58,0xd70,0xdb0,0xdf0,0xe2c,0xe6c,0xeac,0xee4,0xf24,0xf64,0xfa4,0xfe4,0x1024,0x1064,0x10a4,
-0x10e4,0x1124,0x1164,0x11a4,0x11b4,0x11e8,0x1224,0x1264,0x12a4,0x12e4,0xd54,0x1318,0x134c,0x138c,0x13a8,0x13dc,
-0x9e1,0xa11,0xa51,0xa90,0x188,0x188,0xac8,0x188,0x188,0x188,0x188,0x188,0x188,0xaf1,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xb31,0x188,0x188,0xb66,0xba5,0xbe5,0xc1f,0xc56,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
-0xc96,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x977,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x6b5,0x355,0x355,0x355,0x97f,0x355,0x355,0x355,
-0x355,0x987,0x98d,0x991,0x355,0x355,0x995,0x999,0x99f,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x9a7,0x9ab,0x355,0x355,0x355,0x355,0x355,0x9b3,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x9bb,0x9bf,0x9c7,0x9cb,0x355,0x9d2,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x9d8,0x355,0x355,0x355,0x355,0x9df,0x355,0x355,0x355,0x355,
-0x355,0x53f,0x9e4,0x9eb,0x5bd,0x5bc,0x9ef,0x53c,0x355,0x9f7,0x9fe,0x355,0xa04,0x5bc,0xa09,0xa11,
-0x355,0x355,0xa16,0x355,0x355,0x355,0x355,0x338,0xa1e,0x5bc,0x5be,0xa26,0xa2d,0x355,0x355,0x355,
-0x355,0x355,0x9e4,0xa35,0x355,0x355,0xa3d,0xa45,0x355,0x355,0x355,0x355,0x355,0x355,0xa49,0xa51,
-0x355,0x355,0xa59,0x4b0,0x355,0x355,0xa61,0x355,0x355,0xa67,0xa6f,0x355,0x355,0x355,0x355,0x355,
-0x355,0xa74,0x355,0x355,0x355,0xa7c,0xa84,0x355,0x355,0xa8c,0xa94,0x355,0x355,0x355,0xa97,0x6b5,
-0xa9f,0xaa3,0xaab,0x355,0xab2,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0xab9,0x355,0x355,0x940,0xac1,0x355,0x355,0x355,0xac7,0xacf,0x355,0xad3,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0xad9,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0xadf,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0xae6,0x355,0xaec,0x57c,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0xa7c,0xa84,0x355,0x355,0x355,0x355,0x355,0x355,0x677,0x355,0xaf2,0x355,0x355,
-0xafa,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0xaff,0x57c,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0xb07,0xb0f,0xb15,0x355,0x355,0x355,0x355,0xb1d,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0xb25,0xb2d,0xb32,0xb38,0xb40,0xb48,0xb50,0xb29,0xb58,0xb60,
-0xb68,0xb6f,0xb2a,0xb25,0xb2d,0xb28,0xb38,0xb2b,0xb26,0xb77,0xb29,0xb7f,0xb87,0xb8f,0xb96,0xb82,
-0xb8a,0xb92,0xb99,0xb85,0xba1,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x87c,0xba9,0x87c,0xbb0,0xbb7,0xbbf,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0xbc7,0xbcf,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0xbd3,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x9d0,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0xbdb,0x355,0xbe3,0xbeb,0xbf2,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0xb21,
-0xbfa,0xbfa,0xc00,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x9f9,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x53f,0x87c,0x87c,0x87c,0x355,0x355,0x355,0x355,0x87c,0x87c,
-0x87c,0x87c,0x87c,0x87c,0x87c,0xc08,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
-0x355,0x355,0x355,0x355,0x355,0x355,0x335,0x335,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
-0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,
-0,0,4,0,0,0,0,0,0,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,
-0x1012,0xa,0x5a,0x7a,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0xba,0x1012,0x1012,0x1012,0x1012,
-0x1012,0x1012,0x1012,0,0,0,4,0,4,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,
-0xf011,0xf9,0xf031,0x149,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0x189,0xf011,0xf011,0xf011,0xf011,
-0xf011,0xf011,0xf011,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,4,0,1,0,0,4,0,4,
-0,0,0,0,4,0x1c9,0,4,4,0,1,0,0,0,0,0,
-0x1012,0x1012,0x1012,0x1012,0x1012,0x1fa,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x5a,0x5a,0x1012,0x1012,
-0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x239,
-0xf011,0xf011,0xf011,0xf011,0xf011,0x2d9,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,
-0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0x3c91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x31a,0xff91,0x92,0xff91,0x92,0xff91,0x31a,0xffb1,
-0x33a,0x389,0x92,0xff91,0x92,0xff91,0x92,0xff91,1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,
-0xff91,0x92,0xff91,0x92,0xff91,0x3d9,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0xc392,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x459,0x6191,0x6912,0x92,0xff91,
-0x92,0xff91,0x6712,0x92,0xff91,0x6692,0x6692,0x92,0xff91,1,0x2792,0x6512,0x6592,0x92,0xff91,0x6692,
-0x6792,0x3091,0x6992,0x6892,0x92,0xff91,0x5191,1,0x6992,0x6a92,0x4111,0x6b12,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x6d12,0x92,0xff91,0x6d12,1,1,0x92,0xff91,0x6d12,0x92,0xff91,0x6c92,0x6c92,0x92,
-0xff91,0x92,0xff91,0x6d92,0x92,0xff91,1,0,0x92,0xff91,1,0x1c11,0,0,0,0,
-0x48a,0x4bb,0x4f9,0x52a,0x55b,0x599,0x5ca,0x5fb,0x639,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,
-0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0xd891,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x669,0x6ea,0x71b,0x759,
-0x92,0xff91,0xcf92,0xe412,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0xbf12,1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,1,1,1,1,1,1,0x78a,0x92,
-0xff91,0xae92,0x7aa,0x7c9,0x7c9,0x92,0xff91,0x9e92,0x2292,0x2392,0x92,0xff91,0x92,0xffb1,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x7e9,0x809,0x829,0x9711,0x9911,1,0x9991,0x9991,1,0x9b11,1,0x9a91,
-0x849,1,1,1,0x9991,0x869,1,0x9891,1,0x889,0x8a9,1,0x97b1,0x9691,0x8a9,0x8c9,
-0x8e9,1,1,0x9691,1,0x909,0x9591,1,1,0x9511,1,1,1,1,1,1,
-1,0x929,1,1,0x9311,1,0x949,0x9311,1,1,1,0x969,0x9311,0xdd91,0x9391,0x9391,
-0xdc91,1,1,1,1,1,0x9291,1,0,1,1,1,1,1,1,1,
-1,0x989,0x9a9,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,5,5,0x25,5,5,5,5,5,5,4,4,4,
-0x14,4,0x14,4,5,5,4,4,4,4,4,4,4,4,4,4,
-4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-4,4,4,4,5,5,5,5,5,4,4,4,4,4,4,4,
-4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-4,4,4,4,0x54,0x54,0x44,0x44,0x44,0x44,0x44,0x9cc,0x54,0x44,0x54,0x44,
-0x54,0x44,0x44,0x44,0x44,0x44,0x44,0x54,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x64,
-0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,
-0x64,0x64,0x64,0x64,0x64,0x74,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,
-0x64,0x44,0x44,0x44,0x44,0x44,0x54,0x44,0x44,0x9dd,0x44,0x64,0x64,0x64,0x44,0x44,
-0x44,0x64,0x64,4,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x44,0x64,0x64,0x64,0x44,
-0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
-0x44,0x44,0x44,0x44,0x92,0xff91,0x92,0xff91,4,4,0x92,0xff91,0,0,5,0x4111,
-0x4111,0x4111,0,0x3a12,0,0,0,0,4,4,0x1312,4,0x1292,0x1292,0x1292,0,
-0x2012,0,0x1f92,0x1f92,0xa29,0x1012,0xafa,0x1012,0x1012,0xb3a,0x1012,0x1012,0xb7a,0xbca,0xc1a,0x1012,
-0xc5a,0x1012,0x1012,0x1012,0xc9a,0xcda,0,0xd1a,0x1012,0x1012,0xd5a,0x1012,0x1012,0xd9a,0x1012,0x1012,
-0xed11,0xed91,0xed91,0xed91,0xdd9,0xf011,0xea9,0xf011,0xf011,0xee9,0xf011,0xf011,0xf29,0xf79,0xfc9,0xf011,
-0x1009,0xf011,0xf011,0xf011,0x1049,0x1089,0x10c9,0x10f9,0xf011,0xf011,0x1139,0xf011,0xf011,0x1179,0xf011,0xf011,
-0xe011,0xe091,0xe091,0x412,0x11b9,0x11e9,2,2,2,0x1239,0x1269,0xfc11,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x1299,0x12c9,0x391,0xc631,0x12fa,0x1349,0,0x92,0xff91,0xfc92,0x92,0xff91,
-1,0xbf12,0xbf12,0xbf12,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,
-0x2812,0x2812,0x2812,0x2812,0x1012,0x1012,0x137a,0x1012,0x13ba,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,
-0x1012,0x1012,0x13fa,0x1012,0x1012,0x143a,0x147a,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x14ca,0x1012,
-0x1012,0x1012,0x1012,0x1012,0xf011,0xf011,0x1509,0xf011,0x1549,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,
-0xf011,0xf011,0x1589,0xf011,0xf011,0x15c9,0x1609,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0x1659,0xf011,
-0xf011,0xf011,0xf011,0xf011,0xd811,0xd811,0xd811,0xd811,0xd811,0xd811,0xd831,0xd811,0xd831,0xd811,0xd811,0xd811,
-0xd811,0xd811,0xd811,0xd811,0x92,0xff91,0x169a,0x16d9,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0,0x44,0x44,0x44,0x44,0x44,4,4,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x792,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,
-0xff91,0x92,0xff91,0xf891,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,
-0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0,
-0,4,0,0,0,0,0,4,1,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
-0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
-0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0x1719,1,0,0,0,
-0,0,0,0,0,0x64,0x44,0x44,0x44,0x44,0x64,0x44,0x44,0x44,0x64,0x64,
-0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x64,0x44,
-0x44,0x64,0x64,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,
-0x64,0x64,0,0x64,0,0x64,0x64,0,0x44,0x64,0,0x64,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
-0,0,0,0,4,4,4,4,4,4,0,0,0,0,0,0,
-0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0,
-4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0x64,
-0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x64,0x64,0x44,0x44,0x44,0x44,0x44,
-0x64,0x44,0x44,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,
-0x44,0x44,0x44,0x44,0x44,4,0,0x44,0x44,0x44,0x44,0x64,0x44,4,4,0x44,
-0x44,0,0x64,0x44,0x44,0x64,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,4,0,0x64,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x64,0x44,0x44,
-0x64,0x44,0x44,0x64,0x64,0x64,0x44,0x64,0x64,0x44,0x64,0x44,0x44,0x44,0x64,0x44,
-0x64,0x44,0x64,0x44,0x64,0x44,0x44,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,
-4,4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x44,
-4,4,0,0,0,0,4,0,0,0x64,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0x44,0x44,0x44,0x44,4,0x44,0x44,0x44,0x44,0x44,4,0x44,0x44,0x44,
-4,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0x64,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
-0x44,0x44,0x44,0x44,0x44,0x44,4,0x64,0x44,0x44,0x64,0x44,0x44,0x64,0x44,0x44,
-0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x44,0x64,0x44,0x44,0x64,0x64,0x44,
-0x44,0x44,0x44,0x44,4,4,4,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,4,0,0x64,0,0,0,0,4,4,4,
-4,4,4,4,4,0,0,0,0,0x64,0,0,0,0x44,0x64,0x44,
-0x44,4,4,4,0,0,0,0,0,0,0,0,0,0,4,4,
-0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x64,0,0,0,
-0,4,4,4,4,0,0,0,0,0,0,0,0,0x64,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0,
-0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,4,4,0,0,0,0,4,4,0,0,4,4,0x64,0,0,
-0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,4,4,0,0,0,4,0,0,0,0,0,0,
-0,0,0,0,0,4,4,4,4,4,0,4,4,0,0,0,
-0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,4,4,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,
-4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x64,0,0,4,0,4,4,4,4,0,0,0,0,0,0,0,
-0,0x64,0,0,0,0,0,0,0,4,4,0,0,0,0,0,
-0,0,0,0,0,0,4,4,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
-0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,4,0,0,0,4,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,4,0,0,0,0,0,4,4,4,0,4,4,
-4,0x64,0,0,0,0,0,0,0,0x64,0x64,0,0,0,0,0,
-0,0,0,0,0,0,4,0,0,0,0,0,4,0x64,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0x64,0,0,0,0,0,0,0,4,4,4,0,4,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,4,0,0,4,4,4,4,0x64,0x64,0x64,0,0,0,0,0,
-0,0,4,4,0x64,0x64,0x64,0x64,4,4,4,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,
-4,4,4,4,0x64,0x64,0x64,4,4,0,0,0,0,0,0,0,
-0,0,4,0,0x64,0x64,0x64,0x64,4,4,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x64,0x64,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0x64,0,0x64,
-0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0x64,0x64,4,0x64,4,4,4,4,4,0x64,0x64,
-0x64,0x64,4,0,0x64,4,0x44,0x44,0x64,0,0x44,0x44,0,0,0,0,
-0,4,4,4,4,4,4,4,4,4,4,4,0,4,4,4,
-4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-4,4,4,4,4,4,4,4,4,4,4,4,4,0,0,0,
-0,0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,4,4,4,4,0,4,4,4,4,4,0x64,0,0x64,0x64,0,
-0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,0,
-0,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,4,4,4,4,0,0,0,0,0,0,0,
-0,0,0,0,0,0,4,0,0,4,4,0,0,0,0,0,
-0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,4,0,0,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,
-0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,
-0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0,0x179a,0,0,0,0,0,0x179a,0,0,
-0x17b9,0x17e9,0x1819,0x1849,0x1879,0x18a9,0x18d9,0x1909,0x1939,0x1969,0x1999,0x19c9,0x19f9,0x1a29,0x1a59,0x1a89,
-0x1ab9,0x1ae9,0x1b19,0x1b49,0x1b79,0x1ba9,0x1bd9,0x1c09,0x1c39,0x1c69,0x1c99,0x1cc9,0x1cf9,0x1d29,0x1d59,0x1d89,
-0x1db9,0x1de9,0x1e19,0x1e49,0x1e79,0x1ea9,0x1ed9,0x1f09,0x1f39,0x1f69,0x1f99,0,4,0x1fc9,0x1ff9,0x2029,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,
-0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,
-0x207a,0x207a,0x207a,0x207a,0x207a,0x207a,0,0,0x2099,0x20c9,0x20f9,0x2129,0x2159,0x2189,0,0,
-0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,
-0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,0x205a,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,4,4,0x64,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,4,
-4,4,4,4,4,4,0,0,0,0,0,0,0,0,4,0,
-0,4,4,4,4,4,4,4,4,4,0x64,4,0,0,0,4,
-0,0,0,0,0,0x44,0,0,0,0,0,0,0,0,0,0,
-0,0,0,4,4,4,4,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,0,
-0,0,0,4,4,0,0,0,0,0,0,0,0,0,4,0,
-0,0,0,0,0,0x64,0x44,0x64,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,
-0x64,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,4,0,4,4,4,4,
-4,4,4,0,0x64,0,4,0,0,4,4,4,4,4,4,4,
-4,0,0,0,0,0,0,4,4,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
-0x44,0,0,0x64,0,0,0,0,0,0,0,4,0,0,0,0,
-0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,
-0x44,0x64,4,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x64,0,4,4,4,4,4,0,4,0,0,0,
-0,0,4,0,0x60,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,4,4,4,4,0,0,
-4,4,0x60,0x64,4,4,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0x64,0,4,4,0,0,
-0,4,0,4,4,4,0x60,0x60,0,0,0,0,0,0,0,0,
-0,0,0,0,4,4,4,4,4,4,4,4,0,0,4,0x64,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,4,4,4,4,4,4,0,0,
-0x21b9,0x21e9,0x2219,0x2249,0x2279,0x22c9,0x2319,0x2349,0x2379,0,0,0,0,0,0,0,
-0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,
-0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0x23aa,0,0,0x23aa,0x23aa,0x23aa,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x44,0x44,0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x64,0x64,0x64,0x64,
-0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,0,0x64,0,0,
-0,0,0,0,0x44,0,0,0,0x44,0x44,0,0,0,0,0,0,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,0x25,5,5,5,5,5,5,5,5,1,1,1,1,1,
-1,1,1,1,1,1,1,1,5,0x23c9,1,1,1,0x23e9,1,1,
-5,5,5,5,0x25,5,5,5,0x25,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x2409,1,
-1,1,1,1,1,1,0x21,1,1,1,1,5,5,5,5,5,
-0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
-0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0,0x44,0x64,0x64,0x44,0x64,
-0x44,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x44,0x44,0x64,0x64,0x64,
-0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xffb1,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x242a,0x2469,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x24a9,0x2529,0x25a9,0x2629,0x26a9,0x2729,1,1,0x275a,1,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xffb1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x411,0x411,0x411,0x411,
-0x411,0x411,0x411,0x411,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0x411,0x411,0x411,0x411,
-0x411,0x411,0,0,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0,0,0x411,0x411,0x411,0x411,
-0x411,0x411,0x411,0x411,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0x411,0x411,0x411,0x411,
-0x411,0x411,0x411,0x411,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0x411,0x411,0x411,0x411,
-0x411,0x411,0,0,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0,0,0x27a9,0x411,0x2829,0x411,
-0x28d9,0x411,0x2989,0x411,0,0xfc12,0,0xfc12,0,0xfc12,0,0xfc12,0x411,0x411,0x411,0x411,
-0x411,0x411,0x411,0x411,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0x2511,0x2511,0x2b11,0x2b11,
-0x2b11,0x2b11,0x3211,0x3211,0x4011,0x4011,0x3811,0x3811,0x3f11,0x3f11,0,0,0x2a39,0x2aa9,0x2b19,0x2b89,
-0x2bf9,0x2c69,0x2cd9,0x2d49,0x2dbb,0x2e2b,0x2e9b,0x2f0b,0x2f7b,0x2feb,0x305b,0x30cb,0x3139,0x31a9,0x3219,0x3289,
-0x32f9,0x3369,0x33d9,0x3449,0x34bb,0x352b,0x359b,0x360b,0x367b,0x36eb,0x375b,0x37cb,0x3839,0x38a9,0x3919,0x3989,
-0x39f9,0x3a69,0x3ad9,0x3b49,0x3bbb,0x3c2b,0x3c9b,0x3d0b,0x3d7b,0x3deb,0x3e5b,0x3ecb,0x411,0x411,0x3f39,0x3fb9,
-0x4029,0,0x40a9,0x4129,0xfc12,0xfc12,0xdb12,0xdb12,0x41db,4,0x4249,4,4,4,0x4299,0x4319,
-0x4389,0,0x4409,0x4489,0xd512,0xd512,0xd512,0xd512,0x453b,4,4,4,0x411,0x411,0x45a9,0x4659,
-0,0,0x4729,0x47a9,0xfc12,0xfc12,0xce12,0xce12,0,4,4,4,0x411,0x411,0x4859,0x4909,
-0x49d9,0x391,0x4a59,0x4ad9,0xfc12,0xfc12,0xc812,0xc812,0xfc92,4,4,4,0,0,0x4b89,0x4c09,
-0x4c79,0,0x4cf9,0x4d79,0xc012,0xc012,0xc112,0xc112,0x4e2b,4,4,0,0,0,0,0,
-0,0,0,0,0,0,0,4,4,4,4,4,0,0,0,0,
-0,0,0,0,4,4,0,0,0,0,0,0,4,0,0,4,
-0,0,4,4,4,4,4,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,4,4,4,4,4,0,4,4,
-4,4,4,4,4,4,4,4,0,0x25,0,0,0,0,0,0,
-0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0x44,0x44,0x64,0x64,0x44,0x44,0x44,0x44,
-0x64,0x64,0x64,0x44,0x44,4,4,4,4,0x44,4,4,4,0x64,0x64,0x44,
-0x64,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,
-0,0,1,2,2,2,1,1,2,2,2,1,0,2,0,0,
-0,2,2,2,2,2,0,0,0,0,0,0,2,0,0x4e9a,0,
-2,0,0x4eda,0x4f1a,2,2,0,1,2,2,0xe12,2,1,0,0,0,
-0,1,0,0,1,1,2,2,0,0,0,0,0,2,1,1,
-0x21,0x21,0,0,0,0,0xf211,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0x812,0x812,0x812,0x812,0x812,0x812,0x812,0x812,
-0x812,0x812,0x812,0x812,0x812,0x812,0x812,0x812,0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,
-0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,0,0,0,0x92,0xff91,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,
-0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xf311,0xf311,0xf311,0xf311,
-0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,
-0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,
-0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
-0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
-0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0,0x92,0xff91,0x4f5a,0x4f7a,0x4f9a,0x4fb9,0x4fd9,0x92,
-0xff91,0x92,0xff91,0x92,0xff91,0x4ffa,0x501a,0x503a,0x505a,1,0x92,0xff91,1,0x92,0xff91,1,
-1,1,1,1,0x25,5,0x507a,0x507a,0x92,0xff91,0x92,0xff91,1,0,0,0,
-0,0,0,0x92,0xff91,0x92,0xff91,0x44,0x44,0x44,0x92,0xff91,0,0,0,0,
-0,0,0,0,0,0,0,0,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,
-0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,
-0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0x5099,0,0x5099,0,0,0,0,
-0,0x5099,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
-0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
-0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,
-0,0,0x64,0x64,0x64,0x64,0x60,0x60,0,4,4,4,4,4,0,0,
-0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0x64,0x64,4,
-4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x50ba,0x50f9,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0,0x44,
-4,4,4,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,4,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,5,5,0x44,0x44,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-4,4,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-1,1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,5,1,1,1,1,1,1,1,1,0x92,0xff91,0x92,
-0xff91,0x513a,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,4,4,4,0x92,
-0xff91,0x515a,1,0,0x92,0xff91,0x92,0xff91,0x1811,1,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x517a,0x519a,0x51ba,0x51da,0x517a,1,0x51fa,0x521a,0x523a,0x525a,
-0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0,0,0x92,0xff91,
-0xe812,0x527a,0x529a,0x92,0xff91,0x92,0xff91,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0x92,0xff91,0,
-5,5,1,0,0,0,0,0,0,0,4,0,0,0,0x64,0,
-0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,
-0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x64,4,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
-0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
-0,0,0,0,0,0,4,4,4,4,4,0x64,0x64,0x64,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,4,4,4,4,4,4,4,4,4,4,4,0,0x60,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0x64,0,0,4,4,4,4,0,0,4,4,0,0,
-0x60,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,4,4,4,4,4,4,0,0,4,4,0,0,4,4,0,
-0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
-0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
-0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0x44,0,0x44,0x44,0x64,0,0,0x44,
-0x44,0,0,0,0,0,0x44,0x44,0,0x44,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,4,4,0,0,0,0,0,4,4,0,0x64,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0x52b9,1,1,1,1,1,1,1,4,5,5,5,5,
-1,1,1,1,1,1,1,1,1,4,4,4,0,0,0,0,
-0x52d9,0x5309,0x5339,0x5369,0x5399,0x53c9,0x53f9,0x5429,0x5459,0x5489,0x54b9,0x54e9,0x5519,0x5549,0x5579,0x55a9,
-0x5bd9,0x5c09,0x5c39,0x5c69,0x5c99,0x5cc9,0x5cf9,0x5d29,0x5d59,0x5d89,0x5db9,0x5de9,0x5e19,0x5e49,0x5e79,0x5ea9,
-0x5ed9,0x5f09,0x5f39,0x5f69,0x5f99,0x5fc9,0x5ff9,0x6029,0x6059,0x6089,0x60b9,0x60e9,0x6119,0x6149,0x6179,0x61a9,
-0x55d9,0x5609,0x5639,0x5669,0x5699,0x56c9,0x56f9,0x5729,0x5759,0x5789,0x57b9,0x57e9,0x5819,0x5849,0x5879,0x58a9,
-0x58d9,0x5909,0x5939,0x5969,0x5999,0x59c9,0x59f9,0x5a29,0x5a59,0x5a89,0x5ab9,0x5ae9,0x5b19,0x5b49,0x5b79,0x5ba9,
-0,0,0,0,0,4,0,0,4,0,0,0,0,0x64,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x61d9,0x6259,0x62d9,0x6359,0x6409,0x64b9,0x6559,0,0,0,0,0,0,0,0,0,
-0,0,0,0x65f9,0x6679,0x66f9,0x6779,0x67f9,0,0,0,0,0,0,0x64,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-4,4,4,4,0,0,0,4,0,0,0,0,0,0,0,0,
-0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,
-0x64,0x64,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,4,0,0,4,0,0,0,0,0,0,
-0,0,0,0,0,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,
-0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0,
-0,0,4,0,4,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,
-0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0x64,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0x1412,0x1412,0x1412,0x1412,
-0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,
-0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0xec11,0xec11,0xec11,0xec11,
-0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,
-0xec11,0xec11,0xec11,0xec11,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,
-0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0,0,0,0,0xec11,0xec11,0xec11,0xec11,
-0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,
-0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0,0,0,0,0,4,4,4,
-0,4,4,0,0,0,0,0,4,0x64,4,0x44,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x44,0x64,0x64,0,0,0,0,0x64,0,0,0,0,
-0,0x44,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x2012,0x2012,0x2012,0x2012,
-0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,
-0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0xe011,0xe011,0xe011,0xe011,
-0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,
-0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0x64,0x64,0x44,0x44,0x44,0x64,0x44,0x64,0x64,0x64,0x64,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
-4,4,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
-4,4,4,0,0,0x64,0x64,0,0,4,0,0,0x44,0x44,0x44,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
-4,4,4,4,0,4,4,4,4,4,4,0x64,0x64,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,4,
-4,4,4,0,0x60,0,0,0,0,0,0,0,0,4,0x64,4,
-4,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,4,4,4,0,0,4,0x60,0x64,4,
-0,0,0,0,0,0,4,0,0,0,0,4,4,4,4,4,
-4,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
-0,0,0,0,0,0x60,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,
-0x44,0,0,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0x64,4,4,0,0x64,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,4,4,4,4,4,4,0,4,0,
-0,0,0,4,4,0,0x64,0x64,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,4,4,4,4,0,0,0,0,0,0,
-4,4,0,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,0,
-0,4,0,0x64,0,0,0,0,0,0,0,0,0,0,0,4,
-0,4,0,0,4,4,4,4,4,4,0x60,0x64,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,4,4,4,0,0,4,4,
-4,4,0,4,4,4,4,0x64,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
-4,4,4,4,4,4,4,4,0,0x64,0x64,0,0,0,0,0,
-0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,
-0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,
-0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,
-0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,4,4,0x60,0x64,0,
-0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-4,4,4,4,0,0,4,4,0,0,0,0,0,4,4,4,
-4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
-0x64,4,4,4,4,0,0,4,4,4,4,0,0,0,0,0,
-0,0,0,0x64,0,0,0,0,0,0,0,0,0,4,4,4,
-4,4,4,0,0,4,4,4,0,0,0,0,0,0,0,0,
-0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,0,
-4,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,4,4,4,4,4,4,4,0,4,4,4,4,
-4,4,0,0x64,4,4,4,4,4,4,4,4,0,0,4,4,
-4,4,4,4,4,0,4,4,0,4,4,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,
-4,4,4,0,0,0,4,0,4,4,0,4,4,4,0x64,4,
-0x64,0x64,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,0,
-0,4,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
-4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x64,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,4,
-4,4,4,4,4,4,4,4,4,4,0,4,4,0,0,0,
-0,0,0,0,0,0,0,0,0x60,0x60,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,4,0x64,0,0,0,0,0,
-0,0x60,0x60,0x64,0x64,0x64,0,0,0,0x60,0x60,0x60,0x60,0x60,0x60,4,
-4,4,4,4,4,4,4,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,
-0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,
-0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0x44,0x44,0x44,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,
-1,1,1,1,1,1,0x21,0x21,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,1,1,1,1,1,1,1,0,0x21,0x21,
-1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,2,0,2,2,
-0,0,2,0,0,2,2,0,0,2,2,2,2,0,2,2,
-2,2,2,2,2,2,1,1,1,1,0,1,0,1,0x21,0x21,
-1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-1,1,1,1,2,2,0,2,2,2,2,0,0,2,2,2,
-2,2,2,2,2,0,2,2,2,2,2,2,2,0,1,1,
-1,1,1,1,1,1,0x21,0x21,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,2,2,0,2,2,2,2,0,
-2,2,2,2,2,0,2,0,0,0,2,2,2,2,2,2,
-2,0,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1,
-1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
-1,1,0,0,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,0,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,0,1,1,1,1,1,1,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,0,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,
-1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,1,1,1,0,1,1,1,1,1,1,2,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,
-4,4,4,4,4,4,4,4,4,4,4,0,0,0,0,4,
-4,4,4,4,4,4,4,4,4,4,4,4,4,0,0,0,
-0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,
-4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,4,4,4,4,4,0,4,4,4,
-4,4,4,4,4,4,4,4,4,4,4,4,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,
-0x44,0x44,0x44,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
-0x44,0x44,0x44,0x44,0x44,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0x44,
-0x44,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,
-0x44,0x44,0x44,4,4,4,4,4,4,4,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,
-0x64,0x64,0x64,0,0,0,0,0,0,0,0,0,0x1112,0x1112,0x1112,0x1112,
-0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,
-0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0xef11,0xef11,
-0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,
-0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0x44,0x44,0x44,0x44,
-0x44,0x44,0x64,4,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,
-2,2,0,0,0,0,0,0,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0
-};
-
-static const uint16_t ucase_props_exceptions[1671]={
-0xc850,0x20,2,0x130,0x131,0x4810,0x20,0x841,0x6b,1,0x212a,0x841,0x73,1,0x17f,0x5c50,
-0x20,2,0x130,0x131,0x844,0x4b,1,0x212a,0x844,0x53,1,0x17f,0x806,0x3bc,0x39c,0x841,
-0xe5,1,0x212b,0x8c0,1,0x2220,0x73,0x73,0x53,0x53,0x53,0x73,0x1e9e,0x844,0xc5,1,
-0x212b,0x4810,1,0xce50,0xc7,2,0x49,0x131,0x844,0x49,2,0x69,0x130,0x880,0x2220,0x2bc,
-0x6e,0x2bc,0x4e,0x2bc,0x4e,0x806,0x73,0x53,0x809,0x1c6,0x1c5,0x80d,0x1c6,0x1c4,0x1c5,0x80c,
-0x1c4,0x1c5,0x809,0x1c9,0x1c8,0x80d,0x1c9,0x1c7,0x1c8,0x80c,0x1c7,0x1c8,0x809,0x1cc,0x1cb,0x80d,
-0x1cc,0x1ca,0x1cb,0x80c,0x1ca,0x1cb,0x880,0x2220,0x6a,0x30c,0x4a,0x30c,0x4a,0x30c,0x809,0x1f3,
-0x1f2,0x80d,0x1f3,0x1f1,0x1f2,0x80c,0x1f1,0x1f2,0x810,0x2a2b,0x810,0x2a28,0x810,0x2a3f,0x810,0x2a1f,
-0x810,0x2a1c,0x810,0x2a1e,0x810,0xa54f,0x810,0xa54b,0x810,0xa528,0x810,0xa544,0x810,0x29f7,0x810,0xa541,
-0x810,0x29fd,0x810,0x29e7,0x810,0xa543,0x810,0xa52a,0x1810,0xa515,0x810,0xa512,0x6800,0x3846,0x3b9,0x399,
-1,0x1fbe,0x8c0,1,0x3330,0x3b9,0x308,0x301,0x399,0x308,0x301,0x399,0x308,0x301,0x1fd3,0x841,
-0x3b2,1,0x3d0,0x841,0x3b5,1,0x3f5,0x841,0x3b8,2,0x3d1,0x3f4,0x841,0x3b9,2,0x345,
-0x1fbe,0x841,0x3ba,1,0x3f0,0x841,0x3bc,1,0xb5,0x841,0x3c0,1,0x3d6,0x841,0x3c1,1,
-0x3f1,0x4850,0x20,1,0x3c2,0x841,0x3c6,1,0x3d5,0x841,0x3c9,1,0x2126,0x8c0,1,0x3330,
-0x3c5,0x308,0x301,0x3a5,0x308,0x301,0x3a5,0x308,0x301,0x1fe3,0x844,0x392,1,0x3d0,0x844,0x395,
-1,0x3f5,0x844,0x398,2,0x3d1,0x3f4,0x844,0x399,2,0x345,0x1fbe,0x844,0x39a,1,0x3f0,
-0x844,0x39c,1,0xb5,0x844,0x3a0,1,0x3d6,0x844,0x3a1,1,0x3f1,0x806,0x3c3,0x3a3,0x844,
-0x3a3,1,0x3c2,0x844,0x3a6,1,0x3d5,0x844,0x3a9,1,0x2126,0x806,0x3b2,0x392,0x846,0x3b8,
-0x398,1,0x3f4,0x806,0x3c6,0x3a6,0x806,0x3c0,0x3a0,0x806,0x3ba,0x39a,0x806,0x3c1,0x3a1,0x841,
-0x3b8,2,0x398,0x3d1,0x806,0x3b5,0x395,0x841,0x432,1,0x1c80,0x841,0x434,1,0x1c81,0x841,
-0x43e,1,0x1c82,0x841,0x441,1,0x1c83,0x841,0x442,2,0x1c84,0x1c85,0x841,0x44a,1,0x1c86,
-0x844,0x412,1,0x1c80,0x844,0x414,1,0x1c81,0x844,0x41e,1,0x1c82,0x844,0x421,1,0x1c83,
-0x844,0x422,2,0x1c84,0x1c85,0x844,0x42a,1,0x1c86,0x841,0x463,1,0x1c87,0x844,0x462,1,
-0x1c87,0x880,0x2220,0x565,0x582,0x535,0x552,0x535,0x582,0x810,0x1c60,0x80c,0x1c90,0x10d0,0x80c,0x1c91,
-0x10d1,0x80c,0x1c92,0x10d2,0x80c,0x1c93,0x10d3,0x80c,0x1c94,0x10d4,0x80c,0x1c95,0x10d5,0x80c,0x1c96,0x10d6,
-0x80c,0x1c97,0x10d7,0x80c,0x1c98,0x10d8,0x80c,0x1c99,0x10d9,0x80c,0x1c9a,0x10da,0x80c,0x1c9b,0x10db,0x80c,
-0x1c9c,0x10dc,0x80c,0x1c9d,0x10dd,0x80c,0x1c9e,0x10de,0x80c,0x1c9f,0x10df,0x80c,0x1ca0,0x10e0,0x80c,0x1ca1,
-0x10e1,0x80c,0x1ca2,0x10e2,0x80c,0x1ca3,0x10e3,0x80c,0x1ca4,0x10e4,0x80c,0x1ca5,0x10e5,0x80c,0x1ca6,0x10e6,
-0x80c,0x1ca7,0x10e7,0x80c,0x1ca8,0x10e8,0x80c,0x1ca9,0x10e9,0x80c,0x1caa,0x10ea,0x80c,0x1cab,0x10eb,0x80c,
-0x1cac,0x10ec,0x80c,0x1cad,0x10ed,0x80c,0x1cae,0x10ee,0x80c,0x1caf,0x10ef,0x80c,0x1cb0,0x10f0,0x80c,0x1cb1,
-0x10f1,0x80c,0x1cb2,0x10f2,0x80c,0x1cb3,0x10f3,0x80c,0x1cb4,0x10f4,0x80c,0x1cb5,0x10f5,0x80c,0x1cb6,0x10f6,
-0x80c,0x1cb7,0x10f7,0x80c,0x1cb8,0x10f8,0x80c,0x1cb9,0x10f9,0x80c,0x1cba,0x10fa,0x80c,0x1cbd,0x10fd,0x80c,
-0x1cbe,0x10fe,0x80c,0x1cbf,0x10ff,0xa10,0x97d0,0xa10,8,0x806,0x13f0,0x13f0,0x806,0x13f1,0x13f1,0x806,
-0x13f2,0x13f2,0x806,0x13f3,0x13f3,0x806,0x13f4,0x13f4,0x806,0x13f5,0x13f5,0x806,0x432,0x412,0x806,0x434,
-0x414,0x806,0x43e,0x41e,0x806,0x441,0x421,0x846,0x442,0x422,1,0x1c85,0x846,0x442,0x422,1,
-0x1c84,0x806,0x44a,0x42a,0x806,0x463,0x462,0x806,0xa64b,0xa64a,0xc10,0xbc0,0x810,0x8a04,0x810,0xee6,
-0x810,0x8a38,0x841,0x1e61,1,0x1e9b,0x844,0x1e60,1,0x1e9b,0x880,0x2220,0x68,0x331,0x48,0x331,
-0x48,0x331,0x880,0x2220,0x74,0x308,0x54,0x308,0x54,0x308,0x880,0x2220,0x77,0x30a,0x57,0x30a,
-0x57,0x30a,0x880,0x2220,0x79,0x30a,0x59,0x30a,0x59,0x30a,0x880,0x2220,0x61,0x2be,0x41,0x2be,
-0x41,0x2be,0x806,0x1e61,0x1e60,0xc90,0x1dbf,0x20,0x73,0x73,0x880,0x2220,0x3c5,0x313,0x3a5,0x313,
-0x3a5,0x313,0x880,0x3330,0x3c5,0x313,0x300,0x3a5,0x313,0x300,0x3a5,0x313,0x300,0x880,0x3330,0x3c5,
-0x313,0x301,0x3a5,0x313,0x301,0x3a5,0x313,0x301,0x880,0x3330,0x3c5,0x313,0x342,0x3a5,0x313,0x342,
-0x3a5,0x313,0x342,0x890,8,0x220,0x1f00,0x3b9,0x1f08,0x399,0x890,8,0x220,0x1f01,0x3b9,0x1f09,
-0x399,0x890,8,0x220,0x1f02,0x3b9,0x1f0a,0x399,0x890,8,0x220,0x1f03,0x3b9,0x1f0b,0x399,0x890,
-8,0x220,0x1f04,0x3b9,0x1f0c,0x399,0x890,8,0x220,0x1f05,0x3b9,0x1f0d,0x399,0x890,8,0x220,
-0x1f06,0x3b9,0x1f0e,0x399,0x890,8,0x220,0x1f07,0x3b9,0x1f0f,0x399,0xc90,8,0x220,0x1f00,0x3b9,
-0x1f08,0x399,0xc90,8,0x220,0x1f01,0x3b9,0x1f09,0x399,0xc90,8,0x220,0x1f02,0x3b9,0x1f0a,0x399,
-0xc90,8,0x220,0x1f03,0x3b9,0x1f0b,0x399,0xc90,8,0x220,0x1f04,0x3b9,0x1f0c,0x399,0xc90,8,
-0x220,0x1f05,0x3b9,0x1f0d,0x399,0xc90,8,0x220,0x1f06,0x3b9,0x1f0e,0x399,0xc90,8,0x220,0x1f07,
-0x3b9,0x1f0f,0x399,0x890,8,0x220,0x1f20,0x3b9,0x1f28,0x399,0x890,8,0x220,0x1f21,0x3b9,0x1f29,
-0x399,0x890,8,0x220,0x1f22,0x3b9,0x1f2a,0x399,0x890,8,0x220,0x1f23,0x3b9,0x1f2b,0x399,0x890,
-8,0x220,0x1f24,0x3b9,0x1f2c,0x399,0x890,8,0x220,0x1f25,0x3b9,0x1f2d,0x399,0x890,8,0x220,
-0x1f26,0x3b9,0x1f2e,0x399,0x890,8,0x220,0x1f27,0x3b9,0x1f2f,0x399,0xc90,8,0x220,0x1f20,0x3b9,
-0x1f28,0x399,0xc90,8,0x220,0x1f21,0x3b9,0x1f29,0x399,0xc90,8,0x220,0x1f22,0x3b9,0x1f2a,0x399,
-0xc90,8,0x220,0x1f23,0x3b9,0x1f2b,0x399,0xc90,8,0x220,0x1f24,0x3b9,0x1f2c,0x399,0xc90,8,
-0x220,0x1f25,0x3b9,0x1f2d,0x399,0xc90,8,0x220,0x1f26,0x3b9,0x1f2e,0x399,0xc90,8,0x220,0x1f27,
-0x3b9,0x1f2f,0x399,0x890,8,0x220,0x1f60,0x3b9,0x1f68,0x399,0x890,8,0x220,0x1f61,0x3b9,0x1f69,
-0x399,0x890,8,0x220,0x1f62,0x3b9,0x1f6a,0x399,0x890,8,0x220,0x1f63,0x3b9,0x1f6b,0x399,0x890,
-8,0x220,0x1f64,0x3b9,0x1f6c,0x399,0x890,8,0x220,0x1f65,0x3b9,0x1f6d,0x399,0x890,8,0x220,
-0x1f66,0x3b9,0x1f6e,0x399,0x890,8,0x220,0x1f67,0x3b9,0x1f6f,0x399,0xc90,8,0x220,0x1f60,0x3b9,
-0x1f68,0x399,0xc90,8,0x220,0x1f61,0x3b9,0x1f69,0x399,0xc90,8,0x220,0x1f62,0x3b9,0x1f6a,0x399,
-0xc90,8,0x220,0x1f63,0x3b9,0x1f6b,0x399,0xc90,8,0x220,0x1f64,0x3b9,0x1f6c,0x399,0xc90,8,
-0x220,0x1f65,0x3b9,0x1f6d,0x399,0xc90,8,0x220,0x1f66,0x3b9,0x1f6e,0x399,0xc90,8,0x220,0x1f67,
-0x3b9,0x1f6f,0x399,0x880,0x2220,0x1f70,0x3b9,0x1fba,0x399,0x1fba,0x345,0x890,9,0x220,0x3b1,0x3b9,
-0x391,0x399,0x880,0x2220,0x3ac,0x3b9,0x386,0x399,0x386,0x345,0x880,0x2220,0x3b1,0x342,0x391,0x342,
-0x391,0x342,0x880,0x3330,0x3b1,0x342,0x3b9,0x391,0x342,0x399,0x391,0x342,0x345,0xc90,9,0x220,
-0x3b1,0x3b9,0x391,0x399,0x846,0x3b9,0x399,1,0x345,0x880,0x2220,0x1f74,0x3b9,0x1fca,0x399,0x1fca,
-0x345,0x890,9,0x220,0x3b7,0x3b9,0x397,0x399,0x880,0x2220,0x3ae,0x3b9,0x389,0x399,0x389,0x345,
-0x880,0x2220,0x3b7,0x342,0x397,0x342,0x397,0x342,0x880,0x3330,0x3b7,0x342,0x3b9,0x397,0x342,0x399,
-0x397,0x342,0x345,0xc90,9,0x220,0x3b7,0x3b9,0x397,0x399,0x880,0x3330,0x3b9,0x308,0x300,0x399,
-0x308,0x300,0x399,0x308,0x300,0x8c0,1,0x3330,0x3b9,0x308,0x301,0x399,0x308,0x301,0x399,0x308,
-0x301,0x390,0x880,0x2220,0x3b9,0x342,0x399,0x342,0x399,0x342,0x880,0x3330,0x3b9,0x308,0x342,0x399,
-0x308,0x342,0x399,0x308,0x342,0x880,0x3330,0x3c5,0x308,0x300,0x3a5,0x308,0x300,0x3a5,0x308,0x300,
-0x8c0,1,0x3330,0x3c5,0x308,0x301,0x3a5,0x308,0x301,0x3a5,0x308,0x301,0x3b0,0x880,0x2220,0x3c1,
-0x313,0x3a1,0x313,0x3a1,0x313,0x880,0x2220,0x3c5,0x342,0x3a5,0x342,0x3a5,0x342,0x880,0x3330,0x3c5,
-0x308,0x342,0x3a5,0x308,0x342,0x3a5,0x308,0x342,0x880,0x2220,0x1f7c,0x3b9,0x1ffa,0x399,0x1ffa,0x345,
-0x890,9,0x220,0x3c9,0x3b9,0x3a9,0x399,0x880,0x2220,0x3ce,0x3b9,0x38f,0x399,0x38f,0x345,0x880,
-0x2220,0x3c9,0x342,0x3a9,0x342,0x3a9,0x342,0x880,0x3330,0x3c9,0x342,0x3b9,0x3a9,0x342,0x399,0x3a9,
-0x342,0x345,0xc90,9,0x220,0x3c9,0x3b9,0x3a9,0x399,0xc50,0x1d5d,1,0x3a9,0xc50,0x20bf,1,
-0x4b,0xc50,0x2046,1,0xc5,0xc10,0x29f7,0xc10,0xee6,0xc10,0x29e7,0xc10,0x2a2b,0xc10,0x2a28,0xc10,
-0x2a1c,0xc10,0x29fd,0xc10,0x2a1f,0xc10,0x2a1e,0xc10,0x2a3f,0xc10,0x1c60,0x841,0xa64b,1,0x1c88,0x844,
-0xa64a,1,0x1c88,0xc10,0x8a04,0xc10,0xa528,0xc10,0xa544,0xc10,0xa54f,0xc10,0xa54b,0xc10,0xa541,0xc10,
-0xa512,0xc10,0xa52a,0xc10,0xa515,0x810,0x3a0,0xc10,0xa543,0xc10,0x8a38,0xc10,0x3a0,0x806,0x13a0,0x13a0,
-0x806,0x13a1,0x13a1,0x806,0x13a2,0x13a2,0x806,0x13a3,0x13a3,0x806,0x13a4,0x13a4,0x806,0x13a5,0x13a5,0x806,
-0x13a6,0x13a6,0x806,0x13a7,0x13a7,0x806,0x13a8,0x13a8,0x806,0x13a9,0x13a9,0x806,0x13aa,0x13aa,0x806,0x13ab,
-0x13ab,0x806,0x13ac,0x13ac,0x806,0x13ad,0x13ad,0x806,0x13ae,0x13ae,0x806,0x13af,0x13af,0x806,0x13b0,0x13b0,
-0x806,0x13b1,0x13b1,0x806,0x13b2,0x13b2,0x806,0x13b3,0x13b3,0x806,0x13b4,0x13b4,0x806,0x13b5,0x13b5,0x806,
-0x13b6,0x13b6,0x806,0x13b7,0x13b7,0x806,0x13b8,0x13b8,0x806,0x13b9,0x13b9,0x806,0x13ba,0x13ba,0x806,0x13bb,
-0x13bb,0x806,0x13bc,0x13bc,0x806,0x13bd,0x13bd,0x806,0x13be,0x13be,0x806,0x13bf,0x13bf,0x806,0x13c0,0x13c0,
-0x806,0x13c1,0x13c1,0x806,0x13c2,0x13c2,0x806,0x13c3,0x13c3,0x806,0x13c4,0x13c4,0x806,0x13c5,0x13c5,0x806,
-0x13c6,0x13c6,0x806,0x13c7,0x13c7,0x806,0x13c8,0x13c8,0x806,0x13c9,0x13c9,0x806,0x13ca,0x13ca,0x806,0x13cb,
-0x13cb,0x806,0x13cc,0x13cc,0x806,0x13cd,0x13cd,0x806,0x13ce,0x13ce,0x806,0x13cf,0x13cf,0x806,0x13d0,0x13d0,
-0x806,0x13d1,0x13d1,0x806,0x13d2,0x13d2,0x806,0x13d3,0x13d3,0x806,0x13d4,0x13d4,0x806,0x13d5,0x13d5,0x806,
-0x13d6,0x13d6,0x806,0x13d7,0x13d7,0x806,0x13d8,0x13d8,0x806,0x13d9,0x13d9,0x806,0x13da,0x13da,0x806,0x13db,
-0x13db,0x806,0x13dc,0x13dc,0x806,0x13dd,0x13dd,0x806,0x13de,0x13de,0x806,0x13df,0x13df,0x806,0x13e0,0x13e0,
-0x806,0x13e1,0x13e1,0x806,0x13e2,0x13e2,0x806,0x13e3,0x13e3,0x806,0x13e4,0x13e4,0x806,0x13e5,0x13e5,0x806,
-0x13e6,0x13e6,0x806,0x13e7,0x13e7,0x806,0x13e8,0x13e8,0x806,0x13e9,0x13e9,0x806,0x13ea,0x13ea,0x806,0x13eb,
-0x13eb,0x806,0x13ec,0x13ec,0x806,0x13ed,0x13ed,0x806,0x13ee,0x13ee,0x806,0x13ef,0x13ef,0x880,0x2220,0x66,
-0x66,0x46,0x46,0x46,0x66,0x880,0x2220,0x66,0x69,0x46,0x49,0x46,0x69,0x880,0x2220,0x66,
-0x6c,0x46,0x4c,0x46,0x6c,0x880,0x3330,0x66,0x66,0x69,0x46,0x46,0x49,0x46,0x66,0x69,
-0x880,0x3330,0x66,0x66,0x6c,0x46,0x46,0x4c,0x46,0x66,0x6c,0x8c0,1,0x2220,0x73,0x74,
-0x53,0x54,0x53,0x74,0xfb06,0x8c0,1,0x2220,0x73,0x74,0x53,0x54,0x53,0x74,0xfb05,0x880,
-0x2220,0x574,0x576,0x544,0x546,0x544,0x576,0x880,0x2220,0x574,0x565,0x544,0x535,0x544,0x565,0x880,
-0x2220,0x574,0x56b,0x544,0x53b,0x544,0x56b,0x880,0x2220,0x57e,0x576,0x54e,0x546,0x54e,0x576,0x880,
-0x2220,0x574,0x56d,0x544,0x53d,0x544,0x56d
-};
-
-static const uint16_t ucase_props_unfold[370]={
-0x49,5,3,0,0,0x61,0x2be,0,0x1e9a,0,0x66,0x66,0,0xfb00,0,0x66,
-0x66,0x69,0xfb03,0,0x66,0x66,0x6c,0xfb04,0,0x66,0x69,0,0xfb01,0,0x66,0x6c,
-0,0xfb02,0,0x68,0x331,0,0x1e96,0,0x69,0x307,0,0x130,0,0x6a,0x30c,0,
-0x1f0,0,0x73,0x73,0,0xdf,0x1e9e,0x73,0x74,0,0xfb05,0xfb06,0x74,0x308,0,0x1e97,
-0,0x77,0x30a,0,0x1e98,0,0x79,0x30a,0,0x1e99,0,0x2bc,0x6e,0,0x149,0,
-0x3ac,0x3b9,0,0x1fb4,0,0x3ae,0x3b9,0,0x1fc4,0,0x3b1,0x342,0,0x1fb6,0,0x3b1,
-0x342,0x3b9,0x1fb7,0,0x3b1,0x3b9,0,0x1fb3,0x1fbc,0x3b7,0x342,0,0x1fc6,0,0x3b7,0x342,
-0x3b9,0x1fc7,0,0x3b7,0x3b9,0,0x1fc3,0x1fcc,0x3b9,0x308,0x300,0x1fd2,0,0x3b9,0x308,0x301,
-0x390,0x1fd3,0x3b9,0x308,0x342,0x1fd7,0,0x3b9,0x342,0,0x1fd6,0,0x3c1,0x313,0,0x1fe4,
-0,0x3c5,0x308,0x300,0x1fe2,0,0x3c5,0x308,0x301,0x3b0,0x1fe3,0x3c5,0x308,0x342,0x1fe7,0,
-0x3c5,0x313,0,0x1f50,0,0x3c5,0x313,0x300,0x1f52,0,0x3c5,0x313,0x301,0x1f54,0,0x3c5,
-0x313,0x342,0x1f56,0,0x3c5,0x342,0,0x1fe6,0,0x3c9,0x342,0,0x1ff6,0,0x3c9,0x342,
-0x3b9,0x1ff7,0,0x3c9,0x3b9,0,0x1ff3,0x1ffc,0x3ce,0x3b9,0,0x1ff4,0,0x565,0x582,0,
-0x587,0,0x574,0x565,0,0xfb14,0,0x574,0x56b,0,0xfb15,0,0x574,0x56d,0,0xfb17,
-0,0x574,0x576,0,0xfb13,0,0x57e,0x576,0,0xfb16,0,0x1f00,0x3b9,0,0x1f80,0x1f88,
-0x1f01,0x3b9,0,0x1f81,0x1f89,0x1f02,0x3b9,0,0x1f82,0x1f8a,0x1f03,0x3b9,0,0x1f83,0x1f8b,0x1f04,
-0x3b9,0,0x1f84,0x1f8c,0x1f05,0x3b9,0,0x1f85,0x1f8d,0x1f06,0x3b9,0,0x1f86,0x1f8e,0x1f07,0x3b9,
-0,0x1f87,0x1f8f,0x1f20,0x3b9,0,0x1f90,0x1f98,0x1f21,0x3b9,0,0x1f91,0x1f99,0x1f22,0x3b9,0,
-0x1f92,0x1f9a,0x1f23,0x3b9,0,0x1f93,0x1f9b,0x1f24,0x3b9,0,0x1f94,0x1f9c,0x1f25,0x3b9,0,0x1f95,
-0x1f9d,0x1f26,0x3b9,0,0x1f96,0x1f9e,0x1f27,0x3b9,0,0x1f97,0x1f9f,0x1f60,0x3b9,0,0x1fa0,0x1fa8,
-0x1f61,0x3b9,0,0x1fa1,0x1fa9,0x1f62,0x3b9,0,0x1fa2,0x1faa,0x1f63,0x3b9,0,0x1fa3,0x1fab,0x1f64,
-0x3b9,0,0x1fa4,0x1fac,0x1f65,0x3b9,0,0x1fa5,0x1fad,0x1f66,0x3b9,0,0x1fa6,0x1fae,0x1f67,0x3b9,
-0,0x1fa7,0x1faf,0x1f70,0x3b9,0,0x1fb2,0,0x1f74,0x3b9,0,0x1fc2,0,0x1f7c,0x3b9,0,
-0x1ff2,0
-};
-
-static const UCaseProps ucase_props_singleton={
- NULL,
- ucase_props_indexes,
- ucase_props_exceptions,
- ucase_props_unfold,
- {
- ucase_props_trieIndex,
- ucase_props_trieIndex+3288,
- NULL,
- 3288,
- 9068,
- 0x188,
- 0xd54,
- 0x0,
- 0x0,
- 0xe0800,
- 0x3040,
- NULL, 0, FALSE, FALSE, 0, NULL
- },
- { 4,0,0,0 }
-};
-
-#endif // INCLUDED_FROM_UCASE_CPP
diff --git a/contrib/libs/icu/common/ucasemap.cpp b/contrib/libs/icu/common/ucasemap.cpp
deleted file mode 100644
index ed72bda828f..00000000000
--- a/contrib/libs/icu/common/ucasemap.cpp
+++ /dev/null
@@ -1,953 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2005-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: ucasemap.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2005may06
-* created by: Markus W. Scherer
-*
-* Case mapping service object and functions using it.
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/brkiter.h"
-#include "unicode/bytestream.h"
-#include "unicode/casemap.h"
-#include "unicode/edits.h"
-#include "unicode/stringoptions.h"
-#include "unicode/stringpiece.h"
-#include "unicode/ubrk.h"
-#include "unicode/uloc.h"
-#include "unicode/ustring.h"
-#include "unicode/ucasemap.h"
-#if !UCONFIG_NO_BREAK_ITERATION
-#include "unicode/utext.h"
-#endif
-#include "unicode/utf.h"
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-#include "bytesinkutil.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "uassert.h"
-#include "ucase.h"
-#include "ucasemap_imp.h"
-#include "ustr_imp.h"
-
-U_NAMESPACE_USE
-
-/* UCaseMap service object -------------------------------------------------- */
-
-UCaseMap::UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode) :
-#if !UCONFIG_NO_BREAK_ITERATION
- iter(NULL),
-#endif
- caseLocale(UCASE_LOC_UNKNOWN), options(opts) {
- ucasemap_setLocale(this, localeID, pErrorCode);
-}
-
-UCaseMap::~UCaseMap() {
-#if !UCONFIG_NO_BREAK_ITERATION
- delete iter;
-#endif
-}
-
-U_CAPI UCaseMap * U_EXPORT2
-ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return NULL;
- }
- UCaseMap *csm = new UCaseMap(locale, options, pErrorCode);
- if(csm==NULL) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- } else if (U_FAILURE(*pErrorCode)) {
- delete csm;
- return NULL;
- }
- return csm;
-}
-
-U_CAPI void U_EXPORT2
-ucasemap_close(UCaseMap *csm) {
- delete csm;
-}
-
-U_CAPI const char * U_EXPORT2
-ucasemap_getLocale(const UCaseMap *csm) {
- return csm->locale;
-}
-
-U_CAPI uint32_t U_EXPORT2
-ucasemap_getOptions(const UCaseMap *csm) {
- return csm->options;
-}
-
-U_CAPI void U_EXPORT2
-ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- if (locale != NULL && *locale == 0) {
- csm->locale[0] = 0;
- csm->caseLocale = UCASE_LOC_ROOT;
- return;
- }
-
- int32_t length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
- if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) {
- *pErrorCode=U_ZERO_ERROR;
- /* we only really need the language code for case mappings */
- length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
- }
- if(length==sizeof(csm->locale)) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- if(U_SUCCESS(*pErrorCode)) {
- csm->caseLocale=UCASE_LOC_UNKNOWN;
- csm->caseLocale = ucase_getCaseLocale(csm->locale);
- } else {
- csm->locale[0]=0;
- csm->caseLocale = UCASE_LOC_ROOT;
- }
-}
-
-U_CAPI void U_EXPORT2
-ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- csm->options=options;
-}
-
-/* UTF-8 string case mappings ----------------------------------------------- */
-
-/* TODO(markus): Move to a new, separate utf8case.cpp file. */
-
-namespace {
-
-/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
-inline UBool
-appendResult(int32_t cpLength, int32_t result, const UChar *s,
- ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) {
- U_ASSERT(U_SUCCESS(errorCode));
-
- /* decode the result */
- if(result<0) {
- /* (not) original code point */
- if(edits!=NULL) {
- edits->addUnchanged(cpLength);
- }
- if((options & U_OMIT_UNCHANGED_TEXT) == 0) {
- ByteSinkUtil::appendCodePoint(cpLength, ~result, sink);
- }
- } else {
- if(result<=UCASE_MAX_STRING_LENGTH) {
- // string: "result" is the UTF-16 length
- return ByteSinkUtil::appendChange(cpLength, s, result, sink, edits, errorCode);
- } else {
- ByteSinkUtil::appendCodePoint(cpLength, result, sink, edits);
- }
- }
- return TRUE;
-}
-
-// See unicode/utf8.h U8_APPEND_UNSAFE().
-inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
-inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
-
-UChar32 U_CALLCONV
-utf8_caseContextIterator(void *context, int8_t dir) {
- UCaseContext *csc=(UCaseContext *)context;
- UChar32 c;
-
- if(dir<0) {
- /* reset for backward iteration */
- csc->index=csc->cpStart;
- csc->dir=dir;
- } else if(dir>0) {
- /* reset for forward iteration */
- csc->index=csc->cpLimit;
- csc->dir=dir;
- } else {
- /* continue current iteration direction */
- dir=csc->dir;
- }
-
- if(dir<0) {
- if(csc->start<csc->index) {
- U8_PREV((const uint8_t *)csc->p, csc->start, csc->index, c);
- return c;
- }
- } else {
- if(csc->index<csc->limit) {
- U8_NEXT((const uint8_t *)csc->p, csc->index, csc->limit, c);
- return c;
- }
- }
- return U_SENTINEL;
-}
-
-/**
- * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
- * caseLocale < 0: Case-folds [srcStart..srcLimit[.
- */
-void toLower(int32_t caseLocale, uint32_t options,
- const uint8_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
- icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
- const int8_t *latinToLower;
- if (caseLocale == UCASE_LOC_ROOT ||
- (caseLocale >= 0 ?
- !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
- (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
- latinToLower = LatinCase::TO_LOWER_NORMAL;
- } else {
- latinToLower = LatinCase::TO_LOWER_TR_LT;
- }
- const UTrie2 *trie = ucase_getTrie();
- int32_t prev = srcStart;
- int32_t srcIndex = srcStart;
- for (;;) {
- // fast path for simple cases
- int32_t cpStart;
- UChar32 c;
- for (;;) {
- if (U_FAILURE(errorCode) || srcIndex >= srcLimit) {
- c = U_SENTINEL;
- break;
- }
- uint8_t lead = src[srcIndex++];
- if (lead <= 0x7f) {
- int8_t d = latinToLower[lead];
- if (d == LatinCase::EXC) {
- cpStart = srcIndex - 1;
- c = lead;
- break;
- }
- if (d == 0) { continue; }
- ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
- sink, options, edits, errorCode);
- char ascii = (char)(lead + d);
- sink.Append(&ascii, 1);
- if (edits != nullptr) {
- edits->addReplace(1, 1);
- }
- prev = srcIndex;
- continue;
- } else if (lead < 0xe3) {
- uint8_t t;
- if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLimit &&
- (t = src[srcIndex] - 0x80) <= 0x3f) {
- // U+0080..U+017F
- ++srcIndex;
- c = ((lead - 0xc0) << 6) | t;
- int8_t d = latinToLower[c];
- if (d == LatinCase::EXC) {
- cpStart = srcIndex - 2;
- break;
- }
- if (d == 0) { continue; }
- ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
- sink, options, edits, errorCode);
- ByteSinkUtil::appendTwoBytes(c + d, sink);
- if (edits != nullptr) {
- edits->addReplace(2, 2);
- }
- prev = srcIndex;
- continue;
- }
- } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
- (srcIndex + 2) <= srcLimit &&
- U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
- // most of CJK: no case mappings
- srcIndex += 2;
- continue;
- }
- cpStart = --srcIndex;
- U8_NEXT(src, srcIndex, srcLimit, c);
- if (c < 0) {
- // ill-formed UTF-8
- continue;
- }
- uint16_t props = UTRIE2_GET16(trie, c);
- if (UCASE_HAS_EXCEPTION(props)) { break; }
- int32_t delta;
- if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
- continue;
- }
- ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
- sink, options, edits, errorCode);
- ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
- prev = srcIndex;
- }
- if (c < 0) {
- break;
- }
- // slow path
- const UChar *s;
- if (caseLocale >= 0) {
- csc->cpStart = cpStart;
- csc->cpLimit = srcIndex;
- c = ucase_toFullLower(c, utf8_caseContextIterator, csc, &s, caseLocale);
- } else {
- c = ucase_toFullFolding(c, &s, options);
- }
- if (c >= 0) {
- ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
- sink, options, edits, errorCode);
- appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
- prev = srcIndex;
- }
- }
- ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
- sink, options, edits, errorCode);
-}
-
-void toUpper(int32_t caseLocale, uint32_t options,
- const uint8_t *src, UCaseContext *csc, int32_t srcLength,
- icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
- const int8_t *latinToUpper;
- if (caseLocale == UCASE_LOC_TURKISH) {
- latinToUpper = LatinCase::TO_UPPER_TR;
- } else {
- latinToUpper = LatinCase::TO_UPPER_NORMAL;
- }
- const UTrie2 *trie = ucase_getTrie();
- int32_t prev = 0;
- int32_t srcIndex = 0;
- for (;;) {
- // fast path for simple cases
- int32_t cpStart;
- UChar32 c;
- for (;;) {
- if (U_FAILURE(errorCode) || srcIndex >= srcLength) {
- c = U_SENTINEL;
- break;
- }
- uint8_t lead = src[srcIndex++];
- if (lead <= 0x7f) {
- int8_t d = latinToUpper[lead];
- if (d == LatinCase::EXC) {
- cpStart = srcIndex - 1;
- c = lead;
- break;
- }
- if (d == 0) { continue; }
- ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
- sink, options, edits, errorCode);
- char ascii = (char)(lead + d);
- sink.Append(&ascii, 1);
- if (edits != nullptr) {
- edits->addReplace(1, 1);
- }
- prev = srcIndex;
- continue;
- } else if (lead < 0xe3) {
- uint8_t t;
- if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLength &&
- (t = src[srcIndex] - 0x80) <= 0x3f) {
- // U+0080..U+017F
- ++srcIndex;
- c = ((lead - 0xc0) << 6) | t;
- int8_t d = latinToUpper[c];
- if (d == LatinCase::EXC) {
- cpStart = srcIndex - 2;
- break;
- }
- if (d == 0) { continue; }
- ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
- sink, options, edits, errorCode);
- ByteSinkUtil::appendTwoBytes(c + d, sink);
- if (edits != nullptr) {
- edits->addReplace(2, 2);
- }
- prev = srcIndex;
- continue;
- }
- } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
- (srcIndex + 2) <= srcLength &&
- U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
- // most of CJK: no case mappings
- srcIndex += 2;
- continue;
- }
- cpStart = --srcIndex;
- U8_NEXT(src, srcIndex, srcLength, c);
- if (c < 0) {
- // ill-formed UTF-8
- continue;
- }
- uint16_t props = UTRIE2_GET16(trie, c);
- if (UCASE_HAS_EXCEPTION(props)) { break; }
- int32_t delta;
- if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
- continue;
- }
- ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
- sink, options, edits, errorCode);
- ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
- prev = srcIndex;
- }
- if (c < 0) {
- break;
- }
- // slow path
- csc->cpStart = cpStart;
- csc->cpLimit = srcIndex;
- const UChar *s;
- c = ucase_toFullUpper(c, utf8_caseContextIterator, csc, &s, caseLocale);
- if (c >= 0) {
- ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
- sink, options, edits, errorCode);
- appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
- prev = srcIndex;
- }
- }
- ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
- sink, options, edits, errorCode);
-}
-
-} // namespace
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-U_CFUNC void U_CALLCONV
-ucasemap_internalUTF8ToTitle(
- int32_t caseLocale, uint32_t options, BreakIterator *iter,
- const uint8_t *src, int32_t srcLength,
- ByteSink &sink, icu::Edits *edits,
- UErrorCode &errorCode) {
- if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) {
- return;
- }
-
- /* set up local variables */
- UCaseContext csc=UCASECONTEXT_INITIALIZER;
- csc.p=(void *)src;
- csc.limit=srcLength;
- int32_t prev=0;
- UBool isFirstIndex=TRUE;
-
- /* titlecasing loop */
- while(prev<srcLength) {
- /* find next index where to titlecase */
- int32_t index;
- if(isFirstIndex) {
- isFirstIndex=FALSE;
- index=iter->first();
- } else {
- index=iter->next();
- }
- if(index==UBRK_DONE || index>srcLength) {
- index=srcLength;
- }
-
- /*
- * Segment [prev..index[ into 3 parts:
- * a) skipped characters (copy as-is) [prev..titleStart[
- * b) first letter (titlecase) [titleStart..titleLimit[
- * c) subsequent characters (lowercase) [titleLimit..index[
- */
- if(prev<index) {
- /* find and copy skipped characters [prev..titleStart[ */
- int32_t titleStart=prev;
- int32_t titleLimit=prev;
- UChar32 c;
- U8_NEXT(src, titleLimit, index, c);
- if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
- // Adjust the titlecasing index to the next cased character,
- // or to the next letter/number/symbol/private use.
- // Stop with titleStart<titleLimit<=index
- // if there is a character to be titlecased,
- // or else stop with titleStart==titleLimit==index.
- UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
- while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
- titleStart=titleLimit;
- if(titleLimit==index) {
- break;
- }
- U8_NEXT(src, titleLimit, index, c);
- }
- if (prev < titleStart) {
- if (!ByteSinkUtil::appendUnchanged(src+prev, titleStart-prev,
- sink, options, edits, errorCode)) {
- return;
- }
- }
- }
-
- if(titleStart<titleLimit) {
- /* titlecase c which is from [titleStart..titleLimit[ */
- if(c>=0) {
- csc.cpStart=titleStart;
- csc.cpLimit=titleLimit;
- const UChar *s;
- c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale);
- if (!appendResult(titleLimit-titleStart, c, s, sink, options, edits, errorCode)) {
- return;
- }
- } else {
- // Malformed UTF-8.
- if (!ByteSinkUtil::appendUnchanged(src+titleStart, titleLimit-titleStart,
- sink, options, edits, errorCode)) {
- return;
- }
- }
-
- /* Special case Dutch IJ titlecasing */
- if (titleStart+1 < index &&
- caseLocale == UCASE_LOC_DUTCH &&
- (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
- if (src[titleStart+1] == 0x006A) {
- ByteSinkUtil::appendCodePoint(1, 0x004A, sink, edits);
- titleLimit++;
- } else if (src[titleStart+1] == 0x004A) {
- // Keep the capital J from getting lowercased.
- if (!ByteSinkUtil::appendUnchanged(src+titleStart+1, 1,
- sink, options, edits, errorCode)) {
- return;
- }
- titleLimit++;
- }
- }
-
- /* lowercase [titleLimit..index[ */
- if(titleLimit<index) {
- if((options&U_TITLECASE_NO_LOWERCASE)==0) {
- /* Normal operation: Lowercase the rest of the word. */
- toLower(caseLocale, options,
- src, &csc, titleLimit, index,
- sink, edits, errorCode);
- if(U_FAILURE(errorCode)) {
- return;
- }
- } else {
- /* Optionally just copy the rest of the word unchanged. */
- if (!ByteSinkUtil::appendUnchanged(src+titleLimit, index-titleLimit,
- sink, options, edits, errorCode)) {
- return;
- }
- }
- }
- }
- }
-
- prev=index;
- }
-}
-
-#endif
-
-U_NAMESPACE_BEGIN
-namespace GreekUpper {
-
-UBool isFollowedByCasedLetter(const uint8_t *s, int32_t i, int32_t length) {
- while (i < length) {
- UChar32 c;
- U8_NEXT(s, i, length, c);
- int32_t type = ucase_getTypeOrIgnorable(c);
- if ((type & UCASE_IGNORABLE) != 0) {
- // Case-ignorable, continue with the loop.
- } else if (type != UCASE_NONE) {
- return TRUE; // Followed by cased letter.
- } else {
- return FALSE; // Uncased and not case-ignorable.
- }
- }
- return FALSE; // Not followed by cased letter.
-}
-
-// Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
-void toUpper(uint32_t options,
- const uint8_t *src, int32_t srcLength,
- ByteSink &sink, Edits *edits,
- UErrorCode &errorCode) {
- uint32_t state = 0;
- for (int32_t i = 0; i < srcLength;) {
- int32_t nextIndex = i;
- UChar32 c;
- U8_NEXT(src, nextIndex, srcLength, c);
- uint32_t nextState = 0;
- int32_t type = ucase_getTypeOrIgnorable(c);
- if ((type & UCASE_IGNORABLE) != 0) {
- // c is case-ignorable
- nextState |= (state & AFTER_CASED);
- } else if (type != UCASE_NONE) {
- // c is cased
- nextState |= AFTER_CASED;
- }
- uint32_t data = getLetterData(c);
- if (data > 0) {
- uint32_t upper = data & UPPER_MASK;
- // Add a dialytika to this iota or ypsilon vowel
- // if we removed a tonos from the previous vowel,
- // and that previous vowel did not also have (or gain) a dialytika.
- // Adding one only to the final vowel in a longer sequence
- // (which does not occur in normal writing) would require lookahead.
- // Set the same flag as for preserving an existing dialytika.
- if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
- (upper == 0x399 || upper == 0x3A5)) {
- data |= HAS_DIALYTIKA;
- }
- int32_t numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota.
- if ((data & HAS_YPOGEGRAMMENI) != 0) {
- numYpogegrammeni = 1;
- }
- // Skip combining diacritics after this Greek letter.
- int32_t nextNextIndex = nextIndex;
- while (nextIndex < srcLength) {
- UChar32 c2;
- U8_NEXT(src, nextNextIndex, srcLength, c2);
- uint32_t diacriticData = getDiacriticData(c2);
- if (diacriticData != 0) {
- data |= diacriticData;
- if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
- ++numYpogegrammeni;
- }
- nextIndex = nextNextIndex;
- } else {
- break; // not a Greek diacritic
- }
- }
- if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
- nextState |= AFTER_VOWEL_WITH_ACCENT;
- }
- // Map according to Greek rules.
- UBool addTonos = FALSE;
- if (upper == 0x397 &&
- (data & HAS_ACCENT) != 0 &&
- numYpogegrammeni == 0 &&
- (state & AFTER_CASED) == 0 &&
- !isFollowedByCasedLetter(src, nextIndex, srcLength)) {
- // Keep disjunctive "or" with (only) a tonos.
- // We use the same "word boundary" conditions as for the Final_Sigma test.
- if (i == nextIndex) {
- upper = 0x389; // Preserve the precomposed form.
- } else {
- addTonos = TRUE;
- }
- } else if ((data & HAS_DIALYTIKA) != 0) {
- // Preserve a vowel with dialytika in precomposed form if it exists.
- if (upper == 0x399) {
- upper = 0x3AA;
- data &= ~HAS_EITHER_DIALYTIKA;
- } else if (upper == 0x3A5) {
- upper = 0x3AB;
- data &= ~HAS_EITHER_DIALYTIKA;
- }
- }
-
- UBool change;
- if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
- change = TRUE; // common, simple usage
- } else {
- // Find out first whether we are changing the text.
- U_ASSERT(0x370 <= upper && upper <= 0x3ff); // 2-byte UTF-8, main Greek block
- change = (i + 2) > nextIndex ||
- src[i] != getTwoByteLead(upper) || src[i + 1] != getTwoByteTrail(upper) ||
- numYpogegrammeni > 0;
- int32_t i2 = i + 2;
- if ((data & HAS_EITHER_DIALYTIKA) != 0) {
- change |= (i2 + 2) > nextIndex ||
- src[i2] != (uint8_t)u8"\u0308"[0] ||
- src[i2 + 1] != (uint8_t)u8"\u0308"[1];
- i2 += 2;
- }
- if (addTonos) {
- change |= (i2 + 2) > nextIndex ||
- src[i2] != (uint8_t)u8"\u0301"[0] ||
- src[i2 + 1] != (uint8_t)u8"\u0301"[1];
- i2 += 2;
- }
- int32_t oldLength = nextIndex - i;
- int32_t newLength = (i2 - i) + numYpogegrammeni * 2; // 2 bytes per U+0399
- change |= oldLength != newLength;
- if (change) {
- if (edits != NULL) {
- edits->addReplace(oldLength, newLength);
- }
- } else {
- if (edits != NULL) {
- edits->addUnchanged(oldLength);
- }
- // Write unchanged text?
- change = (options & U_OMIT_UNCHANGED_TEXT) == 0;
- }
- }
-
- if (change) {
- ByteSinkUtil::appendTwoBytes(upper, sink);
- if ((data & HAS_EITHER_DIALYTIKA) != 0) {
- sink.AppendU8(u8"\u0308", 2); // restore or add a dialytika
- }
- if (addTonos) {
- sink.AppendU8(u8"\u0301", 2);
- }
- while (numYpogegrammeni > 0) {
- sink.AppendU8(u8"\u0399", 2);
- --numYpogegrammeni;
- }
- }
- } else if(c>=0) {
- const UChar *s;
- c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);
- if (!appendResult(nextIndex - i, c, s, sink, options, edits, errorCode)) {
- return;
- }
- } else {
- // Malformed UTF-8.
- if (!ByteSinkUtil::appendUnchanged(src+i, nextIndex-i,
- sink, options, edits, errorCode)) {
- return;
- }
- }
- i = nextIndex;
- state = nextState;
- }
-}
-
-} // namespace GreekUpper
-U_NAMESPACE_END
-
-static void U_CALLCONV
-ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
- const uint8_t *src, int32_t srcLength,
- icu::ByteSink &sink, icu::Edits *edits,
- UErrorCode &errorCode) {
- UCaseContext csc=UCASECONTEXT_INITIALIZER;
- csc.p=(void *)src;
- csc.limit=srcLength;
- toLower(
- caseLocale, options,
- src, &csc, 0, srcLength,
- sink, edits, errorCode);
-}
-
-static void U_CALLCONV
-ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
- const uint8_t *src, int32_t srcLength,
- icu::ByteSink &sink, icu::Edits *edits,
- UErrorCode &errorCode) {
- if (caseLocale == UCASE_LOC_GREEK) {
- GreekUpper::toUpper(options, src, srcLength, sink, edits, errorCode);
- } else {
- UCaseContext csc=UCASECONTEXT_INITIALIZER;
- csc.p=(void *)src;
- csc.limit=srcLength;
- toUpper(
- caseLocale, options,
- src, &csc, srcLength,
- sink, edits, errorCode);
- }
-}
-
-static void U_CALLCONV
-ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
- const uint8_t *src, int32_t srcLength,
- icu::ByteSink &sink, icu::Edits *edits,
- UErrorCode &errorCode) {
- toLower(
- -1, options,
- src, nullptr, 0, srcLength,
- sink, edits, errorCode);
-}
-
-void
-ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
- const char *src, int32_t srcLength,
- UTF8CaseMapper *stringCaseMapper,
- icu::ByteSink &sink, icu::Edits *edits,
- UErrorCode &errorCode) {
- /* check argument values */
- if (U_FAILURE(errorCode)) {
- return;
- }
- if ((src == nullptr && srcLength != 0) || srcLength < -1) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- // Get the string length.
- if (srcLength == -1) {
- srcLength = (int32_t)uprv_strlen((const char *)src);
- }
-
- if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
- edits->reset();
- }
- stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
- (const uint8_t *)src, srcLength, sink, edits, errorCode);
- sink.Flush();
- if (U_SUCCESS(errorCode)) {
- if (edits != nullptr) {
- edits->copyErrorTo(errorCode);
- }
- }
-}
-
-int32_t
-ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
- char *dest, int32_t destCapacity,
- const char *src, int32_t srcLength,
- UTF8CaseMapper *stringCaseMapper,
- icu::Edits *edits,
- UErrorCode &errorCode) {
- /* check argument values */
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- if( destCapacity<0 ||
- (dest==NULL && destCapacity>0) ||
- (src==NULL && srcLength!=0) || srcLength<-1
- ) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* get the string length */
- if(srcLength==-1) {
- srcLength=(int32_t)uprv_strlen((const char *)src);
- }
-
- /* check for overlapping source and destination */
- if( dest!=NULL &&
- ((src>=dest && src<(dest+destCapacity)) ||
- (dest>=src && dest<(src+srcLength)))
- ) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- CheckedArrayByteSink sink(dest, destCapacity);
- if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
- edits->reset();
- }
- stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
- (const uint8_t *)src, srcLength, sink, edits, errorCode);
- sink.Flush();
- if (U_SUCCESS(errorCode)) {
- if (sink.Overflowed()) {
- errorCode = U_BUFFER_OVERFLOW_ERROR;
- } else if (edits != nullptr) {
- edits->copyErrorTo(errorCode);
- }
- }
- return u_terminateChars(dest, destCapacity, sink.NumberOfBytesAppended(), &errorCode);
-}
-
-/* public API functions */
-
-U_CAPI int32_t U_EXPORT2
-ucasemap_utf8ToLower(const UCaseMap *csm,
- char *dest, int32_t destCapacity,
- const char *src, int32_t srcLength,
- UErrorCode *pErrorCode) {
- return ucasemap_mapUTF8(
- csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
- dest, destCapacity,
- src, srcLength,
- ucasemap_internalUTF8ToLower, NULL, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-ucasemap_utf8ToUpper(const UCaseMap *csm,
- char *dest, int32_t destCapacity,
- const char *src, int32_t srcLength,
- UErrorCode *pErrorCode) {
- return ucasemap_mapUTF8(
- csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
- dest, destCapacity,
- src, srcLength,
- ucasemap_internalUTF8ToUpper, NULL, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-ucasemap_utf8FoldCase(const UCaseMap *csm,
- char *dest, int32_t destCapacity,
- const char *src, int32_t srcLength,
- UErrorCode *pErrorCode) {
- return ucasemap_mapUTF8(
- UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
- dest, destCapacity,
- src, srcLength,
- ucasemap_internalUTF8Fold, NULL, *pErrorCode);
-}
-
-U_NAMESPACE_BEGIN
-
-void CaseMap::utf8ToLower(
- const char *locale, uint32_t options,
- StringPiece src, ByteSink &sink, Edits *edits,
- UErrorCode &errorCode) {
- ucasemap_mapUTF8(
- ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
- src.data(), src.length(),
- ucasemap_internalUTF8ToLower, sink, edits, errorCode);
-}
-
-void CaseMap::utf8ToUpper(
- const char *locale, uint32_t options,
- StringPiece src, ByteSink &sink, Edits *edits,
- UErrorCode &errorCode) {
- ucasemap_mapUTF8(
- ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
- src.data(), src.length(),
- ucasemap_internalUTF8ToUpper, sink, edits, errorCode);
-}
-
-void CaseMap::utf8Fold(
- uint32_t options,
- StringPiece src, ByteSink &sink, Edits *edits,
- UErrorCode &errorCode) {
- ucasemap_mapUTF8(
- UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
- src.data(), src.length(),
- ucasemap_internalUTF8Fold, sink, edits, errorCode);
-}
-
-int32_t CaseMap::utf8ToLower(
- const char *locale, uint32_t options,
- const char *src, int32_t srcLength,
- char *dest, int32_t destCapacity, Edits *edits,
- UErrorCode &errorCode) {
- return ucasemap_mapUTF8(
- ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
- dest, destCapacity,
- src, srcLength,
- ucasemap_internalUTF8ToLower, edits, errorCode);
-}
-
-int32_t CaseMap::utf8ToUpper(
- const char *locale, uint32_t options,
- const char *src, int32_t srcLength,
- char *dest, int32_t destCapacity, Edits *edits,
- UErrorCode &errorCode) {
- return ucasemap_mapUTF8(
- ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
- dest, destCapacity,
- src, srcLength,
- ucasemap_internalUTF8ToUpper, edits, errorCode);
-}
-
-int32_t CaseMap::utf8Fold(
- uint32_t options,
- const char *src, int32_t srcLength,
- char *dest, int32_t destCapacity, Edits *edits,
- UErrorCode &errorCode) {
- return ucasemap_mapUTF8(
- UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
- dest, destCapacity,
- src, srcLength,
- ucasemap_internalUTF8Fold, edits, errorCode);
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/ucasemap_imp.h b/contrib/libs/icu/common/ucasemap_imp.h
deleted file mode 100644
index 7788fd93710..00000000000
--- a/contrib/libs/icu/common/ucasemap_imp.h
+++ /dev/null
@@ -1,282 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-// ucasemap_imp.h
-// created: 2017feb08 Markus W. Scherer
-
-#ifndef __UCASEMAP_IMP_H__
-#define __UCASEMAP_IMP_H__
-
-#include "unicode/utypes.h"
-#include "unicode/ucasemap.h"
-#include "unicode/uchar.h"
-#include "ucase.h"
-
-/**
- * Bit mask for the titlecasing iterator options bit field.
- * Currently only 3 out of 8 values are used:
- * 0 (words), U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
- * See stringoptions.h.
- * @internal
- */
-#define U_TITLECASE_ITERATOR_MASK 0xe0
-
-/**
- * Bit mask for the titlecasing index adjustment options bit set.
- * Currently two bits are defined:
- * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED.
- * See stringoptions.h.
- * @internal
- */
-#define U_TITLECASE_ADJUSTMENT_MASK 0x600
-
-/**
- * Internal API, used by u_strcasecmp() etc.
- * Compare strings case-insensitively,
- * in code point order or code unit order.
- */
-U_CFUNC int32_t
-u_strcmpFold(const UChar *s1, int32_t length1,
- const UChar *s2, int32_t length2,
- uint32_t options,
- UErrorCode *pErrorCode);
-
-/**
- * Internal API, used for detecting length of
- * shared prefix case-insensitively.
- * @param s1 input string 1
- * @param length1 length of string 1, or -1 (NULL terminated)
- * @param s2 input string 2
- * @param length2 length of string 2, or -1 (NULL terminated)
- * @param options compare options
- * @param matchLen1 (output) length of partial prefix match in s1
- * @param matchLen2 (output) length of partial prefix match in s2
- * @param pErrorCode receives error status
- */
-U_CAPI void
-u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
- const UChar *s2, int32_t length2,
- uint32_t options,
- int32_t *matchLen1, int32_t *matchLen2,
- UErrorCode *pErrorCode);
-
-#ifdef __cplusplus
-
-U_NAMESPACE_BEGIN
-
-class BreakIterator; // unicode/brkiter.h
-class ByteSink;
-class Locale; // unicode/locid.h
-
-/** Returns TRUE if the options are valid. Otherwise FALSE, and sets an error. */
-inline UBool ustrcase_checkTitleAdjustmentOptions(uint32_t options, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return FALSE; }
- if ((options & U_TITLECASE_ADJUSTMENT_MASK) == U_TITLECASE_ADJUSTMENT_MASK) {
- // Both options together.
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- return TRUE;
-}
-
-inline UBool ustrcase_isLNS(UChar32 c) {
- // Letter, number, symbol,
- // or a private use code point because those are typically used as letters or numbers.
- // Consider modifier letters only if they are cased.
- const uint32_t LNS = (U_GC_L_MASK|U_GC_N_MASK|U_GC_S_MASK|U_GC_CO_MASK) & ~U_GC_LM_MASK;
- int gc = u_charType(c);
- return (U_MASK(gc) & LNS) != 0 || (gc == U_MODIFIER_LETTER && ucase_getType(c) != UCASE_NONE);
-}
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-/** Returns nullptr if error. Pass in either locale or locID, not both. */
-U_CFUNC
-BreakIterator *ustrcase_getTitleBreakIterator(
- const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter,
- LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode);
-
-#endif
-
-U_NAMESPACE_END
-
-#include "unicode/unistr.h" // for UStringCaseMapper
-
-/*
- * Internal string casing functions implementing
- * ustring.h/ustrcase.cpp and UnicodeString case mapping functions.
- */
-
-struct UCaseMap : public icu::UMemory {
- /** Implements most of ucasemap_open(). */
- UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode);
- ~UCaseMap();
-
-#if !UCONFIG_NO_BREAK_ITERATION
- icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */
-#endif
- char locale[32];
- int32_t caseLocale;
- uint32_t options;
-};
-
-#if UCONFIG_NO_BREAK_ITERATION
-# define UCASEMAP_BREAK_ITERATOR_PARAM
-# define UCASEMAP_BREAK_ITERATOR_UNUSED
-# define UCASEMAP_BREAK_ITERATOR
-# define UCASEMAP_BREAK_ITERATOR_NULL
-#else
-# define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
-# define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
-# define UCASEMAP_BREAK_ITERATOR iter,
-# define UCASEMAP_BREAK_ITERATOR_NULL NULL,
-#endif
-
-U_CFUNC int32_t
-ustrcase_getCaseLocale(const char *locale);
-
-// TODO: swap src / dest if approved for new public api
-/** Implements UStringCaseMapper. */
-U_CFUNC int32_t U_CALLCONV
-ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- icu::Edits *edits,
- UErrorCode &errorCode);
-
-/** Implements UStringCaseMapper. */
-U_CFUNC int32_t U_CALLCONV
-ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- icu::Edits *edits,
- UErrorCode &errorCode);
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-/** Implements UStringCaseMapper. */
-U_CFUNC int32_t U_CALLCONV
-ustrcase_internalToTitle(int32_t caseLocale, uint32_t options,
- icu::BreakIterator *iter,
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- icu::Edits *edits,
- UErrorCode &errorCode);
-
-#endif
-
-/** Implements UStringCaseMapper. */
-U_CFUNC int32_t U_CALLCONV
-ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- icu::Edits *edits,
- UErrorCode &errorCode);
-
-/**
- * Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz().
- * Implements argument checking.
- */
-U_CFUNC int32_t
-ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- UStringCaseMapper *stringCaseMapper,
- icu::Edits *edits,
- UErrorCode &errorCode);
-
-/**
- * Common string case mapping implementation for old-fashioned u_strToXyz() functions
- * that allow the source string to overlap the destination buffer.
- * Implements argument checking and internally works with an intermediate buffer if necessary.
- */
-U_CFUNC int32_t
-ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- UStringCaseMapper *stringCaseMapper,
- UErrorCode &errorCode);
-
-/**
- * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
- * UTF-8 version of UStringCaseMapper.
- * All error checking must be done.
- * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
- */
-typedef void U_CALLCONV
-UTF8CaseMapper(int32_t caseLocale, uint32_t options,
-#if !UCONFIG_NO_BREAK_ITERATION
- icu::BreakIterator *iter,
-#endif
- const uint8_t *src, int32_t srcLength,
- icu::ByteSink &sink, icu::Edits *edits,
- UErrorCode &errorCode);
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-/** Implements UTF8CaseMapper. */
-U_CFUNC void U_CALLCONV
-ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
- icu::BreakIterator *iter,
- const uint8_t *src, int32_t srcLength,
- icu::ByteSink &sink, icu::Edits *edits,
- UErrorCode &errorCode);
-
-#endif
-
-void
-ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
- const char *src, int32_t srcLength,
- UTF8CaseMapper *stringCaseMapper,
- icu::ByteSink &sink, icu::Edits *edits,
- UErrorCode &errorCode);
-
-/**
- * Implements argument checking and buffer handling
- * for UTF-8 string case mapping as a common function.
- */
-int32_t
-ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
- char *dest, int32_t destCapacity,
- const char *src, int32_t srcLength,
- UTF8CaseMapper *stringCaseMapper,
- icu::Edits *edits,
- UErrorCode &errorCode);
-
-U_NAMESPACE_BEGIN
-namespace GreekUpper {
-
-// Data bits.
-static const uint32_t UPPER_MASK = 0x3ff;
-static const uint32_t HAS_VOWEL = 0x1000;
-static const uint32_t HAS_YPOGEGRAMMENI = 0x2000;
-static const uint32_t HAS_ACCENT = 0x4000;
-static const uint32_t HAS_DIALYTIKA = 0x8000;
-// Further bits during data building and processing, not stored in the data map.
-static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000;
-static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000;
-
-static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
-static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
- HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
-static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
-
-// State bits.
-static const uint32_t AFTER_CASED = 1;
-static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;
-
-uint32_t getLetterData(UChar32 c);
-
-/**
- * Returns a non-zero value for each of the Greek combining diacritics
- * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
- * plus some perispomeni look-alikes.
- */
-uint32_t getDiacriticData(UChar32 c);
-
-} // namespace GreekUpper
-U_NAMESPACE_END
-
-#endif // __cplusplus
-
-#endif // __UCASEMAP_IMP_H__
diff --git a/contrib/libs/icu/common/ucasemap_titlecase_brkiter.cpp b/contrib/libs/icu/common/ucasemap_titlecase_brkiter.cpp
deleted file mode 100644
index c21dfb7698a..00000000000
--- a/contrib/libs/icu/common/ucasemap_titlecase_brkiter.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: ucasemap_titlecase_brkiter.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2011jun02
-* created by: Markus W. Scherer
-*
-* Titlecasing functions that are based on BreakIterator
-* were moved here to break dependency cycles among parts of the common library.
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/brkiter.h"
-#include "unicode/ubrk.h"
-#include "unicode/casemap.h"
-#include "unicode/ucasemap.h"
-#include "cmemory.h"
-#include "ucase.h"
-#include "ucasemap_imp.h"
-
-U_NAMESPACE_BEGIN
-
-void CaseMap::utf8ToTitle(
- const char *locale, uint32_t options, BreakIterator *iter,
- StringPiece src, ByteSink &sink, Edits *edits,
- UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) {
- return;
- }
- UText utext = UTEXT_INITIALIZER;
- utext_openUTF8(&utext, src.data(), src.length(), &errorCode);
- LocalPointer<BreakIterator> ownedIter;
- iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode);
- if (iter == nullptr) {
- utext_close(&utext);
- return;
- }
- iter->setText(&utext, errorCode);
- ucasemap_mapUTF8(
- ustrcase_getCaseLocale(locale), options, iter,
- src.data(), src.length(),
- ucasemap_internalUTF8ToTitle, sink, edits, errorCode);
- utext_close(&utext);
-}
-
-int32_t CaseMap::utf8ToTitle(
- const char *locale, uint32_t options, BreakIterator *iter,
- const char *src, int32_t srcLength,
- char *dest, int32_t destCapacity, Edits *edits,
- UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) {
- return 0;
- }
- UText utext=UTEXT_INITIALIZER;
- utext_openUTF8(&utext, src, srcLength, &errorCode);
- LocalPointer<BreakIterator> ownedIter;
- iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode);
- if(iter==NULL) {
- utext_close(&utext);
- return 0;
- }
- iter->setText(&utext, errorCode);
- int32_t length=ucasemap_mapUTF8(
- ustrcase_getCaseLocale(locale), options, iter,
- dest, destCapacity,
- src, srcLength,
- ucasemap_internalUTF8ToTitle, edits, errorCode);
- utext_close(&utext);
- return length;
-}
-
-U_NAMESPACE_END
-
-U_NAMESPACE_USE
-
-U_CAPI const UBreakIterator * U_EXPORT2
-ucasemap_getBreakIterator(const UCaseMap *csm) {
- return reinterpret_cast<UBreakIterator *>(csm->iter);
-}
-
-U_CAPI void U_EXPORT2
-ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- delete csm->iter;
- csm->iter=reinterpret_cast<BreakIterator *>(iterToAdopt);
-}
-
-U_CAPI int32_t U_EXPORT2
-ucasemap_utf8ToTitle(UCaseMap *csm,
- char *dest, int32_t destCapacity,
- const char *src, int32_t srcLength,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return 0;
- }
- UText utext=UTEXT_INITIALIZER;
- utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
- if (U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(csm->iter==NULL) {
- LocalPointer<BreakIterator> ownedIter;
- BreakIterator *iter = ustrcase_getTitleBreakIterator(
- nullptr, csm->locale, csm->options, nullptr, ownedIter, *pErrorCode);
- if (iter == nullptr) {
- utext_close(&utext);
- return 0;
- }
- csm->iter = ownedIter.orphan();
- }
- csm->iter->setText(&utext, *pErrorCode);
- int32_t length=ucasemap_mapUTF8(
- csm->caseLocale, csm->options, csm->iter,
- dest, destCapacity,
- src, srcLength,
- ucasemap_internalUTF8ToTitle, NULL, *pErrorCode);
- utext_close(&utext);
- return length;
-}
-
-#endif // !UCONFIG_NO_BREAK_ITERATION
diff --git a/contrib/libs/icu/common/ucat.cpp b/contrib/libs/icu/common/ucat.cpp
deleted file mode 100644
index dac56eeb5ce..00000000000
--- a/contrib/libs/icu/common/ucat.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2003, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Author: Alan Liu
-* Created: March 19 2003
-* Since: ICU 2.6
-**********************************************************************
-*/
-#include "unicode/ucat.h"
-#include "unicode/ustring.h"
-#include "cstring.h"
-#include "uassert.h"
-
-/* Separator between set_num and msg_num */
-static const char SEPARATOR = '%';
-
-/* Maximum length of a set_num/msg_num key, incl. terminating zero.
- * Longest possible key is "-2147483648%-2147483648" */
-#define MAX_KEY_LEN (24)
-
-/**
- * Fill in buffer with a set_num/msg_num key string, given the numeric
- * values. Numeric values must be >= 0. Buffer must be of length
- * MAX_KEY_LEN or more.
- */
-static char*
-_catkey(char* buffer, int32_t set_num, int32_t msg_num) {
- int32_t i = 0;
- i = T_CString_integerToString(buffer, set_num, 10);
- buffer[i++] = SEPARATOR;
- T_CString_integerToString(buffer+i, msg_num, 10);
- return buffer;
-}
-
-U_CAPI u_nl_catd U_EXPORT2
-u_catopen(const char* name, const char* locale, UErrorCode* ec) {
- return (u_nl_catd) ures_open(name, locale, ec);
-}
-
-U_CAPI void U_EXPORT2
-u_catclose(u_nl_catd catd) {
- ures_close((UResourceBundle*) catd); /* may be NULL */
-}
-
-U_CAPI const UChar* U_EXPORT2
-u_catgets(u_nl_catd catd, int32_t set_num, int32_t msg_num,
- const UChar* s,
- int32_t* len, UErrorCode* ec) {
-
- char key[MAX_KEY_LEN];
- const UChar* result;
-
- if (ec == NULL || U_FAILURE(*ec)) {
- goto ERROR;
- }
-
- result = ures_getStringByKey((const UResourceBundle*) catd,
- _catkey(key, set_num, msg_num),
- len, ec);
- if (U_FAILURE(*ec)) {
- goto ERROR;
- }
-
- return result;
-
- ERROR:
- /* In case of any failure, return s */
- if (len != NULL) {
- *len = u_strlen(s);
- }
- return s;
-}
-
-/*eof*/
diff --git a/contrib/libs/icu/common/uchar.cpp b/contrib/libs/icu/common/uchar.cpp
deleted file mode 100644
index eb14e4c75d5..00000000000
--- a/contrib/libs/icu/common/uchar.cpp
+++ /dev/null
@@ -1,730 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-********************************************************************************
-* Copyright (C) 1996-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-********************************************************************************
-*
-* File UCHAR.C
-*
-* Modification History:
-*
-* Date Name Description
-* 04/02/97 aliu Creation.
-* 4/15/99 Madhu Updated all the function definitions for C Implementation
-* 5/20/99 Madhu Added the function u_getVersion()
-* 8/19/1999 srl Upgraded scripts to Unicode3.0
-* 11/11/1999 weiv added u_isalnum(), cleaned comments
-* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion.
-* 06/20/2000 helena OS/400 port changes; mostly typecast.
-******************************************************************************
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "unicode/uscript.h"
-#include "unicode/udata.h"
-#include "uassert.h"
-#include "cmemory.h"
-#include "ucln_cmn.h"
-#include "utrie2.h"
-#include "udataswp.h"
-#include "uprops.h"
-#include "ustr_imp.h"
-
-/* uchar_props_data.h is machine-generated by genprops --csource */
-#define INCLUDED_FROM_UCHAR_C
-#include "uchar_props_data.h"
-
-/* constants and macros for access to the data ------------------------------ */
-
-/* getting a uint32_t properties word from the data */
-#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c))
-
-/* API functions ------------------------------------------------------------ */
-
-/* Gets the Unicode character's general category.*/
-U_CAPI int8_t U_EXPORT2
-u_charType(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (int8_t)GET_CATEGORY(props);
-}
-
-/* Enumerate all code points with their general categories. */
-struct _EnumTypeCallback {
- UCharEnumTypeRange *enumRange;
- const void *context;
-};
-
-static uint32_t U_CALLCONV
-_enumTypeValue(const void *context, uint32_t value) {
- (void)context;
- return GET_CATEGORY(value);
-}
-
-static UBool U_CALLCONV
-_enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
- /* just cast the value to UCharCategory */
- return ((struct _EnumTypeCallback *)context)->
- enumRange(((struct _EnumTypeCallback *)context)->context,
- start, end+1, (UCharCategory)value);
-}
-
-U_CAPI void U_EXPORT2
-u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) {
- struct _EnumTypeCallback callback;
-
- if(enumRange==NULL) {
- return;
- }
-
- callback.enumRange=enumRange;
- callback.context=context;
- utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback);
-}
-
-/* Checks if ch is a lower case letter.*/
-U_CAPI UBool U_EXPORT2
-u_islower(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER);
-}
-
-/* Checks if ch is an upper case letter.*/
-U_CAPI UBool U_EXPORT2
-u_isupper(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER);
-}
-
-/* Checks if ch is a title case letter; usually upper case letters.*/
-U_CAPI UBool U_EXPORT2
-u_istitle(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER);
-}
-
-/* Checks if ch is a decimal digit. */
-U_CAPI UBool U_EXPORT2
-u_isdigit(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
-}
-
-U_CAPI UBool U_EXPORT2
-u_isxdigit(UChar32 c) {
- uint32_t props;
-
- /* check ASCII and Fullwidth ASCII a-fA-F */
- if(
- (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) ||
- (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41))
- ) {
- return TRUE;
- }
-
- GET_PROPS(c, props);
- return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
-}
-
-/* Checks if the Unicode character is a letter.*/
-U_CAPI UBool U_EXPORT2
-u_isalpha(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0);
-}
-
-U_CAPI UBool U_EXPORT2
-u_isUAlphabetic(UChar32 c) {
- return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0;
-}
-
-/* Checks if c is a letter or a decimal digit */
-U_CAPI UBool U_EXPORT2
-u_isalnum(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0);
-}
-
-/**
- * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM.
- * @internal
- */
-U_CFUNC UBool
-u_isalnumPOSIX(UChar32 c) {
- return (UBool)(u_isUAlphabetic(c) || u_isdigit(c));
-}
-
-/* Checks if ch is a unicode character with assigned character type.*/
-U_CAPI UBool U_EXPORT2
-u_isdefined(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)(GET_CATEGORY(props)!=0);
-}
-
-/* Checks if the Unicode character is a base form character that can take a diacritic.*/
-U_CAPI UBool U_EXPORT2
-u_isbase(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0);
-}
-
-/* Checks if the Unicode character is a control character.*/
-U_CAPI UBool U_EXPORT2
-u_iscntrl(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0);
-}
-
-U_CAPI UBool U_EXPORT2
-u_isISOControl(UChar32 c) {
- return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f);
-}
-
-/* Some control characters that are used as space. */
-#define IS_THAT_CONTROL_SPACE(c) \
- (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL))
-
-/* Java has decided that U+0085 New Line is not whitespace any more. */
-#define IS_THAT_ASCII_CONTROL_SPACE(c) \
- (c<=0x1f && c>=TAB && (c<=CR || c>=0x1c))
-
-/* Checks if the Unicode character is a space character.*/
-U_CAPI UBool U_EXPORT2
-u_isspace(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c));
-}
-
-U_CAPI UBool U_EXPORT2
-u_isJavaSpaceChar(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0);
-}
-
-/* Checks if the Unicode character is a whitespace character.*/
-U_CAPI UBool U_EXPORT2
-u_isWhitespace(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)(
- ((CAT_MASK(props)&U_GC_Z_MASK)!=0 &&
- c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */
- IS_THAT_ASCII_CONTROL_SPACE(c)
- );
-}
-
-U_CAPI UBool U_EXPORT2
-u_isblank(UChar32 c) {
- if((uint32_t)c<=0x9f) {
- return c==9 || c==0x20; /* TAB or SPACE */
- } else {
- /* Zs */
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR);
- }
-}
-
-U_CAPI UBool U_EXPORT2
-u_isUWhiteSpace(UChar32 c) {
- return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0;
-}
-
-/* Checks if the Unicode character is printable.*/
-U_CAPI UBool U_EXPORT2
-u_isprint(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- /* comparing ==0 returns FALSE for the categories mentioned */
- return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0);
-}
-
-/**
- * Checks if c is in \p{graph}\p{blank} - \p{cntrl}.
- * Implements UCHAR_POSIX_PRINT.
- * @internal
- */
-U_CFUNC UBool
-u_isprintPOSIX(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- /*
- * The only cntrl character in graph+blank is TAB (in blank).
- * Here we implement (blank-TAB)=Zs instead of calling u_isblank().
- */
- return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c));
-}
-
-U_CAPI UBool U_EXPORT2
-u_isgraph(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- /* comparing ==0 returns FALSE for the categories mentioned */
- return (UBool)((CAT_MASK(props)&
- (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK))
- ==0);
-}
-
-/**
- * Checks if c is in
- * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
- * with space=\p{Whitespace} and Control=Cc.
- * Implements UCHAR_POSIX_GRAPH.
- * @internal
- */
-U_CFUNC UBool
-u_isgraphPOSIX(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */
- /* comparing ==0 returns FALSE for the categories mentioned */
- return (UBool)((CAT_MASK(props)&
- (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK))
- ==0);
-}
-
-U_CAPI UBool U_EXPORT2
-u_ispunct(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0);
-}
-
-/* Checks if the Unicode character can start a Unicode identifier.*/
-U_CAPI UBool U_EXPORT2
-u_isIDStart(UChar32 c) {
- /* same as u_isalpha() */
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0);
-}
-
-/* Checks if the Unicode character can be a Unicode identifier part other than starting the
- identifier.*/
-U_CAPI UBool U_EXPORT2
-u_isIDPart(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)(
- (CAT_MASK(props)&
- (U_GC_ND_MASK|U_GC_NL_MASK|
- U_GC_L_MASK|
- U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK)
- )!=0 ||
- u_isIDIgnorable(c));
-}
-
-/*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/
-U_CAPI UBool U_EXPORT2
-u_isIDIgnorable(UChar32 c) {
- if(c<=0x9f) {
- return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c);
- } else {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR);
- }
-}
-
-/*Checks if the Unicode character can start a Java identifier.*/
-U_CAPI UBool U_EXPORT2
-u_isJavaIDStart(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0);
-}
-
-/*Checks if the Unicode character can be a Java identifier part other than starting the
- * identifier.
- */
-U_CAPI UBool U_EXPORT2
-u_isJavaIDPart(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return (UBool)(
- (CAT_MASK(props)&
- (U_GC_ND_MASK|U_GC_NL_MASK|
- U_GC_L_MASK|
- U_GC_SC_MASK|U_GC_PC_MASK|
- U_GC_MC_MASK|U_GC_MN_MASK)
- )!=0 ||
- u_isIDIgnorable(c));
-}
-
-U_CAPI int32_t U_EXPORT2
-u_charDigitValue(UChar32 c) {
- uint32_t props;
- int32_t value;
- GET_PROPS(c, props);
- value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START;
- if(value<=9) {
- return value;
- } else {
- return -1;
- }
-}
-
-U_CAPI double U_EXPORT2
-u_getNumericValue(UChar32 c) {
- uint32_t props;
- int32_t ntv;
- GET_PROPS(c, props);
- ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props);
-
- if(ntv==UPROPS_NTV_NONE) {
- return U_NO_NUMERIC_VALUE;
- } else if(ntv<UPROPS_NTV_DIGIT_START) {
- /* decimal digit */
- return ntv-UPROPS_NTV_DECIMAL_START;
- } else if(ntv<UPROPS_NTV_NUMERIC_START) {
- /* other digit */
- return ntv-UPROPS_NTV_DIGIT_START;
- } else if(ntv<UPROPS_NTV_FRACTION_START) {
- /* small integer */
- return ntv-UPROPS_NTV_NUMERIC_START;
- } else if(ntv<UPROPS_NTV_LARGE_START) {
- /* fraction */
- int32_t numerator=(ntv>>4)-12;
- int32_t denominator=(ntv&0xf)+1;
- return (double)numerator/denominator;
- } else if(ntv<UPROPS_NTV_BASE60_START) {
- /* large, single-significant-digit integer */
- double numValue;
- int32_t mant=(ntv>>5)-14;
- int32_t exp=(ntv&0x1f)+2;
- numValue=mant;
-
- /* multiply by 10^exp without math.h */
- while(exp>=4) {
- numValue*=10000.;
- exp-=4;
- }
- switch(exp) {
- case 3:
- numValue*=1000.;
- break;
- case 2:
- numValue*=100.;
- break;
- case 1:
- numValue*=10.;
- break;
- case 0:
- default:
- break;
- }
-
- return numValue;
- } else if(ntv<UPROPS_NTV_FRACTION20_START) {
- /* sexagesimal (base 60) integer */
- int32_t numValue=(ntv>>2)-0xbf;
- int32_t exp=(ntv&3)+1;
-
- switch(exp) {
- case 4:
- numValue*=60*60*60*60;
- break;
- case 3:
- numValue*=60*60*60;
- break;
- case 2:
- numValue*=60*60;
- break;
- case 1:
- numValue*=60;
- break;
- case 0:
- default:
- break;
- }
-
- return numValue;
- } else if(ntv<UPROPS_NTV_FRACTION32_START) {
- // fraction-20 e.g. 3/80
- int32_t frac20=ntv-UPROPS_NTV_FRACTION20_START; // 0..0x17
- int32_t numerator=2*(frac20&3)+1;
- int32_t denominator=20<<(frac20>>2);
- return (double)numerator/denominator;
- } else if(ntv<UPROPS_NTV_RESERVED_START) {
- // fraction-32 e.g. 3/64
- int32_t frac32=ntv-UPROPS_NTV_FRACTION32_START; // 0..15
- int32_t numerator=2*(frac32&3)+1;
- int32_t denominator=32<<(frac32>>2);
- return (double)numerator/denominator;
- } else {
- /* reserved */
- return U_NO_NUMERIC_VALUE;
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-u_digit(UChar32 ch, int8_t radix) {
- int8_t value;
- if((uint8_t)(radix-2)<=(36-2)) {
- value=(int8_t)u_charDigitValue(ch);
- if(value<0) {
- /* ch is not a decimal digit, try latin letters */
- if(ch>=0x61 && ch<=0x7A) {
- value=(int8_t)(ch-0x57); /* ch - 'a' + 10 */
- } else if(ch>=0x41 && ch<=0x5A) {
- value=(int8_t)(ch-0x37); /* ch - 'A' + 10 */
- } else if(ch>=0xFF41 && ch<=0xFF5A) {
- value=(int8_t)(ch-0xFF37); /* fullwidth ASCII a-z */
- } else if(ch>=0xFF21 && ch<=0xFF3A) {
- value=(int8_t)(ch-0xFF17); /* fullwidth ASCII A-Z */
- }
- }
- } else {
- value=-1; /* invalid radix */
- }
- return (int8_t)((value<radix) ? value : -1);
-}
-
-U_CAPI UChar32 U_EXPORT2
-u_forDigit(int32_t digit, int8_t radix) {
- if((uint8_t)(radix-2)>(36-2) || (uint32_t)digit>=(uint32_t)radix) {
- return 0;
- } else if(digit<10) {
- return (UChar32)(0x30+digit);
- } else {
- return (UChar32)((0x61-10)+digit);
- }
-}
-
-/* miscellaneous, and support for uprops.cpp -------------------------------- */
-
-U_CAPI void U_EXPORT2
-u_getUnicodeVersion(UVersionInfo versionArray) {
- if(versionArray!=NULL) {
- uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH);
- }
-}
-
-U_CFUNC uint32_t
-u_getMainProperties(UChar32 c) {
- uint32_t props;
- GET_PROPS(c, props);
- return props;
-}
-
-U_CFUNC uint32_t
-u_getUnicodeProperties(UChar32 c, int32_t column) {
- U_ASSERT(column>=0);
- if(column>=propsVectorsColumns) {
- return 0;
- } else {
- uint16_t vecIndex=UTRIE2_GET16(&propsVectorsTrie, c);
- return propsVectors[vecIndex+column];
- }
-}
-
-U_CFUNC int32_t
-uprv_getMaxValues(int32_t column) {
- switch(column) {
- case 0:
- return indexes[UPROPS_MAX_VALUES_INDEX];
- case 2:
- return indexes[UPROPS_MAX_VALUES_2_INDEX];
- default:
- return 0;
- }
-}
-
-U_CAPI void U_EXPORT2
-u_charAge(UChar32 c, UVersionInfo versionArray) {
- if(versionArray!=NULL) {
- uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT;
- versionArray[0]=(uint8_t)(version>>4);
- versionArray[1]=(uint8_t)(version&0xf);
- versionArray[2]=versionArray[3]=0;
- }
-}
-
-U_CAPI UScriptCode U_EXPORT2
-uscript_getScript(UChar32 c, UErrorCode *pErrorCode) {
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return USCRIPT_INVALID_CODE;
- }
- if((uint32_t)c>0x10ffff) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return USCRIPT_INVALID_CODE;
- }
- uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
- uint32_t codeOrIndex=uprops_mergeScriptCodeOrIndex(scriptX);
- if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
- return (UScriptCode)codeOrIndex;
- } else if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) {
- return USCRIPT_COMMON;
- } else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) {
- return USCRIPT_INHERITED;
- } else {
- return (UScriptCode)scriptExtensions[codeOrIndex];
- }
-}
-
-U_CAPI UBool U_EXPORT2
-uscript_hasScript(UChar32 c, UScriptCode sc) {
- uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
- uint32_t codeOrIndex=uprops_mergeScriptCodeOrIndex(scriptX);
- if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
- return sc==(UScriptCode)codeOrIndex;
- }
-
- const uint16_t *scx=scriptExtensions+codeOrIndex;
- if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) {
- scx=scriptExtensions+scx[1];
- }
- uint32_t sc32=sc;
- if(sc32>0x7fff) {
- /* Guard against bogus input that would make us go past the Script_Extensions terminator. */
- return FALSE;
- }
- while(sc32>*scx) {
- ++scx;
- }
- return sc32==(*scx&0x7fff);
-}
-
-U_CAPI int32_t U_EXPORT2
-uscript_getScriptExtensions(UChar32 c,
- UScriptCode *scripts, int32_t capacity,
- UErrorCode *pErrorCode) {
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(capacity<0 || (capacity>0 && scripts==NULL)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
- uint32_t codeOrIndex=uprops_mergeScriptCodeOrIndex(scriptX);
- if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
- if(capacity==0) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- } else {
- scripts[0]=(UScriptCode)codeOrIndex;
- }
- return 1;
- }
-
- const uint16_t *scx=scriptExtensions+codeOrIndex;
- if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) {
- scx=scriptExtensions+scx[1];
- }
- int32_t length=0;
- uint16_t sx;
- do {
- sx=*scx++;
- if(length<capacity) {
- scripts[length]=(UScriptCode)(sx&0x7fff);
- }
- ++length;
- } while(sx<0x8000);
- if(length>capacity) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- return length;
-}
-
-U_CAPI UBlockCode U_EXPORT2
-ublock_getCode(UChar32 c) {
- return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT);
-}
-
-/* property starts for UnicodeSet ------------------------------------------- */
-
-static UBool U_CALLCONV
-_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
- /* add the start code point to the USet */
- const USetAdder *sa=(const USetAdder *)context;
- sa->add(sa->set, start);
- (void)end;
- (void)value;
- return TRUE;
-}
-
-#define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1)
-
-U_CFUNC void U_EXPORT2
-uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
-
- /* add the start code point of each same-value range of the main trie */
- utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa);
-
- /* add code points with hardcoded properties, plus the ones following them */
-
- /* add for u_isblank() */
- USET_ADD_CP_AND_NEXT(sa, TAB);
-
- /* add for IS_THAT_CONTROL_SPACE() */
- sa->add(sa->set, CR+1); /* range TAB..CR */
- sa->add(sa->set, 0x1c);
- sa->add(sa->set, 0x1f+1);
- USET_ADD_CP_AND_NEXT(sa, NL);
-
- /* add for u_isIDIgnorable() what was not added above */
- sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */
- sa->add(sa->set, HAIRSP);
- sa->add(sa->set, RLM+1);
- sa->add(sa->set, INHSWAP);
- sa->add(sa->set, NOMDIG+1);
- USET_ADD_CP_AND_NEXT(sa, ZWNBSP);
-
- /* add no-break spaces for u_isWhitespace() what was not added above */
- USET_ADD_CP_AND_NEXT(sa, NBSP);
- USET_ADD_CP_AND_NEXT(sa, FIGURESP);
- USET_ADD_CP_AND_NEXT(sa, NNBSP);
-
- /* add for u_digit() */
- sa->add(sa->set, U_a);
- sa->add(sa->set, U_z+1);
- sa->add(sa->set, U_A);
- sa->add(sa->set, U_Z+1);
- sa->add(sa->set, U_FW_a);
- sa->add(sa->set, U_FW_z+1);
- sa->add(sa->set, U_FW_A);
- sa->add(sa->set, U_FW_Z+1);
-
- /* add for u_isxdigit() */
- sa->add(sa->set, U_f+1);
- sa->add(sa->set, U_F+1);
- sa->add(sa->set, U_FW_f+1);
- sa->add(sa->set, U_FW_F+1);
-
- /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
- sa->add(sa->set, WJ); /* range WJ..NOMDIG */
- sa->add(sa->set, 0xfff0);
- sa->add(sa->set, 0xfffb+1);
- sa->add(sa->set, 0xe0000);
- sa->add(sa->set, 0xe0fff+1);
-
- /* add for UCHAR_GRAPHEME_BASE and others */
- USET_ADD_CP_AND_NEXT(sa, CGJ);
-}
-
-U_CFUNC void U_EXPORT2
-upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
-
- /* add the start code point of each same-value range of the properties vectors trie */
- utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa);
-}
diff --git a/contrib/libs/icu/common/uchar_props_data.h b/contrib/libs/icu/common/uchar_props_data.h
deleted file mode 100644
index 9a78918204f..00000000000
--- a/contrib/libs/icu/common/uchar_props_data.h
+++ /dev/null
@@ -1,3860 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-//
-// Copyright (C) 1999-2016, International Business Machines
-// Corporation and others. All Rights Reserved.
-//
-// file name: uchar_props_data.h
-//
-// machine-generated by: icu/tools/unicode/c/genprops/corepropsbuilder.cpp
-
-
-#ifdef INCLUDED_FROM_UCHAR_C
-
-static const UVersionInfo dataVersion={0xd,0,0,0};
-
-static const uint16_t propsTrie_index[22276]={
-0x46d,0x475,0x47d,0x485,0x49d,0x4a5,0x4ad,0x4b5,0x4bd,0x4c5,0x4cb,0x4d3,0x4db,0x4e3,0x4eb,0x4f3,
-0x4f9,0x501,0x509,0x511,0x514,0x51c,0x524,0x52c,0x534,0x53c,0x538,0x540,0x548,0x550,0x555,0x55d,
-0x565,0x56d,0x571,0x579,0x581,0x589,0x591,0x599,0x595,0x59d,0x5a2,0x5aa,0x5b0,0x5b8,0x5c0,0x5c8,
-0x5d0,0x5d8,0x5e0,0x5e8,0x5ed,0x5f5,0x5f8,0x600,0x608,0x610,0x616,0x61e,0x61d,0x625,0x62d,0x635,
-0x645,0x63d,0x64d,0x655,0x48d,0x665,0x66b,0x65d,0x67b,0x67d,0x685,0x673,0x695,0x69b,0x6a3,0x68d,
-0x6b3,0x6b9,0x6c1,0x6ab,0x6d1,0x6d7,0x6df,0x6c9,0x6ef,0x6f5,0x6fd,0x6e7,0x70d,0x715,0x71d,0x705,
-0x72d,0x733,0x73b,0x725,0x74b,0x751,0x759,0x743,0x769,0x76e,0x776,0x761,0x786,0x78d,0x795,0x77e,
-0x619,0x79d,0x7a5,0x48d,0x7ad,0x7b4,0x7bc,0x48d,0x7c4,0x7cc,0x7d4,0x7d9,0x7e1,0x7e8,0x7f0,0x48d,
-0x5d8,0x7f8,0x800,0x808,0x810,0x565,0x820,0x818,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x828,0x5d8,0x830,0x834,0x83c,0x5d8,0x842,0x5d8,0x848,0x850,0x858,0x565,0x565,0x860,
-0x868,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x86d,0x875,0x5d8,0x5d8,0x87d,0x885,0x88d,0x895,0x89d,0x5d8,0x8a5,0x8ad,0x8b5,
-0x8c5,0x5d8,0x8cd,0x8cf,0x8d7,0x8bd,0x5d8,0x8da,0x8ee,0x8e2,0x8ea,0x8f6,0x5d8,0x8fe,0x904,0x90c,
-0x914,0x5d8,0x924,0x92c,0x934,0x91c,0x944,0x48d,0x94c,0x94f,0x957,0x93c,0x967,0x95f,0x5d8,0x96e,
-0x5d8,0x97d,0x976,0x985,0x98d,0x991,0x999,0x9a1,0x50d,0x9a9,0x9ac,0x9b2,0x9b9,0x9ac,0x534,0x9c1,
-0x4bd,0x4bd,0x4bd,0x4bd,0x9c9,0x4bd,0x4bd,0x4bd,0x9d9,0x9e1,0x9e9,0x9f1,0x9f9,0x9fd,0xa05,0x9d1,
-0xa1d,0xa25,0xa0d,0xa15,0xa2d,0xa35,0xa3d,0xa45,0xa5d,0xa4d,0xa55,0xa65,0xa6d,0xa7c,0xa81,0xa74,
-0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa91,0xa99,0x90c,0xa9c,0xaa4,0xaab,0xab0,0xab8,
-0x90c,0xabf,0xabe,0xacf,0xad2,0x90c,0x90c,0xac7,0x90c,0x90c,0x90c,0x90c,0x90c,0xae1,0xae9,0xad9,
-0x90c,0x90c,0x90c,0xaee,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0xaf4,0xafc,0x90c,0xb04,0xb0b,
-0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0xa89,0xa89,0xa89,0xa89,0xb13,0xa89,0xb1a,0xb21,
-0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0x90c,0xb29,0xb30,0xb34,0xb3a,0x90c,0x90c,0x90c,
-0x565,0xb4a,0xb42,0xb52,0x4bd,0x4bd,0x4bd,0xb5a,0x50d,0xb62,0x5d8,0xb68,0xb78,0xb70,0xb70,0x534,
-0xb80,0xb88,0xb90,0x48d,0xb98,0x90c,0x90c,0xb9f,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0xba7,0xbad,
-0xbbd,0xbb5,0x619,0x5d8,0xbc5,0x868,0x5d8,0xbcd,0xbd5,0xbd9,0x5d8,0x5d8,0xbde,0x5d8,0x90c,0xbe5,
-0xab9,0xbed,0xbf3,0x90c,0xbed,0xbfb,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,
-0xc03,0x5d8,0x5d8,0x5d8,0xc0b,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0xc11,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc16,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x90c,0x90c,
-0xc1e,0x5d8,0xc21,0x5d8,0xc29,0xc2f,0xc37,0xc3f,0xc44,0x5d8,0x5d8,0xc48,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc4f,0x5d8,0xc56,0xc5c,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc64,0x5d8,0x5d8,0x5d8,0xc6c,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc6e,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc75,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0xc7c,0x5d8,0x5d8,0x5d8,0xc83,0xc8b,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc90,0x5d8,0x5d8,0xc98,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc9c,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc9f,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xca2,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0xca8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0xcb0,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0xcb5,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xcba,0x5d8,0x5d8,0x5d8,0xcbf,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0xcc7,0xcce,0xcd2,0x5d8,0x5d8,0x5d8,0xcd9,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x8ce,
-0xce7,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0xcdf,0x90c,0xcef,0x985,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0xcf4,0xcfc,0x4bd,0xd0c,0xd04,0x5d8,0x5d8,0xd14,0xd1c,0xd2c,0x4bd,0xd31,0xd39,0xd3f,0xd47,0xd24,
-0xd4f,0xd57,0x5d8,0xd5f,0xd6f,0xd72,0xd67,0xd7a,0x62d,0xd82,0xd89,0x8ce,0x67b,0xd99,0xd91,0xda1,
-0x5d8,0xda9,0xdb1,0xdb9,0x5d8,0xdc1,0xdc9,0xdd1,0xdd9,0xde1,0xde5,0xded,0x50d,0x50d,0x5d8,0xdf5,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xdfd,0xe09,0xe01,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,
-0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0x5d8,0x5d8,0x5d8,0xe21,0x5d8,0xcda,0xe28,0xe2d,
-0x5d8,0x5d8,0x5d8,0xe35,0x5d8,0x5d8,0x8d9,0x48d,0xe4b,0xe3b,0xe43,0x5d8,0x5d8,0xe53,0xe5b,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xe60,0xe68,0x5d8,0xe6c,0x5d8,0xe72,0xe76,
-0xe7e,0xe86,0xe8d,0xe95,0x5d8,0x5d8,0x5d8,0xe9b,0xeb3,0x47d,0xebb,0xec3,0xec8,0x8ee,0xea3,0xeab,
-0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,
-0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,
-0x1234,0x1234,0x1274,0x12b4,0x12f4,0x132c,0x136c,0x13ac,0x13e4,0x1424,0x1450,0x1490,0x14d0,0x14e0,0x1520,0x1554,
-0x1594,0x15c4,0x1604,0x1644,0x1654,0x1688,0x16c0,0x1700,0x1740,0x1780,0x17b4,0x17e0,0x1820,0x1858,0x1874,0x18b4,
-0xa80,0xac0,0xb00,0xb40,0xb80,0xa40,0xbc0,0xa40,0xbe2,0xa40,0xa40,0xa40,0xa40,0xc22,0x1db,0x1db,
-0xc62,0xca2,0xa40,0xa40,0xa40,0xa40,0xce2,0xd02,0xa40,0xa40,0xd42,0xd82,0xdc2,0xe02,0xe42,0xe82,
-0xec2,0xef9,0x1db,0x1db,0xf1d,0xf51,0x1db,0xf79,0x1db,0x1db,0x1db,0x1db,0xfa6,0x1db,0x1db,0x1db,
-0x1db,0x1db,0x1db,0x1db,0xfba,0x1db,0xff2,0x1032,0x1db,0x103d,0x1db,0x1db,0x1db,0x1073,0xa40,0x10b3,
-0x1db,0x1db,0x10f3,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0x1133,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,
-0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x1173,
-0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,
-0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x1173,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0xed0,0xed7,0xedf,0x48d,0x5d8,0x5d8,0x5d8,0xee7,0xef7,0xeef,0xf0e,0xeff,0xf06,0xf16,0xf1a,0xf1e,
-0x48d,0x48d,0x48d,0x48d,0x8ce,0x5d8,0xf26,0xf2e,0x5d8,0xf36,0xf3e,0xf42,0xf4a,0x5d8,0xf52,0x48d,
-0x565,0x56f,0xf5a,0x5d8,0xf5e,0xf66,0xf76,0xf6e,0x5d8,0xf7e,0x5d8,0xf85,0x48d,0x48d,0x48d,0x48d,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xb78,0x8da,0xe72,0x48d,0x48d,0x48d,0x48d,
-0xf95,0xf8d,0xf98,0xfa0,0x8ee,0xfa8,0x48d,0xfb0,0xfb8,0xfc0,0x48d,0x48d,0x5d8,0xfd0,0xfd8,0xfc8,
-0xfe8,0xfef,0xfe0,0xff7,0xfff,0x48d,0x100f,0x1007,0x5d8,0x1012,0x101a,0x1022,0x102a,0x1032,0x48d,0x48d,
-0x5d8,0x5d8,0x103a,0x48d,0x565,0x1042,0x50d,0x104a,0x5d8,0x1052,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x105a,0x5d8,0x1062,0x48d,0x48d,0x106a,0x1072,0x1079,0x48d,0x48d,0xe68,0x1081,0xb78,
-0x1091,0x60e,0x1099,0x1089,0x967,0x10a1,0x10a9,0x10af,0x10c7,0x10b7,0x10bf,0x10cb,0x967,0x10db,0x10d3,0x10e3,
-0x10f3,0x10eb,0x48d,0x48d,0x10fa,0x1102,0x630,0x110a,0x111a,0x1120,0x1128,0x1112,0x48d,0x48d,0x48d,0x48d,
-0x5d8,0x1130,0x1138,0x1140,0x5d8,0x1148,0x1150,0x48d,0x48d,0x48d,0x48d,0x48d,0x5d8,0x1158,0x1160,0x48d,
-0x5d8,0x1168,0x1170,0x1178,0x5d8,0x1188,0x1180,0x48d,0x848,0x1190,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x5d8,0x1198,0x48d,0x48d,0x48d,0x565,0x50d,0x11a0,0x11b0,0x11b6,0x11a8,0x48d,0x48d,0x11c6,0x11ca,0x11be,
-0x11e2,0x11d2,0x11da,0x5d8,0x11f2,0x11ea,0x5d8,0x8cf,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x1208,0x120d,0x11fa,0x1202,0x121d,0x1215,0x48d,0x48d,0x122c,0x1230,0x1224,0x1240,0x1238,0x1180,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x1244,0x48d,0x48d,0x48d,0x48d,0x48d,0x124b,0x125b,0x1253,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x8d9,0x48d,0x48d,0x48d,
-0x126b,0x1273,0x127b,0x1263,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x1283,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x128b,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x1293,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x8cf,0x8ee,0x129b,0x48d,0x48d,0xe68,0x12a3,0x5d8,0x12b3,0x12bb,0x12c3,0x12ab,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x565,0x50d,0x12cb,0x48d,0x48d,0x48d,0x5d8,0x5d8,0x12d3,0x12d8,0x12de,0x48d,
-0x48d,0x12e6,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x12ee,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x8da,0x48d,0x103a,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x8ee,0x48d,0x12f4,0x12fb,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xe01,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x1301,0x1306,0x130e,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0xba7,0x90c,0x1316,0x90c,0x131d,0x1325,0x132b,
-0x90c,0x1331,0x90c,0x90c,0x1339,0x48d,0x48d,0x48d,0x48d,0x1341,0x90c,0x90c,0xabb,0x1349,0x48d,0x48d,
-0x48d,0x48d,0x1359,0x1360,0x1365,0x136b,0x1373,0x137b,0x1383,0x135d,0x138b,0x1393,0x139b,0x13a0,0x1372,0x1359,
-0x1360,0x135c,0x136b,0x13a8,0x135a,0x13ab,0x135d,0x13b3,0x13bb,0x13c3,0x13ca,0x13b6,0x13be,0x13c6,0x13cd,0x13b9,
-0x13d5,0x1351,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,
-0x90c,0x90c,0x534,0x13e5,0x534,0x13ec,0x13f3,0x13dd,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x13fa,0x1402,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x5d8,0x1412,0x140a,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x5d8,0x141a,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x1422,0x48d,0x565,0x1432,0x142a,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x143a,0x144a,0x1442,0x48d,0x48d,0x145a,0x1452,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x146a,0x1472,0x147a,0x1482,0x148a,0x1492,0x48d,0x1462,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x90c,0x149a,0x90c,0x90c,0xb9f,0x149f,0x14a3,0xba7,0x14ab,0x90c,0x90c,0x90c,0x90c,0xba9,
-0x48d,0x14b3,0x14bb,0x14bf,0x14c7,0x14cf,0x48d,0x48d,0x48d,0x48d,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,
-0x90c,0x14d7,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,
-0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x14df,0x14e7,0x90c,0x90c,0x90c,0xb9f,0x90c,0x90c,
-0x14ef,0x14f7,0x149a,0x90c,0x14ff,0x90c,0x1507,0x150c,0x48d,0x48d,0x90c,0x90c,0x90c,0x1514,0x90c,0x90c,
-0x151b,0x90c,0x90c,0x90c,0xb9f,0x1520,0x1528,0x152e,0x1533,0x48d,0x90c,0x90c,0x90c,0x90c,0x153b,0x90c,
-0xabe,0x117c,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x1543,0x5d8,0x5d8,0x154a,0x5d8,0x5d8,0x5d8,0x1552,0x5d8,0x155a,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0xc80,0x5d8,0x5d8,0x1562,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x156a,0x1572,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xcbf,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x1579,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x1580,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x1587,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0xf5e,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x158b,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0xf5e,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x1066,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x1590,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x1598,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0xf5e,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
-0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x655,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x15a8,0x15a0,0x15a0,0x15a0,0x48d,0x48d,0x48d,0x48d,0x534,0x534,0x534,0x534,0x534,
-0x534,0x534,0x15b0,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
-0x48d,0x48d,0x48d,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
-0xe19,0xe19,0x15b8,0x46c,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
-0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
-0xf,0xf,0xf,0xf,0xc,0x17,0x17,0x17,0x19,0x17,0x17,0x17,0x14,0x15,0x17,0x18,
-0x17,0x13,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,
-0x18,0x18,0x18,0x17,0x17,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x14,
-0x17,0x15,0x1a,0x16,0x1a,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0x14,
-0x18,0x15,0x18,0xf,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
-0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
-0xf,0xf,0xf,0xf,0xc,0x17,0x19,0x19,0x19,0x19,0x1b,0x17,0x1a,0x1b,5,0x1c,
-0x18,0x10,0x1b,0x1a,0x1b,0x18,0x34b,0x38b,0x1a,2,0x17,0x17,0x1a,0x30b,5,0x1d,
-0x34cb,0x344b,0x3ccb,0x17,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,0x18,1,1,1,1,
-1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,0x18,2,2,2,2,
-2,2,2,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,2,1,2,1,
-2,1,2,1,2,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,1,2,1,2,1,2,2,2,1,1,2,
-1,2,1,1,2,1,1,1,2,2,1,1,1,1,2,1,
-1,2,1,1,1,2,2,2,1,1,2,1,1,2,1,2,
-1,2,1,1,2,1,2,2,1,2,1,1,2,1,1,1,
-2,1,2,1,1,2,2,5,1,2,2,2,5,5,5,5,
-1,3,2,1,3,2,1,3,2,1,2,1,2,1,2,1,
-2,1,2,1,2,1,2,1,2,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,2,1,3,2,
-1,2,1,1,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,2,2,2,2,2,2,1,1,
-2,1,1,2,2,1,2,1,1,1,1,2,1,2,1,2,
-1,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,5,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-4,4,0x1a,0x1a,0x1a,0x1a,4,4,4,4,4,4,4,4,4,4,
-4,4,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,
-4,4,4,4,4,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,4,0x1a,4,0x1a,
-0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-1,2,1,2,4,0x1a,1,2,0,0,4,2,2,2,0x17,1,
-0,0,0,0,0x1a,0x1a,1,0x17,1,1,1,0,1,0,1,1,
-2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0,1,1,1,1,1,1,1,1,1,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,1,2,2,1,1,1,2,2,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,2,2,2,2,1,2,0x18,1,2,1,1,2,
-2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,0x1b,6,6,6,6,6,7,7,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,1,2,1,2,1,2,1,2,1,2,1,
-2,1,2,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,0,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
-0,4,0x17,0x17,0x17,0x17,0x17,0x17,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,0x17,0x13,0,0,0x1b,0x1b,0x19,
-0,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,0x13,6,
-0x17,6,6,0x17,6,6,0x17,6,0,0,0,0,0,0,0,0,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,0,0,0,0,5,
-5,5,5,0x17,0x17,0,0,0,0,0,0,0,0,0,0,0,
-0x10,0x10,0x10,0x10,0x10,0x10,0x18,0x18,0x18,0x17,0x17,0x19,0x17,0x17,0x1b,0x1b,
-6,6,6,6,6,6,6,6,6,6,6,0x17,0x10,0,0x17,0x17,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17,5,5,
-6,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,0x17,5,6,6,6,6,6,6,6,0x10,0x1b,6,
-6,6,6,6,6,4,4,6,6,0x1b,6,6,6,6,5,5,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,5,0x1b,0x1b,5,
-0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0x10,
-5,6,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-6,6,6,0,0,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,
-6,6,6,6,6,5,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,
-6,6,6,6,6,6,6,6,4,4,0x1b,0x17,0x17,0x17,4,0,
-0,6,0x19,0x19,6,6,6,6,4,6,6,6,4,6,6,6,
-6,6,0,0,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,
-0x17,0x17,0x17,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,6,6,6,6,4,6,
-6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,
-0,0,0x17,0,5,5,5,5,5,5,5,5,5,5,5,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,6,6,0x10,6,6,6,6,6,6,6,6,6,
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,
-5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,6,
-6,6,6,6,6,6,6,6,6,6,6,6,5,5,6,6,
-0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,4,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,8,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,8,
-6,5,8,8,8,6,6,6,6,6,6,6,6,8,8,8,
-8,6,8,8,5,6,6,6,6,6,6,6,5,5,5,5,
-5,5,5,5,5,5,6,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,
-0x1c9,0x209,0x249,0x289,5,5,0x19,0x19,0x37cb,0x35cb,0x3fcb,0x34cb,0x3ccb,0x94b,0x1b,0x19,
-5,0x17,6,0,5,6,8,8,0,5,5,5,5,5,5,5,
-5,0,0,5,5,0,0,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,0,5,5,5,5,5,5,5,0,5,0,
-0,0,5,5,5,5,0,0,6,5,8,8,8,6,6,6,
-6,0,0,8,8,0,0,8,8,6,5,0,0,0,0,0,
-0,0,0,8,0,0,0,0,5,5,0,5,0,0,0,0,
-0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,6,6,5,5,
-5,6,0x17,0,0,0,0,0,0,0,0,0,0,6,6,8,
-0,5,5,5,5,5,5,0,0,0,0,5,5,0,0,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5,
-5,5,5,5,5,0,5,5,0,5,5,0,5,5,0,0,
-6,0,8,8,8,6,6,0,0,0,0,6,6,0,0,6,
-6,6,0,0,0,6,0,0,0,0,0,0,0,5,5,5,
-5,0,5,0,5,5,6,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,
-0x1c9,0x209,0x249,0x289,0x17,0x19,0,0,0,0,0,0,0,5,6,6,
-6,6,6,6,0,6,6,8,0,5,5,5,5,5,5,5,
-5,5,0,5,5,5,0,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,0,5,5,5,5,5,5,5,0,5,5,
-0,5,5,5,5,5,0,0,6,5,8,8,8,6,6,6,
-6,6,0,6,6,8,0,8,8,6,0,0,5,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,5,5,6,6,
-0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x1b,5,0x34cb,0x344b,
-0x3ccb,0x37cb,0x35cb,0x3fcb,0,0,0,0,0,0,0,0,0,6,8,8,
-0,5,5,5,5,5,5,5,5,0,0,5,5,0,0,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5,
-5,5,5,5,5,0,5,5,0,5,5,5,5,5,0,0,
-6,5,8,6,8,6,6,6,6,0,0,8,8,0,0,8,
-8,6,0,0,0,0,0,0,0,6,6,8,0,0,0,0,
-5,5,0,5,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,
-0x1c9,0x209,0x249,0x289,0x7cb,0x1e4b,0x784b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x19,0x1b,0,
-0,0,0,0,0,0,6,5,0,5,5,5,5,5,5,0,
-0,0,5,5,5,0,5,5,5,5,0,0,0,5,5,0,
-5,0,5,5,0,0,0,5,5,0,0,0,5,5,5,0,
-0,0,5,5,5,5,5,5,5,5,5,5,5,5,0,0,
-0,0,8,8,6,8,8,0,0,0,8,8,8,0,8,8,
-8,6,0,0,5,0,0,0,0,0,0,8,0,0,0,0,
-0,0,0,0,5,5,6,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,
-0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,0,0x17,0x54b,0x58b,0x5cb,0x60b,
-0x58b,0x5cb,0x60b,0x1b,6,8,8,8,6,5,5,5,5,5,5,5,
-5,0,5,5,5,0,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,0,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,0,0,0,5,6,6,6,8,8,8,
-8,0,6,6,6,0,6,6,6,6,0,0,0,0,0,0,
-0,6,6,0,5,5,5,0,0,0,0,0,5,5,6,6,
-0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,5,5,0,
-0,0,0,0,0,0,0,0,0,0,0,0,5,6,8,8,
-0x17,5,5,5,5,5,5,5,5,0,5,5,5,0,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5,
-5,5,5,5,5,5,5,5,0,5,5,5,5,5,0,0,
-6,5,8,6,8,8,8,8,8,0,6,8,8,0,8,8,
-6,6,0,0,0,0,0,0,0,8,8,0,0,0,0,0,
-0,0,5,0,5,5,6,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,
-0x1c9,0x209,0x249,0x289,0x7cb,0x1e4b,0x784b,0x34cb,0x344b,0x3ccb,0x37cb,0x35cb,0x3fcb,0x1b,5,5,
-5,5,5,5,6,6,8,8,5,5,5,5,5,5,5,5,
-5,0,5,5,5,0,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,6,6,5,8,8,8,6,6,6,6,0,8,8,
-8,0,8,8,8,6,5,0x1b,0,0,0,0,5,5,5,8,
-0xcc0b,0xca0b,0xcb4b,0xc90b,0x364b,0xc94b,0x350b,5,0,0,0,0,0,0,0x49,0x89,
-0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,8,8,0x17,0,0,0,
-0,0,0,0,0,0,0,0,0,6,8,8,0,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,
-0,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,0,5,5,5,5,5,5,5,5,5,
-0,5,0,0,5,5,5,5,5,5,5,0,0,0,6,0,
-0,0,0,8,8,8,6,6,6,0,6,0,8,8,8,8,
-8,8,8,8,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,6,5,5,6,6,6,6,6,6,6,0,
-0,0,0,0x19,5,5,5,5,5,5,4,6,6,6,6,6,
-6,6,6,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,
-0,0,0,0,0,5,5,0,5,0,5,5,5,5,5,0,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,0,5,0,5,5,5,5,5,5,5,5,5,
-5,6,5,5,6,6,6,6,6,6,6,6,6,5,0,0,
-5,5,5,5,5,0,4,0,6,6,6,6,6,6,0,0,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,5,5,5,5,
-5,0x1b,0x1b,0x1b,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,
-0x17,0x17,0x17,0x1b,0x17,0x1b,0x1b,0x1b,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x344b,0x3c4b,0x444b,0x4c4b,0x544b,0x5c4b,
-0x644b,0x6c4b,0x744b,0x2c4b,0x1b,6,0x1b,6,0x1b,6,0x14,0x15,0x14,0x15,8,8,
-5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,0,0,0,0,6,6,6,6,6,6,6,6,6,6,6,
-6,6,6,8,6,6,6,6,6,0x17,6,6,5,5,5,5,
-5,6,6,6,6,6,6,6,6,6,6,6,0,6,6,6,
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-6,6,6,6,6,6,6,6,6,6,6,6,6,0,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,
-0x17,0x17,0x17,0x17,0x17,0x1b,0x1b,0x1b,0x1b,0x17,0x17,0,0,0,0,0,
-5,5,5,5,5,5,5,5,5,5,5,8,8,6,6,6,
-6,8,6,6,6,6,6,6,8,6,6,8,8,6,6,5,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17,0x17,0x17,
-5,5,5,5,5,5,8,8,6,6,5,5,5,5,6,6,
-6,5,8,8,8,5,5,8,8,8,8,8,8,8,5,5,
-5,6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,
-5,5,6,8,8,6,6,8,8,8,8,8,8,6,5,8,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,8,8,8,6,0x1b,0x1b,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,0x17,4,2,2,2,
-1,1,1,1,1,1,0,1,0,0,0,0,0,1,0,0,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-5,5,5,5,5,5,5,5,5,0,5,5,5,5,0,0,
-5,5,5,5,5,5,5,0,5,0,5,5,5,5,0,0,
-5,5,5,5,5,5,5,5,5,0,5,5,5,5,0,0,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,0,5,5,5,5,0,0,5,5,5,5,5,5,5,0,
-5,0,5,5,5,5,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,0,5,5,5,5,0,0,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,0,0,6,6,6,
-0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,
-0x4cb,0x50b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x788b,0,0,0,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,0,0,2,2,2,2,2,2,0,0,
-0x13,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,0x1b,0x17,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,0xc,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x14,
-0x15,0,0,0,5,5,5,5,5,5,5,5,5,5,5,0x17,
-0x17,0x17,0x98a,0x9ca,0xa0a,5,5,5,5,5,5,5,5,0,0,0,
-0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,0,5,5,5,5,6,6,6,0,0,0,0,0,0,0,
-0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,6,6,6,0x17,0x17,0,0,0,0,0,
-0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,6,6,0,0,0,0,0,0,0,0,
-0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,0,5,5,5,0,6,6,0,0,0,0,0,0,0,0,
-0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,6,6,8,6,6,6,6,6,
-6,6,8,8,8,8,8,8,8,8,6,8,8,6,6,6,
-6,6,6,6,6,6,6,6,0x17,0x17,0x17,4,0x17,0x17,0x17,0x19,
-5,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,
-0,0,0,0,0x54b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0,0,
-0,0,0,0,5,5,5,5,5,5,5,5,5,6,5,0,
-0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,0x17,0x17,0x17,0x17,0x17,0x17,0x13,0x17,0x17,0x17,0x17,6,
-6,6,0x10,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,
-0,0,0,0,5,5,5,4,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,0,0,0,0,0,0,0,5,5,5,5,
-5,6,6,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0,
-0,0,0,0,0,0,0,0,6,6,6,8,8,8,8,6,
-6,8,8,8,0,0,0,0,8,8,6,8,8,8,8,8,
-8,6,6,6,0,0,0,0,0x1b,0,0,0,0x17,0x17,0x49,0x89,
-0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,0,0,5,5,5,5,5,0,0,0,
-0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,0,0,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,0,0,0,0,0,0,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x30b,0,0,0,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,6,6,8,8,6,0,0,0x17,0x17,
-0x17,0x17,0x17,0x17,0x17,0x17,0x17,4,0x17,0x17,0x17,0x17,0x17,0x17,0,0,
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,6,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,8,6,8,6,6,6,6,6,6,6,0,
-6,8,6,8,8,6,6,6,6,6,6,6,6,8,8,8,
-8,8,8,6,6,6,6,6,6,6,6,6,6,0,0,6,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,
-0x17,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,6,6,6,6,
-6,6,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,
-6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-6,6,6,6,8,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-6,8,6,6,6,6,6,8,6,8,8,8,8,8,6,8,
-8,5,5,5,5,5,5,5,0,0,0,0,0x49,0x89,0xc9,0x109,
-0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17,0x17,0x17,5,8,6,6,
-6,6,8,8,6,6,8,6,6,6,5,5,0x49,0x89,0xc9,0x109,
-0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,5,5,5,5,6,6,8,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,8,
-6,6,8,8,8,6,8,6,6,6,8,8,0,0,0,0,
-0,0,0,0,0x17,0x17,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
-0x249,0x289,0,0,0,5,5,5,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
-0x249,0x289,5,5,5,5,5,5,8,8,8,8,8,8,8,8,
-6,6,6,6,6,6,6,6,8,8,6,6,0,0,0,0x17,
-0x17,0x17,0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,4,4,4,4,
-4,4,0x17,0x17,2,2,2,2,2,2,2,2,2,0,0,0,
-0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
-0,1,1,1,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,
-0,0,0,0,6,6,6,0x17,6,6,6,6,6,6,6,6,
-6,6,6,6,6,8,6,6,6,6,6,6,6,5,5,5,
-5,6,5,5,5,5,5,5,6,5,5,8,6,6,5,0,
-0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,
-4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-4,4,4,2,2,2,2,2,2,2,2,2,2,2,2,2,
-4,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,4,
-4,4,4,4,6,6,6,6,6,6,6,6,6,6,6,6,
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,6,
-6,6,6,6,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,2,2,2,2,2,2,
-2,2,1,2,2,2,2,2,2,2,2,2,1,1,1,1,
-1,0x1a,0x1a,0x1a,0,0,2,2,2,0,2,2,1,1,1,1,
-3,0x1a,0x1a,0,2,2,2,2,2,2,2,2,1,1,1,1,
-1,1,1,1,2,2,2,2,2,2,0,0,1,1,1,1,
-1,1,0,0,2,2,2,2,2,2,2,2,1,1,1,1,
-1,1,1,1,2,2,2,2,2,2,2,2,1,1,1,1,
-1,1,1,1,2,2,2,2,2,2,0,0,1,1,1,1,
-1,1,0,0,2,2,2,2,2,2,2,2,0,1,0,1,
-0,1,0,1,2,2,2,2,2,2,2,2,1,1,1,1,
-1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,0,0,2,2,2,2,2,2,2,2,3,3,3,3,
-3,3,3,3,2,2,2,2,2,2,2,2,3,3,3,3,
-3,3,3,3,2,2,2,2,2,0,2,2,1,1,1,1,
-3,0x1a,2,0x1a,0x1a,0x1a,2,2,2,0,2,2,1,1,1,1,
-3,0x1a,0x1a,0x1a,2,2,2,2,0,0,2,2,1,1,1,1,
-0,0x1a,0x1a,0x1a,0x16,0x17,0x17,0x17,0x18,0x14,0x15,0x17,0x17,0x17,0x17,0x17,
-0x17,0x17,0x17,0x17,0x17,0x17,0x18,0x17,0x16,0x17,0x17,0x17,0x17,0x17,0x17,0x17,
-0x17,0x17,0x17,0xc,0x10,0x10,0x10,0x10,0x10,0,0x10,0x10,0x10,0x10,0x10,0x10,
-0x10,0x10,0x10,0x10,0x2cb,4,0,0,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x18,0x18,
-0x18,0x14,0x15,4,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0x10,
-0x10,0x10,0x10,0x10,0x13,0x13,0x13,0x13,0x13,0x13,0x17,0x17,0x1c,0x1d,0x14,0x1c,
-0x1c,0x1d,0x14,0x1c,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0xd,0xe,0x10,0x10,
-0x10,0x10,0x10,0xc,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x1c,0x1d,0x17,
-0x17,0x17,0x17,0x16,0x2cb,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x18,0x18,
-0x18,0x14,0x15,0,4,4,4,4,4,4,4,4,4,4,4,4,
-4,0,0,0,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,
-0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,
-0x19,0x19,0x19,0x19,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,6,6,6,6,6,6,6,6,6,6,6,6,
-6,7,7,7,7,6,7,7,7,6,6,6,6,6,6,6,
-6,6,6,6,6,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x1b,0x1b,0x1b,0x1b,1,0x1b,1,0x1b,1,0x1b,1,1,
-1,1,0x1b,2,1,1,1,1,2,5,5,5,5,2,0x1b,0x1b,
-2,2,1,1,0x18,0x18,0x18,0x18,0x18,1,2,2,2,2,0x1b,0x18,
-0x1b,0x1b,2,0x1b,0x358b,0x360b,0x364b,0x348b,0x388b,0x350b,0x390b,0x3d0b,0x410b,0x354b,0x454b,0x35cb,
-0x3dcb,0x45cb,0x4dcb,0x58b,0x1b,0x1b,1,0x1b,0x1b,0x1b,0x1b,1,0x1b,0x1b,2,1,
-1,1,2,2,1,1,1,2,0x1b,1,0x1b,0x1b,0x18,1,1,1,
-1,1,0x1b,0x1b,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x7ca,0x80a,0x84a,
-0x11ca,0x1e4a,0x980a,0x784a,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x7ca,0x80a,0x84a,
-0x11ca,0x1e4a,0x980a,0x784a,0x784a,0x984a,0x788a,1,2,0x6ca,0x11ca,0x988a,0x78ca,0x54b,0x1b,0x1b,
-0,0,0,0,0x18,0x18,0x18,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
-0x18,0x1b,0x1b,0x18,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x18,0x18,0x1b,0x1b,0x18,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
-0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
-0x18,0x18,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x14,0x15,0x14,0x15,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x14,0x15,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,
-0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,
-0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x2cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b,
-0xa4b,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x2cb,0x30b,0x34b,0x38b,0x3cb,
-0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b,0xa4b,
-0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,
-0x98b,0x9cb,0xa0b,0xa4b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,
-0x18,0x18,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,
-0x14,0x15,0x14,0x15,0x14,0x15,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,
-0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,
-0x48b,0x4cb,0x50b,0x7cb,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x18,0x18,0x18,0x18,0x18,0x14,0x15,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
-0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
-0x18,0x18,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x18,0x18,0x18,0x18,
-0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x14,
-0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,
-0x15,0x14,0x15,0x14,0x15,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
-0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
-0x14,0x15,0x14,0x15,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
-0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
-0x14,0x15,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
-0x18,0x18,0x18,0x18,0x18,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,0,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,0,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,1,2,1,1,1,2,2,1,
-2,1,2,1,2,1,1,1,1,2,1,2,2,1,2,2,
-2,2,2,2,4,4,1,1,1,2,1,2,2,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,1,2,1,2,6,6,6,1,2,0,0,0,0,
-0,0x17,0x17,0x17,0x17,0x344b,0x17,0x17,2,2,2,2,2,2,0,2,
-0,0,0,0,0,2,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,4,
-0x17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,
-5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,0,
-5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,0,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,0,0,0,0,0,0,0,0,0,
-0x17,0x17,0x1c,0x1d,0x1c,0x1d,0x17,0x17,0x17,0x1c,0x1d,0x17,0x1c,0x1d,0x17,0x17,
-0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x13,0x17,0x17,0x13,0x17,0x1c,0x1d,0x17,0x17,
-0x1c,0x1d,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x17,0x17,0x17,0x17,0x17,4,
-0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x13,0x13,0x17,0x17,0x17,0x17,
-0x13,0x17,0x14,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,
-0x1b,0x1b,0x17,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0,0,0,0,0x1b,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,6,6,
-6,6,8,8,0x13,4,4,4,4,4,0x1b,0x1b,0x7ca,0xa4a,0xcca,4,
-5,0x17,0x1b,0x1b,0xc,0x17,0x17,0x17,0x1b,4,5,0x54a,0x14,0x15,0x14,0x15,
-0x14,0x15,0x14,0x15,0x14,0x15,0x1b,0x1b,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,
-0x13,0x14,0x15,0x15,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,0,0,6,6,0x1a,
-0x1a,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x17,
-4,4,4,5,0,0,0,0,0,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,0,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,0,0x1b,0x1b,0x58b,0x5cb,0x60b,0x64b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,
-0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1b,0xa8b,0xacb,0xb0b,
-0xb4b,0xb8b,0xbcb,0xc0b,0xc4b,0xc8b,0xccb,0xd0b,0xd4b,0xd8b,0xdcb,0xe0b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0xe4b,0xe8b,0xecb,
-0xf0b,0xf4b,0xf8b,0xfcb,0x100b,0x104b,0x108b,0x10cb,0x110b,0x114b,0x118b,0x11cb,5,5,5,5,
-5,0x685,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x5c5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x685,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,0x705,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,0x585,5,5,0x705,5,5,5,0x7885,
-5,0x605,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,0x785,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-0x5c5,5,5,5,5,5,5,5,0x685,5,0x645,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,0x7985,0x7c5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,0x7845,5,5,5,5,
-5,5,5,5,0x605,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,0x685,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-0x1e45,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-0x7985,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x7a85,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,0x5c5,5,0x745,5,0x6c5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,0x7c5,5,0x7845,0xa45,0xcc5,5,5,5,5,5,5,0xf45,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,0x605,0x605,0x605,0x605,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,0x645,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,0x585,5,5,5,5,5,5,5,0x585,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,0x585,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,0x785,0xa45,5,5,5,5,
-5,5,5,5,5,5,5,5,0x585,0x5c5,0x605,5,0x5c5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x7c5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,0x745,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,0x705,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x785,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x1e45,5,
-5,5,5,5,5,5,0x645,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-0x7885,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,0x5c5,5,5,5,5,0x5c5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,0x5c5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,0x7845,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x6c5,5,
-5,5,5,5,0x1e45,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-0x6c5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,0x545,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,4,5,5,5,5,5,5,5,5,5,5,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,4,0x17,0x17,0x17,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,2,1,2,1,2,4,4,6,6,
-1,2,1,2,1,2,1,2,1,2,1,2,1,2,5,6,
-7,7,7,0x17,6,6,6,6,6,6,6,6,6,6,0x17,4,
-5,5,5,5,5,5,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x54a,
-6,6,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,
-0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,
-0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,4,4,4,4,4,4,4,4,4,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,1,2,5,4,4,2,5,5,5,5,5,
-0x1a,0x1a,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-2,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
-1,2,1,2,4,2,2,2,2,2,2,2,2,1,2,1,
-2,1,1,2,1,2,1,2,1,2,1,2,4,0x1a,0x1a,1,
-2,1,2,5,1,2,1,2,2,2,1,2,1,2,1,2,
-1,2,1,2,1,2,1,1,1,1,1,2,1,1,1,1,
-1,2,1,2,1,2,1,2,1,2,1,2,0,0,1,2,
-1,1,1,1,2,1,2,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,5,5,6,5,
-5,5,6,5,5,5,5,6,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,8,
-8,6,6,8,0x1b,0x1b,0x1b,0x1b,6,0,0,0,0x34cb,0x344b,0x3ccb,0x37cb,
-0x35cb,0x3fcb,0x1b,0x1b,0x19,0x1b,0,0,0,0,0,0,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,8,8,8,8,
-6,6,0,0,0,0,0,0,0,0,0x17,0x17,0x49,0x89,0xc9,0x109,
-0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,8,8,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,8,8,8,8,
-8,8,8,8,8,8,8,8,6,6,6,6,6,6,6,6,
-6,6,6,6,6,6,6,6,6,6,5,5,5,5,5,5,
-0x17,0x17,0x17,5,0x17,5,5,6,5,5,5,5,5,5,6,6,
-6,6,6,6,6,6,0x17,0x17,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,
-6,6,6,6,6,6,8,8,0,0,0,0,0,0,0,0,
-0,0,0,0x17,8,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,
-0x17,0x17,0,4,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,
-0,0,0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,6,8,8,6,6,6,6,8,8,
-6,6,8,8,5,5,5,5,5,6,4,5,5,5,5,5,
-5,5,5,5,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,
-5,5,5,0,5,5,5,5,5,5,5,5,5,6,6,6,
-6,6,6,8,8,6,6,8,8,6,6,0,0,0,0,0,
-0,0,0,0,5,5,5,6,5,5,5,5,5,5,5,5,
-6,8,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,
-0x17,0x17,0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,4,5,5,5,5,5,5,0x1b,0x1b,0x1b,5,8,
-6,8,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,6,5,6,6,6,5,5,6,6,5,5,5,
-5,5,6,6,5,6,5,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,
-5,4,0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,8,
-6,6,8,8,0x17,0x17,5,4,4,8,6,0,0,0,0,0,
-0,0,0,0,0,5,5,5,5,5,5,0,0,5,5,5,
-5,5,5,0,0,5,5,5,5,5,5,0,0,0,0,0,
-0,0,0,0,5,5,5,5,5,5,5,0,5,5,5,5,
-5,5,5,0,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0x1a,
-4,4,4,4,2,2,2,2,2,2,2,2,2,4,0x1a,0x1a,
-0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,5,5,5,8,8,6,8,8,6,8,8,0x17,
-8,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,
-0,0,0,0,5,5,5,5,0,0,0,0,0,0,0,0,
-0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-0,0,0,0,5,5,5,5,5,5,5,0,0,0,0,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
-0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
-0x12,0x12,0x12,0x12,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,
-0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,
-0x11,0x11,0x11,0x11,5,5,5,5,5,5,5,5,5,5,5,0x605,
-5,5,5,5,5,5,5,0x7c5,5,5,5,5,0x5c5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,0x6c5,5,0x6c5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,0x7c5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,0x18,5,5,5,5,5,5,5,5,5,5,
-5,5,5,0,5,5,5,5,5,0,5,0,5,5,0,5,
-5,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,2,2,2,2,
-2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,
-2,2,2,2,0,0,0,0,0,5,6,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x1a,0x1a,
-0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x15,0x14,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-0,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,0x19,0x1b,0,0,6,6,6,6,6,6,6,6,
-6,6,6,6,6,6,6,6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x14,
-0x15,0x17,0,0,0,0,0,0,6,6,6,6,6,6,6,6,
-6,6,6,6,6,6,6,6,0x17,0x13,0x13,0x16,0x16,0x14,0x15,0x14,
-0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x17,0x17,0x14,0x15,0x17,0x17,0x17,
-0x17,0x16,0x16,0x16,0x17,0x17,0x17,0,0x17,0x17,0x17,0x17,0x13,0x14,0x15,0x14,
-0x15,0x14,0x15,0x17,0x17,0x17,0x18,0x13,0x18,0x18,0x18,0,0x17,0x19,0x17,0x17,
-0,0,0,0,5,5,5,5,5,0,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,0,0,0x10,0,0,5,5,
-5,5,5,5,0,0,5,5,5,5,5,5,0,0,5,5,
-5,5,5,5,0,0,5,5,5,0,0,0,0x19,0x19,0x18,0x1a,
-0x1b,0x19,0x19,0,0x1b,0x18,0x18,0x18,0x18,0x1b,0x1b,0,0,0,0,0,
-0,0,0,0,0,0x10,0x10,0x10,0x1b,0x1b,0,0,0,0x17,0x17,0x17,
-0x19,0x17,0x17,0x17,0x14,0x15,0x17,0x18,0x17,0x13,0x17,0x17,0x49,0x89,0xc9,0x109,
-0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x18,0x18,0x18,0x17,0x1a,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,0x14,0x18,0x15,0x18,0x14,0x15,0x17,0x14,0x15,
-0x17,0x17,5,5,5,5,5,5,5,5,5,5,4,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,4,4,
-5,5,5,5,5,5,5,5,5,5,5,5,0,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,0,5,5,0,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,0,0,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,0,0,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,0,0,0,0,0,0xb00b,0xb80b,0x784b,0x804b,
-0x884b,0x904b,0x984b,0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,
-0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x17,0x17,0x17,0,
-0,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,
-0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0x7ca,0x7ca,0x7ca,0x7ca,
-0x7ca,0xcca,0x11ca,0x11ca,0x11ca,0x11ca,0x1e4a,0x880a,0x980a,0x980a,0x980a,0x980a,0x980a,0x784a,0x984a,0x68a,
-0x11ca,0x344b,0x344b,0x388b,0x3ccb,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x54b,0x34cb,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0x34ca,0x344a,0x58a,0x68a,0x11ca,0x980a,0x984a,0x988a,
-0x68a,0x7ca,0x11ca,0x1e4a,0x980a,0x784a,0x984a,0x68a,0x7ca,0x11ca,0x1e4a,0x980a,0x784a,0x788a,0x988a,0x7ca,
-0x58a,0x58a,0x58a,0x5ca,0x5ca,0x5ca,0x5ca,0x68a,0x1b,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,6,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,6,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,
-0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,
-0xa00b,0xa80b,0xb00b,0xb80b,0,0,0,0,0x58b,0x68b,0x7cb,0x11cb,0,0,0,0,
-0,0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,0x1bca,5,5,5,5,5,5,
-5,5,0xb80a,0,0,0,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,
-6,6,6,0,0,0,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,0,0x17,5,5,5,5,0,0,0,0,
-5,5,5,5,5,5,5,5,0x17,0x58a,0x5ca,0x7ca,0xa4a,0x1e4a,0,0,
-0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
-0x249,0x289,0,0,0,0,0,0,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,0,0,0,0,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
-2,2,2,2,2,2,2,2,5,5,5,5,5,5,5,5,
-0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,0,
-0,0,0,0x17,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,0,5,5,0,0,0,
-5,0,0,5,5,5,5,5,5,5,0,0,5,0,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,0,0x17,0x58b,0x5cb,0x60b,0x7cb,0xa4b,0x1e4b,0x784b,0x788b,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,0x1b,0x1b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x7cb,0xa4b,
-0,0,0,0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x64b,0x68b,0x7cb,0xa4b,0x1e4b,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,0,5,5,0,0,0,0,0,0x58b,0x68b,0x7cb,0xa4b,0x1e4b,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,0x58b,0x7cb,0xa4b,0x1e4b,0x5cb,0x60b,0,0,0,0x17,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,0,0,0,0,0,0x17,
-0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x78cb,0x80cb,0x88cb,
-0x90cb,0x98cb,0xa0cb,0xa8cb,0xb0cb,0xb8cb,0x36cb,0x354b,0x34cb,0x348b,0x46cb,0x344b,0x4ecb,0x388b,0x3ccb,0x454b,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,0,0,0,0,0x5ecb,0x344b,5,5,
-0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,
-0,0,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0x984b,
-0x30b,0x34b,0x38b,0x3cb,0x7cb,0xa4b,0x1e4b,0x784b,0x344b,0,0,0,0,0,0,0,
-0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,
-5,6,6,6,0,6,6,0,0,0,0,0,6,6,6,6,
-5,5,5,5,0,5,5,5,0,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,0,0,6,6,6,0,0,0,0,6,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,0x58b,0x11cb,0x17,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,0x58b,0x7cb,0xa4b,5,5,5,5,
-5,6,6,0,0,0,0,0x58b,0x68b,0x7cb,0xa4b,0x1e4b,0x17,0x17,0x17,0x17,
-0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,5,5,5,5,
-5,5,5,5,0x1b,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0,
-0,0x17,0x17,0x17,0x17,0x17,0x17,0x17,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0,
-0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,0x784b,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,0,0,0,0,0,
-0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,0x784b,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,0,0,0,0,0,0,
-0,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,0,0,
-0,0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
-5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
-0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,
-0,0,0x58b,0x68b,0x7cb,0x11cb,0x1e4b,0x784b,5,5,5,5,6,6,6,6,
-0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
-0x249,0x289,0,0,0,0,0,0,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,
-0x50b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,
-0xa80b,0xb00b,0xb80b,0x344b,0x34cb,0x348b,0x388b,0,5,5,5,5,5,5,5,5,
-5,5,0,6,6,0x13,0,0,5,5,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,0x58b,0x5cb,0x60b,0x64b,0x68b,0x7cb,0xa4b,0xccb,0x1e4b,0x344b,5,
-0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,
-6,6,6,6,6,0x58b,0x7cb,0xa4b,0x1e4b,0x17,0x17,0x17,0x17,0x17,0,0,
-0,0,0,0,5,5,5,5,5,0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x784b,0x49,0x89,0xc9,0x109,0x149,0x189,
-0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,6,8,6,8,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,6,6,6,6,6,6,6,0x17,0x17,0x17,0x17,0x17,
-0x17,0x17,0,0,0,0,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,
-0xa4b,0xccb,0xf4b,0x11cb,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,8,8,8,6,6,6,6,8,8,6,6,0x17,
-0x17,0x10,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,0,
-0,0x10,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,
-0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,5,5,5,5,
-5,5,5,6,6,6,6,6,8,6,6,6,6,6,6,6,
-6,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17,
-5,8,8,5,0,0,0,0,0,0,0,0,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,
-0x17,0x17,5,0,0,0,0,0,0,0,0,0,8,5,5,5,
-5,0x17,0x17,0x17,0x17,6,6,6,6,0x17,8,6,0x49,0x89,0xc9,0x109,
-0x149,0x189,0x1c9,0x209,0x249,0x289,5,0x17,5,0x17,0x17,0x17,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,8,
-8,8,6,6,6,6,6,6,6,6,6,8,0,0x58b,0x5cb,0x60b,
-0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,
-0x784b,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
-5,5,5,5,5,5,5,5,8,8,8,6,6,6,8,8,
-6,8,6,6,0x17,0x17,0x17,0x17,0x17,0x17,6,0,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,
-5,0,5,5,5,5,0,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,0,5,5,5,5,5,5,5,5,5,
-5,0x17,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,8,8,8,6,6,6,6,6,
-6,6,6,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
-0x249,0x289,0,0,0,0,0,0,5,5,8,8,0,0,6,6,
-6,6,6,6,6,0,0,0,6,6,6,6,6,0,0,0,
-0,0,0,0,0,0,0,0,6,6,8,8,0,5,5,5,
-5,5,5,5,5,0,0,5,5,0,0,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,
-5,0,5,5,0,5,5,5,5,5,0,6,6,5,8,8,
-6,8,8,8,8,0,0,8,8,0,0,8,8,8,0,0,
-5,0,0,0,0,0,0,8,0,0,0,0,0,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,8,8,8,6,6,6,6,6,6,6,6,
-8,8,6,6,6,8,6,5,5,5,5,0x17,0x17,0x17,0x17,0x17,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0,0x17,6,5,
-5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-8,8,8,6,6,6,6,6,6,8,6,8,8,8,8,6,
-6,8,6,6,5,5,0x17,5,0,0,0,0,0,0,0,0,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,8,
-8,8,6,6,6,6,0,0,8,8,8,8,6,6,8,6,
-6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,
-0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,5,5,5,5,6,6,0,0,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-8,8,8,6,6,6,6,6,6,6,6,8,8,6,8,6,
-6,0x17,0x17,0x17,5,0,0,0,0,0,0,0,0,0,0,0,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,
-0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-5,5,5,5,5,5,5,5,5,5,5,6,8,6,8,8,
-6,6,6,6,6,6,8,6,5,0,0,0,0,0,0,0,
-8,8,6,6,6,6,8,6,6,6,6,6,0,0,0,0,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x7cb,0xa4b,0x17,0x17,0x17,0x1b,
-5,5,5,5,5,5,5,5,5,5,5,5,8,8,8,6,
-6,6,6,6,6,6,6,6,8,6,6,0x17,0,0,0,0,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,
-0x16cb,0x194b,0x1bcb,0,0,0,0,0,0,0,0,0,0,0,0,5,
-8,5,8,6,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,
-5,5,5,5,5,5,5,0,0,5,0,0,5,5,5,5,
-5,5,5,5,0,5,5,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,8,8,8,8,8,8,0,8,
-8,0,0,6,6,8,6,5,6,5,0x17,5,8,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
-0,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,8,8,8,6,6,6,6,
-0,0,6,6,8,8,8,8,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,
-6,8,5,6,6,6,6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,6,
-0,0,0,0,0,0,0,0,5,6,6,6,6,6,6,8,
-8,6,6,6,5,5,5,5,5,6,6,6,6,6,6,6,
-6,6,6,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,0x17,0x17,0x17,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
-5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,8,
-6,6,0x17,0x17,0x17,5,0x17,0x17,5,0x17,0x17,0x17,0x17,0x17,0,0,
-0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
-0x249,0x289,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,
-0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0,0,0,0x17,0x17,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,8,6,6,6,6,6,6,6,0,6,6,6,6,
-6,6,8,6,6,6,6,6,6,6,6,6,0,8,6,6,
-6,6,6,6,6,8,6,6,8,6,6,0,0,0,0,0,
-0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,0,0,6,6,6,6,6,6,6,6,6,6,
-6,6,6,6,6,6,5,6,0,0,0,0,0,0,0,0,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,
-5,5,5,5,5,5,5,0,5,5,0,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,6,6,6,6,6,6,0,0,0,6,0,6,6,0,6,
-5,5,5,5,5,5,5,5,5,5,8,8,8,8,8,0,
-6,6,0,8,8,6,8,6,5,0,0,0,0,0,0,0,
-5,5,5,5,5,5,0,5,5,0,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,6,6,8,8,0x17,0x17,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x19,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0x17,0xcd0b,0xcc0b,0xcb0b,0xd00b,
-0xca0b,0xcf0b,0xcb4b,0xd04b,0xc90b,0x37cb,0x37cb,0x364b,0x35cb,0xc94b,0x3fcb,0x350b,0x34cb,0x344b,0x344b,0x3ccb,
-0xcd0b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x19,0x19,0x19,0x34ca,0x354a,0x34ca,0x34ca,
-0x344a,0x348a,0x388a,0xf4a,0x11ca,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0,0x17,0x17,0x17,0x17,
-0x17,0,0,0,0,0,0,0,0,0,0,0,0x5ca,0x60a,0x64a,0x68a,
-0x6ca,0x70a,0x74a,0x78a,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x64a,0x68a,0x6ca,0x70a,0x74a,
-0x78a,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x58a,0x5ca,0x60a,0x64a,0x68a,0x5ca,
-0x60a,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x58a,0x5ca,0x60a,0x60a,0x64a,0x68a,0xc08a,0xc18a,
-0x58a,0x5ca,0x60a,0x60a,0x64a,0x68a,0x60a,0x60a,0x64a,0x64a,0x64a,0x64a,0x6ca,0x70a,0x70a,0x70a,
-0x74a,0x74a,0x78a,0x78a,0x78a,0x78a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x58a,0x5ca,0x60a,0x64a,0x64a,
-0x68a,0x68a,0x5ca,0x60a,0x58a,0x5ca,0x348a,0x388a,0x454a,0x348a,0x388a,0x35ca,5,5,5,5,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,0,0x10,0x10,0x10,0x10,
-0x10,0x10,0x10,0x10,0x10,0,0,0,0,0,0,0,5,5,5,5,
-5,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,
-0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0x17,0x17,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,0,0,6,6,6,6,
-6,0x17,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,
-6,6,6,0x17,0x17,0x17,0x17,0x17,0x1b,0x1b,0x1b,0x1b,4,4,4,4,
-0x17,0x1b,0,0,0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,
-0x149,0x189,0x1c9,0x209,0x249,0x289,0,0x7cb,0x1e4b,0x788b,0x790b,0x798b,0x7a0b,0x7a8b,0,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,0,0,0,0,0,5,5,5,0x54b,0x58b,0x5cb,0x60b,
-0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b,
-0x58b,0x5cb,0x60b,0x17,0x17,0x17,0x17,0,0,0,0,0,5,5,5,5,
-5,5,5,5,5,5,5,0,0,0,0,6,5,8,8,8,
-8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
-8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
-0,0,0,0,0,0,0,6,6,6,6,4,4,4,4,4,
-4,4,4,4,4,4,4,4,4,4,0x17,4,6,0,0,0,
-0,0,0,0,0,0,0,0,8,8,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-5,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,
-5,5,5,5,0,0,0,0,0,0,0,0,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,
-0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
-5,5,0,0,0x1b,6,6,0x17,0x10,0x10,0x10,0x10,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,
-0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,8,8,6,6,6,0x1b,0x1b,
-0x1b,8,8,8,8,8,8,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,6,
-6,6,6,6,6,6,6,0x1b,0x1b,6,6,6,6,6,6,6,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,6,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x1b,0x1b,6,6,6,0x1b,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x54b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x80b,
-0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b,0,0,0,0,0,0,0,0,
-0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,
-0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x58b,0x5cb,0x60b,0x64b,0x68b,0x58b,0x68b,0,0,0,
-0,0,0,0,0x249,0x289,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,
-0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x49,0x89,0xc9,0x109,0x149,0x189,
-0x1c9,0x209,0x249,0x289,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,2,2,2,2,2,2,2,0,2,2,2,2,2,2,
-2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,1,0,1,1,0,0,1,0,
-0,1,1,0,0,1,1,1,1,0,1,1,1,1,1,1,
-1,1,2,2,2,2,0,2,0,2,2,2,2,2,2,2,
-0,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
-1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,
-1,0,1,1,1,1,1,1,1,0,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,1,1,0,1,1,1,1,0,1,1,1,1,
-1,0,1,0,0,0,1,1,1,1,1,1,1,0,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
-2,2,0,0,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,0x18,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,0x18,2,2,2,2,2,2,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,0x18,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,0x18,2,2,
-2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,2,2,2,0x18,2,2,2,2,2,2,1,2,
-0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x49,0x89,0xc9,0x109,
-0x149,0x189,0x1c9,0x209,0,6,6,6,6,6,6,6,6,6,6,6,
-6,6,6,6,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,6,6,6,6,6,6,6,6,6,6,6,6,
-6,6,6,6,6,6,6,6,6,6,6,0x1b,0x1b,0x1b,0x1b,6,
-6,6,6,6,6,6,6,6,6,6,6,6,6,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-6,0x1b,0x1b,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,6,6,6,6,6,6,6,6,0,
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-6,0,0,6,6,6,6,6,6,6,0,6,6,0,6,6,
-6,6,6,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
-0x249,0x289,0,0,0,0,5,0x1b,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,5,0,0,0,6,6,6,6,6,6,6,4,
-4,4,4,4,4,4,0,0,5,5,5,5,5,5,5,5,
-5,5,5,5,6,6,6,6,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
-0x249,0x289,0,0,0,0,0,0x19,5,5,5,5,5,0,0,0x58b,
-0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,6,6,6,6,6,6,6,0,
-0,0,0,0,0,0,0,0,2,2,2,2,6,6,6,6,
-6,6,6,4,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
-0x249,0x289,0,0,0,0,0x17,0x17,1,1,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,
-0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x78cb,0x794b,0x814b,0x58b,0x5cb,0x60b,0x64b,0x68b,
-0x6cb,0x70b,0x74b,0x78b,0x1b,0x34cb,0x344b,0x3ccb,0x19,0x58b,0x5cb,0x788b,0x78cb,0,0,0,
-0,0,0,0,0,0,0,0,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,
-0xa00b,0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0x984b,0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,
-0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x78cb,0x80cb,0x984b,0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,
-0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x1b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,
-0x900b,0xa00b,0x804b,0x788b,0x344b,0x354b,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,
-0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,
-0xa00b,0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0x18,0x18,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,5,5,5,5,0,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,0,5,5,0,5,0,0,5,
-0,5,5,5,5,5,5,5,5,5,5,0,5,5,5,5,
-0,5,0,5,0,0,0,0,0,0,5,0,0,0,0,5,
-0,5,0,5,0,5,5,5,0,5,5,0,5,0,0,5,
-0,5,0,5,0,5,0,5,0,5,5,0,5,0,0,5,
-5,5,5,0,5,5,5,5,5,5,5,0,5,5,5,5,
-0,5,5,5,5,0,5,0,5,5,5,5,5,5,5,5,
-5,5,0,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,0,0,0,0,0,5,5,5,0,5,5,5,
-5,5,0,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x2cb,0x2cb,0x30b,0x34b,
-0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x54b,0x54b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,
-0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0x1b,0x1b,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x1a,0x1a,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,
-0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,
-0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0x1b,0x1b,0x1b,0,0,0,0,0,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,5,0x705,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,0x645,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,0x645,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,0x685,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,0xcc5,5,5,5,5,
-5,5,5,5,0xf45,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,0xf45,5,5,5,5,5,5,5,5,5,5,5,
-5,5,0x6c5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,0x605,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,0x605,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,0x605,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,0x605,5,5,5,5,
-5,5,5,5,5,5,5,5,5,0x645,5,5,5,5,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-0x785,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
-0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
-0,0x10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,
-0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0,0,
-0,0,0,0
-};
-
-static const UTrie2 propsTrie={
- propsTrie_index,
- propsTrie_index+4532,
- NULL,
- 4532,
- 17744,
- 0xa40,
- 0x1234,
- 0x0,
- 0x0,
- 0x110000,
- 0x5700,
- NULL, 0, FALSE, FALSE, 0, NULL
-};
-
-static const uint16_t propsVectorsTrie_index[31228]={
-0x4e8,0x4f0,0x4f8,0x500,0x518,0x520,0x528,0x530,0x538,0x540,0x548,0x550,0x558,0x560,0x568,0x570,
-0x577,0x57f,0x587,0x58f,0x592,0x59a,0x5a2,0x5aa,0x5b2,0x5ba,0x5c2,0x5ca,0x5d2,0x5da,0x5e2,0x5ea,
-0x5f2,0x5fa,0x601,0x609,0x611,0x619,0x621,0x629,0x631,0x639,0x63e,0x646,0x64d,0x655,0x65d,0x665,
-0x66d,0x675,0x67d,0x685,0x68c,0x694,0x69c,0x6a4,0x6ac,0x6b4,0x6bc,0x6c4,0x6cc,0x6d4,0x6dc,0x6e4,
-0x1a38,0xd5e,0xe35,0x6ec,0x508,0xe9c,0xea4,0x1bf2,0x1300,0x1310,0x12f8,0x1308,0x7c5,0x7cb,0x7d3,0x7db,
-0x7e3,0x7e9,0x7f1,0x7f9,0x801,0x807,0x80f,0x817,0x81f,0x825,0x82d,0x835,0x83d,0x845,0x84d,0x854,
-0x85c,0x862,0x86a,0x872,0x87a,0x880,0x888,0x890,0x898,0x1318,0x8a0,0x8a8,0x8b0,0x8b7,0x8bf,0x8c7,
-0x8cf,0x8d3,0x8db,0x8e2,0x8ea,0x8f2,0x8fa,0x902,0x162c,0x1634,0x90a,0x912,0x91a,0x922,0x92a,0x931,
-0x1692,0x1682,0x168a,0x1973,0x197b,0x1328,0x939,0x1320,0x1572,0x1572,0x1574,0x133c,0x133d,0x1330,0x1332,0x1334,
-0x169a,0x169c,0x941,0x169c,0x949,0x94e,0x956,0x16a1,0x95c,0x169c,0x962,0x96a,0xc39,0x16a9,0x16a9,0x972,
-0x16b9,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,
-0x16ba,0x16ba,0x16ba,0x16b1,0x97a,0x16c2,0x16c2,0x982,0xb59,0xb61,0xb69,0xb71,0x16d2,0x16ca,0x98a,0x992,
-0x99a,0x16dc,0x16e4,0x9a2,0x16da,0x9aa,0x1a40,0xd66,0xb79,0xb81,0xb89,0xb8e,0x18e1,0xc6c,0xc73,0x1849,
-0xc09,0x1a48,0xd6e,0xd76,0xd7e,0xd86,0xf47,0xf48,0x1939,0x193e,0xca8,0xcb0,0x19af,0x19b7,0x1b11,0xe3d,
-0x19bf,0xcf2,0xcfa,0x19c7,0x10f6,0x1196,0xf27,0xd8e,0x1869,0x1851,0x1861,0x1859,0x18f9,0x18f1,0x18b9,0xc19,
-0x1345,0x1345,0x1345,0x1345,0x1348,0x1345,0x1345,0x1350,0x9b2,0x1358,0x9b6,0x9be,0x1358,0x9c6,0x9ce,0x9d6,
-0x1368,0x1360,0x1370,0x9de,0x9e6,0x1378,0x9ee,0x9f6,0x1380,0x1388,0x1390,0x1398,0x9fe,0x13a0,0x13a7,0x13af,
-0x13b7,0x13bf,0x13c7,0x13cf,0x13d7,0x13de,0x13e6,0x13ee,0x13f6,0x13fe,0x1401,0x1403,0x16ec,0x17dc,0x17e2,0x1929,
-0x140b,0xa06,0xa0e,0x1525,0x152a,0x152d,0x1535,0x1413,0x153d,0x153d,0x1423,0x141b,0x142b,0x1433,0x143b,0x1443,
-0x144b,0x1453,0x145b,0x1463,0x17ea,0x1841,0x1983,0x1ad9,0x1473,0x147a,0x1482,0x148a,0x146b,0x1492,0x17f2,0x17f9,
-0x16f4,0x16f4,0x16f4,0x16f4,0x16f4,0x16f4,0x16f4,0x16f4,0x1801,0x1804,0x1801,0x1801,0x180c,0x1813,0x1815,0x181c,
-0x1824,0x1828,0x1828,0x182b,0x1828,0x1828,0x1831,0x1828,0x1871,0x1931,0x198b,0xb96,0xb9c,0x1c36,0x1c3e,0x1d15,
-0x18d1,0xc49,0xc4d,0x1946,0x18c1,0x18c1,0x18c1,0xc21,0x18c9,0xc41,0x1911,0xc98,0xc29,0xc31,0xc31,0x19cf,
-0x1901,0x1993,0xc83,0xc88,0xa16,0x16fc,0x16fc,0xa1e,0x1704,0x1704,0x1704,0x1704,0x1704,0x1704,0xa26,0x6f0,
-0x155a,0x157c,0xa2e,0x1584,0xa36,0x158c,0x1594,0x159c,0xa3e,0xa43,0x15a4,0x15ab,0xa48,0x170c,0x1921,0xc11,
-0xa50,0x1606,0x160d,0x15b3,0x1615,0x161c,0x15bb,0x15bf,0x15d8,0x15d8,0x15da,0x15c7,0x15cf,0x15cf,0x15d0,0x1624,
-0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
-0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
-0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
-0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
-0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
-0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
-0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
-0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
-0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
-0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
-0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
-0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
-0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1717,0x1879,0x1879,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
-0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e9,0x1a30,0x12b5,
-0x171f,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,
-0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,
-0x1725,0x1725,0x1725,0x1725,0xa58,0x172d,0xa60,0x1a50,0x19db,0x19db,0x19db,0x19db,0x19db,0x19db,0x19db,0x19db,
-0x19d7,0xd02,0x19eb,0x19e3,0x19ed,0x1a58,0x1a58,0xd96,0x18d9,0x194e,0x19a3,0x19a7,0x199b,0x1b09,0xcb8,0xcbb,
-0x1909,0xc90,0x1956,0xcc3,0x19f5,0x19f8,0xd0a,0x1a60,0x1a08,0x1a00,0xd12,0xd9e,0x1a68,0x1a6c,0xda6,0xff0,
-0x1a10,0xd1a,0xd22,0x1a74,0x1a84,0x1a7c,0xdae,0xef7,0xe45,0xe4d,0x1c85,0xfa8,0x1d32,0x1d32,0x1a8c,0xdb6,
-0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,
-0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,
-0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,
-0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,
-0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,
-0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,
-0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,
-0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,
-0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,
-0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,
-0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,
-0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,
-0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,
-0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,
-0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,
-0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,
-0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,
-0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,
-0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,
-0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,
-0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,
-0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0xa68,0xdbe,0xdc1,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,
-0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,
-0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
-0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
-0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
-0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
-0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
-0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
-0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
-0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
-0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
-0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
-0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
-0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
-0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x15f1,0x15f1,0x15f1,0x15f1,0x15f1,0x15f1,0x15f1,0x15f1,
-0x15f6,0x15fe,0x1839,0x12bd,0x1919,0x1919,0x12c1,0x12c8,0xa70,0xa78,0xa80,0x14b2,0x14b9,0x14c1,0xa88,0x14c9,
-0x14fa,0x14fa,0x14a2,0x14aa,0x14d1,0x14f1,0x14f2,0x1502,0x14d9,0x149a,0xa90,0x14e1,0xa98,0x14e9,0xaa0,0xaa4,
-0xca0,0x150a,0xaac,0xab4,0x1512,0x1518,0x151d,0xabc,0xacc,0x1562,0x156a,0x154d,0x1552,0xad4,0xadc,0xac4,
-0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,
-0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x1644,0x1644,0x1644,0x1644,
-0x1420,0x1420,0x1460,0x14a0,0x14e0,0x1520,0x1560,0x15a0,0x15dc,0x161c,0x1648,0x1688,0x16c8,0x1708,0x1748,0x1788,
-0x17c8,0x1804,0x1844,0x1884,0x18c4,0x18f8,0x1934,0x1974,0x19b4,0x19f4,0x1a30,0x1a70,0x1ab0,0x1af0,0x1b30,0x1b70,
-0xa80,0xac0,0xb00,0xb40,0xb80,0xa40,0xe75,0xa40,0xe97,0xa40,0xa40,0xa40,0xa40,0xbc0,0x12dd,0x12dd,
-0xed7,0xc00,0xa40,0xa40,0xa40,0xa40,0xf17,0xc2d,0xa40,0xa40,0xc6d,0xcad,0xced,0xd2d,0xe35,0xda5,
-0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,
-0x121d,0x121d,0x121d,0x121d,0xf57,0x125d,0x1092,0x10d2,0x129d,0x10dd,0x131d,0x131d,0x131d,0xf97,0xfb7,0xff7,
-0x135d,0x135d,0x1037,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,
-0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0x1052,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
-0xde5,0xdf5,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
-0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,
-0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x111d,
-0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,
-0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x115d,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0xba4,0xbab,0xbb3,0xbbb,0x1881,0x1881,0x1881,0xbc3,0xbcb,0xbce,0x18b1,0x18a9,0xc01,0xd2a,0xd2e,0xd32,
-0x508,0x508,0x508,0x508,0xd3a,0x1a18,0xd42,0xf3f,0x1735,0xae4,0xaea,0x1000,0xbd6,0x18e9,0xc7b,0x508,
-0x174a,0x173d,0x1742,0x1889,0xbde,0xbe6,0x1134,0x113a,0x1c6d,0xf5d,0x1c5d,0x6f8,0x508,0x508,0x508,0x508,
-0x1c8d,0x1c8d,0x1c8d,0x1c8d,0x1c8d,0x1c8d,0x1c8d,0x1c8d,0x1c8d,0xfb0,0xfb8,0xfc0,0x508,0x508,0x508,0x508,
-0xbee,0xbf1,0xdc9,0x1cd5,0xff8,0x700,0x508,0x1092,0xccb,0xd4a,0x508,0x508,0x1c02,0xeff,0xf07,0x1d1d,
-0xc55,0xc5c,0xc64,0x1a94,0x1cb5,0x508,0x1c95,0xfd0,0x1a9c,0xdd1,0xdd9,0xde1,0x1020,0x708,0x508,0x508,
-0x1aa4,0x1aa4,0x710,0x508,0x1d4a,0x10aa,0x1d42,0x10b2,0x1e0e,0x11ac,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0xde9,0x1e66,0x1291,0x508,0x508,0x1e2e,0x11d4,0x11db,0x718,0x508,0x71c,0x1248,0x11e3,
-0x1b19,0x1b1b,0xe55,0xe5c,0x1aac,0x1ab4,0xdf1,0xf1f,0x1bfa,0xee7,0xeef,0xfc8,0x1c1a,0x1c1e,0x1c26,0x1040,
-0xf93,0xf98,0x724,0x508,0x109a,0x10a2,0x1c7d,0xfa0,0xf75,0xf7b,0xf83,0xf8b,0x508,0x508,0x508,0x508,
-0x1daa,0x1da2,0x1124,0x112c,0x1cfd,0x1cf5,0x1068,0x508,0x508,0x508,0x508,0x508,0x1ce5,0x1028,0x1030,0x1038,
-0x1cad,0x1ca5,0xfe0,0x111c,0x1c2e,0xf2f,0x72c,0x508,0x1078,0x1080,0x508,0x508,0x508,0x508,0x508,0x508,
-0x1e06,0x118e,0x734,0x508,0x508,0x1d0d,0x1d05,0x1070,0x1250,0x1256,0x125e,0x508,0x508,0x11eb,0x11ef,0x11f7,
-0x1dde,0x1dd6,0x1176,0x1dce,0x1dc6,0x73c,0x1cdd,0x1018,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x10da,0x10df,0x10e7,0x10ee,0x110e,0x1114,0x508,0x508,0x115a,0x115e,0x1166,0x119e,0x11a4,0x744,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x11bc,0x508,0x508,0x508,0x508,0x508,0x748,0x1e4e,0x1238,
-0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,
-0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x1963,0xcd3,0xcda,0xcda,0xcda,
-0x196b,0x196b,0x196b,0xce2,0x1d3a,0x1d3a,0x1d3a,0x1d3a,0x1d3a,0x1d3a,0x750,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,
-0x1b23,0xe64,0xfe8,0x758,0x508,0x508,0x75c,0xf37,0x1ccd,0x1cc5,0x1008,0x1010,0x764,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x1e26,0x1e1e,0x11cc,0x508,0x508,0x508,0x1c12,0x1c12,0xf0f,0x1c0a,0xf17,0x508,0x508,0x1106,
-0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,
-0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dbe,0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,
-0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,0x1266,0x126c,0x1286,0x1289,0x1289,0x1289,0x76c,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x1c65,0x1c65,0x1c65,
-0xf50,0xf55,0x774,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x1752,0x1752,0x1752,
-0x1752,0x1752,0x1752,0x1752,0xaf2,0x1762,0xafa,0x1763,0x175a,0x176b,0x1771,0x1779,0xb02,0x18a1,0x18a1,0x77c,
-0x508,0x508,0x508,0x508,0x11c4,0x1891,0x1891,0xbf9,0xcea,0x508,0x508,0x508,0x508,0x17aa,0x17b1,0xb0a,
-0x17b4,0xb12,0xb1a,0xb22,0x17ae,0xb2a,0xb32,0xb3a,0x17b3,0x17bb,0x17aa,0x17b1,0x17ad,0x17b4,0x17bc,0x17ab,
-0x17b2,0x17ae,0xb41,0x1781,0x1789,0x1790,0x1797,0x1784,0x178c,0x1793,0x179a,0xb49,0x17a2,0x1d62,0x1d62,0x1d62,
-0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d52,0x1d55,0x1d52,
-0x1d5c,0x10ca,0x784,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x10fe,0x78c,0x508,
-0x508,0x508,0x508,0x508,0x508,0x1e46,0x11ff,0x794,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x1e56,0x1240,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x1c9d,0x1c9d,0x1c9d,
-0x1c9d,0x1c9d,0x1c9d,0xfd8,0x508,0x1d9a,0x1d92,0x10d2,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x798,0x1e16,0x11b4,0x508,0x508,0x1207,0x1208,0x7a0,0x508,0x508,0x508,0x508,0x508,0xeac,0xeb4,0xebc,
-0xec4,0xecc,0xed4,0xedb,0xedf,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x7a4,0x1048,0x1ced,0x104e,0x1ced,0x1056,0x105b,0x1060,0x1060,0x1d72,0x1d82,0x1d8a,
-0x10ba,0x1d7a,0x1e36,0x10c2,0x1dee,0x1e3e,0x1e3e,0x117e,0x1186,0x121f,0x1225,0x122a,0x1230,0x1e5e,0x1e5e,0x1e5e,
-0x1e5e,0x1274,0x1e5e,0x127a,0x127e,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,
-0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,
-0x7ac,0x7ac,0x7ac,0x7ac,0x7ad,0xb51,0x17c4,0x17c4,0x17c4,0x7b5,0x7b5,0x7b5,0x7b5,0x1899,0x1899,0x1899,
-0x1899,0x1899,0x1899,0x1899,0x7bd,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,
-0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,
-0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,
-0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,
-0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x1a20,0xd52,0x1a28,0x1a28,0xd56,0xe6c,0xe74,0xe7c,0x1ae9,0x1ad1,0x1af1,
-0x1af9,0x1ae1,0xe01,0xe05,0xe0c,0xe14,0xe1b,0xe23,0xe2b,0xe2d,0xe2d,0xe2d,0xe2d,0x1b5a,0x1b62,0x1b5a,
-0x1b68,0x1b70,0x1b3b,0x1b78,0x1b80,0x1b5a,0x1b88,0x1b90,0x1b97,0x1b9f,0x1b43,0x1b5a,0x1ba4,0x1b4b,0x1b52,0x1bac,
-0x1bb2,0x1c4e,0x1c55,0x1c46,0x1bba,0x1bc2,0x1bca,0x1bd2,0x1cbd,0x1bda,0x1be2,0xe84,0xe8c,0x1b2b,0x1b2b,0x1b2b,
-0xe94,0x1c75,0x1c75,0xf65,0xf6d,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,
-0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abe,0x1abc,0x1ac6,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,
-0x1ac9,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0xdf9,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,
-0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,0x1d2a,0x1d25,0x1d25,0x1d25,0x1088,0x108a,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,
-0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,
-0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,
-0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,
-0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1142,0x1b33,0x1de6,0x1de6,0x1de6,0x1de6,0x1de6,0x1de6,0x1de6,0x114a,
-0x1152,0x1210,0x1217,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x116e,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
-0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,
-0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,
-0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,
-0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x12d0,0x1299,0x1b01,
-0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,
-0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,
-0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x12a1,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
-0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
-0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
-0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
-0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,
-0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x12d8,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
-0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
-0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
-0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x12a5,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,
-0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,
-0x1e6e,0x12ad,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
-0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
-0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
-0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
-0x1299,0x12a5,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,
-0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,
-0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,
-0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x12e0,0x1bea,0x1bea,0x1bea,0x1bea,
-0x1bea,0x1bea,0x12e8,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,
-0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,
-0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,
-0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,
-0x1d6a,0x1d6a,0x12f0,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1664,0x1664,0x1664,
-0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,
-0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,
-0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,
-0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1654,0x166c,0x166c,0x166c,
-0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,
-0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,
-0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,
-0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x165c,0x1664,0x1664,0x1664,
-0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,
-0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,
-0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,
-0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x166c,0x166c,0x166c,
-0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,
-0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,
-0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,
-0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x17cc,0x17cc,0x17cc,
-0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,
-0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,
-0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,
-0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x1b01,0x1b01,0x1b01,
-0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,
-0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,
-0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,
-0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1d6a,0x1d6a,0x1d6a,
-0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,
-0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,
-0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,
-0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1db2,0x1db2,0x1db2,
-0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,
-0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,
-0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,
-0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1df6,0x1df6,0x1df6,
-0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,
-0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,
-0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,
-0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1e6e,0x1e6e,0x1e6e,
-0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,
-0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,
-0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,
-0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x4e7,0x4e7,0x4e7,
-0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2ca,0x2d3,0x2cd,0x2cd,0x2d0,0x2c7,0x2c7,
-0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,
-0x7fb,0x7f5,0x7da,0x7d1,0x7c8,0x7c5,0x7bc,0x7d7,0x7c2,0x7ce,0x7d1,0x7ec,0x7e3,0x7d4,0x7f8,0x7cb,
-0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7e0,0x7dd,0x7e6,0x7e6,0x7e6,0x7f5,
-0x7bc,0x807,0x807,0x807,0x807,0x807,0x807,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,
-0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x7c2,0x7c8,0x7ce,0x7f2,0x7b6,
-0x7ef,0x804,0x804,0x804,0x804,0x804,0x804,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,
-0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7c2,0x7e9,0x7bf,0x7e6,0x2c7,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2e5,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,
-0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,
-0x2d9,0x651,0x810,0x813,0x657,0x813,0x80d,0x64e,0x645,0x2df,0x663,0x2e2,0x816,0x63c,0x65a,0x80a,
-0x654,0x660,0x642,0x642,0x648,0x2dc,0x64e,0x64b,0x645,0x642,0x663,0x2e2,0x63f,0x63f,0x63f,0x651,
-0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x66c,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,
-0x66c,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x65d,0x66c,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x66c,0x666,
-0x669,0x669,0x2e8,0x2e8,0x2e8,0x2e8,0x666,0x2e8,0x669,0x669,0x669,0x2e8,0x669,0x669,0x2e8,0x2e8,
-0x666,0x2e8,0x669,0x669,0x2e8,0x2e8,0x2e8,0x65d,0x666,0x669,0x669,0x2e8,0x669,0x2e8,0x666,0x2e8,
-0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,
-0x2f4,0x66f,0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x2ee,
-0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x678,0x66f,0x2f7,0x2ee,0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x2ee,
-0x2f7,0x66f,0x67b,0x675,0x2f7,0x2ee,0x2f7,0x2ee,0x66f,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x67b,
-0x675,0x678,0x66f,0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x672,0x67e,0x678,0x66f,0x2f7,0x672,0x2f7,0x2ee,
-0x2f7,0x2ee,0x678,0x66f,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,
-0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x678,0x66f,0x2f7,0x2ee,0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x2ee,
-0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f1,
-0x2fa,0x306,0x306,0x2fa,0x306,0x2fa,0x306,0x306,0x2fa,0x306,0x306,0x306,0x2fa,0x2fa,0x306,0x306,
-0x306,0x306,0x2fa,0x306,0x306,0x2fa,0x306,0x306,0x306,0x2fa,0x2fa,0x2fa,0x306,0x306,0x2fa,0x306,
-0x309,0x2fd,0x306,0x2fa,0x306,0x2fa,0x306,0x306,0x2fa,0x306,0x2fa,0x2fa,0x306,0x2fa,0x306,0x309,
-0x2fd,0x306,0x306,0x306,0x2fa,0x306,0x2fa,0x306,0x306,0x2fa,0x2fa,0x303,0x306,0x2fa,0x2fa,0x2fa,
-0x303,0x303,0x303,0x303,0x30c,0x30c,0x300,0x30c,0x30c,0x300,0x30c,0x30c,0x300,0x309,0x681,0x309,
-0x681,0x309,0x681,0x309,0x681,0x309,0x681,0x309,0x681,0x309,0x681,0x309,0x681,0x2fa,0x309,0x2fd,
-0x309,0x2fd,0x309,0x2fd,0x306,0x2fa,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,
-0x2fd,0x30c,0x30c,0x300,0x309,0x2fd,0x9ea,0x9ea,0x9ed,0x9e7,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,
-0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,
-0x309,0x2fd,0x309,0x2fd,0x9ed,0x9e7,0x9ed,0x9e7,0x9ea,0x9e4,0x9ed,0x9e7,0xbaf,0xcb7,0x9ea,0x9e4,
-0x9ea,0x9e4,0x9ed,0x9e7,0x9ed,0x9e7,0x9ed,0x9e7,0x9ed,0x9e7,0x9ed,0x9e7,0x9ed,0x9e7,0x9ed,0x9e7,
-0xcb7,0xcb7,0xcb7,0xdb6,0xdb6,0xdb6,0xdb9,0xdb9,0xdb6,0xdb9,0xdb9,0xdb6,0xdb6,0xdb9,0xefa,0xefd,
-0xefd,0xefd,0xefd,0xefa,0xefd,0xefa,0xefd,0xefa,0xefd,0xefa,0xefd,0xefa,0x30f,0x684,0x30f,0x30f,
-0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x684,0x30f,0x30f,
-0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,
-0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x312,0x30f,0x30f,0x30f,
-0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,
-0x30f,0x9f0,0x9f0,0x9f0,0x9f0,0x9f0,0xcba,0xcba,0x327,0x327,0x327,0x327,0x327,0x327,0x327,0x327,
-0x327,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31b,0x31b,0x318,0x318,0x68a,0x318,0x31e,0x68d,
-0x321,0x68d,0x68d,0x68d,0x321,0x68d,0x31e,0x31e,0x690,0x324,0x318,0x318,0x318,0x318,0x318,0x318,
-0x687,0x687,0x687,0x687,0x315,0x687,0x318,0xb25,0x327,0x327,0x327,0x327,0x327,0x318,0x318,0x318,
-0x318,0x318,0x9f9,0x9f9,0x9f6,0x9f3,0x9f6,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,
-0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,
-0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,
-0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,
-0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,
-0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x696,0x696,0x94b,0x696,0x696,0x94e,0xb28,0xb28,
-0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xc6c,0xd83,0xd83,0xd83,0xd83,0xd83,0xd83,0xd83,0xd83,
-0xebe,0xebe,0xebe,0xebe,0xec1,0xd86,0xd86,0xd86,0x699,0x699,0xb2b,0xcb4,0xcb4,0xcb4,0xcb4,0xcb4,
-0xcb4,0xcb4,0xcb4,0xcb4,0xcb4,0xcb4,0xcb4,0xcb4,0xfa8,0xfa5,0xfa8,0xfa5,0x333,0x33c,0xfa8,0xfa5,
-9,9,0x342,0xf00,0xf00,0xf00,0x32a,0x14fd,9,9,9,9,0x33f,0x32d,0x351,0x330,
-0x351,0x351,0x351,9,0x351,9,0x351,0x351,0x348,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,
-0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,9,0x69f,0x69f,0x69f,0x69f,0x69f,
-0x69f,0x69f,0x351,0x351,0x348,0x348,0x348,0x348,0x348,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,
-0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x345,0x69c,0x69c,0x69c,0x69c,0x69c,
-0x69c,0x69c,0x348,0x348,0x348,0x348,0x348,0xfa8,0x354,0x354,0x357,0x351,0x351,0x354,0x34b,0x9fc,
-0xbb8,0xbb5,0x34e,0x9fc,0x34e,0x9fc,0x34e,0x9fc,0x34e,0x9fc,0x339,0x336,0x339,0x336,0x339,0x336,
-0x339,0x336,0x339,0x336,0x339,0x336,0x339,0x336,0x354,0x354,0x34b,0x345,0xb67,0xb64,0xbb2,0xcc3,
-0xcc0,0xcc6,0xcc3,0xcc0,0xdbc,0xdbf,0xdbf,0xdbf,0xa0b,0x6ab,0x363,0x366,0x363,0x363,0x363,0x366,
-0x363,0x363,0x363,0x363,0x366,0xa0b,0x366,0x363,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,
-0x6a8,0x6ab,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,
-0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,
-0x6a2,0x6a5,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,
-0x6a2,0x6a2,0x6a2,0x6a2,0xa05,0x6a5,0x35d,0x360,0x35d,0x35d,0x35d,0x360,0x35d,0x35d,0x35d,0x35d,
-0x360,0xa05,0x360,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,
-0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x366,0x360,0x363,0x35d,0x363,0x35d,
-0x363,0x35d,0x363,0x35d,0x363,0x35d,0x35a,0x957,0x95a,0x93c,0x93c,0x114f,0x9ff,0x9ff,0xbbe,0xbbb,
-0xa08,0xa02,0xa08,0xa02,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,
-0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,
-0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,
-0x363,0x35d,0x363,0x35d,0x363,0x366,0x360,0x363,0x35d,0xbbe,0xbbb,0x363,0x35d,0xbbe,0xbbb,0x363,
-0x35d,0xbbe,0xbbb,0xf03,0x366,0x360,0x366,0x360,0x363,0x35d,0x366,0x360,0x363,0x35d,0x366,0x360,
-0x366,0x360,0x366,0x360,0x363,0x35d,0x366,0x360,0x366,0x360,0x366,0x360,0x363,0x35d,0x366,0x360,
-0xa0b,0xa05,0x366,0x360,0x366,0x360,0x366,0x360,0x366,0x360,0xdc5,0xdc2,0x366,0x360,0xf06,0xf03,
-0xf06,0xf03,0xf06,0xf03,0xc2d,0xc2a,0xc2d,0xc2a,0xc2d,0xc2a,0xc2d,0xc2a,0xc2d,0xc2a,0xc2d,0xc2a,
-0xc2d,0xc2a,0xc2d,0xc2a,0xf33,0xf30,0xf33,0xf30,0x1023,0x1020,0x1023,0x1020,0x1023,0x1020,0x1023,0x1020,
-0x1023,0x1020,0x1023,0x1020,0x1023,0x1020,0x1023,0x1020,0x1188,0x1185,0x1371,0x136e,0x1536,0x1533,0x1536,0x1533,
-0x1536,0x1533,0x1536,0x1533,0xc,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,
-0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0xc,
-0xc,0x37b,0x369,0x369,0x369,0x36f,0x369,0x36c,0x1941,0x372,0x372,0x372,0x372,0x372,0x372,0x372,
-0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,
-0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x375,0x1941,0x37e,0xa0e,0xc,
-0xc,0x1500,0x1500,0x141c,0xf,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,
-0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0xdc8,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,
-0x97e,0x97e,0x97e,0x97e,0x381,0x381,0x381,0x381,0x381,0x381,0x381,0x381,0x381,0x381,0xf09,0x381,
-0x381,0x381,0x38d,0x381,0x384,0x381,0x381,0x390,0x981,0xdcb,0xdce,0xdcb,0xf,0xf,0xf,0xf,
-0xf,0xf,0xf,0xf,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,
-0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0xf,
-0xf,0xf,0xf,0x1944,0x393,0x393,0x393,0x38a,0x387,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
-0xf,0xf,0xf,0xf,0xcdb,0xcdb,0xcdb,0xcdb,0x141f,0x1503,0xfb1,0xfb1,0xfb1,0xfae,0xfae,0xdd4,
-0x8c7,0xcd5,0xcd2,0xcd2,0xcc9,0xcc9,0xcc9,0xcc9,0xcc9,0xcc9,0xfab,0xfab,0xfab,0xfab,0xfab,0x8c4,
-0x14fa,0x12,0xdd7,0x8ca,0x1338,0x3ae,0x3b1,0x3b1,0x3b1,0x3b1,0x3b1,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,
-0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0xfb4,
-0xfb4,0xfb4,0xfb4,0xfb4,0x8cd,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x942,
-0x942,0x942,0x942,0x942,0x942,0x942,0x942,0xb5e,0xb5e,0xb5e,0xcc9,0xccf,0xccc,0xdd1,0xdd1,0xdd1,
-0xdd1,0xdd1,0xdd1,0x1335,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x3a8,0x3a5,
-0x3a2,0x39f,0xbc1,0xbc1,0x93f,0x3ae,0x3ae,0x3ba,0x3ae,0x3b4,0x3b4,0x3b4,0x3b4,0x3ae,0x3ae,0x3ae,
-0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,
-0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,
-0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,
-0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0xa14,0xa14,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0xa14,
-0x3b1,0x3ae,0x3b1,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0xa14,
-0x3ae,0x3ae,0x3ae,0x3b1,0x95d,0x3ae,0x399,0x399,0x399,0x399,0x399,0x399,0x399,0x396,0x39f,0x39c,
-0x39c,0x399,0x399,0x399,0x399,0x3b7,0x3b7,0x399,0x399,0x39f,0x39c,0x39c,0x39c,0x399,0xcd8,0xcd8,
-0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0xa14,0xa14,0xa14,0xa11,0xa11,0xcd8,
-0xa29,0xa29,0xa29,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa20,0xa23,0xa20,0x15,0xa2c,
-0xa26,0xa17,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,
-0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xcde,0xcde,0xcde,
-0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,
-0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0x15,0x15,0xcde,0xcde,0xcde,
-0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,
-0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0x1035,0x1035,
-0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,
-0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,
-0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,
-0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,
-0xa2f,0xbc4,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
-0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,
-0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,
-0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf42,0xf42,0xf42,0xf42,0xf42,
-0xf42,0xf42,0xf42,0xf42,0xf51,0xf51,0xf45,0xf45,0xf48,0xf57,0xf54,0x10b,0x10b,0x1968,0x196b,0x196b,
-0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x252,0x252,0x252,0x252,0x252,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0xb37,0xb37,0xb3a,0xb3a,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0x72,0x72,0x72,0x72,
-0x15ba,0x15ba,0x15ba,0x15ba,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x15b7,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x174,0x174,0x174,0x174,0x174,0x174,0x174,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,
-0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x18e7,0x18ea,0x18ea,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x261,0x261,0x261,0x261,0x261,0x261,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x1b1b,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,
-0x17a9,0x17a9,0x17a9,0x17a9,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x1e9,0x1e9,0x1e9,0x1e9,0x1665,0x1665,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,
-0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x1713,0x1713,0x1713,0x1713,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0xe2e,0xe2e,0xe2b,0xe2b,0xe2b,0xe2e,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x225,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x1845,0x1845,0x231,0x1845,0x1845,0x231,0x1845,0x1845,0x1845,0x1845,0x1845,0x231,0x231,0x231,0x231,0x231,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x282,0x282,0x282,0x282,0x1a70,0x1a6a,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x267,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,
-0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x97b,0x97b,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-3,3,0x97b,0x97b,6,6,6,6,6,6,6,6,6,6,6,6,
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-6,6,6,6,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,
-0xd8c,0xd8c,0xd8c,0xd8c,6,6,6,6,6,6,6,6,6,6,6,6,
-6,6,6,6,0x1509,0x3d5,0x3e4,0x3e4,0x1b,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,
-0x3ea,0x1b,0x1b,0x3ea,0x3ea,0x1b,0x1b,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,
-0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x1b,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x1b,0x3ea,0x1b,
-0x1b,0x1b,0x3ea,0x3ea,0x3ea,0x3ea,0x1b,0x1b,0x3d8,0xce4,0x3d5,0x3e4,0x3e4,0x3d5,0x3d5,0x3d5,
-0x3d5,0x1b,0x1b,0x3e4,0x3e4,0x1b,0x1b,0x3e7,0x3e7,0x3db,0xddd,0x1b,0x1b,0x1b,0x1b,0x1b,
-0x1b,0x1b,0x1b,0x3d5,0x1b,0x1b,0x1b,0x1b,0x3ed,0x3ed,0x1b,0x3ed,0x3ea,0x3ea,0x3d5,0x3d5,
-0x1b,0x1b,0x966,0x966,0x966,0x966,0x966,0x966,0x966,0x966,0x966,0x966,0x3ea,0x3ea,0x3e1,0x3e1,
-0x3de,0x3de,0x3de,0x3de,0x3de,0x3e1,0x3de,0x115e,0x18a2,0x189f,0x1947,0x1b,0x1e,0xce7,0x3f0,0xcea,
-0x1e,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x1e,0x1e,0x1e,0x1e,0x3fc,0x3fc,0x1e,0x1e,0x3fc,
-0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x1e,0x3fc,0x3fc,
-0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x1e,0x3fc,0x3ff,0x1e,0x3fc,0x3ff,0x1e,0x3fc,0x3fc,0x1e,0x1e,
-0x3f3,0x1e,0x3f9,0x3f9,0x3f9,0x3f0,0x3f0,0x1e,0x1e,0x1e,0x1e,0x3f0,0x3f0,0x1e,0x1e,0x3f0,
-0x3f0,0x3f6,0x1e,0x1e,0x1e,0xfbd,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x3ff,0x3ff,0x3ff,
-0x3fc,0x1e,0x3ff,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x969,0x969,0x969,0x969,0x969,0x969,
-0x969,0x969,0x969,0x969,0x3f0,0x3f0,0x3fc,0x3fc,0x3fc,0xfbd,0x194a,0x1e,0x1e,0x1e,0x1e,0x1e,
-0x1e,0x1e,0x1e,0x1e,0x21,0x402,0x402,0x40b,0x21,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,
-0xcf3,0x40e,0x21,0x40e,0x40e,0x40e,0x21,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,
-0x40e,0x40e,0x40e,0x40e,0x40e,0x21,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x21,0x40e,0x40e,
-0x21,0x40e,0x40e,0x40e,0x40e,0x40e,0x21,0x21,0x405,0x40e,0x40b,0x40b,0x40b,0x402,0x402,0x402,
-0x402,0x402,0x21,0x402,0x402,0x40b,0x21,0x40b,0x40b,0x408,0x21,0x21,0x40e,0x21,0x21,0x21,
-0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x40e,0xcf3,0xced,0xced,
-0x21,0x21,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x1422,0xcf0,0x21,0x21,
-0x21,0x21,0x21,0x21,0x21,0x1725,0x18a5,0x18a5,0x18a5,0x18a8,0x18a8,0x18a8,0x24,0x411,0x420,0x420,
-0x24,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x24,0x24,0x426,0x426,0x24,0x24,0x426,
-0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x24,0x426,0x426,
-0x426,0x426,0x426,0x426,0x426,0x24,0x426,0x426,0x24,0xcf6,0x426,0x426,0x426,0x426,0x24,0x24,
-0x414,0x426,0x411,0x411,0x420,0x411,0x411,0x411,0xfc0,0x24,0x24,0x420,0x423,0x24,0x24,0x423,
-0x423,0x417,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x1ab5,0x411,0x411,0x24,0x24,0x24,0x24,
-0x429,0x429,0x24,0x426,0x426,0x426,0xfc0,0xfc0,0x24,0x24,0x41d,0x41d,0x41d,0x41d,0x41d,0x41d,
-0x41d,0x41d,0x41d,0x41d,0x41a,0xcf6,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x24,0x24,0x24,0x24,
-0x24,0x24,0x24,0x24,0x27,0x27,0x42c,0x438,0x27,0x438,0x438,0x438,0x438,0x438,0x438,0x27,
-0x27,0x27,0x438,0x438,0x438,0x27,0x438,0x438,0x43b,0x438,0x27,0x27,0x27,0x438,0x438,0x27,
-0x438,0x27,0x438,0x438,0x27,0x27,0x27,0x438,0x438,0x27,0x27,0x27,0x438,0x438,0x438,0x27,
-0x27,0x27,0x438,0x438,0x438,0x438,0x438,0x438,0x438,0x438,0xde0,0x438,0x438,0x438,0x27,0x27,
-0x27,0x27,0x42c,0x432,0x42c,0x432,0x432,0x27,0x27,0x27,0x432,0x432,0x432,0x27,0x435,0x435,
-0x435,0x42f,0x27,0x27,0xfc3,0x27,0x27,0x27,0x27,0x27,0x27,0x42c,0x27,0x27,0x27,0x27,
-0x27,0x27,0x27,0x27,0x27,0x27,0xef7,0x972,0x972,0x972,0x972,0x972,0x972,0x972,0x972,0x972,
-0x96f,0x96f,0x96f,0xdb0,0xcf9,0xcf9,0xcf9,0xcf9,0xcf9,0xcfc,0xcf9,0x27,0x27,0x27,0x27,0x27,
-0x150c,0x44a,0x44a,0x44a,0x194d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x2a,0x44d,0x44d,
-0x44d,0x2a,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,
-0x44d,0x2a,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x150f,0x44d,0x44d,0x44d,
-0x44d,0x44d,0x2a,0x2a,0x2a,0xfcc,0x43e,0x43e,0x43e,0x44a,0x44a,0x44a,0x44a,0x2a,0x43e,0x43e,
-0x441,0x2a,0x43e,0x43e,0x43e,0x444,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x43e,0x43e,0x2a,
-0xfcc,0xfcc,0x1728,0x2a,0x2a,0x2a,0x2a,0x2a,0x44d,0x44d,0xfc6,0xfc6,0x2a,0x2a,0x447,0x447,
-0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x1a19,
-0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0x17e5,0x1512,0x456,0x456,0x1950,0x45c,0x45c,0x45c,
-0x45c,0x45c,0x45c,0x45c,0x45c,0x2d,0x45c,0x45c,0x45c,0x2d,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,
-0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x2d,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,
-0x45c,0x45c,0x45c,0x45c,0x2d,0x45c,0x45c,0x45c,0x45c,0x45c,0x2d,0x2d,0xcff,0xd02,0x456,0x450,
-0x459,0x456,0x450,0x456,0x456,0x2d,0x450,0x459,0x459,0x2d,0x459,0x459,0x450,0x453,0x2d,0x2d,
-0x2d,0x2d,0x2d,0x2d,0x2d,0x450,0x450,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x45c,0x2d,
-0x45c,0x45c,0xf0f,0xf0f,0x2d,0x2d,0x975,0x975,0x975,0x975,0x975,0x975,0x975,0x975,0x975,0x975,
-0x2d,0xf12,0xf12,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,
-0x18ab,0x1515,0x468,0x468,0x1ab8,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x30,0x46e,0x46e,
-0x46e,0x30,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,
-0x468,0x45f,0x45f,0x45f,0xfcf,0x30,0x468,0x468,0x468,0x30,0x46b,0x46b,0x46b,0x462,0x134a,0x17e8,
-0x30,0x30,0x30,0x30,0x17eb,0x17eb,0x17eb,0x45f,0x17e8,0x17e8,0x17e8,0x17e8,0x17e8,0x17e8,0x17e8,0x172b,
-0x46e,0x46e,0xfcf,0xfcf,0x30,0x30,0x465,0x465,0x465,0x465,0x465,0x465,0x465,0x465,0x465,0x465,
-0xfd2,0xfd2,0xfd2,0xfd2,0xfd2,0xfd2,0x17e8,0x17e8,0x17e8,0xfd5,0xfd8,0xfd8,0xfd8,0xfd8,0xfd8,0xfd8,
-0x33,0x1abb,0xa3e,0xa3e,0x33,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,
-0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0x33,0x33,0x33,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,
-0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0x33,0xa44,
-0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0x33,0xa44,0x33,0x33,0xa44,0xa44,0xa44,0xa44,
-0xa44,0xa44,0xa44,0x33,0x33,0x33,0xa38,0x33,0x33,0x33,0x33,0xa35,0xa3e,0xa3e,0xa35,0xa35,
-0xa35,0x33,0xa35,0x33,0xa3e,0xa3e,0xa41,0xa3e,0xa41,0xa41,0xa41,0xa35,0x33,0x33,0x33,0x33,
-0x33,0x33,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x33,0x33,0xa3e,0xa3e,
-0xa3b,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x36,0x489,0x489,0x489,
-0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,
-0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x474,0x489,0x486,
-0x474,0x474,0x474,0x474,0x474,0x474,0x47a,0x36,0x36,0x36,0x36,0x471,0x48f,0x48f,0x48f,0x48f,
-0x48f,0x489,0x48c,0x477,0x477,0x477,0x477,0x477,0x477,0x474,0x477,0x47d,0x483,0x483,0x483,0x483,
-0x483,0x483,0x483,0x483,0x483,0x483,0x480,0x480,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,
-0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,
-0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x39,0x49e,0x49e,0x39,0x49e,0x39,0x1a1f,0x49e,
-0x49e,0x1a1f,0x49e,0x39,0x1a1f,0x49e,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x49e,0x49e,0x49e,0x49e,
-0x1a1f,0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x1a1f,0x49e,0x49e,0x49e,0x39,0x49e,0x39,0x49e,
-0x1a1f,0x1a1f,0x49e,0x49e,0x1a1f,0x49e,0x49e,0x49e,0x49e,0x492,0x49e,0x49b,0x492,0x492,0x492,0x492,
-0x492,0x492,0x1a1c,0x492,0x492,0x49e,0x39,0x39,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x39,0x4a4,0x39,
-0x495,0x495,0x495,0x495,0x495,0x492,0x39,0x39,0x498,0x498,0x498,0x498,0x498,0x498,0x498,0x498,
-0x498,0x498,0x39,0x39,0x4a1,0x4a1,0x1425,0x1425,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,
-0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,
-0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x9b7,0x9b7,0x9b7,0x9ba,0x9b7,0x9b7,0x9b7,0x9b7,
-0x3c,0x9b7,0x9b7,0x9b7,0x9b7,0x9ba,0x9b7,0x9b7,0x9b7,0x9b7,0x9ba,0x9b7,0x9b7,0x9b7,0x9b7,0x9ba,
-0x9b7,0x9b7,0x9b7,0x9b7,0x9ba,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,
-0x9b7,0x9ba,0xa53,0xfe4,0xfe4,0x3c,0x3c,0x3c,0x3c,0x984,0x984,0x987,0x984,0x987,0x987,0x990,
-0x987,0x990,0x984,0x984,0x984,0x984,0x984,0x9b1,0x984,0x987,0x98a,0x98a,0x98d,0x996,0x98a,0x98a,
-0x9b7,0x9b7,0x9b7,0x9b7,0x1353,0x134d,0x134d,0x134d,0x984,0x984,0x984,0x987,0x984,0x984,0xa47,0x984,
-0x3c,0x984,0x984,0x984,0x984,0x987,0x984,0x984,0x984,0x984,0x987,0x984,0x984,0x984,0x984,0x987,
-0x984,0x984,0x984,0x984,0x987,0x984,0xa47,0xa47,0xa47,0x984,0x984,0x984,0x984,0x984,0x984,0x984,
-0xa47,0x987,0xa47,0xa47,0xa47,0x3c,0xa50,0xa50,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4a,0xa4d,
-0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0x3c,0xfdb,0xa4d,0xde3,0xde3,0xfde,0xfe1,0xfdb,0x1161,0x1161,0x1161,
-0x1161,0x1350,0x1350,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,
-0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,
-0x3c,0x3c,0x3c,0x3c,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x3f,0x142b,0x3f,0x3f,0x3f,0x3f,
-0x3f,0x142b,0x3f,0x3f,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,
-0x4aa,0x4aa,0x4aa,0x4aa,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,0xa7d,0x42,0xa7d,0xa7d,
-0xa7d,0xa7d,0x42,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0x42,0xa7d,0x42,0xa7d,0xa7d,
-0xa7d,0xa7d,0x42,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,0xa7d,0x42,0xa7d,0xa7d,
-0xa7d,0xa7d,0x42,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,
-0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,0xa7d,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0x42,0x42,
-0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0x42,0xa7d,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0x42,0x42,
-0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0x42,
-0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,
-0xa7d,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0x42,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,
-0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,
-0xa7d,0xa7d,0xa7d,0x42,0x42,0x1356,0x1356,0xdec,0xdef,0xa77,0xa80,0xa74,0xa74,0xa74,0xa74,0xa80,
-0xa80,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa71,0xa71,0xa71,0xa71,0xa71,0xa71,
-0xa71,0xa71,0xa71,0xa71,0xa71,0x42,0x42,0x42,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,
-0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0x1731,0x45,0x45,
-0x172e,0x172e,0x172e,0x172e,0x172e,0x172e,0x45,0x45,0xa95,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,
-0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,
-0xa98,0xa98,0xa98,0xa92,0xa8f,0x48,0x48,0x48,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,
-0xa9e,0xa9e,0xa9e,0xa9b,0xa9b,0xa9b,0xa9e,0xa9e,0xa9e,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,
-0x151b,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xaa1,0xabf,
-0xabf,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa7,0xaa4,0xab6,0xab6,0xab9,0xac2,
-0xab0,0xaad,0xab6,0xab3,0xac2,0xd05,0x4e,0x4e,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,
-0xabc,0xabc,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,
-0xd08,0xd08,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0xad1,0xad1,0xb52,0xb55,0xad7,0xb4f,0xad4,0xad1,
-0xada,0xae9,0xadd,0xaec,0xaec,0xaec,0xac8,0x51,0xae0,0xae0,0xae0,0xae0,0xae0,0xae0,0xae0,0xae0,
-0xae0,0xae0,0x51,0x51,0x51,0x51,0x51,0x51,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,
-0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,
-0x1953,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,
-0xae3,0xacb,0x1002,0x51,0x51,0x51,0x51,0x51,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,
-0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,
-0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x54,0x54,
-0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x54,0x54,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,
-0x54,0x4ce,0x54,0x4ce,0x54,0x4ce,0x54,0x4ce,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,
-0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,
-0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x54,0x54,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,
-0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x54,0x4cb,0x4cb,
-0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4c5,0x4cb,0x4c5,0x4c5,0x4c2,0x4cb,0x4cb,0x4cb,0x54,0x4cb,0x4cb,
-0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4c2,0x4c2,0x4c2,0x4cb,0x4cb,0x4cb,0x4cb,0x54,0x54,0x4cb,0x4cb,
-0x4ce,0x4ce,0x4ce,0x4ce,0x54,0x4c2,0x4c2,0x4c2,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,
-0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4c2,0x4c2,0x4c2,0x54,0x54,0x4cb,0x4cb,0x4cb,0x54,0x4cb,0x4cb,
-0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4c8,0x4c5,0x54,0xbca,0xbcd,0xbcd,0xbcd,0x100b,0x57,0x14f7,0x14f7,
-0x14f7,0x14f7,0x4d7,0x4d7,0x4d7,0x4d7,0x4d7,0x4d7,0x522,0xbdf,0x5a,0x5a,0x6e1,0x522,0x522,0x522,
-0x522,0x522,0x528,0x53a,0x528,0x534,0x52e,0x6e4,0x51f,0x6de,0x6de,0x6de,0x6de,0x51f,0x51f,0x51f,
-0x51f,0x51f,0x525,0x537,0x525,0x531,0x52b,0x5a,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0x1359,0x1359,0x1359,
-0x1359,0x1359,0x1359,0x1359,0x1359,0x5a,0x5a,0x5a,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,
-0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549,
-0x549,0x549,0x549,0x549,0x549,0x546,0x546,0x546,0x546,0x549,0xafb,0xafe,0xbe5,0xbeb,0xbeb,0xbe8,
-0xbe8,0xbe8,0xbe8,0xe01,0xf15,0xf15,0xf15,0xf15,0x114c,0x60,0x60,0x60,0x60,0x60,0x60,0x60,
-0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x579,0x579,0x579,0xb07,0xf1e,0x1011,0x1011,0x1011,
-0x1011,0x12ab,0x1737,0x1737,0x63,0x63,0x63,0x63,0x70b,0x70b,0x70b,0x70b,0x70e,0x70e,0x70e,0x70e,
-0x70e,0x70e,0x585,0x585,0x582,0x582,0x582,0x582,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0xb13,0xb13,0x66,
-0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,
-0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x5af,0x5af,0x5af,0x5af,0x5af,0x5af,0x5af,0x5af,
-0x5af,0x5af,0x5af,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,
-0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,
-0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,
-0xb2e,0xb2e,0x6c,0xb2e,0xb2e,0xb2e,0xb2e,0xb31,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,
-0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb31,0x6c,0x6c,0x6c,0x6c,
-0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,
-0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0x6f,0x6f,
-0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x75,0x843,0x83d,0x843,0x83d,0x843,0x83d,0x843,
-0x83d,0x843,0x83d,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,
-0x840,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,0x83d,0x83d,0x83d,0x843,0x83d,0x843,0x83d,0x843,
-0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x843,0x83d,0x83d,0x83d,0x83d,0x83d,0x840,0xc93,0xc93,0x75,
-0x75,0x954,0x954,0x91e,0x91e,0x846,0x849,0xc90,0x78,0x78,0x78,0x78,0x78,0x85b,0x85b,0x85b,
-0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,
-0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x113a,0x191a,0x1a01,0x7b,0x85e,0x85e,0x85e,
-0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x7b,
-0x927,0x927,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,
-0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,
-0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0xd98,0xd98,0x7e,
-0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0x81,0x81,0x81,
-0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,
-0xb4c,0xc9c,0xb4c,0xb4c,0xb4c,0xc9c,0xb4c,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,
-0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,
-0x9db,0x9db,0x9db,0x9db,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,
-0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,
-0x60c,0x60c,0x60c,0x60c,0x60c,0x60c,0x60c,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,
-0x8a,0x8a,0x8a,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x8a,0x8a,0x8a,0x8a,0x8a,0xb1f,0x5fd,0x603,
-0x609,0x609,0x609,0x609,0x609,0x609,0x609,0x609,0x609,0x600,0x603,0x603,0x603,0x603,0x603,0x603,
-0x603,0x603,0x603,0x603,0x603,0x603,0x603,0x8a,0x603,0x603,0x603,0x603,0x603,0x8a,0x603,0x8a,
-0x603,0x603,0x8a,0x603,0x603,0x8a,0x603,0x603,0x603,0x603,0x603,0x603,0x603,0x603,0x603,0x606,
-0x61e,0x618,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,
-0x136b,0x136b,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,
-0x8d,0x8d,0x8d,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x61e,0x618,
-0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,
-0x61b,0x618,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x618,0x61b,0x618,0x618,0x61b,0x61b,0x618,0x618,
-0x618,0x618,0x618,0x61b,0x618,0x618,0x61b,0x618,0x61b,0x61b,0x61b,0x618,0x61b,0x61b,0x61b,0x61b,
-0x8d,0x8d,0x61b,0x61b,0x61b,0x61b,0x618,0x618,0x61b,0x618,0x618,0x618,0x618,0x61b,0x618,0x618,
-0x618,0x618,0x618,0x61b,0x61b,0x61b,0x618,0x618,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,
-0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,
-0x61e,0x61e,0x978,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x615,0x615,0xc24,0xdb3,0x8d,0x8d,
-0x87f,0x891,0x88e,0x891,0x88e,0xcb1,0xcb1,0xda4,0xda1,0x882,0x882,0x882,0x882,0x894,0x894,0x894,
-0x8ac,0x8af,0x8be,0x90,0x8b2,0x8b5,0x8c1,0x8c1,0x8a9,0x8a0,0x89a,0x8a0,0x89a,0x8a0,0x89a,0x89d,
-0x89d,0x8b8,0x8b8,0x8bb,0x8b8,0x8b8,0x8b8,0x90,0x8b8,0x8a6,0x8a3,0x89d,0x90,0x90,0x90,0x90,
-0x62a,0x636,0x62a,0xc27,0x62a,0x93,0x62a,0x636,0x62a,0x636,0x62a,0x636,0x62a,0x636,0x62a,0x636,
-0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x633,
-0x62d,0x633,0x62d,0x630,0x636,0x633,0x62d,0x633,0x62d,0x633,0x62d,0x633,0x62d,0x93,0x93,0x627,
-0x77d,0x780,0x795,0x798,0x777,0x780,0x780,0x99,0x75f,0x762,0x762,0x762,0x762,0x75f,0x75f,0x99,
-0x96,0x96,0x96,0x96,0x96,0x96,0x96,0x96,0x96,0xb22,0xb22,0xb22,0x9de,0x759,0x639,0x639,
-0x99,0x7a7,0x786,0x777,0x780,0x77d,0x777,0x789,0x77a,0x774,0x777,0x795,0x78c,0x783,0x7a4,0x777,
-0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x792,0x78f,0x795,0x795,0x795,0x7a7,
-0x768,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,
-0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x99,
-0x99,0x99,0x765,0x765,0x765,0x765,0x765,0x765,0x99,0x99,0x765,0x765,0x765,0x765,0x765,0x765,
-0x99,0x99,0x765,0x765,0x765,0x765,0x765,0x765,0x99,0x99,0x765,0x765,0x765,0x99,0x99,0x99,
-0xb6d,0xb6d,0xb6d,0xb6d,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x18b7,0x18b7,0x18b7,
-0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,
-0xb73,0xb73,0xb73,0x9f,0x9f,0x9f,0x9f,0x9f,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,
-0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,
-0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xa2,0xa2,
-0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xa5,
-0xa5,0x101d,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,
-0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,
-0x173d,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,
-0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
-0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xa8,0xb9d,0xb9d,
-0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
-0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
-0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xa8,0xba0,0xba0,0xa8,0xa8,0xba0,0xa8,0xa8,0xba0,0xba0,0xa8,
-0xa8,0xba0,0xba0,0xba0,0xba0,0xa8,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xb9d,0xb9d,
-0xb9d,0xb9d,0xa8,0xb9d,0xa8,0xb9d,0xb9d,0xb9d,0xb9d,0xd29,0xb9d,0xb9d,0xa8,0xb9d,0xb9d,0xb9d,
-0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
-0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xa8,0xba0,
-0xba0,0xba0,0xba0,0xa8,0xa8,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xa8,0xba0,0xba0,
-0xba0,0xba0,0xba0,0xba0,0xba0,0xa8,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
-0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
-0xba0,0xba0,0xa8,0xba0,0xba0,0xba0,0xba0,0xa8,0xba0,0xba0,0xba0,0xba0,0xba0,0xa8,0xba0,0xa8,
-0xa8,0xa8,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xa8,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
-0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xe16,0xe16,0xa8,0xa8,0xba0,0xba0,0xba0,0xba0,
-0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
-0xba0,0xba0,0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb97,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xf2d,0xf2a,
-0xa8,0xa8,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,
-0xb9a,0xb9a,0xb9a,0xb9a,0xab,0xba6,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,
-0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,
-0xab,0xab,0xab,0xab,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,
-0xc36,0xae,0xc36,0xc36,0xc36,0xc36,0xc30,0xc30,0xc33,0xae,0xae,0xae,0xae,0xae,0xae,0xae,
-0xae,0xae,0xae,0xae,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,
-0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc39,0xc39,0xc3c,0xca5,0xca5,0xb1,0xb1,0xb1,0xb1,0xb1,
-0xb1,0xb1,0xb1,0xb1,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,
-0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc42,0xc42,0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,
-0xb4,0xb4,0xb4,0xb4,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,
-0xc4b,0xb7,0xc4b,0xc4b,0xc4b,0xb7,0xc48,0xc48,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,
-0xb7,0xb7,0xb7,0xb7,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,
-0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,
-0xd3b,0x1539,0x1539,0xba,0xd2c,0xd2c,0xd2c,0xd38,0xd38,0xd38,0xd38,0xd2c,0xd2c,0xd38,0xd38,0xd38,
-0xba,0xba,0xba,0xba,0xd38,0xd38,0xd2c,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd2f,0xd2f,0xd2f,
-0xba,0xba,0xba,0xba,0xd32,0xba,0xba,0xba,0xd3e,0xd3e,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,
-0xd35,0xd35,0xd35,0xd35,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,
-0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xbd,0xbd,0xd41,0xd41,0xd41,0xd41,0xd41,0xbd,0xbd,0xbd,
-0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,
-0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0xc0,0xc0,0x153c,0x153c,
-0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,
-0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0xc0,0x1abe,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,
-0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xc3,0xd68,0xd68,0xd68,
-0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,
-0xd68,0xd68,0xd68,0xc3,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,
-0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xc3,0xd68,0xd68,0xc3,0xd68,0xd68,0xd68,0xd68,0xd68,
-0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xc3,0xc3,0xd68,0xd68,0xd68,0xd68,
-0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,
-0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,
-0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xd6b,0xd6b,0xd6b,0xd6b,
-0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,
-0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xc6,0xc6,0xc6,0xc6,0xc6,0xdad,0xdad,0xdad,0xc9,
-0xc9,0xc9,0xc9,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,
-0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xc9,0xc9,0xc9,0xdaa,
-0xdaa,0xdaa,0xdaa,0xdaa,0xdaa,0xdaa,0xdaa,0xdaa,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,
-0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,
-0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xcc,0xd6e,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,
-0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,
-0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xcf,0xcf,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,
-0xd77,0xd77,0xcf,0xcf,0xcf,0xcf,0xcf,0xcf,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,
-0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd2,0xd2,
-0xd7d,0xd2,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,
-0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd2,0xd7d,0xd7d,0xd2,0xd2,0xd2,
-0xd7d,0xd2,0xd2,0xd7d,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,
-0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd5,0xd5,0xd5,0xd5,0xd5,
-0xd5,0xd5,0xd5,0xd5,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0x153f,
-0x153f,0x17f1,0x17f1,0xdb,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,
-0x1acd,0x132,0x132,0x132,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,
-0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe3a,0xe3a,0xe40,0xe40,0xe3a,
-0xde,0xde,0xe3d,0xe3d,0x1149,0x1149,0x1149,0x1149,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,
-0xe1,0xe1,0xe1,0xe1,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,
-0xca2,0xca2,0xca2,0xca2,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1542,0x1542,0x1542,0x1542,0x1542,
-0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1545,0x18bd,0x18bd,0x193e,0x18bd,0xe4,0x17f4,
-0x1377,0x118b,0xf3c,0xf3c,0xe55,0xe52,0xe55,0xe52,0xe52,0xe49,0xe49,0xe49,0xe49,0xe49,0xe49,0x1194,
-0x1191,0x1194,0x1191,0x118e,0x118e,0x118e,0x1434,0x1431,0xe7,0xe7,0xe7,0xe7,0xe7,0xe4f,0xe4c,0xe4c,
-0xe4c,0xe49,0xe4f,0xe4c,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,
-0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xea,0xea,0xea,0xea,0xea,
-0xea,0xea,0xea,0xea,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xea,0xe58,0xe58,0xe58,0xe58,
-0xe58,0xe58,0xe58,0xea,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xea,0xe58,0xe58,0xe58,0xe58,
-0xe58,0xe58,0xe58,0xea,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,
-0xe5e,0xe5e,0xe5e,0xe5e,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xed,0xed,
-0xed,0xed,0xed,0xed,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xf0,0x1437,0xf0,0xf0,0xf0,0xf0,
-0xf0,0x1437,0xf0,0xf0,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,
-0xeb8,0xeb8,0xeb8,0xeb8,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,
-0xe67,0xe67,0xe67,0xf3,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,
-0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,
-0xe64,0xe64,0xe64,0xf3,0xe79,0xe6d,0xe6d,0xe6d,0xf6,0xe6d,0xe6d,0xf6,0xf6,0xf6,0xf6,0xf6,
-0xe6d,0xe6d,0xe6d,0xe6d,0xe79,0xe79,0xe79,0xe79,0xf6,0xe79,0xe79,0xe79,0xf6,0xe79,0xe79,0xe79,
-0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,
-0xe79,0xe79,0xe79,0xe79,0x195c,0x195c,0xf6,0xf6,0xe6a,0xe6a,0xe6a,0xf6,0xf6,0xf6,0xf6,0xe70,
-0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0x1959,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,
-0xe76,0xe76,0xe76,0xe76,0xe76,0xe76,0xe7c,0xe7c,0xe73,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,
-0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0x119a,0x119a,0xf9,0xf9,0xf9,0xf9,
-0xe88,0xe88,0xe88,0xe88,0xe88,0xe8b,0xe8b,0xe8b,0xe88,0xe88,0xe8b,0xe88,0xe88,0xe88,0xe88,0xe88,
-0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xf9,0xf9,0xf9,0xf9,0xf9,0xf9,0xe85,0xe85,0xe85,0xe85,
-0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0x1197,0xf9,0xf9,0xf9,0xe82,0xe82,0xe91,0xe91,0xe91,0xe91,
-0xfc,0xfc,0xfc,0xfc,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe8e,0xe91,0xe91,0xe91,
-0xe91,0xe91,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0x154e,0x1554,0x1551,0x189c,
-0x17f7,0x18c0,0x18c0,0x18c0,0x18c0,0x18c0,0x1962,0x195f,0x1965,0x195f,0x1965,0x1a25,0x1ac1,0x1ac1,0x1ac1,0xff,
-0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
-0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
-0xeb5,0xeb5,0xeb5,0xeb2,0xeb2,0xea9,0xea9,0xeb2,0xeaf,0xeaf,0xeaf,0xeaf,0x1ac4,0x102,0x102,0x102,
-0x1314,0x1314,0x1314,0x1317,0x1317,0x1317,0x130e,0x130e,0x1311,0x130e,0x156,0x156,0x156,0x156,0x156,0x156,
-0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0x1443,0x1443,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0xebb,
-0x137d,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x137a,
-0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc75,
-0xee8,0xed9,0xed3,0xee5,0xee2,0xedc,0xedc,0xeeb,0xed6,0xedf,0x108,0x108,0x108,0x108,0x108,0x108,
-0xf6f,0xf6f,0xf5a,0xf6f,0xf72,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0x10e,0x10e,0x10e,0x10e,
-0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf7b,0xf7b,0xf60,0xf66,0xf7b,0xf7b,
-0xf63,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,
-0xf5d,0xf5d,0xf5d,0xf5d,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0x10e,0x10e,0x10e,
-0x111,0x111,0x1a2b,0x1a28,0x1a2b,0x1a2b,0x1a2b,0x1aca,0x1ac7,0x1aca,0x1ac7,0x111,0x111,0x111,0x111,0x111,
-0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,
-0x111,0x1aca,0x1ac7,0x155d,0x144c,0x144c,0x1380,0x1074,0x1074,0x1074,0x1074,0x1074,0xf8a,0xf8a,0xf8a,0xf8a,
-0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,
-0xf87,0xf87,0xf8d,0xf8d,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0xf96,0xf96,0xf96,0xf96,
-0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,
-0xf96,0xf96,0xf90,0xf90,0xf90,0xf90,0x11a3,0x11a3,0x117,0x117,0x117,0xf93,0x1563,0x1563,0x1563,0x1563,
-0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,
-0x1563,0x1563,0x1563,0x1563,0x1563,0x174c,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,
-0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,
-0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0xf9f,0xf9f,0xf9f,0x1569,0x1569,0x1569,0x1569,0x1569,
-0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x11d,0xf9c,0xf9c,0xf9c,0xf9c,0x1566,0x11d,0x11d,0x11d,
-0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,
-0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,
-0x1974,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x109b,0x109b,0x109b,0x109b,0x1098,0x1098,0x1098,0x1098,
-0x1098,0x1098,0x1098,0x1098,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1098,0x1098,0x108f,0x108c,
-0x123,0x123,0x123,0x109e,0x109e,0x1092,0x1092,0x1092,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,
-0x1095,0x1095,0x123,0x123,0x123,0x109b,0x109b,0x109b,0x10a1,0x10a1,0x10a1,0x10a1,0x10a1,0x10a1,0x10a1,0x10a1,
-0x10a1,0x10a1,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,
-0x10b6,0x10b6,0x10b9,0x10b9,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,
-0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x10e0,0x10e0,0x10e0,0x10e0,0x10da,0x17fd,0x129,0x129,
-0x129,0x129,0x129,0x129,0x129,0x129,0x10e6,0x10e6,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,
-0x10dd,0x10dd,0x129,0x129,0x129,0x129,0x129,0x129,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x10f8,
-0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10fe,0x1101,0x12c,0x12c,0x12c,0x12c,
-0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x10fb,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,
-0x1113,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1110,0x1110,0x1107,0x1107,0x1110,0x1110,0x1107,0x1107,0x12f,
-0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x1113,0x1113,0x1113,0x1107,0x1113,0x1113,0x1113,0x1113,
-0x1113,0x1113,0x1113,0x1113,0x1107,0x1110,0x12f,0x12f,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,
-0x110d,0x110d,0x12f,0x12f,0x110a,0x1116,0x1116,0x1116,0x1575,0x132,0x132,0x132,0x132,0x132,0x132,0x132,
-0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,
-0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,
-0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,
-0x111c,0x111c,0x111c,0x111c,0x111c,0x111f,0x135,0x135,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,
-0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,
-0x1122,0x1122,0x1122,0x1122,0x1122,0x138,0x138,0x138,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,
-0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,
-0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,
-0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,
-0x112b,0x112b,0x13e,0x13e,0x13e,0x13e,0x13e,0x1128,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,
-0x112e,0x112e,0x112e,0x112e,0x141,0x141,0x141,0x141,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,
-0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x144,0x144,0x144,0x144,
-0x144,0x144,0x144,0x144,0x144,0x144,0x144,0x144,0x11a9,0x11a9,0x11a9,0x11a9,0x11b2,0x11a9,0x11a9,0x11a9,
-0x11b2,0x11a9,0x11a9,0x11a9,0x11a9,0x11a6,0x147,0x147,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11b5,
-0x11af,0x11b5,0x11af,0x11af,0x11af,0x11b5,0x11b5,0x147,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,
-0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x14a,0x14a,
-0x14a,0x14a,0x14a,0x14a,0x14a,0x14a,0x14a,0x14a,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,
-0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d0,0x11bb,0x11d0,
-0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x14d,0x11c4,0x11cd,0x11bb,0x11cd,0x11cd,0x11bb,0x11bb,0x11bb,
-0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11bb,0x11bb,0x11c1,0x11c1,0x11c1,
-0x11c1,0x11c1,0x11c1,0x11c1,0x11c1,0x14d,0x14d,0x11be,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,
-0x11ca,0x11ca,0x14d,0x14d,0x14d,0x14d,0x14d,0x14d,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,
-0x11ca,0x11ca,0x14d,0x14d,0x14d,0x14d,0x14d,0x14d,0x11c7,0x11c7,0x11c7,0x11c7,0x11c7,0x11c7,0x11c7,0x11d6,
-0x11d9,0x11d9,0x11d9,0x11d9,0x11c7,0x11c7,0x14d,0x14d,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,
-0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15bd,0x1adf,0x1329,0x1302,0x1320,0x1320,0x1320,0x1320,0x1320,0x1320,
-0x1320,0x1308,0x1305,0x12fc,0x12fc,0x1326,0x12fc,0x12fc,0x12fc,0x12fc,0x130b,0x14eb,0x14f1,0x14ee,0x14ee,0x193b,
-0x1716,0x1716,0x1aac,0x150,0x150,0x150,0x150,0x150,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,
-0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11e5,0x11e5,0x11e8,0x11f1,0x11eb,0x11eb,0x11eb,0x11f1,
-0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,
-0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,
-0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x159,0x159,0x159,0x120f,0x1203,0x1203,0x1203,0x1203,0x1203,0x1203,0x1206,
-0x1215,0x1215,0x1203,0x1203,0x1203,0x1203,0x15c,0x131a,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,
-0x1209,0x1209,0x15c,0x15c,0x15c,0x15c,0x1203,0x1203,0x1233,0x1227,0x1233,0x15f,0x15f,0x15f,0x15f,0x15f,
-0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,
-0x15f,0x15f,0x15f,0x1230,0x1230,0x1236,0x122a,0x122d,0x124b,0x124b,0x124b,0x1245,0x1245,0x123c,0x1245,0x1245,
-0x123c,0x1245,0x1245,0x124e,0x1248,0x123f,0x162,0x162,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,
-0x1242,0x1242,0x162,0x162,0x162,0x162,0x162,0x162,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x165,
-0x165,0x165,0x165,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,
-0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,
-0x165,0x165,0x165,0x165,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,
-0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x168,0x125a,0x1257,0x1257,0x1257,0x1257,
-0x1257,0x1257,0x1257,0x1257,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,
-0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x16b,0x16b,0x16b,0x1266,0x1269,0x1269,
-0x1269,0x1269,0x1269,0x1269,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,
-0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x16e,0x16e,0x126f,0x126f,0x126f,0x126f,
-0x126f,0x126f,0x126f,0x126f,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,
-0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x171,0x171,0x171,0x171,0x171,0x1275,0x1275,0x1275,0x1275,
-0x1275,0x1275,0x1275,0x1275,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,
-0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,
-0x127e,0x127e,0x127e,0x177,0x1299,0x1299,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,
-0x17a,0x197d,0x17a,0x17a,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,
-0x14ca,0x14ca,0x14ca,0x14ca,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,
-0x129f,0x129f,0x129f,0x17d,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1a52,0x1a4c,0x279,0x279,0x279,
-0x279,0x279,0x279,0x279,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,
-0x187e,0x1ad0,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,
-0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,
-0x180,0x180,0x180,0x180,0x180,0x180,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,
-0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,
-0x12f6,0x13f5,0x13f2,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,
-0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f3,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,
-0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f3,0x12f0,0x12f0,0x13f5,0x13f5,
-0x13f5,0x13f5,0x13f5,0x13f2,0x13f5,0x13f5,0x13f5,0x1881,0x183,0x183,0x183,0x183,0x12ed,0x12ed,0x12ed,0x12ed,
-0x12ed,0x12ed,0x12ed,0x12ed,0x12ed,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x1419,0x1419,0x183,0x183,
-0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x1920,0x1920,0x1920,0x1920,
-0x1920,0x1920,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,
-0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,
-0x183,0x183,0x183,0x183,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,
-0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x138f,0x138f,0x138f,
-0x186,0x186,0x1392,0x186,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x1398,0x13a1,0x139b,0x139b,0x13a1,0x13a1,
-0x13a1,0x139b,0x13a1,0x139b,0x139b,0x139b,0x13a4,0x13a4,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,
-0x139e,0x139e,0x139e,0x139e,0x18c,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x18c,0x18c,0x13aa,0x13aa,0x13aa,
-0x13aa,0x13aa,0x13aa,0x18c,0x18c,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x18c,0x18c,0x18c,0x18c,0x18c,
-0x18c,0x18c,0x18c,0x18c,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x18c,0x13aa,0x13aa,0x13aa,0x13aa,
-0x13aa,0x13aa,0x13aa,0x18c,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,
-0x161d,0x161d,0x161d,0x161d,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13b0,0x13c2,0x13c2,0x13b6,0x13b6,0x13b6,
-0x13b6,0x13b6,0x18f,0x18f,0x18f,0x18f,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,
-0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,
-0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x1584,
-0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,
-0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x192,0x192,0x192,0x192,0x192,0x192,0x192,
-0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x195,
-0x195,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x1587,
-0x195,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13fe,
-0x195,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,
-0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,
-0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x195,0x195,0x195,0x195,0x195,0x195,0x195,0x195,0x195,0x195,
-0x1413,0x1410,0x1410,0x1410,0x1410,0x1410,0x159c,0x159c,0x159c,0x159c,0x159c,0x159f,0x170d,0x159f,0x159f,0x159f,
-0x17d9,0x188a,0x188a,0x18c3,0x18c3,0x1a8e,0x1b39,0x1b39,0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x198,
-0x159f,0x159f,0x159f,0x159f,0x159f,0x159f,0x159c,0x159c,0x159c,0x159f,0x159c,0x170a,0x170a,0x198,0x198,0x198,
-0x159f,0x159c,0x159c,0x159f,0x188a,0x188a,0x188a,0x1926,0x1926,0x1a07,0x1a8e,0x1b39,0x1b39,0x198,0x198,0x198,
-0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,
-0x13cb,0x13cb,0x13cb,0x13cb,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,
-0x1467,0x15a5,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x15a5,0x15a5,0x15a5,
-0x15a5,0x15a5,0x15a5,0x175e,0x175e,0x19e,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1ad3,0x1ad3,
-0x1ad3,0x1ad3,0x1ad3,0x1ad3,0x1ad3,0x1ad3,0x1ad3,0x1ad3,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,
-0x19e,0x19e,0x19e,0x1983,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,
-0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,
-0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,
-0x1a1,0x146d,0x146d,0x1a1,0x146d,0x1a1,0x1a1,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,
-0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x1a1,0x1a1,0x1a1,
-0x1a1,0x1a1,0x146d,0x1a1,0x1a1,0x1a1,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x146d,0x146d,0x146d,
-0x1a1,0x146d,0x146d,0x1a1,0x146d,0x1a1,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x146d,
-0x1a1,0x146d,0x146d,0x1a1,0x146d,0x1a1,0x1a1,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,
-0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x1a1,
-0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x146d,
-0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x1a1,0x1a1,0x1a1,0x1a1,
-0x1a1,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x146d,
-0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x1a1,0x1a1,0x1a1,0x1a1,
-0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,
-0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x146a,0x146a,0x1a1,0x1a1,
-0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1482,0x1482,0x1482,0x1482,
-0x1482,0x1482,0x1482,0x1470,0x1470,0x1470,0x1470,0x1470,0x147f,0x1470,0x1473,0x1473,0x1470,0x1470,0x1470,0x1476,
-0x1476,0x1a4,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x1479,0x1485,0x1485,0x1485,
-0x1989,0x1986,0x1986,0x1ad6,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x162f,0x162f,0x162f,0x162f,
-0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x1491,0x1491,0x1491,0x1491,
-0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x148e,0x1488,0x1488,0x148e,0x148e,0x1497,0x1497,0x1491,0x1494,
-0x1494,0x148e,0x148b,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x149a,0x149a,0x149a,0x149a,
-0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,
-0x149a,0x149a,0x149a,0x149a,0x1aa,0x1aa,0x1aa,0x1aa,0x1761,0x1761,0x149a,0x149a,0x1761,0x1761,0x1761,0x1761,
-0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1aa,0x1aa,0x1761,0x1761,
-0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x14a6,0x14a6,0x14a6,0x14a6,
-0x14a6,0x1a37,0x1a37,0x1a37,0x1a37,0x1a37,0x1a37,0x1ad,0x1ad,0x1ad,0x1ad,0x1a31,0x14a6,0x14a3,0x14a3,0x14a3,
-0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x1a34,0x1a34,0x1a34,0x1a34,
-0x1a34,0x1a34,0x1a34,0x1a34,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x14a0,0x14a0,0x14a0,0x14a0,0x14a9,
-0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14ca,0x14ca,0x14ca,0x14ca,
-0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x14c7,0x14c7,0x14c7,0x14c7,
-0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x14cd,0x14cd,0x14cd,0x14cd,
-0x14cd,0x14cd,0x14cd,0x14cd,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1323,0x1320,0x1323,0x12ff,
-0x1320,0x1326,0x1326,0x1329,0x1326,0x1329,0x132c,0x1320,0x1329,0x1329,0x1320,0x1320,0x14df,0x14df,0x14df,0x14df,
-0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14d0,0x14d9,0x14d0,0x14d9,0x14d9,0x14d0,0x14d0,0x14d0,0x14d0,
-0x14d0,0x14d0,0x14dc,0x14d3,0x1a3a,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x15b1,0x15b1,0x15b1,0x15b1,
-0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x1b9,0x1b9,0x15ae,0x15ae,0x15ae,0x15ae,
-0x15ae,0x15b4,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1719,0x1710,0x1710,0x1710,
-0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,
-0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1bf,0x1bf,0x1bf,0x1bf,0x1adf,0x1c2,0x1c2,0x1c2,
-0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,
-0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,
-0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,
-0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x1c5,0x1c5,0x1c5,
-0x1c5,0x1c5,0x1c5,0x1c5,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x1c5,0x1c5,
-0x15c9,0x15c3,0x15c6,0x15cf,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x1c8,0x1c8,0x1c8,0x1c8,
-0x1c8,0x1c8,0x1c8,0x1c8,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,
-0x15ba,0x15ba,0x15ba,0x15ba,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,
-0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x198c,0x198c,0x198c,0x198c,0x1cb,0x1cb,0x1cb,
-0x1cb,0x1cb,0x1cb,0x1cb,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,
-0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,
-0x1cb,0x1cb,0x1cb,0x1cb,0x1776,0x171c,0x15de,0x1722,0x1ce,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,
-0x15e7,0x1ce,0x1ce,0x15e7,0x15e7,0x1ce,0x1ce,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,
-0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x1ce,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x1ce,0x15e7,0x15e7,
-0x1ce,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x1ce,0x1a16,0x171f,0x15e7,0x15d8,0x15de,0x15d8,0x15de,0x15de,0x15de,
-0x15de,0x1ce,0x1ce,0x15de,0x15de,0x1ce,0x1ce,0x15e1,0x15e1,0x15e4,0x1ce,0x1ce,0x1779,0x1ce,0x1ce,0x1ce,
-0x1ce,0x1ce,0x1ce,0x15d8,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x15ea,0x15e7,0x15e7,0x15e7,0x15e7,0x15de,0x15de,
-0x1ce,0x1ce,0x15db,0x15db,0x15db,0x15db,0x15db,0x15db,0x15db,0x1ce,0x1ce,0x1ce,0x15db,0x15db,0x15db,0x15db,
-0x15db,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x15ff,0x15ff,0x15ff,0x15ff,
-0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x1d1,0x15ff,
-0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15f9,0x15f9,0x15f9,0x15ed,
-0x15ed,0x15ed,0x15f9,0x15f9,0x15ed,0x15fc,0x15f0,0x15ed,0x1602,0x1602,0x15f6,0x1602,0x1602,0x15f3,0x180c,0x1d1,
-0x1611,0x1611,0x1611,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1608,0x160b,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,
-0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,
-0x177c,0x177c,0x177c,0x177c,0x161d,0x161a,0x1a3d,0x1a3d,0x1ae5,0x1ae8,0x1ae2,0x1ae2,0x1d7,0x1d7,0x1d7,0x1d7,
-0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,
-0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,
-0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,
-0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,
-0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,
-0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,
-0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,
-0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,
-0x162f,0x162f,0x162f,0x1626,0x1629,0x162c,0x162f,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,
-0x163e,0x163e,0x163e,0x163e,0x163e,0x1632,0x1632,0x1e0,0x1e0,0x1e0,0x1e0,0x1635,0x1635,0x1635,0x1635,0x1635,
-0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x1638,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,
-0x1647,0x1647,0x1647,0x1647,0x1647,0x1e3,0x1e3,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,
-0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,
-0x164a,0x165c,0x165c,0x1650,0x1659,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,
-0x1653,0x1653,0x1653,0x1653,0x1653,0x1653,0x1653,0x1653,0x1653,0x1653,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,
-0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,
-0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1e9,
-0x166e,0x166e,0x166e,0x166e,0x166e,0x1668,0x1671,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,
-0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166e,0x166e,0x166e,0x166e,0x166e,0x1ec,
-0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,
-0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1ef,
-0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,
-0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1680,0x1680,0x1680,0x1680,0x1680,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,
-0x169b,0x169b,0x169e,0x169e,0x16a1,0x1692,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,
-0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1f5,0x1692,0x1692,0x1692,0x1692,0x1692,
-0x1692,0x1692,0x1f5,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,
-0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x169b,0x169b,0x169b,
-0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,
-0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,
-0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,
-0x16b3,0x16b3,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x16b0,0x16b0,0x16b0,0x16b0,0x1fb,0x1fb,0x1fb,
-0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16b6,
-0x16c8,0x16c8,0x16b6,0x16b6,0x16b6,0x16b6,0x201,0x201,0x16c8,0x16c8,0x16cb,0x16cb,0x16b6,0x16b6,0x16c8,0x16bc,
-0x16b9,0x16bf,0x16d1,0x16d1,0x16c2,0x16c2,0x16c5,0x16c5,0x16c5,0x16d1,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,
-0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1782,0x1782,0x1782,0x1782,0x177f,0x177f,0x201,0x201,
-0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,
-0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,
-0x204,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,
-0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,
-0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x207,0x207,0x207,0x207,
-0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,
-0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,
-0x16d7,0x16d7,0x207,0x207,0x207,0x207,0x207,0x207,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,
-0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,
-0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x207,0x207,0x1aeb,0x1aeb,0x207,0x207,
-0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,
-0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,
-0x16da,0x16e9,0x16e0,0x16dd,0x16ef,0x16ef,0x16e3,0x16ef,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,
-0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,
-0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f2,0x16f2,0x16f2,0x16f2,0x16f2,0x16f2,
-0x16f2,0x16f2,0x16f2,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x16fb,
-0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,
-0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x198f,0x210,0x210,0x1788,0x1788,0x1788,
-0x1794,0x1794,0x1788,0x1788,0x1788,0x1788,0x1794,0x1788,0x1788,0x1788,0x1788,0x178b,0x210,0x210,0x210,0x210,
-0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x178e,0x178e,0x179a,0x179a,0x179a,0x178e,
-0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,
-0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,
-0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,
-0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x219,0x17af,0x17af,0x219,0x219,
-0x219,0x219,0x219,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x21c,
-0x17b2,0x21c,0x17b2,0x17b2,0x17b2,0x17b2,0x21c,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,
-0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x21c,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,
-0x17b2,0x17b5,0x21c,0x21c,0x21c,0x21c,0x21c,0x21c,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,
-0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,
-0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x21f,0x21f,0x21f,0x21f,0x21f,
-0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,
-0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x21f,0x21f,0x21f,0x21f,0x21f,
-0x21f,0x21f,0x17b8,0x17b8,0x17b8,0x17b8,0x17b8,0x17b8,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,
-0x1929,0x1929,0x1929,0x1929,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a94,0x1b3c,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1b3f,
-0x1b3c,0x222,0x1a0a,0x1a94,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x17df,0x1a0a,0x1a0a,0x1a94,0x1a94,0x1a94,0x1a94,0x1a94,
-0x1a94,0x1a94,0x1a94,0x1b3c,0x222,0x1a97,0x1a97,0x1a97,0x1929,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,
-0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x1929,0x1929,0x17c4,0x17c4,0x17c4,0x17c4,0x17c1,0x17c4,0x17c4,0x17c7,
-0x17ca,0x17c7,0x17c7,0x17c4,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,
-0x225,0x225,0x225,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x1821,0x1821,0x1821,0x1821,0x1818,0x1818,0x1818,0x1812,
-0x1815,0x1815,0x1815,0x1a40,0x228,0x228,0x228,0x228,0x181e,0x181e,0x181e,0x181e,0x181e,0x181e,0x181e,0x181e,
-0x181e,0x181e,0x228,0x228,0x228,0x228,0x181b,0x181b,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,
-0x183c,0x22b,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,
-0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x1839,0x1827,0x1827,0x1827,0x1827,
-0x1827,0x1827,0x1827,0x22b,0x1827,0x1827,0x1827,0x1827,0x1827,0x1827,0x1839,0x182a,0x183c,0x183f,0x183f,0x1833,
-0x1830,0x1830,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x1836,0x1836,0x1836,0x1836,
-0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,
-0x182d,0x182d,0x182d,0x182d,0x182d,0x22b,0x22b,0x22b,0x184b,0x184e,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,
-0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,
-0x1842,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,
-0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x231,
-0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,
-0x1845,0x231,0x231,0x1845,0x1845,0x1845,0x1845,0x1845,0x1893,0x192f,0x1a9a,0x1a9d,0x1b45,0x234,0x234,0x234,
-0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x1b42,0x1b42,0x234,0x234,0x234,0x234,0x234,0x234,
-0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,
-0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x237,0x237,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,
-0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x237,0x1851,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,
-0x1848,0x1851,0x1848,0x1848,0x1851,0x1848,0x1848,0x237,0x237,0x237,0x237,0x237,0x237,0x237,0x237,0x237,
-0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x23a,0x23a,0x23a,
-0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,
-0x186f,0x186f,0x1860,0x185a,0x185a,0x186f,0x185d,0x1872,0x1872,0x1872,0x1872,0x1875,0x1875,0x1869,0x1866,0x1863,
-0x186c,0x186c,0x186c,0x186c,0x186c,0x186c,0x186c,0x186c,0x186c,0x186c,0x1aee,0x1869,0x23d,0x1863,0x1992,0x1a43,
-0x1af1,0x1af1,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,
-0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,
-0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,
-0x187b,0x187b,0x187b,0x187b,0x240,0x240,0x240,0x240,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,
-0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,
-0x1878,0x1878,0x1878,0x1878,0x240,0x240,0x240,0x240,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,
-0x1896,0x1896,0x1896,0x1896,0x1896,0x1a13,0x1a13,0x1a13,0x1a13,0x1a13,0x1aa0,0x1aa0,0x1aa0,0x1aa0,0x1aa0,0x1aa0,
-0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,
-0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,
-0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x246,
-0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,
-0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x249,
-0x18d5,0x18d5,0x249,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,
-0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x249,
-0x249,0x249,0x18c9,0x249,0x18c9,0x18c9,0x249,0x18c9,0x18c9,0x18c9,0x18cc,0x18c9,0x18cf,0x18cf,0x18d8,0x18c9,
-0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,
-0x18d2,0x18d2,0x249,0x249,0x249,0x249,0x249,0x249,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,
-0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,
-0x1938,0x1938,0x1938,0x1938,0x24c,0x24c,0x24c,0x24c,0x1905,0x1908,0x1917,0x1917,0x1908,0x190b,0x1905,0x1902,
-0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x18f0,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18ed,
-0x18ed,0x18db,0x18db,0x18db,0x18f0,0x18f0,0x18f0,0x18f0,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,
-0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x258,0x258,0x258,0x258,
-0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,
-0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x258,0x258,0x1aa9,0x1aa9,0x1aa9,0x1aa9,0x1b4b,0x28b,0x28b,0x28b,
-0x1aa9,0x1aa9,0x1aa9,0x28b,0x28b,0x28b,0x28b,0x28b,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,
-0x19aa,0x19aa,0x19aa,0x19aa,0x19a7,0x19a7,0x19a7,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,
-0x19a7,0x19a1,0x199e,0x19a4,0x25b,0x25b,0x25b,0x25b,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,
-0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,
-0x19ad,0x19ad,0x19ad,0x25e,0x25e,0x19ad,0x19ad,0x19ad,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x261,0x19bc,
-0x19bc,0x261,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,
-0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19b9,0x19b9,0x19b9,0x19b9,0x19b9,0x261,
-0x19b0,0x19b0,0x261,0x19b9,0x19b9,0x19b0,0x19b9,0x19b3,0x19bc,0x261,0x261,0x261,0x261,0x261,0x261,0x261,
-0x19c5,0x19c5,0x19c8,0x19c8,0x19bf,0x19bf,0x19bf,0x19bf,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264,
-0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x264,0x264,0x264,0x264,0x264,0x264,
-0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19ce,0x19cb,0x19cb,0x19cb,
-0x19ce,0x19cb,0x19cb,0x19cb,0x19cb,0x267,0x267,0x267,0x267,0x267,0x267,0x267,0x267,0x267,0x267,0x267,
-0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,
-0x19d7,0x19d7,0x19d7,0x19d1,0x19d1,0x19d4,0x19d4,0x19da,0x19da,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,
-0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,
-0x19dd,0x19dd,0x19dd,0x19dd,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,
-0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,
-0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e3,0x19ec,0x19e0,0x19e0,0x270,0x270,0x270,0x270,0x270,
-0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19f2,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,
-0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,
-0x19fb,0x19fb,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f8,0x19f8,0x19f8,
-0x19f8,0x19fe,0x19fe,0x19fe,0x19fe,0x19fe,0x276,0x276,0x276,0x276,0x276,0x276,0x1a55,0x1a55,0x1a55,0x1a55,
-0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,
-0x1a55,0x1a55,0x1a55,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x1a64,0x1a64,0x1a64,0x1a64,
-0x1a64,0x1a64,0x1a64,0x1a64,0x27f,0x27f,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,
-0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a61,0x1a61,0x1a61,
-0x1a58,0x1a58,0x1a58,0x1a58,0x27f,0x27f,0x1a58,0x1a58,0x1a61,0x1a61,0x1a61,0x1a61,0x1a5b,0x1a64,0x1a5e,0x1a64,
-0x1a61,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,
-0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x1a70,0x1a70,0x1a70,0x1a70,
-0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x282,0x282,0x282,0x1a67,0x1a67,0x1a67,0x1a67,
-0x1a67,0x1a67,0x1a67,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a73,0x1a73,0x282,0x282,0x285,0x1a76,0x1a76,0x1a76,
-0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,
-0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x285,0x285,
-0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,
-0x1aa3,0x1aa3,0x1aa3,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,
-0x1aa6,0x1aa6,0x1aa6,0x1aa6,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x1938,0x1938,0x1938,0x1938,
-0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1aa9,0x1aa9,0x1aa9,0x1b4b,
-0x1b4b,0x1b4b,0x1b4b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x1aa9,0x1aa9,0x1aa9,0x1aa9,
-0x1aa9,0x1aa9,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x28b,0x28b,0x28b,
-0x28b,0x28b,0x28b,0x28b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x28b,0x28b,0x28b,0x28b,0x28b,
-0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x28b,
-0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,
-0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,
-0x1a7f,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,
-0x1a79,0x1a79,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x1a7c,
-0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a82,0x1a82,0x1a82,0x1a82,
-0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x291,0x291,0x291,0x291,0x291,0x1a85,
-0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x294,0x294,0x294,0x294,
-0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,
-0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x297,0x297,0x1b12,0x297,0x297,0x1b12,0x1b12,0x1b12,0x1b12,
-0x1b12,0x1b12,0x1b12,0x1b12,0x297,0x1b12,0x1b12,0x297,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,
-0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1afa,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x297,0x1b09,
-0x1b0c,0x297,0x297,0x1afa,0x1afa,0x1b0f,0x1b00,0x1b15,0x1b09,0x1b15,0x1b09,0x1afd,0x1b18,0x1b03,0x1b18,0x297,
-0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,
-0x1b06,0x1b06,0x297,0x297,0x297,0x297,0x297,0x297,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,
-0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x29a,0x29a,
-0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,
-0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,
-0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,
-0x1b1e,0x1b1e,0x1b1e,0x2a0,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,
-0x1b1e,0x1b1e,0x1b1e,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,
-0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,
-0x1b21,0x1b21,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x1b54,0x1b54,0x1b54,0x1b54,0x1b54,0x1b54,0x1b54,0x1b54,
-0x1b54,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,
-0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,
-0x2a3,0x2a3,0x2a3,0x2a3,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x2a6,0x1b24,
-0x1b24,0x1b27,0x2a6,0x2a6,0x1b2a,0x1b2a,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,
-0x2a6,0x2a6,0x2a6,0x2a6,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,
-0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,
-0x2a9,0x2a9,0x2a9,0x2a9,0x1932,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,
-0x2c1,0x2c1,0x2c1,0x2c1,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,
-0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,
-0x2a9,0x2a9,0x97b,0x97b,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x2c4,
-0x2c4,0x2c4,0x2c4,0x2c4,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,
-0x2a9,0x2a9,0x2a9,0x2a9,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x1a04,
-0x1a04,0x1a04,0x1a04,0x1a04,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,
-0x1b33,0x2ac,0x2ac,0x2ac,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0x12e4,
-0x12e4,0x12e4,0x2af,0x2af,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,
-0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0x2af,0x2af,
-0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,
-0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,
-0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,
-0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0x1b36,0x1b36,0x1b36,0x1b36,0x1b36,0x1b36,0x1b36,0x2b2,0x2b2,
-0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,
-0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0x2b5,0x2b5,
-0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,
-0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,
-0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,
-0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x2bb,0x2bb,
-0x17dc,0x17dc,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,
-0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,
-0x3cc,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3cc,0x3cc,0x3cc,0x3cc,0x3c6,0x1158,0x133e,
-0x3cf,0x945,0x948,0x3bd,0x3bd,0x1155,0x133b,0x133b,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,
-0x1155,0x3c0,0x3c0,0x3cc,0xce1,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,
-0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,
-0x3cf,0x3cf,0x3c0,0x3c0,0x8d0,0x8d3,0x963,0x963,0x963,0x963,0x963,0x963,0x963,0x963,0x963,0x963,
-0x3c9,0xfba,0xfb7,0x1341,0x1341,0x1341,0x1341,0x1341,0x1506,0x115b,0x115b,0xf0c,0xf0c,0xdda,0xf0c,0xf0c,
-0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3d2,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,
-0x3cf,0x3d2,0x3cf,0x3cf,0x3d2,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x133b,0x133e,0x3c3,0x3cf,0x3cc,0x3cc,
-0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x1347,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,
-0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x1347,0x18ae,0x18ae,0xfd8,0x45f,0x468,
-0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,
-0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0xbc7,0xbc7,0xde6,0xde6,0x8d6,0xde9,0x1428,0x1428,0x1428,
-0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,
-0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,
-0x4b3,0x4b3,0x4b3,0x1170,0x1170,0x1170,0x1170,0x1170,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,
-0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,
-0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x116d,0x116d,0x116d,0x116d,0x116d,0x116d,
-0x4b6,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,
-0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,
-0x4b3,0x4b3,0x4b3,0x4b3,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,
-0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,
-0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4b9,0x4b9,0x4b9,0x4b9,0x4bc,0x9bd,0x1005,0x1005,0x1008,0x1005,
-0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,
-0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x1008,0x1005,0x1008,0x1005,0x1008,0x1005,
-0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,
-0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,
-0x6ae,0x6ae,0x6b1,0x4e9,0x6bd,0x6ba,0x6ba,0x6b7,0x513,0x513,0x4d1,0x4d1,0x4d1,0x4d1,0x4d1,0xb58,
-0x6c0,0x4f5,0x6d8,0x6db,0x50a,0x6c0,0x4f8,0x4f8,0x4e9,0x504,0x504,0x6ae,0x510,0x50d,0x6b4,0x4e3,
-0x4da,0x4da,0x4dd,0x4dd,0x4dd,0x4dd,0x4dd,0x4e0,0x4dd,0x4dd,0x4dd,0x4d4,0x51c,0x519,0x516,0x516,
-0x6cc,0x4fe,0x4fb,0x6c9,0x6c6,0x6c3,0x6d5,0x4ec,0x6d2,0x6d2,0x501,0x504,0x6cf,0x6cf,0x501,0x504,
-0x4e6,0x4e9,0x4e9,0x4e9,0x507,0x4f2,0x4ef,0xbdc,0xaf2,0xaf5,0xaef,0xaef,0xaef,0xaef,0xbd3,0xbd3,
-0xbd3,0xbd3,0xbd9,0xd0e,0xd0b,0xdf5,0xdf8,0xbd6,0xdf8,0xdf8,0xdf8,0xdf8,0xdf5,0xdf8,0xdf8,0xbd0,
-0x540,0x540,0x540,0x540,0x540,0x540,0x540,0x53d,0x543,0x75c,0x540,0x9c0,0x9e1,0xaf8,0xaf8,0xaf8,
-0xbe2,0xbe2,0xdfe,0xdfe,0xdfe,0xdfe,0x1179,0x117c,0x117c,0x135c,0x14f4,0x151e,0x1521,0x1521,0x1734,0x18b1,
-0x54f,0x54f,0x567,0x6ea,0x54c,0x6e7,0x54f,0x564,0x54c,0x6ea,0x55e,0x567,0x567,0x567,0x55e,0x55e,
-0x567,0x567,0x567,0x6f3,0x54c,0x567,0x6ed,0x54c,0x55b,0x567,0x567,0x567,0x567,0x567,0x54c,0x54c,
-0x552,0x6e7,0x6f0,0x54c,0x567,0x54c,0x6f6,0x54c,0x567,0x555,0x56d,0x6f9,0x567,0x567,0x558,0x55e,
-0x567,0x567,0x56a,0x567,0x55e,0x561,0x561,0x561,0x561,0xb04,0xb01,0xd11,0xe07,0xbf7,0xbfa,0xbfa,
-0xbf4,0xbf1,0xbf1,0xbf1,0xbf1,0xbfa,0xbf7,0xbf7,0xbf7,0xbf7,0xbee,0xbf1,0xe04,0xf18,0xf1b,0x100e,
-0x117f,0x117f,0x117f,0x6ff,0x6fc,0x570,0x573,0x573,0x573,0x573,0x573,0x6fc,0x6ff,0x6ff,0x6fc,0x573,
-0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x57c,0x57c,0x57c,0x57c,
-0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x576,0x576,0x576,0x576,0x576,0x576,
-0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x57f,0x588,0x588,0x582,0x582,0x582,0x585,0x57f,
-0x582,0x582,0x57f,0x57f,0x57f,0x57f,0x582,0x582,0x708,0x708,0x57f,0x57f,0x582,0x582,0x582,0x582,
-0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x585,0x585,0x585,0x582,0x582,0x70b,0x582,
-0x70b,0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x57f,0x582,0x57f,0x57f,0x57f,0x57f,0x57f,0x57f,
-0x582,0x582,0x57f,0x708,0x57f,0x57f,0x57f,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,
-0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0x711,0x58b,0x711,0x711,
-0x58e,0x58b,0x58b,0x711,0x711,0x58e,0x58b,0x711,0x58e,0x58b,0x58b,0x711,0x58b,0x711,0x59a,0x597,
-0x58b,0x711,0x58b,0x58b,0x58b,0x58b,0x711,0x58b,0x58b,0x711,0x711,0x711,0x711,0x58b,0x58b,0x711,
-0x58e,0x711,0x58e,0x711,0x711,0x711,0x711,0x711,0x717,0x591,0x711,0x591,0x591,0x58b,0x58b,0x58b,
-0x711,0x711,0x711,0x711,0x58b,0x58b,0x58b,0x58b,0x711,0x711,0x58b,0x58b,0x58b,0x58e,0x58b,0x58b,
-0x58e,0x58b,0x58b,0x58e,0x711,0x58e,0x58b,0x58b,0x711,0x58b,0x58b,0x58b,0x58b,0x58b,0x711,0x58b,
-0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x714,0x711,0x58e,0x58b,
-0x711,0x711,0x711,0x711,0x58b,0x58b,0x711,0x711,0x58b,0x58e,0x714,0x714,0x58e,0x58e,0x58b,0x58b,
-0x58e,0x58e,0x58b,0x58b,0x58e,0x58e,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58e,0x58e,0x711,0x711,
-0x58e,0x58e,0x711,0x711,0x58e,0x58e,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,
-0x58b,0x711,0x58b,0x58b,0x58b,0x711,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x711,0x58b,0x58b,
-0x58b,0x58b,0x58b,0x58b,0x58e,0x58e,0x58e,0x58e,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,
-0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x711,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,
-0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,
-0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58e,0x58e,0x58e,0x58e,0x58b,0x58b,0x58b,0x58b,
-0x58b,0x58b,0x58e,0x58e,0x58e,0x58e,0x58b,0x594,0x58b,0x58b,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,
-0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0x59d,0xb0d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,
-0x5a9,0x5a6,0x5a9,0x5a6,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x71a,0x59d,0x59d,0x59d,0x59d,0x59d,
-0x59d,0x59d,0x81f,0x81f,0x59d,0x59d,0x59d,0x59d,0x5a3,0x5a3,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,
-0x5a0,0x825,0x822,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,
-0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,
-0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0xb0d,0xc06,0xb0d,0xb0d,0xb0d,0x5ac,0x5ac,0x5ac,0x5ac,
-0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,
-0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x723,0x723,0x723,0x723,
-0x723,0x723,0x723,0x723,0x723,0x723,0x5b2,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,
-0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xd89,0x72c,0x72c,0x72c,0x72c,
-0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,
-0x5b5,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x72c,0x72c,0x72c,0x72c,
-0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x5b8,0x5b8,0x5b8,0x5b8,0x72c,0x72c,0x72c,0x72c,
-0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72f,0x72f,0x72f,0x72f,
-0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x5bb,0x5bb,0x72f,0x72f,
-0x72f,0x72f,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0x735,0x735,0x5be,0x732,
-0x732,0x732,0x732,0x732,0x732,0x732,0x5c1,0x5c1,0x5be,0x5be,0x5c4,0x5c4,0x5c4,0x5c4,0x735,0x735,
-0x5c4,0x5c4,0x738,0x735,0x5be,0x5be,0x5be,0x5be,0x735,0x735,0x5c4,0x5c4,0x738,0x735,0x5be,0x5be,
-0x5be,0x5be,0x735,0x735,0x732,0x5be,0x5c4,0x735,0x5be,0x5be,0x732,0x735,0x735,0x735,0x5c4,0x5c4,
-0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x735,0x732,
-0x735,0x732,0x5be,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5be,0x5be,0x732,0xb16,0xb16,0xb16,0xb16,
-0xb16,0xb16,0xb16,0xb16,0xc0c,0xc0c,0xc0c,0xc0f,0xc0f,0xc8a,0xc8a,0xc0c,0x5d3,0x5d3,0x5d3,0x5d3,
-0x5d0,0x74a,0x747,0x5ca,0x5ca,0x73b,0x5ca,0x5ca,0x5ca,0x5ca,0x741,0x73b,0x5ca,0x5d0,0x5ca,0x5c7,
-0xd92,0xd92,0xc15,0xc15,0xe13,0xb19,0x5cd,0x5cd,0x73e,0x5d6,0x73e,0x5cd,0x5d0,0x5ca,0x5d0,0x5d0,
-0x5ca,0x5ca,0x5d0,0x5ca,0x5ca,0x5ca,0x5d0,0x5ca,0x5ca,0x5ca,0x5d0,0x5d0,0x5ca,0x5ca,0x5ca,0x5ca,
-0x5ca,0x5ca,0x5ca,0x5ca,0x5d0,0x5d3,0x5d3,0x5cd,0x5ca,0x5ca,0x5ca,0x5ca,0x74d,0x5ca,0x74d,0x5ca,
-0x5ca,0x5ca,0x5ca,0x5ca,0x828,0x828,0x828,0x828,0x828,0x828,0x828,0x828,0x828,0x828,0x828,0x828,
-0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5d0,0x74d,0x74a,0x5d9,0x74d,
-0x73b,0x741,0x5d0,0x73b,0x744,0x73b,0x73b,0x5ca,0x73b,0x74a,0x5d9,0x74a,0xb19,0xb19,0xc18,0xc18,
-0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc1b,0xc18,0xc18,0xe10,0xec7,0x5dc,0x5dc,0x5dc,0x5dc,
-0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,
-0x5df,0x13e6,0x13e6,0x13e6,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x152a,0x5eb,0x5f4,0x5eb,
-0x5eb,0x13e6,0x5df,0x5df,0x5f4,0x5f4,0x13e9,0x13e9,0x5f7,0x5f7,0x5e8,0x5ee,0x5e8,0x5e8,0x5ee,0x5df,
-0x5ee,0x5df,0x5ee,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5ee,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,
-0x13e6,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5ee,0x5ee,0x5df,0x5df,0x5df,
-0x5df,0x5df,0x5df,0x5df,0x5df,0x753,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5ee,0x5df,0x5df,0x5ee,
-0x5df,0x5df,0x5df,0x5df,0x13e6,0x5df,0x13e6,0x5df,0x5df,0x5df,0x5df,0x13e6,0x13e6,0x13e6,0x5df,0x12de,
-0x5df,0x5df,0x5df,0x5e5,0x5e5,0x5e5,0x5e5,0x1368,0x1368,0x5df,0x5e2,0x5f1,0x5f4,0x5e8,0x5e8,0x5e8,
-0xc21,0xc1e,0xc21,0xc1e,0xc21,0xc1e,0xc21,0xc1e,0xc21,0xc1e,0xc21,0xc1e,0xc21,0xc1e,0x750,0x750,
-0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x5df,0x5ee,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,
-0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x13e6,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,
-0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x13e6,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,
-0x618,0x618,0x618,0x618,0x618,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x621,0x621,0x621,0x621,
-0x621,0x621,0x621,0x621,0x618,0x61e,0x60f,0x612,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,
-0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,
-0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x615,0x615,0x615,0x615,0x615,0x615,0x618,0x618,0x618,0x618,
-0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,
-0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,
-0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,
-0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x618,
-0x61e,0x618,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,
-0x61b,0x621,0x61e,0x618,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61e,0x618,0x61b,0x621,0x61e,0x618,
-0x61b,0x621,0x61e,0x618,0x61e,0x618,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,
-0x136b,0x136b,0x136b,0x136b,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61b,0x621,0x61b,0x621,0x61e,0x618,
-0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61b,0x61e,0x618,0x61b,
-0x61e,0x618,0x61b,0x621,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,
-0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x61b,0x61b,0x61b,0x61b,0x61b,
-0x61b,0x61b,0x61b,0x61b,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,
-0x61e,0x61e,0x61e,0x61e,0x61e,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,
-0x618,0x618,0x618,0x618,0x61b,0x61b,0x618,0x61b,0x618,0x61b,0x618,0x618,0x61b,0x618,0x618,0x61b,
-0x618,0x61b,0x618,0x618,0x61b,0x618,0x61b,0x61b,0x618,0x618,0x618,0x61b,0x618,0x618,0x618,0x618,
-0x618,0x61b,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,
-0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x61b,0x61b,0x618,0x618,0x61b,0x618,0x61b,0x618,
-0x618,0x618,0x618,0x618,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,
-0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,
-0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x621,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,
-0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,
-0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,
-0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x61e,0x61e,0x61e,
-0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x624,0x624,0x624,0x624,0x101a,0x101a,0x101a,0x152d,
-0x152d,0x152d,0x152d,0x152d,0x152d,0x152d,0x173a,0x173a,0x885,0x88b,0x88b,0x897,0x897,0x888,0x87f,0x888,
-0x87f,0x888,0x87f,0x888,0x87f,0x888,0x87f,0x888,0x633,0x633,0x62d,0x633,0x62d,0x633,0x62d,0x633,
-0x62d,0x633,0x62d,0x630,0x636,0x633,0x62d,0x633,0x62d,0x630,0x636,0x633,0x62d,0x633,0x62d,0x630,
-0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x633,0x62d,0x633,0x62d,0x633,
-0x62d,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,
-0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,
-0x636,0x633,0x62d,0x630,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,
-0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,
-0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,
-0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x726,0x726,0x726,0x726,0x726,0x726,
-0x726,0x726,0x726,0x726,0x726,0x726,0x729,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,
-0x726,0x726,0x726,0x726,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x723,
-0x723,0x723,0x723,0x723,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,
-0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,
-0x72c,0x72c,0x72c,0x72c,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,
-0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,
-0x756,0x756,0x756,0x756,0xc78,0x8e8,0x8e2,0x8df,0x8e5,0x8dc,0x76b,0x76e,0x76e,0x76e,0x76e,0x76e,
-0x76e,0x76e,0x76e,0x76e,0x8ee,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,
-0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,
-0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x8eb,0x8eb,0x771,0x8fd,0x900,0x906,0x82b,0x837,0x91b,0x834,
-0x8f4,0x8f1,0x8f4,0x8f1,0x8fa,0x8f7,0x8fa,0x8f7,0x8f4,0x8f1,0x831,0x906,0x8f4,0x8f1,0x8f4,0x8f1,
-0x8f4,0x8f1,0x8f4,0x8f1,0x909,0x912,0x90f,0x90f,0x777,0x7b3,0x7b3,0x7b3,0x7b3,0x7b3,0x7b3,0x7ad,
-0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,
-0x7ad,0x7ad,0x7ad,0x77a,0x795,0x774,0x79b,0x79e,0x798,0x7b0,0x7b0,0x7b0,0x7b0,0x7b0,0x7b0,0x7aa,
-0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,
-0x7aa,0x7aa,0x7aa,0x77a,0x795,0x774,0x795,0xc7b,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,
-0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,
-0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x12d8,0x12d8,0x12d8,0x12d8,0x12d8,0x81c,
-0x831,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x951,0x951,0x951,0x951,0x83a,0x83a,
-0x90c,0x918,0x918,0x918,0x918,0x915,0x82e,0x903,0xb3d,0xb3d,0xb3d,0xc8d,0xcab,0xca8,0xb5b,0x8d9,
-0x840,0x83d,0x840,0x843,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,
-0x840,0x840,0x83d,0x840,0x840,0x83d,0x840,0x840,0x83d,0x840,0x840,0x83d,0x840,0x840,0x83d,0x83d,
-0xcae,0x852,0x84c,0x852,0x84c,0x852,0x84c,0x852,0x84c,0x852,0x84c,0x84c,0x84f,0x84c,0x84f,0x84c,
-0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,
-0x84f,0x84c,0x84f,0x852,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,
-0x84f,0x84f,0x84c,0x84f,0x84f,0x84c,0x84f,0x84f,0x84c,0x84f,0x84f,0x84c,0x84f,0x84f,0x84c,0x84c,
-0x84c,0x84c,0x84c,0x852,0x84c,0x852,0x84c,0x852,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x852,0x84c,
-0x84c,0x84c,0x84c,0x84c,0x84f,0x852,0x852,0x84f,0x84f,0x84f,0x84f,0x921,0x924,0x855,0x858,0xc96,
-0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,
-0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,
-0x861,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,
-0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x86a,0x86a,0x86a,0x86a,
-0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,
-0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0xd9b,0xd9b,0xeca,0x864,0x92d,0x92d,0x92d,0x92d,
-0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0xd95,0xd95,0xd95,0xd95,0x86d,0x86d,0x86d,0x86d,
-0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,
-0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x1ab2,0x936,0x936,0x936,0x936,
-0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x870,0x870,0x870,
-0x870,0x870,0x870,0xd9e,0xd9e,0xd9e,0xd9e,0x939,0x939,0x939,0x939,0x939,0x870,0x870,0x870,0x870,
-0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,
-0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0xd9e,0xd9e,
-0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,
-0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,
-0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,
-0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,
-0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,
-0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,
-0x113d,0x113d,0x113d,0x113d,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,
-0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,
-0x879,0x879,0x879,0x879,0x879,0x879,0x87c,0x87c,0x879,0x87c,0x879,0x87c,0x87c,0x879,0x879,0x879,
-0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x87c,0x879,0x87c,0x879,0x87c,0x87c,0x879,0x879,0x87c,
-0x87c,0x87c,0x879,0x879,0x879,0x879,0x14e5,0x14e5,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,
-0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,
-0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,
-0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x131d,0x131d,0x131d,0x131d,0x12bd,0x12bd,0x12bd,0x12bd,
-0x12bd,0x12bd,0x12bd,0x12bd,0xd95,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,
-0xc99,0xc99,0xc99,0xc99,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,
-0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x933,0x930,0x933,0x930,0x930,
-0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,
-0x930,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,
-0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,
-0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0xd9e,
-0x9b7,0x999,0x999,0x999,0x999,0x993,0x999,0x999,0x9ab,0x999,0x999,0x996,0x9a2,0x9a8,0x9a8,0x9a8,
-0x9a8,0x9a8,0x9ab,0x993,0x99f,0x993,0x993,0x993,0x98a,0x98a,0x993,0x993,0x993,0x993,0x993,0x993,
-0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x993,0x993,0x993,0x993,0x993,0x993,
-0x993,0x993,0x993,0x993,0x996,0x98a,0x993,0x98a,0x993,0x98a,0x9a5,0x99c,0x9a5,0x99c,0x9b4,0x9b4,
-0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,
-0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,
-0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,
-0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,
-0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,
-0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,
-0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,
-0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9cc,0x9cc,
-0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,
-0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9cf,0x9cf,
-0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,
-0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,
-0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,
-0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,
-0x9d8,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,
-0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9d8,0x9db,0x9db,0x9db,
-0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,
-0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0xa68,0xa68,0xfff,0xa68,0xa68,0xa68,0xa6b,0xa68,
-0xfff,0xa68,0xa68,0xff6,0xa62,0xa56,0xa56,0xa56,0xa56,0xa65,0xa56,0xfe7,0xfe7,0xfe7,0xa56,0xa59,
-0xa62,0xa5c,0xfed,0xff9,0xff9,0xfe7,0xfe7,0xfff,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,
-0xb61,0xb61,0xa6e,0xa6e,0xa5f,0xa5f,0xa5f,0xa5f,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa65,0xa65,
-0xa56,0xa56,0xfff,0xfff,0xfff,0xfff,0xfe7,0xfe7,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,
-0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,
-0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,
-0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,
-0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,
-0xa7d,0xa7d,0xa7d,0xdf2,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,
-0xa7d,0xa7d,0xa7d,0xa7d,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,
-0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,
-0xa83,0xa83,0xa83,0xa83,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,
-0xa89,0xa86,0xa8c,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0x1176,0x1176,0x1176,0x1176,0x1176,
-0x1176,0x1176,0x1176,0x1176,0x1173,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,
-0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,
-0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,
-0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,
-0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xac2,0xac2,0xac2,0xac5,0xac5,0xac2,0xac2,0xac2,
-0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xaaa,0xaaa,0xabf,0xaa1,
-0xaa1,0xaa1,0xaa1,0xaa1,0xaa1,0xaa1,0xabf,0xabf,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,
-0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,
-0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xae3,0xae3,0xae3,0xae3,0xae3,0xace,0xace,0xae3,
-0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,
-0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,
-0xae3,0xae3,0xae3,0xae6,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,
-0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,
-0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb10,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,
-0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xc06,0xc06,0xc06,0xc06,0xc06,
-0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,
-0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,
-0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,
-0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,
-0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,
-0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,
-0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,
-0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0x13ec,0x13ec,0x13ec,0x1b2d,0x1b2d,0x1b2d,0x1b2d,0x1b2d,
-0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,
-0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,
-0xb43,0xb43,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0xb46,0xb46,0xb46,0xb46,
-0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,
-0xb46,0xb49,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,
-0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,
-0xb46,0xb46,0xb46,0xb46,0xb4c,0xb4c,0xc9c,0xc9c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,
-0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xc9c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,
-0xb4c,0xb4c,0xb4c,0xb4c,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,
-0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,
-0xb70,0xb70,0xb70,0x1530,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xd26,0xd26,0xb76,0xb76,0xb76,0xb76,
-0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,
-0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xd23,0xd23,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,
-0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,
-0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,
-0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,
-0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,
-0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb82,0xb8e,0xb94,
-0xb94,0xb94,0xb88,0xb88,0xb88,0xb91,0xb85,0xb85,0xb85,0xb85,0xb85,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,
-0xb7f,0xb7f,0xb7f,0xb94,0xb94,0xb94,0xb94,0xb94,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,
-0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,
-0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb8b,0xb8b,0xb94,0xb94,0xb94,0xb88,
-0xb88,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,
-0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb94,0xb94,
-0xb94,0xb94,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb8b,
-0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,
-0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,
-0xb88,0xb88,0x173d,0x173d,0xba0,0xb97,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
-0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb97,
-0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
-0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xb97,
-0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
-0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb97,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0,
-0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
-0xba0,0xb97,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
-0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,
-0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,
-0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
-0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
-0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
-0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
-0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
-0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
-0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0,
-0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,
-0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,
-0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,
-0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,
-0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,
-0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,
-0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,
-0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc03,0xc06,0xc03,0xc03,0xc03,0xc03,0xc03,0xc03,0xc03,0xc03,
-0xc03,0xc03,0xc03,0xc03,0xc03,0xc03,0xc03,0xd14,0xd17,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,
-0xe0a,0xe0a,0xe0a,0xe0a,0xf24,0xf24,0xf24,0xf24,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc12,0xc12,
-0xc12,0xc12,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1d,0xd1d,0xe10,0xec4,0xe10,0xe10,0xe10,0xe10,
-0xe0d,0xe10,0xe0d,0xe10,0xe10,0x1014,0x12ae,0x12ae,0xe19,0xe19,0xe19,0xe19,0xe19,0xe1f,0xe1c,0xf36,
-0xf36,0xf36,0xf36,0x142e,0x1026,0x142e,0x1374,0x1374,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,
-0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc81,0xc7e,0xc81,0xc7e,0xc81,0xc7e,
-0x1137,0x1134,0x102c,0x1029,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,
-0xc51,0xc51,0xc51,0xc51,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,
-0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,
-0xc54,0xc54,0xc54,0xc54,0xc57,0xc57,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,
-0xc5a,0xc5a,0xc5a,0xc60,0xc5d,0xc87,0xc84,0xc60,0xc5d,0xc60,0xc5d,0xc60,0xc5d,0xc60,0xc5d,0xc60,
-0xc5d,0xc60,0xc5d,0xc60,0xc5d,0xc60,0xc5d,0xc60,0xc5d,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,
-0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,
-0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc60,0xc5d,0xc60,0xc5d,
-0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,
-0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc60,0xc5d,0xc5a,0xc5a,
-0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc69,0xc63,0xc63,0xc63,
-0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,
-0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,
-0xc69,0xc69,0xc69,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,
-0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,
-0xc66,0xc63,0xc63,0xc63,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,
-0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,
-0xc9f,0xc9f,0xc9f,0xc9f,0xd20,0xd8f,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe10,0xe0d,0xe0d,0xec4,0xec4,
-0xe0d,0xe0d,0xe0d,0xe0d,0xe10,0xe10,0xf27,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,
-0x1014,0x12db,0x12db,0x12b1,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,
-0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,
-0xd44,0xd44,0xd44,0xd44,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd47,
-0xd5c,0xd5c,0xd5c,0xd56,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd56,
-0xd5c,0xd5c,0xd5c,0xd5c,0xd50,0xd50,0xd59,0xd59,0xd59,0xd59,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd53,
-0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe22,0xe25,0xe25,0xe25,
-0xe25,0xe25,0xe25,0xe25,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,
-0xd5c,0xd5c,0xd56,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,
-0xd5c,0xd50,0xd50,0xd50,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,
-0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,
-0xd53,0xd53,0xd53,0xd53,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd62,0xd62,0xd62,0xd5f,0xd5f,0xd5f,0xd5f,
-0xd5f,0xd5f,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0x1140,
-0x1140,0x102f,0x102f,0x102f,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,
-0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,
-0xd65,0xd65,0xd65,0xd65,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,
-0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,
-0xd6b,0xd6b,0xd6b,0xd6b,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,
-0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,
-0xd74,0xd74,0xd74,0xd74,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,
-0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,
-0xd80,0xd80,0xd80,0xd80,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,
-0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,
-0xd8c,0xd8c,0xd8c,0xd8c,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,
-0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,
-0xe2e,0xe2e,0xe2e,0xe2e,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,
-0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,
-0xe31,0xe31,0xe31,0xe31,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,
-0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,
-0xe34,0xe34,0xe34,0xe34,0xef4,0xef4,0xe46,0xe46,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0x103b,
-0x103b,0x103b,0x103b,0x103b,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,
-0x1038,0x1038,0x1038,0x1038,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,
-0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,
-0xe55,0xe52,0xe55,0xe52,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,
-0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,
-0xe61,0xe61,0xe61,0xe61,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,
-0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,
-0xe67,0xe67,0xe67,0xe67,0xeee,0xeee,0xeee,0xeee,0xeee,0xeee,0xeee,0xeee,0xe7f,0xe7f,0xe7f,0xe7f,
-0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xf3f,0xf3f,0xf3f,0xf3f,0x103e,
-0x103e,0x103e,0x103e,0x103e,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,
-0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,
-0xe88,0xe88,0xe88,0xe88,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,
-0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,
-0xe91,0xe91,0xe91,0xe91,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,
-0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,
-0xe9a,0xe9a,0xe9a,0xe94,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,
-0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe9a,
-0xe9a,0xe9a,0xe9a,0xe9a,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,
-0xea3,0xea3,0xea0,0xea0,0xea0,0xea0,0xea0,0xea0,0xea0,0xea0,0xe9d,0xea6,0x104a,0x1044,0x1053,0x1041,
-0xea3,0xea3,0x1041,0x1041,0xeb5,0xeb5,0xea9,0xeb5,0xeb5,0xeb5,0xeac,0xeb5,0xeb5,0xeb5,0xeb5,0xea9,
-0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,
-0xeb5,0xeb5,0xeb5,0xeb5,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,
-0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,
-0xeb8,0xeb8,0xeb8,0xeb8,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,
-0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,
-0xed0,0xed0,0xed0,0xed0,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,
-0xef1,0xef1,0xef1,0xef1,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,
-0x1149,0x1149,0x1149,0x1149,0xf24,0xf24,0xf24,0xf21,0xf21,0xf21,0xf21,0xf21,0x1182,0x13dd,0x13dd,0x13dd,
-0x13dd,0x135f,0x135f,0x135f,0x13e0,0x1362,0x1362,0x13e0,0x1524,0x1524,0x1524,0x1524,0x1527,0x1527,0x1527,0x17ee,
-0x17ee,0x17ee,0x17ee,0x18b4,0xf39,0xf39,0xf39,0xf39,0x102f,0x102f,0x102f,0x102f,0x102f,0x102f,0x102f,0x102f,
-0x102f,0x102f,0x102f,0x102f,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,
-0x1032,0x1032,0x1032,0x1032,0xf5a,0xf5a,0xf5a,0xf5a,0xf6c,0xf75,0xf78,0xf75,0xf78,0xf75,0xf78,0xf75,
-0xf78,0xf75,0xf78,0xf75,0xf75,0xf75,0xf78,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,
-0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf5d,0xf5a,0xf5a,0xf5a,
-0xf5a,0xf5a,0xf5a,0xf6f,0xf5a,0xf6f,0xf6c,0xf6c,0xf81,0xf7e,0xf81,0xf81,0xf81,0xf7e,0xf7e,0xf81,
-0xf7e,0xf81,0xf7e,0xf81,0xf7e,0x1065,0x1065,0x1065,0x11a0,0x105c,0x1065,0x105c,0xf7e,0xf81,0xf7e,0xf7e,
-0x105c,0x105c,0x105c,0x105c,0x105f,0x1062,0x11a0,0x11a0,0xf84,0xf84,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,
-0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x106e,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,
-0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,
-0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,
-0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,
-0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,
-0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0x1563,0x1563,0x1563,0x1563,0x1563,
-0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0xf9f,0xf9f,0xf9f,0xf9f,
-0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,
-0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xfe7,0xfff,0xff6,0xffc,
-0xffc,0xfff,0xfff,0xff6,0xff6,0xffc,0xffc,0xffc,0xffc,0xffc,0xfff,0xfff,0xfff,0xfe7,0xfe7,0xfe7,
-0xfe7,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfe7,0xff6,
-0xff9,0xfe7,0xfe7,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xfea,0xfff,0xffc,0xff3,0xff3,0xff3,0xff3,
-0xff3,0xff3,0xff3,0xff3,0xff3,0xff3,0x116a,0x116a,0x1167,0x1164,0xff0,0xff0,0x1017,0x1017,0x1017,0x1017,
-0x12db,0x12db,0x12b1,0x12b1,0x12b7,0x12ae,0x12ae,0x12ae,0x12ae,0x12b1,0x13e3,0x12b7,0x12b1,0x12b7,0x12ae,0x12b7,
-0x12db,0x12ae,0x12ae,0x12ae,0x12b1,0x12b1,0x12ae,0x12ae,0x12b1,0x12ae,0x12ae,0x12b1,0x1032,0x1032,0x1032,0x1032,
-0x1032,0x102f,0x102f,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x153c,0x153c,0x153c,0x1140,0x102f,0x102f,0x102f,
-0x102f,0x12e7,0x12c0,0x12c0,0x12c0,0x12c0,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x1050,0x1050,0x104d,0x1047,
-0x104d,0x1047,0x104d,0x1047,0x104d,0x1047,0x1044,0x1044,0x1044,0x1044,0x1059,0x1056,0x1044,0x119d,0x143a,0x143d,
-0x143d,0x143a,0x143a,0x143a,0x143a,0x143a,0x1440,0x1440,0x1557,0x154b,0x154b,0x1548,0x1077,0x106e,0x1077,0x106e,
-0x1077,0x106e,0x1077,0x106e,0x106b,0x1068,0x1068,0x1077,0x106e,0x1383,0x1380,0x1746,0x1383,0x1380,0x1449,0x1446,
-0x155a,0x155a,0x1560,0x155a,0x1560,0x155a,0x1560,0x155a,0x1560,0x155a,0x1560,0x155a,0x1077,0x106e,0x1077,0x106e,
-0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,
-0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1071,0x106e,0x106e,0x106e,
-0x106e,0x106e,0x106e,0x106e,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x1077,0x106e,0x107a,0x107a,0x1080,0x1086,
-0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,
-0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1080,0x107a,0x107a,
-0x107a,0x107a,0x1080,0x1080,0x107a,0x107a,0x1083,0x1452,0x144f,0x144f,0x1086,0x1086,0x107d,0x107d,0x107d,0x107d,
-0x107d,0x107d,0x107d,0x107d,0x107d,0x107d,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x109b,0x109b,0x109b,0x109b,
-0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,
-0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x10a4,0x10a4,0x10a4,0x10a4,
-0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,
-0x10a4,0x10a4,0x10a4,0x10a4,0x10a7,0x10a7,0x10a7,0x10aa,0x10a7,0x10a7,0x10ad,0x10ad,0x10b0,0x10b0,0x10b0,0x10b0,
-0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,
-0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b9,0x10b9,0x10b9,0x10b9,
-0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10bc,0x10b3,0x10c2,0x10bf,0x10b9,0x10b9,0x10b9,0x10b9,
-0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,
-0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x1389,0x1386,0x10d4,0x10ce,
-0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d1,0x1152,0x10c5,0x10c5,0x10c5,0x10cb,
-0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x10c8,0x10c8,0x10cb,0x10d7,0x10d4,0x10ce,0x10d4,0x10ce,
-0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,
-0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x156f,0x156c,0x156f,0x156c,
-0x1572,0x1572,0x174f,0x1458,0x10e0,0x10e0,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,
-0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,
-0x10e3,0x10e3,0x10e3,0x10e3,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,
-0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10ec,0x10ec,0x10ec,0x1146,0x10f5,
-0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,
-0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,
-0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,
-0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,
-0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,
-0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,
-0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,
-0x112e,0x112e,0x112e,0x112e,0x1143,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,
-0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,
-0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,
-0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,
-0x113d,0x113d,0x113d,0x113d,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x14e2,0x17cd,0x17cd,0x17cd,
-0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,
-0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,
-0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11a9,0x11a9,0x11ac,0x11ac,0x11b2,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,
-0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,
-0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,
-0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,
-0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,
-0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,
-0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11dc,0x11e2,
-0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,
-0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,
-0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,
-0x11f4,0x132f,0x11fa,0x1332,0x11fa,0x11fa,0x11fa,0x11fa,0x11f7,0x11f7,0x11f7,0x11fa,0x1752,0x1755,0x197a,0x1977,
-0x11fd,0x11fd,0x11fd,0x120c,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,
-0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,
-0x1212,0x1212,0x1212,0x1200,0x120c,0x120c,0x11fd,0x11fd,0x11fd,0x11fd,0x120c,0x120c,0x11fd,0x11fd,0x120c,0x120c,
-0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,
-0x1221,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x1218,0x1218,0x1218,0x121e,0x121b,0x1578,0x157b,0x157e,0x157e,
-0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,
-0x1224,0x1230,0x1224,0x1224,0x1224,0x1239,0x1239,0x1224,0x1224,0x1239,0x1230,0x1239,0x1239,0x1230,0x1224,0x1227,
-0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,
-0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,
-0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,
-0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,
-0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,
-0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1260,0x1260,0x1260,
-0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,
-0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,
-0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,
-0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,
-0x1281,0x1281,0x1290,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,
-0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1296,0x1293,0x1296,0x1293,0x1293,0x1293,
-0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1296,0x1293,0x1293,0x1293,0x1293,
-0x1290,0x1290,0x1290,0x1284,0x1284,0x1284,0x1284,0x1290,0x1290,0x128a,0x1287,0x128d,0x128d,0x129c,0x1299,0x1299,
-0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,
-0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,
-0x12a5,0x12a5,0x12a5,0x12a2,0x12a2,0x12a2,0x129f,0x129f,0x129f,0x129f,0x12a2,0x129f,0x129f,0x129f,0x12a5,0x12a2,
-0x12a5,0x12a2,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,
-0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x12a5,0x12a2,0x12a2,
-0x129f,0x129f,0x129f,0x129f,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c9,
-0x12c9,0x12c9,0x12a8,0x1980,0x13d7,0x12d2,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,
-0x13d7,0x12d2,0x13d7,0x12d2,0x12b1,0x12b1,0x1365,0x12ae,0x1365,0x1365,0x1365,0x1365,0x12ae,0x12b4,0x12db,0x12ae,
-0x12ae,0x12ae,0x12ae,0x12ae,0x12b4,0x12b7,0x12db,0x12db,0x12b7,0x12db,0x12ae,0x12b7,0x12b7,0x12ba,0x12db,0x12ae,
-0x12ae,0x12db,0x12b1,0x12b1,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x12c3,0x12c3,
-0x12c3,0x12c3,0x13ef,0x13ce,0x12cc,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x187e,
-0x187e,0x187e,0x187e,0x187e,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x1581,
-0x1581,0x1ad0,0x1ad0,0x1ad0,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,
-0x12c6,0x12c6,0x12c6,0x12c6,0x13d7,0x13d7,0x12d2,0x13d7,0x13d7,0x13d7,0x12d2,0x13d7,0x13d7,0x13d7,0x12cc,0x12cc,
-0x12cc,0x12cc,0x12cc,0x13d1,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x12cf,0x13d4,0x13d4,0x13d4,0x13d4,
-0x13d4,0x13d4,0x13d4,0x12cf,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x145b,0x145b,
-0x1a2e,0x1ad0,0x1ad0,0x1ad0,0x13da,0x13da,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x12cf,0x13d4,0x12cf,
-0x12cf,0x13d4,0x13da,0x12d5,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,
-0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,
-0x12f9,0x12f9,0x12f9,0x12f9,0x1383,0x1380,0x1383,0x1380,0x1383,0x1380,0x1383,0x1380,0x1383,0x1380,0x1449,0x1560,
-0x1560,0x1560,0x17fa,0x196e,0x1560,0x1560,0x1749,0x1749,0x1749,0x1743,0x1749,0x1743,0x1971,0x196e,0x1a2b,0x1a28,
-0x1a2b,0x1a28,0x1a2b,0x1a28,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,
-0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,
-0x13a7,0x13a7,0x13a7,0x13a7,0x13bc,0x13ad,0x13bc,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,
-0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,
-0x13bf,0x13bf,0x13bf,0x13bf,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13c5,0x13c5,0x13c5,0x13c5,
-0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,
-0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13cb,0x13cb,0x13cb,0x13cb,
-0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,
-0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13fb,0x13f8,0x1923,0x1923,
-0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,
-0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1404,0x1404,0x1404,0x1404,
-0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
-0x1404,0x1401,0x1401,0x1404,0x1404,0x1404,0x1404,0x1404,0x1401,0x1404,0x1404,0x1404,0x1401,0x1404,0x1401,0x1404,
-0x1401,0x1404,0x1404,0x1404,0x1404,0x1404,0x1407,0x1404,0x1404,0x1404,0x1404,0x1401,0x1404,0x1401,0x1401,0x1404,
-0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1401,0x1401,0x1401,0x1401,
-0x1401,0x1401,0x1401,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
-0x1404,0x1404,0x1404,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x1404,0x1404,0x1404,
-0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,
-0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x158a,0x158a,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
-0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
-0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1596,0x1590,0x1590,0x1596,0x1596,0x1596,0x1596,
-0x1596,0x1596,0x1596,0x1596,0x1596,0x17d0,0x17d0,0x17d0,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1596,0x1404,
-0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
-0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1596,0x17d0,0x17d0,
-0x1404,0x1404,0x1404,0x1404,0x1404,0x1407,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
-0x1404,0x1404,0x1404,0x1404,0x1590,0x1590,0x1596,0x1596,0x1590,0x1596,0x1596,0x1596,0x158d,0x158d,0x1596,0x1596,
-0x1404,0x1404,0x1407,0x1407,0x1407,0x1701,0x1404,0x1407,0x1404,0x1404,0x1407,0x1599,0x1599,0x1596,0x1596,0x17d0,
-0x17d0,0x17d0,0x17d0,0x17d0,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,
-0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
-0x1404,0x1590,0x1590,0x1596,0x1701,0x1596,0x1590,0x1596,0x17d0,0x17d0,0x17d0,0x17d3,0x17d3,0x17d3,0x17d3,0x17d3,
-0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
-0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1596,
-0x1404,0x1596,0x1407,0x1407,0x1404,0x1404,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,
-0x1407,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
-0x1404,0x1404,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,
-0x1407,0x1407,0x1407,0x1407,0x1407,0x1404,0x1404,0x1404,0x1407,0x1404,0x1404,0x1404,0x1404,0x1407,0x1407,0x1407,
-0x1404,0x1407,0x1407,0x1407,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1407,0x1404,0x1407,0x1404,0x1404,
-0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
-0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1701,0x1404,0x1404,0x1404,0x1404,0x1596,0x1590,0x17d0,
-0x145e,0x145e,0x145e,0x145e,0x158a,0x158a,0x158d,0x158d,0x158d,0x1593,0x1596,0x17d0,0x17d0,0x17d0,0x17d0,0x1758,
-0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
-0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,0x1596,0x1590,0x1590,0x1596,0x1599,0x1599,0x1596,0x1596,
-0x1596,0x1596,0x1887,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,0x1590,0x1596,0x1590,0x1590,0x1590,0x1590,
-0x1596,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,0x1590,0x1590,0x1590,0x1596,0x158d,0x158d,0x158d,0x158d,
-0x158d,0x158d,0x1596,0x1404,0x1404,0x1404,0x1404,0x1404,0x14e8,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
-0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x14e8,0x140a,0x140a,0x140a,0x14e8,0x140a,0x14e8,
-0x140a,0x14e8,0x140a,0x14e8,0x140a,0x140a,0x140a,0x14e8,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x14e8,0x14e8,
-0x140a,0x140a,0x140a,0x140a,0x14e8,0x140a,0x14e8,0x14e8,0x140a,0x140a,0x140a,0x140a,0x14e8,0x140a,0x140a,0x140a,
-0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x1707,0x1707,0x17d6,0x17d6,0x140d,0x140d,0x140d,
-0x140a,0x140a,0x140a,0x140d,0x140d,0x140d,0x140d,0x140d,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,
-0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,
-0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,
-0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1413,0x1410,0x1410,0x1410,0x1410,
-0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1413,0x1413,0x1413,0x1410,
-0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,
-0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,
-0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1803,0x1803,0x1800,0x175b,0x1464,0x1464,0x1464,0x1464,
-0x1464,0x1464,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,
-0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x15a2,0x1470,0x1470,0x1470,0x1482,0x1482,0x1482,0x1482,0x1482,
-0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,
-0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,
-0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,
-0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,
-0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,
-0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x1a34,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,
-0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,
-0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14ac,0x14ac,0x14b8,0x14be,0x14be,0x14be,0x14be,0x14be,
-0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,
-0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14b8,0x14b8,0x14b8,0x14ac,0x14ac,
-0x14ac,0x14ac,0x14ac,0x14ac,0x14ac,0x14ac,0x14ac,0x14b8,0x14bb,0x14be,0x14c1,0x14c1,0x14be,0x14c4,0x14c4,0x14af,
-0x14b2,0x1764,0x1767,0x1767,0x1767,0x15ab,0x1adc,0x1ad9,0x14b5,0x14b5,0x14b5,0x14b5,0x14b5,0x14b5,0x14b5,0x14b5,
-0x14b5,0x14b5,0x15a8,0x176d,0x1770,0x176a,0x1773,0x1773,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,
-0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,
-0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,
-0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,
-0x153c,0x153c,0x1956,0x1956,0x1956,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,
-0x153c,0x1a22,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x18ba,0x1956,0x1956,0x1956,0x1956,0x1956,
-0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1590,0x1590,0x1596,0x1596,0x1596,0x1590,0x1590,0x1590,
-0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,0x1596,0x1596,0x158d,0x158d,0x158d,0x158d,
-0x158d,0x158d,0x158d,0x158d,0x1596,0x1596,0x1596,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,
-0x1590,0x1590,0x1596,0x1596,0x1596,0x1596,0x1590,0x1590,0x1599,0x1590,0x1590,0x1590,0x1590,0x1704,0x1704,0x1590,
-0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1884,0x1596,0x1590,0x1590,0x1596,0x1590,0x1590,0x1590,
-0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,0x1596,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,
-0x1596,0x1590,0x1590,0x1590,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,
-0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,
-0x15ba,0x15ba,0x15ba,0x15ba,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,
-0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,
-0x15cc,0x15cc,0x15cc,0x15cc,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,
-0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,
-0x15d2,0x15d2,0x15d2,0x15d2,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,
-0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,
-0x15d5,0x15d5,0x15d5,0x15d5,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,
-0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,
-0x1614,0x1614,0x1614,0x1605,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,
-0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x1617,
-0x1620,0x1620,0x1620,0x1620,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,
-0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,
-0x1623,0x1623,0x1623,0x1623,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x1635,0x163e,0x163e,0x163e,
-0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,
-0x163e,0x163e,0x163e,0x163e,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,
-0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,
-0x1647,0x1647,0x1647,0x1647,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,
-0x1659,0x1659,0x1659,0x1659,0x1656,0x1656,0x1656,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x1656,
-0x1656,0x164a,0x1656,0x164d,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,
-0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,
-0x1659,0x1659,0x1659,0x1659,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,
-0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,
-0x167d,0x167a,0x167a,0x167a,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,
-0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x168c,0x168c,0x168c,0x1689,0x1689,0x1689,
-0x1686,0x1686,0x1686,0x1686,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,
-0x169b,0x169b,0x169b,0x169b,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x16a1,0x16a1,0x1695,0x1692,0x1692,
-0x1692,0x1692,0x1692,0x1692,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,
-0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,
-0x169b,0x169b,0x169b,0x169b,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,
-0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a4,0x16a4,0x16a4,0x16a4,0x16a4,
-0x16a4,0x16a4,0x16a4,0x16a4,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,
-0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,
-0x16aa,0x16aa,0x16aa,0x16aa,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,
-0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,
-0x16ce,0x16ce,0x16ce,0x16ce,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,
-0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,
-0x16d7,0x16d7,0x16d7,0x16d7,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,
-0x16ef,0x16ef,0x16ef,0x16ef,0x16da,0x16e9,0x16e9,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16e9,0x16da,0x16ec,
-0x16ec,0x16da,0x16ec,0x16da,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,
-0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,
-0x16ef,0x16ef,0x16ef,0x16ef,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,
-0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,
-0x16f8,0x16f8,0x16f8,0x16f8,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,
-0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,
-0x16fe,0x16fe,0x16fe,0x16fe,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,
-0x1740,0x1740,0x1740,0x1740,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,
-0x1956,0x1956,0x1956,0x1a22,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,
-0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,
-0x1761,0x1761,0x1761,0x1761,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,
-0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,
-0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x17a3,0x17a0,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,
-0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,
-0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,
-0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,
-0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,
-0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,
-0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,
-0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,
-0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,
-0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,
-0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,
-0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c4,0x17c4,0x17c4,0x17c4,0x17c1,
-0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c4,0x17c4,0x17c4,
-0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c1,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,
-0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,
-0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,
-0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,
-0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,
-0x18c6,0x18c6,0x18c6,0x18c6,0x1b3f,0x1a94,0x1a94,0x1a97,0x17df,0x17df,0x17df,0x17df,0x17df,0x17df,0x17df,0x17df,
-0x17e2,0x1890,0x1890,0x1890,0x1890,0x1890,0x1890,0x192c,0x17df,0x17df,0x17df,0x17df,0x17df,0x188d,0x188d,0x188d,
-0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,
-0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x1890,0x188d,
-0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1890,0x192c,0x192c,0x1890,0x1890,0x1890,0x1890,0x1890,
-0x1890,0x1890,0x188d,0x180f,0x1890,0x1890,0x1890,0x1a94,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x180f,0x188d,
-0x188d,0x188d,0x188d,0x188d,0x1929,0x1a0a,0x1a0a,0x1a0a,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,
-0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x1929,0x1824,0x1824,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,
-0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,
-0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,
-0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,
-0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,
-0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x186f,0x186f,0x186f,
-0x185a,0x185a,0x185a,0x185a,0x185a,0x185a,0x185a,0x185a,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,
-0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,
-0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,
-0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,
-0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,
-0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,
-0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1b48,0x1b48,0x1b48,0x1b48,0x1b48,
-0x1b48,0x1b48,0x1b48,0x1b48,0x1b48,0x1b48,0x1b48,0x1b48,0x18f0,0x18f0,0x18f0,0x18f0,0x1a46,0x1a46,0x18f3,0x18f3,
-0x18f3,0x18f3,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18ed,
-0x18de,0x18e1,0x18e4,0x18f6,0x18f6,0x1995,0x18e7,0x18e7,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,
-0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,
-0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,
-0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x18fc,0x1902,0x18ff,0x18ff,0x18ff,
-0x18ff,0x190e,0x1914,0x18ff,0x18ff,0x18ff,0x18ff,0x190b,0x1911,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,
-0x18ff,0x18ff,0x18ff,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,
-0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,
-0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,
-0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1a0a,
-0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,
-0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,
-0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,
-0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,
-0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,
-0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,
-0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,
-0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,
-0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,
-0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,
-0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,
-0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,
-0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,
-0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,
-0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,
-0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,
-0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,
-0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19ef,0x19ef,0x19ef,0x1a0a,0x1a0a,0x1a0a,0x1b3c,0x1b3c,0x1a94,0x1a94,0x1a94,
-0x1a94,0x1a94,0x1a94,0x1b3c,0x1b3c,0x1b3c,0x1a94,0x1a94,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0a,0x1a10,0x1a10,0x1a0a,
-0x1a10,0x1a10,0x1a94,0x1a97,0x1a94,0x1a94,0x1a94,0x1a94,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,
-0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,
-0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,
-0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,
-0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,
-0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1aaf,0x1aaf,0x1a79,0x1aaf,0x1a79,0x1a79,0x1a79,0x1a79,
-0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a7f,0x1a7f,0x1a7f,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,
-0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,
-0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,
-0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,
-0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,
-0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,
-0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,
-0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,
-0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,
-0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,
-0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0,0,0,0
-};
-
-static const UTrie2 propsVectorsTrie={
- propsVectorsTrie_index,
- propsVectorsTrie_index+5024,
- NULL,
- 5024,
- 26204,
- 0xa40,
- 0x1420,
- 0x0,
- 0x0,
- 0x110000,
- 0x79f8,
- NULL, 0, FALSE, FALSE, 0, NULL
-};
-
-static const uint32_t propsVectors[6999]={
-0x67,0,0,0x67,0,0x4e00000,0x67,0x80000,0x20,0x867,0,0,0xa67,0,0,0xb67,
-0,0,0xc67,0,0,0xd67,0,0,0xe67,0,0,0x1067,0,0,0x1167,0,
-0,0x1267,0,0,0x1367,0,0,0x1467,0,0,0x1567,0,0,0x1667,0,0,
-0x1767,0,0,0x1867,0,0,0x1967,0,0,0x1a67,0,0,0x1b67,0,0,0x1d67,
-0,0,0x1f67,0,0,0x2067,0,0,0x2267,0,0,0x2367,0,0,0x2467,0,
-0,0x2567,0,0,0x2767,0,0,0x2867,0x80000,0x20,0x2967,0,0,0x2a67,0,0x1600000,
-0x2b67,0,0,0x2d67,0,0,0x3167,0x20000000,0,0x3267,0x20000000,0,0x3a67,0,0,0x3b67,
-0,0,0x3c67,0,0,0x3e67,0,0,0x4067,0,0,0x4167,0,0,0x4467,0,
-0,0x4867,0,0,0x4967,0,0,0x4a67,0,0,0x5067,0,0,0x5167,0,0,
-0x5467,0,0,0x5567,0,0,0x5667,0x80000,0x20,0x5767,0,0,0x5867,0,0,0x5967,
-0,0,0x5b67,0,0,0x5c67,0,0,0x5d67,0,0,0x6067,0x80000,0x20,0x6267,0,
-0,0x6367,0,0,0x6467,0,0,0x6567,0,0,0x6f67,0,0,0x7067,0,0,
-0x7367,0x20000000,0,0x7567,0,0,0x7667,0,0,0x7767,0,0,0x7867,0,0,0x7a67,
-0,0,0x7b67,0,0,0x7c67,0,0,0x7e67,0,0,0x7f67,0,0,0x8167,0,
-0,0x8267,0,0,0x8367,0,0,0x8467,0,0,0x8567,0,0,0x8667,0,0,
-0x8767,0,0,0x8867,0,0,0x8967,0,0,0x8b67,0,0,0x8c67,0,0,0x8e67,
-0x20000000,0,0x8f67,0,0,0x9067,0,0,0x9167,0,0,0x9267,0,0,0x9367,0,
-0,0x9567,0,0,0x9667,0,0,0x9767,0,0,0x9867,0,0,0x9967,0,0,
-0x9a67,0,0,0x9c67,0,0,0x9f67,0,0,0xa167,0,0,0xa367,0,0,0xa467,
-0,0,0xa567,0,0,0xa667,0,0,0xa767,0,0,0xa867,0,0,0xa967,0,
-0,0xaa67,0,0x4e00000,0xab67,0,0x4e00000,0xac67,0,0,0xad67,0,0,0xae67,0,0,
-0xaf67,0,0,0xb167,0,0,0xb267,0,0,0xb467,0,0,0xb567,0,0,0xb767,
-0,0,0xb867,0,0,0xb967,0,0,0xba67,0,0,0xbc67,0,0,0xbd67,0,
-0,0xbe67,0,0,0xbf67,0,0,0xc067,0,0,0xc167,0,0,0xc267,0,0,
-0xc367,0,0x4e00000,0xc467,0,0x4e00000,0xc667,0,0,0xc767,0,0,0xc867,0,0,0xc967,
-0,0,0xca67,0,0,0xcc67,0,0x4e00000,0xcf67,0,0x4e00000,0xd067,0,0x4e00000,0xd267,0,
-0,0xd367,0,0,0xd467,0,0,0xd567,0,0,0xd667,0,0,0xd867,0,0,
-0xda67,0,0,0xdb67,0,0,0xdc67,0,0,0xdd67,0,0,0xde67,0,0,0xdf67,
-0,0,0xe067,0,0,0xe167,0,0,0xe267,0,0,0xe367,0,0x4e00000,0xe467,0,
-0,0xe567,0,0,0xe667,0,0,0xe767,0,0,0xe867,0,0,0xe967,0,0,
-0xea67,0,0,0xeb67,0,0,0xec67,0,0,0xed67,0,0,0xee67,0,0,0xef67,
-0,0,0xf167,0,0,0xf367,0,0,0xf567,0,0,0xf667,0,0,0xf767,0,
-0,0xf867,0,0,0xf967,0,0,0xfa67,0,0x4e00000,0xfb67,0,0,0xfc67,0,0,
-0xfd67,0,0,0xfe67,0,0,0x10167,0,0,0x10267,0,0,0x10367,0,0,0x10467,
-0,0,0x10567,0,0x4e00000,0x10667,0,0,0x10767,0,0,0x10867,0,0,0x10967,0,
-0,0x10a67,0,0,0x10b67,0,0,0x10c67,0,0,0x10d67,0,0,0x10e67,0,0,
-0x10f67,0,0,0x11067,0,0,0x11367,0,0,0x11467,0,0,0x11567,0,0,0x11667,
-0,0,0x11767,0,0,0x11867,0,0,0x11967,0,0x4e00000,0x11a67,0,0,0x11b67,0,
-0,0x11c67,0,0,0x11d67,0,0,0x11e67,0,0,0x11f67,0,0,0x12067,0,0,
-0x12167,0,0,0x12267,0,0,0x12367,0,0,0x12467,0,0,0x12567,0,0,0x12667,
-0,0,0x12767,0,0,0x12867,0,0,0x12967,0,0,0x12a67,0,0x4e00000,0x12b67,0,
-0,0x12c67,0,0,0x12d67,0,0,0x12f67,0,0,0x13067,0,0,0x13167,0,0,
-0x13267,0,0,0x13367,0,0,0x13467,0,0,0xa0067,0,0xe00000,0xa4767,0,0xe00000,0xa4f67,
-0,0xe00000,0xa5e67,0,0xe00000,0xa5f67,0,0xe00000,0xac567,0,0xe00000,0xad167,0,0xe00000,0xb0067,0,
-0xe00000,0xb1267,0,0xe00000,0xb2e67,0,0xe00000,0x11000100,0,0x900020,0x11000100,0x40000001,0x440020,0x11000100,0x40000001,0x643020,
-0x11000100,0x40000001,0xa5a040,0x11000100,0x40000001,0x116a8a0,0x11000200,0,0x900020,0x11000200,0x4000001,0xc4000b,0x11000200,0x7c00100,0x220402,0x11000200,
-0x24000000,0x14200000,0x11000200,0x24000008,0x1710000,0x11000200,0x40000001,0x1d3b020,0x11000219,0x7c00100,0x220401,0x11000219,0x7c00100,0x250401,0x11000319,0x7c00100,
-0x220401,0x11000319,0x7c00100,0x220402,0x11000319,0x7c00100,0x250400,0x11000319,0x7c00100,0x250401,0x11000419,0x7c00100,0x220400,0x11000419,0x7c00100,0x220401,
-0x11000419,0x7c00100,0x220402,0x11000419,0x7c00100,0x230400,0x11000419,0x7c00100,0x250400,0x11000419,0x7c00100,0x250401,0x11000419,0x7c00100,0x250402,0x11000519,
-0x7c00100,0x220400,0x11000519,0x7c00100,0x230400,0x11000600,0x4000400,0x200002,0x11000600,0x4000400,0x200400,0x11000600,0x7c00500,0x220400,0x11000600,0x7c00500,
-0x230400,0x11000600,0x7c00500,0x530400,0x11000600,0x7c00d00,0x230400,0x11000619,0x7c00500,0x22040f,0x11000800,0x4000010,0x1001401,0x11000800,0x4000400,0x200001,
-0x11000800,0x6800010,0x201001,0x11000800,0x7c00500,0x230401,0x11000807,0x7c00100,0x220400,0x11000807,0x7c00100,0x250400,0x1100080e,0x4000400,0x200000,0x1100080e,
-0x4000400,0x200002,0x1100080e,0x7000500,0x220402,0x1100080e,0x7c00100,0x220400,0x1100080e,0x7c00100,0x220401,0x1100080e,0x7c00100,0x220402,0x1100080e,0x7c00100,
-0x250400,0x1100080e,0x7c00100,0x250401,0x1100080e,0x7c00120,0x220402,0x1100080e,0x7c00120,0x250402,0x11000908,0x4000000,0x200000,0x11000908,0x7c00100,0x220400,
-0x11000908,0x7c00100,0x220401,0x11000908,0x7c00100,0x250400,0x11000908,0x7c00100,0x250401,0x11000a03,0x4000000,0x200400,0x11000a03,0x4000000,0x201000,0x11000a03,
-0x4000000,0x270000,0x11000a03,0x7c00100,0x220400,0x11000a03,0x7c00100,0x220402,0x11000a03,0x7c00100,0x250400,0x11000a03,0x7c00500,0x230400,0x11000a03,0xc000010,
-0x1049400,0x11000b13,0x2802500,0x962460,0x11000b13,0x4000000,0x200000,0x11000b13,0x4000000,0x201000,0x11000b13,0x4000000,0x230400,0x11000b13,0x4000002,0x400000,
-0x11000b13,0x4000010,0x200000,0x11000b13,0x7c00100,0x2633800,0x11000c00,0x80000000,0x218960,0x11000c02,0x2802100,0x962460,0x11000c02,0x2802400,0x962460,0x11000c02,
-0x4000000,0x200000,0x11000c02,0x4000000,0x1329400,0x11000c02,0x4000000,0x1329800,0x11000c02,0x4000000,0x1500000,0x11000c02,0x6800000,0x1329800,0x11000c02,0x7c00100,
-0x230400,0x11000c02,0x7c00100,0x230401,0x11000c02,0x7c00100,0x230402,0x11000c02,0x7c00500,0x230400,0x11000c02,0x7d00100,0x230400,0x11000f01,0x2802400,0x962460,
-0x11000f0a,0x2802100,0x962460,0x11000f0a,0x2802400,0x962460,0x11000f0a,0x2806400,0x962460,0x11000f0a,0x4000000,0x200000,0x11000f0a,0x6800100,0x962540,0x11000f0a,
-0x7c00100,0x230400,0x11000f0a,0x7c00100,0x230401,0x11001004,0x2802100,0x962460,0x11001004,0x2802400,0x962460,0x11001004,0x2806400,0x962460,0x11001004,0x4000000,
-0x200000,0x11001004,0x4000000,0x1500000,0x11001004,0x6800100,0x962540,0x11001004,0x6800100,0x962541,0x11001004,0x7c00100,0x230400,0x11001004,0x7c00100,0x230401,
-0x11001110,0x2802100,0x962460,0x11001110,0x2802400,0x962460,0x11001110,0x2806400,0x962460,0x11001110,0x6800100,0x962540,0x11001110,0x7c00100,0x230400,0x11001110,
-0x7c00100,0x230401,0x1100120f,0x2802100,0x962460,0x1100120f,0x2802400,0x962460,0x1100120f,0x2806400,0x962460,0x1100120f,0x6800100,0x962540,0x1100120f,0x7c00100,
-0x230400,0x1100131f,0x2802100,0x962460,0x1100131f,0x2802400,0x962460,0x1100131f,0x2806400,0x962460,0x1100131f,0x4000000,0x200000,0x1100131f,0x6800000,0x1329800,
-0x1100131f,0x6800100,0x962540,0x1100131f,0x6800100,0x962541,0x1100131f,0x7c00100,0x230400,0x1100131f,0x7c00100,0x230401,0x11001423,0x2802100,0x962460,0x11001423,
-0x2806400,0x962460,0x11001423,0x6800100,0x962540,0x11001423,0x6800100,0x962541,0x11001423,0x7c00100,0x230400,0x11001423,0x7c00100,0x230401,0x11001524,0x2802100,
-0x962460,0x11001524,0x2802100,0x962461,0x11001524,0x2806400,0x962460,0x11001524,0x6800000,0x1329800,0x11001524,0x6800100,0x962540,0x11001524,0x7c00100,0x230400,
-0x11001615,0x2802100,0x962460,0x11001615,0x2806400,0x962460,0x11001615,0x6800100,0x962540,0x11001615,0x6800100,0x962541,0x11001615,0x7c00100,0x230400,0x1100171a,
-0x2802100,0x962460,0x1100171a,0x2806400,0x962460,0x1100171a,0x6800000,0x1329800,0x1100171a,0x6800100,0x962540,0x1100171a,0x6800100,0x962541,0x1100171a,0x7c00100,
-0x230400,0x11001900,0x4000000,0x1600000,0x11001926,0x2802100,0x1862460,0x11001926,0x2802400,0x1862460,0x11001926,0x2806100,0x1862460,0x11001926,0x4000000,0x200000,
-0x11001926,0x4000010,0x400000,0x11001926,0x6800000,0x1329800,0x11001926,0x7800100,0x1830142,0x11001926,0x7c00100,0x1830000,0x11001926,0x7c00900,0x1830000,0x11001926,
-0x7e00100,0x1830000,0x11001a18,0x2802100,0x1862460,0x11001a18,0x2802400,0x1862460,0x11001a18,0x6800000,0x1329800,0x11001a18,0x7800100,0x1830142,0x11001a18,0x7c00100,
-0x1830000,0x11001a18,0x7c00100,0x1830002,0x11001a18,0x7c00900,0x1830000,0x11001a18,0x7e00100,0x1830000,0x11001d0c,0x7c00100,0x230400,0x11001d0c,0x7c00100,0x250400,
-0x11001e12,0x7c00100,0x2230500,0x11001e12,0x7c00100,0x2330520,0x11001e12,0x7c80100,0x2330520,0x11002619,0x7c00100,0x220401,0x11002619,0x7c00100,0x220402,0x11002619,
-0x7c00100,0x250401,0x1100270e,0x4000400,0x200001,0x1100270e,0x4000400,0x200002,0x1100270e,0x4000400,0x500001,0x1100270e,0x7c00100,0x220401,0x1100270e,0x7c00100,
-0x250401,0x11002800,0x80000,0x918820,0x11002800,0x80000,0x1c18020,0x11002800,0x180000,0x918820,0x11002800,0x4000001,0x445801,0x11002800,0x4000001,0x445802,
-0x11002800,0x4000001,0xc4000b,0x11002800,0x6800000,0x201c00,0x11002800,0x6800020,0x201c00,0x11002800,0x24000000,0x200000,0x11002800,0x24000000,0x200002,0x11002800,
-0x24000000,0x810000,0x11002800,0x24000000,0x1410000,0x11002800,0x24000000,0x1500000,0x11002800,0x24000000,0x1500002,0x11002800,0x24000002,0x400000,0x11002800,0x24000006,
-0xc0000b,0x11002800,0x24000008,0x1410000,0x11002800,0x24000008,0x1710000,0x11002800,0x24000020,0x1001400,0x11002800,0x24000020,0x1500002,0x11002800,0x2c000010,0x1248000,
-0x11002800,0x2c000010,0x15248002,0x11002800,0x40000001,0x63b020,0x11002800,0x40080000,0x918820,0x11002801,0x80000,0xaa65620,0x11002801,0x82000,0x962460,0x11002900,
-0x4000000,0x20000e,0x11002900,0x4000000,0x20000f,0x11002900,0x4000020,0x20000e,0x11002900,0x4000020,0x20000f,0x11002900,0x4000020,0x81000e,0x11002900,0x4000020,
-0x81000f,0x11002900,0x4000020,0x141000e,0x11002900,0x4000020,0x141000f,0x11002900,0x4000022,0x20000e,0x11002900,0x4000022,0x20000f,0x11002a00,0x4000000,0x1500000,
-0x11002a00,0x4000000,0x1600000,0x11002a00,0x4000000,0x1600002,0x11002b01,0x2000,0x962460,0x11002b01,0x2802020,0x962460,0x11002c00,0x4000000,0x200000,0x11002c00,
-0x4000000,0x200002,0x11002c00,0x4000000,0x20000f,0x11002c00,0x4000020,0x200000,0x11002c00,0x7c00000,0x200000,0x11002c00,0x7c00020,0x200000,0x11002c00,0x7c00120,
-0x220405,0x11002c00,0x7c00120,0x230402,0x11002c00,0x7c00120,0x250402,0x11002c00,0x7c00120,0x250405,0x11002c19,0x7c00100,0x250400,0x11002c19,0x7c00100,0x250401,
-0x11002d00,0x4000000,0x100006,0x11002d00,0x4000000,0x200006,0x11002d19,0x7c00100,0x220402,0x11002d19,0x7c00100,0x230400,0x11002d19,0x7c00100,0x250402,0x11002e00,
-0x24000000,0x200000,0x11002e00,0x24000020,0x200000,0x11002e00,0x24000020,0x200001,0x11002e00,0x24000020,0x14200000,0x11002f00,0x24000020,0x200000,0x11002f00,0x24000020,
-0x200001,0x11002f00,0x24000020,0x200002,0x11002f00,0x24000020,0xf00000,0x11002f00,0x24000020,0x1600000,0x11002f00,0x24000022,0x1600000,0x11003000,0x24000000,0x200000,
-0x11003000,0x24000000,0x14200000,0x11003000,0x24000020,0x200000,0x11003000,0x24000020,0x810000,0x11003000,0x24000020,0x1410000,0x11003100,0x24000000,0x200000,0x11003200,
-0x24000000,0x200000,0x11003300,0x4000000,0x100003,0x11003400,0x24000000,0x100000,0x11003400,0x24000000,0x200000,0x11003500,0x24000000,0x200000,0x11003600,0x24000000,
-0x200000,0x11003600,0x24000000,0x14200000,0x11003600,0x24000020,0x200000,0x11003700,0x24000000,0x200000,0x11003700,0x24000000,0x4200000,0x11003700,0x24000000,0x4e00000,
-0x11003700,0x24000000,0x14200000,0x11003700,0x24000000,0x14e00000,0x11003700,0x24000000,0x96800000,0x11003700,0x24000020,0x4200000,0x11003800,0x4000000,0x100000,0x11003800,
-0x24000000,0x200000,0x11003800,0x24000000,0xb00000,0x11003800,0x24000000,0x1710000,0x11003800,0x24000000,0x4200000,0x11003800,0x24000000,0x4e00000,0x11003800,0x24000000,
-0x14200000,0x11003800,0x24000000,0x14b00000,0x11003800,0x24000000,0x14e00000,0x11003800,0x24000000,0x96800000,0x11005003,0x7c00100,0x220402,0x11005013,0x2802500,0x962460,
-0x11005013,0x4000020,0x200005,0x11005013,0x7c00100,0x2633801,0x11005013,0x7c00100,0x2633802,0x11005013,0x7c00100,0x2633805,0x11005019,0x7c00100,0x220402,0x11005100,
-0x24000000,0x810000,0x11005100,0x24000000,0x1410000,0x11005102,0x7000100,0x230408,0x11005102,0x7c00100,0x230404,0x11005102,0x7c00100,0x230407,0x11005102,0x7c00100,
-0x230408,0x11005102,0x7c00100,0x230409,0x11005201,0x2802400,0x962460,0x11005500,0x80000,0x1e18820,0x11005502,0x7000100,0x230408,0x11005502,0x7c00100,0x230404,
-0x11005502,0x7c00100,0x230407,0x11005502,0x7c00100,0x230408,0x11005502,0x7c00100,0x230409,0x11005667,0x1000,0,0x11020200,0x80004,0x418820,0x11020200,
-0x4000000,0x100006,0x11020200,0x4000000,0x10000f,0x11020200,0x4000400,0x100002,0x11020200,0x4000400,0x500002,0x11020200,0x6800c00,0x101000,0x11020200,0x24000000,
-0x100000,0x11020200,0x24000000,0x1400000,0x11020200,0x24000000,0x1500000,0x11020200,0x24000000,0x1600000,0x11020200,0x24000000,0x14200000,0x11020200,0x24000020,0x100000,
-0x11020200,0x24000020,0x1600000,0x11020219,0x7c00100,0x12040f,0x11020219,0x7c00100,0x220400,0x11020219,0x7c00100,0x220401,0x11020219,0x7c00100,0x250400,0x11020319,
-0x7c00100,0x220400,0x11020319,0x7c00100,0x220401,0x11020319,0x7c00100,0x220402,0x11020319,0x7c00100,0x250400,0x11020319,0x7c00100,0x250402,0x11020319,0x7d00100,
-0x220402,0x11020419,0x7c00100,0x220401,0x11020519,0x7c00100,0x220400,0x11020600,0x4000400,0x100002,0x11020600,0x4000400,0x200400,0x11020600,0x7c00500,0x130400,
-0x11020600,0x7c00d00,0x130400,0x11020701,0x2802400,0x962460,0x11020701,0x2802400,0x962461,0x11020701,0x2802400,0xc62460,0x1102080e,0x7c00100,0x220400,0x1102080e,
-0x7c00100,0x250400,0x11020908,0x7c00100,0x220400,0x11020908,0x7c00100,0x220401,0x11020908,0x7c00100,0x250400,0x11020908,0x7c00100,0x250401,0x11022800,0x24000000,
-0x100000,0x11022800,0x24000000,0x200000,0x11022800,0x24000000,0x200002,0x11022800,0x24000000,0x401000,0x11022800,0x24000000,0xf00002,0x11022800,0x24000000,0xf0ac02,
-0x11022800,0x24000000,0x1500000,0x11022800,0x24000002,0x100000,0x11022800,0x24000002,0x370000,0x11022800,0x24000002,0x470000,0x11022800,0x24000006,0x400000,0x11022800,
-0x24000008,0x1710000,0x11022800,0x24000008,0x1712c00,0x11022800,0x24000020,0x100000,0x11022800,0x24000020,0x1500000,0x11022800,0x24000020,0x1500002,0x11022900,0x4000000,
-0x10000e,0x11022900,0x4000000,0x10000f,0x11022919,0x7c00100,0x12040f,0x11022c00,0x4000000,0x100002,0x11022c00,0x4000000,0x1500002,0x11022c00,0x4000000,0x1600002,
-0x11022c00,0x4000000,0x1410000f,0x11022c00,0x7c00120,0x120405,0x11022c0e,0x7c00100,0x250401,0x11022c19,0x7c00100,0x150401,0x11022d00,0x4000000,0x100006,0x11022d00,
-0x4000000,0x200006,0x11022d19,0x7c00100,0x120402,0x11022d19,0x7c00100,0x150402,0x11022e00,0x24000000,0x200000,0x11022e00,0x24000020,0x100000,0x11022e00,0x24000020,
-0x14100000,0x11022f00,0x24000020,0x100000,0x11022f00,0x24000020,0x100001,0x11022f00,0x24000020,0x100002,0x11023000,0x24000000,0x100000,0x11023300,0x4000000,0x100002,
-0x11023300,0x4000000,0x100003,0x11023300,0x4000100,0x120403,0x11023300,0x4000100,0x150403,0x11023300,0x4000100,0x14150403,0x11023400,0x24000000,0x100000,0x11023500,
-0x24000000,0x100000,0x11023600,0x24000000,0x100000,0x11023600,0x24000020,0x100000,0x11023600,0x24000020,0x14100000,0x11023700,0x24000000,0x4100000,0x11023700,0x24000000,
-0x4e00000,0x11023700,0x24000000,0x14100000,0x11023700,0x24000000,0x14e00000,0x11023700,0x24000020,0x100000,0x11023700,0x24000020,0x4100000,0x11023700,0x24000020,0x14100000,
-0x11023800,0x4000000,0x100000,0x11023800,0x24000000,0x200000,0x11024e67,0,0,0x11025600,0x4000000,0x100000,0x11042a00,0x4000000,0x1600000,0x11045700,
-0x4000000,0x20000a,0x11045700,0x4000020,0x20000a,0x11045712,0x7c00100,0xe3040a,0x11045712,0x7c80100,0xe3040a,0x11045716,0x7c00100,0xe30c0a,0x11045716,0x7c00100,
-0x2530c0a,0x11063d00,0x4000001,0x445811,0x11065700,0x4000000,0x810011,0x11065700,0x4000000,0xe00011,0x11065700,0x4000000,0x1410011,0x11065700,0x4000000,0x1500011,
-0x11065700,0x4000000,0x1600011,0x11065700,0x4000006,0xe70011,0x11065700,0x4000008,0xe00011,0x11065700,0x4000008,0xe02c11,0x11065700,0x4000010,0x871411,0x11065700,
-0x4000010,0x1201411,0x11065700,0x4000010,0x1271011,0x11065700,0x4000020,0xe00011,0x11065700,0x4000400,0xe00011,0x11065700,0x4000420,0xe00011,0x11065700,0x6800000,
-0xe01c11,0x11065700,0x6800040,0xe29811,0x11065700,0xc000010,0x80ac11,0x11065700,0xc000010,0xb48011,0x11065719,0x7c00100,0xe20411,0x11065719,0x7c00100,0xe50411,
-0x11065719,0x7c00140,0xe20411,0x11065719,0x7c00140,0xe50411,0x11080100,0x6800000,0x201c00,0x11080100,0x68000c0,0x19329800,0x11080100,0x24000000,0x200000,0x11080100,
-0x24000000,0x810000,0x11080100,0x24000000,0x1410000,0x11080100,0x24000000,0x1500000,0x11080100,0x24000000,0x1600000,0x11080100,0x24000000,0x1b00000,0x11080100,0x24000000,
-0x2410000,0x11080100,0x24000000,0x18200000,0x11080100,0x24000006,0xd70000,0x11080100,0x24000008,0x1713c00,0x11080100,0x24000008,0x1714000,0x11080100,0x24000010,0x1001400,
-0x11080100,0x24000010,0x1071000,0x11080100,0x24000010,0x1071400,0x11080100,0x24000020,0x200000,0x11080100,0x24000020,0x400000,0x11080100,0x24000020,0x1600000,0x11080100,
-0x24000400,0x200000,0x11080100,0x24000420,0x200000,0x11080100,0x2c000010,0xb48000,0x11080100,0x2c000010,0x100ac00,0x11080100,0x44000001,0x1a45800,0x11080119,0x7c00100,
-0x220400,0x11080119,0x7c00100,0x250400,0x11080119,0x7c001c0,0x220400,0x11080119,0x7c001c0,0x250400,0x11080200,0x4000400,0x200002,0x11080200,0x24000000,0x200000,
-0x11080200,0x24000000,0x1500000,0x11080200,0x24000000,0x1600000,0x11080200,0x24000020,0x200000,0x110a1e12,0x7c00100,0x2130480,0x110a1e12,0x7c80100,0x2130480,0x110a3000,
-0x24000000,0x34e00000,0x110a3000,0x24100000,0x810001,0x110a3000,0x24100000,0x1410001,0x110a3700,0x24000000,0x34200000,0x110a3d00,0x4000000,0xe00000,0x110a3d00,0x4000000,
-0xe00002,0x110a3d00,0x24000000,0xe00000,0x110a3d11,0x7c00300,0xe30000,0x110a3d11,0x7c00900,0x1230400,0x110a3d12,0x2802400,0x962460,0x110a3e14,0x7c00100,0xe30000,
-0x110a3e14,0x7c00100,0xe30001,0x110a3e14,0x7c00100,0x2530000,0x110a3e14,0x7c00900,0x1230000,0x110a3e14,0x7c00900,0x1230001,0x110a3f16,0x7c00100,0xe30c00,0x110a3f16,
-0x7c00100,0xe30c01,0x110a3f16,0x7c00100,0x2530c00,0x110a3f16,0x7c00900,0x1230c00,0x110a3f16,0x7c00900,0x1230c01,0x110a4005,0x7c00100,0xe30400,0x110a4112,0x7c00100,
-0xe30402,0x110a4112,0x7c80100,0xe30402,0x110a4400,0x4000000,0xe00000,0x110a4412,0x4000000,0xe00002,0x110a4412,0x4000000,0xe00003,0x110a4416,0x4000000,0xe00c03,
-0x110a4500,0x4000000,0xe0000d,0x110a4516,0x4000000,0xe00c0d,0x110a4711,0x7c40300,0xe30000,0x110a4f11,0x7c00300,0xe30001,0x110a4f11,0x7c40300,0xe30000,0x110a5300,
-0x4000000,0x810010,0x110a5300,0x4000000,0xe00002,0x110a5300,0x4000000,0xe00010,0x110a5300,0x4000000,0x1410010,0x110a5300,0x4000002,0xe70010,0x110a5300,0x4000008,
-0x810010,0x110a5300,0x4000008,0x1410010,0x110a5300,0x6800000,0xe01c02,0x110a5300,0x6800000,0xe01c10,0x110a5400,0x4000000,0x81000c,0x110a5400,0x4000000,0xe0000c,
-0x110a5400,0x4000000,0x141000c,0x110a5400,0x4000000,0x150000c,0x110a5400,0x4000000,0x160000c,0x110a5400,0x4000002,0xe7000c,0x110a5400,0x4000010,0x87140c,0x110a5400,
-0x4000010,0xe7000c,0x110a5400,0x4000010,0x120140c,0x110a5400,0x4000010,0x127100c,0x110a5400,0x4000020,0xe0000c,0x110a5400,0x4000026,0xe7000c,0x110a5400,0xc000010,
-0x80ac0c,0x110a5400,0xc000010,0xb4800c,0x11400c0c,0x4000010,0xb00000,0x11400c0c,0x4000010,0x1071400,0x11400c0c,0xc000010,0xb48000,0x11400c16,0x7c00900,0x230400,
-0x11400f40,0xc000010,0x448000,0x11400f54,0xc000010,0x448000,0x11401d89,0x4000000,0x200000,0x11403dbf,0x4000000,0xe00000,0x114457b4,0x4000004,0x120000a,0x114457b4,
-0x4000008,0x81000a,0x114457b4,0x4000008,0x141000a,0x114457b4,0x4000010,0x87000a,0x114457b4,0xc000010,0x84800a,0x114457bd,0x3802500,0x126246a,0x114457bd,0x7c00d00,
-0x2530c0a,0x114a3db4,0x24000000,0x810000,0x114a3db4,0x24000000,0x1410000,0x114a3db4,0x24000008,0x810000,0x114a3db4,0x24000008,0x1410000,0x114a3db4,0x24000010,0x870000,
-0x114a3db4,0x2c000010,0x848000,0x114a3dba,0x4000000,0xe00000,0x114a3dba,0x24000000,0xe00000,0x114a3dba,0x24000002,0x1200000,0x114a3dba,0x24000002,0x14e00000,0x114a3dba,
-0x24000008,0x810000,0x114a3dba,0x24000008,0x1410000,0x114a3dbd,0x7c00900,0x930c00,0x114a3dbd,0x7c00900,0xe30c00,0x114a3dbf,0x7c00300,0xe30000,0x114a3ebd,0x7000400,
-0x1200c02,0x114a3fb4,0x4000004,0x1200000,0x114a3fbd,0x7c00d00,0x2530c00,0x114a42bf,0x4000000,0xe00000,0x114a42bf,0x4000000,0xe0000f,0x114a44bf,0x4000000,0xe00002,
-0x114a44bf,0x4000000,0xe00003,0x114a44bf,0x4000000,0x14e00003,0x114a45bf,0x4000000,0xe00002,0x114a45bf,0x4000000,0xe0000d,0x1180090a,0x2802400,0x962460,0x11800c1e,
-0x2802100,0x962460,0x11800c1e,0x2802500,0x962460,0x11800f27,0x2802400,0x962460,0x11800f34,0x2802400,0x962460,0x11820700,0x2802400,0x962460,0x11820700,0x2802500,
-0x962460,0x118a3dc0,0x2802400,0x962460,0x118a3ebd,0x2802400,0x962460,0x11c00904,0x2802400,0x962460,0x11c00908,0x2802400,0x962460,0x11c00c20,0xc000010,0xb48000,
-0x11c00c23,0x6800000,0x1329800,0x11c00f6d,0x6800000,0x1329800,0x11c01072,0x6800000,0x1329800,0x11c01176,0x6800000,0x1329800,0x11c0127a,0x6800000,0x1329800,0x11c0147e,
-0x4000000,0x200000,0x11c0147e,0x6800000,0x1329800,0x11c01682,0x6800000,0x1329800,0x11c051fa,0x7c00100,0x230408,0x20000067,0x1000,0,0x20000b13,0x2802400,
-0x962460,0x20000b13,0x2802500,0x962460,0x20001b27,0x2802100,0x962460,0x20001b27,0x2802100,0x962461,0x20001b27,0x2802400,0x962460,0x20001b27,0x2806400,0x962460,
-0x20001b27,0x2902100,0x962462,0x20001b27,0x4000000,0x200000,0x20001b27,0x4000000,0x400000,0x20001b27,0x4000000,0x500000,0x20001b27,0x4000000,0x810000,0x20001b27,
-0x4000000,0xb00000,0x20001b27,0x4000000,0xc0000b,0x20001b27,0x4000000,0x1410000,0x20001b27,0x4000010,0xb00000,0x20001b27,0x4000010,0xc00000,0x20001b27,0x6800000,
-0x1329800,0x20001b27,0x6800100,0x462540,0x20001b27,0x6800400,0x962540,0x20001b27,0x7c00100,0x230400,0x20001b27,0x7c00100,0x230401,0x20002619,0x7c00100,0x220401,
-0x20002a00,0x4000000,0x1600000,0x20004b67,0,0x1900000,0x20004c67,0,0x1900000,0x20004d67,0,0x1900000,0x20006d67,0x1000,0,0x20006e67,
-0x1000,0,0x20026d67,0,0,0x20026e67,0,0,0x200a4a12,0x7c00100,0x1f304c1,0x200a4a12,0x7c00100,0x20304e1,0x21005600,0x4000000,
-0x700000,0x21022a00,0x4000000,0x1600000,0x30000419,0x7c00100,0x220400,0x30000419,0x7c00100,0x220401,0x30000419,0x7c00100,0x250400,0x30000419,0x7c00100,0x250401,
-0x30000519,0x7c00100,0x220400,0x30000600,0x4000400,0x200400,0x30000600,0x7c00500,0x230400,0x30000605,0x4000400,0x200400,0x3000080e,0x7c00100,0x220400,0x30000908,
-0x2000,0x962460,0x30000908,0x7c00100,0x220400,0x30000908,0x7c00100,0x220401,0x30000908,0x7c00100,0x250400,0x30000908,0x7c00100,0x250401,0x30000a03,0x4000006,
-0x400400,0x30000c02,0x4000000,0x200000,0x30000c02,0x7c00100,0x230400,0x30000d22,0x2802100,0x962460,0x30000d22,0x2802400,0x962460,0x30000d22,0x2802500,0x962460,
-0x30000d22,0x4000000,0x200000,0x30000d22,0x4000010,0x200000,0x30000d22,0x7c00100,0x230400,0x30000d22,0xc000010,0x248000,0x30000d22,0x80000000,0x218960,0x30000e25,
-0x2802500,0x962460,0x30000e25,0x7c00100,0x230400,0x30001821,0x2802100,0x962460,0x30001821,0x2806400,0x962460,0x30001821,0x4000000,0x200000,0x30001821,0x6800100,
-0x962540,0x30001821,0x6800100,0x962541,0x30001821,0x7c00100,0x230400,0x30001b27,0x2802100,0x962460,0x30001b27,0x2802400,0x962460,0x30001b27,0x4000000,0x200000,
-0x30001b27,0x4000000,0x400000,0x30001b27,0x7c00100,0x230400,0x30001c1c,0x2802100,0x1862460,0x30001c1c,0x2802400,0x1862460,0x30001c1c,0x2806400,0x1862460,0x30001c1c,
-0x4000000,0x200000,0x30001c1c,0x6800100,0x1862400,0x30001c1c,0x6800100,0x1862540,0x30001c1c,0x7c00100,0x1830000,0x30001c1c,0x7c00100,0x1830001,0x30001c1c,0xc000010,
-0x448000,0x30001f0b,0x4000000,0x200000,0x30001f0b,0x4000010,0x200000,0x30001f0b,0x4000010,0x400000,0x30001f0b,0x6800000,0x200000,0x30001f0b,0x7c00100,0x230400,
-0x30001f0b,0xc000010,0x248000,0x30002006,0x7c00100,0x250400,0x30002128,0x4000000,0x200000,0x30002128,0x7c00100,0x230400,0x30002128,0xc000010,0x248000,0x3000221d,
-0x4000000,0x810000,0x3000221d,0x4000000,0x1410000,0x3000221d,0x4000001,0x445800,0x3000221d,0x7c00100,0x230400,0x30002300,0x4000010,0x400000,0x30002320,0x7c00100,
-0x230400,0x30002417,0x2802100,0x1862460,0x30002417,0x2802400,0x1862460,0x30002417,0x2806400,0x1862460,0x30002417,0x2882000,0x1862460,0x30002417,0x4000000,0x200000,
-0x30002417,0x4000000,0x400000,0x30002417,0x4000000,0x1600000,0x30002417,0x4000010,0x400000,0x30002417,0x4000010,0x1200000,0x30002417,0x6800000,0x1329800,0x30002417,
-0x6800100,0x1862540,0x30002417,0x7c00100,0x1830000,0x30002417,0x7d00100,0x1830000,0x3000251b,0x80000,0xc18820,0x3000251b,0x2802100,0x962460,0x3000251b,0x3c02100,
-0x962460,0x3000251b,0x4000000,0x200000,0x3000251b,0x4000006,0x500000,0x3000251b,0x4000010,0x400000,0x3000251b,0x4000010,0xb70000,0x3000251b,0x4000800,0x200000,
-0x3000251b,0x6800000,0x1329800,0x3000251b,0x7c00100,0x230400,0x3000251b,0x7c00900,0x230400,0x3000251b,0xc000010,0xb48000,0x3000251b,0x12882000,0x962460,0x30002800,
-0x24000000,0x200000,0x30002800,0x2c000010,0x1248002,0x30002800,0x2c000010,0x15248002,0x30002a00,0x4000000,0x1600000,0x30002b01,0x2000,0x962460,0x30002b01,0x2000,
-0x8962460,0x30002c00,0x4000000,0x200000,0x30002c00,0x7c00100,0x14220405,0x30002d19,0x7c00100,0x250400,0x30002e00,0x24000000,0x200000,0x30003000,0x24000000,0x200000,
-0x30003000,0x24000000,0x4200000,0x30003100,0x24000000,0x200000,0x30003600,0x24000000,0x200000,0x30003700,0x24000000,0x4200000,0x3000392e,0x24000000,0x200000,0x30005013,
-0x7c00100,0x2633801,0x30005600,0,0x918820,0x30020600,0x4000400,0x500400,0x30020701,0x2802400,0x962460,0x30020701,0x2802400,0xc62460,0x300a3a11,0x4020000,
-0xe00000,0x300a3a11,0x4020000,0xe00002,0x300a3b11,0x4020000,0xe00002,0x300a3c00,0x4008000,0xe00000,0x300a3c00,0x4010000,0xe00000,0x300a3d11,0x7c00300,0xe30002,
-0x300a4305,0x7c00100,0xe30400,0x300a4611,0x7c40300,0xe30000,0x300a4829,0x7c00100,0xe30400,0x300a4829,0x7c00900,0x1230400,0x300a4929,0x4000000,0xe00000,0x3040258f,
-0x4000010,0x400000,0x3040258f,0x4000010,0xb70000,0x3040258f,0xc000010,0xb48000,0x304028af,0x4000001,0xc41c0b,0x304a3dbf,0x4000000,0xe00000,0x30800c1e,0x2802100,
-0x962460,0x30c01c87,0x6800000,0x1329800,0x3100080e,0x7c00120,0x220402,0x3100080e,0x7c00120,0x250402,0x31005167,0x1000,0,0x3100581e,0x4000000,0x200000,
-0x3100581e,0x7c00100,0x230400,0x3100590d,0x7c00100,0x230400,0x31005a09,0x7c00100,0x220400,0x31005a09,0x7c00100,0x250400,0x31005b00,0x4000000,0x200000,0x31005c00,
-0x80000,0x918820,0x31005c00,0x2802000,0x962460,0x31005c00,0x2802400,0x962460,0x31005c00,0x4000000,0x200000,0x31005c00,0x4000000,0x200001,0x31005c00,0x6800000,
-0x962540,0x31005c00,0x6800400,0x962540,0x31005c01,0x2802400,0x962460,0x31005d00,0x4000020,0x200005,0x31005d00,0x6800020,0x1329805,0x31005d00,0x7c00120,0x220405,
-0x31005d00,0x7c00120,0x250405,0x31006000,0x82000,0x8962460,0x31006000,0x180000,0x918820,0x310a5e11,0x7c40300,0xe30000,0x310a5f11,0x7c00300,0xe30001,0x32000419,
-0x7c00100,0x250400,0x3200080e,0x4000020,0x200000,0x3200080e,0x7c00100,0x220400,0x3200080e,0x7c00100,0x250400,0x32000908,0x7c00100,0x220400,0x32000908,0x7c00100,
-0x250400,0x32000c02,0x7c00100,0x230400,0x32000e25,0x7c00100,0x230400,0x32001d0c,0x7c00100,0x230400,0x32002800,0x80000,0x1e18820,0x32002800,0x80020,0x218820,
-0x32002800,0x4000001,0x445802,0x32002800,0x24000000,0x200000,0x32002800,0x24000000,0x200002,0x32002800,0x24000020,0x200000,0x32002800,0x2c000010,0x1248002,0x32002919,
-0x7c00100,0x22040f,0x32002a00,0x4000000,0x1600000,0x32002b01,0x2000,0x962460,0x32002b01,0x2802000,0x962460,0x32002b01,0x2802020,0x962460,0x32002c00,0x4000000,
-0x200000,0x32002c00,0x4000020,0x200000,0x32002c00,0x4000020,0x200005,0x32002c00,0x7c00120,0x220405,0x32002c00,0x7c00120,0x250405,0x32002e00,0x24000020,0x200000,
-0x32002f00,0x24000020,0x200000,0x32003000,0x24000000,0x200000,0x32003000,0x24000020,0x200000,0x32003500,0x24000000,0x200000,0x32003600,0x24000020,0x200000,0x32003600,
-0x24000020,0x14200000,0x32003700,0x24000000,0x200000,0x32003700,0x24000000,0x4100000,0x32003700,0x24000000,0x4200000,0x32003700,0x24000000,0x14200000,0x32003800,0x24000000,
-0x810000,0x32003800,0x24000000,0x1410000,0x32005102,0x4000000,0x1500008,0x32005502,0x7c00100,0x230400,0x32006108,0x7c00100,0x220400,0x32006108,0x7c00100,0x250400,
-0x3200622a,0x2802100,0x962460,0x3200622a,0x2806000,0x962460,0x3200622a,0x7c00100,0x230400,0x3200632b,0x2802100,0x962460,0x3200632b,0x2806000,0x962460,0x3200632b,
-0x7c00100,0x230400,0x3200642c,0x2802100,0x962460,0x3200642c,0x7c00100,0x230400,0x3200652d,0x2802100,0x962460,0x3200652d,0x7c00100,0x230400,0x32006600,0x24000020,
-0x200000,0x32006700,0x24000020,0x200000,0x32006800,0x24000020,0x200000,0x32006800,0x24000020,0x14200000,0x32006900,0x24000020,0x200000,0x32006900,0x24000020,0x810000,
-0x32006900,0x24000020,0x1410000,0x32006a00,0x24000020,0x200000,0x32006a00,0x24000020,0x200001,0x32006a00,0x24000020,0x200002,0x32020701,0x2882000,0xc62460,0x32023300,
-0x4000000,0x100000,0x32026c01,0x12882000,0x962460,0x32026c01,0x12882000,0x8962460,0x32065700,0x4000000,0x810011,0x32065700,0x4000000,0x1410011,0x32086600,0x24000020,
-0x810000,0x32086600,0x24000020,0x1410000,0x32086900,0x24000020,0x810000,0x32086900,0x24000020,0x1410000,0x320a3600,0x24000020,0x34200000,0x320a3d11,0x7c00100,0x1230400,
-0x320a3e14,0x7c00100,0xe30010,0x320a3e14,0x7c00100,0x2530000,0x320a3f16,0x7c00100,0xe30c10,0x320a4400,0x4000000,0xe00003,0x320a4929,0x4000000,0xe00000,0x320a4f11,
-0x7c00300,0xe30001,0x320a6b16,0x7c00100,0x2530c00,0x3240638b,0xc000010,0x448000,0x324a3dc2,0x4000000,0x14e00000,0x324a3dc2,0x7c00100,0x1230400,0x324a3fbd,0x4000002,
-0x1200c00,0x324a53ba,0x24000000,0xe00000,0x32820701,0x2802000,0x962460,0x40000419,0x7c00100,0x220400,0x40000519,0x7c00100,0x220400,0x40000600,0x4000400,0x200400,
-0x4000080e,0x7c00100,0x220400,0x4000080e,0x7c00100,0x250400,0x4000080e,0x7c00100,0x250402,0x40000c02,0x2802100,0x962460,0x40000c02,0x2802400,0x962460,0x40000c02,
-0x2802500,0x962460,0x40000c02,0x4000000,0x200000,0x40000c02,0x4000000,0x1071400,0x40000c02,0x7c00100,0x230400,0x40000c02,0x80000000,0x218960,0x40000d22,0x7c00100,
-0x230400,0x40000f0a,0x7c00100,0x230400,0x40001004,0x7c00100,0x230400,0x40001110,0x2802100,0x962460,0x40001110,0x6800100,0x962540,0x4000120f,0x2802100,0x962460,
-0x4000120f,0x4000000,0x1600000,0x4000120f,0x7c00100,0x230400,0x4000131f,0x7c00100,0x230400,0x40001423,0x4000000,0x200000,0x40001423,0x4000000,0x1600000,0x40001615,
-0x2802400,0x962460,0x40001615,0x7c00100,0x230400,0x40002417,0x2802400,0x1862460,0x40002417,0x4000000,0x200000,0x40002800,0x6800000,0x201c00,0x40002800,0x24000002,
-0x200000,0x40002c00,0x4000000,0x200002,0x40003000,0x24000000,0x14200000,0x40003000,0x24000020,0x200000,0x40003700,0x24000000,0x200000,0x40003700,0x24000000,0x4200000,
-0x40003700,0x24000000,0x14200000,0x40005a09,0x7c00100,0x220400,0x40005a09,0x7c00100,0x250400,0x40005d00,0x7c00120,0x220405,0x40006f30,0x2802100,0x962460,0x40006f30,
-0x2802400,0x962460,0x40006f30,0x4000000,0x200000,0x40006f30,0x6800000,0x1329800,0x40006f30,0x6800100,0x962540,0x40006f30,0x7c00100,0x230400,0x40006f30,0xc000010,
-0xb48000,0x40007034,0x7c00100,0x1830000,0x40007117,0x4000000,0x200000,0x40007208,0x7c00100,0x220400,0x4000720e,0x7c00100,0x220400,0x4000720e,0x7c00500,0x22040e,
-0x4000720e,0x7c00500,0x22040f,0x40007219,0x7c00100,0x220400,0x40007219,0x7c00500,0x220400,0x40007219,0x7c00500,0x22040e,0x40007219,0x7c00500,0x22040f,0x40007300,
-0x24000000,0x200000,0x40007300,0x24000000,0x14200000,0x40007400,0x4000000,0x200000,0x40007531,0x7c00100,0x230400,0x40007631,0x7c00100,0x230400,0x40007835,0x4000010,
-0x400000,0x40007835,0x7c00100,0x230400,0x40007933,0x7c00100,0x230400,0x40007a32,0x6800000,0x1329800,0x40007a32,0x7c00100,0x230400,0x40007b2f,0x7c00100,0x230400,
-0x40007c00,0x4000000,0x200000,0x40020701,0x2802400,0x962460,0x40020701,0x2802400,0xc62460,0x40023300,0x4000000,0x200000,0x40027d01,0x12882000,0x962460,0x400a3700,
-0x24000000,0x34200000,0x400a3700,0x24000000,0x34e00000,0x400a4400,0x4000000,0xe0000d,0x400a4412,0x4000000,0xe00002,0x400a4412,0x4000000,0xe00003,0x400a4500,0x4000000,
-0xe0000d,0x400a5300,0x4000000,0x810010,0x400a5300,0x4000000,0x1410010,0x404077fc,0x4000000,0x200000,0x404077ff,0x4000000,0x200000,0x404077ff,0x4000000,0x400000,
-0x40c0147e,0x4000000,0x200000,0x40c051fa,0x4000000,0x200000,0x41000419,0x7c00100,0x220400,0x41000419,0x7c00100,0x250400,0x4100080e,0x7c00100,0x220400,0x4100080e,
-0x7c00100,0x250400,0x41000908,0x7c00100,0x220400,0x41000908,0x7c00100,0x250400,0x41000b13,0x2802000,0x962460,0x41000b13,0x2802100,0x962460,0x41000b13,0x4000000,
-0xb00000,0x41000c02,0x2802100,0x962460,0x41000c02,0x4000000,0x1500000,0x41000c02,0xc000010,0xb48000,0x41000f0a,0x7c00100,0x230400,0x41001004,0x7c00100,0x230400,
-0x41001423,0x7c00100,0x230400,0x41001b27,0x4000000,0x500000,0x41001d0c,0x7c00100,0x230400,0x41001d0c,0x7c00100,0x23040f,0x41001f0b,0x2802400,0x962460,0x41001f0b,
-0x4000000,0x200000,0x41001f0b,0x7c00100,0x230400,0x41002800,0x24000000,0x200000,0x41002800,0x24000000,0x400000,0x41002919,0x7c00100,0x22040e,0x41002a00,0x4000000,
-0x1600000,0x41002b01,0x2802020,0x962460,0x41002c00,0x4000000,0x200000,0x41002c00,0x7c00120,0x220405,0x41003000,0x24000000,0x200000,0x41003700,0x24000000,0x4200000,
-0x41003700,0x24000000,0x14200000,0x41003700,0x24000000,0x14e00000,0x41005d00,0x7c00120,0x220405,0x41006600,0x24000020,0x200000,0x41006600,0x24000020,0x810000,0x41006600,
-0x24000020,0x1410000,0x41007208,0x7c00100,0x22040f,0x41007219,0x7c00100,0x220400,0x41007300,0x24000000,0x200000,0x41007e0e,0x2802000,0x962460,0x41007e0e,0x4000000,
-0x200000,0x41007f0e,0x4000000,0x200000,0x41007f0e,0x7c00100,0x230400,0x41008002,0x7c00100,0x230400,0x41008137,0x2802100,0x962460,0x41008137,0x4000000,0x200000,
-0x41008137,0x6800100,0x962540,0x41008137,0x7c00100,0x230400,0x41008301,0x2802000,0x962460,0x41008407,0x4000000,0x200000,0x41008407,0x4000000,0x400000,0x41008407,
-0x4000000,0xb00000,0x41008407,0x7c00100,0x220400,0x41008407,0x7c00100,0x250400,0x4100850b,0x7c00100,0x230400,0x4100860b,0x4000000,0x200000,0x4100860b,0x7c00100,
-0x230400,0x4100870c,0x7c00100,0x220400,0x41008838,0x7c00100,0x220400,0x41008838,0x7c00100,0x250400,0x41008939,0x2802000,0x962460,0x41008939,0x2802100,0x962460,
-0x41008939,0x2806000,0x962460,0x41008939,0x4000000,0x200000,0x41008939,0x4000000,0x400000,0x41008939,0x7c00100,0x230400,0x41008939,0xc000010,0x448000,0x41008a00,
-0x4000400,0x200400,0x41008b3b,0x4000000,0x1800000,0x41008b3b,0x6800000,0x1329800,0x41008b3b,0x7c00100,0x1830000,0x41008b3b,0x7e00100,0x1830000,0x41008c3d,0x4000010,
-0x400000,0x41008c3d,0x7c00100,0x230400,0x41008d0e,0x7c00100,0x22040f,0x41008d19,0x7c00100,0x220400,0x41008d19,0x7c00100,0x22040f,0x41008e00,0x24000000,0x200000,
-0x41008e00,0x24000000,0x400000,0x41008e00,0x24000000,0x1710000,0x41008e00,0x24000006,0x400000,0x41008f3a,0x2802100,0x962460,0x41008f3a,0x2806000,0x962460,0x41008f3a,
-0x4000000,0x200000,0x41008f3a,0x6800100,0x962540,0x41008f3a,0x7c00100,0x230400,0x4100903c,0x7c00100,0x230400,0x4100903c,0x7c00100,0x23040f,0x41020701,0x2802000,
-0x962460,0x41020701,0x2802000,0xc62460,0x410a3700,0x24000000,0x34200000,0x410a3700,0x24000000,0x34e00000,0x410a4412,0x4000000,0xe00003,0x410a4711,0x7c40300,0xe30000,
-0x410a4f11,0x7c00300,0xe30001,0x410a9100,0x4000000,0x800010,0x410a9100,0x4000000,0x810010,0x410a9100,0x4000000,0x870010,0x410a9100,0x4000000,0xb00010,0x410a9100,
-0x4000000,0xf00010,0x410a9100,0x4000000,0x1001410,0x410a9100,0x4000000,0x1071010,0x410a9100,0x4000000,0x1071410,0x410a9100,0x4000000,0x1410010,0x41408ac5,0x4000400,
-0x200000,0x414a82bf,0x4000000,0xe00000,0x41808300,0x2802000,0x962460,0x41c0147e,0x6800000,0x1329800,0x50000419,0x7c00100,0x220400,0x50000419,0x7c00100,0x250400,
-0x5000080e,0x7c00100,0x220400,0x50000908,0x7c00100,0x220400,0x50000908,0x7c00100,0x250400,0x50000b13,0x2802500,0x962460,0x50000f0a,0x7c00100,0x230400,0x50001615,
-0x2802100,0x962460,0x50001615,0x7c00100,0x230400,0x50002b01,0x2802020,0x962460,0x50002c00,0x4000000,0x200000,0x50002c19,0x7c00100,0x220400,0x50002d19,0x7c00100,
-0x220400,0x50003000,0x24000000,0x200000,0x50003000,0x24000020,0x200000,0x50003700,0x24000000,0x4200000,0x50005d00,0x7c00120,0x220405,0x50005d00,0x7c00120,0x250405,
-0x50006108,0x7c00100,0x220400,0x50006108,0x7c00100,0x250400,0x50006600,0x24000020,0x200000,0x50007300,0x24000000,0x200000,0x50008301,0x2802400,0x962460,0x50008a00,
-0x7c00500,0x230400,0x50009257,0x2802400,0x962460,0x50009257,0x4000000,0x200000,0x50009257,0x4000010,0x1071400,0x50009257,0x6800000,0x1329800,0x50009257,0x7c00100,
-0x230400,0x50009257,0x7c00500,0x230400,0x50009257,0x7c00900,0x230400,0x50009257,0xc000010,0xb48000,0x5000933e,0x2802100,0x962460,0x5000933e,0x2802400,0x962460,
-0x5000933e,0x4000000,0x200000,0x5000933e,0x4000000,0x400000,0x5000933e,0x4000010,0x400000,0x5000933e,0x6800000,0x1329800,0x5000933e,0x6800100,0x962540,0x5000933e,
-0x6800100,0x962541,0x5000933e,0x6804400,0x962540,0x5000933e,0x7c00100,0x230400,0x5000933e,0x7c00100,0x230401,0x5000933e,0xc000010,0x448000,0x50009419,0x7c00100,
-0x220400,0x50009419,0x7c00100,0x250400,0x50009500,0x4000400,0x200400,0x5000965a,0x4000000,0x500000,0x5000965a,0x7c00100,0x230400,0x5000965a,0xc000010,0xb48000,
-0x5000975b,0x4000000,0x200000,0x5000975b,0x4000010,0x400000,0x5000975b,0x7c00100,0x230400,0x50009865,0x7c00100,0x230400,0x50009965,0x4000010,0x400000,0x50009965,
-0x7c00100,0x230400,0x50409abf,0x4000000,0x200000,0x5100080e,0x7c00100,0x220400,0x5100080e,0x7c00100,0x250400,0x51000c02,0x2802100,0x962460,0x51000c02,0x4000000,
-0x1500000,0x51000c02,0x4000020,0x200000,0x51000c02,0x7c00100,0x230400,0x51000f0a,0x7c00100,0x230400,0x51000f0a,0x7c00500,0x230400,0x51001110,0x2802100,0x962460,
-0x5100131f,0x2802100,0x962460,0x51001423,0x7c00100,0x230400,0x51001524,0x2802100,0x962460,0x51001524,0x4000000,0x200000,0x51001524,0x7c00100,0x230400,0x5100171a,
-0x2802100,0x962460,0x5100171a,0x4000000,0x200000,0x5100171a,0x4000000,0x1500000,0x5100171a,0x7c00100,0x230400,0x51001b27,0x4000000,0x200000,0x51001b27,0x4000000,
-0x400000,0x51001b27,0x4000000,0x500000,0x51001b27,0x7c00100,0x230400,0x51001c1c,0x2802100,0x1862460,0x51001c1c,0x2802500,0x1862460,0x51001c1c,0x2806400,0x1862460,
-0x51001c1c,0x4000000,0x1800000,0x51001c1c,0x6800000,0x1329800,0x51001c1c,0x6800100,0x1862400,0x51001c1c,0x6800100,0x1862540,0x51001c1c,0x6800500,0x1862400,0x51001c1c,
-0x7c00100,0x1830000,0x5100251b,0x7c00100,0x230400,0x51002619,0x7c00100,0x220400,0x51002619,0x7c00100,0x250400,0x51002800,0x80020,0x218820,0x51002c00,0x4000000,
-0x200000,0x51002d19,0x7c00100,0x230400,0x51003700,0x24000000,0x4200000,0x51003700,0x24000000,0x4e00000,0x51005201,0x2802400,0x962460,0x51005c00,0x4000000,0x200000,
-0x51006108,0x7c00100,0x220400,0x51006108,0x7c00100,0x250400,0x51006600,0x24000020,0x200000,0x51006600,0x24000020,0x810000,0x51006600,0x24000020,0x1410000,0x51007300,
-0x24000000,0x200000,0x51007300,0x24000020,0x200000,0x51008002,0x7c00100,0x230400,0x51008301,0x2802000,0x962460,0x51008301,0x2802400,0x962460,0x51008a00,0x7c00500,
-0x230400,0x51008e00,0x24000000,0x200000,0x51008e00,0x24000000,0x400000,0x51008e00,0x24000000,0x810000,0x51008e00,0x24000000,0x1400000,0x51008e00,0x24000000,0x1410000,
-0x51008e00,0x24000000,0x1710000,0x51008e00,0x24000002,0x200000,0x51008e00,0x24000500,0x230400,0x51008e00,0x2c000010,0xb48000,0x51009419,0x7c00100,0x220400,0x51009419,
-0x7c00100,0x22040e,0x51009419,0x7c00100,0x22040f,0x51009419,0x7c00100,0x250400,0x51009500,0x4000400,0x200400,0x51009500,0x7c00500,0x230400,0x51009519,0x7c00100,
-0x220400,0x51009519,0x7c00100,0x22040f,0x51009519,0x7c00100,0x230400,0x51009519,0x7c00100,0x250400,0x51009b71,0x2802100,0x962460,0x51009b71,0x6800000,0x1329800,
-0x51009b71,0x6800100,0x962540,0x51009b71,0x6804400,0x962540,0x51009b71,0x7c00100,0x230400,0x51009c52,0x2802100,0x962460,0x51009c52,0x2802400,0x962460,0x51009c52,
-0x2802d00,0x962460,0x51009c52,0x4000010,0x400000,0x51009c52,0x6800000,0x1329800,0x51009c52,0x6800100,0x962540,0x51009c52,0x7c00100,0x230400,0x51009c52,0xc000010,
-0x448000,0x51009d6d,0x6800000,0x1329800,0x51009d6d,0x7c00100,0x230400,0x51009d6d,0x7c00500,0x230400,0x51009d6d,0x7c00d00,0x230400,0x51009d6d,0xc000010,0x448000,
-0x51009e08,0x2802100,0x962460,0x51009f63,0x4000010,0x400000,0x51009f63,0x6800000,0x1329800,0x51009f63,0x7c00100,0x230400,0x51009f63,0x7c00900,0x230400,0x51009f63,
-0xc000010,0x448000,0x51009f63,0xc000010,0xb48000,0x5100a008,0x2000,0x962460,0x5100a008,0x2802400,0x962460,0x5100a008,0x4000000,0x200000,0x5100a008,0x7c00100,
-0x220400,0x5100a008,0x7c00100,0x230400,0x5100a008,0x7c00100,0x250400,0x5100a008,0x7c00500,0x230400,0x5100a16f,0x2806400,0x962460,0x5100a16f,0x6800000,0x1329800,
-0x5100a16f,0x6800100,0x962540,0x5100a16f,0x7c00100,0x230400,0x5100a16f,0xc000010,0x448000,0x5100a24f,0x2802100,0x962460,0x5100a24f,0x2802400,0x962460,0x5100a24f,
-0x6800000,0x1329800,0x5100a24f,0x7c00100,0x230400,0x5100a24f,0xc000010,0x448000,0x5100a36e,0x2802100,0x962460,0x5100a36e,0x4000000,0x200000,0x5100a36e,0x6800100,
-0x962540,0x5100a36e,0x6804400,0x962540,0x5100a36e,0x7c00100,0x230400,0x5100a442,0x2802100,0x962460,0x5100a442,0x4000000,0x200000,0x5100a442,0x6800000,0x1329800,
-0x5100a442,0x6800100,0x962540,0x5100a442,0x7c00100,0x230400,0x5100a442,0xc000010,0x448000,0x5100a500,0x4000000,0x200000,0x5100a600,0x4000000,0x200000,0x5100a601,
-0x2802000,0x962460,0x5100a76b,0x7c00100,0x230400,0x5100a868,0x7c00100,0x230400,0x5100a96c,0x4000000,0x200000,0x5100a96c,0x7c00100,0x230400,0x5100aa00,0x4000000,
-0x4e00000,0x5100ab00,0x4000000,0x4e00000,0x51086600,0x24000020,0x810000,0x51086600,0x24000020,0x1410000,0x510a4005,0x7c00100,0xe30400,0x510a4711,0x7c40300,0xe30000,
-0x510a7300,0x24000000,0x34200000,0x510aaa00,0x4000000,0x34e00000,0x5140a2f3,0x4000400,0x400000,0x514a82bf,0x4000000,0xe00000,0x51802bb1,0x2802000,0x962460,0x51c00908,
-0x2802400,0x962460,0x51c0a008,0x2802400,0x962460,0x52000f0a,0x2802100,0x962460,0x52000f0a,0x6800100,0x962540,0x52000f0a,0x7c00100,0x230400,0x52001004,0x4000000,
-0x1600000,0x52001b00,0x4000000,0x200000,0x52001c1c,0x2802100,0x1862460,0x52001c1c,0x6800100,0x1862400,0x52001c1c,0x6800500,0x1862400,0x52001e12,0x7c00100,0x2230500,
-0x52001e12,0x7c00100,0x2330520,0x52002128,0x4000002,0x400000,0x52002128,0x7c00100,0x230400,0x52002a00,0x4000000,0x1500000,0x52002a00,0x4000000,0x1600000,0x52002d00,
-0x4000000,0x200006,0x52003000,0x24000000,0x200000,0x52006108,0x7c00100,0x220400,0x52006108,0x7c00100,0x250400,0x52008301,0x2802400,0x962460,0x52008407,0x2802400,
-0x962460,0x52008407,0x7c00100,0x220400,0x52008407,0x7c00100,0x250400,0x52008b3b,0x6800000,0x1800000,0x52008b3b,0x7c00100,0x1830000,0x52008e00,0x24000000,0x400000,
-0x52009419,0x7c00100,0x250400,0x5200975b,0x4000000,0x200000,0x5200ac7e,0x2802000,0x962460,0x5200ac7e,0x2802100,0x962460,0x5200ac7e,0x2802400,0x962460,0x5200ac7e,
-0x4000010,0x200000,0x5200ac7e,0x7c00100,0x230400,0x5200ac7e,0xc000010,0x248000,0x5200ad28,0x7c00100,0x230400,0x5200ae6a,0x2802100,0x1862460,0x5200ae6a,0x2802400,
-0x962460,0x5200ae6a,0x2802400,0x1862460,0x5200ae6a,0x2806000,0x1862460,0x5200ae6a,0x4000000,0x1800000,0x5200ae6a,0x6800000,0x1329800,0x5200ae6a,0x6800100,0x1862400,
-0x5200ae6a,0x6800100,0x1862540,0x5200ae6a,0x7c00100,0x1830000,0x5200ae6a,0x7c00900,0x1830000,0x5200ae6a,0xc000010,0x1848000,0x5200b083,0x4000010,0x400000,0x5200b083,
-0x7c00100,0x230400,0x5200b083,0xc000010,0x448000,0x5200b182,0x2802400,0x962460,0x5200b182,0x4000000,0x200000,0x5200b182,0x4000010,0x400000,0x5200b182,0x7c00100,
-0x230400,0x5200b182,0xc000010,0x448000,0x5200b30a,0x2802400,0x962460,0x5200b30a,0x4000000,0x200000,0x5200b30a,0x7c00100,0x230400,0x5200b54e,0x2802100,0x962460,
-0x5200b54e,0x2802400,0x962460,0x5200b54e,0x4000000,0x200000,0x5200b54e,0x4000010,0x400000,0x5200b54e,0x6800000,0x1329800,0x5200b54e,0x6800100,0x962540,0x5200b54e,
-0x6804400,0x962540,0x5200b54e,0x7c00100,0x230400,0x5200b54e,0xc000010,0x448000,0x5200b61c,0x4000000,0x1800000,0x5200b61c,0x6800500,0x1862400,0x5200b61c,0x7c00100,
-0x1830000,0x5200b61c,0x7c00900,0x1830000,0x5200b77f,0x2802100,0x1862460,0x5200b77f,0x2802400,0x1862460,0x5200b77f,0x4000000,0x1800000,0x5200b77f,0x4000010,0x1800000,
-0x5200b77f,0x7c00100,0x1830000,0x5200b77f,0x7c00500,0x1830000,0x5200b77f,0x7c00900,0x1830000,0x5200b77f,0x7e00100,0x1830000,0x5200b873,0x2802100,0x962460,0x5200b873,
-0x2806400,0x962460,0x5200b873,0x6800000,0x1329800,0x5200b873,0x6800100,0x962540,0x5200b873,0x6800400,0x962540,0x5200b873,0x7c00100,0x230400,0x5200b873,0xc000010,
-0x448000,0x5200b912,0x7c00100,0x2230500,0x5200b912,0x7c00100,0x2330520,0x5200ba74,0x4000000,0x200000,0x5200ba74,0x4000010,0x400000,0x5200ba74,0x7c00100,0x230400,
-0x5200bb85,0x4000000,0x200000,0x5200bb85,0x7c00100,0x230400,0x5200bc75,0x4000000,0x400000,0x5200bc75,0x4000010,0x400000,0x5200bc75,0x7c00100,0x230400,0x5200bd7d,
-0x4000000,0x200000,0x5200bd7d,0x7c00100,0x230400,0x5200be7a,0x4000000,0x200000,0x5200be7a,0x7c00100,0x230400,0x5200bf58,0x7c00100,0x230400,0x5200c002,0x4000000,
-0x200000,0x5200c178,0x2802000,0x962460,0x5200c178,0x2802100,0x962460,0x5200c178,0x2802400,0x962460,0x5200c178,0x2806400,0x962460,0x5200c178,0x4000000,0x200000,
-0x5200c178,0x6800100,0x962540,0x5200c178,0x7c00100,0x230400,0x5200c178,0x7c00100,0x230401,0x5200c178,0xc000010,0x448000,0x5200c178,0x80000000,0x218960,0x5200c247,
-0x7c00100,0x230400,0x5200c247,0x7c00100,0x830400,0x5200c247,0x7c00100,0x1430400,0x5200c300,0x4000000,0x200003,0x52022d00,0x4000000,0x100006,0x52023700,0x24000000,
-0x4100000,0x52023700,0x24000000,0x4e00000,0x52023700,0x24000000,0x14100000,0x52023700,0x24000000,0x14e00000,0x52023700,0x24000000,0x96800000,0x52024400,0x4000000,0x100000,
-0x52027300,0x24000000,0x100000,0x5202c300,0x4000000,0x100000,0x5202c300,0x4000000,0x100002,0x5202c300,0x4000000,0x100003,0x5202c300,0x4000000,0x10000d,0x5202c300,
-0x4000100,0x150400,0x5202c300,0x4000100,0x15040d,0x5202c300,0x4000100,0x14150400,0x520a1e12,0x7c00100,0x2130480,0x520a3700,0x24000000,0x34e00000,0x520a3800,0x24000000,
-0x34100000,0x520a4711,0x7c40300,0xe30000,0x520a4f11,0x7c00300,0xe30001,0x520a7300,0x24000000,0x34100000,0x520ab412,0x7c00100,0x2130480,0x520ac400,0x4000000,0xe00002,
-0x520ac400,0x4000000,0xe0000d,0x520ac400,0x4000000,0x34e0000d,0x520ac414,0x4000000,0xe0000d,0x520ac511,0x7c40300,0xe30000,0x5240af91,0x7c00100,0x230400,0x5240af96,
-0x4000400,0x200000,0x5240af98,0x6800400,0x962540,0x5240af98,0x7c00100,0x230400,0x5240afa2,0x7c00100,0x230400,0x5240afa4,0x7c00100,0x230400,0x5240b2c7,0x4000000,
-0x200000,0x5240b2c7,0x4000000,0x1500000,0x5240b2d2,0x4000000,0x200000,0x5240b2e0,0x4000000,0x200000,0x5240b5f6,0x7c00900,0x230400,0x524a44bf,0x4000000,0xe00003,
-0x5280af91,0x2802400,0x962460,0x5280af92,0x2802400,0x962460,0x5280af98,0x2802400,0x962460,0x5280af9a,0x2802400,0x962460,0x5280af9c,0x2802400,0x962460,0x52c0b3ed,
-0x2802400,0x962460,0x52c0b3f1,0x7c00100,0x230400,0x60000c02,0x2802100,0x962460,0x60000c02,0x7c00100,0x230400,0x60000f0a,0x2802100,0x962460,0x60000f0a,0x6800100,
-0x962540,0x60000f0a,0x7c00100,0x230400,0x6000131f,0x4000000,0x200000,0x6000171a,0x7c00100,0x230400,0x6000171a,0x7c00100,0x230560,0x60001b27,0x2802100,0x962460,
-0x60001b27,0x4000000,0xc00000,0x60001b27,0x7c00100,0x230400,0x60001f0b,0x2802400,0x962460,0x60002919,0x7c00100,0x22040e,0x60002a00,0x4000000,0x1600000,0x60003000,
-0x24000000,0x14200000,0x60003000,0x24000000,0x14e00000,0x60003700,0x24000000,0x4200000,0x60003800,0x24000000,0x1710000,0x60005102,0x4000000,0x200000,0x60006108,0x7c00100,
-0x220400,0x60006108,0x7c00100,0x250400,0x60006600,0x24000020,0x200000,0x60008301,0x2802000,0x962460,0x6000903c,0x2806000,0x962460,0x6000903c,0x4000000,0x400000,
-0x60009519,0x7c00100,0x220400,0x60009519,0x7c00100,0x250400,0x6000a008,0x7c00100,0x220400,0x6000a008,0x7c00100,0x250400,0x6000c300,0x4000000,0x3a703580,0x6000c654,
-0x2802000,0x962460,0x6000c654,0x4000010,0x200000,0x6000c654,0x7c00100,0x230400,0x6000c73f,0x2802000,0x962460,0x6000c73f,0x2802100,0x962460,0x6000c73f,0x4000000,
-0x200000,0x6000c73f,0x6800100,0x962540,0x6000c73f,0x6804000,0x962540,0x6000c73f,0x7c00100,0x230400,0x6000c80b,0x7c00100,0x230400,0x6000c941,0x2802100,0x962460,
-0x6000c941,0x2806000,0x962460,0x6000c941,0x4000000,0x200000,0x6000c941,0x4000010,0x200000,0x6000c941,0x6800000,0x1329800,0x6000c941,0x6800100,0x962540,0x6000c941,
-0x7c00100,0x230400,0x6000c941,0xc000010,0x448000,0x6000ca82,0x7c00100,0x230400,0x6000cc00,0x4000000,0x4e00000,0x6000d000,0x4000000,0x200000,0x6002c300,0x4000000,
-0x100000,0x6002c300,0x4000000,0x10000d,0x6002c300,0x4000100,0x150400,0x6002c300,0x4000100,0x15040d,0x6002c300,0x4000100,0x14150400,0x600a3000,0x24000000,0x34200000,
-0x600a3000,0x24000000,0x34e00000,0x600a3700,0x24000000,0x34200000,0x600a3800,0x24000000,0x34200000,0x600a3800,0x24000000,0xb6800000,0x600a4305,0x7c00100,0xe30400,0x600ac300,
-0x4000000,0x34100000,0x600ac400,0x4000000,0x14e0000d,0x600ac400,0x4000000,0x34e0000d,0x600acb14,0x7c00100,0xe30000,0x600acb16,0x7c00100,0xe30c00,0x600acc00,0x4000000,
-0x34e00000,0x600acd00,0x4000000,0x34200000,0x600acd00,0x4000000,0x34e00000,0x600acd00,0x4000000,0xb6800000,0x600ace00,0x4000000,0x34e00000,0x600ace00,0x4000000,0xb6800000,
-0x600acf00,0x4000000,0x34e00000,0x600acf00,0x4000000,0xb6800000,0x600ad111,0x7c40300,0xe30000,0x604ac4bf,0x4000000,0x34e00003,0x61000a03,0x4000000,0x1600000,0x61000c02,
-0x80000000,0x218960,0x6100120f,0x4000000,0x200000,0x61001a18,0x7c00100,0x1830000,0x61001d0c,0x7c00100,0x230400,0x61001d0c,0x7c00100,0x250400,0x61006600,0x24000020,
-0x200000,0x61008407,0x7c00100,0x220400,0x61008407,0x7c00100,0x250400,0x6100870c,0x7c00100,0x220400,0x61008e00,0x24000000,0x200000,0x61008e00,0x24000000,0x400000,
-0x61008e00,0x24000002,0x300000,0x6100903c,0x7c00100,0x230400,0x61009519,0x7c00100,0x220400,0x61009519,0x7c00100,0x250400,0x61009519,0x7c00500,0x22040f,0x61009b71,
-0x2802100,0x962460,0x61009b71,0x2806400,0x962460,0x61009b71,0x7c00100,0x230400,0x6100a008,0x2802100,0x962460,0x6100c300,0x4000000,0x20000f,0x6100cd00,0x4000000,
-0x200000,0x6100d202,0x2802400,0x962460,0x6100d202,0x2802500,0x962460,0x6100d202,0x7c00100,0x230400,0x6100d302,0x4000020,0x200000,0x6100d302,0x7c00120,0x230405,
-0x6100d476,0x2802100,0x962460,0x6100d476,0x2802100,0x962461,0x6100d476,0x2806400,0x962460,0x6100d476,0x4000000,0x400000,0x6100d476,0x6800000,0x1329800,0x6100d476,
-0x6800100,0x962540,0x6100d476,0x7c00100,0x230400,0x6100d476,0xc000010,0x448000,0x6100d573,0x2802100,0x962460,0x6100d573,0x2806400,0x962460,0x6100d573,0x6800100,
-0x962540,0x6100d573,0x7c00100,0x230400,0x6100d573,0x7c00900,0x230400,0x6100d573,0xc000010,0x448000,0x6100d68d,0x7c00100,0x230400,0x6100d756,0x7c00100,0x230400,
-0x6100d85c,0x2802500,0x962460,0x6100d85c,0x6800100,0x962540,0x6100d85c,0x7c00100,0x230400,0x6100d85c,0x7c00500,0x230400,0x6100d997,0x2802100,0x962460,0x6100d997,
-0x4000000,0x200000,0x6100d997,0x4000000,0x400000,0x6100d997,0x6800000,0x1329800,0x6100d997,0x6800100,0x962540,0x6100d997,0x6804400,0x962540,0x6100d997,0x7c00100,
-0x230400,0x6100d997,0x7c00100,0x230560,0x6100d997,0xc000010,0x448000,0x6100da98,0x6800000,0x1329800,0x6100da98,0x7c00100,0x230400,0x6100db71,0x4000000,0x200000,
-0x6100dc99,0x2802100,0x962460,0x6100dc99,0x2802400,0x962460,0x6100dc99,0x6800000,0x1329800,0x6100dc99,0x6800100,0x962540,0x6100dc99,0x6804400,0x962540,0x6100dc99,
-0x7c00100,0x230400,0x610a4711,0x7c40300,0xe30000,0x610a4f11,0x7c00300,0xe30001,0x610ace00,0x4000000,0x34e00000,0x6140af96,0x7c00100,0x230400,0x6140af98,0x7c00100,
-0x230400,0x6180af93,0x2802400,0x962460,0x62002a00,0x4000000,0x1600000,0x63002800,0x80000,0x918820,0x63c00c14,0x80000,0x918820,0x7000080e,0x7c00100,0x250400,
-0x70000a03,0x4000000,0x200000,0x70000c00,0x80000000,0x218960,0x70000f0a,0x7c00100,0x230400,0x70001004,0x7c00100,0x230400,0x70001524,0x2802100,0x962460,0x70001524,
-0x7c00100,0x230400,0x70001615,0x2802100,0x962460,0x7000171a,0x2802100,0x962460,0x70001821,0x6800000,0x1329800,0x70002320,0x7c00100,0x230400,0x70002a00,0x4000000,
-0x1500000,0x70002a00,0x4000000,0x1600000,0x70003000,0x24000000,0x200000,0x70003000,0x24000000,0x14200000,0x70003800,0x24000000,0x4e00000,0x70005201,0x2802400,0x962460,
-0x7000581e,0x7c00100,0x230400,0x70006108,0x7c00100,0x220400,0x70006108,0x7c00100,0x250400,0x70006f30,0x7c00100,0x230400,0x70007300,0x24000000,0x200000,0x70007f0e,
-0x4000000,0x200000,0x70008301,0x2802100,0x962460,0x70008301,0x2802400,0x962460,0x70008e00,0x24000000,0x200000,0x70008e00,0x24000000,0x400000,0x70008e00,0x24000002,
-0x400000,0x70008e00,0x24000008,0x1410000,0x70008e00,0x24000010,0x400000,0x70008e00,0x2c000010,0x448000,0x70009519,0x7c00100,0x220400,0x70009519,0x7c00100,0x230400,
-0x70009519,0x7c00100,0x250400,0x70009865,0x7c00100,0x230400,0x70009965,0x4000010,0x400000,0x70009965,0x7c00100,0x230400,0x7000a008,0x7c00100,0x220400,0x7000a008,
-0x7c00100,0x250400,0x7000a008,0x7c00500,0x22040f,0x7000a50e,0x4000000,0x200000,0x7000b61c,0x2802500,0x1862460,0x7000b61c,0x6800500,0x1862400,0x7000b61c,0x7c00100,
-0x1830000,0x7000c300,0x4000000,0x100000,0x7000c941,0x2806000,0x962460,0x7000cc00,0x4000000,0x4e00000,0x7000cd00,0x4000000,0x200000,0x7000cd00,0x4000000,0x4200000,
-0x7000cd00,0x4000000,0x4e00000,0x7000cd00,0x4000000,0x14200000,0x7000cd00,0x4000000,0x14e00000,0x7000cd00,0x4000000,0x96800000,0x7000cf00,0x4000000,0x4e00000,0x7000cf00,
-0x4000000,0x14e00000,0x7000d202,0x2802100,0x962460,0x7000d202,0x7c00100,0x230400,0x7000d997,0x7c00100,0x230400,0x7000d997,0xc000010,0x248000,0x7000dd86,0x2802400,
-0x962460,0x7000dd86,0x7c00100,0x230400,0x7000dd86,0xc000010,0x448000,0x7000de9f,0x4000000,0x200000,0x7000de9f,0x7c00100,0x230400,0x7000e001,0x2000,0x962460,
-0x7000e001,0x2802400,0x962460,0x7000e187,0x2802000,0x962460,0x7000e187,0x2802100,0x962460,0x7000e187,0x4000000,0x200000,0x7000e187,0x7c00100,0x230400,0x7000e187,
-0xc000010,0x448000,0x7000e288,0x7c00100,0x230400,0x7000e300,0x4000000,0x200000,0x7000e489,0x2802100,0x962460,0x7000e489,0x2802400,0x962460,0x7000e489,0x6800100,
-0x962540,0x7000e489,0x6800100,0x962541,0x7000e489,0x6804400,0x962540,0x7000e489,0x7c00100,0x230400,0x7000e489,0x7c00900,0x230400,0x7000e59d,0x2802100,0x962460,
-0x7000e59d,0x2802400,0x962460,0x7000e59d,0x4000000,0x200000,0x7000e59d,0x4000010,0x200000,0x7000e59d,0x6800100,0x962540,0x7000e59d,0x6804400,0x962540,0x7000e59d,
-0x7c00100,0x230400,0x7000e59d,0xc000010,0x448000,0x7000e691,0x2802100,0x962460,0x7000e691,0x2802400,0x962460,0x7000e691,0x2806400,0x962460,0x7000e691,0x6800000,
-0x1329800,0x7000e691,0x6800100,0x962540,0x7000e691,0x7c00100,0x230400,0x7000e700,0x4000400,0x200400,0x7000e70e,0x7c00100,0x220400,0x7000e719,0x7c00100,0x220400,
-0x7000e719,0x7c00500,0x22040f,0x7000e853,0x7c00100,0x230400,0x7000e9a0,0x2802400,0x962460,0x7000e9a0,0x4000000,0x200000,0x7000e9a0,0x4000000,0x500000,0x7000e9a0,
-0x7c00100,0x230400,0x7000ea79,0x2802400,0x962460,0x7000ea79,0x4000000,0x200000,0x7000ea79,0x4000000,0xf00000,0x7000ea79,0x4000010,0x400000,0x7000ea79,0x7c00100,
-0x230400,0x7000eb8c,0x2802400,0x962460,0x7000eb8c,0x4000000,0x200000,0x7000eb8c,0x7c00100,0x230400,0x7000eca3,0x2802100,0x962460,0x7000eca3,0x2806400,0x962460,
-0x7000eca3,0x4000000,0x200000,0x7000eca3,0x6800000,0x1329800,0x7000eca3,0x6800100,0x962540,0x7000eca3,0x7c00100,0x230400,0x7000eca3,0xc000010,0x448000,0x7000ed95,
-0x6800000,0x1329800,0x7000ed95,0x7c00100,0x230400,0x7000ed95,0xc000010,0x448000,0x7000ee1c,0x2802500,0x1862460,0x7000ee1c,0x6800000,0x1329800,0x7000ee1c,0x7c00100,
-0x1830000,0x7000ee1c,0x7c00900,0x1830000,0x7000ef8f,0x4000000,0x200000,0x7000ef8f,0x7c00100,0x230400,0x7000f08e,0x4000000,0x200000,0x7000f08e,0x7c00100,0x230400,
-0x7000f159,0x2802100,0x962460,0x7000f159,0x7c00100,0x230400,0x7000f200,0x4000000,0x200000,0x7000f200,0x4000000,0x1200000,0x7000f200,0x4000000,0x1710000,0x7000f34b,
-0x2802400,0x962460,0x7000f34b,0x4000000,0x200000,0x7000f34b,0x4000010,0x400000,0x7000f34b,0x6800000,0x1329800,0x7000f34b,0x7c00100,0x230400,0x7000f34b,0x7c00900,
-0x230400,0x7000f34b,0xc000010,0x448000,0x7000f490,0x4000000,0x200000,0x7000f490,0x7c00100,0x230400,0x7000f5a5,0x7c00100,0x230400,0x7000f67b,0x4000000,0x200000,
-0x7000f67b,0x4000010,0x200000,0x7000f67b,0x7c00100,0x230400,0x7000f8a6,0x2802100,0x962460,0x7000f8a6,0x2802400,0x962460,0x7000f8a6,0x2806400,0x962460,0x7000f8a6,
-0x4000000,0x500000,0x7000f8a6,0x4000010,0xb00000,0x7000f8a6,0x4000800,0x200000,0x7000f8a6,0x6800100,0x962540,0x7000f8a6,0x6800100,0x962541,0x7000f8a6,0x7c00100,
-0x230400,0x7000f8a6,0xc000010,0x448000,0x7000f921,0x4000000,0x200000,0x7000fa00,0x4000000,0x200000,0x7000fb9e,0x2802100,0x962460,0x7000fb9e,0x2802400,0x962460,
-0x7000fb9e,0x2806400,0x962460,0x7000fb9e,0x4000000,0x200000,0x7000fb9e,0x6800000,0x1329800,0x7000fb9e,0x6800100,0x962540,0x7000fb9e,0x6800100,0x962541,0x7000fb9e,
-0x7c00100,0x230400,0x7000fc92,0x4000000,0x200000,0x7000fc92,0x6800000,0x1329800,0x7000fc92,0x7c00100,0x220400,0x7000fc92,0x7c00100,0x230400,0x7000fc92,0x7c00100,
-0x250400,0x700acd00,0x4000000,0x34e00000,0x700acd00,0x4000000,0xb6800000,0x700ace00,0x4000000,0x34e00000,0x700acf00,0x4000000,0x34e00000,0x700acf00,0x4000000,0xb6800000,
-0x7050df01,0x4000000,0x200000,0x7050f705,0x80000,0x918820,0x7080af96,0x2802400,0x962460,0x7090df01,0x2802400,0x962460,0x70d0e403,0x2802100,0x962460,0x70d0e403,
-0x2802400,0x962460,0x70d0e403,0x6800100,0x962540,0x8000120f,0x7c00100,0x230400,0x80001524,0x7c00100,0x230400,0x8000171a,0x7c00100,0x230400,0x80002006,0x7c00100,
-0x220400,0x80002006,0x7c00100,0x250400,0x80002a00,0x4000000,0x1500000,0x80002d00,0x4000000,0x200000,0x80005208,0x2802400,0x962460,0x80005c00,0x4000000,0x200000,
-0x80007300,0x24000000,0x200000,0x80009519,0x7c00100,0x220400,0x80009519,0x7c00100,0x230400,0x80009519,0x7c00100,0x250400,0x80009865,0x7c00100,0x230400,0x8000a008,
-0x2802100,0x962460,0x8000b30a,0x4000000,0x500000,0x8000b30a,0x7c00100,0x230400,0x8000cd00,0x4000000,0x4e00000,0x8000d202,0x2802500,0x962460,0x8000d202,0x7c00100,
-0x230400,0x8000d68d,0x4000000,0x200000,0x8000d997,0x2802000,0x962460,0x8000d997,0x2802400,0x962460,0x8000d997,0x4000000,0x400000,0x8000d997,0x4000000,0x500000,
-0x8000d997,0x7c00100,0x230400,0x8000d997,0xc000010,0x448000,0x8000e489,0x2802100,0x962460,0x8000e489,0x7c00100,0x230400,0x8000e719,0x7c00100,0x220400,0x8000f8a6,
-0x2802100,0x962460,0x8000f8a6,0x7c00100,0x230400,0x8000f8a6,0xc000010,0x448000,0x8000fda1,0x2802100,0x1862460,0x8000fda1,0x2806400,0x1862460,0x8000fda1,0x4000000,
-0x1800000,0x8000fda1,0x6800000,0x1329800,0x8000fda1,0x6800100,0x1862540,0x8000fda1,0x7c00100,0x1830000,0x8000fda1,0xc000010,0x448000,0x8000fe9c,0x7c00100,0x230400,
-0x8000fe9c,0x7c00100,0x830400,0x8000fe9c,0x7c00100,0x1430400,0x8000ff06,0x7c00100,0x220400,0x80010165,0x7c00100,0x230400,0x800102a2,0x4000000,0x200000,0x800102a2,
-0x7c00100,0x230400,0x800103a4,0x7c00100,0x230400,0x800103a4,0xc000010,0x448000,0x8001044c,0x4000000,0x200000,0x8001044c,0x7c00100,0x220400,0x8001044c,0x7c00100,
-0x250400,0x80010670,0x2802000,0x962460,0x80010670,0x4000000,0x200000,0x80010670,0x4000010,0x400000,0x80010670,0xc000010,0x448000,0x800a4711,0x7c40300,0xe30000,
-0x800acd00,0x4000000,0x34e00000,0x800acd00,0x4000000,0x7a902460,0x800ace00,0x4000000,0x34e00000,0x800acf00,0x4000000,0x34e00000,0x800b0011,0x7c40300,0xe30000,0x800b0500,
-0x4000000,0x34e00000,0x800b0500,0x4000000,0xb6800000,0x90001615,0x7c00100,0x230400,0x9000171a,0x4000000,0x200000,0x9000171a,0x7c00100,0x230400,0x90003000,0x24000000,
-0x200000,0x90007f0e,0x4000000,0x200000,0x90008301,0x2802000,0x962460,0x90008e00,0x24000000,0x400000,0x90009519,0x7c00100,0x250400,0x9000a16f,0x2802100,0x962460,
-0x9000d200,0x80000000,0x218960,0x9000d202,0x2802000,0x962460,0x9000d202,0x2802100,0x962460,0x9000d202,0x7c00100,0x230400,0x9000e59d,0x2802100,0x962460,0x90010500,
-0x4000000,0xe00000,0x900107a7,0x2802100,0x962460,0x900107a7,0x2802400,0x962460,0x900107a7,0x2802c00,0x962460,0x900107a7,0x4000000,0x1400000,0x900107a7,0x6800000,
-0x1329800,0x900107a7,0x7c00100,0x220400,0x900107a7,0x7c00100,0x250400,0x900108a8,0x2802100,0x962460,0x900108a8,0x2806400,0x962460,0x900108a8,0x4000000,0x200000,
-0x900108a8,0x4000000,0x400000,0x900108a8,0x4000010,0x400000,0x900108a8,0x6800000,0x1329800,0x900108a8,0x6800100,0x962540,0x900108a8,0x7c00100,0x230400,0x900108a8,
-0xc000010,0x448000,0x90010908,0x7c00100,0x220400,0x90010a38,0x2802100,0x962460,0x90010ca9,0x2802100,0x962460,0x90010ca9,0x4000000,0x500000,0x90010ca9,0x4000010,
-0xb00000,0x90010ca9,0x6800100,0x962540,0x90010ca9,0x7c00100,0x230400,0x90010d1b,0x4000000,0x500000,0x90010eaa,0x2802100,0x962460,0x90010eaa,0x2802400,0x962460,
-0x90010eaa,0x2806400,0x962460,0x90010eaa,0x4000000,0x200000,0x90010eaa,0x4000000,0x400000,0x90010eaa,0x4000010,0x400000,0x90010eaa,0x6800000,0x1329800,0x90010eaa,
-0x6800100,0x962540,0x90010eaa,0x7c00100,0x230400,0x90010eaa,0xc000010,0x448000,0x90010fab,0x7c00100,0x220400,0x90010fab,0x7c00100,0x250400,0x9002c300,0x4000000,
-0x100000,0x900ac400,0x4000000,0xe0000d,0x900acd00,0x4000000,0x34e00000,0x900acd00,0x4000000,0xb6800000,0x900acf00,0x4000000,0x34e00000,0x900b0500,0x4000000,0x34e00000,
-0x900b0500,0x4000000,0xb6800000,0x900b0b9a,0x7c00900,0x1230400,0x900b109a,0x7c00300,0xe30000,0x900b119a,0x7c00300,0xe30000,0x90408e06,0x24000000,0x400000,0xa0001004,
-0x4000000,0x200000,0xa0001004,0x7c00100,0x230400,0xa000120f,0x2802100,0x962460,0xa000120f,0x2802400,0x962460,0xa000171a,0x2802100,0x962460,0xa000171a,0x2806400,
-0x962460,0xa0002a00,0x4000000,0x1600000,0xa0003000,0x24000000,0x200000,0xa000581e,0x7c00100,0x230400,0xa0007300,0x24000000,0x200000,0xa0008301,0x2802400,0x962460,
-0xa0008e00,0x24000000,0x400000,0xa000cf00,0x4000000,0x4e00000,0xa0010500,0x4000000,0x200000,0xa00114af,0x2802100,0x962460,0xa00114af,0x2802400,0x962460,0xa00114af,
-0x2806400,0x962460,0xa00114af,0x6800000,0x1329800,0xa00114af,0x7c00100,0x230400,0xa00114af,0x7c00100,0x230560,0xa00116b0,0x2802100,0x962460,0xa00116b0,0x2802800,
-0x962460,0xa00116b0,0x2806400,0x962460,0xa00116b0,0x4000000,0x400000,0xa00116b0,0x4000000,0x500000,0xa00116b0,0x4000010,0x400000,0xa00116b0,0x6800100,0x962540,
-0xa00116b0,0x7c00100,0x230400,0xa00116b0,0x7c00100,0x230560,0xa00116b0,0xc000010,0x448000,0xa0011722,0x7c00100,0x230400,0xa00118b1,0x2802000,0x962460,0xa00118b1,
-0x2802100,0x962460,0xa00118b1,0x2806400,0x962460,0xa00118b1,0x4000000,0x200000,0xa00118b1,0x4000000,0x400000,0xa00118b1,0x4000000,0x500000,0xa00118b1,0x6800100,
-0x962540,0xa00118b1,0x7c00100,0x230400,0xa00118b1,0x7c00100,0x230560,0xa00118b1,0xc000010,0x448000,0xa00a4005,0x7c00100,0xe30400,0xa00a4711,0x7c40300,0xe30000,
-0xa00ac400,0x4000000,0x4e00000,0xa00acb14,0x7c00100,0xe30000,0xa00acf00,0x4000000,0x34e00000,0xa00b0500,0x4000000,0x34e00000,0xa00b0500,0x4000000,0xb6800000,0xa00b0b96,
-0x7c00900,0x1230400,0xa00b1211,0x7c40300,0xe30000,0xa00b1314,0x7c00100,0xe30000,0xa00b1596,0x7c00300,0xe30000,0xa040afac,0x6800400,0x962540,0xa08083ad,0x2802400,
-0x962460,0xb0000a03,0x7c00100,0x220400,0xb0000b13,0x7c00100,0x2633800,0xb0001004,0x2802000,0x962460,0xb0001110,0x4000000,0x200000,0xb0001524,0x2802000,0x962460,
-0xb0001615,0x4000000,0x500000,0xb000251b,0x7c00100,0x230400,0xb0007300,0x24000000,0x200000,0xb0008939,0x4000000,0x200000,0xb0008939,0x7c00100,0x230400,0xb0008e00,
-0x24000000,0x200000,0xb0008e00,0x24000000,0x400000,0xb0008e00,0x24000010,0x400000,0xb0009257,0x2802000,0x962460,0xb0009257,0x4000000,0x1600000,0xb0009519,0x7c00100,
-0x220400,0xb0009519,0x7c00100,0x250400,0xb0009a00,0x4000000,0x200000,0xb000b30a,0x2802100,0x962460,0xb000b30a,0x7c00100,0x230400,0xb000c178,0x80000000,0x218960,
-0xb000c300,0x4000000,0x4200000,0xb000d202,0x2802000,0x962460,0xb000d476,0x6800100,0x962540,0xb000d476,0x7c00100,0x230400,0xb000e300,0x4000000,0x4e00000,0xb000fda1,
-0x7c00100,0x1830000,0xb0010eaa,0x2802000,0x962460,0xb00116b0,0x7c00100,0x230400,0xb0011900,0x4000000,0x4e00000,0xb0011ab2,0x2802100,0x962460,0xb0011ab2,0x2802400,
-0x962460,0xb0011ab2,0x2806400,0x962460,0xb0011ab2,0x4000000,0x200000,0xb0011ab2,0x6800100,0x962540,0xb0011ab2,0x7c00100,0x230400,0xb0011b0c,0x7c00100,0x230400,
-0xb0011cb3,0x2802100,0x962460,0xb0011cb3,0x2806400,0x962460,0xb0011cb3,0x6800000,0x1329800,0xb0011cb3,0x6800100,0x962540,0xb0011cb3,0x7c00100,0x230400,0xb0011db6,
-0x2802500,0x962460,0xb0011db6,0x6800000,0x1329800,0xb0011db6,0x7c00100,0x230400,0xb0011db6,0x7c00500,0x230400,0xb0011e00,0x4000000,0x200000,0xb0011e00,0x4000000,
-0x1500000,0xb0011fb4,0x2802100,0x962460,0xb0011fb4,0x6800100,0x962540,0xb0011fb4,0x7c00100,0x230400,0xb0011fb4,0xc000010,0x248000,0xb0012000,0x4000000,0x200000,
-0xb00121b5,0x4000000,0x200000,0xb00121b5,0x4000010,0x400000,0xb00121b5,0x7c00100,0x220400,0xb00121b5,0x7c00100,0x250400,0xb00121b5,0xc000010,0x448000,0xb00122b8,
-0x4000000,0x200000,0xb00122b8,0x7c00100,0x230400,0xb00123b7,0x2802400,0x962460,0xb00123b7,0x4000000,0x200000,0xb00123b7,0x7c00100,0x230400,0xb00123b7,0xc000010,
-0x248000,0xb00a4005,0x7c00100,0xe30400,0xb00a4711,0x7c40300,0xe30000,0xb00acf00,0x4000000,0x34e00000,0xb00b0500,0x4000000,0x34e00000,0xb00b0500,0x4000000,0x3ce00000,
-0xb00b0500,0x4000000,0xb6800000,0xb00b109a,0x7c00300,0xe30000,0xb080e47c,0x2802000,0x962460,0xc0001524,0x4000000,0x500000,0xc0001a18,0x2806400,0x1862460,0xc0001a18,
-0x7c00100,0x1830000,0xc0007300,0x24000000,0x200000,0xc0008e00,0x24000010,0x400000,0xc0009519,0x7c00100,0x220400,0xc0009519,0x7c00100,0x250400,0xc000c300,0x4000000,
-0x420000f,0xc000d85c,0x2802100,0x962460,0xc000d85c,0x6800100,0x962540,0xc000d85c,0x7c00100,0x230400,0xc000dc99,0x7c00100,0x230400,0xc000e719,0x7c00100,0x220400,
-0xc00107a7,0x7c00100,0x230400,0xc0010eaa,0x7c00100,0x230400,0xc00116b0,0x7c00100,0x230560,0xc0011900,0x4000000,0x4200000,0xc0012447,0,0x818820,0xc0012447,
-0,0xc18820,0xc0012447,0,0x1418820,0xc00125b9,0x7c00100,0x230400,0xc00126bb,0x2802100,0x962460,0xc00126bb,0x2806400,0x962460,0xc00126bb,0x4000000,
-0x500000,0xc00126bb,0x6800100,0x962540,0xc00126bb,0x7c00100,0x230400,0xc00127ba,0x2802400,0x962460,0xc00127ba,0x4000000,0x200000,0xc00127ba,0x6800000,0x1329800,
-0xc00127ba,0x7c00100,0x230400,0xc00127ba,0x7c00900,0x230400,0xc0012800,0x4000000,0x200000,0xc0012b23,0x4000000,0x200000,0xc0012b23,0x4000000,0x400000,0xc0012b23,
-0x4000000,0x1500000,0xc0012cbc,0x2802400,0x962460,0xc0012cbc,0x4000000,0x1600000,0xc0012cbc,0x6800000,0x1329800,0xc0012cbc,0x7c00100,0x230400,0xc00acf00,0x4000000,
-0x34e00000,0xc00ae300,0x4000000,0x34e00000,0xc00b0500,0x4000000,0x34e00000,0xc00b0500,0x4000000,0xb6800000,0xc00b0b00,0x4000000,0x1200000,0xc00b0b00,0x7c00900,0x1230400,
-0xc00b109a,0x7c00300,0xe30000,0xc00b2914,0x7c00100,0x2530000,0xc00b2916,0x7c00100,0x2530c00,0xc00b2a00,0x4000000,0x34e00000,0xc040af53,0x7c00100,0x230400,0xc0c12b7e,
-0x4000000,0x200000,0xc14a44bf,0x4000000,0xe0000d,0xd000131f,0x2802c00,0x962460,0xd000171a,0x7c00100,0x230400,0xd0001821,0x2802100,0x962460,0xd0007300,0x24000000,
-0x200000,0xd0008e00,0x24000000,0x200000,0xd0008f3a,0x2806000,0x962460,0xd0009519,0x7c00100,0x220400,0xd0009519,0x7c00100,0x250400,0xd000a500,0x4000000,0x200000,
-0xd000c300,0x4000000,0x4e00000,0xd000d202,0x7c00100,0x230400,0xd000d476,0x7c00100,0x230400,0xd000d997,0x2802100,0x962460,0xd000d997,0x6800100,0x962540,0xd000e001,
-0x2802100,0x962460,0xd000e700,0x4000400,0x200000,0xd000e719,0x7c00100,0x220400,0xd000e719,0x7c00500,0x23040f,0xd000fa00,0x4000000,0x4e00000,0xd0010eaa,0x4000010,
-0x400000,0xd0010eaa,0x7c00100,0x230400,0xd0012dbd,0x4000000,0x200000,0xd0012dbd,0x7c00100,0x230400,0xd0012fbe,0x2802100,0x962460,0xd0012fbe,0x2802400,0x962460,
-0xd0012fbe,0x2806400,0x962460,0xd0012fbe,0x4000000,0x400000,0xd0012fbe,0x6800000,0x1329800,0xd0012fbe,0x6800100,0x962540,0xd0012fbe,0x6800100,0x962541,0xd0012fbe,
-0x6804400,0x962540,0xd0012fbe,0x7c00100,0x230400,0xd0012fbe,0x7c00100,0x230560,0xd0012fbe,0xc000010,0x448000,0xd0013183,0x7c00100,0x230400,0xd0013200,0x4000000,
-0x200000,0xd0013200,0x6800000,0x1329805,0xd00134c0,0x2802100,0x962460,0xd00134c0,0x4000002,0x400000,0xd00134c0,0x7c00100,0x230400,0xd00a4305,0x7c00100,0xe30400,
-0xd00a4611,0x7c40300,0xe30000,0xd00a4711,0x7c40300,0xe30000,0xd00a5e11,0x7c40300,0xe30000,0xd00acf00,0x4000000,0x34e00000,0xd00b0500,0x4000000,0x34e00000,0xd00b0500,
-0x4000000,0xb6800000,0xd00b0b11,0x6800500,0x962540,0xd00b0bbf,0x2802200,0xc62460,0xd00b119a,0x7c00300,0xe30000,0xd00b2a00,0x4000000,0x34e00000,0xd00b2e11,0x7c40300,
-0xe30000,0xd00b30bf,0x7c00300,0x230000,0xd00b339a,0x7c00300,0xe30000};
-
-static const int32_t countPropsVectors=6999;
-static const int32_t propsVectorsColumns=3;
-static const uint16_t scriptExtensions[262]={
-0x800e,0x8019,8,0x8059,8,2,8,0x8038,8,6,8,0x8019,2,0x22,0x25,0xb6,
-0x80c0,2,0x22,0x8025,2,0x11,2,0x22,0x54,0x79,0x7b,0xa7,0xb6,0x80b7,2,0x8022,
-2,0x25,0x80c0,2,0x20,2,0x80b6,4,0xa,0xf,0x10,0x15,0x19,0x1a,0x1f,0x23,
-0x24,0x89,0x97,0x809e,4,0xa,0xf,0x10,0x15,0x19,0x1a,0x1f,0x23,0x24,0x89,0x809e,
-4,0xa,0xf,0x10,0x15,0x1a,0x1f,0x21,0x23,0x24,0x3a,0x89,0x91,0x99,0x9e,0xa0,
-0xaf,0xb2,0xb3,0x80bb,4,0xa,0xf,0x10,0x15,0x1a,0x1f,0x21,0x23,0x24,0x30,0x3a,
-0x89,0x91,0x99,0x9e,0xa0,0xaf,0xb2,0xb3,0x80bb,0xa,0x78,0xa0,0x80b2,0xa,0x69,4,
-0x3a,0x8076,4,0x6f,0x10,0x80a4,0x10,0x74,0xf,0x809d,0xf,0x78,0x23,0x8089,0x23,0x7c,
-0x15,0x80bb,0x15,0x80,0x1c,0x34,0x8076,0x1c,0x84,0xc,0x8019,0x2a,0x2b,0x2c,0x802d,0x1b,
-0x805a,0x800a,4,0xa,0x15,0x8089,0xa,0x8089,4,0x800a,0xa,0x8097,0xa,0x15,0x1a,0x1f,
-0x23,0x8024,0xa,0x80bb,4,0xa,0x15,0x1f,0x24,0x89,0x9e,0x80bb,0x8004,8,0x8022,0x19,
-0x801b,0xa,0x19,0x8089,5,0x11,0x12,0x14,0x16,0x8029,5,0x11,0x12,0x14,0x8016,0x8011,
-5,0x8011,0x11,0x14,0x8016,0x11,0x8019,0xa,0xf,0x10,0x78,0x91,0x99,0x9d,0x9e,0xa0,
-0xa3,0x80b2,0xa,0xf,0x10,0x15,0x1a,0x78,0x91,0x99,0x9d,0x9e,0xa0,0xa3,0xb2,0x80bb,
-0xa,0xf,0x10,0x15,0x78,0x91,0x99,0x9d,0x9e,0xa0,0xa3,0xb2,0x80bb,0xa,0x98,0xa,
-0x8023,0xa,0xef,0x19,0x1c,0x804f,0x37,0x804e,2,0x8025,2,0xf8,0x2f,0x31,0x8053,0x2f,
-0x8031,2,0x8007,0x89,0x7c,0x8087};
-
-static const int32_t indexes[UPROPS_INDEX_COUNT]={0x2b96,0x2b96,0x2b96,0x2b96,0x6898,3,0x83ef,0x8472,0x8472,0x8472,0xb34c0,0x2a75a31,0,0,0,0};
-
-#endif // INCLUDED_FROM_UCHAR_C
diff --git a/contrib/libs/icu/common/ucharstrie.cpp b/contrib/libs/icu/common/ucharstrie.cpp
deleted file mode 100644
index e0b33af5194..00000000000
--- a/contrib/libs/icu/common/ucharstrie.cpp
+++ /dev/null
@@ -1,414 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2010-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: ucharstrie.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010nov14
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/appendable.h"
-#include "unicode/ucharstrie.h"
-#include "unicode/uobject.h"
-#include "unicode/utf16.h"
-#include "cmemory.h"
-#include "uassert.h"
-
-U_NAMESPACE_BEGIN
-
-UCharsTrie::~UCharsTrie() {
- uprv_free(ownedArray_);
-}
-
-UStringTrieResult
-UCharsTrie::current() const {
- const UChar *pos=pos_;
- if(pos==NULL) {
- return USTRINGTRIE_NO_MATCH;
- } else {
- int32_t node;
- return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
- valueResult(node) : USTRINGTRIE_NO_VALUE;
- }
-}
-
-UStringTrieResult
-UCharsTrie::firstForCodePoint(UChar32 cp) {
- return cp<=0xffff ?
- first(cp) :
- (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ?
- next(U16_TRAIL(cp)) :
- USTRINGTRIE_NO_MATCH);
-}
-
-UStringTrieResult
-UCharsTrie::nextForCodePoint(UChar32 cp) {
- return cp<=0xffff ?
- next(cp) :
- (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ?
- next(U16_TRAIL(cp)) :
- USTRINGTRIE_NO_MATCH);
-}
-
-UStringTrieResult
-UCharsTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
- // Branch according to the current unit.
- if(length==0) {
- length=*pos++;
- }
- ++length;
- // The length of the branch is the number of units to select from.
- // The data structure encodes a binary search.
- while(length>kMaxBranchLinearSubNodeLength) {
- if(uchar<*pos++) {
- length>>=1;
- pos=jumpByDelta(pos);
- } else {
- length=length-(length>>1);
- pos=skipDelta(pos);
- }
- }
- // Drop down to linear search for the last few units.
- // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
- // and divides length by 2.
- do {
- if(uchar==*pos++) {
- UStringTrieResult result;
- int32_t node=*pos;
- if(node&kValueIsFinal) {
- // Leave the final value for getValue() to read.
- result=USTRINGTRIE_FINAL_VALUE;
- } else {
- // Use the non-final value as the jump delta.
- ++pos;
- // int32_t delta=readValue(pos, node);
- int32_t delta;
- if(node<kMinTwoUnitValueLead) {
- delta=node;
- } else if(node<kThreeUnitValueLead) {
- delta=((node-kMinTwoUnitValueLead)<<16)|*pos++;
- } else {
- delta=(pos[0]<<16)|pos[1];
- pos+=2;
- }
- // end readValue()
- pos+=delta;
- node=*pos;
- result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
- }
- pos_=pos;
- return result;
- }
- --length;
- pos=skipValue(pos);
- } while(length>1);
- if(uchar==*pos++) {
- pos_=pos;
- int32_t node=*pos;
- return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
- } else {
- stop();
- return USTRINGTRIE_NO_MATCH;
- }
-}
-
-UStringTrieResult
-UCharsTrie::nextImpl(const UChar *pos, int32_t uchar) {
- int32_t node=*pos++;
- for(;;) {
- if(node<kMinLinearMatch) {
- return branchNext(pos, node, uchar);
- } else if(node<kMinValueLead) {
- // Match the first of length+1 units.
- int32_t length=node-kMinLinearMatch; // Actual match length minus 1.
- if(uchar==*pos++) {
- remainingMatchLength_=--length;
- pos_=pos;
- return (length<0 && (node=*pos)>=kMinValueLead) ?
- valueResult(node) : USTRINGTRIE_NO_VALUE;
- } else {
- // No match.
- break;
- }
- } else if(node&kValueIsFinal) {
- // No further matching units.
- break;
- } else {
- // Skip intermediate value.
- pos=skipNodeValue(pos, node);
- node&=kNodeTypeMask;
- }
- }
- stop();
- return USTRINGTRIE_NO_MATCH;
-}
-
-UStringTrieResult
-UCharsTrie::next(int32_t uchar) {
- const UChar *pos=pos_;
- if(pos==NULL) {
- return USTRINGTRIE_NO_MATCH;
- }
- int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
- if(length>=0) {
- // Remaining part of a linear-match node.
- if(uchar==*pos++) {
- remainingMatchLength_=--length;
- pos_=pos;
- int32_t node;
- return (length<0 && (node=*pos)>=kMinValueLead) ?
- valueResult(node) : USTRINGTRIE_NO_VALUE;
- } else {
- stop();
- return USTRINGTRIE_NO_MATCH;
- }
- }
- return nextImpl(pos, uchar);
-}
-
-UStringTrieResult
-UCharsTrie::next(ConstChar16Ptr ptr, int32_t sLength) {
- const UChar *s=ptr;
- if(sLength<0 ? *s==0 : sLength==0) {
- // Empty input.
- return current();
- }
- const UChar *pos=pos_;
- if(pos==NULL) {
- return USTRINGTRIE_NO_MATCH;
- }
- int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
- for(;;) {
- // Fetch the next input unit, if there is one.
- // Continue a linear-match node without rechecking sLength<0.
- int32_t uchar;
- if(sLength<0) {
- for(;;) {
- if((uchar=*s++)==0) {
- remainingMatchLength_=length;
- pos_=pos;
- int32_t node;
- return (length<0 && (node=*pos)>=kMinValueLead) ?
- valueResult(node) : USTRINGTRIE_NO_VALUE;
- }
- if(length<0) {
- remainingMatchLength_=length;
- break;
- }
- if(uchar!=*pos) {
- stop();
- return USTRINGTRIE_NO_MATCH;
- }
- ++pos;
- --length;
- }
- } else {
- for(;;) {
- if(sLength==0) {
- remainingMatchLength_=length;
- pos_=pos;
- int32_t node;
- return (length<0 && (node=*pos)>=kMinValueLead) ?
- valueResult(node) : USTRINGTRIE_NO_VALUE;
- }
- uchar=*s++;
- --sLength;
- if(length<0) {
- remainingMatchLength_=length;
- break;
- }
- if(uchar!=*pos) {
- stop();
- return USTRINGTRIE_NO_MATCH;
- }
- ++pos;
- --length;
- }
- }
- int32_t node=*pos++;
- for(;;) {
- if(node<kMinLinearMatch) {
- UStringTrieResult result=branchNext(pos, node, uchar);
- if(result==USTRINGTRIE_NO_MATCH) {
- return USTRINGTRIE_NO_MATCH;
- }
- // Fetch the next input unit, if there is one.
- if(sLength<0) {
- if((uchar=*s++)==0) {
- return result;
- }
- } else {
- if(sLength==0) {
- return result;
- }
- uchar=*s++;
- --sLength;
- }
- if(result==USTRINGTRIE_FINAL_VALUE) {
- // No further matching units.
- stop();
- return USTRINGTRIE_NO_MATCH;
- }
- pos=pos_; // branchNext() advanced pos and wrote it to pos_ .
- node=*pos++;
- } else if(node<kMinValueLead) {
- // Match length+1 units.
- length=node-kMinLinearMatch; // Actual match length minus 1.
- if(uchar!=*pos) {
- stop();
- return USTRINGTRIE_NO_MATCH;
- }
- ++pos;
- --length;
- break;
- } else if(node&kValueIsFinal) {
- // No further matching units.
- stop();
- return USTRINGTRIE_NO_MATCH;
- } else {
- // Skip intermediate value.
- pos=skipNodeValue(pos, node);
- node&=kNodeTypeMask;
- }
- }
- }
-}
-
-const UChar *
-UCharsTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length,
- UBool haveUniqueValue, int32_t &uniqueValue) {
- while(length>kMaxBranchLinearSubNodeLength) {
- ++pos; // ignore the comparison unit
- if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
- return NULL;
- }
- length=length-(length>>1);
- pos=skipDelta(pos);
- }
- do {
- ++pos; // ignore a comparison unit
- // handle its value
- int32_t node=*pos++;
- UBool isFinal=(UBool)(node>>15);
- node&=0x7fff;
- int32_t value=readValue(pos, node);
- pos=skipValue(pos, node);
- if(isFinal) {
- if(haveUniqueValue) {
- if(value!=uniqueValue) {
- return NULL;
- }
- } else {
- uniqueValue=value;
- haveUniqueValue=TRUE;
- }
- } else {
- if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
- return NULL;
- }
- haveUniqueValue=TRUE;
- }
- } while(--length>1);
- return pos+1; // ignore the last comparison unit
-}
-
-UBool
-UCharsTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
- int32_t node=*pos++;
- for(;;) {
- if(node<kMinLinearMatch) {
- if(node==0) {
- node=*pos++;
- }
- pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
- if(pos==NULL) {
- return FALSE;
- }
- haveUniqueValue=TRUE;
- node=*pos++;
- } else if(node<kMinValueLead) {
- // linear-match node
- pos+=node-kMinLinearMatch+1; // Ignore the match units.
- node=*pos++;
- } else {
- UBool isFinal=(UBool)(node>>15);
- int32_t value;
- if(isFinal) {
- value=readValue(pos, node&0x7fff);
- } else {
- value=readNodeValue(pos, node);
- }
- if(haveUniqueValue) {
- if(value!=uniqueValue) {
- return FALSE;
- }
- } else {
- uniqueValue=value;
- haveUniqueValue=TRUE;
- }
- if(isFinal) {
- return TRUE;
- }
- pos=skipNodeValue(pos, node);
- node&=kNodeTypeMask;
- }
- }
-}
-
-int32_t
-UCharsTrie::getNextUChars(Appendable &out) const {
- const UChar *pos=pos_;
- if(pos==NULL) {
- return 0;
- }
- if(remainingMatchLength_>=0) {
- out.appendCodeUnit(*pos); // Next unit of a pending linear-match node.
- return 1;
- }
- int32_t node=*pos++;
- if(node>=kMinValueLead) {
- if(node&kValueIsFinal) {
- return 0;
- } else {
- pos=skipNodeValue(pos, node);
- node&=kNodeTypeMask;
- }
- }
- if(node<kMinLinearMatch) {
- if(node==0) {
- node=*pos++;
- }
- out.reserveAppendCapacity(++node);
- getNextBranchUChars(pos, node, out);
- return node;
- } else {
- // First unit of the linear-match node.
- out.appendCodeUnit(*pos);
- return 1;
- }
-}
-
-void
-UCharsTrie::getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out) {
- while(length>kMaxBranchLinearSubNodeLength) {
- ++pos; // ignore the comparison unit
- getNextBranchUChars(jumpByDelta(pos), length>>1, out);
- length=length-(length>>1);
- pos=skipDelta(pos);
- }
- do {
- out.appendCodeUnit(*pos++);
- pos=skipValue(pos);
- } while(--length>1);
- out.appendCodeUnit(*pos);
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/ucharstriebuilder.cpp b/contrib/libs/icu/common/ucharstriebuilder.cpp
deleted file mode 100644
index 049997a2754..00000000000
--- a/contrib/libs/icu/common/ucharstriebuilder.cpp
+++ /dev/null
@@ -1,443 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2010-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: ucharstriebuilder.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010nov14
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/ucharstrie.h"
-#include "unicode/ucharstriebuilder.h"
-#include "unicode/unistr.h"
-#include "unicode/ustring.h"
-#include "cmemory.h"
-#include "uarrsort.h"
-#include "uassert.h"
-#include "uhash.h"
-#include "ustr_imp.h"
-
-U_NAMESPACE_BEGIN
-
-/*
- * Note: This builder implementation stores (string, value) pairs with full copies
- * of the 16-bit-unit sequences, until the UCharsTrie is built.
- * It might(!) take less memory if we collected the data in a temporary, dynamic trie.
- */
-
-class UCharsTrieElement : public UMemory {
-public:
- // Use compiler's default constructor, initializes nothing.
-
- void setTo(const UnicodeString &s, int32_t val, UnicodeString &strings, UErrorCode &errorCode);
-
- UnicodeString getString(const UnicodeString &strings) const {
- int32_t length=strings[stringOffset];
- return strings.tempSubString(stringOffset+1, length);
- }
- int32_t getStringLength(const UnicodeString &strings) const {
- return strings[stringOffset];
- }
-
- UChar charAt(int32_t index, const UnicodeString &strings) const {
- return strings[stringOffset+1+index];
- }
-
- int32_t getValue() const { return value; }
-
- int32_t compareStringTo(const UCharsTrieElement &o, const UnicodeString &strings) const;
-
-private:
- // The first strings unit contains the string length.
- // (Compared with a stringLength field here, this saves 2 bytes per string.)
- int32_t stringOffset;
- int32_t value;
-};
-
-void
-UCharsTrieElement::setTo(const UnicodeString &s, int32_t val,
- UnicodeString &strings, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return;
- }
- int32_t length=s.length();
- if(length>0xffff) {
- // Too long: We store the length in 1 unit.
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return;
- }
- stringOffset=strings.length();
- strings.append((UChar)length);
- value=val;
- strings.append(s);
-}
-
-int32_t
-UCharsTrieElement::compareStringTo(const UCharsTrieElement &other, const UnicodeString &strings) const {
- return getString(strings).compare(other.getString(strings));
-}
-
-UCharsTrieBuilder::UCharsTrieBuilder(UErrorCode & /*errorCode*/)
- : elements(NULL), elementsCapacity(0), elementsLength(0),
- uchars(NULL), ucharsCapacity(0), ucharsLength(0) {}
-
-UCharsTrieBuilder::~UCharsTrieBuilder() {
- delete[] elements;
- uprv_free(uchars);
-}
-
-UCharsTrieBuilder &
-UCharsTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return *this;
- }
- if(ucharsLength>0) {
- // Cannot add elements after building.
- errorCode=U_NO_WRITE_PERMISSION;
- return *this;
- }
- if(elementsLength==elementsCapacity) {
- int32_t newCapacity;
- if(elementsCapacity==0) {
- newCapacity=1024;
- } else {
- newCapacity=4*elementsCapacity;
- }
- UCharsTrieElement *newElements=new UCharsTrieElement[newCapacity];
- if(newElements==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return *this;
- }
- if(elementsLength>0) {
- uprv_memcpy(newElements, elements, (size_t)elementsLength*sizeof(UCharsTrieElement));
- }
- delete[] elements;
- elements=newElements;
- elementsCapacity=newCapacity;
- }
- elements[elementsLength++].setTo(s, value, strings, errorCode);
- if(U_SUCCESS(errorCode) && strings.isBogus()) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- }
- return *this;
-}
-
-U_CDECL_BEGIN
-
-static int32_t U_CALLCONV
-compareElementStrings(const void *context, const void *left, const void *right) {
- const UnicodeString *strings=static_cast<const UnicodeString *>(context);
- const UCharsTrieElement *leftElement=static_cast<const UCharsTrieElement *>(left);
- const UCharsTrieElement *rightElement=static_cast<const UCharsTrieElement *>(right);
- return leftElement->compareStringTo(*rightElement, *strings);
-}
-
-U_CDECL_END
-
-UCharsTrie *
-UCharsTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
- buildUChars(buildOption, errorCode);
- UCharsTrie *newTrie=NULL;
- if(U_SUCCESS(errorCode)) {
- newTrie=new UCharsTrie(uchars, uchars+(ucharsCapacity-ucharsLength));
- if(newTrie==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- } else {
- uchars=NULL; // The new trie now owns the array.
- ucharsCapacity=0;
- }
- }
- return newTrie;
-}
-
-UnicodeString &
-UCharsTrieBuilder::buildUnicodeString(UStringTrieBuildOption buildOption, UnicodeString &result,
- UErrorCode &errorCode) {
- buildUChars(buildOption, errorCode);
- if(U_SUCCESS(errorCode)) {
- result.setTo(FALSE, uchars+(ucharsCapacity-ucharsLength), ucharsLength);
- }
- return result;
-}
-
-void
-UCharsTrieBuilder::buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return;
- }
- if(uchars!=NULL && ucharsLength>0) {
- // Already built.
- return;
- }
- if(ucharsLength==0) {
- if(elementsLength==0) {
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return;
- }
- if(strings.isBogus()) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharsTrieElement),
- compareElementStrings, &strings,
- FALSE, // need not be a stable sort
- &errorCode);
- if(U_FAILURE(errorCode)) {
- return;
- }
- // Duplicate strings are not allowed.
- UnicodeString prev=elements[0].getString(strings);
- for(int32_t i=1; i<elementsLength; ++i) {
- UnicodeString current=elements[i].getString(strings);
- if(prev==current) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- prev.fastCopyFrom(current);
- }
- }
- // Create and UChar-serialize the trie for the elements.
- ucharsLength=0;
- int32_t capacity=strings.length();
- if(capacity<1024) {
- capacity=1024;
- }
- if(ucharsCapacity<capacity) {
- uprv_free(uchars);
- uchars=static_cast<UChar *>(uprv_malloc(capacity*2));
- if(uchars==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- ucharsCapacity=0;
- return;
- }
- ucharsCapacity=capacity;
- }
- StringTrieBuilder::build(buildOption, elementsLength, errorCode);
- if(uchars==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- }
-}
-
-int32_t
-UCharsTrieBuilder::getElementStringLength(int32_t i) const {
- return elements[i].getStringLength(strings);
-}
-
-UChar
-UCharsTrieBuilder::getElementUnit(int32_t i, int32_t unitIndex) const {
- return elements[i].charAt(unitIndex, strings);
-}
-
-int32_t
-UCharsTrieBuilder::getElementValue(int32_t i) const {
- return elements[i].getValue();
-}
-
-int32_t
-UCharsTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const {
- const UCharsTrieElement &firstElement=elements[first];
- const UCharsTrieElement &lastElement=elements[last];
- int32_t minStringLength=firstElement.getStringLength(strings);
- while(++unitIndex<minStringLength &&
- firstElement.charAt(unitIndex, strings)==
- lastElement.charAt(unitIndex, strings)) {}
- return unitIndex;
-}
-
-int32_t
-UCharsTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const {
- int32_t length=0; // Number of different units at unitIndex.
- int32_t i=start;
- do {
- UChar unit=elements[i++].charAt(unitIndex, strings);
- while(i<limit && unit==elements[i].charAt(unitIndex, strings)) {
- ++i;
- }
- ++length;
- } while(i<limit);
- return length;
-}
-
-int32_t
-UCharsTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const {
- do {
- UChar unit=elements[i++].charAt(unitIndex, strings);
- while(unit==elements[i].charAt(unitIndex, strings)) {
- ++i;
- }
- } while(--count>0);
- return i;
-}
-
-int32_t
-UCharsTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const {
- while(unit==elements[i].charAt(unitIndex, strings)) {
- ++i;
- }
- return i;
-}
-
-UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode)
- : LinearMatchNode(len, nextNode), s(units) {
- hash=hash*37u+ustr_hashUCharsN(units, len);
-}
-
-UBool
-UCharsTrieBuilder::UCTLinearMatchNode::operator==(const Node &other) const {
- if(this==&other) {
- return TRUE;
- }
- if(!LinearMatchNode::operator==(other)) {
- return FALSE;
- }
- const UCTLinearMatchNode &o=(const UCTLinearMatchNode &)other;
- return 0==u_memcmp(s, o.s, length);
-}
-
-void
-UCharsTrieBuilder::UCTLinearMatchNode::write(StringTrieBuilder &builder) {
- UCharsTrieBuilder &b=(UCharsTrieBuilder &)builder;
- next->write(builder);
- b.write(s, length);
- offset=b.writeValueAndType(hasValue, value, b.getMinLinearMatch()+length-1);
-}
-
-StringTrieBuilder::Node *
-UCharsTrieBuilder::createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
- Node *nextNode) const {
- return new UCTLinearMatchNode(
- elements[i].getString(strings).getBuffer()+unitIndex,
- length,
- nextNode);
-}
-
-UBool
-UCharsTrieBuilder::ensureCapacity(int32_t length) {
- if(uchars==NULL) {
- return FALSE; // previous memory allocation had failed
- }
- if(length>ucharsCapacity) {
- int32_t newCapacity=ucharsCapacity;
- do {
- newCapacity*=2;
- } while(newCapacity<=length);
- UChar *newUChars=static_cast<UChar *>(uprv_malloc(newCapacity*2));
- if(newUChars==NULL) {
- // unable to allocate memory
- uprv_free(uchars);
- uchars=NULL;
- ucharsCapacity=0;
- return FALSE;
- }
- u_memcpy(newUChars+(newCapacity-ucharsLength),
- uchars+(ucharsCapacity-ucharsLength), ucharsLength);
- uprv_free(uchars);
- uchars=newUChars;
- ucharsCapacity=newCapacity;
- }
- return TRUE;
-}
-
-int32_t
-UCharsTrieBuilder::write(int32_t unit) {
- int32_t newLength=ucharsLength+1;
- if(ensureCapacity(newLength)) {
- ucharsLength=newLength;
- uchars[ucharsCapacity-ucharsLength]=(UChar)unit;
- }
- return ucharsLength;
-}
-
-int32_t
-UCharsTrieBuilder::write(const UChar *s, int32_t length) {
- int32_t newLength=ucharsLength+length;
- if(ensureCapacity(newLength)) {
- ucharsLength=newLength;
- u_memcpy(uchars+(ucharsCapacity-ucharsLength), s, length);
- }
- return ucharsLength;
-}
-
-int32_t
-UCharsTrieBuilder::writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) {
- return write(elements[i].getString(strings).getBuffer()+unitIndex, length);
-}
-
-int32_t
-UCharsTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) {
- if(0<=i && i<=UCharsTrie::kMaxOneUnitValue) {
- return write(i|(isFinal<<15));
- }
- UChar intUnits[3];
- int32_t length;
- if(i<0 || i>UCharsTrie::kMaxTwoUnitValue) {
- intUnits[0]=(UChar)(UCharsTrie::kThreeUnitValueLead);
- intUnits[1]=(UChar)((uint32_t)i>>16);
- intUnits[2]=(UChar)i;
- length=3;
- // } else if(i<=UCharsTrie::kMaxOneUnitValue) {
- // intUnits[0]=(UChar)(i);
- // length=1;
- } else {
- intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitValueLead+(i>>16));
- intUnits[1]=(UChar)i;
- length=2;
- }
- intUnits[0]=(UChar)(intUnits[0]|(isFinal<<15));
- return write(intUnits, length);
-}
-
-int32_t
-UCharsTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
- if(!hasValue) {
- return write(node);
- }
- UChar intUnits[3];
- int32_t length;
- if(value<0 || value>UCharsTrie::kMaxTwoUnitNodeValue) {
- intUnits[0]=(UChar)(UCharsTrie::kThreeUnitNodeValueLead);
- intUnits[1]=(UChar)((uint32_t)value>>16);
- intUnits[2]=(UChar)value;
- length=3;
- } else if(value<=UCharsTrie::kMaxOneUnitNodeValue) {
- intUnits[0]=(UChar)((value+1)<<6);
- length=1;
- } else {
- intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitNodeValueLead+((value>>10)&0x7fc0));
- intUnits[1]=(UChar)value;
- length=2;
- }
- intUnits[0]|=(UChar)node;
- return write(intUnits, length);
-}
-
-int32_t
-UCharsTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
- int32_t i=ucharsLength-jumpTarget;
- U_ASSERT(i>=0);
- if(i<=UCharsTrie::kMaxOneUnitDelta) {
- return write(i);
- }
- UChar intUnits[3];
- int32_t length;
- if(i<=UCharsTrie::kMaxTwoUnitDelta) {
- intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitDeltaLead+(i>>16));
- length=1;
- } else {
- intUnits[0]=(UChar)(UCharsTrie::kThreeUnitDeltaLead);
- intUnits[1]=(UChar)(i>>16);
- length=2;
- }
- intUnits[length++]=(UChar)i;
- return write(intUnits, length);
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/ucharstrieiterator.cpp b/contrib/libs/icu/common/ucharstrieiterator.cpp
deleted file mode 100644
index b3132241fe2..00000000000
--- a/contrib/libs/icu/common/ucharstrieiterator.cpp
+++ /dev/null
@@ -1,215 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2010-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: ucharstrieiterator.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010nov15
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/ucharstrie.h"
-#include "unicode/unistr.h"
-#include "uvectr32.h"
-
-U_NAMESPACE_BEGIN
-
-UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength,
- UErrorCode &errorCode)
- : uchars_(trieUChars),
- pos_(uchars_), initialPos_(uchars_),
- remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
- skipValue_(FALSE),
- maxLength_(maxStringLength), value_(0), stack_(NULL) {
- if(U_FAILURE(errorCode)) {
- return;
- }
- // stack_ is a pointer so that it's easy to turn ucharstrie.h into
- // a public API header for which we would want it to depend only on
- // other public headers.
- // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
- // via the UnicodeString and UVector32 implementations, so this additional
- // cost is minimal.
- stack_=new UVector32(errorCode);
- if(stack_==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- }
-}
-
-UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
- UErrorCode &errorCode)
- : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
- remainingMatchLength_(trie.remainingMatchLength_),
- initialRemainingMatchLength_(trie.remainingMatchLength_),
- skipValue_(FALSE),
- maxLength_(maxStringLength), value_(0), stack_(NULL) {
- if(U_FAILURE(errorCode)) {
- return;
- }
- stack_=new UVector32(errorCode);
- if(U_FAILURE(errorCode)) {
- return;
- }
- if(stack_==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
- if(length>=0) {
- // Pending linear-match node, append remaining UChars to str_.
- ++length;
- if(maxLength_>0 && length>maxLength_) {
- length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
- }
- str_.append(pos_, length);
- pos_+=length;
- remainingMatchLength_-=length;
- }
-}
-
-UCharsTrie::Iterator::~Iterator() {
- delete stack_;
-}
-
-UCharsTrie::Iterator &
-UCharsTrie::Iterator::reset() {
- pos_=initialPos_;
- remainingMatchLength_=initialRemainingMatchLength_;
- skipValue_=FALSE;
- int32_t length=remainingMatchLength_+1; // Remaining match length.
- if(maxLength_>0 && length>maxLength_) {
- length=maxLength_;
- }
- str_.truncate(length);
- pos_+=length;
- remainingMatchLength_-=length;
- stack_->setSize(0);
- return *this;
-}
-
-UBool
-UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
-
-UBool
-UCharsTrie::Iterator::next(UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- const UChar *pos=pos_;
- if(pos==NULL) {
- if(stack_->isEmpty()) {
- return FALSE;
- }
- // Pop the state off the stack and continue with the next outbound edge of
- // the branch node.
- int32_t stackSize=stack_->size();
- int32_t length=stack_->elementAti(stackSize-1);
- pos=uchars_+stack_->elementAti(stackSize-2);
- stack_->setSize(stackSize-2);
- str_.truncate(length&0xffff);
- length=(int32_t)((uint32_t)length>>16);
- if(length>1) {
- pos=branchNext(pos, length, errorCode);
- if(pos==NULL) {
- return TRUE; // Reached a final value.
- }
- } else {
- str_.append(*pos++);
- }
- }
- if(remainingMatchLength_>=0) {
- // We only get here if we started in a pending linear-match node
- // with more than maxLength remaining units.
- return truncateAndStop();
- }
- for(;;) {
- int32_t node=*pos++;
- if(node>=kMinValueLead) {
- if(skipValue_) {
- pos=skipNodeValue(pos, node);
- node&=kNodeTypeMask;
- skipValue_=FALSE;
- } else {
- // Deliver value for the string so far.
- UBool isFinal=(UBool)(node>>15);
- if(isFinal) {
- value_=readValue(pos, node&0x7fff);
- } else {
- value_=readNodeValue(pos, node);
- }
- if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
- pos_=NULL;
- } else {
- // We cannot skip the value right here because it shares its
- // lead unit with a match node which we have to evaluate
- // next time.
- // Instead, keep pos_ on the node lead unit itself.
- pos_=pos-1;
- skipValue_=TRUE;
- }
- return TRUE;
- }
- }
- if(maxLength_>0 && str_.length()==maxLength_) {
- return truncateAndStop();
- }
- if(node<kMinLinearMatch) {
- if(node==0) {
- node=*pos++;
- }
- pos=branchNext(pos, node+1, errorCode);
- if(pos==NULL) {
- return TRUE; // Reached a final value.
- }
- } else {
- // Linear-match node, append length units to str_.
- int32_t length=node-kMinLinearMatch+1;
- if(maxLength_>0 && str_.length()+length>maxLength_) {
- str_.append(pos, maxLength_-str_.length());
- return truncateAndStop();
- }
- str_.append(pos, length);
- pos+=length;
- }
- }
-}
-
-// Branch node, needs to take the first outbound edge and push state for the rest.
-const UChar *
-UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
- while(length>kMaxBranchLinearSubNodeLength) {
- ++pos; // ignore the comparison unit
- // Push state for the greater-or-equal edge.
- stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode);
- stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
- // Follow the less-than edge.
- length>>=1;
- pos=jumpByDelta(pos);
- }
- // List of key-value pairs where values are either final values or jump deltas.
- // Read the first (key, value) pair.
- UChar trieUnit=*pos++;
- int32_t node=*pos++;
- UBool isFinal=(UBool)(node>>15);
- int32_t value=readValue(pos, node&=0x7fff);
- pos=skipValue(pos, node);
- stack_->addElement((int32_t)(pos-uchars_), errorCode);
- stack_->addElement(((length-1)<<16)|str_.length(), errorCode);
- str_.append(trieUnit);
- if(isFinal) {
- pos_=NULL;
- value_=value;
- return NULL;
- } else {
- return pos+value;
- }
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/uchriter.cpp b/contrib/libs/icu/common/uchriter.cpp
deleted file mode 100644
index bedbabc74c2..00000000000
--- a/contrib/libs/icu/common/uchriter.cpp
+++ /dev/null
@@ -1,367 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 1998-2012, International Business Machines Corporation and
-* others. All Rights Reserved.
-******************************************************************************
-*/
-
-#include "utypeinfo.h" // for 'typeid' to work
-
-#include "unicode/uchriter.h"
-#include "unicode/ustring.h"
-#include "unicode/utf16.h"
-#include "ustr_imp.h"
-
-U_NAMESPACE_BEGIN
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCharCharacterIterator)
-
-UCharCharacterIterator::UCharCharacterIterator()
- : CharacterIterator(),
- text(0)
-{
- // never default construct!
-}
-
-UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr,
- int32_t length)
- : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0),
- text(textPtr)
-{
-}
-
-UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr,
- int32_t length,
- int32_t position)
- : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0, position),
- text(textPtr)
-{
-}
-
-UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr,
- int32_t length,
- int32_t textBegin,
- int32_t textEnd,
- int32_t position)
- : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0, textBegin, textEnd, position),
- text(textPtr)
-{
-}
-
-UCharCharacterIterator::UCharCharacterIterator(const UCharCharacterIterator& that)
-: CharacterIterator(that),
- text(that.text)
-{
-}
-
-UCharCharacterIterator&
-UCharCharacterIterator::operator=(const UCharCharacterIterator& that) {
- CharacterIterator::operator=(that);
- text = that.text;
- return *this;
-}
-
-UCharCharacterIterator::~UCharCharacterIterator() {
-}
-
-UBool
-UCharCharacterIterator::operator==(const ForwardCharacterIterator& that) const {
- if (this == &that) {
- return TRUE;
- }
- if (typeid(*this) != typeid(that)) {
- return FALSE;
- }
-
- UCharCharacterIterator& realThat = (UCharCharacterIterator&)that;
-
- return text == realThat.text
- && textLength == realThat.textLength
- && pos == realThat.pos
- && begin == realThat.begin
- && end == realThat.end;
-}
-
-int32_t
-UCharCharacterIterator::hashCode() const {
- return ustr_hashUCharsN(text, textLength) ^ pos ^ begin ^ end;
-}
-
-UCharCharacterIterator*
-UCharCharacterIterator::clone() const {
- return new UCharCharacterIterator(*this);
-}
-
-UChar
-UCharCharacterIterator::first() {
- pos = begin;
- if(pos < end) {
- return text[pos];
- } else {
- return DONE;
- }
-}
-
-UChar
-UCharCharacterIterator::firstPostInc() {
- pos = begin;
- if(pos < end) {
- return text[pos++];
- } else {
- return DONE;
- }
-}
-
-UChar
-UCharCharacterIterator::last() {
- pos = end;
- if(pos > begin) {
- return text[--pos];
- } else {
- return DONE;
- }
-}
-
-UChar
-UCharCharacterIterator::setIndex(int32_t position) {
- if(position < begin) {
- pos = begin;
- } else if(position > end) {
- pos = end;
- } else {
- pos = position;
- }
- if(pos < end) {
- return text[pos];
- } else {
- return DONE;
- }
-}
-
-UChar
-UCharCharacterIterator::current() const {
- if (pos >= begin && pos < end) {
- return text[pos];
- } else {
- return DONE;
- }
-}
-
-UChar
-UCharCharacterIterator::next() {
- if (pos + 1 < end) {
- return text[++pos];
- } else {
- /* make current() return DONE */
- pos = end;
- return DONE;
- }
-}
-
-UChar
-UCharCharacterIterator::nextPostInc() {
- if (pos < end) {
- return text[pos++];
- } else {
- return DONE;
- }
-}
-
-UBool
-UCharCharacterIterator::hasNext() {
- return (UBool)(pos < end ? TRUE : FALSE);
-}
-
-UChar
-UCharCharacterIterator::previous() {
- if (pos > begin) {
- return text[--pos];
- } else {
- return DONE;
- }
-}
-
-UBool
-UCharCharacterIterator::hasPrevious() {
- return (UBool)(pos > begin ? TRUE : FALSE);
-}
-
-UChar32
-UCharCharacterIterator::first32() {
- pos = begin;
- if(pos < end) {
- int32_t i = pos;
- UChar32 c;
- U16_NEXT(text, i, end, c);
- return c;
- } else {
- return DONE;
- }
-}
-
-UChar32
-UCharCharacterIterator::first32PostInc() {
- pos = begin;
- if(pos < end) {
- UChar32 c;
- U16_NEXT(text, pos, end, c);
- return c;
- } else {
- return DONE;
- }
-}
-
-UChar32
-UCharCharacterIterator::last32() {
- pos = end;
- if(pos > begin) {
- UChar32 c;
- U16_PREV(text, begin, pos, c);
- return c;
- } else {
- return DONE;
- }
-}
-
-UChar32
-UCharCharacterIterator::setIndex32(int32_t position) {
- if(position < begin) {
- position = begin;
- } else if(position > end) {
- position = end;
- }
- if(position < end) {
- U16_SET_CP_START(text, begin, position);
- int32_t i = this->pos = position;
- UChar32 c;
- U16_NEXT(text, i, end, c);
- return c;
- } else {
- this->pos = position;
- return DONE;
- }
-}
-
-UChar32
-UCharCharacterIterator::current32() const {
- if (pos >= begin && pos < end) {
- UChar32 c;
- U16_GET(text, begin, pos, end, c);
- return c;
- } else {
- return DONE;
- }
-}
-
-UChar32
-UCharCharacterIterator::next32() {
- if (pos < end) {
- U16_FWD_1(text, pos, end);
- if(pos < end) {
- int32_t i = pos;
- UChar32 c;
- U16_NEXT(text, i, end, c);
- return c;
- }
- }
- /* make current() return DONE */
- pos = end;
- return DONE;
-}
-
-UChar32
-UCharCharacterIterator::next32PostInc() {
- if (pos < end) {
- UChar32 c;
- U16_NEXT(text, pos, end, c);
- return c;
- } else {
- return DONE;
- }
-}
-
-UChar32
-UCharCharacterIterator::previous32() {
- if (pos > begin) {
- UChar32 c;
- U16_PREV(text, begin, pos, c);
- return c;
- } else {
- return DONE;
- }
-}
-
-int32_t
-UCharCharacterIterator::move(int32_t delta, CharacterIterator::EOrigin origin) {
- switch(origin) {
- case kStart:
- pos = begin + delta;
- break;
- case kCurrent:
- pos += delta;
- break;
- case kEnd:
- pos = end + delta;
- break;
- default:
- break;
- }
-
- if(pos < begin) {
- pos = begin;
- } else if(pos > end) {
- pos = end;
- }
-
- return pos;
-}
-
-int32_t
-UCharCharacterIterator::move32(int32_t delta, CharacterIterator::EOrigin origin) {
- // this implementation relies on the "safe" version of the UTF macros
- // (or the trustworthiness of the caller)
- switch(origin) {
- case kStart:
- pos = begin;
- if(delta > 0) {
- U16_FWD_N(text, pos, end, delta);
- }
- break;
- case kCurrent:
- if(delta > 0) {
- U16_FWD_N(text, pos, end, delta);
- } else {
- U16_BACK_N(text, begin, pos, -delta);
- }
- break;
- case kEnd:
- pos = end;
- if(delta < 0) {
- U16_BACK_N(text, begin, pos, -delta);
- }
- break;
- default:
- break;
- }
-
- return pos;
-}
-
-void UCharCharacterIterator::setText(ConstChar16Ptr newText,
- int32_t newTextLength) {
- text = newText;
- if(newText == 0 || newTextLength < 0) {
- newTextLength = 0;
- }
- end = textLength = newTextLength;
- pos = begin = 0;
-}
-
-void
-UCharCharacterIterator::getText(UnicodeString& result) {
- result = UnicodeString(text, textLength);
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/ucln.h b/contrib/libs/icu/common/ucln.h
deleted file mode 100644
index fe6666efed3..00000000000
--- a/contrib/libs/icu/common/ucln.h
+++ /dev/null
@@ -1,91 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2001-2013, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: ucln.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2001July05
-* created by: George Rhoten
-*/
-
-#ifndef __UCLN_H__
-#define __UCLN_H__
-
-#include "unicode/utypes.h"
-
-/** These are the functions used to register a library's memory cleanup
- * functions. Each library should define a single library register function
- * to call this API. In the i18n library, it is ucln_i18n_registerCleanup().
- *
- * None of the cleanup functions should use a mutex to clean up an API's
- * allocated memory because a cleanup function is not meant to be thread safe,
- * and plenty of data cannot be reference counted in order to make sure that
- * no one else needs the allocated data.
- *
- * In order to make a cleanup function get called when u_cleanup is called,
- * You should add your function to the library specific cleanup function.
- * If the cleanup function is not in the common library, the code that
- * allocates the memory should call the library specific cleanup function.
- * For instance, in the i18n library, any memory allocated statically must
- * call ucln_i18n_registerCleanup() from the ucln_in.h header. These library
- * cleanup functions are needed in order to prevent a circular dependency
- * between the common library and any other library.
- *
- * The order of the cleanup is very important. In general, an API that
- * depends on a second API should be cleaned up before the second API.
- * For instance, the default converter in ustring depends upon the converter
- * API. So the default converter should be closed before the converter API
- * has its cache flushed. This will prevent any memory leaks due to
- * reference counting.
- *
- * Please see common/ucln_cmn.{h,c} and i18n/ucln_in.{h,c} for examples.
- */
-
-/**
- * Data Type for cleanup function selector. These roughly correspond to libraries.
- */
-typedef enum ECleanupLibraryType {
- UCLN_START = -1,
- UCLN_UPLUG, /* ICU plugins */
- UCLN_CUSTOM, /* Custom is for anyone else. */
- UCLN_CTESTFW,
- UCLN_TOOLUTIL,
- UCLN_LAYOUTEX,
- UCLN_LAYOUT,
- UCLN_IO,
- UCLN_I18N,
- UCLN_COMMON /* This must be the last one to cleanup. */
-} ECleanupLibraryType;
-
-/**
- * Data type for cleanup function pointer
- */
-U_CDECL_BEGIN
-typedef UBool U_CALLCONV cleanupFunc(void);
-typedef void U_CALLCONV initFunc(UErrorCode *);
-U_CDECL_END
-
-/**
- * Register a cleanup function
- * @param type which library to register for.
- * @param func the function pointer
- */
-U_CAPI void U_EXPORT2 ucln_registerCleanup(ECleanupLibraryType type,
- cleanupFunc *func);
-
-/**
- * Request cleanup for one specific library.
- * Not thread safe.
- * @param type which library to cleanup
- */
-U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType type);
-
-#endif
diff --git a/contrib/libs/icu/common/ucln_cmn.cpp b/contrib/libs/icu/common/ucln_cmn.cpp
deleted file mode 100644
index f3e07c6b891..00000000000
--- a/contrib/libs/icu/common/ucln_cmn.cpp
+++ /dev/null
@@ -1,124 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 2001-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-******************************************************************************
-* file name: ucln_cmn.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2001July05
-* created by: George Rhoten
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/uclean.h"
-#include "cmemory.h"
-#include "mutex.h"
-#include "uassert.h"
-#include "ucln.h"
-#include "ucln_cmn.h"
-#include "utracimp.h"
-#include "umutex.h"
-
-/** Auto-client for UCLN_COMMON **/
-#define UCLN_TYPE_IS_COMMON
-#include "ucln_imp.h"
-
-static cleanupFunc *gCommonCleanupFunctions[UCLN_COMMON_COUNT];
-static cleanupFunc *gLibCleanupFunctions[UCLN_COMMON];
-
-
-/************************************************
- The cleanup order is important in this function.
- Please be sure that you have read ucln.h
- ************************************************/
-U_CAPI void U_EXPORT2
-u_cleanup(void)
-{
- UTRACE_ENTRY_OC(UTRACE_U_CLEANUP);
- icu::umtx_lock(NULL); /* Force a memory barrier, so that we are sure to see */
- icu::umtx_unlock(NULL); /* all state left around by any other threads. */
-
- ucln_lib_cleanup();
-
- cmemory_cleanup(); /* undo any heap functions set by u_setMemoryFunctions(). */
- UTRACE_EXIT(); /* Must be before utrace_cleanup(), which turns off tracing. */
-/*#if U_ENABLE_TRACING*/
- utrace_cleanup();
-/*#endif*/
-}
-
-U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType libType)
-{
- if (gLibCleanupFunctions[libType])
- {
- gLibCleanupFunctions[libType]();
- gLibCleanupFunctions[libType] = NULL;
- }
-}
-
-U_CFUNC void
-ucln_common_registerCleanup(ECleanupCommonType type,
- cleanupFunc *func)
-{
- // Thread safety messiness: From ticket 10295, calls to registerCleanup() may occur
- // concurrently. Although such cases should be storing the same value, they raise errors
- // from the thread sanity checker. Doing the store within a mutex avoids those.
- // BUT that can trigger a recursive entry into std::call_once() in umutex.cpp when this code,
- // running from the call_once function, tries to grab the ICU global mutex, which
- // re-enters the mutex init path. So, work-around by special casing UCLN_COMMON_MUTEX, not
- // using the ICU global mutex for it.
- //
- // No other point in ICU uses std::call_once().
-
- U_ASSERT(UCLN_COMMON_START < type && type < UCLN_COMMON_COUNT);
- if (type == UCLN_COMMON_MUTEX) {
- gCommonCleanupFunctions[type] = func;
- } else if (UCLN_COMMON_START < type && type < UCLN_COMMON_COUNT) {
- icu::Mutex m; // See ticket 10295 for discussion.
- gCommonCleanupFunctions[type] = func;
- }
-#if !UCLN_NO_AUTO_CLEANUP && (defined(UCLN_AUTO_ATEXIT) || defined(UCLN_AUTO_LOCAL))
- ucln_registerAutomaticCleanup();
-#endif
-}
-
-// Note: ucln_registerCleanup() is called with the ICU global mutex locked.
-// Be aware if adding anything to the function.
-// See ticket 10295 for discussion.
-
-U_CAPI void U_EXPORT2
-ucln_registerCleanup(ECleanupLibraryType type,
- cleanupFunc *func)
-{
- U_ASSERT(UCLN_START < type && type < UCLN_COMMON);
- if (UCLN_START < type && type < UCLN_COMMON)
- {
- gLibCleanupFunctions[type] = func;
- }
-}
-
-U_CFUNC UBool ucln_lib_cleanup(void) {
- int32_t libType = UCLN_START;
- int32_t commonFunc = UCLN_COMMON_START;
-
- for (libType++; libType<UCLN_COMMON; libType++) {
- ucln_cleanupOne(static_cast<ECleanupLibraryType>(libType));
- }
-
- for (commonFunc++; commonFunc<UCLN_COMMON_COUNT; commonFunc++) {
- if (gCommonCleanupFunctions[commonFunc])
- {
- gCommonCleanupFunctions[commonFunc]();
- gCommonCleanupFunctions[commonFunc] = NULL;
- }
- }
-#if !UCLN_NO_AUTO_CLEANUP && (defined(UCLN_AUTO_ATEXIT) || defined(UCLN_AUTO_LOCAL))
- ucln_unRegisterAutomaticCleanup();
-#endif
- return TRUE;
-}
diff --git a/contrib/libs/icu/common/ucln_cmn.h b/contrib/libs/icu/common/ucln_cmn.h
deleted file mode 100644
index b837fb94629..00000000000
--- a/contrib/libs/icu/common/ucln_cmn.h
+++ /dev/null
@@ -1,75 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 2001-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-******************************************************************************
-* file name: ucln_cmn.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2001July05
-* created by: George Rhoten
-*/
-
-#ifndef __UCLN_CMN_H__
-#define __UCLN_CMN_H__
-
-#include "unicode/utypes.h"
-#include "ucln.h"
-
-/* These are the cleanup functions for various APIs. */
-/* @return true if cleanup complete successfully.*/
-U_CFUNC UBool utrace_cleanup(void);
-
-U_CFUNC UBool ucln_lib_cleanup(void);
-
-/*
-Please keep the order of enums declared in same order
-as the cleanup functions are suppose to be called. */
-typedef enum ECleanupCommonType {
- UCLN_COMMON_START = -1,
- UCLN_COMMON_NUMPARSE_UNISETS,
- UCLN_COMMON_USPREP,
- UCLN_COMMON_BREAKITERATOR,
- UCLN_COMMON_RBBI,
- UCLN_COMMON_SERVICE,
- UCLN_COMMON_LOCALE_KEY_TYPE,
- UCLN_COMMON_LOCALE,
- UCLN_COMMON_LOCALE_AVAILABLE,
- UCLN_COMMON_LIKELY_SUBTAGS,
- UCLN_COMMON_LOCALE_DISTANCE,
- UCLN_COMMON_ULOC,
- UCLN_COMMON_CURRENCY,
- UCLN_COMMON_LOADED_NORMALIZER2,
- UCLN_COMMON_NORMALIZER2,
- UCLN_COMMON_CHARACTERPROPERTIES,
- UCLN_COMMON_USET,
- UCLN_COMMON_UNAMES,
- UCLN_COMMON_UPROPS,
- UCLN_COMMON_UCNV,
- UCLN_COMMON_UCNV_IO,
- UCLN_COMMON_UDATA,
- UCLN_COMMON_PUTIL,
- UCLN_COMMON_UINIT,
-
- /*
- Unified caches caches collation stuff. Collation data structures
- contain resource bundles which means that unified cache cleanup
- must happen before resource bundle clean up.
- */
- UCLN_COMMON_UNIFIED_CACHE,
- UCLN_COMMON_URES,
- UCLN_COMMON_MUTEX, // Mutexes should be the last to be cleaned up.
- UCLN_COMMON_COUNT /* This must be last */
-} ECleanupCommonType;
-
-/* Main library cleanup registration function. */
-/* See common/ucln.h for details on adding a cleanup function. */
-/* Note: the global mutex must not be held when calling this function. */
-U_CFUNC void U_EXPORT2 ucln_common_registerCleanup(ECleanupCommonType type,
- cleanupFunc *func);
-
-#endif
diff --git a/contrib/libs/icu/common/ucln_imp.h b/contrib/libs/icu/common/ucln_imp.h
deleted file mode 100644
index 1bfcde0fb8e..00000000000
--- a/contrib/libs/icu/common/ucln_imp.h
+++ /dev/null
@@ -1,182 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2009-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: ucln_imp.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* This file contains the platform specific implementation of per-library cleanup.
-*
-*/
-
-
-#ifndef __UCLN_IMP_H__
-#define __UCLN_IMP_H__
-
-#include "ucln.h"
-#include <stdlib.h>
-
-/**
- * Auto cleanup of ICU libraries
- * There are several methods in per library cleanup of icu libraries:
- * 1) Compiler/Platform based cleanup:
- * a) Windows MSVC uses DllMain()
- * b) GCC uses destructor function attribute
- * c) Sun Studio, AIX VA, and HP-UX aCC uses a linker option to set the exit function
- * 2) Using atexit()
- * 3) Implementing own automatic cleanup functions
- *
- * For option 1, ensure that UCLN_NO_AUTO_CLEANUP is set to 0 by using --enable-auto-cleanup
- * configure option or by otherwise setting UCLN_NO_AUTO_CLEANUP to 0
- * For option 2, follow option 1 and also define UCLN_AUTO_ATEXIT
- * For option 3, follow option 1 and also define UCLN_AUTO_LOCAL (see below for more information)
- */
-
-#if !UCLN_NO_AUTO_CLEANUP
-
-/*
- * The following declarations are for when UCLN_AUTO_LOCAL or UCLN_AUTO_ATEXIT
- * are defined. They are commented out because they are static and will be defined
- * later. The information is still here to provide some guidance for the developer
- * who chooses to use UCLN_AUTO_LOCAL.
- */
-/**
- * Give the library an opportunity to register an automatic cleanup.
- * This may be called more than once.
- */
-/*static void ucln_registerAutomaticCleanup();*/
-/**
- * Unregister an automatic cleanup, if possible. Called from cleanup.
- */
-/*static void ucln_unRegisterAutomaticCleanup();*/
-
-#ifdef UCLN_TYPE_IS_COMMON
-# define UCLN_CLEAN_ME_UP u_cleanup()
-#else
-# define UCLN_CLEAN_ME_UP ucln_cleanupOne(UCLN_TYPE)
-#endif
-
-/* ------------ automatic cleanup: registration. Choose ONE ------- */
-#if defined(UCLN_AUTO_LOCAL)
-/* To use:
- * 1. define UCLN_AUTO_LOCAL,
- * 2. create ucln_local_hook.c containing implementations of
- * static void ucln_registerAutomaticCleanup()
- * static void ucln_unRegisterAutomaticCleanup()
- */
-#include "ucln_local_hook.c"
-
-#elif defined(UCLN_AUTO_ATEXIT)
-/*
- * Use the ANSI C 'atexit' function. Note that this mechanism does not
- * guarantee the order of cleanup relative to other users of ICU!
- */
-static UBool gAutoCleanRegistered = FALSE;
-
-static void ucln_atexit_handler()
-{
- UCLN_CLEAN_ME_UP;
-}
-
-static void ucln_registerAutomaticCleanup()
-{
- if(!gAutoCleanRegistered) {
- gAutoCleanRegistered = TRUE;
- atexit(&ucln_atexit_handler);
- }
-}
-
-static void ucln_unRegisterAutomaticCleanup () {
-}
-/* ------------end of automatic cleanup: registration. ------- */
-
-#elif defined (UCLN_FINI)
-/**
- * If UCLN_FINI is defined, it is the (versioned, etc) name of a cleanup
- * entrypoint. Add a stub to call ucln_cleanupOne
- * Used on AIX, Solaris, and HP-UX
- */
-U_CAPI void U_EXPORT2 UCLN_FINI (void);
-
-U_CAPI void U_EXPORT2 UCLN_FINI ()
-{
- /* This function must be defined, if UCLN_FINI is defined, else link error. */
- UCLN_CLEAN_ME_UP;
-}
-
-/* Windows: DllMain */
-#elif U_PLATFORM_HAS_WIN32_API
-/*
- * ICU's own DllMain.
- */
-
-/* these are from putil.c */
-/* READ READ READ READ! Are you getting compilation errors from windows.h?
- Any source file which includes this (ucln_imp.h) header MUST
- be defined with language extensions ON. */
-#ifndef WIN32_LEAN_AND_MEAN
-# define WIN32_LEAN_AND_MEAN
-#endif
-# define VC_EXTRALEAN
-# define NOUSER
-# define NOSERVICE
-# define NOIME
-# define NOMCX
-# include <windows.h>
-/*
- * This is a stub DllMain function with icu specific process handling code.
- */
-BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
-{
- BOOL status = TRUE;
-
- switch(fdwReason) {
- case DLL_PROCESS_ATTACH:
- /* ICU does not trap process attach, but must pass these through properly. */
- /* ICU specific process attach could go here */
- break;
-
- case DLL_PROCESS_DETACH:
- /* Here is the one we actually care about. */
-
- UCLN_CLEAN_ME_UP;
-
- break;
-
- case DLL_THREAD_ATTACH:
- /* ICU does not trap thread attach, but must pass these through properly. */
- /* ICU specific thread attach could go here */
- break;
-
- case DLL_THREAD_DETACH:
- /* ICU does not trap thread detach, but must pass these through properly. */
- /* ICU specific thread detach could go here */
- break;
-
- }
- return status;
-}
-
-#elif defined(__GNUC__)
-/* GCC - use __attribute((destructor)) */
-static void ucln_destructor() __attribute__((destructor)) ;
-
-static void ucln_destructor()
-{
- UCLN_CLEAN_ME_UP;
-}
-
-#endif
-
-#endif /* UCLN_NO_AUTO_CLEANUP */
-
-#else
-#error This file can only be included once.
-#endif
diff --git a/contrib/libs/icu/common/ucmndata.cpp b/contrib/libs/icu/common/ucmndata.cpp
deleted file mode 100644
index ba2310bb7ab..00000000000
--- a/contrib/libs/icu/common/ucmndata.cpp
+++ /dev/null
@@ -1,393 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1999-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************/
-
-
-/*------------------------------------------------------------------------------
- *
- * UCommonData An abstract interface for dealing with ICU Common Data Files.
- * ICU Common Data Files are a grouping of a number of individual
- * data items (resources, converters, tables, anything) into a
- * single file or dll. The combined format includes a table of
- * contents for locating the individual items by name.
- *
- * Two formats for the table of contents are supported, which is
- * why there is an abstract inteface involved.
- *
- */
-
-#include "unicode/utypes.h"
-#include "unicode/udata.h"
-#include "cstring.h"
-#include "ucmndata.h"
-#include "udatamem.h"
-
-#if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP)
-# include <stdio.h>
-#endif
-
-U_CFUNC uint16_t
-udata_getHeaderSize(const DataHeader *udh) {
- if(udh==NULL) {
- return 0;
- } else if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) {
- /* same endianness */
- return udh->dataHeader.headerSize;
- } else {
- /* opposite endianness */
- uint16_t x=udh->dataHeader.headerSize;
- return (uint16_t)((x<<8)|(x>>8));
- }
-}
-
-U_CFUNC uint16_t
-udata_getInfoSize(const UDataInfo *info) {
- if(info==NULL) {
- return 0;
- } else if(info->isBigEndian==U_IS_BIG_ENDIAN) {
- /* same endianness */
- return info->size;
- } else {
- /* opposite endianness */
- uint16_t x=info->size;
- return (uint16_t)((x<<8)|(x>>8));
- }
-}
-
-/*-----------------------------------------------------------------------------*
- * *
- * Pointer TOCs. TODO: This form of table-of-contents should be removed *
- * because DLLs must be relocated on loading to correct the *
- * pointer values and this operation makes shared memory *
- * mapping of the data much less likely to work. *
- * *
- *-----------------------------------------------------------------------------*/
-typedef struct {
- const char *entryName;
- const DataHeader *pHeader;
-} PointerTOCEntry;
-
-
-typedef struct {
- uint32_t count;
- uint32_t reserved;
- /**
- * Variable-length array declared with length 1 to disable bounds checkers.
- * The actual array length is in the count field.
- */
- PointerTOCEntry entry[1];
-} PointerTOC;
-
-
-/* definition of OffsetTOC struct types moved to ucmndata.h */
-
-/*-----------------------------------------------------------------------------*
- * *
- * entry point lookup implementations *
- * *
- *-----------------------------------------------------------------------------*/
-
-#ifndef MIN
-#define MIN(a,b) (((a)<(b)) ? (a) : (b))
-#endif
-
-/**
- * Compare strings where we know the shared prefix length,
- * and advance the prefix length as we find that the strings share even more characters.
- */
-static int32_t
-strcmpAfterPrefix(const char *s1, const char *s2, int32_t *pPrefixLength) {
- int32_t pl=*pPrefixLength;
- int32_t cmp=0;
- s1+=pl;
- s2+=pl;
- for(;;) {
- int32_t c1=(uint8_t)*s1++;
- int32_t c2=(uint8_t)*s2++;
- cmp=c1-c2;
- if(cmp!=0 || c1==0) { /* different or done */
- break;
- }
- ++pl; /* increment shared same-prefix length */
- }
- *pPrefixLength=pl;
- return cmp;
-}
-
-static int32_t
-offsetTOCPrefixBinarySearch(const char *s, const char *names,
- const UDataOffsetTOCEntry *toc, int32_t count) {
- int32_t start=0;
- int32_t limit=count;
- /*
- * Remember the shared prefix between s, start and limit,
- * and don't compare that shared prefix again.
- * The shared prefix should get longer as we narrow the [start, limit[ range.
- */
- int32_t startPrefixLength=0;
- int32_t limitPrefixLength=0;
- if(count==0) {
- return -1;
- }
- /*
- * Prime the prefix lengths so that we don't keep prefixLength at 0 until
- * both the start and limit indexes have moved.
- * At the same time, we find if s is one of the start and (limit-1) names,
- * and if not, exclude them from the actual binary search.
- */
- if(0==strcmpAfterPrefix(s, names+toc[0].nameOffset, &startPrefixLength)) {
- return 0;
- }
- ++start;
- --limit;
- if(0==strcmpAfterPrefix(s, names+toc[limit].nameOffset, &limitPrefixLength)) {
- return limit;
- }
- while(start<limit) {
- int32_t i=(start+limit)/2;
- int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength);
- int32_t cmp=strcmpAfterPrefix(s, names+toc[i].nameOffset, &prefixLength);
- if(cmp<0) {
- limit=i;
- limitPrefixLength=prefixLength;
- } else if(cmp==0) {
- return i;
- } else {
- start=i+1;
- startPrefixLength=prefixLength;
- }
- }
- return -1;
-}
-
-static int32_t
-pointerTOCPrefixBinarySearch(const char *s, const PointerTOCEntry *toc, int32_t count) {
- int32_t start=0;
- int32_t limit=count;
- /*
- * Remember the shared prefix between s, start and limit,
- * and don't compare that shared prefix again.
- * The shared prefix should get longer as we narrow the [start, limit[ range.
- */
- int32_t startPrefixLength=0;
- int32_t limitPrefixLength=0;
- if(count==0) {
- return -1;
- }
- /*
- * Prime the prefix lengths so that we don't keep prefixLength at 0 until
- * both the start and limit indexes have moved.
- * At the same time, we find if s is one of the start and (limit-1) names,
- * and if not, exclude them from the actual binary search.
- */
- if(0==strcmpAfterPrefix(s, toc[0].entryName, &startPrefixLength)) {
- return 0;
- }
- ++start;
- --limit;
- if(0==strcmpAfterPrefix(s, toc[limit].entryName, &limitPrefixLength)) {
- return limit;
- }
- while(start<limit) {
- int32_t i=(start+limit)/2;
- int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength);
- int32_t cmp=strcmpAfterPrefix(s, toc[i].entryName, &prefixLength);
- if(cmp<0) {
- limit=i;
- limitPrefixLength=prefixLength;
- } else if(cmp==0) {
- return i;
- } else {
- start=i+1;
- startPrefixLength=prefixLength;
- }
- }
- return -1;
-}
-
-U_CDECL_BEGIN
-static uint32_t U_CALLCONV
-offsetTOCEntryCount(const UDataMemory *pData) {
- int32_t retVal=0;
- const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc;
- if (toc != NULL) {
- retVal = toc->count;
- }
- return retVal;
-}
-
-static const DataHeader * U_CALLCONV
-offsetTOCLookupFn(const UDataMemory *pData,
- const char *tocEntryName,
- int32_t *pLength,
- UErrorCode *pErrorCode) {
- (void)pErrorCode;
- const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc;
- if(toc!=NULL) {
- const char *base=(const char *)toc;
- int32_t number, count=(int32_t)toc->count;
-
- /* perform a binary search for the data in the common data's table of contents */
-#if defined (UDATA_DEBUG_DUMP)
- /* list the contents of the TOC each time .. not recommended */
- for(number=0; number<count; ++number) {
- fprintf(stderr, "\tx%d: %s\n", number, &base[toc->entry[number].nameOffset]);
- }
-#endif
- number=offsetTOCPrefixBinarySearch(tocEntryName, base, toc->entry, count);
- if(number>=0) {
- /* found it */
- const UDataOffsetTOCEntry *entry=toc->entry+number;
-#ifdef UDATA_DEBUG
- fprintf(stderr, "%s: Found.\n", tocEntryName);
-#endif
- if((number+1) < count) {
- *pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset);
- } else {
- *pLength = -1;
- }
- return (const DataHeader *)(base+entry->dataOffset);
- } else {
-#ifdef UDATA_DEBUG
- fprintf(stderr, "%s: Not found.\n", tocEntryName);
-#endif
- return NULL;
- }
- } else {
-#ifdef UDATA_DEBUG
- fprintf(stderr, "returning header\n");
-#endif
-
- return pData->pHeader;
- }
-}
-
-
-static uint32_t U_CALLCONV pointerTOCEntryCount(const UDataMemory *pData) {
- const PointerTOC *toc = (PointerTOC *)pData->toc;
- return (uint32_t)((toc != NULL) ? (toc->count) : 0);
-}
-
-static const DataHeader * U_CALLCONV pointerTOCLookupFn(const UDataMemory *pData,
- const char *name,
- int32_t *pLength,
- UErrorCode *pErrorCode) {
- (void)pErrorCode;
- if(pData->toc!=NULL) {
- const PointerTOC *toc = (PointerTOC *)pData->toc;
- int32_t number, count=(int32_t)toc->count;
-
-#if defined (UDATA_DEBUG_DUMP)
- /* list the contents of the TOC each time .. not recommended */
- for(number=0; number<count; ++number) {
- fprintf(stderr, "\tx%d: %s\n", number, toc->entry[number].entryName);
- }
-#endif
- number=pointerTOCPrefixBinarySearch(name, toc->entry, count);
- if(number>=0) {
- /* found it */
-#ifdef UDATA_DEBUG
- fprintf(stderr, "%s: Found.\n", toc->entry[number].entryName);
-#endif
- *pLength=-1;
- return UDataMemory_normalizeDataPointer(toc->entry[number].pHeader);
- } else {
-#ifdef UDATA_DEBUG
- fprintf(stderr, "%s: Not found.\n", name);
-#endif
- return NULL;
- }
- } else {
- return pData->pHeader;
- }
-}
-U_CDECL_END
-
-
-static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn, offsetTOCEntryCount};
-static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount};
-
-
-
-/*----------------------------------------------------------------------*
- * *
- * checkCommonData Validate the format of a common data file. *
- * Fill in the virtual function ptr based on TOC type *
- * If the data is invalid, close the UDataMemory *
- * and set the appropriate error code. *
- * *
- *----------------------------------------------------------------------*/
-U_CFUNC void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) {
- if (U_FAILURE(*err)) {
- return;
- }
-
- if(udm==NULL || udm->pHeader==NULL) {
- *err=U_INVALID_FORMAT_ERROR;
- } else if(!(udm->pHeader->dataHeader.magic1==0xda &&
- udm->pHeader->dataHeader.magic2==0x27 &&
- udm->pHeader->info.isBigEndian==U_IS_BIG_ENDIAN &&
- udm->pHeader->info.charsetFamily==U_CHARSET_FAMILY)
- ) {
- /* header not valid */
- *err=U_INVALID_FORMAT_ERROR;
- }
- else if (udm->pHeader->info.dataFormat[0]==0x43 &&
- udm->pHeader->info.dataFormat[1]==0x6d &&
- udm->pHeader->info.dataFormat[2]==0x6e &&
- udm->pHeader->info.dataFormat[3]==0x44 &&
- udm->pHeader->info.formatVersion[0]==1
- ) {
- /* dataFormat="CmnD" */
- udm->vFuncs = &CmnDFuncs;
- udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader);
- }
- else if(udm->pHeader->info.dataFormat[0]==0x54 &&
- udm->pHeader->info.dataFormat[1]==0x6f &&
- udm->pHeader->info.dataFormat[2]==0x43 &&
- udm->pHeader->info.dataFormat[3]==0x50 &&
- udm->pHeader->info.formatVersion[0]==1
- ) {
- /* dataFormat="ToCP" */
- udm->vFuncs = &ToCPFuncs;
- udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader);
- }
- else {
- /* dataFormat not recognized */
- *err=U_INVALID_FORMAT_ERROR;
- }
-
- if (U_FAILURE(*err)) {
- /* If the data is no good and we memory-mapped it ourselves,
- * close the memory mapping so it doesn't leak. Note that this has
- * no effect on non-memory mapped data, other than clearing fields in udm.
- */
- udata_close(udm);
- }
-}
-
-/*
- * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package
- * header but not its sub-items.
- * This function will be needed for automatic runtime swapping.
- * Sub-items should not be swapped to limit the swapping to the parts of the
- * package that are actually used.
- *
- * Since lengths of items are implicit in the order and offsets of their
- * ToC entries, and since offsets are relative to the start of the ToC,
- * a swapped version may need to generate a different data structure
- * with pointers to the original data items and with their lengths
- * (-1 for the last one if it is not known), and maybe even pointers to the
- * swapped versions of the items.
- * These pointers to swapped versions would establish a cache;
- * instead, each open data item could simply own the storage for its swapped
- * data. This fits better with the current design.
- *
- * markus 2003sep18 Jitterbug 2235
- */
diff --git a/contrib/libs/icu/common/ucmndata.h b/contrib/libs/icu/common/ucmndata.h
deleted file mode 100644
index c3eba9f4d02..00000000000
--- a/contrib/libs/icu/common/ucmndata.h
+++ /dev/null
@@ -1,117 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1999-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************/
-
-
-/*----------------------------------------------------------------------------------
- *
- * UCommonData An abstract interface for dealing with ICU Common Data Files.
- * ICU Common Data Files are a grouping of a number of individual
- * data items (resources, converters, tables, anything) into a
- * single file or dll. The combined format includes a table of
- * contents for locating the individual items by name.
- *
- * Two formats for the table of contents are supported, which is
- * why there is an abstract inteface involved.
- *
- * These functions are part of the ICU internal implementation, and
- * are not inteded to be used directly by applications.
- */
-
-#ifndef __UCMNDATA_H__
-#define __UCMNDATA_H__
-
-#include "unicode/udata.h"
-#include "umapfile.h"
-
-
-#define COMMON_DATA_NAME U_ICUDATA_NAME
-
-typedef struct {
- uint16_t headerSize;
- uint8_t magic1;
- uint8_t magic2;
-} MappedData;
-
-
-typedef struct {
- MappedData dataHeader;
- UDataInfo info;
-} DataHeader;
-
-typedef struct {
- uint32_t nameOffset;
- uint32_t dataOffset;
-} UDataOffsetTOCEntry;
-
-typedef struct {
- uint32_t count;
- /**
- * Variable-length array declared with length 1 to disable bounds checkers.
- * The actual array length is in the count field.
- */
- UDataOffsetTOCEntry entry[1];
-} UDataOffsetTOC;
-
-/**
- * Get the header size from a const DataHeader *udh.
- * Handles opposite-endian data.
- *
- * @internal
- */
-U_CFUNC uint16_t
-udata_getHeaderSize(const DataHeader *udh);
-
-/**
- * Get the UDataInfo.size from a const UDataInfo *info.
- * Handles opposite-endian data.
- *
- * @internal
- */
-U_CFUNC uint16_t
-udata_getInfoSize(const UDataInfo *info);
-
-U_CDECL_BEGIN
-/*
- * "Virtual" functions for data lookup.
- * To call one, given a UDataMemory *p, the code looks like this:
- * p->vFuncs.Lookup(p, tocEntryName, pErrorCode);
- * (I sure do wish this was written in C++, not C)
- */
-
-typedef const DataHeader *
-(U_CALLCONV * LookupFn)(const UDataMemory *pData,
- const char *tocEntryName,
- int32_t *pLength,
- UErrorCode *pErrorCode);
-
-typedef uint32_t
-(U_CALLCONV * NumEntriesFn)(const UDataMemory *pData);
-
-U_CDECL_END
-
-typedef struct {
- LookupFn Lookup;
- NumEntriesFn NumEntries;
-} commonDataFuncs;
-
-
-/*
- * Functions to check whether a UDataMemory refers to memory containing
- * a recognizable header and table of contents a Common Data Format
- *
- * If a valid header and TOC are found,
- * set the CommonDataFuncs function dispatch vector in the UDataMemory
- * to point to the right functions for the TOC type.
- * otherwise
- * set an errorcode.
- */
-U_CFUNC void udata_checkCommonData(UDataMemory *pData, UErrorCode *pErrorCode);
-
-#endif
diff --git a/contrib/libs/icu/common/ucnv.cpp b/contrib/libs/icu/common/ucnv.cpp
deleted file mode 100644
index 5dcf35e0438..00000000000
--- a/contrib/libs/icu/common/ucnv.cpp
+++ /dev/null
@@ -1,2910 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1998-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* ucnv.c:
-* Implements APIs for the ICU's codeset conversion library;
-* mostly calls through internal functions;
-* created by Bertrand A. Damiba
-*
-* Modification History:
-*
-* Date Name Description
-* 04/04/99 helena Fixed internal header inclusion.
-* 05/09/00 helena Added implementation to handle fallback mappings.
-* 06/20/2000 helena OS/400 port changes; mostly typecast.
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include <memory>
-
-#include "unicode/ustring.h"
-#include "unicode/ucnv.h"
-#include "unicode/ucnv_err.h"
-#include "unicode/uset.h"
-#include "unicode/utf.h"
-#include "unicode/utf16.h"
-#include "putilimp.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "uassert.h"
-#include "utracimp.h"
-#include "ustr_imp.h"
-#include "ucnv_imp.h"
-#include "ucnv_cnv.h"
-#include "ucnv_bld.h"
-
-/* size of intermediate and preflighting buffers in ucnv_convert() */
-#define CHUNK_SIZE 1024
-
-typedef struct UAmbiguousConverter {
- const char *name;
- const UChar variant5c;
-} UAmbiguousConverter;
-
-static const UAmbiguousConverter ambiguousConverters[]={
- { "ibm-897_P100-1995", 0xa5 },
- { "ibm-942_P120-1999", 0xa5 },
- { "ibm-943_P130-1999", 0xa5 },
- { "ibm-946_P100-1995", 0xa5 },
- { "ibm-33722_P120-1999", 0xa5 },
- { "ibm-1041_P100-1995", 0xa5 },
- /*{ "ibm-54191_P100-2006", 0xa5 },*/
- /*{ "ibm-62383_P100-2007", 0xa5 },*/
- /*{ "ibm-891_P100-1995", 0x20a9 },*/
- { "ibm-944_P100-1995", 0x20a9 },
- { "ibm-949_P110-1999", 0x20a9 },
- { "ibm-1363_P110-1997", 0x20a9 },
- { "ISO_2022,locale=ko,version=0", 0x20a9 },
- { "ibm-1088_P100-1995", 0x20a9 }
-};
-
-/*Calls through createConverter */
-U_CAPI UConverter* U_EXPORT2
-ucnv_open (const char *name,
- UErrorCode * err)
-{
- UConverter *r;
-
- if (err == NULL || U_FAILURE (*err)) {
- return NULL;
- }
-
- r = ucnv_createConverter(NULL, name, err);
- return r;
-}
-
-U_CAPI UConverter* U_EXPORT2
-ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err)
-{
- return ucnv_createConverterFromPackage(packageName, converterName, err);
-}
-
-/*Extracts the UChar* to a char* and calls through createConverter */
-U_CAPI UConverter* U_EXPORT2
-ucnv_openU (const UChar * name,
- UErrorCode * err)
-{
- char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
-
- if (err == NULL || U_FAILURE(*err))
- return NULL;
- if (name == NULL)
- return ucnv_open (NULL, err);
- if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)
- {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- return ucnv_open(u_austrcpy(asciiName, name), err);
-}
-
-/* Copy the string that is represented by the UConverterPlatform enum
- * @param platformString An output buffer
- * @param platform An enum representing a platform
- * @return the length of the copied string.
- */
-static int32_t
-ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
-{
- switch (pltfrm)
- {
- case UCNV_IBM:
- uprv_strcpy(platformString, "ibm-");
- return 4;
- case UCNV_UNKNOWN:
- break;
- }
-
- /* default to empty string */
- *platformString = 0;
- return 0;
-}
-
-/*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
- *through createConverter*/
-U_CAPI UConverter* U_EXPORT2
-ucnv_openCCSID (int32_t codepage,
- UConverterPlatform platform,
- UErrorCode * err)
-{
- char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
- int32_t myNameLen;
-
- if (err == NULL || U_FAILURE (*err))
- return NULL;
-
- /* ucnv_copyPlatformString could return "ibm-" or "cp" */
- myNameLen = ucnv_copyPlatformString(myName, platform);
- T_CString_integerToString(myName + myNameLen, codepage, 10);
-
- return ucnv_createConverter(NULL, myName, err);
-}
-
-/* Creating a temporary stack-based object that can be used in one thread,
-and created from a converter that is shared across threads.
-*/
-
-U_CAPI UConverter* U_EXPORT2
-ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
-{
- UConverter *localConverter, *allocatedConverter;
- int32_t stackBufferSize;
- int32_t bufferSizeNeeded;
- UErrorCode cbErr;
- UConverterToUnicodeArgs toUArgs = {
- sizeof(UConverterToUnicodeArgs),
- TRUE,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL
- };
- UConverterFromUnicodeArgs fromUArgs = {
- sizeof(UConverterFromUnicodeArgs),
- TRUE,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL
- };
-
- UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
-
- if (status == NULL || U_FAILURE(*status)){
- UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
- return NULL;
- }
-
- if (cnv == NULL) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- UTRACE_EXIT_STATUS(*status);
- return NULL;
- }
-
- UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
- ucnv_getName(cnv, status), cnv, stackBuffer);
-
- if (cnv->sharedData->impl->safeClone != NULL) {
- /* call the custom safeClone function for sizing */
- bufferSizeNeeded = 0;
- cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
- if (U_FAILURE(*status)) {
- UTRACE_EXIT_STATUS(*status);
- return NULL;
- }
- }
- else
- {
- /* inherent sizing */
- bufferSizeNeeded = sizeof(UConverter);
- }
-
- if (pBufferSize == NULL) {
- stackBufferSize = 1;
- pBufferSize = &stackBufferSize;
- } else {
- stackBufferSize = *pBufferSize;
- if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
- *pBufferSize = bufferSizeNeeded;
- UTRACE_EXIT_VALUE(bufferSizeNeeded);
- return NULL;
- }
- }
-
- /* Adjust (if necessary) the stackBuffer pointer to be aligned correctly for a UConverter.
- * TODO(Jira ICU-20736) Redo this using std::align() once g++4.9 compatibility is no longer needed.
- */
- if (stackBuffer) {
- uintptr_t p = reinterpret_cast<uintptr_t>(stackBuffer);
- uintptr_t aligned_p = (p + alignof(UConverter) - 1) & ~(alignof(UConverter) - 1);
- ptrdiff_t pointerAdjustment = aligned_p - p;
- if (bufferSizeNeeded + pointerAdjustment <= stackBufferSize) {
- stackBuffer = reinterpret_cast<void *>(aligned_p);
- stackBufferSize -= static_cast<int32_t>(pointerAdjustment);
- } else {
- /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
- stackBufferSize = 1;
- }
- }
-
- /* Now, see if we must allocate any memory */
- if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL)
- {
- /* allocate one here...*/
- localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
-
- if(localConverter == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- UTRACE_EXIT_STATUS(*status);
- return NULL;
- }
- *status = U_SAFECLONE_ALLOCATED_WARNING;
-
- /* record the fact that memory was allocated */
- *pBufferSize = bufferSizeNeeded;
- } else {
- /* just use the stack buffer */
- localConverter = (UConverter*) stackBuffer;
- allocatedConverter = NULL;
- }
-
- uprv_memset(localConverter, 0, bufferSizeNeeded);
-
- /* Copy initial state */
- uprv_memcpy(localConverter, cnv, sizeof(UConverter));
- localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
-
- /* copy the substitution string */
- if (cnv->subChars == (uint8_t *)cnv->subUChars) {
- localConverter->subChars = (uint8_t *)localConverter->subUChars;
- } else {
- localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
- if (localConverter->subChars == NULL) {
- uprv_free(allocatedConverter);
- UTRACE_EXIT_STATUS(*status);
- return NULL;
- }
- uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
- }
-
- /* now either call the safeclone fcn or not */
- if (cnv->sharedData->impl->safeClone != NULL) {
- /* call the custom safeClone function */
- localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
- }
-
- if(localConverter==NULL || U_FAILURE(*status)) {
- if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
- uprv_free(allocatedConverter->subChars);
- }
- uprv_free(allocatedConverter);
- UTRACE_EXIT_STATUS(*status);
- return NULL;
- }
-
- /* increment refcount of shared data if needed */
- if (cnv->sharedData->isReferenceCounted) {
- ucnv_incrementRefCount(cnv->sharedData);
- }
-
- if(localConverter == (UConverter*)stackBuffer) {
- /* we're using user provided data - set to not destroy */
- localConverter->isCopyLocal = TRUE;
- }
-
- /* allow callback functions to handle any memory allocation */
- toUArgs.converter = fromUArgs.converter = localConverter;
- cbErr = U_ZERO_ERROR;
- cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
- cbErr = U_ZERO_ERROR;
- cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
-
- UTRACE_EXIT_PTR_STATUS(localConverter, *status);
- return localConverter;
-}
-
-
-
-/*Decreases the reference counter in the shared immutable section of the object
- *and frees the mutable part*/
-
-U_CAPI void U_EXPORT2
-ucnv_close (UConverter * converter)
-{
- UErrorCode errorCode = U_ZERO_ERROR;
-
- UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
-
- if (converter == NULL)
- {
- UTRACE_EXIT();
- return;
- }
-
- UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
- ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
-
- /* In order to speed up the close, only call the callbacks when they have been changed.
- This performance check will only work when the callbacks are set within a shared library
- or from user code that statically links this code. */
- /* first, notify the callback functions that the converter is closed */
- if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
- UConverterToUnicodeArgs toUArgs = {
- sizeof(UConverterToUnicodeArgs),
- TRUE,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL
- };
-
- toUArgs.converter = converter;
- errorCode = U_ZERO_ERROR;
- converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
- }
- if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
- UConverterFromUnicodeArgs fromUArgs = {
- sizeof(UConverterFromUnicodeArgs),
- TRUE,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL
- };
- fromUArgs.converter = converter;
- errorCode = U_ZERO_ERROR;
- converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
- }
-
- if (converter->sharedData->impl->close != NULL) {
- converter->sharedData->impl->close(converter);
- }
-
- if (converter->subChars != (uint8_t *)converter->subUChars) {
- uprv_free(converter->subChars);
- }
-
- if (converter->sharedData->isReferenceCounted) {
- ucnv_unloadSharedDataIfReady(converter->sharedData);
- }
-
- if(!converter->isCopyLocal){
- uprv_free(converter);
- }
-
- UTRACE_EXIT();
-}
-
-/*returns a single Name from the list, will return NULL if out of bounds
- */
-U_CAPI const char* U_EXPORT2
-ucnv_getAvailableName (int32_t n)
-{
- if (0 <= n && n <= 0xffff) {
- UErrorCode err = U_ZERO_ERROR;
- const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
- if (U_SUCCESS(err)) {
- return name;
- }
- }
- return NULL;
-}
-
-U_CAPI int32_t U_EXPORT2
-ucnv_countAvailable ()
-{
- UErrorCode err = U_ZERO_ERROR;
- return ucnv_bld_countAvailableConverters(&err);
-}
-
-U_CAPI void U_EXPORT2
-ucnv_getSubstChars (const UConverter * converter,
- char *mySubChar,
- int8_t * len,
- UErrorCode * err)
-{
- if (U_FAILURE (*err))
- return;
-
- if (converter->subCharLen <= 0) {
- /* Unicode string or empty string from ucnv_setSubstString(). */
- *len = 0;
- return;
- }
-
- if (*len < converter->subCharLen) /*not enough space in subChars */
- {
- *err = U_INDEX_OUTOFBOUNDS_ERROR;
- return;
- }
-
- uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */
- *len = converter->subCharLen; /*store # of bytes copied to buffer */
-}
-
-U_CAPI void U_EXPORT2
-ucnv_setSubstChars (UConverter * converter,
- const char *mySubChar,
- int8_t len,
- UErrorCode * err)
-{
- if (U_FAILURE (*err))
- return;
-
- /*Makes sure that the subChar is within the codepages char length boundaries */
- if ((len > converter->sharedData->staticData->maxBytesPerChar)
- || (len < converter->sharedData->staticData->minBytesPerChar))
- {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
- converter->subCharLen = len; /*sets the new len */
-
- /*
- * There is currently (2001Feb) no separate API to set/get subChar1.
- * In order to always have subChar written after it is explicitly set,
- * we set subChar1 to 0.
- */
- converter->subChar1 = 0;
-
- return;
-}
-
-U_CAPI void U_EXPORT2
-ucnv_setSubstString(UConverter *cnv,
- const UChar *s,
- int32_t length,
- UErrorCode *err) {
- alignas(UConverter) char cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE];
- char chars[UCNV_ERROR_BUFFER_LENGTH];
-
- UConverter *clone;
- uint8_t *subChars;
- int32_t cloneSize, length8;
-
- /* Let the following functions check all arguments. */
- cloneSize = sizeof(cloneBuffer);
- clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
- ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
- length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
- ucnv_close(clone);
- if (U_FAILURE(*err)) {
- return;
- }
-
- if (cnv->sharedData->impl->writeSub == NULL
-#if !UCONFIG_NO_LEGACY_CONVERSION
- || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
- ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
-#endif
- ) {
- /* The converter is not stateful. Store the charset bytes as a fixed string. */
- subChars = (uint8_t *)chars;
- } else {
- /*
- * The converter has a non-default writeSub() function, indicating
- * that it is stateful.
- * Store the Unicode string for on-the-fly conversion for correct
- * state handling.
- */
- if (length > UCNV_ERROR_BUFFER_LENGTH) {
- /*
- * Should not occur. The converter should output at least one byte
- * per UChar, which means that ucnv_fromUChars() should catch all
- * overflows.
- */
- *err = U_BUFFER_OVERFLOW_ERROR;
- return;
- }
- subChars = (uint8_t *)s;
- if (length < 0) {
- length = u_strlen(s);
- }
- length8 = length * U_SIZEOF_UCHAR;
- }
-
- /*
- * For storing the substitution string, select either the small buffer inside
- * UConverter or allocate a subChars buffer.
- */
- if (length8 > UCNV_MAX_SUBCHAR_LEN) {
- /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
- if (cnv->subChars == (uint8_t *)cnv->subUChars) {
- /* Allocate a new buffer for the string. */
- cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
- if (cnv->subChars == NULL) {
- cnv->subChars = (uint8_t *)cnv->subUChars;
- *err = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
- }
- }
-
- /* Copy the substitution string into the UConverter or its subChars buffer. */
- if (length8 == 0) {
- cnv->subCharLen = 0;
- } else {
- uprv_memcpy(cnv->subChars, subChars, length8);
- if (subChars == (uint8_t *)chars) {
- cnv->subCharLen = (int8_t)length8;
- } else /* subChars == s */ {
- cnv->subCharLen = (int8_t)-length;
- }
- }
-
- /* See comment in ucnv_setSubstChars(). */
- cnv->subChar1 = 0;
-}
-
-/*resets the internal states of a converter
- *goal : have the same behaviour than a freshly created converter
- */
-static void _reset(UConverter *converter, UConverterResetChoice choice,
- UBool callCallback) {
- if(converter == NULL) {
- return;
- }
-
- if(callCallback) {
- /* first, notify the callback functions that the converter is reset */
- UErrorCode errorCode;
-
- if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
- UConverterToUnicodeArgs toUArgs = {
- sizeof(UConverterToUnicodeArgs),
- TRUE,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL
- };
- toUArgs.converter = converter;
- errorCode = U_ZERO_ERROR;
- converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
- }
- if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
- UConverterFromUnicodeArgs fromUArgs = {
- sizeof(UConverterFromUnicodeArgs),
- TRUE,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL
- };
- fromUArgs.converter = converter;
- errorCode = U_ZERO_ERROR;
- converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
- }
- }
-
- /* now reset the converter itself */
- if(choice<=UCNV_RESET_TO_UNICODE) {
- converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
- converter->mode = 0;
- converter->toULength = 0;
- converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
- converter->preToULength = 0;
- }
- if(choice!=UCNV_RESET_TO_UNICODE) {
- converter->fromUnicodeStatus = 0;
- converter->fromUChar32 = 0;
- converter->invalidUCharLength = converter->charErrorBufferLength = 0;
- converter->preFromUFirstCP = U_SENTINEL;
- converter->preFromULength = 0;
- }
-
- if (converter->sharedData->impl->reset != NULL) {
- /* call the custom reset function */
- converter->sharedData->impl->reset(converter, choice);
- }
-}
-
-U_CAPI void U_EXPORT2
-ucnv_reset(UConverter *converter)
-{
- _reset(converter, UCNV_RESET_BOTH, TRUE);
-}
-
-U_CAPI void U_EXPORT2
-ucnv_resetToUnicode(UConverter *converter)
-{
- _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
-}
-
-U_CAPI void U_EXPORT2
-ucnv_resetFromUnicode(UConverter *converter)
-{
- _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
-}
-
-U_CAPI int8_t U_EXPORT2
-ucnv_getMaxCharSize (const UConverter * converter)
-{
- return converter->maxBytesPerUChar;
-}
-
-
-U_CAPI int8_t U_EXPORT2
-ucnv_getMinCharSize (const UConverter * converter)
-{
- return converter->sharedData->staticData->minBytesPerChar;
-}
-
-U_CAPI const char* U_EXPORT2
-ucnv_getName (const UConverter * converter, UErrorCode * err)
-
-{
- if (U_FAILURE (*err))
- return NULL;
- if(converter->sharedData->impl->getName){
- const char* temp= converter->sharedData->impl->getName(converter);
- if(temp)
- return temp;
- }
- return converter->sharedData->staticData->name;
-}
-
-U_CAPI int32_t U_EXPORT2
-ucnv_getCCSID(const UConverter * converter,
- UErrorCode * err)
-{
- int32_t ccsid;
- if (U_FAILURE (*err))
- return -1;
-
- ccsid = converter->sharedData->staticData->codepage;
- if (ccsid == 0) {
- /* Rare case. This is for cases like gb18030,
- which doesn't have an IBM canonical name, but does have an IBM alias. */
- const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
- if (U_SUCCESS(*err) && standardName) {
- const char *ccsidStr = uprv_strchr(standardName, '-');
- if (ccsidStr) {
- ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */
- }
- }
- }
- return ccsid;
-}
-
-
-U_CAPI UConverterPlatform U_EXPORT2
-ucnv_getPlatform (const UConverter * converter,
- UErrorCode * err)
-{
- if (U_FAILURE (*err))
- return UCNV_UNKNOWN;
-
- return (UConverterPlatform)converter->sharedData->staticData->platform;
-}
-
-U_CAPI void U_EXPORT2
- ucnv_getToUCallBack (const UConverter * converter,
- UConverterToUCallback *action,
- const void **context)
-{
- *action = converter->fromCharErrorBehaviour;
- *context = converter->toUContext;
-}
-
-U_CAPI void U_EXPORT2
- ucnv_getFromUCallBack (const UConverter * converter,
- UConverterFromUCallback *action,
- const void **context)
-{
- *action = converter->fromUCharErrorBehaviour;
- *context = converter->fromUContext;
-}
-
-U_CAPI void U_EXPORT2
-ucnv_setToUCallBack (UConverter * converter,
- UConverterToUCallback newAction,
- const void* newContext,
- UConverterToUCallback *oldAction,
- const void** oldContext,
- UErrorCode * err)
-{
- if (U_FAILURE (*err))
- return;
- if (oldAction) *oldAction = converter->fromCharErrorBehaviour;
- converter->fromCharErrorBehaviour = newAction;
- if (oldContext) *oldContext = converter->toUContext;
- converter->toUContext = newContext;
-}
-
-U_CAPI void U_EXPORT2
-ucnv_setFromUCallBack (UConverter * converter,
- UConverterFromUCallback newAction,
- const void* newContext,
- UConverterFromUCallback *oldAction,
- const void** oldContext,
- UErrorCode * err)
-{
- if (U_FAILURE (*err))
- return;
- if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;
- converter->fromUCharErrorBehaviour = newAction;
- if (oldContext) *oldContext = converter->fromUContext;
- converter->fromUContext = newContext;
-}
-
-static void
-_updateOffsets(int32_t *offsets, int32_t length,
- int32_t sourceIndex, int32_t errorInputLength) {
- int32_t *limit;
- int32_t delta, offset;
-
- if(sourceIndex>=0) {
- /*
- * adjust each offset by adding the previous sourceIndex
- * minus the length of the input sequence that caused an
- * error, if any
- */
- delta=sourceIndex-errorInputLength;
- } else {
- /*
- * set each offset to -1 because this conversion function
- * does not handle offsets
- */
- delta=-1;
- }
-
- limit=offsets+length;
- if(delta==0) {
- /* most common case, nothing to do */
- } else if(delta>0) {
- /* add the delta to each offset (but not if the offset is <0) */
- while(offsets<limit) {
- offset=*offsets;
- if(offset>=0) {
- *offsets=offset+delta;
- }
- ++offsets;
- }
- } else /* delta<0 */ {
- /*
- * set each offset to -1 because this conversion function
- * does not handle offsets
- * or the error input sequence started in a previous buffer
- */
- while(offsets<limit) {
- *offsets++=-1;
- }
- }
-}
-
-/* ucnv_fromUnicode --------------------------------------------------------- */
-
-/*
- * Implementation note for m:n conversions
- *
- * While collecting source units to find the longest match for m:n conversion,
- * some source units may need to be stored for a partial match.
- * When a second buffer does not yield a match on all of the previously stored
- * source units, then they must be "replayed", i.e., fed back into the converter.
- *
- * The code relies on the fact that replaying will not nest -
- * converting a replay buffer will not result in a replay.
- * This is because a replay is necessary only after the _continuation_ of a
- * partial match failed, but a replay buffer is converted as a whole.
- * It may result in some of its units being stored again for a partial match,
- * but there will not be a continuation _during_ the replay which could fail.
- *
- * It is conceivable that a callback function could call the converter
- * recursively in a way that causes another replay to be stored, but that
- * would be an error in the callback function.
- * Such violations will cause assertion failures in a debug build,
- * and wrong output, but they will not cause a crash.
- */
-
-static void
-_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
- UConverterFromUnicode fromUnicode;
- UConverter *cnv;
- const UChar *s;
- char *t;
- int32_t *offsets;
- int32_t sourceIndex;
- int32_t errorInputLength;
- UBool converterSawEndOfInput, calledCallback;
-
- /* variables for m:n conversion */
- UChar replay[UCNV_EXT_MAX_UCHARS];
- const UChar *realSource, *realSourceLimit;
- int32_t realSourceIndex;
- UBool realFlush;
-
- cnv=pArgs->converter;
- s=pArgs->source;
- t=pArgs->target;
- offsets=pArgs->offsets;
-
- /* get the converter implementation function */
- sourceIndex=0;
- if(offsets==NULL) {
- fromUnicode=cnv->sharedData->impl->fromUnicode;
- } else {
- fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
- if(fromUnicode==NULL) {
- /* there is no WithOffsets implementation */
- fromUnicode=cnv->sharedData->impl->fromUnicode;
- /* we will write -1 for each offset */
- sourceIndex=-1;
- }
- }
-
- if(cnv->preFromULength>=0) {
- /* normal mode */
- realSource=NULL;
-
- /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
- realSourceLimit=NULL;
- realFlush=FALSE;
- realSourceIndex=0;
- } else {
- /*
- * Previous m:n conversion stored source units from a partial match
- * and failed to consume all of them.
- * We need to "replay" them from a temporary buffer and convert them first.
- */
- realSource=pArgs->source;
- realSourceLimit=pArgs->sourceLimit;
- realFlush=pArgs->flush;
- realSourceIndex=sourceIndex;
-
- uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
- pArgs->source=replay;
- pArgs->sourceLimit=replay-cnv->preFromULength;
- pArgs->flush=FALSE;
- sourceIndex=-1;
-
- cnv->preFromULength=0;
- }
-
- /*
- * loop for conversion and error handling
- *
- * loop {
- * convert
- * loop {
- * update offsets
- * handle end of input
- * handle errors/call callback
- * }
- * }
- */
- for(;;) {
- if(U_SUCCESS(*err)) {
- /* convert */
- fromUnicode(pArgs, err);
-
- /*
- * set a flag for whether the converter
- * successfully processed the end of the input
- *
- * need not check cnv->preFromULength==0 because a replay (<0) will cause
- * s<sourceLimit before converterSawEndOfInput is checked
- */
- converterSawEndOfInput=
- (UBool)(U_SUCCESS(*err) &&
- pArgs->flush && pArgs->source==pArgs->sourceLimit &&
- cnv->fromUChar32==0);
- } else {
- /* handle error from ucnv_convertEx() */
- converterSawEndOfInput=FALSE;
- }
-
- /* no callback called yet for this iteration */
- calledCallback=FALSE;
-
- /* no sourceIndex adjustment for conversion, only for callback output */
- errorInputLength=0;
-
- /*
- * loop for offsets and error handling
- *
- * iterates at most 3 times:
- * 1. to clean up after the conversion function
- * 2. after the callback
- * 3. after the callback again if there was truncated input
- */
- for(;;) {
- /* update offsets if we write any */
- if(offsets!=NULL) {
- int32_t length=(int32_t)(pArgs->target-t);
- if(length>0) {
- _updateOffsets(offsets, length, sourceIndex, errorInputLength);
-
- /*
- * if a converter handles offsets and updates the offsets
- * pointer at the end, then pArgs->offset should not change
- * here;
- * however, some converters do not handle offsets at all
- * (sourceIndex<0) or may not update the offsets pointer
- */
- pArgs->offsets=offsets+=length;
- }
-
- if(sourceIndex>=0) {
- sourceIndex+=(int32_t)(pArgs->source-s);
- }
- }
-
- if(cnv->preFromULength<0) {
- /*
- * switch the source to new replay units (cannot occur while replaying)
- * after offset handling and before end-of-input and callback handling
- */
- if(realSource==NULL) {
- realSource=pArgs->source;
- realSourceLimit=pArgs->sourceLimit;
- realFlush=pArgs->flush;
- realSourceIndex=sourceIndex;
-
- uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
- pArgs->source=replay;
- pArgs->sourceLimit=replay-cnv->preFromULength;
- pArgs->flush=FALSE;
- if((sourceIndex+=cnv->preFromULength)<0) {
- sourceIndex=-1;
- }
-
- cnv->preFromULength=0;
- } else {
- /* see implementation note before _fromUnicodeWithCallback() */
- U_ASSERT(realSource==NULL);
- *err=U_INTERNAL_PROGRAM_ERROR;
- }
- }
-
- /* update pointers */
- s=pArgs->source;
- t=pArgs->target;
-
- if(U_SUCCESS(*err)) {
- if(s<pArgs->sourceLimit) {
- /*
- * continue with the conversion loop while there is still input left
- * (continue converting by breaking out of only the inner loop)
- */
- break;
- } else if(realSource!=NULL) {
- /* switch back from replaying to the real source and continue */
- pArgs->source=realSource;
- pArgs->sourceLimit=realSourceLimit;
- pArgs->flush=realFlush;
- sourceIndex=realSourceIndex;
-
- realSource=NULL;
- break;
- } else if(pArgs->flush && cnv->fromUChar32!=0) {
- /*
- * the entire input stream is consumed
- * and there is a partial, truncated input sequence left
- */
-
- /* inject an error and continue with callback handling */
- *err=U_TRUNCATED_CHAR_FOUND;
- calledCallback=FALSE; /* new error condition */
- } else {
- /* input consumed */
- if(pArgs->flush) {
- /*
- * return to the conversion loop once more if the flush
- * flag is set and the conversion function has not
- * successfully processed the end of the input yet
- *
- * (continue converting by breaking out of only the inner loop)
- */
- if(!converterSawEndOfInput) {
- break;
- }
-
- /* reset the converter without calling the callback function */
- _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
- }
-
- /* done successfully */
- return;
- }
- }
-
- /* U_FAILURE(*err) */
- {
- UErrorCode e;
-
- if( calledCallback ||
- (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
- (e!=U_INVALID_CHAR_FOUND &&
- e!=U_ILLEGAL_CHAR_FOUND &&
- e!=U_TRUNCATED_CHAR_FOUND)
- ) {
- /*
- * the callback did not or cannot resolve the error:
- * set output pointers and return
- *
- * the check for buffer overflow is redundant but it is
- * a high-runner case and hopefully documents the intent
- * well
- *
- * if we were replaying, then the replay buffer must be
- * copied back into the UConverter
- * and the real arguments must be restored
- */
- if(realSource!=NULL) {
- int32_t length;
-
- U_ASSERT(cnv->preFromULength==0);
-
- length=(int32_t)(pArgs->sourceLimit-pArgs->source);
- if(length>0) {
- u_memcpy(cnv->preFromU, pArgs->source, length);
- cnv->preFromULength=(int8_t)-length;
- }
-
- pArgs->source=realSource;
- pArgs->sourceLimit=realSourceLimit;
- pArgs->flush=realFlush;
- }
-
- return;
- }
- }
-
- /* callback handling */
- {
- UChar32 codePoint;
-
- /* get and write the code point */
- codePoint=cnv->fromUChar32;
- errorInputLength=0;
- U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
- cnv->invalidUCharLength=(int8_t)errorInputLength;
-
- /* set the converter state to deal with the next character */
- cnv->fromUChar32=0;
-
- /* call the callback function */
- cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
- cnv->invalidUCharBuffer, errorInputLength, codePoint,
- *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
- err);
- }
-
- /*
- * loop back to the offset handling
- *
- * this flag will indicate after offset handling
- * that a callback was called;
- * if the callback did not resolve the error, then we return
- */
- calledCallback=TRUE;
- }
- }
-}
-
-/*
- * Output the fromUnicode overflow buffer.
- * Call this function if(cnv->charErrorBufferLength>0).
- * @return TRUE if overflow
- */
-static UBool
-ucnv_outputOverflowFromUnicode(UConverter *cnv,
- char **target, const char *targetLimit,
- int32_t **pOffsets,
- UErrorCode *err) {
- int32_t *offsets;
- char *overflow, *t;
- int32_t i, length;
-
- t=*target;
- if(pOffsets!=NULL) {
- offsets=*pOffsets;
- } else {
- offsets=NULL;
- }
-
- overflow=(char *)cnv->charErrorBuffer;
- length=cnv->charErrorBufferLength;
- i=0;
- while(i<length) {
- if(t==targetLimit) {
- /* the overflow buffer contains too much, keep the rest */
- int32_t j=0;
-
- do {
- overflow[j++]=overflow[i++];
- } while(i<length);
-
- cnv->charErrorBufferLength=(int8_t)j;
- *target=t;
- if(offsets!=NULL) {
- *pOffsets=offsets;
- }
- *err=U_BUFFER_OVERFLOW_ERROR;
- return TRUE;
- }
-
- /* copy the overflow contents to the target */
- *t++=overflow[i++];
- if(offsets!=NULL) {
- *offsets++=-1; /* no source index available for old output */
- }
- }
-
- /* the overflow buffer is completely copied to the target */
- cnv->charErrorBufferLength=0;
- *target=t;
- if(offsets!=NULL) {
- *pOffsets=offsets;
- }
- return FALSE;
-}
-
-U_CAPI void U_EXPORT2
-ucnv_fromUnicode(UConverter *cnv,
- char **target, const char *targetLimit,
- const UChar **source, const UChar *sourceLimit,
- int32_t *offsets,
- UBool flush,
- UErrorCode *err) {
- UConverterFromUnicodeArgs args;
- const UChar *s;
- char *t;
-
- /* check parameters */
- if(err==NULL || U_FAILURE(*err)) {
- return;
- }
-
- if(cnv==NULL || target==NULL || source==NULL) {
- *err=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- s=*source;
- t=*target;
-
- if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
- /*
- Prevent code from going into an infinite loop in case we do hit this
- limit. The limit pointer is expected to be on a UChar * boundary.
- This also prevents the next argument check from failing.
- */
- sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
- }
-
- /*
- * All these conditions should never happen.
- *
- * 1) Make sure that the limits are >= to the address source or target
- *
- * 2) Make sure that the buffer sizes do not exceed the number range for
- * int32_t because some functions use the size (in units or bytes)
- * rather than comparing pointers, and because offsets are int32_t values.
- *
- * size_t is guaranteed to be unsigned and large enough for the job.
- *
- * Return with an error instead of adjusting the limits because we would
- * not be able to maintain the semantics that either the source must be
- * consumed or the target filled (unless an error occurs).
- * An adjustment would be targetLimit=t+0x7fffffff; for example.
- *
- * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
- * to a char * pointer and provide an incomplete UChar code unit.
- */
- if (sourceLimit<s || targetLimit<t ||
- ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
- ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) ||
- (((const char *)sourceLimit-(const char *)s) & 1) != 0)
- {
- *err=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- /* output the target overflow buffer */
- if( cnv->charErrorBufferLength>0 &&
- ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
- ) {
- /* U_BUFFER_OVERFLOW_ERROR */
- return;
- }
- /* *target may have moved, therefore stop using t */
-
- if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
- /* the overflow buffer is emptied and there is no new input: we are done */
- return;
- }
-
- /*
- * Do not simply return with a buffer overflow error if
- * !flush && t==targetLimit
- * because it is possible that the source will not generate any output.
- * For example, the skip callback may be called;
- * it does not output anything.
- */
-
- /* prepare the converter arguments */
- args.converter=cnv;
- args.flush=flush;
- args.offsets=offsets;
- args.source=s;
- args.sourceLimit=sourceLimit;
- args.target=*target;
- args.targetLimit=targetLimit;
- args.size=sizeof(args);
-
- _fromUnicodeWithCallback(&args, err);
-
- *source=args.source;
- *target=args.target;
-}
-
-/* ucnv_toUnicode() --------------------------------------------------------- */
-
-static void
-_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
- UConverterToUnicode toUnicode;
- UConverter *cnv;
- const char *s;
- UChar *t;
- int32_t *offsets;
- int32_t sourceIndex;
- int32_t errorInputLength;
- UBool converterSawEndOfInput, calledCallback;
-
- /* variables for m:n conversion */
- char replay[UCNV_EXT_MAX_BYTES];
- const char *realSource, *realSourceLimit;
- int32_t realSourceIndex;
- UBool realFlush;
-
- cnv=pArgs->converter;
- s=pArgs->source;
- t=pArgs->target;
- offsets=pArgs->offsets;
-
- /* get the converter implementation function */
- sourceIndex=0;
- if(offsets==NULL) {
- toUnicode=cnv->sharedData->impl->toUnicode;
- } else {
- toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
- if(toUnicode==NULL) {
- /* there is no WithOffsets implementation */
- toUnicode=cnv->sharedData->impl->toUnicode;
- /* we will write -1 for each offset */
- sourceIndex=-1;
- }
- }
-
- if(cnv->preToULength>=0) {
- /* normal mode */
- realSource=NULL;
-
- /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
- realSourceLimit=NULL;
- realFlush=FALSE;
- realSourceIndex=0;
- } else {
- /*
- * Previous m:n conversion stored source units from a partial match
- * and failed to consume all of them.
- * We need to "replay" them from a temporary buffer and convert them first.
- */
- realSource=pArgs->source;
- realSourceLimit=pArgs->sourceLimit;
- realFlush=pArgs->flush;
- realSourceIndex=sourceIndex;
-
- uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
- pArgs->source=replay;
- pArgs->sourceLimit=replay-cnv->preToULength;
- pArgs->flush=FALSE;
- sourceIndex=-1;
-
- cnv->preToULength=0;
- }
-
- /*
- * loop for conversion and error handling
- *
- * loop {
- * convert
- * loop {
- * update offsets
- * handle end of input
- * handle errors/call callback
- * }
- * }
- */
- for(;;) {
- if(U_SUCCESS(*err)) {
- /* convert */
- toUnicode(pArgs, err);
-
- /*
- * set a flag for whether the converter
- * successfully processed the end of the input
- *
- * need not check cnv->preToULength==0 because a replay (<0) will cause
- * s<sourceLimit before converterSawEndOfInput is checked
- */
- converterSawEndOfInput=
- (UBool)(U_SUCCESS(*err) &&
- pArgs->flush && pArgs->source==pArgs->sourceLimit &&
- cnv->toULength==0);
- } else {
- /* handle error from getNextUChar() or ucnv_convertEx() */
- converterSawEndOfInput=FALSE;
- }
-
- /* no callback called yet for this iteration */
- calledCallback=FALSE;
-
- /* no sourceIndex adjustment for conversion, only for callback output */
- errorInputLength=0;
-
- /*
- * loop for offsets and error handling
- *
- * iterates at most 3 times:
- * 1. to clean up after the conversion function
- * 2. after the callback
- * 3. after the callback again if there was truncated input
- */
- for(;;) {
- /* update offsets if we write any */
- if(offsets!=NULL) {
- int32_t length=(int32_t)(pArgs->target-t);
- if(length>0) {
- _updateOffsets(offsets, length, sourceIndex, errorInputLength);
-
- /*
- * if a converter handles offsets and updates the offsets
- * pointer at the end, then pArgs->offset should not change
- * here;
- * however, some converters do not handle offsets at all
- * (sourceIndex<0) or may not update the offsets pointer
- */
- pArgs->offsets=offsets+=length;
- }
-
- if(sourceIndex>=0) {
- sourceIndex+=(int32_t)(pArgs->source-s);
- }
- }
-
- if(cnv->preToULength<0) {
- /*
- * switch the source to new replay units (cannot occur while replaying)
- * after offset handling and before end-of-input and callback handling
- */
- if(realSource==NULL) {
- realSource=pArgs->source;
- realSourceLimit=pArgs->sourceLimit;
- realFlush=pArgs->flush;
- realSourceIndex=sourceIndex;
-
- uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
- pArgs->source=replay;
- pArgs->sourceLimit=replay-cnv->preToULength;
- pArgs->flush=FALSE;
- if((sourceIndex+=cnv->preToULength)<0) {
- sourceIndex=-1;
- }
-
- cnv->preToULength=0;
- } else {
- /* see implementation note before _fromUnicodeWithCallback() */
- U_ASSERT(realSource==NULL);
- *err=U_INTERNAL_PROGRAM_ERROR;
- }
- }
-
- /* update pointers */
- s=pArgs->source;
- t=pArgs->target;
-
- if(U_SUCCESS(*err)) {
- if(s<pArgs->sourceLimit) {
- /*
- * continue with the conversion loop while there is still input left
- * (continue converting by breaking out of only the inner loop)
- */
- break;
- } else if(realSource!=NULL) {
- /* switch back from replaying to the real source and continue */
- pArgs->source=realSource;
- pArgs->sourceLimit=realSourceLimit;
- pArgs->flush=realFlush;
- sourceIndex=realSourceIndex;
-
- realSource=NULL;
- break;
- } else if(pArgs->flush && cnv->toULength>0) {
- /*
- * the entire input stream is consumed
- * and there is a partial, truncated input sequence left
- */
-
- /* inject an error and continue with callback handling */
- *err=U_TRUNCATED_CHAR_FOUND;
- calledCallback=FALSE; /* new error condition */
- } else {
- /* input consumed */
- if(pArgs->flush) {
- /*
- * return to the conversion loop once more if the flush
- * flag is set and the conversion function has not
- * successfully processed the end of the input yet
- *
- * (continue converting by breaking out of only the inner loop)
- */
- if(!converterSawEndOfInput) {
- break;
- }
-
- /* reset the converter without calling the callback function */
- _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
- }
-
- /* done successfully */
- return;
- }
- }
-
- /* U_FAILURE(*err) */
- {
- UErrorCode e;
-
- if( calledCallback ||
- (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
- (e!=U_INVALID_CHAR_FOUND &&
- e!=U_ILLEGAL_CHAR_FOUND &&
- e!=U_TRUNCATED_CHAR_FOUND &&
- e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
- e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
- ) {
- /*
- * the callback did not or cannot resolve the error:
- * set output pointers and return
- *
- * the check for buffer overflow is redundant but it is
- * a high-runner case and hopefully documents the intent
- * well
- *
- * if we were replaying, then the replay buffer must be
- * copied back into the UConverter
- * and the real arguments must be restored
- */
- if(realSource!=NULL) {
- int32_t length;
-
- U_ASSERT(cnv->preToULength==0);
-
- length=(int32_t)(pArgs->sourceLimit-pArgs->source);
- if(length>0) {
- uprv_memcpy(cnv->preToU, pArgs->source, length);
- cnv->preToULength=(int8_t)-length;
- }
-
- pArgs->source=realSource;
- pArgs->sourceLimit=realSourceLimit;
- pArgs->flush=realFlush;
- }
-
- return;
- }
- }
-
- /* copy toUBytes[] to invalidCharBuffer[] */
- errorInputLength=cnv->invalidCharLength=cnv->toULength;
- if(errorInputLength>0) {
- uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
- }
-
- /* set the converter state to deal with the next character */
- cnv->toULength=0;
-
- /* call the callback function */
- if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
- cnv->toUCallbackReason = UCNV_UNASSIGNED;
- }
- cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
- cnv->invalidCharBuffer, errorInputLength,
- cnv->toUCallbackReason,
- err);
- cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
-
- /*
- * loop back to the offset handling
- *
- * this flag will indicate after offset handling
- * that a callback was called;
- * if the callback did not resolve the error, then we return
- */
- calledCallback=TRUE;
- }
- }
-}
-
-/*
- * Output the toUnicode overflow buffer.
- * Call this function if(cnv->UCharErrorBufferLength>0).
- * @return TRUE if overflow
- */
-static UBool
-ucnv_outputOverflowToUnicode(UConverter *cnv,
- UChar **target, const UChar *targetLimit,
- int32_t **pOffsets,
- UErrorCode *err) {
- int32_t *offsets;
- UChar *overflow, *t;
- int32_t i, length;
-
- t=*target;
- if(pOffsets!=NULL) {
- offsets=*pOffsets;
- } else {
- offsets=NULL;
- }
-
- overflow=cnv->UCharErrorBuffer;
- length=cnv->UCharErrorBufferLength;
- i=0;
- while(i<length) {
- if(t==targetLimit) {
- /* the overflow buffer contains too much, keep the rest */
- int32_t j=0;
-
- do {
- overflow[j++]=overflow[i++];
- } while(i<length);
-
- cnv->UCharErrorBufferLength=(int8_t)j;
- *target=t;
- if(offsets!=NULL) {
- *pOffsets=offsets;
- }
- *err=U_BUFFER_OVERFLOW_ERROR;
- return TRUE;
- }
-
- /* copy the overflow contents to the target */
- *t++=overflow[i++];
- if(offsets!=NULL) {
- *offsets++=-1; /* no source index available for old output */
- }
- }
-
- /* the overflow buffer is completely copied to the target */
- cnv->UCharErrorBufferLength=0;
- *target=t;
- if(offsets!=NULL) {
- *pOffsets=offsets;
- }
- return FALSE;
-}
-
-U_CAPI void U_EXPORT2
-ucnv_toUnicode(UConverter *cnv,
- UChar **target, const UChar *targetLimit,
- const char **source, const char *sourceLimit,
- int32_t *offsets,
- UBool flush,
- UErrorCode *err) {
- UConverterToUnicodeArgs args;
- const char *s;
- UChar *t;
-
- /* check parameters */
- if(err==NULL || U_FAILURE(*err)) {
- return;
- }
-
- if(cnv==NULL || target==NULL || source==NULL) {
- *err=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- s=*source;
- t=*target;
-
- if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) {
- /*
- Prevent code from going into an infinite loop in case we do hit this
- limit. The limit pointer is expected to be on a UChar * boundary.
- This also prevents the next argument check from failing.
- */
- targetLimit = (const UChar *)(((const char *)targetLimit) - 1);
- }
-
- /*
- * All these conditions should never happen.
- *
- * 1) Make sure that the limits are >= to the address source or target
- *
- * 2) Make sure that the buffer sizes do not exceed the number range for
- * int32_t because some functions use the size (in units or bytes)
- * rather than comparing pointers, and because offsets are int32_t values.
- *
- * size_t is guaranteed to be unsigned and large enough for the job.
- *
- * Return with an error instead of adjusting the limits because we would
- * not be able to maintain the semantics that either the source must be
- * consumed or the target filled (unless an error occurs).
- * An adjustment would be sourceLimit=t+0x7fffffff; for example.
- *
- * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
- * to a char * pointer and provide an incomplete UChar code unit.
- */
- if (sourceLimit<s || targetLimit<t ||
- ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
- ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) ||
- (((const char *)targetLimit-(const char *)t) & 1) != 0
- ) {
- *err=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- /* output the target overflow buffer */
- if( cnv->UCharErrorBufferLength>0 &&
- ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)
- ) {
- /* U_BUFFER_OVERFLOW_ERROR */
- return;
- }
- /* *target may have moved, therefore stop using t */
-
- if(!flush && s==sourceLimit && cnv->preToULength>=0) {
- /* the overflow buffer is emptied and there is no new input: we are done */
- return;
- }
-
- /*
- * Do not simply return with a buffer overflow error if
- * !flush && t==targetLimit
- * because it is possible that the source will not generate any output.
- * For example, the skip callback may be called;
- * it does not output anything.
- */
-
- /* prepare the converter arguments */
- args.converter=cnv;
- args.flush=flush;
- args.offsets=offsets;
- args.source=s;
- args.sourceLimit=sourceLimit;
- args.target=*target;
- args.targetLimit=targetLimit;
- args.size=sizeof(args);
-
- _toUnicodeWithCallback(&args, err);
-
- *source=args.source;
- *target=args.target;
-}
-
-/* ucnv_to/fromUChars() ----------------------------------------------------- */
-
-U_CAPI int32_t U_EXPORT2
-ucnv_fromUChars(UConverter *cnv,
- char *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- UErrorCode *pErrorCode) {
- const UChar *srcLimit;
- char *originalDest, *destLimit;
- int32_t destLength;
-
- /* check arguments */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- if( cnv==NULL ||
- destCapacity<0 || (destCapacity>0 && dest==NULL) ||
- srcLength<-1 || (srcLength!=0 && src==NULL)
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* initialize */
- ucnv_resetFromUnicode(cnv);
- originalDest=dest;
- if(srcLength==-1) {
- srcLength=u_strlen(src);
- }
- if(srcLength>0) {
- srcLimit=src+srcLength;
- destCapacity=pinCapacity(dest, destCapacity);
- destLimit=dest+destCapacity;
-
- /* perform the conversion */
- ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
- destLength=(int32_t)(dest-originalDest);
-
- /* if an overflow occurs, then get the preflighting length */
- if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
- char buffer[1024];
-
- destLimit=buffer+sizeof(buffer);
- do {
- dest=buffer;
- *pErrorCode=U_ZERO_ERROR;
- ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
- destLength+=(int32_t)(dest-buffer);
- } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
- }
- } else {
- destLength=0;
- }
-
- return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-ucnv_toUChars(UConverter *cnv,
- UChar *dest, int32_t destCapacity,
- const char *src, int32_t srcLength,
- UErrorCode *pErrorCode) {
- const char *srcLimit;
- UChar *originalDest, *destLimit;
- int32_t destLength;
-
- /* check arguments */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- if( cnv==NULL ||
- destCapacity<0 || (destCapacity>0 && dest==NULL) ||
- srcLength<-1 || (srcLength!=0 && src==NULL))
- {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* initialize */
- ucnv_resetToUnicode(cnv);
- originalDest=dest;
- if(srcLength==-1) {
- srcLength=(int32_t)uprv_strlen(src);
- }
- if(srcLength>0) {
- srcLimit=src+srcLength;
- destCapacity=pinCapacity(dest, destCapacity);
- destLimit=dest+destCapacity;
-
- /* perform the conversion */
- ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
- destLength=(int32_t)(dest-originalDest);
-
- /* if an overflow occurs, then get the preflighting length */
- if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR)
- {
- UChar buffer[1024];
-
- destLimit=buffer+UPRV_LENGTHOF(buffer);
- do {
- dest=buffer;
- *pErrorCode=U_ZERO_ERROR;
- ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
- destLength+=(int32_t)(dest-buffer);
- }
- while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
- }
- } else {
- destLength=0;
- }
-
- return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode);
-}
-
-/* ucnv_getNextUChar() ------------------------------------------------------ */
-
-U_CAPI UChar32 U_EXPORT2
-ucnv_getNextUChar(UConverter *cnv,
- const char **source, const char *sourceLimit,
- UErrorCode *err) {
- UConverterToUnicodeArgs args;
- UChar buffer[U16_MAX_LENGTH];
- const char *s;
- UChar32 c;
- int32_t i, length;
-
- /* check parameters */
- if(err==NULL || U_FAILURE(*err)) {
- return 0xffff;
- }
-
- if(cnv==NULL || source==NULL) {
- *err=U_ILLEGAL_ARGUMENT_ERROR;
- return 0xffff;
- }
-
- s=*source;
- if(sourceLimit<s) {
- *err=U_ILLEGAL_ARGUMENT_ERROR;
- return 0xffff;
- }
-
- /*
- * Make sure that the buffer sizes do not exceed the number range for
- * int32_t because some functions use the size (in units or bytes)
- * rather than comparing pointers, and because offsets are int32_t values.
- *
- * size_t is guaranteed to be unsigned and large enough for the job.
- *
- * Return with an error instead of adjusting the limits because we would
- * not be able to maintain the semantics that either the source must be
- * consumed or the target filled (unless an error occurs).
- * An adjustment would be sourceLimit=t+0x7fffffff; for example.
- */
- if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
- *err=U_ILLEGAL_ARGUMENT_ERROR;
- return 0xffff;
- }
-
- c=U_SENTINEL;
-
- /* flush the target overflow buffer */
- if(cnv->UCharErrorBufferLength>0) {
- UChar *overflow;
-
- overflow=cnv->UCharErrorBuffer;
- i=0;
- length=cnv->UCharErrorBufferLength;
- U16_NEXT(overflow, i, length, c);
-
- /* move the remaining overflow contents up to the beginning */
- if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
- uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
- cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
- }
-
- if(!U16_IS_LEAD(c) || i<length) {
- return c;
- }
- /*
- * Continue if the overflow buffer contained only a lead surrogate,
- * in case the converter outputs single surrogates from complete
- * input sequences.
- */
- }
-
- /*
- * flush==TRUE is implied for ucnv_getNextUChar()
- *
- * do not simply return even if s==sourceLimit because the converter may
- * not have seen flush==TRUE before
- */
-
- /* prepare the converter arguments */
- args.converter=cnv;
- args.flush=TRUE;
- args.offsets=NULL;
- args.source=s;
- args.sourceLimit=sourceLimit;
- args.target=buffer;
- args.targetLimit=buffer+1;
- args.size=sizeof(args);
-
- if(c<0) {
- /*
- * call the native getNextUChar() implementation if we are
- * at a character boundary (toULength==0)
- *
- * unlike with _toUnicode(), getNextUChar() implementations must set
- * U_TRUNCATED_CHAR_FOUND for truncated input,
- * in addition to setting toULength/toUBytes[]
- */
- if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
- c=cnv->sharedData->impl->getNextUChar(&args, err);
- *source=s=args.source;
- if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
- /* reset the converter without calling the callback function */
- _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
- return 0xffff; /* no output */
- } else if(U_SUCCESS(*err) && c>=0) {
- return c;
- /*
- * else fall through to use _toUnicode() because
- * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
- * U_FAILURE: call _toUnicode() for callback handling (do not output c)
- */
- }
- }
-
- /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
- _toUnicodeWithCallback(&args, err);
-
- if(*err==U_BUFFER_OVERFLOW_ERROR) {
- *err=U_ZERO_ERROR;
- }
-
- i=0;
- length=(int32_t)(args.target-buffer);
- } else {
- /* write the lead surrogate from the overflow buffer */
- buffer[0]=(UChar)c;
- args.target=buffer+1;
- i=0;
- length=1;
- }
-
- /* buffer contents starts at i and ends before length */
-
- if(U_FAILURE(*err)) {
- c=0xffff; /* no output */
- } else if(length==0) {
- /* no input or only state changes */
- *err=U_INDEX_OUTOFBOUNDS_ERROR;
- /* no need to reset explicitly because _toUnicodeWithCallback() did it */
- c=0xffff; /* no output */
- } else {
- c=buffer[0];
- i=1;
- if(!U16_IS_LEAD(c)) {
- /* consume c=buffer[0], done */
- } else {
- /* got a lead surrogate, see if a trail surrogate follows */
- UChar c2;
-
- if(cnv->UCharErrorBufferLength>0) {
- /* got overflow output from the conversion */
- if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
- /* got a trail surrogate, too */
- c=U16_GET_SUPPLEMENTARY(c, c2);
-
- /* move the remaining overflow contents up to the beginning */
- if((--cnv->UCharErrorBufferLength)>0) {
- uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
- cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
- }
- } else {
- /* c is an unpaired lead surrogate, just return it */
- }
- } else if(args.source<sourceLimit) {
- /* convert once more, to buffer[1] */
- args.targetLimit=buffer+2;
- _toUnicodeWithCallback(&args, err);
- if(*err==U_BUFFER_OVERFLOW_ERROR) {
- *err=U_ZERO_ERROR;
- }
-
- length=(int32_t)(args.target-buffer);
- if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
- /* got a trail surrogate, too */
- c=U16_GET_SUPPLEMENTARY(c, c2);
- i=2;
- }
- }
- }
- }
-
- /*
- * move leftover output from buffer[i..length[
- * into the beginning of the overflow buffer
- */
- if(i<length) {
- /* move further overflow back */
- int32_t delta=length-i;
- if((length=cnv->UCharErrorBufferLength)>0) {
- uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
- length*U_SIZEOF_UCHAR);
- }
- cnv->UCharErrorBufferLength=(int8_t)(length+delta);
-
- cnv->UCharErrorBuffer[0]=buffer[i++];
- if(delta>1) {
- cnv->UCharErrorBuffer[1]=buffer[i];
- }
- }
-
- *source=args.source;
- return c;
-}
-
-/* ucnv_convert() and siblings ---------------------------------------------- */
-
-U_CAPI void U_EXPORT2
-ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
- char **target, const char *targetLimit,
- const char **source, const char *sourceLimit,
- UChar *pivotStart, UChar **pivotSource,
- UChar **pivotTarget, const UChar *pivotLimit,
- UBool reset, UBool flush,
- UErrorCode *pErrorCode) {
- UChar pivotBuffer[CHUNK_SIZE];
- const UChar *myPivotSource;
- UChar *myPivotTarget;
- const char *s;
- char *t;
-
- UConverterToUnicodeArgs toUArgs;
- UConverterFromUnicodeArgs fromUArgs;
- UConverterConvert convert;
-
- /* error checking */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- if( targetCnv==NULL || sourceCnv==NULL ||
- source==NULL || *source==NULL ||
- target==NULL || *target==NULL || targetLimit==NULL
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- s=*source;
- t=*target;
- if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- /*
- * Make sure that the buffer sizes do not exceed the number range for
- * int32_t. See ucnv_toUnicode() for a more detailed comment.
- */
- if(
- (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
- ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- if(pivotStart==NULL) {
- if(!flush) {
- /* streaming conversion requires an explicit pivot buffer */
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- /* use the stack pivot buffer */
- myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
- pivotSource=(UChar **)&myPivotSource;
- pivotTarget=&myPivotTarget;
- pivotLimit=pivotBuffer+CHUNK_SIZE;
- } else if( pivotStart>=pivotLimit ||
- pivotSource==NULL || *pivotSource==NULL ||
- pivotTarget==NULL || *pivotTarget==NULL ||
- pivotLimit==NULL
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- if(sourceLimit==NULL) {
- /* get limit of single-byte-NUL-terminated source string */
- sourceLimit=uprv_strchr(*source, 0);
- }
-
- if(reset) {
- ucnv_resetToUnicode(sourceCnv);
- ucnv_resetFromUnicode(targetCnv);
- *pivotSource=*pivotTarget=pivotStart;
- } else if(targetCnv->charErrorBufferLength>0) {
- /* output the targetCnv overflow buffer */
- if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
- /* U_BUFFER_OVERFLOW_ERROR */
- return;
- }
- /* *target has moved, therefore stop using t */
-
- if( !flush &&
- targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
- sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
- ) {
- /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
- return;
- }
- }
-
- /* Is direct-UTF-8 conversion available? */
- if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
- targetCnv->sharedData->impl->fromUTF8!=NULL
- ) {
- convert=targetCnv->sharedData->impl->fromUTF8;
- } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
- sourceCnv->sharedData->impl->toUTF8!=NULL
- ) {
- convert=sourceCnv->sharedData->impl->toUTF8;
- } else {
- convert=NULL;
- }
-
- /*
- * If direct-UTF-8 conversion is available, then we use a smaller
- * pivot buffer for error handling and partial matches
- * so that we quickly return to direct conversion.
- *
- * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
- *
- * We could reduce the pivot buffer size further, at the cost of
- * buffer overflows from callbacks.
- * The pivot buffer should not be smaller than the maximum number of
- * fromUnicode extension table input UChars
- * (for m:n conversion, see
- * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
- * or 2 for surrogate pairs.
- *
- * Too small a buffer can cause thrashing between pivoting and direct
- * conversion, with function call overhead outweighing the benefits
- * of direct conversion.
- */
- if(convert!=NULL && (pivotLimit-pivotStart)>32) {
- pivotLimit=pivotStart+32;
- }
-
- /* prepare the converter arguments */
- fromUArgs.converter=targetCnv;
- fromUArgs.flush=FALSE;
- fromUArgs.offsets=NULL;
- fromUArgs.target=*target;
- fromUArgs.targetLimit=targetLimit;
- fromUArgs.size=sizeof(fromUArgs);
-
- toUArgs.converter=sourceCnv;
- toUArgs.flush=flush;
- toUArgs.offsets=NULL;
- toUArgs.source=s;
- toUArgs.sourceLimit=sourceLimit;
- toUArgs.targetLimit=pivotLimit;
- toUArgs.size=sizeof(toUArgs);
-
- /*
- * TODO: Consider separating this function into two functions,
- * extracting exactly the conversion loop,
- * for readability and to reduce the set of visible variables.
- *
- * Otherwise stop using s and t from here on.
- */
- s=t=NULL;
-
- /*
- * conversion loop
- *
- * The sequence of steps in the loop may appear backward,
- * but the principle is simple:
- * In the chain of
- * source - sourceCnv overflow - pivot - targetCnv overflow - target
- * empty out later buffers before refilling them from earlier ones.
- *
- * The targetCnv overflow buffer is flushed out only once before the loop.
- */
- for(;;) {
- /*
- * if(pivot not empty or error or replay or flush fromUnicode) {
- * fromUnicode(pivot -> target);
- * }
- *
- * For pivoting conversion; and for direct conversion for
- * error callback handling and flushing the replay buffer.
- */
- if( *pivotSource<*pivotTarget ||
- U_FAILURE(*pErrorCode) ||
- targetCnv->preFromULength<0 ||
- fromUArgs.flush
- ) {
- fromUArgs.source=*pivotSource;
- fromUArgs.sourceLimit=*pivotTarget;
- _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- /* target overflow, or conversion error */
- *pivotSource=(UChar *)fromUArgs.source;
- break;
- }
-
- /*
- * _fromUnicodeWithCallback() must have consumed the pivot contents
- * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
- */
- }
-
- /* The pivot buffer is empty; reset it so we start at pivotStart. */
- *pivotSource=*pivotTarget=pivotStart;
-
- /*
- * if(sourceCnv overflow buffer not empty) {
- * move(sourceCnv overflow buffer -> pivot);
- * continue;
- * }
- */
- /* output the sourceCnv overflow buffer */
- if(sourceCnv->UCharErrorBufferLength>0) {
- if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
- /* U_BUFFER_OVERFLOW_ERROR */
- *pErrorCode=U_ZERO_ERROR;
- }
- continue;
- }
-
- /*
- * check for end of input and break if done
- *
- * Checking both flush and fromUArgs.flush ensures that the converters
- * have been called with the flush flag set if the ucnv_convertEx()
- * caller set it.
- */
- if( toUArgs.source==sourceLimit &&
- sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
- (!flush || fromUArgs.flush)
- ) {
- /* done successfully */
- break;
- }
-
- /*
- * use direct conversion if available
- * but not if continuing a partial match
- * or flushing the toUnicode replay buffer
- */
- if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
- if(*pErrorCode==U_USING_DEFAULT_WARNING) {
- /* remove a warning that may be set by this function */
- *pErrorCode=U_ZERO_ERROR;
- }
- convert(&fromUArgs, &toUArgs, pErrorCode);
- if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
- break;
- } else if(U_FAILURE(*pErrorCode)) {
- if(sourceCnv->toULength>0) {
- /*
- * Fall through to calling _toUnicodeWithCallback()
- * for callback handling.
- *
- * The pivot buffer will be reset with
- * *pivotSource=*pivotTarget=pivotStart;
- * which indicates a toUnicode error to the caller
- * (*pivotSource==pivotStart shows no pivot UChars consumed).
- */
- } else {
- /*
- * Indicate a fromUnicode error to the caller
- * (*pivotSource>pivotStart shows some pivot UChars consumed).
- */
- *pivotSource=*pivotTarget=pivotStart+1;
- /*
- * Loop around to calling _fromUnicodeWithCallbacks()
- * for callback handling.
- */
- continue;
- }
- } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
- /*
- * No error, but the implementation requested to temporarily
- * fall back to pivoting.
- */
- *pErrorCode=U_ZERO_ERROR;
- /*
- * The following else branches are almost identical to the end-of-input
- * handling in _toUnicodeWithCallback().
- * Avoid calling it just for the end of input.
- */
- } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
- /*
- * the entire input stream is consumed
- * and there is a partial, truncated input sequence left
- */
-
- /* inject an error and continue with callback handling */
- *pErrorCode=U_TRUNCATED_CHAR_FOUND;
- } else {
- /* input consumed */
- if(flush) {
- /* reset the converters without calling the callback functions */
- _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
- _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
- }
-
- /* done successfully */
- break;
- }
- }
-
- /*
- * toUnicode(source -> pivot);
- *
- * For pivoting conversion; and for direct conversion for
- * error callback handling, continuing partial matches
- * and flushing the replay buffer.
- *
- * The pivot buffer is empty and reset.
- */
- toUArgs.target=pivotStart; /* ==*pivotTarget */
- /* toUArgs.targetLimit=pivotLimit; already set before the loop */
- _toUnicodeWithCallback(&toUArgs, pErrorCode);
- *pivotTarget=toUArgs.target;
- if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
- /* pivot overflow: continue with the conversion loop */
- *pErrorCode=U_ZERO_ERROR;
- } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
- /* conversion error, or there was nothing left to convert */
- break;
- }
- /*
- * else:
- * _toUnicodeWithCallback() wrote into the pivot buffer,
- * continue with fromUnicode conversion.
- *
- * Set the fromUnicode flush flag if we flush and if toUnicode has
- * processed the end of the input.
- */
- if( flush && toUArgs.source==sourceLimit &&
- sourceCnv->preToULength>=0 &&
- sourceCnv->UCharErrorBufferLength==0
- ) {
- fromUArgs.flush=TRUE;
- }
- }
-
- /*
- * The conversion loop is exited when one of the following is true:
- * - the entire source text has been converted successfully to the target buffer
- * - a target buffer overflow occurred
- * - a conversion error occurred
- */
-
- *source=toUArgs.source;
- *target=fromUArgs.target;
-
- /* terminate the target buffer if possible */
- if(flush && U_SUCCESS(*pErrorCode)) {
- if(*target!=targetLimit) {
- **target=0;
- if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
- *pErrorCode=U_ZERO_ERROR;
- }
- } else {
- *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
- }
- }
-}
-
-/*
- * internal implementation of ucnv_convert() etc. with preflighting
- *
- * Converts the bytes in source (sourceLength bytes; when sourceLength<0 the
- * limit is found with uprv_strchr(source, 0)) from inConverter to outConverter
- * via the local pivotBuffer, writing into target[0..targetCapacity).
- *
- * targetLength counts the bytes produced by the real conversion plus the bytes
- * produced by the preflight loop. The preflight path returns the result of
- * u_terminateChars() for that count; otherwise targetLength is returned as is.
- */
-static int32_t
-ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
- char *target, int32_t targetCapacity,
- const char *source, int32_t sourceLength,
- UErrorCode *pErrorCode) {
- UChar pivotBuffer[CHUNK_SIZE];
- UChar *pivot, *pivot2; /* passed by address to every ucnv_convertEx() call below, so pivot state carries over between calls */
-
- char *myTarget;
- const char *sourceLimit;
- const char *targetLimit;
- int32_t targetLength=0;
-
- /* set up */
- if(sourceLength<0) {
- sourceLimit=uprv_strchr(source, 0); /* presumably strchr() semantics: the address of the terminating NUL */
- } else {
- sourceLimit=source+sourceLength;
- }
-
- /* if there is no input data, we're done */
- if(source==sourceLimit) {
- return u_terminateChars(target, targetCapacity, 0, pErrorCode);
- }
-
- pivot=pivot2=pivotBuffer;
- myTarget=target;
- targetLength=0;
-
- if(targetCapacity>0) {
- /* perform real conversion */
- targetLimit=target+targetCapacity;
- ucnv_convertEx(outConverter, inConverter,
- &myTarget, targetLimit,
- &source, sourceLimit,
- pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
- FALSE,
- TRUE,
- pErrorCode);
- targetLength=(int32_t)(myTarget-target);
- }
-
- /*
- * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
- * to it but continue the conversion in order to store in targetCapacity
- * the number of bytes that was required.
- */
- if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
- {
- char targetBuffer[CHUNK_SIZE]; /* scratch output: only the number of bytes written to it is kept */
-
- targetLimit=targetBuffer+CHUNK_SIZE;
- do {
- *pErrorCode=U_ZERO_ERROR; /* clear the overflow status before converting the next chunk */
- myTarget=targetBuffer; /* restart at the beginning of the scratch buffer on every pass */
- ucnv_convertEx(outConverter, inConverter,
- &myTarget, targetLimit,
- &source, sourceLimit,
- pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
- FALSE,
- TRUE,
- pErrorCode);
- targetLength+=(int32_t)(myTarget-targetBuffer);
- } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
-
- /* done with preflighting, set warnings and errors as appropriate */
- return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
- }
-
- /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
- return targetLength;
-}
-
-/*
- * Converts source from the charset named fromConverterName into the charset
- * named toConverterName, using two temporary stack-allocated converters.
- *
- * Returns 0 when pErrorCode is NULL or already a failure, when the arguments
- * are invalid (U_ILLEGAL_ARGUMENT_ERROR is set), or when a converter cannot
- * be created. Empty input is handed straight to u_terminateChars(); otherwise
- * the result of ucnv_internalConvert() is returned.
- */
-U_CAPI int32_t U_EXPORT2
-ucnv_convert(const char *toConverterName, const char *fromConverterName,
-             char *target, int32_t targetCapacity,
-             const char *source, int32_t sourceLength,
-             UErrorCode *pErrorCode) {
-    UConverter fromStorage, toStorage; /* stack-allocated */
-    UConverter *fromCnv, *toCnv;
-    int32_t length;
-
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-
-    /* validate the source description first, then the target buffer description */
-    if(source==NULL || sourceLength<-1) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-    if(targetCapacity<0 || (targetCapacity>0 && target==NULL)) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    /* nothing to convert: only terminate the output */
-    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
-        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
-    }
-
-    /* open the source-side converter, then the target-side one */
-    fromCnv=ucnv_createConverter(&fromStorage, fromConverterName, pErrorCode);
-    if(U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-
-    toCnv=ucnv_createConverter(&toStorage, toConverterName, pErrorCode);
-    if(U_FAILURE(*pErrorCode)) {
-        ucnv_close(fromCnv);
-        return 0;
-    }
-
-    length=ucnv_internalConvert(toCnv, fromCnv,
-                                target, targetCapacity,
-                                source, sourceLength,
-                                pErrorCode);
-
-    ucnv_close(fromCnv);
-    ucnv_close(toCnv);
-
-    return length;
-}
-
-/*
- * @internal
- * Shared implementation of ucnv_toAlgorithmic() and ucnv_fromAlgorithmic():
- * pairs cnv with a temporary, stack-allocated converter of algorithmicType.
- * convertToAlgorithmic selects the direction
- * (TRUE: cnv->Unicode->algo, FALSE: algo->Unicode->cnv).
- */
-static int32_t
-ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
-                        UConverterType algorithmicType,
-                        UConverter *cnv,
-                        char *target, int32_t targetCapacity,
-                        const char *source, int32_t sourceLength,
-                        UErrorCode *pErrorCode) {
-    UConverter algoStorage; /* stack-allocated */
-    UConverter *algoCnv, *srcCnv, *dstCnv;
-    int32_t length;
-
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-
-    if(cnv==NULL || source==NULL || sourceLength<-1) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-    if(targetCapacity<0 || (targetCapacity>0 && target==NULL)) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    /* nothing to convert: only terminate the output */
-    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
-        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
-    }
-
-    /* create the algorithmic converter */
-    algoCnv=ucnv_createAlgorithmicConverter(&algoStorage, algorithmicType,
-                                            "", 0, pErrorCode);
-    if(U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-
-    /* reset the side of cnv that is about to be used and assign the roles */
-    if(!convertToAlgorithmic) {
-        /* algo->Unicode->cnv */
-        ucnv_resetFromUnicode(cnv);
-        srcCnv=algoCnv;
-        dstCnv=cnv;
-    } else {
-        /* cnv->Unicode->algo */
-        ucnv_resetToUnicode(cnv);
-        srcCnv=cnv;
-        dstCnv=algoCnv;
-    }
-
-    length=ucnv_internalConvert(dstCnv, srcCnv,
-                                target, targetCapacity,
-                                source, sourceLength,
-                                pErrorCode);
-
-    ucnv_close(algoCnv);
-
-    return length;
-}
-
-/*
- * Public entry point for the cnv->Unicode->algo direction;
- * forwards every argument to ucnv_convertAlgorithmic().
- */
-U_CAPI int32_t U_EXPORT2
-ucnv_toAlgorithmic(UConverterType algorithmicType,
-                   UConverter *cnv,
-                   char *target, int32_t targetCapacity,
-                   const char *source, int32_t sourceLength,
-                   UErrorCode *pErrorCode) {
-    int32_t length;
-
-    length=ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
-                                   target, targetCapacity,
-                                   source, sourceLength,
-                                   pErrorCode);
-    return length;
-}
-
-/*
- * Public entry point for the algo->Unicode->cnv direction;
- * forwards every argument to ucnv_convertAlgorithmic().
- */
-U_CAPI int32_t U_EXPORT2
-ucnv_fromAlgorithmic(UConverter *cnv,
-                     UConverterType algorithmicType,
-                     char *target, int32_t targetCapacity,
-                     const char *source, int32_t sourceLength,
-                     UErrorCode *pErrorCode) {
-    int32_t length;
-
-    length=ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
-                                   target, targetCapacity,
-                                   source, sourceLength,
-                                   pErrorCode);
-    return length;
-}
-
-/*
- * Returns the conversionType recorded in the converter's static data.
- * When legacy conversion is compiled in and that type is UCNV_MBCS,
- * the answer is delegated to ucnv_MBCSGetType().
- */
-U_CAPI UConverterType U_EXPORT2
-ucnv_getType(const UConverter* converter)
-{
-    const int8_t conversionType = converter->sharedData->staticData->conversionType;
-
-#if !UCONFIG_NO_LEGACY_CONVERSION
-    if(UCNV_MBCS == conversionType) {
-        return ucnv_MBCSGetType(converter);
-    }
-#endif
-
-    return (UConverterType)conversionType;
-}
-
-/*
- * Asks the converter implementation to fill starters[] through its
- * getStarters hook.
- *
- * Does nothing when err is NULL or already indicates a failure.
- * Sets U_ILLEGAL_ARGUMENT_ERROR when converter is NULL or when the
- * implementation provides no getStarters hook.
- */
-U_CAPI void U_EXPORT2
-ucnv_getStarters(const UConverter* converter,
-                 UBool starters[256],
-                 UErrorCode* err)
-{
-    if (err == NULL || U_FAILURE(*err)) {
-        return;
-    }
-
-    /*
-     * A NULL converter used to be dereferenced here; report it the same way
-     * ucnv_getInvalidChars() and ucnv_getInvalidUChars() do.
-     */
-    if (converter == NULL || converter->sharedData->impl->getStarters == NULL) {
-        *err = U_ILLEGAL_ARGUMENT_ERROR;
-        return;
-    }
-
-    converter->sharedData->impl->getStarters(converter, starters, err);
-}
-
-/*
- * Looks up cnv's name (as reported by ucnv_getName()) in ambiguousConverters[].
- * Returns the matching entry, or NULL when cnv is NULL, the name cannot be
- * obtained, or no entry has that name.
- */
-static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
-{
-    UErrorCode status=U_ZERO_ERROR;
-    const char *cnvName;
-    const UAmbiguousConverter *entry, *limit;
-
-    if(cnv==NULL) {
-        return NULL;
-    }
-
-    cnvName=ucnv_getName(cnv, &status);
-    if(U_FAILURE(status)) {
-        return NULL;
-    }
-
-    /* linear scan over the table, comparing names exactly */
-    limit=ambiguousConverters+UPRV_LENGTHOF(ambiguousConverters);
-    for(entry=ambiguousConverters; entry<limit; ++entry) {
-        if(uprv_strcmp(cnvName, entry->name)==0) {
-            return entry;
-        }
-    }
-
-    return NULL;
-}
-
-/*
- * For a converter listed in ambiguousConverters[], rewrites in place every
- * UChar in source[0..sourceLength) that equals the entry's variant5c to 0x5c.
- * Does nothing for NULL arguments, a non-positive length, or a converter
- * that is not in the table.
- */
-U_CAPI void U_EXPORT2
-ucnv_fixFileSeparator(const UConverter *cnv,
-                      UChar* source,
-                      int32_t sourceLength) {
-    const UAmbiguousConverter *ambiguous;
-    UChar *p, *limit;
-    UChar variant;
-
-    if(cnv==NULL || source==NULL || sourceLength<=0) {
-        return;
-    }
-
-    ambiguous=ucnv_getAmbiguous(cnv);
-    if(ambiguous==NULL) {
-        return;
-    }
-
-    variant=ambiguous->variant5c;
-    limit=source+sourceLength;
-    for(p=source; p<limit; ++p) {
-        if(*p==variant) {
-            *p=0x5c;
-        }
-    }
-}
-
-/* Returns whether ucnv_getAmbiguous() finds a table entry for cnv. */
-U_CAPI UBool U_EXPORT2
-ucnv_isAmbiguous(const UConverter *cnv) {
-    const UAmbiguousConverter *entry=ucnv_getAmbiguous(cnv);
-
-    return (UBool)(entry!=NULL);
-}
-
-/* Stores usesFallback in the converter's useFallback field; cnv is not checked for NULL. */
-U_CAPI void U_EXPORT2
-ucnv_setFallback(UConverter *cnv, UBool usesFallback) {
-    cnv->useFallback = usesFallback;
-}
-
-/* Returns the converter's useFallback field; cnv is not checked for NULL. */
-U_CAPI UBool U_EXPORT2
-ucnv_usesFallback(const UConverter *cnv) {
-    return cnv->useFallback;
-}
-
-/*
- * Copies the converter's invalidCharBuffer into errBytes.
- *
- * On entry *len is the capacity of errBytes; on success it is set to
- * invalidCharLength. Sets U_ILLEGAL_ARGUMENT_ERROR for NULL arguments and
- * U_INDEX_OUTOFBOUNDS_ERROR when *len is smaller than invalidCharLength.
- * Does nothing when err is NULL or already indicates a failure.
- */
-U_CAPI void U_EXPORT2
-ucnv_getInvalidChars (const UConverter * converter,
-                      char *errBytes,
-                      int8_t * len,
-                      UErrorCode * err)
-{
-    if (err == NULL || U_FAILURE(*err)) {
-        return;
-    }
-
-    if (converter == NULL || errBytes == NULL || len == NULL) {
-        *err = U_ILLEGAL_ARGUMENT_ERROR;
-        return;
-    }
-
-    if (converter->invalidCharLength > *len) {
-        *err = U_INDEX_OUTOFBOUNDS_ERROR;
-        return;
-    }
-
-    /* report the stored length, and copy only when there is something to copy */
-    *len = converter->invalidCharLength;
-    if (*len > 0) {
-        uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
-    }
-}
-
-/*
- * Copies the converter's invalidUCharBuffer into errChars.
- *
- * On entry *len is the capacity of errChars; on success it is set to
- * invalidUCharLength. Sets U_ILLEGAL_ARGUMENT_ERROR for NULL arguments and
- * U_INDEX_OUTOFBOUNDS_ERROR when *len is smaller than invalidUCharLength.
- * Does nothing when err is NULL or already indicates a failure.
- */
-U_CAPI void U_EXPORT2
-ucnv_getInvalidUChars (const UConverter * converter,
-                       UChar *errChars,
-                       int8_t * len,
-                       UErrorCode * err)
-{
-    if (err == NULL || U_FAILURE(*err)) {
-        return;
-    }
-
-    if (converter == NULL || errChars == NULL || len == NULL) {
-        *err = U_ILLEGAL_ARGUMENT_ERROR;
-        return;
-    }
-
-    if (converter->invalidUCharLength > *len) {
-        *err = U_INDEX_OUTOFBOUNDS_ERROR;
-        return;
-    }
-
-    /* report the stored length, and copy only when there is something to copy */
-    *len = converter->invalidUCharLength;
-    if (*len > 0) {
-        u_memcpy (errChars, converter->invalidUCharBuffer, *len);
-    }
-}
-
-#define SIG_MAX_LEN 5 /* number of leading source bytes examined; the longest signature tested below (UTF-7 "+/v8-") is 5 bytes */
-/*
- * Looks for a Unicode signature byte sequence at the start of source and
- * returns the matching name string ("UTF-8", "UTF-16BE", "UTF-16LE",
- * "UTF-32BE", "UTF-32LE", "SCSU", "BOCU-1", "UTF-7" or "UTF-EBCDIC"), storing
- * the signature's byte length in *signatureLength. Returns NULL with
- * *signatureLength=0 when nothing matches.
- *
- * sourceLength==-1 means that uprv_strlen(source) is used; signatureLength may
- * be NULL. A NULL source or sourceLength<-1 sets U_ILLEGAL_ARGUMENT_ERROR.
- * NULL is also returned, without touching *signatureLength, when pErrorCode is
- * NULL or already indicates a failure.
- */
-U_CAPI const char* U_EXPORT2
-ucnv_detectUnicodeSignature( const char* source,
- int32_t sourceLength,
- int32_t* signatureLength,
- UErrorCode* pErrorCode) {
- int32_t dummy; /* receives the length when the caller passes signatureLength==NULL */
-
- /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
- * bytes we don't misdetect something
- */
- char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
- int i = 0;
-
- if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){
- return NULL;
- }
-
- if(source == NULL || sourceLength < -1){
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- if(signatureLength == NULL) {
- signatureLength = &dummy;
- }
-
- if(sourceLength==-1){
- sourceLength=(int32_t)uprv_strlen(source);
- }
-
-
- while(i<sourceLength&& i<SIG_MAX_LEN){ /* copy at most SIG_MAX_LEN bytes; the rest of start[] keeps its 0xa5 padding */
- start[i]=source[i];
- i++;
- }
-
- if(start[0] == '\xFE' && start[1] == '\xFF') {
- *signatureLength=2;
- return "UTF-16BE";
- } else if(start[0] == '\xFF' && start[1] == '\xFE') {
- if(start[2] == '\x00' && start[3] =='\x00') { /* FF FE 00 00 is tested before settling for the 2-byte UTF-16LE signature */
- *signatureLength=4;
- return "UTF-32LE";
- } else {
- *signatureLength=2;
- return "UTF-16LE";
- }
- } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
- *signatureLength=3;
- return "UTF-8";
- } else if(start[0] == '\x00' && start[1] == '\x00' &&
- start[2] == '\xFE' && start[3]=='\xFF') {
- *signatureLength=4;
- return "UTF-32BE";
- } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {
- *signatureLength=3;
- return "SCSU";
- } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {
- *signatureLength=3;
- return "BOCU-1";
- } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {
- /*
- * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/
- * depending on the second UTF-16 code unit.
- * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
- * if it occurs.
- *
- * So far we have +/v
- */
- if(start[3] == '\x38' && start[4] == '\x2D') {
- /* 5 bytes +/v8- */
- *signatureLength=5;
- return "UTF-7";
- } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') {
- /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */
- *signatureLength=4;
- return "UTF-7";
- } /* "+/v" followed by any other byte is not a signature: control reaches the NULL return below */
- }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
- *signatureLength=4;
- return "UTF-EBCDIC";
- }
-
-
- /* no known Unicode signature byte sequence recognized */
- *signatureLength=0;
- return NULL;
-}
-
-/*
- * Returns a count derived from the converter's pending fromUnicode state:
- * U16_LENGTH(preFromUFirstCP)+preFromULength when preFromUFirstCP>=0,
- * else -preFromULength when preFromULength<0, else 1 when fromUChar32>0,
- * else 0.
- * Returns -1 when status is NULL or already a failure, and also when cnv is
- * NULL (setting U_ILLEGAL_ARGUMENT_ERROR).
- */
-U_CAPI int32_t U_EXPORT2
-ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
-{
-    int32_t pending=0;
-
-    if(status == NULL || U_FAILURE(*status)){
-        return -1;
-    }
-    if(cnv == NULL){
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return -1;
-    }
-
-    if(cnv->preFromUFirstCP >= 0){
-        pending = U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength;
-    }else if(cnv->preFromULength < 0){
-        pending = -cnv->preFromULength;
-    }else if(cnv->fromUChar32 > 0){
-        pending = 1;
-    }
-
-    return pending;
-}
-
-/*
- * Returns a count derived from the converter's pending toUnicode state:
- * the magnitude of preToULength when it is non-zero, else toULength when
- * that is positive, else 0.
- * Returns -1 when status is NULL or already a failure, and also when cnv is
- * NULL (setting U_ILLEGAL_ARGUMENT_ERROR).
- */
-U_CAPI int32_t U_EXPORT2
-ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){
-    if(status == NULL || U_FAILURE(*status)){
-        return -1;
-    }
-    if(cnv == NULL){
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return -1;
-    }
-
-    if(cnv->preToULength != 0){
-        return cnv->preToULength > 0 ? cnv->preToULength : -cnv->preToULength;
-    }
-
-    return cnv->toULength > 0 ? cnv->toULength : 0;
-}
-
-/*
- * Returns TRUE when ucnv_getType(cnv) is one of UCNV_SBCS, UCNV_DBCS,
- * UCNV_UTF32_BigEndian, UCNV_UTF32_LittleEndian, UCNV_UTF32 or UCNV_US_ASCII,
- * and FALSE for every other type.
- *
- * Returns FALSE when status is NULL or already a failure, and also when cnv
- * is NULL (setting U_ILLEGAL_ARGUMENT_ERROR).
- */
-U_CAPI UBool U_EXPORT2
-ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){
-    /* guard against a NULL status like ucnv_fromUCountPending()/ucnv_toUCountPending() do */
-    if (status == NULL || U_FAILURE(*status)) {
-        return FALSE;
-    }
-
-    if (cnv == NULL) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return FALSE;
-    }
-
-    switch (ucnv_getType(cnv)) {
-        case UCNV_SBCS:
-        case UCNV_DBCS:
-        case UCNV_UTF32_BigEndian:
-        case UCNV_UTF32_LittleEndian:
-        case UCNV_UTF32:
-        case UCNV_US_ASCII:
-            return TRUE;
-        default:
-            return FALSE;
-    }
-}
-#endif
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/contrib/libs/icu/common/ucnv2022.cpp b/contrib/libs/icu/common/ucnv2022.cpp
deleted file mode 100644
index 169ad4c5261..00000000000
--- a/contrib/libs/icu/common/ucnv2022.cpp
+++ /dev/null
@@ -1,3973 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2000-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* file name: ucnv2022.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2000feb03
-* created by: Markus W. Scherer
-*
-* Change history:
-*
-* 06/29/2000 helena Major rewrite of the callback APIs.
-* 08/08/2000 Ram Included support for ISO-2022-JP-2
-* Changed implementation of toUnicode
-* function
-* 08/21/2000 Ram Added support for ISO-2022-KR
-* 08/29/2000 Ram Separated implementation of EBCDIC to
-* ucnvebdc.c
-* 09/20/2000 Ram Added support for ISO-2022-CN
-* Added implementations for getNextUChar()
-* for specific 2022 country variants.
-* 10/31/2000 Ram Implemented offsets logic functions
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "unicode/uset.h"
-#include "unicode/ucnv_err.h"
-#include "unicode/ucnv_cb.h"
-#include "unicode/utf16.h"
-#include "ucnv_imp.h"
-#include "ucnv_bld.h"
-#include "ucnv_cnv.h"
-#include "ucnvmbcs.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "uassert.h"
-
-#ifdef U_ENABLE_GENERIC_ISO_2022
-/*
- * I am disabling the generic ISO-2022 converter after proposing to do so on
- * the icu mailing list two days ago.
- *
- * Reasons:
- * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
- * its designation sequences, single shifts with return to the previous state,
- * switch-with-no-return to UTF-16BE or similar, etc.
- * This is unlike the language-specific variants like ISO-2022-JP which
- * require a much smaller repertoire of ISO-2022 features.
- * These variants continue to be supported.
- * 2. I believe that no one is really using the generic ISO-2022 converter
- * but rather always one of the language-specific variants.
- * Note that ICU's generic ISO-2022 converter has always output one escape
- * sequence followed by UTF-8 for the whole stream.
- * 3. Switching between subcharsets is extremely slow, because each time
- * the previous converter is closed and a new one opened,
- * without any kind of caching, least-recently-used list, etc.
- * 4. The code is currently buggy, and given the above it does not seem
- * reasonable to spend the time on maintenance.
- * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
- * This means, for example, that when ISO-8859-7 is designated, the following
- * ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
- * The ICU ISO-2022 converter does not handle this - and has no information
- * about which subconverter would have to be shifted vs. which is designed
- * for 7-bit ISO-2022.
- *
- * Markus Scherer 2003-dec-03
- */
-#endif
-
-#if !UCONFIG_ONLY_HTML_CONVERSION
-static const char SHIFT_IN_STR[] = "\x0F";
-// static const char SHIFT_OUT_STR[] = "\x0E";
-#endif
-
-#define CR 0x0D
-#define LF 0x0A
-#define H_TAB 0x09
-#define V_TAB 0x0B
-#define SPACE 0x20
-
-enum {
- HWKANA_START=0xff61,
- HWKANA_END=0xff9f
-};
-
-/*
- * 94-character sets with native byte values A1..FE are encoded in ISO 2022
- * as bytes 21..7E. (Subtract 0x80.)
- * 96-character sets with native byte values A0..FF are encoded in ISO 2022
- * as bytes 20..7F. (Subtract 0x80.)
- * Do not encode C1 control codes with native bytes 80..9F
- * as bytes 00..1F (C0 control codes).
- */
-enum {
- GR94_START=0xa1,
- GR94_END=0xfe,
- GR96_START=0xa0,
- GR96_END=0xff
-};
-
-/*
- * ISO 2022 control codes must not be converted from Unicode
- * because they would mess up the byte stream.
- * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
- * corresponding to SO, SI, and ESC.
- */
-#define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)
-
-/*
- * for ISO-2022-JP and -CN implementations
- *
- * The JP and CN constants share one numeric range: ISO8859_1 and GB2312_1 are
- * both 1, ISO8859_7 and ISO_IR_165 are both 2, JISX201 and CNS_11643 are both 3.
- */
-typedef enum {
- /* shared values */
- INVALID_STATE=-1,
- ASCII = 0,
-
- SS2_STATE=0x10,
- SS3_STATE, /* 0x11 */
-
- /* JP */
- ISO8859_1 = 1 ,
- ISO8859_7 = 2 ,
- JISX201 = 3,
- JISX208 = 4,
- JISX212 = 5,
- GB2312 =6,
- KSC5601 =7,
- HWKANA_7BIT=8, /* Halfwidth Katakana 7 bit */
-
- /* CN */
- /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
- GB2312_1=1,
- ISO_IR_165=2,
- CNS_11643=3,
-
- /*
- * these are used in StateEnum and ISO2022State variables,
- * but CNS_11643 must be used to index into myConverterArray[]
- */
- CNS_11643_0=0x20,
- CNS_11643_1, /* 0x21 */
- CNS_11643_2, /* 0x22 */
- CNS_11643_3, /* 0x23 */
- CNS_11643_4, /* 0x24 */
- CNS_11643_5, /* 0x25 */
- CNS_11643_6, /* 0x26 */
- CNS_11643_7 /* 0x27 */
-} StateEnum;
-
-/* is the StateEnum charset value for a DBCS charset? */
-#if UCONFIG_ONLY_HTML_CONVERSION
-#define IS_JP_DBCS(cs) (JISX208==(cs))
-#else
-#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
-#endif
-
-#define CSM(cs) ((uint16_t)1<<(cs))
-
-/*
- * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
- * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
- *
- * Note: The converter uses some leniency:
- * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
- * all versions, not just JIS7 and JIS8.
- * - ICU does not distinguish between different versions of JIS X 0208.
- */
-#if UCONFIG_ONLY_HTML_CONVERSION
-enum { MAX_JA_VERSION=0 };
-#else
-enum { MAX_JA_VERSION=4 };
-#endif
-static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
- CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
-#if !UCONFIG_ONLY_HTML_CONVERSION
- CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
- CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
- CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
- CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
-#endif
-};
-
-typedef enum {
- ASCII1=0,
- LATIN1,
- SBCS,
- DBCS,
- MBCS,
- HWKANA
-}Cnv2022Type;
-
-typedef struct ISO2022State {
- int8_t cs[4]; /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
- int8_t g; /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
- int8_t prevG; /* g before single shift (SS2 or SS3) */
-} ISO2022State;
-
-#define UCNV_OPTIONS_VERSION_MASK 0xf
-#define UCNV_2022_MAX_CONVERTERS 10
-
-typedef struct{
- UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];
- UConverter *currentConverter;
- Cnv2022Type currentType;
- ISO2022State toU2022State, fromU2022State;
- uint32_t key;
- uint32_t version;
-#ifdef U_ENABLE_GENERIC_ISO_2022
- UBool isFirstBuffer;
-#endif
- UBool isEmptySegment;
- char name[30];
- char locale[3];
-}UConverterDataISO2022;
-
-/* Protos */
-/* ISO-2022 ----------------------------------------------------------------- */
-
-/*Forward declaration */
-U_CFUNC void U_CALLCONV
-ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
- UErrorCode * err);
-U_CFUNC void U_CALLCONV
-ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
- UErrorCode * err);
-
-#define ESC_2022 0x1B /*ESC*/
-
-typedef enum
-{
- INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/
- VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/
- VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/
- VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/
-} UCNV_TableStates_2022;
-
-/*
-* The way these state transition arrays work is:
-* ex : ESC$B is the sequence for JISX208
-* a) First Iteration: char is ESC
-* i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
-* int x = normalize_esq_chars_2022[27] which is equal to 1
-* ii) Search for this value in escSeqStateTable_Key_2022[]
-* value of x is stored at escSeqStateTable_Key_2022[0]
-* iii) Save this index as offset
-* iv) Get state of this sequence from escSeqStateTable_Value_2022[]
-* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
-* b) Switch on this state and continue to next char
-* i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
-* which is normalize_esq_chars_2022[36] == 4
-* ii) x is currently 1(from above)
-* x<<=5 -- x is now 32
-* x+=normalize_esq_chars_2022[36]
-* now x is 36
-* iii) Search for this value in escSeqStateTable_Key_2022[]
-* value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
-* iv) Get state of this sequence from escSeqStateTable_Value_2022[]
-* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
-* c) Switch on this state and continue to next char
-* i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index
-* ii) x is currently 36 (from above)
-* x<<=5 -- x is now 1152
-* x+=normalize_esq_chars_2022[66]
-* now x is 1161
-* iii) Search for this value in escSeqStateTable_Key_2022[]
-* value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
-* iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
-* escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
-* v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX-208
-*/
-
-
-/*Below are the 3 arrays depicting a state transition table*/
-static const int8_t normalize_esq_chars_2022[256] = {
-/* 0 1 2 3 4 5 6 7 8 9 */
-
- 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,4 ,7 ,29 ,0
- ,2 ,24 ,26 ,27 ,0 ,3 ,23 ,6 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,5 ,8 ,9 ,10 ,11 ,12
- ,13 ,14 ,15 ,16 ,17 ,18 ,19 ,20 ,25 ,28
- ,0 ,0 ,21 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,22 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
- ,0 ,0 ,0 ,0 ,0 ,0
-};
-
-#ifdef U_ENABLE_GENERIC_ISO_2022
-/*
- * When the generic ISO-2022 converter is completely removed, not just disabled
- * per #ifdef, then the following state table and the associated tables that are
- * dimensioned with MAX_STATES_2022 should be trimmed.
- *
- * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
- * the associated escape sequences starting with ESC ( B should be removed.
- * This includes the ones with key values 1097 and all of the ones above 1000000.
- *
- * For the latter, the tables can simply be truncated.
- * For the former, since the tables must be kept parallel, it is probably best
- * to simply duplicate an adjacent table cell, parallel in all tables.
- *
- * It may make sense to restructure the tables, especially by using small search
- * tables for the variants instead of indexing them parallel to the table here.
- */
-#endif
-
-#define MAX_STATES_2022 74
-static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
-/* 0 1 2 3 4 5 6 7 8 9 */
-
- 1 ,34 ,36 ,39 ,55 ,57 ,60 ,61 ,1093 ,1096
- ,1097 ,1098 ,1099 ,1100 ,1101 ,1102 ,1103 ,1104 ,1105 ,1106
- ,1109 ,1154 ,1157 ,1160 ,1161 ,1176 ,1178 ,1179 ,1254 ,1257
- ,1768 ,1773 ,1957 ,35105 ,36933 ,36936 ,36937 ,36938 ,36939 ,36940
- ,36942 ,36943 ,36944 ,36945 ,36946 ,36947 ,36948 ,37640 ,37642 ,37644
- ,37646 ,37711 ,37744 ,37745 ,37746 ,37747 ,37748 ,40133 ,40136 ,40138
- ,40139 ,40140 ,40141 ,1123363 ,35947624 ,35947625 ,35947626 ,35947627 ,35947629 ,35947630
- ,35947631 ,35947635 ,35947636 ,35947638
-};
-
-#ifdef U_ENABLE_GENERIC_ISO_2022
-
-static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {
- /* 0 1 2 3 4 5 6 7 8 9 */
-
- NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,"latin1" ,"latin1"
- ,"latin1" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"JISX0201" ,"JISX0201" ,"latin1"
- ,"latin1" ,NULL ,"JISX-208" ,"ibm-5478" ,"JISX-208" ,NULL ,NULL ,NULL ,NULL ,"UTF8"
- ,"ISO-8859-1" ,"ISO-8859-7" ,"JIS-X-208" ,NULL ,"ibm-955" ,"ibm-367" ,"ibm-952" ,"ibm-949" ,"JISX-212" ,"ibm-1383"
- ,"ibm-952" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-5478" ,"ibm-949" ,"ISO-IR-165"
- ,"CNS-11643-1992,1" ,"CNS-11643-1992,2" ,"CNS-11643-1992,3" ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6" ,"CNS-11643-1992,7" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
- ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL ,"latin1" ,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089"
- ,"ibm-920" ,"ibm-915" ,"ibm-915" ,"latin1"
-};
-
-#endif
-
-static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
-/* 0 1 2 3 4 5 6 7 8 9 */
- VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
- ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
- ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
- ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
- ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
- ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
- ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
- ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
-};
-
-/* Type def for refactoring changeState_2022 code*/
-typedef enum{
-#ifdef U_ENABLE_GENERIC_ISO_2022
- ISO_2022=0,
-#endif
- ISO_2022_JP=1,
-#if !UCONFIG_ONLY_HTML_CONVERSION
- ISO_2022_KR=2,
- ISO_2022_CN=3
-#endif
-} Variant2022;
-
-/*********** ISO 2022 Converter Protos ***********/
-static void U_CALLCONV
-_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
-
-static void U_CALLCONV
- _ISO2022Close(UConverter *converter);
-
-static void U_CALLCONV
-_ISO2022Reset(UConverter *converter, UConverterResetChoice choice);
-
-U_CDECL_BEGIN
-static const char * U_CALLCONV
-_ISO2022getName(const UConverter* cnv);
-U_CDECL_END
-
-static void U_CALLCONV
-_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);
-
-U_CDECL_BEGIN
-static UConverter * U_CALLCONV
-_ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);
-
-U_CDECL_END
-
-#ifdef U_ENABLE_GENERIC_ISO_2022
-static void U_CALLCONV
-T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
-#endif
-
-namespace {
-
-/*const UConverterSharedData _ISO2022Data;*/
-extern const UConverterSharedData _ISO2022JPData;
-
-#if !UCONFIG_ONLY_HTML_CONVERSION
-extern const UConverterSharedData _ISO2022KRData;
-extern const UConverterSharedData _ISO2022CNData;
-#endif
-
-} // namespace
-
-/*************** Converter implementations ******************/
-
-/* The purpose of this function is to get around gcc compiler warnings. */
-static inline void
-fromUWriteUInt8(UConverter *cnv,
- const char *bytes, int32_t length,
- uint8_t **target, const char *targetLimit,
- int32_t **offsets,
- int32_t sourceIndex,
- UErrorCode *pErrorCode)
-{
- char *targetChars = (char *)*target;
- ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,
- offsets, sourceIndex, pErrorCode);
- *target = (uint8_t*)targetChars;
-
-}
-
-static inline void
-setInitialStateToUnicodeKR(UConverter* /*converter*/, UConverterDataISO2022 *myConverterData){
- if(myConverterData->version == 1) {
- UConverter *cnv = myConverterData->currentConverter;
-
- cnv->toUnicodeStatus=0; /* offset */
- cnv->mode=0; /* state */
- cnv->toULength=0; /* byteIndex */
- }
-}
-
-static inline void
-setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
- /* in ISO-2022-KR the designator sequence appears only once
- * in a file so we append it only once
- */
- if( converter->charErrorBufferLength==0){
-
- converter->charErrorBufferLength = 4;
- converter->charErrorBuffer[0] = 0x1b;
- converter->charErrorBuffer[1] = 0x24;
- converter->charErrorBuffer[2] = 0x29;
- converter->charErrorBuffer[3] = 0x43;
- }
- if(myConverterData->version == 1) {
- UConverter *cnv = myConverterData->currentConverter;
-
- cnv->fromUChar32=0;
- cnv->fromUnicodeStatus=1; /* prevLength */
- }
-}
-
-static void U_CALLCONV
-_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
-
- char myLocale[7]={' ',' ',' ',' ',' ',' ', '\0'};
-
- cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
- if(cnv->extraInfo != NULL) {
- UConverterNamePieces stackPieces;
- UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
- UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
- uint32_t version;
-
- stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
-
- uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));
- myConverterData->currentType = ASCII1;
- cnv->fromUnicodeStatus =FALSE;
- if(pArgs->locale){
- uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale)-1);
- }
- version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
- myConverterData->version = version;
- if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
- (myLocale[2]=='_' || myLocale[2]=='\0'))
- {
- /* open the required converters and cache them */
- if(version>MAX_JA_VERSION) {
- // ICU 55 fails to open a converter for an unsupported version.
- // Previously, it fell back to version 0, but that would yield
- // unexpected behavior.
- *errorCode = U_MISSING_RESOURCE_ERROR;
- return;
- }
- if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
- myConverterData->myConverterArray[ISO8859_7] =
- ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
- }
- myConverterData->myConverterArray[JISX208] =
- ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
- if(jpCharsetMasks[version]&CSM(JISX212)) {
- myConverterData->myConverterArray[JISX212] =
- ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
- }
- if(jpCharsetMasks[version]&CSM(GB2312)) {
- myConverterData->myConverterArray[GB2312] =
- ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */
- }
- if(jpCharsetMasks[version]&CSM(KSC5601)) {
- myConverterData->myConverterArray[KSC5601] =
- ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
- }
-
- /* set the function pointers to appropriate funtions */
- cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
- uprv_strcpy(myConverterData->locale,"ja");
-
- (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
- size_t len = uprv_strlen(myConverterData->name);
- myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
- myConverterData->name[len+1]='\0';
- }
-#if !UCONFIG_ONLY_HTML_CONVERSION
- else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
- (myLocale[2]=='_' || myLocale[2]=='\0'))
- {
- if(version>1) {
- // ICU 55 fails to open a converter for an unsupported version.
- // Previously, it fell back to version 0, but that would yield
- // unexpected behavior.
- *errorCode = U_MISSING_RESOURCE_ERROR;
- return;
- }
- const char *cnvName;
- if(version==1) {
- cnvName="icu-internal-25546";
- } else {
- cnvName="ibm-949";
- myConverterData->version=version=0;
- }
- if(pArgs->onlyTestIsLoadable) {
- ucnv_canCreateConverter(cnvName, errorCode); /* errorCode carries result */
- uprv_free(cnv->extraInfo);
- cnv->extraInfo=NULL;
- return;
- } else {
- myConverterData->currentConverter=ucnv_open(cnvName, errorCode);
- if (U_FAILURE(*errorCode)) {
- _ISO2022Close(cnv);
- return;
- }
-
- if(version==1) {
- (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
- uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);
- cnv->subCharLen = myConverterData->currentConverter->subCharLen;
- }else{
- (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");
- }
-
- /* initialize the state variables */
- setInitialStateToUnicodeKR(cnv, myConverterData);
- setInitialStateFromUnicodeKR(cnv, myConverterData);
-
- /* set the function pointers to appropriate funtions */
- cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
- uprv_strcpy(myConverterData->locale,"ko");
- }
- }
- else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
- (myLocale[2]=='_' || myLocale[2]=='\0'))
- {
- if(version>2) {
- // ICU 55 fails to open a converter for an unsupported version.
- // Previously, it fell back to version 0, but that would yield
- // unexpected behavior.
- *errorCode = U_MISSING_RESOURCE_ERROR;
- return;
- }
-
- /* open the required converters and cache them */
- myConverterData->myConverterArray[GB2312_1] =
- ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode);
- if(version==1) {
- myConverterData->myConverterArray[ISO_IR_165] =
- ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode);
- }
- myConverterData->myConverterArray[CNS_11643] =
- ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode);
-
-
- /* set the function pointers to appropriate funtions */
- cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
- uprv_strcpy(myConverterData->locale,"cn");
-
- if (version==0){
- myConverterData->version = 0;
- (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");
- }else if (version==1){
- myConverterData->version = 1;
- (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");
- }else {
- myConverterData->version = 2;
- (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
- }
- }
-#endif // !UCONFIG_ONLY_HTML_CONVERSION
- else{
-#ifdef U_ENABLE_GENERIC_ISO_2022
- myConverterData->isFirstBuffer = TRUE;
-
- /* append the UTF-8 escape sequence */
- cnv->charErrorBufferLength = 3;
- cnv->charErrorBuffer[0] = 0x1b;
- cnv->charErrorBuffer[1] = 0x25;
- cnv->charErrorBuffer[2] = 0x42;
-
- cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
- /* initialize the state variables */
- uprv_strcpy(myConverterData->name,"ISO_2022");
-#else
- *errorCode = U_MISSING_RESOURCE_ERROR;
- // Was U_UNSUPPORTED_ERROR but changed in ICU 55 to a more standard
- // data loading error code.
- return;
-#endif
- }
-
- cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
-
- if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) {
- _ISO2022Close(cnv);
- }
- } else {
- *errorCode = U_MEMORY_ALLOCATION_ERROR;
- }
-}
-
-
-static void U_CALLCONV
-_ISO2022Close(UConverter *converter) {
- UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
- UConverterSharedData **array = myData->myConverterArray;
- int32_t i;
-
- if (converter->extraInfo != NULL) {
- /*close the array of converter pointers and free the memory*/
- for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
- if(array[i]!=NULL) {
- ucnv_unloadSharedDataIfReady(array[i]);
- }
- }
-
- ucnv_close(myData->currentConverter);
-
- if(!converter->isExtraLocal){
- uprv_free (converter->extraInfo);
- converter->extraInfo = NULL;
- }
- }
-}
-
-static void U_CALLCONV
-_ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
- UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);
- if(choice<=UCNV_RESET_TO_UNICODE) {
- uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
- myConverterData->key = 0;
- myConverterData->isEmptySegment = FALSE;
- }
- if(choice!=UCNV_RESET_TO_UNICODE) {
- uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
- }
-#ifdef U_ENABLE_GENERIC_ISO_2022
- if(myConverterData->locale[0] == 0){
- if(choice<=UCNV_RESET_TO_UNICODE) {
- myConverterData->isFirstBuffer = TRUE;
- myConverterData->key = 0;
- if (converter->mode == UCNV_SO){
- ucnv_close (myConverterData->currentConverter);
- myConverterData->currentConverter=NULL;
- }
- converter->mode = UCNV_SI;
- }
- if(choice!=UCNV_RESET_TO_UNICODE) {
- /* re-append UTF-8 escape sequence */
- converter->charErrorBufferLength = 3;
- converter->charErrorBuffer[0] = 0x1b;
- converter->charErrorBuffer[1] = 0x28;
- converter->charErrorBuffer[2] = 0x42;
- }
- }
- else
-#endif
- {
- /* reset the state variables */
- if(myConverterData->locale[0] == 'k'){
- if(choice<=UCNV_RESET_TO_UNICODE) {
- setInitialStateToUnicodeKR(converter, myConverterData);
- }
- if(choice!=UCNV_RESET_TO_UNICODE) {
- setInitialStateFromUnicodeKR(converter, myConverterData);
- }
- }
- }
-}
-
-U_CDECL_BEGIN
-
-static const char * U_CALLCONV
-_ISO2022getName(const UConverter* cnv){
- if(cnv->extraInfo){
- UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
- return myData->name;
- }
- return NULL;
-}
-
-U_CDECL_END
-
-
-/*************** to unicode *******************/
-/****************************************************************************
- * Recognized escape sequences are
- * <ESC>(B ASCII
- * <ESC>.A ISO-8859-1
- * <ESC>.F ISO-8859-7
- * <ESC>(J JISX-201
- * <ESC>(I JISX-201
- * <ESC>$B JISX-208
- * <ESC>$@ JISX-208
- * <ESC>$(D JISX-212
- * <ESC>$A GB2312
- * <ESC>$(C KSC5601
- */
-static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
-/* 0 1 2 3 4 5 6 7 8 9 */
- INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
- ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STATE
- ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
- ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STATE
- ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
- ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
- ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
- ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
-};
-
-#if !UCONFIG_ONLY_HTML_CONVERSION
-/*************** to unicode *******************/
-static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
-/* 0 1 2 3 4 5 6 7 8 9 */
- INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
- ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
- ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
- ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
- ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165
- ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
- ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
- ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
-};
-#endif
-
-
-static UCNV_TableStates_2022
-getKey_2022(char c,int32_t* key,int32_t* offset){
- int32_t togo;
- int32_t low = 0;
- int32_t hi = MAX_STATES_2022;
- int32_t oldmid=0;
-
- togo = normalize_esq_chars_2022[(uint8_t)c];
- if(togo == 0) {
- /* not a valid character anywhere in an escape sequence */
- *key = 0;
- *offset = 0;
- return INVALID_2022;
- }
- togo = (*key << 5) + togo;
-
- while (hi != low) /*binary search*/{
-
- int32_t mid = (hi+low) >> 1; /*Finds median*/
-
- if (mid == oldmid)
- break;
-
- if (escSeqStateTable_Key_2022[mid] > togo){
- hi = mid;
- }
- else if (escSeqStateTable_Key_2022[mid] < togo){
- low = mid;
- }
- else /*we found it*/{
- *key = togo;
- *offset = mid;
- return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid];
- }
- oldmid = mid;
-
- }
-
- *key = 0;
- *offset = 0;
- return INVALID_2022;
-}
-
-/*runs through a state machine to determine the escape sequence - codepage correspondance
- */
-static void
-changeState_2022(UConverter* _this,
- const char** source,
- const char* sourceLimit,
- Variant2022 var,
- UErrorCode* err){
- UCNV_TableStates_2022 value;
- UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
- uint32_t key = myData2022->key;
- int32_t offset = 0;
- int8_t initialToULength = _this->toULength;
- char c;
-
- value = VALID_NON_TERMINAL_2022;
- while (*source < sourceLimit) {
- c = *(*source)++;
- _this->toUBytes[_this->toULength++]=(uint8_t)c;
- value = getKey_2022(c,(int32_t *) &key, &offset);
-
- switch (value){
-
- case VALID_NON_TERMINAL_2022 :
- /* continue with the loop */
- break;
-
- case VALID_TERMINAL_2022:
- key = 0;
- goto DONE;
-
- case INVALID_2022:
- goto DONE;
-
- case VALID_MAYBE_TERMINAL_2022:
-#ifdef U_ENABLE_GENERIC_ISO_2022
- /* ESC ( B is ambiguous only for ISO_2022 itself */
- if(var == ISO_2022) {
- /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */
- _this->toULength = 0;
-
- /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */
-
- /* continue with the loop */
- value = VALID_NON_TERMINAL_2022;
- break;
- } else
-#endif
- {
- /* not ISO_2022 itself, finish here */
- value = VALID_TERMINAL_2022;
- key = 0;
- goto DONE;
- }
- }
- }
-
-DONE:
- myData2022->key = key;
-
- if (value == VALID_NON_TERMINAL_2022) {
- /* indicate that the escape sequence is incomplete: key!=0 */
- return;
- } else if (value == INVALID_2022 ) {
- *err = U_ILLEGAL_ESCAPE_SEQUENCE;
- } else /* value == VALID_TERMINAL_2022 */ {
- switch(var){
-#ifdef U_ENABLE_GENERIC_ISO_2022
- case ISO_2022:
- {
- const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
- if(chosenConverterName == NULL) {
- /* SS2 or SS3 */
- *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
- _this->toUCallbackReason = UCNV_UNASSIGNED;
- return;
- }
-
- _this->mode = UCNV_SI;
- ucnv_close(myData2022->currentConverter);
- myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
- if(U_SUCCESS(*err)) {
- myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
- _this->mode = UCNV_SO;
- }
- break;
- }
-#endif
- case ISO_2022_JP:
- {
- StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset];
- switch(tempState) {
- case INVALID_STATE:
- *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
- break;
- case SS2_STATE:
- if(myData2022->toU2022State.cs[2]!=0) {
- if(myData2022->toU2022State.g<2) {
- myData2022->toU2022State.prevG=myData2022->toU2022State.g;
- }
- myData2022->toU2022State.g=2;
- } else {
- /* illegal to have SS2 before a matching designator */
- *err = U_ILLEGAL_ESCAPE_SEQUENCE;
- }
- break;
- /* case SS3_STATE: not used in ISO-2022-JP-x */
- case ISO8859_1:
- case ISO8859_7:
- if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
- *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
- } else {
- /* G2 charset for SS2 */
- myData2022->toU2022State.cs[2]=(int8_t)tempState;
- }
- break;
- default:
- if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
- *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
- } else {
- /* G0 charset */
- myData2022->toU2022State.cs[0]=(int8_t)tempState;
- }
- break;
- }
- }
- break;
-#if !UCONFIG_ONLY_HTML_CONVERSION
- case ISO_2022_CN:
- {
- StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
- switch(tempState) {
- case INVALID_STATE:
- *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
- break;
- case SS2_STATE:
- if(myData2022->toU2022State.cs[2]!=0) {
- if(myData2022->toU2022State.g<2) {
- myData2022->toU2022State.prevG=myData2022->toU2022State.g;
- }
- myData2022->toU2022State.g=2;
- } else {
- /* illegal to have SS2 before a matching designator */
- *err = U_ILLEGAL_ESCAPE_SEQUENCE;
- }
- break;
- case SS3_STATE:
- if(myData2022->toU2022State.cs[3]!=0) {
- if(myData2022->toU2022State.g<2) {
- myData2022->toU2022State.prevG=myData2022->toU2022State.g;
- }
- myData2022->toU2022State.g=3;
- } else {
- /* illegal to have SS3 before a matching designator */
- *err = U_ILLEGAL_ESCAPE_SEQUENCE;
- }
- break;
- case ISO_IR_165:
- if(myData2022->version==0) {
- *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
- break;
- }
- U_FALLTHROUGH;
- case GB2312_1:
- U_FALLTHROUGH;
- case CNS_11643_1:
- myData2022->toU2022State.cs[1]=(int8_t)tempState;
- break;
- case CNS_11643_2:
- myData2022->toU2022State.cs[2]=(int8_t)tempState;
- break;
- default:
- /* other CNS 11643 planes */
- if(myData2022->version==0) {
- *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
- } else {
- myData2022->toU2022State.cs[3]=(int8_t)tempState;
- }
- break;
- }
- }
- break;
- case ISO_2022_KR:
- if(offset==0x30){
- /* nothing to be done, just accept this one escape sequence */
- } else {
- *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
- }
- break;
-#endif // !UCONFIG_ONLY_HTML_CONVERSION
-
- default:
- *err = U_ILLEGAL_ESCAPE_SEQUENCE;
- break;
- }
- }
- if(U_SUCCESS(*err)) {
- _this->toULength = 0;
- } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
- if(_this->toULength>1) {
- /*
- * Ticket 5691: consistent illegal sequences:
- * - We include at least the first byte (ESC) in the illegal sequence.
- * - If any of the non-initial bytes could be the start of a character,
- * we stop the illegal sequence before the first one of those.
- * In escape sequences, all following bytes are "printable", that is,
- * unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
- * they are valid single/lead bytes.
- * For simplicity, we always only report the initial ESC byte as the
- * illegal sequence and back out all other bytes we looked at.
- */
- /* Back out some bytes. */
- int8_t backOutDistance=_this->toULength-1;
- int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
- if(backOutDistance<=bytesFromThisBuffer) {
- /* same as initialToULength<=1 */
- *source-=backOutDistance;
- } else {
- /* Back out bytes from the previous buffer: Need to replay them. */
- _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
- /* same as -(initialToULength-1) */
- /* preToULength is negative! */
- uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength);
- *source-=bytesFromThisBuffer;
- }
- _this->toULength=1;
- }
- } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
- _this->toUCallbackReason = UCNV_UNASSIGNED;
- }
-}
-
-#if !UCONFIG_ONLY_HTML_CONVERSION
-/*Checks the characters of the buffer against valid 2022 escape sequences
-*if the match we return a pointer to the initial start of the sequence otherwise
-*we return sourceLimit
-*/
-/*for 2022 looks ahead in the stream
- *to determine the longest possible convertible
- *data stream
- */
-static inline const char*
-getEndOfBuffer_2022(const char** source,
- const char* sourceLimit,
- UBool /*flush*/){
-
- const char* mySource = *source;
-
-#ifdef U_ENABLE_GENERIC_ISO_2022
- if (*source >= sourceLimit)
- return sourceLimit;
-
- do{
-
- if (*mySource == ESC_2022){
- int8_t i;
- int32_t key = 0;
- int32_t offset;
- UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
-
- /* Kludge: I could not
- * figure out the reason for validating an escape sequence
- * twice - once here and once in changeState_2022().
- * is it possible to have an ESC character in a ISO2022
- * byte stream which is valid in a code page? Is it legal?
- */
- for (i=0;
- (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
- i++) {
- value = getKey_2022(*(mySource+i), &key, &offset);
- }
- if (value > 0 || *mySource==ESC_2022)
- return mySource;
-
- if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
- return sourceLimit;
- }
- }while (++mySource < sourceLimit);
-
- return sourceLimit;
-#else
- while(mySource < sourceLimit && *mySource != ESC_2022) {
- ++mySource;
- }
- return mySource;
-#endif
-}
-#endif
-
-/* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
- * any future change in _MBCSFromUChar32() function should be reflected here.
- * @return number of bytes in *value; negative number if fallback; 0 if no mapping
- */
-static inline int32_t
-MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
- UChar32 c,
- uint32_t* value,
- UBool useFallback,
- int outputType)
-{
- const int32_t *cx;
- const uint16_t *table;
- uint32_t stage2Entry;
- uint32_t myValue;
- int32_t length;
- const uint8_t *p;
- /*
- * TODO(markus): Use and require new, faster MBCS conversion table structures.
- * Use internal version of ucnv_open() that verifies that the new structures are available,
- * else U_INTERNAL_PROGRAM_ERROR.
- */
- /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
- if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
- table=sharedData->mbcs.fromUnicodeTable;
- stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
- /* get the bytes and the length for the output */
- if(outputType==MBCS_OUTPUT_2){
- myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
- if(myValue<=0xff) {
- length=1;
- } else {
- length=2;
- }
- } else /* outputType==MBCS_OUTPUT_3 */ {
- p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
- myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
- if(myValue<=0xff) {
- length=1;
- } else if(myValue<=0xffff) {
- length=2;
- } else {
- length=3;
- }
- }
- /* is this code point assigned, or do we use fallbacks? */
- if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
- /* assigned */
- *value=myValue;
- return length;
- } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {
- /*
- * We allow a 0 byte output if the "assigned" bit is set for this entry.
- * There is no way with this data structure for fallback output
- * to be a zero byte.
- */
- *value=myValue;
- return -length;
- }
- }
-
- cx=sharedData->mbcs.extIndexes;
- if(cx!=NULL) {
- return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
- }
-
- /* unassigned */
- return 0;
-}
-
-/* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
- * any future change in _MBCSSingleFromUChar32() function should be reflected here.
- * @param retval pointer to output byte
- * @return 1 roundtrip byte 0 no mapping -1 fallback byte
- */
-static inline int32_t
-MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
- UChar32 c,
- uint32_t* retval,
- UBool useFallback)
-{
- const uint16_t *table;
- int32_t value;
- /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
- if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
- return 0;
- }
- /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
- table=sharedData->mbcs.fromUnicodeTable;
- /* get the byte for the output */
- value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
- /* is this code point assigned, or do we use fallbacks? */
- *retval=(uint32_t)(value&0xff);
- if(value>=0xf00) {
- return 1; /* roundtrip */
- } else if(useFallback ? value>=0x800 : value>=0xc00) {
- return -1; /* fallback taken */
- } else {
- return 0; /* no mapping */
- }
-}
-
-/*
- * Check that the result is a 2-byte value with each byte in the range A1..FE
- * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
- * to move it to the ISO 2022 range 21..7E.
- * Return 0 if out of range.
- */
-static inline uint32_t
-_2022FromGR94DBCS(uint32_t value) {
- if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
- (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
- ) {
- return value - 0x8080; /* shift down to 21..7e byte range */
- } else {
- return 0; /* not valid for ISO 2022 */
- }
-}
-
-#if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
-/*
- * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
- * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
- * unchanged.
- */
-static inline uint32_t
-_2022ToGR94DBCS(uint32_t value) {
- uint32_t returnValue = value + 0x8080;
- if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) &&
- (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) {
- return returnValue;
- } else {
- return value;
- }
-}
-#endif
-
-#ifdef U_ENABLE_GENERIC_ISO_2022
-
-/**********************************************************************************
-* ISO-2022 Converter
-*
-*
-*/
-
-static void U_CALLCONV
-T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
- UErrorCode* err){
- const char* mySourceLimit, *realSourceLimit;
- const char* sourceStart;
- const UChar* myTargetStart;
- UConverter* saveThis;
- UConverterDataISO2022* myData;
- int8_t length;
-
- saveThis = args->converter;
- myData=((UConverterDataISO2022*)(saveThis->extraInfo));
-
- realSourceLimit = args->sourceLimit;
- while (args->source < realSourceLimit) {
- if(myData->key == 0) { /* are we in the middle of an escape sequence? */
- /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
- mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);
-
- if(args->source < mySourceLimit) {
- if(myData->currentConverter==NULL) {
- myData->currentConverter = ucnv_open("ASCII",err);
- if(U_FAILURE(*err)){
- return;
- }
-
- myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
- saveThis->mode = UCNV_SO;
- }
-
- /* convert to before the ESC or until the end of the buffer */
- myData->isFirstBuffer=FALSE;
- sourceStart = args->source;
- myTargetStart = args->target;
- args->converter = myData->currentConverter;
- ucnv_toUnicode(args->converter,
- &args->target,
- args->targetLimit,
- &args->source,
- mySourceLimit,
- args->offsets,
- (UBool)(args->flush && mySourceLimit == realSourceLimit),
- err);
- args->converter = saveThis;
-
- if (*err == U_BUFFER_OVERFLOW_ERROR) {
- /* move the overflow buffer */
- length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
- myData->currentConverter->UCharErrorBufferLength = 0;
- if(length > 0) {
- uprv_memcpy(saveThis->UCharErrorBuffer,
- myData->currentConverter->UCharErrorBuffer,
- length*U_SIZEOF_UCHAR);
- }
- return;
- }
-
- /*
- * At least one of:
- * -Error while converting
- * -Done with entire buffer
- * -Need to write offsets or update the current offset
- * (leave that up to the code in ucnv.c)
- *
- * or else we just stopped at an ESC byte and continue with changeState_2022()
- */
- if (U_FAILURE(*err) ||
- (args->source == realSourceLimit) ||
- (args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) ||
- (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))
- ) {
- /* copy partial or error input for truncated detection and error handling */
- if(U_FAILURE(*err)) {
- length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
- if(length > 0) {
- uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);
- }
- } else {
- length = saveThis->toULength = myData->currentConverter->toULength;
- if(length > 0) {
- uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);
- if(args->source < mySourceLimit) {
- *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
- }
- }
- }
- return;
- }
- }
- }
-
- sourceStart = args->source;
- changeState_2022(args->converter,
- &(args->source),
- realSourceLimit,
- ISO_2022,
- err);
- if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) {
- /* let the ucnv.c code update its current offset */
- return;
- }
- }
-}
-
-#endif
-
-/*
- * To Unicode Callback helper function
- */
-static void
-toUnicodeCallback(UConverter *cnv,
- const uint32_t sourceChar, const uint32_t targetUniChar,
- UErrorCode* err){
- if(sourceChar>0xff){
- cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);
- cnv->toUBytes[1] = (uint8_t)sourceChar;
- cnv->toULength = 2;
- }
- else{
- cnv->toUBytes[0] =(char) sourceChar;
- cnv->toULength = 1;
- }
-
- if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
- *err = U_INVALID_CHAR_FOUND;
- }
- else{
- *err = U_ILLEGAL_CHAR_FOUND;
- }
-}
-
-/**************************************ISO-2022-JP*************************************************/
-
-/************************************** IMPORTANT **************************************************
-* The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
-* MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
-* The converter iterates over each Unicode codepoint
-* to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
-* processed one char at a time it would make sense to reduce the extra processing a canned converter
-* would do as far as possible.
-*
-* If the implementation of these macros or structure of sharedData struct change in the future, make
-* sure that ISO-2022 is also changed.
-***************************************************************************************************
-*/
-
-/***************************************************************************************************
-* Rules for ISO-2022-jp encoding
-* (i) Escape sequences must be fully contained within a line they should not
-* span new lines or CRs
-* (ii) If the last character on a line is represented by two bytes then an ASCII or
-* JIS-Roman character escape sequence should follow before the line terminates
-* (iii) If the first character on the line is represented by two bytes then a two
-* byte character escape sequence should precede it
-* (iv) If no escape sequence is encountered then the characters are ASCII
-* (v) Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
-* and invoked with SS2 (ESC N).
-* (vi) If there is any G0 designation in text, there must be a switch to
-* ASCII or to JIS X 0201-Roman before a space character (but not
-* necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
-* characters such as tab or CRLF.
-* (vii) Supported encodings:
-* ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
-*
-* source : RFC-1554
-*
-* JISX201, JISX208,JISX212 : new .cnv data files created
-* KSC5601 : alias to ibm-949 mapping table
-* GB2312 : alias to ibm-1386 mapping table
-* ISO-8859-1 : Algorithmic implemented as LATIN1 case
-* ISO-8859-7 : alias to ibm-9409 mapping table
-*/
-
-/*
- * preference order of JP charsets
- *
- * The from-Unicode JP converter appends these, in this order, to its list of
- * charsets to try, after the current G0/G2 charsets and only if the charset
- * is still set in the version's mask (jpCharsetMasks[version]).
- */
-static const StateEnum jpCharsetPref[]={
- ASCII,
- JISX201,
- ISO8859_1,
- JISX208,
- ISO8859_7,
- JISX212,
- GB2312,
- KSC5601,
- HWKANA_7BIT
-};
-
-/*
- * The escape sequences must be in order of the enum constants like JISX201 = 3,
- * not in order of jpCharsetPref[]!
- *
- * The table is indexed directly with a charset value (escSeqChars[cs]);
- * the number of bytes to copy comes from escSeqCharsLen[cs].
- */
-static const char escSeqChars[][6] ={
- "\x1B\x28\x42", /* <ESC>(B ASCII */
- "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */
- "\x1B\x2E\x46", /* <ESC>.F ISO-8859-7 */
- "\x1B\x28\x4A", /* <ESC>(J JISX-201 */
- "\x1B\x24\x42", /* <ESC>$B JISX-208 */
- "\x1B\x24\x28\x44", /* <ESC>$(D JISX-212 */
- "\x1B\x24\x41", /* <ESC>$A GB2312 */
- "\x1B\x24\x28\x43", /* <ESC>$(C KSC5601 */
- "\x1B\x28\x49" /* <ESC>(I HWKANA_7BIT */
-
-};
-static const int8_t escSeqCharsLen[] ={ /* byte length of each escSeqChars[] entry, same index order */
- 3, /* length of <ESC>(B ASCII */
- 3, /* length of <ESC>.A ISO-8859-1 */
- 3, /* length of <ESC>.F ISO-8859-7 */
- 3, /* length of <ESC>(J JISX-201 */
- 3, /* length of <ESC>$B JISX-208 */
- 4, /* length of <ESC>$(D JISX-212 */
- 3, /* length of <ESC>$A GB2312 */
- 4, /* length of <ESC>$(C KSC5601 */
- 3 /* length of <ESC>(I HWKANA_7BIT */
-};
-
-/*
-* The iteration over various code pages works this way:
-* i) Get the currentState from myConverterData->currentState
-* ii) Check if the character is mapped to a valid character in the currentState
-* Yes -> a) set the initIterState to currentState
-* b) remain in this state until an invalid character is found
-* No -> a) go to the next code page and find the character
-* iii) Before changing the state, increment the current state and check if the current state
-* is equal to the initIterState
-* Yes -> A character that cannot be represented in any of the supported encodings
-* break and return a U_INVALID_CHARACTER error
-* No -> Continue and find the character in next code page
-*
-*
-* TODO: Implement a priority technique where the users are allowed to set the priority of code pages
-*/
-
-/*
- * JIS X 0201 (7-bit half) to Unicode.
- * Every value maps to itself except 5C -> U+00A5 and 7E -> U+203E.
- * No range check is performed; callers pass 00..7F.
- */
-static inline uint32_t
-jisx201ToU(uint32_t value) {
-    switch(value) {
-    case 0x5c:
-        return 0xa5;
-    case 0x7e:
-        return 0x203e;
-    default:
-        return value;
-    }
-}
-
-/*
- * Unicode to JIS X 0201 (7-bit half).
- * U+00A5 -> 5C, U+203E -> 7E, and 00..7F map to themselves except for
- * 5C and 7E, which have no mapping. Returns 0xfffe for anything unmappable.
- */
-static inline uint32_t
-jisx201FromU(uint32_t value) {
-    switch(value) {
-    case 0xa5:
-        return 0x5c;
-    case 0x203e:
-        return 0x7e;
-    case 0x5c:
-    case 0x7e:
-        /* these two positions are occupied by the characters above */
-        return 0xfffe;
-    default:
-        return (value <= 0x7f) ? value : 0xfffe;
-    }
-}
-
-/*
- * Shift-JIS byte pair -> JIS X 0208 byte pair (both bytes in 21..7E).
- * The input is expected to be a valid Shift-JIS double-byte value.
- * Returns 0 when the pair lies beyond the JIS X 0208 range (above 0xEFFC).
- */
-static inline uint32_t
-_2022FromSJIS(uint32_t value) {
-    uint32_t lead, jis;
-    uint8_t trail;
-
-    if(value > 0xEFFC) {
-        return 0; /* beyond JIS X 0208 */
-    }
-
-    trail = (uint8_t)value;
-
-    /* rebase the lead byte: 81..9F and E0..EF use different offsets */
-    lead = value & 0xff00;
-    lead -= (lead <= 0x9f00) ? 0x7000 : 0xb000;
-    jis = lead << 1;
-
-    if(trail > 0x9e) {
-        /* upper half of the trail range, up to 0xfc */
-        return jis | (uint32_t)(trail - 0x7e);
-    }
-
-    /* lower half of the trail range: one row back; 0x7f is skipped in Shift-JIS */
-    jis -= 0x100;
-    return jis | (uint32_t)(trail - ((trail <= 0x7e) ? 0x1f : 0x20));
-}
-
-/*
- * JIS X 0208 byte pair (21..7E each) -> Shift-JIS byte pair in bytes[0..1].
- * A byte outside 21..7E must not produce a valid Shift-JIS result, so the
- * affected output byte is set to 0 where the arithmetic would not already
- * yield an invalid value; the converter then rejects the pair.
- */
-static inline void
-_2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
-    uint8_t lead, trail;
-
-    if((c1 & 1) != 0) {
-        /* odd row */
-        lead = (uint8_t)(c1 + 1);
-        trail = (c2 <= 0x5f) ? (uint8_t)(c2 + 0x1f) :
-                (c2 <= 0x7e) ? (uint8_t)(c2 + 0x20) :
-                               0 /* invalid */;
-    } else {
-        /* even row */
-        lead = c1;
-        trail = ((uint8_t)(c2 - 0x21) <= ((0x7e) - 0x21)) ?
-                    (uint8_t)(c2 + 0x7e) :
-                    0 /* invalid */;
-    }
-
-    lead >>= 1;
-    if(lead <= 0x2f) {
-        lead = (uint8_t)(lead + 0x70);
-    } else if(lead <= 0x3f) {
-        lead = (uint8_t)(lead + 0xb0);
-    } else {
-        lead = 0; /* invalid */
-    }
-
-    bytes[0] = (char)lead;
-    bytes[1] = (char)trail;
-}
-
-/*
- * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
- * Katakana.
- * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
- * because Shift-JIS roundtrips half-width Katakana to single bytes.
- * These were the only fallbacks in ICU's jisx-208.ucm file.
- *
- * The table is indexed with (code point - HWKANA_START). Each entry is the
- * two-byte value that the from-Unicode JP converter uses as the fallback
- * result (high byte written first) in the JISX208 case.
- */
-static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
- 0x2123, /* U+FF61 */
- 0x2156,
- 0x2157,
- 0x2122,
- 0x2126,
- 0x2572,
- 0x2521,
- 0x2523,
- 0x2525,
- 0x2527,
- 0x2529,
- 0x2563,
- 0x2565,
- 0x2567,
- 0x2543,
- 0x213C, /* U+FF70 */
- 0x2522,
- 0x2524,
- 0x2526,
- 0x2528,
- 0x252A,
- 0x252B,
- 0x252D,
- 0x252F,
- 0x2531,
- 0x2533,
- 0x2535,
- 0x2537,
- 0x2539,
- 0x253B,
- 0x253D,
- 0x253F, /* U+FF80 */
- 0x2541,
- 0x2544,
- 0x2546,
- 0x2548,
- 0x254A,
- 0x254B,
- 0x254C,
- 0x254D,
- 0x254E,
- 0x254F,
- 0x2552,
- 0x2555,
- 0x2558,
- 0x255B,
- 0x255E,
- 0x255F, /* U+FF90 */
- 0x2560,
- 0x2561,
- 0x2562,
- 0x2564,
- 0x2566,
- 0x2568,
- 0x2569,
- 0x256A,
- 0x256B,
- 0x256C,
- 0x256D,
- 0x256F,
- 0x2573,
- 0x212B,
- 0x212C /* U+FF9F */
-};
-
-static void U_CALLCONV
-UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
- UConverter *cnv = args->converter;
- UConverterDataISO2022 *converterData;
- ISO2022State *pFromU2022State;
- uint8_t *target = (uint8_t *) args->target;
- const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
- const UChar* source = args->source;
- const UChar* sourceLimit = args->sourceLimit;
- int32_t* offsets = args->offsets;
- UChar32 sourceChar;
- char buffer[8];
- int32_t len, outLen;
- int8_t choices[10];
- int32_t choiceCount;
- uint32_t targetValue = 0;
- UBool useFallback;
-
- int32_t i;
- int8_t cs, g;
-
- /*
- * Overview: converts UTF-16 from args->source into ISO-2022-JP-family
- * bytes in args->target, optionally recording source offsets.
- *
- * For each code point the charsets listed in choices[] are tried in order
- * (HWKANA_7BIT first for versions 3 and 4, then the current G0 and G2
- * charsets, then the remaining entries of jpCharsetPref[] that are set in
- * jpCharsetMasks[version]) until a roundtrip mapping is found; a fallback
- * mapping is kept only if nothing was found before it.
- * The output for one code point is assembled in buffer[]: SI if needed,
- * a designation escape sequence if the charset changes, a shift
- * (SO or ESC N) if needed, then the one or two result bytes.
- *
- * On an error the offending code point is stored in cnv->fromUChar32 and
- * the loop stops. When flushing at the end of the input, the state is
- * returned to G0/ASCII.
- *
- * set up the state
- */
- converterData = (UConverterDataISO2022*)cnv->extraInfo;
- pFromU2022State = &converterData->fromU2022State;
-
- choiceCount = 0;
-
- /* check if the last codepoint of previous buffer was a lead surrogate*/
- if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
- goto getTrail;
- }
-
- while(source < sourceLimit) {
- if(target < targetLimit) {
-
- sourceChar = *(source++);
- /*check if the char is a First surrogate*/
- if(U16_IS_SURROGATE(sourceChar)) {
- if(U16_IS_SURROGATE_LEAD(sourceChar)) {
-getTrail:
- /*look ahead to find the trail surrogate*/
- if(source < sourceLimit) {
- /* test the following code unit */
- UChar trail=(UChar) *source;
- if(U16_IS_TRAIL(trail)) {
- source++;
- sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
- cnv->fromUChar32=0x00;
- /* convert this supplementary code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- cnv->fromUChar32=sourceChar;
- break;
- }
- } else {
- /* no more input */
- cnv->fromUChar32=sourceChar;
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- cnv->fromUChar32=sourceChar;
- break;
- }
- }
-
- /* do not convert SO/SI/ESC */
- if(IS_2022_CONTROL(sourceChar)) {
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- cnv->fromUChar32=sourceChar;
- break;
- }
-
- /* do the conversion */
-
- if(choiceCount == 0) {
- uint16_t csm;
-
- /*
- * The csm variable keeps track of which charsets are allowed
- * and not used yet while building the choices[].
- */
- csm = jpCharsetMasks[converterData->version];
- choiceCount = 0;
-
- /* JIS7/8: try single-byte half-width Katakana before JISX208 */
- if(converterData->version == 3 || converterData->version == 4) {
- choices[choiceCount++] = (int8_t)HWKANA_7BIT;
- }
- /* Do not try single-byte half-width Katakana for other versions. */
- csm &= ~CSM(HWKANA_7BIT);
-
- /* try the current G0 charset */
- choices[choiceCount++] = cs = pFromU2022State->cs[0];
- csm &= ~CSM(cs);
-
- /* try the current G2 charset */
- if((cs = pFromU2022State->cs[2]) != 0) {
- choices[choiceCount++] = cs;
- csm &= ~CSM(cs);
- }
-
- /* try all the other possible charsets */
- for(i = 0; i < UPRV_LENGTHOF(jpCharsetPref); ++i) {
- cs = (int8_t)jpCharsetPref[i];
- if(CSM(cs) & csm) {
- choices[choiceCount++] = cs;
- csm &= ~CSM(cs);
- }
- }
- }
-
- cs = g = 0;
- /*
- * len==0: no mapping found yet
- * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
- * len>0: found a roundtrip result, done
- */
- len = 0;
- /*
- * We will turn off useFallback after finding a fallback,
- * but we still get fallbacks from PUA code points as usual.
- * Therefore, we will also need to check that we don't overwrite
- * an early fallback with a later one.
- */
- useFallback = cnv->useFallback;
-
- for(i = 0; i < choiceCount && len <= 0; ++i) {
- uint32_t value;
- int32_t len2;
- int8_t cs0 = choices[i];
- switch(cs0) {
- case ASCII:
- if(sourceChar <= 0x7f) {
- targetValue = (uint32_t)sourceChar;
- len = 1;
- cs = cs0;
- g = 0;
- }
- break;
- case ISO8859_1:
- if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
- targetValue = (uint32_t)sourceChar - 0x80;
- len = 1;
- cs = cs0;
- g = 2;
- }
- break;
- case HWKANA_7BIT:
- if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
- if(converterData->version==3) {
- /* JIS7: use G1 (SO) */
- /* Shift U+FF61..U+FF9F to bytes 21..5F. */
- targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
- len = 1;
- pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
- g = 1;
- } else if(converterData->version==4) {
- /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
- /* Shift U+FF61..U+FF9F to bytes A1..DF. */
- targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));
- len = 1;
-
- cs = pFromU2022State->cs[0];
- if(IS_JP_DBCS(cs)) {
- /* switch from a DBCS charset to JISX201 */
- cs = (int8_t)JISX201;
- }
- /* else stay in the current G0 charset */
- g = 0;
- }
- /* else do not use HWKANA_7BIT with other versions */
- }
- break;
- case JISX201:
- /* G0 SBCS */
- value = jisx201FromU(sourceChar);
- if(value <= 0x7f) {
- targetValue = value;
- len = 1;
- cs = cs0;
- g = 0;
- useFallback = FALSE;
- }
- break;
- case JISX208:
- /* G0 DBCS from Shift-JIS table */
- len2 = MBCS_FROM_UCHAR32_ISO2022(
- converterData->myConverterArray[cs0],
- sourceChar, &value,
- useFallback, MBCS_OUTPUT_2);
- if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
- value = _2022FromSJIS(value);
- if(value != 0) {
- targetValue = value;
- len = len2;
- cs = cs0;
- g = 0;
- useFallback = FALSE;
- }
- } else if(len == 0 && useFallback &&
- (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
- targetValue = hwkana_fb[sourceChar - HWKANA_START];
- len = -2;
- cs = cs0;
- g = 0;
- useFallback = FALSE;
- }
- break;
- case ISO8859_7:
- /* G0 SBCS forced to 7-bit output */
- len2 = MBCS_SINGLE_FROM_UCHAR32(
- converterData->myConverterArray[cs0],
- sourceChar, &value,
- useFallback);
- if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
- targetValue = value - 0x80;
- len = len2;
- cs = cs0;
- g = 2;
- useFallback = FALSE;
- }
- break;
- default:
- /* G0 DBCS */
- len2 = MBCS_FROM_UCHAR32_ISO2022(
- converterData->myConverterArray[cs0],
- sourceChar, &value,
- useFallback, MBCS_OUTPUT_2);
- if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
- if(cs0 == KSC5601) {
- /*
- * Check for valid bytes for the encoding scheme.
- * This is necessary because the sub-converter (windows-949)
- * has a broader encoding scheme than is valid for 2022.
- */
- value = _2022FromGR94DBCS(value);
- if(value == 0) {
- break;
- }
- }
- targetValue = value;
- len = len2;
- cs = cs0;
- g = 0;
- useFallback = FALSE;
- }
- break;
- }
- }
-
- if(len != 0) {
- if(len < 0) {
- len = -len; /* fallback */
- }
- outLen = 0; /* count output bytes */
-
- /* write SI if necessary (only for JIS7) */
- if(pFromU2022State->g == 1 && g == 0) {
- buffer[outLen++] = UCNV_SI;
- pFromU2022State->g = 0;
- }
-
- /* write the designation sequence if necessary */
- if(cs != pFromU2022State->cs[g]) {
- int32_t escLen = escSeqCharsLen[cs];
- uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);
- outLen += escLen;
- pFromU2022State->cs[g] = cs;
-
- /* invalidate the choices[] */
- choiceCount = 0;
- }
-
- /* write the shift sequence if necessary */
- if(g != pFromU2022State->g) {
- switch(g) {
- /* case 0 handled before writing escapes */
- case 1:
- buffer[outLen++] = UCNV_SO;
- pFromU2022State->g = 1;
- break;
- default: /* case 2 */
- buffer[outLen++] = 0x1b;
- buffer[outLen++] = 0x4e;
- break;
- /* no case 3: no SS3 in ISO-2022-JP-x */
- }
- }
-
- /* write the output bytes */
- if(len == 1) {
- buffer[outLen++] = (char)targetValue;
- } else /* len == 2 */ {
- buffer[outLen++] = (char)(targetValue >> 8);
- buffer[outLen++] = (char)targetValue;
- }
- } else {
- /*
- * if we cannot find the character after checking all codepages
- * then this is an error
- */
- *err = U_INVALID_CHAR_FOUND;
- cnv->fromUChar32=sourceChar;
- break;
- }
-
- if(sourceChar == CR || sourceChar == LF) {
- /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
- pFromU2022State->cs[2] = 0;
- choiceCount = 0;
- }
-
- /* output outLen>0 bytes in buffer[] */
- if(outLen == 1) {
- *target++ = buffer[0];
- if(offsets) {
- *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
- }
- } else if(outLen == 2 && (target + 2) <= targetLimit) {
- *target++ = buffer[0];
- *target++ = buffer[1];
- if(offsets) {
- int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
- *offsets++ = sourceIndex;
- *offsets++ = sourceIndex;
- }
- } else {
- fromUWriteUInt8(
- cnv,
- buffer, outLen,
- &target, (const char *)targetLimit,
- &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
- err);
- if(U_FAILURE(*err)) {
- break;
- }
- }
- } /* end if(myTargetIndex<myTargetLength) */
- else{
- *err =U_BUFFER_OVERFLOW_ERROR;
- break;
- }
-
- }/* end while(mySourceIndex<mySourceLength) */
-
- /*
- * the end of the input stream and detection of truncated input
- * are handled by the framework, but for ISO-2022-JP conversion
- * we need to be in ASCII mode at the very end
- *
- * conditions:
- * successful
- * in SO mode or not in ASCII mode
- * end of input and no truncated input
- */
- if( U_SUCCESS(*err) &&
- (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
- args->flush && source>=sourceLimit && cnv->fromUChar32==0
- ) {
- int32_t sourceIndex;
-
- outLen = 0;
-
- if(pFromU2022State->g != 0) {
- buffer[outLen++] = UCNV_SI;
- pFromU2022State->g = 0;
- }
-
- if(pFromU2022State->cs[0] != ASCII) {
- int32_t escLen = escSeqCharsLen[ASCII];
- uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
- outLen += escLen;
- pFromU2022State->cs[0] = (int8_t)ASCII;
- }
-
- /* get the source index of the last input character */
- /*
- * TODO this would be simpler and more reliable if we used a pair
- * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
- * so that we could simply use the prevSourceIndex here;
- * this code gives an incorrect result for the rare case of an unmatched
- * trail surrogate that is alone in the last buffer of the text stream
- */
- sourceIndex=(int32_t)(source-args->source);
- if(sourceIndex>0) {
- --sourceIndex;
- if( U16_IS_TRAIL(args->source[sourceIndex]) &&
- (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
- ) {
- --sourceIndex;
- }
- } else {
- sourceIndex=-1;
- }
-
- fromUWriteUInt8(
- cnv,
- buffer, outLen,
- &target, (const char *)targetLimit,
- &offsets, sourceIndex,
- err);
- }
-
- /*save the state and return */
- args->source = source;
- args->target = (char*)target;
-}
-
-/*************** to unicode *******************
- *
- * Converts ISO-2022-JP-family bytes from args->source into UTF-16 in
- * args->target, optionally recording source offsets in args->offsets.
- *
- * SI/SO are honored only for version 3 (JIS7); escape sequences are handed to
- * changeState_2022(); CR/LF reset the G0/G2 state; every other byte is
- * converted as one or two bytes according to the charset in cs[g].
- *
- * The function can resume in the middle of an escape sequence
- * (myData->key != 0) or of a double-byte character (toULength == 1).
- * Unconvertible input is reported through toUnicodeCallback().
- */
-
-static void U_CALLCONV
-UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
- UErrorCode* err){
- char tempBuf[2];
- const char *mySource = (char *) args->source;
- UChar *myTarget = args->target;
- const char *mySourceLimit = args->sourceLimit;
- uint32_t targetUniChar = 0x0000;
- uint32_t mySourceChar = 0x0000;
- uint32_t tmpSourceChar = 0x0000;
- UConverterDataISO2022* myData;
- ISO2022State *pToU2022State;
- StateEnum cs;
-
- myData=(UConverterDataISO2022*)(args->converter->extraInfo);
- pToU2022State = &myData->toU2022State;
-
- if(myData->key != 0) {
- /* continue with a partial escape sequence */
- goto escape;
- } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
- /* continue with a partial double-byte character */
- mySourceChar = args->converter->toUBytes[0];
- args->converter->toULength = 0;
- cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
- targetUniChar = missingCharMarker;
- goto getTrailByte;
- }
-
- while(mySource < mySourceLimit){
-
- targetUniChar =missingCharMarker;
-
- if(myTarget < args->targetLimit){
-
- mySourceChar= (unsigned char) *mySource++;
-
- switch(mySourceChar) {
- case UCNV_SI:
- if(myData->version==3) {
- pToU2022State->g=0;
- continue;
- } else {
- /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
- myData->isEmptySegment = FALSE; /* reset this, we have a different error */
- break;
- }
-
- case UCNV_SO:
- if(myData->version==3) {
- /* JIS7: switch to G1 half-width Katakana */
- pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;
- pToU2022State->g=1;
- continue;
- } else {
- /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
- myData->isEmptySegment = FALSE; /* reset this, we have a different error */
- break;
- }
-
- case ESC_2022:
- mySource--;
-escape:
- {
- const char * mySourceBefore = mySource;
- int8_t toULengthBefore = args->converter->toULength;
-
- changeState_2022(args->converter,&(mySource),
- mySourceLimit, ISO_2022_JP,err);
-
- /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
- if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
- *err = U_ILLEGAL_ESCAPE_SEQUENCE;
- args->converter->toUCallbackReason = UCNV_IRREGULAR;
- args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
- }
- }
-
- /* invalid or illegal escape sequence */
- if(U_FAILURE(*err)){
- args->target = myTarget;
- args->source = mySource;
- myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */
- return;
- }
- /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
- if(myData->key==0) {
- myData->isEmptySegment = TRUE;
- }
- continue;
-
- /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
-
- case CR:
- case LF:
- /* automatically reset to single-byte mode */
- if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {
- pToU2022State->cs[0] = (int8_t)ASCII;
- }
- pToU2022State->cs[2] = 0;
- pToU2022State->g = 0;
- U_FALLTHROUGH;
- default:
- /* convert one or two bytes */
- myData->isEmptySegment = FALSE;
- cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
- if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
- !IS_JP_DBCS(cs)
- ) {
- /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
- targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
-
- /* return from a single-shift state to the previous one */
- if(pToU2022State->g >= 2) {
- pToU2022State->g=pToU2022State->prevG;
- }
- } else switch(cs) {
- case ASCII:
- if(mySourceChar <= 0x7f) {
- targetUniChar = mySourceChar;
- }
- break;
- case ISO8859_1:
- if(mySourceChar <= 0x7f) {
- targetUniChar = mySourceChar + 0x80;
- }
- /* return from a single-shift state to the previous one */
- pToU2022State->g=pToU2022State->prevG;
- break;
- case ISO8859_7:
- if(mySourceChar <= 0x7f) {
- /* convert mySourceChar+0x80 to use a normal 8-bit table */
- targetUniChar =
- _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
- myData->myConverterArray[cs],
- mySourceChar + 0x80);
- }
- /* return from a single-shift state to the previous one */
- pToU2022State->g=pToU2022State->prevG;
- break;
- case JISX201:
- if(mySourceChar <= 0x7f) {
- targetUniChar = jisx201ToU(mySourceChar);
- }
- break;
- case HWKANA_7BIT:
- if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
- /* 7-bit halfwidth Katakana */
- targetUniChar = mySourceChar + (HWKANA_START - 0x21);
- }
- break;
- default:
- /* G0 DBCS */
- if(mySource < mySourceLimit) {
- int leadIsOk, trailIsOk;
- uint8_t trailByte;
-getTrailByte:
- trailByte = (uint8_t)*mySource;
- /*
- * Ticket 5691: consistent illegal sequences:
- * - We include at least the first byte in the illegal sequence.
- * - If any of the non-initial bytes could be the start of a character,
- * we stop the illegal sequence before the first one of those.
- *
- * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
- * an ESC/SO/SI, we report only the first byte as the illegal sequence.
- * Otherwise we convert or report the pair of bytes.
- */
- leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
- trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
- if (leadIsOk && trailIsOk) {
- ++mySource;
- tmpSourceChar = (mySourceChar << 8) | trailByte;
- if(cs == JISX208) {
- _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf);
- mySourceChar = tmpSourceChar;
- } else {
- /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
- mySourceChar = tmpSourceChar;
- if (cs == KSC5601) {
- tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */
- }
- tempBuf[0] = (char)(tmpSourceChar >> 8);
- tempBuf[1] = (char)(tmpSourceChar);
- }
- targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
- } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
- /* report a pair of illegal bytes if the second byte is not a DBCS starter */
- ++mySource;
- /* add another bit so that the code below writes 2 bytes in case of error */
- mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
- }
- } else {
- args->converter->toUBytes[0] = (uint8_t)mySourceChar;
- args->converter->toULength = 1;
- goto endloop;
- }
- } /* End of inner switch */
- break;
- } /* End of outer switch */
- if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
- if(args->offsets){
- args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
- }
- *(myTarget++)=(UChar)targetUniChar;
- }
- else if(targetUniChar > missingCharMarker){
- /* disassemble the surrogate pair and write to output*/
- targetUniChar-=0x0010000;
- *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
- if(args->offsets){
- args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
- }
- ++myTarget;
- if(myTarget< args->targetLimit){
- *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
- if(args->offsets){
- args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
- }
- ++myTarget;
- }else{
- args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
- (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
- }
-
- }
- else{
- /* Call the callback function*/
- toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
- break;
- }
- }
- else{ /* goes with "if(myTarget < args->targetLimit)" way up near top of function */
- *err =U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-endloop:
- args->target = myTarget;
- args->source = mySource;
-}
-
-
-#if !UCONFIG_ONLY_HTML_CONVERSION
-/***************************************************************
-* Rules for ISO-2022-KR encoding
-* i) The KSC5601 designator sequence should appear only once in a file,
-* at the beginning of a line before any KSC5601 characters. This usually
-* means that it appears by itself on the first line of the file
-* ii) There are only 2 shifting sequences SO to shift into double byte mode
-* and SI to shift into single byte mode
-*/
-static void U_CALLCONV
-UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
-    /*
-     * Runs the conversion on the sub-converter (currentConverter) in place of
-     * the public converter, carrying the pending code point (fromUChar32)
-     * across in both directions and, on buffer overflow, moving the
-     * sub-converter's overflow bytes into the public converter.
-     */
-    UConverter *outerCnv = args->converter;
-    UConverterDataISO2022 *data = (UConverterDataISO2022 *)outerCnv->extraInfo;
-    UConverter *innerCnv = data->currentConverter;
-
-    /* hand the arguments to the sub-converter */
-    args->converter = innerCnv;
-    innerCnv->fromUChar32 = outerCnv->fromUChar32;
-
-    ucnv_MBCSFromUnicodeWithOffsets(args, err);
-
-    outerCnv->fromUChar32 = innerCnv->fromUChar32;
-
-    if(U_BUFFER_OVERFLOW_ERROR == *err) {
-        /* move the bytes that did not fit from the sub-converter to the public one */
-        if(innerCnv->charErrorBufferLength > 0) {
-            uprv_memcpy(
-                outerCnv->charErrorBuffer,
-                innerCnv->charErrorBuffer,
-                innerCnv->charErrorBufferLength);
-        }
-        outerCnv->charErrorBufferLength = innerCnv->charErrorBufferLength;
-        innerCnv->charErrorBufferLength = 0;
-    }
-
-    /* restore the public converter in the arguments */
-    args->converter = outerCnv;
-}
-
-static void U_CALLCONV
-UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
-
- const UChar *source = args->source;
- const UChar *sourceLimit = args->sourceLimit;
- unsigned char *target = (unsigned char *) args->target;
- unsigned char *targetLimit = (unsigned char *) args->targetLimit;
- int32_t* offsets = args->offsets;
- uint32_t targetByteUnit = 0x0000;
- UChar32 sourceChar = 0x0000;
- UBool isTargetByteDBCS;
- UBool oldIsTargetByteDBCS;
- UConverterDataISO2022 *converterData;
- UConverterSharedData* sharedData;
- UBool useFallback;
- int32_t length =0;
-
- converterData=(UConverterDataISO2022*)args->converter->extraInfo;
- /*
- * Overview: converts UTF-16 from args->source into ISO-2022-KR bytes in
- * args->target, optionally recording source offsets.
- *
- * Each code unit is looked up in the shared data of currentConverter.
- * Only single bytes up to 0x7f and double bytes with both bytes in A1..FE
- * are accepted; double bytes are written with 0x80 subtracted from each
- * byte. SO is written when switching to double-byte output and SI when
- * switching back. Bytes that do not fit into the target go to the
- * converter's charErrorBuffer. The single/double-byte mode is kept in
- * fromUnicodeStatus between calls, and SI is written on flush at the end
- * of the input if the output is still in double-byte mode.
- *
- * if the version is 1 then the user is requesting
- * conversion with ibm-25546 pass the arguments to
- * MBCS converter and return
- */
- if(converterData->version==1){
- UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
- return;
- }
-
- /* initialize data */
- sharedData = converterData->currentConverter->sharedData;
- useFallback = args->converter->useFallback;
- isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
- oldIsTargetByteDBCS = isTargetByteDBCS;
-
- isTargetByteDBCS = (UBool) args->converter->fromUnicodeStatus; /* NOTE(review): same value was already read a few lines above */
- if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
- goto getTrail;
- }
- while(source < sourceLimit){
-
- targetByteUnit = missingCharMarker;
-
- if(target < (unsigned char*) args->targetLimit){
- sourceChar = *source++;
-
- /* do not convert SO/SI/ESC */
- if(IS_2022_CONTROL(sourceChar)) {
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- args->converter->fromUChar32=sourceChar;
- break;
- }
-
- length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
- if(length < 0) {
- length = -length; /* fallback */
- }
- /* only DBCS or SBCS characters are expected*/
- /* DB characters with high bit set to 1 are expected */
- if( length > 2 || length==0 ||
- (length == 1 && targetByteUnit > 0x7f) ||
- (length == 2 &&
- ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) ||
- (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
- ) {
- targetByteUnit=missingCharMarker;
- }
- if (targetByteUnit != missingCharMarker){
-
- oldIsTargetByteDBCS = isTargetByteDBCS;
- isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);
- /* append the shift sequence */
- if (oldIsTargetByteDBCS != isTargetByteDBCS ){
-
- if (isTargetByteDBCS)
- *target++ = UCNV_SO;
- else
- *target++ = UCNV_SI;
- if(offsets)
- *(offsets++) = (int32_t)(source - args->source-1);
- }
- /* write the targetUniChar to target */
- if(targetByteUnit <= 0x00FF){
- if( target < targetLimit){
- *(target++) = (unsigned char) targetByteUnit;
- if(offsets){
- *(offsets++) = (int32_t)(source - args->source-1);
- }
-
- }else{
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }else{
- if(target < targetLimit){
- *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80);
- if(offsets){
- *(offsets++) = (int32_t)(source - args->source-1);
- }
- if(target < targetLimit){
- *(target++) =(unsigned char) (targetByteUnit -0x80);
- if(offsets){
- *(offsets++) = (int32_t)(source - args->source-1);
- }
- }else{
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }else{
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }
-
- }
- else{
- /* oops.. the code point is unassigned
- * set the error and reason
- */
-
- /*check if the char is a First surrogate*/
- if(U16_IS_SURROGATE(sourceChar)) {
- if(U16_IS_SURROGATE_LEAD(sourceChar)) {
-getTrail:
- /*look ahead to find the trail surrogate*/
- if(source < sourceLimit) {
- /* test the following code unit */
- UChar trail=(UChar) *source;
- if(U16_IS_TRAIL(trail)) {
- source++;
- sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
- *err = U_INVALID_CHAR_FOUND;
- /* convert this surrogate code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- }
- } else {
- /* no more input */
- *err = U_ZERO_ERROR;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- }
- } else {
- /* callback(unassigned) for a BMP code point */
- *err = U_INVALID_CHAR_FOUND;
- }
-
- args->converter->fromUChar32=sourceChar;
- break;
- }
- } /* end if(myTargetIndex<myTargetLength) */
- else{
- *err =U_BUFFER_OVERFLOW_ERROR;
- break;
- }
-
- }/* end while(mySourceIndex<mySourceLength) */
-
- /*
- * the end of the input stream and detection of truncated input
- * are handled by the framework, but for ISO-2022-KR conversion
- * we need to be in ASCII mode at the very end
- *
- * conditions:
- * successful
- * not in ASCII mode
- * end of input and no truncated input
- */
- if( U_SUCCESS(*err) &&
- isTargetByteDBCS &&
- args->flush && source>=sourceLimit && args->converter->fromUChar32==0
- ) {
- int32_t sourceIndex;
-
- /* we are switching to ASCII */
- isTargetByteDBCS=FALSE;
-
- /* get the source index of the last input character */
- /*
- * TODO this would be simpler and more reliable if we used a pair
- * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
- * so that we could simply use the prevSourceIndex here;
- * this code gives an incorrect result for the rare case of an unmatched
- * trail surrogate that is alone in the last buffer of the text stream
- */
- sourceIndex=(int32_t)(source-args->source);
- if(sourceIndex>0) {
- --sourceIndex;
- if( U16_IS_TRAIL(args->source[sourceIndex]) &&
- (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
- ) {
- --sourceIndex;
- }
- } else {
- sourceIndex=-1;
- }
-
- fromUWriteUInt8(
- args->converter,
- SHIFT_IN_STR, 1,
- &target, (const char *)targetLimit,
- &offsets, sourceIndex,
- err);
- }
-
- /*save the state and return */
- args->source = source;
- args->target = (char*)target;
- args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;
-}
-
-/************************ To Unicode ***************************************/
-/*
- * ISO-2022-KR toUnicode for the variant that converts through the subconverter
- * in myData->currentConverter (UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC
- * dispatches here when myData->version==1).
- *
- * The input is handled in runs. getEndOfBuffer_2022() supplies the limit of
- * each run, ucnv_MBCSToUnicodeWithOffsets() converts it using a copy of the
- * caller's arguments (subArgs), and the resulting positions and the
- * pending-byte / overflow buffers are copied back to the public converter.
- * Between runs changeState_2022() is invoked on the public converter.
- *
- * args: conversion arguments; source, target and offsets are advanced.
- * err:  in/out error code; processing stops once it holds a failure.
- */
-static void U_CALLCONV
-UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
- UErrorCode* err){
- char const* sourceStart;
- UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
-
- UConverterToUnicodeArgs subArgs;
- int32_t minArgsSize;
-
- /* set up the subconverter arguments */
- if(args->size<sizeof(UConverterToUnicodeArgs)) { /* copy no more than the smaller of the two struct sizes */
- minArgsSize = args->size;
- } else {
- minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
- }
-
- uprv_memcpy(&subArgs, args, minArgsSize);
- subArgs.size = (uint16_t)minArgsSize;
- subArgs.converter = myData->currentConverter; /* the copy drives the subconverter, not the public one */
-
- /* remember the original start of the input for offsets */
- sourceStart = args->source;
-
- if(myData->key != 0) {
- /* continue with a partial escape sequence */
- goto escape;
- }
-
- while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
- /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
- subArgs.source = args->source;
- subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
- if(subArgs.source != subArgs.sourceLimit) {
- /*
- * get the current partial byte sequence
- *
- * it needs to be moved between the public and the subconverter
- * so that the conversion framework, which only sees the public
- * converter, can handle truncated and illegal input etc.
- */
- if(args->converter->toULength > 0) {
- uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);
- }
- subArgs.converter->toULength = args->converter->toULength;
-
- /*
- * Convert up to the end of the input, or to before the next escape character.
- * Does not handle conversion extensions because the preToU[] state etc.
- * is not copied.
- */
- ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
-
- if(args->offsets != NULL && sourceStart != args->source) {
- /* update offsets to base them on the actual start of the input */
- int32_t *offsets = args->offsets;
- UChar *target = args->target;
- int32_t delta = (int32_t)(args->source - sourceStart); /* distance of args->source from the original input start */
- while(target < subArgs.target) {
- if(*offsets >= 0) { /* negative offsets are left untouched */
- *offsets += delta;
- }
- ++offsets;
- ++target;
- }
- }
- args->source = subArgs.source;
- args->target = subArgs.target;
- args->offsets = subArgs.offsets;
-
- /* copy input/error/overflow buffers */
- if(subArgs.converter->toULength > 0) {
- uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);
- }
- args->converter->toULength = subArgs.converter->toULength;
-
- if(*err == U_BUFFER_OVERFLOW_ERROR) {
- if(subArgs.converter->UCharErrorBufferLength > 0) {
- uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
- subArgs.converter->UCharErrorBufferLength);
- }
- args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
- subArgs.converter->UCharErrorBufferLength = 0; /* the overflow has been moved to the public converter */
- }
- }
-
- if (U_FAILURE(*err) || (args->source == args->sourceLimit)) {
- return;
- }
-
-escape:
- changeState_2022(args->converter,
- &(args->source),
- args->sourceLimit,
- ISO_2022_KR,
- err);
- }
-}
-/*
- * toUnicode implementation for ISO-2022-KR.
- *
- * For myData->version==1 the work is handed to
- * UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(). Otherwise the bytes are
- * interpreted here: SI selects single-byte mode (toU2022State.g=0), SO selects
- * double-byte mode (toU2022State.g=1), ESC is passed to changeState_2022(),
- * and every other byte (or byte pair in double-byte mode) is converted with
- * ucnv_MBCSSimpleGetNextUChar().
- *
- * An SI that arrives while isEmptySegment is still set (nothing was converted
- * since the SO) is reported as U_ILLEGAL_ESCAPE_SEQUENCE.
- * A lead byte at the very end of the input is saved in toUBytes[] and resumed
- * at getTrailByte on the next call.
- * U_BUFFER_OVERFLOW_ERROR is set when the target is full while input remains.
- *
- * args: conversion arguments; source and target are written back on exit.
- * err:  in/out error code.
- */
-static void U_CALLCONV
-UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
- UErrorCode* err){
- char tempBuf[2];
- const char *mySource = ( char *) args->source;
- UChar *myTarget = args->target;
- const char *mySourceLimit = args->sourceLimit;
- UChar32 targetUniChar = 0x0000;
- UChar mySourceChar = 0x0000;
- UConverterDataISO2022* myData;
- UConverterSharedData* sharedData ;
- UBool useFallback;
-
- myData=(UConverterDataISO2022*)(args->converter->extraInfo);
- if(myData->version==1){
- UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
- return;
- }
-
- /* initialize state */
- sharedData = myData->currentConverter->sharedData;
- useFallback = args->converter->useFallback;
-
- if(myData->key != 0) {
- /* continue with a partial escape sequence */
- goto escape;
- } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
- /* continue with a partial double-byte character */
- mySourceChar = args->converter->toUBytes[0];
- args->converter->toULength = 0;
- goto getTrailByte;
- }
-
- while(mySource< mySourceLimit){
-
- if(myTarget < args->targetLimit){
-
- mySourceChar= (unsigned char) *mySource++;
-
- if(mySourceChar==UCNV_SI){
- myData->toU2022State.g = 0;
- if (myData->isEmptySegment) {
- myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
- *err = U_ILLEGAL_ESCAPE_SEQUENCE;
- args->converter->toUCallbackReason = UCNV_IRREGULAR;
- args->converter->toUBytes[0] = (uint8_t)mySourceChar;
- args->converter->toULength = 1;
- args->target = myTarget;
- args->source = mySource;
- return;
- }
- /*consume the source */
- continue;
- }else if(mySourceChar==UCNV_SO){
- myData->toU2022State.g = 1;
- myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */
- /*consume the source */
- continue;
- }else if(mySourceChar==ESC_2022){
- mySource--; /* hand the ESC byte itself to changeState_2022() */
-escape:
- myData->isEmptySegment = FALSE; /* Any invalid ESC sequences will be detected separately, so just reset this */
- changeState_2022(args->converter,&(mySource),
- mySourceLimit, ISO_2022_KR, err);
- if(U_FAILURE(*err)){
- args->target = myTarget;
- args->source = mySource;
- return;
- }
- continue;
- }
-
- myData->isEmptySegment = FALSE; /* Any invalid char errors will be detected separately, so just reset this */
- if(myData->toU2022State.g == 1) {
- if(mySource < mySourceLimit) {
- int leadIsOk, trailIsOk;
- uint8_t trailByte;
-getTrailByte:
- targetUniChar = missingCharMarker;
- trailByte = (uint8_t)*mySource;
- /*
- * Ticket 5691: consistent illegal sequences:
- * - We include at least the first byte in the illegal sequence.
- * - If any of the non-initial bytes could be the start of a character,
- * we stop the illegal sequence before the first one of those.
- *
- * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
- * an ESC/SO/SI, we report only the first byte as the illegal sequence.
- * Otherwise we convert or report the pair of bytes.
- */
- leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
- trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
- if (leadIsOk && trailIsOk) {
- ++mySource;
- tempBuf[0] = (char)(mySourceChar + 0x80); /* the lookup is done with 0x80 added to each byte */
- tempBuf[1] = (char)(trailByte + 0x80);
- targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
- mySourceChar = (mySourceChar << 8) | trailByte; /* keep both bytes for the offset and error code below */
- } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
- /* report a pair of illegal bytes if the second byte is not a DBCS starter */
- ++mySource;
- /* add another bit so that the code below writes 2 bytes in case of error */
- mySourceChar = static_cast<UChar>(0x10000 | (mySourceChar << 8) | trailByte); /* NOTE(review): mySourceChar is declared UChar above; if UChar is 16 bits wide the 0x10000 bit does not survive this cast - confirm */
- }
- } else {
- args->converter->toUBytes[0] = (uint8_t)mySourceChar;
- args->converter->toULength = 1;
- break; /* lead byte saved; the trail byte is expected in the next buffer */
- }
- }
- else if(mySourceChar <= 0x7f) {
- targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
- } else {
- targetUniChar = 0xffff; /* bytes above 0x7f in single-byte mode take the callback path below */
- }
- if(targetUniChar < 0xfffe){
- if(args->offsets) {
- args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
- }
- *(myTarget++)=(UChar)targetUniChar;
- }
- else {
- /* Call the callback function*/
- toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
- break;
- }
- }
- else{
- *err =U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- args->target = myTarget;
- args->source = mySource;
-}
-
-/*************************** END ISO2022-KR *********************************/
-
-/*************************** ISO-2022-CN *********************************
-*
-* Rules for ISO-2022-CN Encoding:
-* i) The designator sequence must appear once on a line before any instance
-* of character set it designates.
-* ii) If two lines contain characters from the same character set, both lines
-* must include the designator sequence.
-* iii) Once the designator sequence is known, a shifting sequence has to be found
-* to invoke the shifting
-* iv) All lines start in ASCII and end in ASCII.
-* v) Four shifting sequences are employed for this purpose:
-*
-* Sequence ASCII Eq Charsets
-* ---------- ------- ---------
-* SI <SI> US-ASCII
-* SO <SO> CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
-* SS2 <ESC>N CNS-11643-1992 Plane 2
-* SS3 <ESC>O CNS-11643-1992 Planes 3-7
-*
-* vi)
-* SOdesignator : ESC "$" ")" finalchar_for_SO
-* SS2designator : ESC "$" "*" finalchar_for_SS2
-* SS3designator : ESC "$" "+" finalchar_for_SS3
-*
-* ESC $ ) A Indicates the bytes following SO are Chinese
-* characters as defined in GB 2312-80, until
-* another SOdesignation appears
-*
-*
-* ESC $ ) E Indicates the bytes following SO are as defined
-* in ISO-IR-165 (for details, see section 2.1),
-* until another SOdesignation appears
-*
-* ESC $ ) G Indicates the bytes following SO are as defined
-* in CNS 11643-plane-1, until another
-* SOdesignation appears
-*
-* ESC $ * H Indicates the two bytes immediately following
-* SS2 is a Chinese character as defined in CNS
-* 11643-plane-2, until another SS2designation
-* appears
-* (Meaning <ESC>N must precede every 2 byte
-* sequence.)
-*
-* ESC $ + I Indicates the immediate two bytes following SS3
-* is a Chinese character as defined in CNS
-* 11643-plane-3, until another SS3designation
-* appears
-* (Meaning <ESC>O must precede every 2 byte
-* sequence.)
-*
-* ESC $ + J Indicates the immediate two bytes following SS3
-* is a Chinese character as defined in CNS
-* 11643-plane-4, until another SS3designation
-* appears
-* (In English: <ESC>O must precede every 2 byte
-* sequence.)
-*
-* ESC $ + K Indicates the immediate two bytes following SS3
-* is a Chinese character as defined in CNS
-* 11643-plane-5, until another SS3designation
-* appears
-*
-* ESC $ + L Indicates the immediate two bytes following SS3
-* is a Chinese character as defined in CNS
-* 11643-plane-6, until another SS3designation
-* appears
-*
-* ESC $ + M Indicates the immediate two bytes following SS3
-* is a Chinese character as defined in CNS
-* 11643-plane-7, until another SS3designation
-* appears
-*
-* As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
-* has its own designation information before any Chinese characters
-* appear
-*
-*/
-
-/* The following are defined this way to make the strings truly readonly.
- * Each string is a 4-byte designation sequence; the same bytes are spelled
- * out in ASCII next to each definition. */
-static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41"; /* ESC $ ) A */
-static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45"; /* ESC $ ) E */
-static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; /* ESC $ ) G */
-static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; /* ESC $ * H */
-static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; /* ESC $ + I */
-static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; /* ESC $ + J */
-static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; /* ESC $ + K */
-static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; /* ESC $ + L */
-static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; /* ESC $ + M */
-
-/********************** ISO2022-CN Data **************************/
-static const char* const escSeqCharsCN[10] ={ /* the fromUnicode code reads entries 3..9 as CNS_11643 + (cs - CNS_11643_1) */
- SHIFT_IN_STR, /* 0 ASCII */
- GB_2312_80_STR, /* 1 GB2312_1 */
- ISO_IR_165_STR, /* 2 ISO_IR_165 */
- CNS_11643_1992_Plane_1_STR, /* 3 */
- CNS_11643_1992_Plane_2_STR, /* 4 */
- CNS_11643_1992_Plane_3_STR, /* 5 */
- CNS_11643_1992_Plane_4_STR, /* 6 */
- CNS_11643_1992_Plane_5_STR, /* 7 */
- CNS_11643_1992_Plane_6_STR, /* 8 */
- CNS_11643_1992_Plane_7_STR /* 9 */
-};
-/*
- * fromUnicode implementation for ISO-2022-CN and its variants (selected by
- * converterData->version).
- *
- * Code points up to U+007F are written directly, preceded by SI when the
- * output is not in G0. SO, SI and ESC in the input are rejected with
- * U_ILLEGAL_CHAR_FOUND. CR and LF reset the whole fromU2022State.
- * Other code points are looked up in the candidate charsets listed in
- * choices[], preferring a roundtrip mapping over a fallback. The output for
- * such a character is: a 4-byte designation sequence if the charset is not the
- * one currently designated for its G set, a shift sequence if the G set is not
- * the current one (SO for G1, ESC N for G2, ESC O for G3), and two bytes.
- *
- * With args->flush at the end of the input and no pending code point, a final
- * SI is written if the output is not in ASCII mode.
- *
- * Errors: U_ILLEGAL_CHAR_FOUND (unpaired surrogate, or SO/SI/ESC),
- * U_INVALID_CHAR_FOUND (no mapping found), U_BUFFER_OVERFLOW_ERROR (target
- * full). For the first two the offending code point is stored in
- * cnv->fromUChar32.
- *
- * args: conversion arguments; source and target are written back on exit.
- * err:  in/out error code.
- */
-static void U_CALLCONV
-UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
- UConverter *cnv = args->converter;
- UConverterDataISO2022 *converterData;
- ISO2022State *pFromU2022State;
- uint8_t *target = (uint8_t *) args->target;
- const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
- const UChar* source = args->source;
- const UChar* sourceLimit = args->sourceLimit;
- int32_t* offsets = args->offsets;
- UChar32 sourceChar;
- char buffer[8];
- int32_t len;
- int8_t choices[3];
- int32_t choiceCount;
- uint32_t targetValue = 0;
- UBool useFallback;
-
- /* set up the state */
- converterData = (UConverterDataISO2022*)cnv->extraInfo;
- pFromU2022State = &converterData->fromU2022State;
-
- choiceCount = 0;
-
- /* check if the last codepoint of previous buffer was a lead surrogate*/
- if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
- goto getTrail;
- }
-
- while( source < sourceLimit){
- if(target < targetLimit){
-
- sourceChar = *(source++);
- /*check if the char is a First surrogate*/
- if(U16_IS_SURROGATE(sourceChar)) {
- if(U16_IS_SURROGATE_LEAD(sourceChar)) {
-getTrail:
- /*look ahead to find the trail surrogate*/
- if(source < sourceLimit) {
- /* test the following code unit */
- UChar trail=(UChar) *source;
- if(U16_IS_TRAIL(trail)) {
- source++;
- sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
- cnv->fromUChar32=0x00;
- /* convert this supplementary code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- cnv->fromUChar32=sourceChar;
- break;
- }
- } else {
- /* no more input */
- cnv->fromUChar32=sourceChar; /* keep the lead surrogate for the next call */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- cnv->fromUChar32=sourceChar;
- break;
- }
- }
-
- /* do the conversion */
- if(sourceChar <= 0x007f ){
- /* do not convert SO/SI/ESC */
- if(IS_2022_CONTROL(sourceChar)) {
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- cnv->fromUChar32=sourceChar;
- break;
- }
-
- /* US-ASCII */
- if(pFromU2022State->g == 0) {
- buffer[0] = (char)sourceChar;
- len = 1;
- } else {
- buffer[0] = UCNV_SI;
- buffer[1] = (char)sourceChar;
- len = 2;
- pFromU2022State->g = 0;
- choiceCount = 0;
- }
- if(sourceChar == CR || sourceChar == LF) {
- /* reset the state at the end of a line */
- uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));
- choiceCount = 0;
- }
- }
- else{
- /* convert U+0080..U+10ffff */
- int32_t i;
- int8_t cs, g;
-
- if(choiceCount == 0) {
- /* try the current SO/G1 converter first */
- choices[0] = pFromU2022State->cs[1];
-
- /* default to GB2312_1 if none is designated yet */
- if(choices[0] == 0) {
- choices[0] = GB2312_1;
- }
-
- if(converterData->version == 0) {
- /* ISO-2022-CN */
-
- /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
- if(choices[0] == GB2312_1) {
- choices[1] = (int8_t)CNS_11643_1;
- } else {
- choices[1] = (int8_t)GB2312_1;
- }
-
- choiceCount = 2;
- } else if (converterData->version == 1) {
- /* ISO-2022-CN-EXT */
-
- /* try one of the other converters */
- switch(choices[0]) {
- case GB2312_1:
- choices[1] = (int8_t)CNS_11643_1;
- choices[2] = (int8_t)ISO_IR_165;
- break;
- case ISO_IR_165:
- choices[1] = (int8_t)GB2312_1;
- choices[2] = (int8_t)CNS_11643_1;
- break;
- default: /* CNS_11643_x */
- choices[1] = (int8_t)GB2312_1;
- choices[2] = (int8_t)ISO_IR_165;
- break;
- }
-
- choiceCount = 3;
- } else { /* NOTE(review): choiceCount stays 0 in this branch, so the lookup loop below does not run and len stays 0 - confirm this is intended */
- choices[0] = (int8_t)CNS_11643_1;
- choices[1] = (int8_t)GB2312_1;
- }
- }
-
- cs = g = 0;
- /*
- * len==0: no mapping found yet
- * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
- * len>0: found a roundtrip result, done
- */
- len = 0;
- /*
- * We will turn off useFallback after finding a fallback,
- * but we still get fallbacks from PUA code points as usual.
- * Therefore, we will also need to check that we don't overwrite
- * an early fallback with a later one.
- */
- useFallback = cnv->useFallback;
-
- for(i = 0; i < choiceCount && len <= 0; ++i) {
- int8_t cs0 = choices[i];
- if(cs0 > 0) {
- uint32_t value;
- int32_t len2;
- if(cs0 >= CNS_11643_0) {
- len2 = MBCS_FROM_UCHAR32_ISO2022(
- converterData->myConverterArray[CNS_11643],
- sourceChar,
- &value,
- useFallback,
- MBCS_OUTPUT_3);
- if(len2 == 3 || (len2 == -3 && len == 0)) {
- targetValue = value;
- cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80); /* the top byte of the 3-byte result selects the CNS 11643 charset */
- if(len2 >= 0) {
- len = 2;
- } else {
- len = -2;
- useFallback = FALSE;
- }
- if(cs == CNS_11643_1) {
- g = 1;
- } else if(cs == CNS_11643_2) {
- g = 2;
- } else /* plane 3..7 */ if(converterData->version == 1) {
- g = 3;
- } else {
- /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
- len = 0;
- }
- }
- } else {
- /* GB2312_1 or ISO-IR-165 */
- U_ASSERT(cs0<UCNV_2022_MAX_CONVERTERS);
- len2 = MBCS_FROM_UCHAR32_ISO2022(
- converterData->myConverterArray[cs0],
- sourceChar,
- &value,
- useFallback,
- MBCS_OUTPUT_2);
- if(len2 == 2 || (len2 == -2 && len == 0)) {
- targetValue = value;
- len = len2;
- cs = cs0;
- g = 1;
- useFallback = FALSE;
- }
- }
- }
- }
-
- if(len != 0) {
- len = 0; /* count output bytes; it must have been abs(len) == 2 */
-
- /* write the designation sequence if necessary */
- if(cs != pFromU2022State->cs[g]) {
- if(cs < CNS_11643) {
- uprv_memcpy(buffer, escSeqCharsCN[cs], 4);
- } else {
- U_ASSERT(cs >= CNS_11643_1);
- uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);
- }
- len = 4;
- pFromU2022State->cs[g] = cs;
- if(g == 1) {
- /* changing the SO/G1 charset invalidates the choices[] */
- choiceCount = 0;
- }
- }
-
- /* write the shift sequence if necessary */
- if(g != pFromU2022State->g) {
- switch(g) {
- case 1:
- buffer[len++] = UCNV_SO;
-
- /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
- pFromU2022State->g = 1;
- break;
- case 2:
- buffer[len++] = 0x1b; /* ESC N */
- buffer[len++] = 0x4e;
- break;
- default: /* case 3 */
- buffer[len++] = 0x1b; /* ESC O */
- buffer[len++] = 0x4f;
- break;
- }
- }
-
- /* write the two output bytes */
- buffer[len++] = (char)(targetValue >> 8);
- buffer[len++] = (char)targetValue;
- } else {
- /* if we cannot find the character after checking all codepages
- * then this is an error
- */
- *err = U_INVALID_CHAR_FOUND;
- cnv->fromUChar32=sourceChar;
- break;
- }
- }
-
- /* output len>0 bytes in buffer[] */
- if(len == 1) {
- *target++ = buffer[0];
- if(offsets) {
- *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
- }
- } else if(len == 2 && (target + 2) <= targetLimit) {
- *target++ = buffer[0];
- *target++ = buffer[1];
- if(offsets) {
- int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
- *offsets++ = sourceIndex;
- *offsets++ = sourceIndex;
- }
- } else {
- fromUWriteUInt8(
- cnv,
- buffer, len,
- &target, (const char *)targetLimit,
- &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
- err);
- if(U_FAILURE(*err)) {
- break;
- }
- }
- } /* end if(myTargetIndex<myTargetLength) */
- else{
- *err =U_BUFFER_OVERFLOW_ERROR;
- break;
- }
-
- }/* end while(mySourceIndex<mySourceLength) */
-
- /*
- * the end of the input stream and detection of truncated input
- * are handled by the framework, but for ISO-2022-CN conversion
- * we need to be in ASCII mode at the very end
- *
- * conditions:
- * successful
- * not in ASCII mode
- * end of input and no truncated input
- */
- if( U_SUCCESS(*err) &&
- pFromU2022State->g!=0 &&
- args->flush && source>=sourceLimit && cnv->fromUChar32==0
- ) {
- int32_t sourceIndex;
-
- /* we are switching to ASCII */
- pFromU2022State->g=0;
-
- /* get the source index of the last input character */
- /*
- * TODO this would be simpler and more reliable if we used a pair
- * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
- * so that we could simply use the prevSourceIndex here;
- * this code gives an incorrect result for the rare case of an unmatched
- * trail surrogate that is alone in the last buffer of the text stream
- */
- sourceIndex=(int32_t)(source-args->source);
- if(sourceIndex>0) {
- --sourceIndex;
- if( U16_IS_TRAIL(args->source[sourceIndex]) &&
- (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
- ) {
- --sourceIndex;
- }
- } else {
- sourceIndex=-1;
- }
-
- fromUWriteUInt8(
- cnv,
- SHIFT_IN_STR, 1,
- &target, (const char *)targetLimit,
- &offsets, sourceIndex,
- err);
- }
-
- /*save the state and return */
- args->source = source;
- args->target = (char*)target;
-}
-
-/*
- * toUnicode implementation for ISO-2022-CN.
- *
- * SI selects G0. SO selects G1, but only if a G1 charset has been designated
- * (cs[1]!=0); otherwise the SO byte falls through to the error callback.
- * ESC sequences are handed to changeState_2022(). CR and LF reset the whole
- * toU2022State before being converted themselves.
- * In G0, bytes up to 0x7f are copied through unchanged. In any other G set,
- * byte pairs are converted with ucnv_MBCSSimpleGetNextUChar() using the
- * charset of the current G set; CNS 11643 charsets share the converter at
- * myConverterArray[CNS_11643] and are addressed with an extra leading byte
- * 0x80+(charset-CNS_11643_0).
- * After a character converted in G2 or G3 the previous G set (prevG) is
- * restored.
- * Results above missingCharMarker are written as a surrogate pair; when the
- * target has room only for the lead surrogate, the trail surrogate is put into
- * UCharErrorBuffer.
- * An SI while isEmptySegment is set, or a completed escape sequence while
- * isEmptySegment is set, is reported as U_ILLEGAL_ESCAPE_SEQUENCE.
- *
- * args: conversion arguments; source and target are written back on exit.
- * err:  in/out error code.
- */
-static void U_CALLCONV
-UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
- UErrorCode* err){
- char tempBuf[3];
- const char *mySource = (char *) args->source;
- UChar *myTarget = args->target;
- const char *mySourceLimit = args->sourceLimit;
- uint32_t targetUniChar = 0x0000;
- uint32_t mySourceChar = 0x0000;
- UConverterDataISO2022* myData;
- ISO2022State *pToU2022State;
-
- myData=(UConverterDataISO2022*)(args->converter->extraInfo);
- pToU2022State = &myData->toU2022State;
-
- if(myData->key != 0) {
- /* continue with a partial escape sequence */
- goto escape;
- } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
- /* continue with a partial double-byte character */
- mySourceChar = args->converter->toUBytes[0];
- args->converter->toULength = 0;
- targetUniChar = missingCharMarker;
- goto getTrailByte;
- }
-
- while(mySource < mySourceLimit){
-
- targetUniChar =missingCharMarker;
-
- if(myTarget < args->targetLimit){
-
- mySourceChar= (unsigned char) *mySource++;
-
- switch(mySourceChar){
- case UCNV_SI:
- pToU2022State->g=0;
- if (myData->isEmptySegment) {
- myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
- *err = U_ILLEGAL_ESCAPE_SEQUENCE;
- args->converter->toUCallbackReason = UCNV_IRREGULAR;
- args->converter->toUBytes[0] = static_cast<uint8_t>(mySourceChar);
- args->converter->toULength = 1;
- args->target = myTarget;
- args->source = mySource;
- return;
- }
- continue;
-
- case UCNV_SO:
- if(pToU2022State->cs[1] != 0) {
- pToU2022State->g=1;
- myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */
- continue;
- } else {
- /* illegal to have SO before a matching designator */
- myData->isEmptySegment = FALSE; /* Handling a different error, reset this to avoid future spurious errs */
- break; /* targetUniChar is still missingCharMarker, so the callback branch below is taken */
- }
-
- case ESC_2022:
- mySource--; /* hand the ESC byte itself to changeState_2022() */
-escape:
- {
- const char * mySourceBefore = mySource;
- int8_t toULengthBefore = args->converter->toULength;
-
- changeState_2022(args->converter,&(mySource),
- mySourceLimit, ISO_2022_CN,err);
-
- /* After SO there must be at least one character before a designator (designator error handled separately) */
- if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
- *err = U_ILLEGAL_ESCAPE_SEQUENCE;
- args->converter->toUCallbackReason = UCNV_IRREGULAR;
- args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore)); /* earlier length plus the bytes consumed by this call */
- }
- }
-
- /* invalid or illegal escape sequence */
- if(U_FAILURE(*err)){
- args->target = myTarget;
- args->source = mySource;
- myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */
- return;
- }
- continue;
-
- /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
-
- case CR:
- case LF:
- uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
- U_FALLTHROUGH;
- default:
- /* convert one or two bytes */
- myData->isEmptySegment = FALSE;
- if(pToU2022State->g != 0) {
- if(mySource < mySourceLimit) {
- UConverterSharedData *cnv;
- StateEnum tempState;
- int32_t tempBufLen;
- int leadIsOk, trailIsOk;
- uint8_t trailByte;
-getTrailByte:
- trailByte = (uint8_t)*mySource;
- /*
- * Ticket 5691: consistent illegal sequences:
- * - We include at least the first byte in the illegal sequence.
- * - If any of the non-initial bytes could be the start of a character,
- * we stop the illegal sequence before the first one of those.
- *
- * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
- * an ESC/SO/SI, we report only the first byte as the illegal sequence.
- * Otherwise we convert or report the pair of bytes.
- */
- leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
- trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
- if (leadIsOk && trailIsOk) {
- ++mySource;
- tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
- if(tempState >= CNS_11643_0) {
- cnv = myData->myConverterArray[CNS_11643];
- tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0)); /* extra leading byte derived from the charset */
- tempBuf[1] = (char) (mySourceChar);
- tempBuf[2] = (char) trailByte;
- tempBufLen = 3;
-
- }else{
- U_ASSERT(tempState<UCNV_2022_MAX_CONVERTERS);
- cnv = myData->myConverterArray[tempState];
- tempBuf[0] = (char) (mySourceChar);
- tempBuf[1] = (char) trailByte;
- tempBufLen = 2;
- }
- targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
- mySourceChar = (mySourceChar << 8) | trailByte; /* keep both bytes for the offset and error code below */
- } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
- /* report a pair of illegal bytes if the second byte is not a DBCS starter */
- ++mySource;
- /* add another bit so that the code below writes 2 bytes in case of error */
- mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
- }
- if(pToU2022State->g>=2) {
- /* return from a single-shift state to the previous one */
- pToU2022State->g=pToU2022State->prevG;
- }
- } else {
- args->converter->toUBytes[0] = (uint8_t)mySourceChar;
- args->converter->toULength = 1;
- goto endloop; /* lead byte saved; the trail byte is expected in the next buffer */
- }
- }
- else{
- if(mySourceChar <= 0x7f) {
- targetUniChar = (UChar) mySourceChar;
- }
- }
- break;
- }
- if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
- if(args->offsets){
- args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
- }
- *(myTarget++)=(UChar)targetUniChar;
- }
- else if(targetUniChar > missingCharMarker){
- /* disassemble the surrogate pair and write to output*/
- targetUniChar-=0x0010000;
- *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
- if(args->offsets){
- args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
- }
- ++myTarget;
- if(myTarget< args->targetLimit){
- *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
- if(args->offsets){
- args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
- }
- ++myTarget;
- }else{
- args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
- (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
- }
-
- }
- else{
- /* Call the callback function*/
- toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
- break;
- }
- }
- else{
- *err =U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-endloop:
- args->target = myTarget;
- args->source = mySource;
-}
-#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
-/*
- * Writes the substitution character for an ISO 2022 converter, first emitting
- * whatever shift or escape bytes are needed so that the substitution bytes are
- * written in a suitable state. The variant is chosen by the first letter of
- * myConverterData->locale:
- *   'j': SI if in G1, then ESC ( B unless G0 is already ASCII or JIS X 0201,
- *        then the first subchar byte
- *   'c': SI if not in G0, then the first subchar byte
- *   'k', version 0: SI before a 1-byte subchar when in DBCS mode, or SO before
- *        a 2-byte subchar when in SBCS mode, then the subchar
- *   'k', other versions: delegated to the subconverter through
- *        ucnv_cbFromUWriteSub(), with this converter's subchar temporarily
- *        installed in the subconverter; returns without using buffer[]
- * For every other path the assembled bytes are written with
- * ucnv_cbFromUWriteBytes().
- *
- * args:        conversion arguments of the public converter
- * offsetIndex: passed through to ucnv_cbFromUWriteBytes()
- * err:         in/out error code
- */
-static void U_CALLCONV
-_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
- UConverter *cnv = args->converter;
- UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
- ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
- char *p, *subchar;
- char buffer[8];
- int32_t length;
-
- subchar=(char *)cnv->subChars;
- length=cnv->subCharLen; /* assume length==1 for most variants */
-
- p = buffer;
- switch(myConverterData->locale[0]){
- case 'j':
- {
- int8_t cs;
-
- if(pFromU2022State->g == 1) {
- /* JIS7: switch from G1 to G0 */
- pFromU2022State->g = 0;
- *p++ = UCNV_SI;
- }
-
- cs = pFromU2022State->cs[0];
- if(cs != ASCII && cs != JISX201) {
- /* not in ASCII or JIS X 0201: switch to ASCII */
- pFromU2022State->cs[0] = (int8_t)ASCII;
- *p++ = '\x1b'; /* ESC ( B */
- *p++ = '\x28';
- *p++ = '\x42';
- }
-
- *p++ = subchar[0];
- break;
- }
- case 'c':
- if(pFromU2022State->g != 0) {
- /* not in ASCII mode: switch to ASCII */
- pFromU2022State->g = 0;
- *p++ = UCNV_SI;
- }
- *p++ = subchar[0];
- break;
- case 'k':
- if(myConverterData->version == 0) {
- if(length == 1) {
- if(args->converter->fromUnicodeStatus) {
- /* in DBCS mode: switch to SBCS */
- args->converter->fromUnicodeStatus = 0;
- *p++ = UCNV_SI;
- }
- *p++ = subchar[0];
- } else /* length == 2*/ {
- if(!args->converter->fromUnicodeStatus) {
- /* in SBCS mode: switch to DBCS */
- args->converter->fromUnicodeStatus = 1;
- *p++ = UCNV_SO;
- }
- *p++ = subchar[0];
- *p++ = subchar[1];
- }
- break;
- } else {
- /* save the subconverter's substitution string */
- uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
- int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
-
- /* set our substitution string into the subconverter */
- myConverterData->currentConverter->subChars = (uint8_t *)subchar;
- myConverterData->currentConverter->subCharLen = (int8_t)length;
-
- /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
- args->converter = myConverterData->currentConverter;
- myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
- ucnv_cbFromUWriteSub(args, 0, err); /* NOTE(review): passes 0 rather than offsetIndex - confirm this is intended */
- cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
- args->converter = cnv;
-
- /* restore the subconverter's substitution string */
- myConverterData->currentConverter->subChars = currentSubChars;
- myConverterData->currentConverter->subCharLen = currentSubCharLen;
-
- if(*err == U_BUFFER_OVERFLOW_ERROR) {
- if(myConverterData->currentConverter->charErrorBufferLength > 0) {
- uprv_memcpy(
- cnv->charErrorBuffer,
- myConverterData->currentConverter->charErrorBuffer,
- myConverterData->currentConverter->charErrorBufferLength);
- }
- cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
- myConverterData->currentConverter->charErrorBufferLength = 0; /* the overflow has been moved to the public converter */
- }
- return;
- }
- default:
- /* not expected */
- break; /* nothing was put into buffer[], so zero bytes are written below */
- }
- ucnv_cbFromUWriteBytes(args,
- buffer, (int32_t)(p - buffer),
- offsetIndex, err);
-}
-
-/*
- * Structure for cloning an ISO 2022 converter into a single memory block.
- * _ISO_2022_SafeClone() reports sizeof(struct cloneStruct) as the required
- * buffer size and treats the caller's buffer as one of these.
- */
-struct cloneStruct
-{
- UConverter cnv; /* the cloned public converter; its address is what _ISO_2022_SafeClone() returns */
- UConverter currentConverter; /* storage handed to ucnv_safeClone() for the clone of the subconverter */
- UConverterDataISO2022 mydata; /* copy of the original extraInfo; cnv.extraInfo is pointed here */
-};
-
-
-U_CDECL_BEGIN
-
-/*
- * Clone hook for ISO 2022 converters.
- *
- * cnv:         the converter being cloned
- * stackBuffer: memory that is used as a struct cloneStruct
- * pBufferSize: when it points to 0 this is a preflighting request: the
- *              required size is stored there and NULL is returned
- * status:      in/out error code; nothing is done if it already holds a failure
- *
- * Returns the address of the cloned public converter inside stackBuffer, or a
- * null pointer on preflighting or failure.
- */
-static UConverter * U_CALLCONV
-_ISO_2022_SafeClone(
-    const UConverter *cnv,
-    void *stackBuffer,
-    int32_t *pBufferSize,
-    UErrorCode *status)
-{
-    struct cloneStruct *clone;
-    UConverterDataISO2022 *srcData;
-    int32_t idx;
-
-    if (U_FAILURE(*status)) {
-        return nullptr;
-    }
-
-    /* preflighting: only report how much memory a clone needs */
-    if (*pBufferSize == 0) {
-        *pBufferSize = (int32_t)sizeof(struct cloneStruct);
-        return NULL;
-    }
-
-    clone = (struct cloneStruct *)stackBuffer;
-    srcData = (UConverterDataISO2022 *)cnv->extraInfo;
-
-    /* the main UConverter was already copied by ucnv_safeClone(); add the extra data */
-    uprv_memcpy(&clone->mydata, srcData, sizeof(UConverterDataISO2022));
-    clone->cnv.isExtraLocal = TRUE;
-    clone->cnv.extraInfo = &clone->mydata;
-
-    /* the subconverter, if there is one, is cloned into the same memory block */
-    if (srcData->currentConverter != NULL) {
-        int32_t subSize = (int32_t)sizeof(UConverter);
-
-        clone->mydata.currentConverter = ucnv_safeClone(
-            srcData->currentConverter,
-            &clone->currentConverter,
-            &subSize,
-            status);
-        if (U_FAILURE(*status)) {
-            return NULL;
-        }
-    }
-
-    /* the shared converter data is not copied; each non-null entry gains a reference */
-    for (idx = 0; idx < UCNV_2022_MAX_CONVERTERS; ++idx) {
-        if (srcData->myConverterArray[idx] == NULL) {
-            continue;
-        }
-        ucnv_incrementRefCount(srcData->myConverterArray[idx]);
-    }
-
-    return &clone->cnv;
-}
-
-U_CDECL_END
-
-static void U_CALLCONV
-_ISO_2022_GetUnicodeSet(const UConverter *cnv,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode)
-{
- int32_t i;
- UConverterDataISO2022* cnvData;
-
- if (U_FAILURE(*pErrorCode)) {
- return;
- }
-#ifdef U_ENABLE_GENERIC_ISO_2022
- if (cnv->sharedData == &_ISO2022Data) {
- /* We use UTF-8 in this case */
- sa->addRange(sa->set, 0, 0xd7FF);
- sa->addRange(sa->set, 0xE000, 0x10FFFF);
- return;
- }
-#endif
-
- cnvData = (UConverterDataISO2022*)cnv->extraInfo;
-
- /* open a set and initialize it with code points that are algorithmically round-tripped */
- switch(cnvData->locale[0]){
- case 'j':
- /* include JIS X 0201 which is hardcoded */
- sa->add(sa->set, 0xa5);
- sa->add(sa->set, 0x203e);
- if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
- /* include Latin-1 for some variants of JP */
- sa->addRange(sa->set, 0, 0xff);
- } else {
- /* include ASCII for JP */
- sa->addRange(sa->set, 0, 0x7f);
- }
- if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
- /*
- * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
- * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
- * use half-width Katakana.
- * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
- * half-width Katakana via the ESC ( I sequence.
- * However, we only emit (fromUnicode) half-width Katakana according to the
- * definition of each variant.
- *
- * When including fallbacks,
- * we need to include half-width Katakana Unicode code points for all JP variants because
- * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
- */
- /* include half-width Katakana for JP */
- sa->addRange(sa->set, HWKANA_START, HWKANA_END);
- }
- break;
-#if !UCONFIG_ONLY_HTML_CONVERSION
- case 'c':
- case 'z':
- /* include ASCII for CN */
- sa->addRange(sa->set, 0, 0x7f);
- break;
- case 'k':
- /* there is only one converter for KR, and it is not in the myConverterArray[] */
- cnvData->currentConverter->sharedData->impl->getUnicodeSet(
- cnvData->currentConverter, sa, which, pErrorCode);
- /* the loop over myConverterArray[] will simply not find another converter */
- break;
-#endif
- default:
- break;
- }
-
-#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
- if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
- cnvData->version==0 && i==CNS_11643
- ) {
- /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
- ucnv_MBCSGetUnicodeSetForBytes(
- cnvData->myConverterArray[i],
- sa, UCNV_ROUNDTRIP_SET,
- 0, 0x81, 0x82,
- pErrorCode);
- }
-#endif
-
- for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
- UConverterSetFilter filter;
- if(cnvData->myConverterArray[i]!=NULL) {
- if(cnvData->locale[0]=='j' && i==JISX208) {
- /*
- * Only add code points that map to Shift-JIS codes
- * corresponding to JIS X 0208.
- */
- filter=UCNV_SET_FILTER_SJIS;
-#if !UCONFIG_ONLY_HTML_CONVERSION
- } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
- cnvData->version==0 && i==CNS_11643) {
- /*
- * Version-specific for CN:
- * CN version 0 does not map CNS planes 3..7 although
- * they are all available in the CNS conversion table;
- * CN version 1 (-EXT) does map them all.
- * The two versions create different Unicode sets.
- */
- filter=UCNV_SET_FILTER_2022_CN;
- } else if(i==KSC5601) {
- /*
- * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
- * are broader than GR94.
- */
- filter=UCNV_SET_FILTER_GR94DBCS;
-#endif
- } else {
- filter=UCNV_SET_FILTER_NONE;
- }
- ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
- }
- }
-
- /*
- * ISO 2022 converters must not convert SO/SI/ESC despite what
- * sub-converters do by themselves.
- * Remove these characters from the set.
- */
- sa->remove(sa->set, 0x0e);
- sa->remove(sa->set, 0x0f);
- sa->remove(sa->set, 0x1b);
-
- /* ISO 2022 converters do not convert C1 controls either */
- sa->removeRange(sa->set, 0x80, 0x9f);
-}
-
-static const UConverterImpl _ISO2022Impl={
- UCNV_ISO_2022,
-
- NULL,
- NULL,
-
- _ISO2022Open,
- _ISO2022Close,
- _ISO2022Reset,
-
-#ifdef U_ENABLE_GENERIC_ISO_2022
- T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
- T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
- ucnv_fromUnicode_UTF8,
- ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
-#else
- NULL,
- NULL,
- NULL,
- NULL,
-#endif
- NULL,
-
- NULL,
- _ISO2022getName,
- _ISO_2022_WriteSub,
- _ISO_2022_SafeClone,
- _ISO_2022_GetUnicodeSet,
-
- NULL,
- NULL
-};
-static const UConverterStaticData _ISO2022StaticData={
- sizeof(UConverterStaticData),
- "ISO_2022",
- 2022,
- UCNV_IBM,
- UCNV_ISO_2022,
- 1,
- 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
- { 0x1a, 0, 0, 0 },
- 1,
- FALSE,
- FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-const UConverterSharedData _ISO2022Data=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022StaticData, &_ISO2022Impl);
-
-/*************JP****************/
-static const UConverterImpl _ISO2022JPImpl={
- UCNV_ISO_2022,
-
- NULL,
- NULL,
-
- _ISO2022Open,
- _ISO2022Close,
- _ISO2022Reset,
-
- UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
- UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
- UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
- UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
- NULL,
-
- NULL,
- _ISO2022getName,
- _ISO_2022_WriteSub,
- _ISO_2022_SafeClone,
- _ISO_2022_GetUnicodeSet,
-
- NULL,
- NULL
-};
-static const UConverterStaticData _ISO2022JPStaticData={
- sizeof(UConverterStaticData),
- "ISO_2022_JP",
- 0,
- UCNV_IBM,
- UCNV_ISO_2022,
- 1,
- 6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
- { 0x1a, 0, 0, 0 },
- 1,
- FALSE,
- FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-namespace {
-
-const UConverterSharedData _ISO2022JPData=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022JPStaticData, &_ISO2022JPImpl);
-
-} // namespace
-
-#if !UCONFIG_ONLY_HTML_CONVERSION
-/************* KR ***************/
-static const UConverterImpl _ISO2022KRImpl={
- UCNV_ISO_2022,
-
- NULL,
- NULL,
-
- _ISO2022Open,
- _ISO2022Close,
- _ISO2022Reset,
-
- UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
- UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
- UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
- UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
- NULL,
-
- NULL,
- _ISO2022getName,
- _ISO_2022_WriteSub,
- _ISO_2022_SafeClone,
- _ISO_2022_GetUnicodeSet,
-
- NULL,
- NULL
-};
-static const UConverterStaticData _ISO2022KRStaticData={
- sizeof(UConverterStaticData),
- "ISO_2022_KR",
- 0,
- UCNV_IBM,
- UCNV_ISO_2022,
- 1,
- 8, /* max 8 bytes per UChar */
- { 0x1a, 0, 0, 0 },
- 1,
- FALSE,
- FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-namespace {
-
-const UConverterSharedData _ISO2022KRData=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022KRStaticData, &_ISO2022KRImpl);
-
-} // namespace
-
-/*************** CN ***************/
-static const UConverterImpl _ISO2022CNImpl={
-
- UCNV_ISO_2022,
-
- NULL,
- NULL,
-
- _ISO2022Open,
- _ISO2022Close,
- _ISO2022Reset,
-
- UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
- UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
- UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
- UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
- NULL,
-
- NULL,
- _ISO2022getName,
- _ISO_2022_WriteSub,
- _ISO_2022_SafeClone,
- _ISO_2022_GetUnicodeSet,
-
- NULL,
- NULL
-};
-static const UConverterStaticData _ISO2022CNStaticData={
- sizeof(UConverterStaticData),
- "ISO_2022_CN",
- 0,
- UCNV_IBM,
- UCNV_ISO_2022,
- 1,
- 8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
- { 0x1a, 0, 0, 0 },
- 1,
- FALSE,
- FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-namespace {
-
-const UConverterSharedData _ISO2022CNData=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022CNImpl);
-
-} // namespace
-#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
-
-#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
diff --git a/contrib/libs/icu/common/ucnv_bld.cpp b/contrib/libs/icu/common/ucnv_bld.cpp
deleted file mode 100644
index 56fc3d6822f..00000000000
--- a/contrib/libs/icu/common/ucnv_bld.cpp
+++ /dev/null
@@ -1,1689 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- ********************************************************************
- * COPYRIGHT:
- * Copyright (c) 1996-2016, International Business Machines Corporation and
- * others. All Rights Reserved.
- ********************************************************************
- *
- * ucnv_bld.cpp:
- *
- * Defines functions that are used in the creation/initialization/deletion
- * of converters and related structures.
- * uses uconv_io.h routines to access disk information
- * is used by ucnv.h to implement public API create/delete/flushCache routines
- * Modification History:
- *
- * Date Name Description
- *
- * 06/20/2000 helena OS/400 port changes; mostly typecast.
- * 06/29/2000 helena Major rewrite of the callback interface.
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/putil.h"
-#include "unicode/udata.h"
-#include "unicode/ucnv.h"
-#include "unicode/uloc.h"
-#include "mutex.h"
-#include "putilimp.h"
-#include "uassert.h"
-#include "utracimp.h"
-#include "ucnv_io.h"
-#include "ucnv_bld.h"
-#include "ucnvmbcs.h"
-#include "ucnv_ext.h"
-#include "ucnv_cnv.h"
-#include "ucnv_imp.h"
-#include "uhash.h"
-#include "umutex.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "ucln_cmn.h"
-#include "ustr_cnv.h"
-
-
-#if 0
-#include <stdio.h>
-extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l);
-#define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__)
-#else
-# define UCNV_DEBUG_LOG(x,y,z)
-#endif
-
-static const UConverterSharedData * const
-converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
- NULL, NULL,
-
-#if UCONFIG_NO_LEGACY_CONVERSION
- NULL,
-#else
- &_MBCSData,
-#endif
-
- &_Latin1Data,
- &_UTF8Data, &_UTF16BEData, &_UTF16LEData,
-#if UCONFIG_ONLY_HTML_CONVERSION
- NULL, NULL,
-#else
- &_UTF32BEData, &_UTF32LEData,
-#endif
- NULL,
-
-#if UCONFIG_NO_LEGACY_CONVERSION
- NULL,
-#else
- &_ISO2022Data,
-#endif
-
-#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
- NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL,
- NULL,
-#else
- &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
- &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
- &_HZData,
-#endif
-
-#if UCONFIG_ONLY_HTML_CONVERSION
- NULL,
-#else
- &_SCSUData,
-#endif
-
-
-#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
- NULL,
-#else
- &_ISCIIData,
-#endif
-
- &_ASCIIData,
-#if UCONFIG_ONLY_HTML_CONVERSION
- NULL, NULL, &_UTF16Data, NULL, NULL, NULL,
-#else
- &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,
-#endif
-
-#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
- NULL,
-#else
- &_CompoundTextData
-#endif
-};
-
-/* Please keep this in binary sorted order for getAlgorithmicTypeFromName.
- Also the name should be in lower case and all spaces, dashes and underscores
- removed
-*/
-static struct {
- const char *name;
- const UConverterType type;
-} const cnvNameType[] = {
-#if !UCONFIG_ONLY_HTML_CONVERSION
- { "bocu1", UCNV_BOCU1 },
- { "cesu8", UCNV_CESU8 },
-#endif
-#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
- { "hz",UCNV_HZ },
-#endif
-#if !UCONFIG_ONLY_HTML_CONVERSION
- { "imapmailboxname", UCNV_IMAP_MAILBOX },
-#endif
-#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
- { "iscii", UCNV_ISCII },
-#endif
-#if !UCONFIG_NO_LEGACY_CONVERSION
- { "iso2022", UCNV_ISO_2022 },
-#endif
- { "iso88591", UCNV_LATIN_1 },
-#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
- { "lmbcs1", UCNV_LMBCS_1 },
- { "lmbcs11",UCNV_LMBCS_11 },
- { "lmbcs16",UCNV_LMBCS_16 },
- { "lmbcs17",UCNV_LMBCS_17 },
- { "lmbcs18",UCNV_LMBCS_18 },
- { "lmbcs19",UCNV_LMBCS_19 },
- { "lmbcs2", UCNV_LMBCS_2 },
- { "lmbcs3", UCNV_LMBCS_3 },
- { "lmbcs4", UCNV_LMBCS_4 },
- { "lmbcs5", UCNV_LMBCS_5 },
- { "lmbcs6", UCNV_LMBCS_6 },
- { "lmbcs8", UCNV_LMBCS_8 },
-#endif
-#if !UCONFIG_ONLY_HTML_CONVERSION
- { "scsu", UCNV_SCSU },
-#endif
- { "usascii", UCNV_US_ASCII },
- { "utf16", UCNV_UTF16 },
- { "utf16be", UCNV_UTF16_BigEndian },
- { "utf16le", UCNV_UTF16_LittleEndian },
-#if U_IS_BIG_ENDIAN
- { "utf16oppositeendian", UCNV_UTF16_LittleEndian },
- { "utf16platformendian", UCNV_UTF16_BigEndian },
-#else
- { "utf16oppositeendian", UCNV_UTF16_BigEndian},
- { "utf16platformendian", UCNV_UTF16_LittleEndian },
-#endif
-#if !UCONFIG_ONLY_HTML_CONVERSION
- { "utf32", UCNV_UTF32 },
- { "utf32be", UCNV_UTF32_BigEndian },
- { "utf32le", UCNV_UTF32_LittleEndian },
-#if U_IS_BIG_ENDIAN
- { "utf32oppositeendian", UCNV_UTF32_LittleEndian },
- { "utf32platformendian", UCNV_UTF32_BigEndian },
-#else
- { "utf32oppositeendian", UCNV_UTF32_BigEndian },
- { "utf32platformendian", UCNV_UTF32_LittleEndian },
-#endif
-#endif
-#if !UCONFIG_ONLY_HTML_CONVERSION
- { "utf7", UCNV_UTF7 },
-#endif
- { "utf8", UCNV_UTF8 },
-#if !UCONFIG_ONLY_HTML_CONVERSION
- { "x11compoundtext", UCNV_COMPOUND_TEXT}
-#endif
-};
-
-
-/*initializes some global variables */
-static UHashtable *SHARED_DATA_HASHTABLE = NULL;
-static icu::UMutex cnvCacheMutex;
-/* Note: the global mutex is used for */
-/* reference count updates. */
-
-static const char **gAvailableConverters = NULL;
-static uint16_t gAvailableConverterCount = 0;
-static icu::UInitOnce gAvailableConvertersInitOnce = U_INITONCE_INITIALIZER;
-
-#if !U_CHARSET_IS_UTF8
-
-/* This contains the resolved converter name. So no further alias lookup is needed again. */
-static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */
-static const char *gDefaultConverterName = NULL;
-
-/*
-If the default converter is an algorithmic converter, this is the cached value.
-We don't cache a full UConverter and clone it because ucnv_clone doesn't have
-less overhead than an algorithmic open. We don't cache non-algorithmic converters
-because ucnv_flushCache must be able to unload the default converter and its table.
-*/
-static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL;
-
-/* Does gDefaultConverterName have a converter option and require extra parsing? */
-static UBool gDefaultConverterContainsOption;
-
-#endif /* !U_CHARSET_IS_UTF8 */
-
-static const char DATA_TYPE[] = "cnv";
-
-/* ucnv_flushAvailableConverterCache. This is only called from ucnv_cleanup().
- * If it is ever to be called from elsewhere, synchronization
- * will need to be considered.
- */
-static void
-ucnv_flushAvailableConverterCache() {
- gAvailableConverterCount = 0;
- if (gAvailableConverters) {
- uprv_free((char **)gAvailableConverters);
- gAvailableConverters = NULL;
- }
- gAvailableConvertersInitOnce.reset();
-}
-
-/* ucnv_cleanup - delete all storage held by the converter cache, except any */
-/* in use by open converters. */
-/* Not thread safe. */
-/* Not supported API. */
-static UBool U_CALLCONV ucnv_cleanup(void) {
- ucnv_flushCache();
- if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
- uhash_close(SHARED_DATA_HASHTABLE);
- SHARED_DATA_HASHTABLE = NULL;
- }
-
- /* Isn't called from flushCache because other threads may have preexisting references to the table. */
- ucnv_flushAvailableConverterCache();
-
-#if !U_CHARSET_IS_UTF8
- gDefaultConverterName = NULL;
- gDefaultConverterNameBuffer[0] = 0;
- gDefaultConverterContainsOption = FALSE;
- gDefaultAlgorithmicSharedData = NULL;
-#endif
-
- return (SHARED_DATA_HASHTABLE == NULL);
-}
-
-U_CAPI void U_EXPORT2
-ucnv_enableCleanup() {
- ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);
-}
-
-static UBool U_CALLCONV
-isCnvAcceptable(void * /*context*/,
- const char * /*type*/, const char * /*name*/,
- const UDataInfo *pInfo) {
- return (UBool)(
- pInfo->size>=20 &&
- pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
- pInfo->charsetFamily==U_CHARSET_FAMILY &&
- pInfo->sizeofUChar==U_SIZEOF_UCHAR &&
- pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */
- pInfo->dataFormat[1]==0x6e &&
- pInfo->dataFormat[2]==0x76 &&
- pInfo->dataFormat[3]==0x74 &&
- pInfo->formatVersion[0]==6); /* Everything will be version 6 */
-}
-
-/**
- * Un flatten shared data from a UDATA..
- */
-static UConverterSharedData*
-ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status)
-{
- /* UDataInfo info; -- necessary only if some converters have different formatVersion */
- const uint8_t *raw = (const uint8_t *)udata_getMemory(pData);
- const UConverterStaticData *source = (const UConverterStaticData *) raw;
- UConverterSharedData *data;
- UConverterType type = (UConverterType)source->conversionType;
-
- if(U_FAILURE(*status))
- return NULL;
-
- if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES ||
- converterData[type] == NULL ||
- !converterData[type]->isReferenceCounted ||
- converterData[type]->referenceCounter != 1 ||
- source->structSize != sizeof(UConverterStaticData))
- {
- *status = U_INVALID_TABLE_FORMAT;
- return NULL;
- }
-
- data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData));
- if(data == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- /* copy initial values from the static structure for this type */
- uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData));
-
- data->staticData = source;
-
- data->sharedDataCached = FALSE;
-
- /* fill in fields from the loaded data */
- data->dataMemory = (void*)pData; /* for future use */
-
- if(data->impl->load != NULL) {
- data->impl->load(data, pArgs, raw + source->structSize, status);
- if(U_FAILURE(*status)) {
- uprv_free(data);
- return NULL;
- }
- }
- return data;
-}
-
-/*Takes an alias name gets an actual converter file name
- *goes to disk and opens it.
- *allocates the memory and returns a new UConverter object
- */
-static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err)
-{
- UDataMemory *data;
- UConverterSharedData *sharedData;
-
- UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD);
-
- if (U_FAILURE (*err)) {
- UTRACE_EXIT_STATUS(*err);
- return NULL;
- }
-
- UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg);
-
- data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err);
- if(U_FAILURE(*err))
- {
- UTRACE_EXIT_STATUS(*err);
- return NULL;
- }
-
- sharedData = ucnv_data_unFlattenClone(pArgs, data, err);
- if(U_FAILURE(*err))
- {
- udata_close(data);
- UTRACE_EXIT_STATUS(*err);
- return NULL;
- }
-
- /*
- * TODO Store pkg in a field in the shared data so that delta-only converters
- * can load base converters from the same package.
- * If the pkg name is longer than the field, then either do not load the converter
- * in the first place, or just set the pkg field to "".
- */
-
- UTRACE_EXIT_PTR_STATUS(sharedData, *err);
- return sharedData;
-}
-
-/*returns a converter type from a string
- */
-static const UConverterSharedData *
-getAlgorithmicTypeFromName(const char *realName)
-{
- uint32_t mid, start, limit;
- uint32_t lastMid;
- int result;
- char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
-
- /* Lower case and remove ignoreable characters. */
- ucnv_io_stripForCompare(strippedName, realName);
-
- /* do a binary search for the alias */
- start = 0;
- limit = UPRV_LENGTHOF(cnvNameType);
- mid = limit;
- lastMid = UINT32_MAX;
-
- for (;;) {
- mid = (uint32_t)((start + limit) / 2);
- if (lastMid == mid) { /* Have we moved? */
- break; /* We haven't moved, and it wasn't found. */
- }
- lastMid = mid;
- result = uprv_strcmp(strippedName, cnvNameType[mid].name);
-
- if (result < 0) {
- limit = mid;
- } else if (result > 0) {
- start = mid;
- } else {
- return converterData[cnvNameType[mid].type];
- }
- }
-
- return NULL;
-}
-
-/*
-* Based on the number of known converters, this determines how many times larger
-* the shared data hash table should be. When on small platforms, or just a couple
-* of converters are used, this number should be 2. When memory is plentiful, or
-* when ucnv_countAvailable is ever used with a lot of available converters,
-* this should be 4.
-* Larger numbers reduce the number of hash collisions, but use more memory.
-*/
-#define UCNV_CACHE_LOAD_FACTOR 2
-
-/* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */
-/* Will always be called with the cnvCacheMutex alrady being held */
-/* by the calling function. */
-/* Stores the shared data in the SHARED_DATA_HASHTABLE
- * @param data The shared data
- */
-static void
-ucnv_shareConverterData(UConverterSharedData * data)
-{
- UErrorCode err = U_ZERO_ERROR;
- /*Lazy evaluates the Hashtable itself */
- /*void *sanity = NULL;*/
-
- if (SHARED_DATA_HASHTABLE == NULL)
- {
- SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL,
- ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR,
- &err);
- ucnv_enableCleanup();
-
- if (U_FAILURE(err))
- return;
- }
-
- /* ### check to see if the element is not already there! */
-
- /*
- sanity = ucnv_getSharedConverterData (data->staticData->name);
- if(sanity != NULL)
- {
- UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity);
- }
- UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity);
- */
-
- /* Mark it shared */
- data->sharedDataCached = TRUE;
-
- uhash_put(SHARED_DATA_HASHTABLE,
- (void*) data->staticData->name, /* Okay to cast away const as long as
- keyDeleter == NULL */
- data,
- &err);
- UCNV_DEBUG_LOG("put", data->staticData->name,data);
-
-}
-
-/* Look up a converter name in the shared data cache. */
-/* cnvCacheMutex must be held by the caller to protect the hash table. */
-/* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there)
- * @param name The name of the shared data
- * @return the shared data from the SHARED_DATA_HASHTABLE
- */
-static UConverterSharedData *
-ucnv_getSharedConverterData(const char *name)
-{
- /*special case when no Table has yet been created we return NULL */
- if (SHARED_DATA_HASHTABLE == NULL)
- {
- return NULL;
- }
- else
- {
- UConverterSharedData *rc;
-
- rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name);
- UCNV_DEBUG_LOG("get",name,rc);
- return rc;
- }
-}
-
-/*frees the string of memory blocks associates with a sharedConverter
- *if and only if the referenceCounter == 0
- */
-/* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to
- * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and
- * returns TRUE,
- * otherwise returns FALSE
- * @param sharedConverterData The shared data
- * @return if not it frees all the memory stemming from sharedConverterData and
- * returns TRUE, otherwise returns FALSE
- */
-static UBool
-ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData)
-{
- UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD);
- UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData);
-
- if (deadSharedData->referenceCounter > 0) {
- UTRACE_EXIT_VALUE((int32_t)FALSE);
- return FALSE;
- }
-
- if (deadSharedData->impl->unload != NULL) {
- deadSharedData->impl->unload(deadSharedData);
- }
-
- if(deadSharedData->dataMemory != NULL)
- {
- UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory;
- udata_close(data);
- }
-
- uprv_free(deadSharedData);
-
- UTRACE_EXIT_VALUE((int32_t)TRUE);
- return TRUE;
-}
-
-/**
- * Load a non-algorithmic converter.
- * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex).
- */
-UConverterSharedData *
-ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) {
- UConverterSharedData *mySharedConverterData;
-
- if(err == NULL || U_FAILURE(*err)) {
- return NULL;
- }
-
- if(pArgs->pkg != NULL && *pArgs->pkg != 0) {
- /* application-provided converters are not currently cached */
- return createConverterFromFile(pArgs, err);
- }
-
- mySharedConverterData = ucnv_getSharedConverterData(pArgs->name);
- if (mySharedConverterData == NULL)
- {
- /*Not cached, we need to stream it in from file */
- mySharedConverterData = createConverterFromFile(pArgs, err);
- if (U_FAILURE (*err) || (mySharedConverterData == NULL))
- {
- return NULL;
- }
- else if (!pArgs->onlyTestIsLoadable)
- {
- /* share it with other library clients */
- ucnv_shareConverterData(mySharedConverterData);
- }
- }
- else
- {
- /* The data for this converter was already in the cache. */
- /* Update the reference counter on the shared data: one more client */
- mySharedConverterData->referenceCounter++;
- }
-
- return mySharedConverterData;
-}
-
-/**
- * Unload a non-algorithmic converter.
- * It must be sharedData->isReferenceCounted
- * and this function must be called inside umtx_lock(&cnvCacheMutex).
- */
-U_CAPI void
-ucnv_unload(UConverterSharedData *sharedData) {
- if(sharedData != NULL) {
- if (sharedData->referenceCounter > 0) {
- sharedData->referenceCounter--;
- }
-
- if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) {
- ucnv_deleteSharedConverterData(sharedData);
- }
- }
-}
-
-U_CFUNC void
-ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData)
-{
- if(sharedData != NULL && sharedData->isReferenceCounted) {
- umtx_lock(&cnvCacheMutex);
- ucnv_unload(sharedData);
- umtx_unlock(&cnvCacheMutex);
- }
-}
-
-U_CFUNC void
-ucnv_incrementRefCount(UConverterSharedData *sharedData)
-{
- if(sharedData != NULL && sharedData->isReferenceCounted) {
- umtx_lock(&cnvCacheMutex);
- sharedData->referenceCounter++;
- umtx_unlock(&cnvCacheMutex);
- }
-}
-
-/*
- * *pPieces must be initialized.
- * The name without options will be copied to pPieces->cnvName.
- * The locale and options will be copied to pPieces only if present in inName,
- * otherwise the existing values in pPieces remain.
- * *pArgs will be set to the pPieces values.
- */
-static void
-parseConverterOptions(const char *inName,
- UConverterNamePieces *pPieces,
- UConverterLoadArgs *pArgs,
- UErrorCode *err)
-{
- char *cnvName = pPieces->cnvName;
- char c;
- int32_t len = 0;
-
- pArgs->name=inName;
- pArgs->locale=pPieces->locale;
- pArgs->options=pPieces->options;
-
- /* copy the converter name itself to cnvName */
- while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) {
- if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) {
- *err = U_ILLEGAL_ARGUMENT_ERROR; /* bad name */
- pPieces->cnvName[0]=0;
- return;
- }
- *cnvName++=c;
- inName++;
- }
- *cnvName=0;
- pArgs->name=pPieces->cnvName;
-
- /* parse options. No more name copying should occur. */
- while((c=*inName)!=0) {
- if(c==UCNV_OPTION_SEP_CHAR) {
- ++inName;
- }
-
- /* inName is behind an option separator */
- if(uprv_strncmp(inName, "locale=", 7)==0) {
- /* do not modify locale itself in case we have multiple locale options */
- char *dest=pPieces->locale;
-
- /* copy the locale option value */
- inName+=7;
- len=0;
- while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) {
- ++inName;
-
- if(++len>=ULOC_FULLNAME_CAPACITY) {
- *err=U_ILLEGAL_ARGUMENT_ERROR; /* bad name */
- pPieces->locale[0]=0;
- return;
- }
-
- *dest++=c;
- }
- *dest=0;
- } else if(uprv_strncmp(inName, "version=", 8)==0) {
- /* copy the version option value into bits 3..0 of pPieces->options */
- inName+=8;
- c=*inName;
- if(c==0) {
- pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION);
- return;
- } else if((uint8_t)(c-'0')<10) {
- pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0');
- ++inName;
- }
- } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) {
- inName+=8;
- pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL);
- /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */
- } else {
- /* ignore any other options until we define some */
- while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) {
- }
- if(c==0) {
- return;
- }
- }
- }
-}
-
-/*Logic determines if the converter is Algorithmic AND/OR cached
- *depending on that:
- * -we either go to get data from disk and cache it (Data=TRUE, Cached=False)
- * -Get it from a Hashtable (Data=X, Cached=TRUE)
- * -Call dataConverter initializer (Data=TRUE, Cached=TRUE)
- * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE)
- */
-U_CFUNC UConverterSharedData *
-ucnv_loadSharedData(const char *converterName,
- UConverterNamePieces *pPieces,
- UConverterLoadArgs *pArgs,
- UErrorCode * err) {
- UConverterNamePieces stackPieces;
- UConverterLoadArgs stackArgs;
- UConverterSharedData *mySharedConverterData = NULL;
- UErrorCode internalErrorCode = U_ZERO_ERROR;
- UBool mayContainOption = TRUE;
- UBool checkForAlgorithmic = TRUE;
-
- if (U_FAILURE (*err)) {
- return NULL;
- }
-
- if(pPieces == NULL) {
- if(pArgs != NULL) {
- /*
- * Bad: We may set pArgs pointers to stackPieces fields
- * which will be invalid after this function returns.
- */
- *err = U_INTERNAL_PROGRAM_ERROR;
- return NULL;
- }
- pPieces = &stackPieces;
- }
- if(pArgs == NULL) {
- uprv_memset(&stackArgs, 0, sizeof(stackArgs));
- stackArgs.size = (int32_t)sizeof(stackArgs);
- pArgs = &stackArgs;
- }
-
- pPieces->cnvName[0] = 0;
- pPieces->locale[0] = 0;
- pPieces->options = 0;
-
- pArgs->name = converterName;
- pArgs->locale = pPieces->locale;
- pArgs->options = pPieces->options;
-
- /* In case "name" is NULL we want to open the default converter. */
- if (converterName == NULL) {
-#if U_CHARSET_IS_UTF8
- pArgs->name = "UTF-8";
- return (UConverterSharedData *)converterData[UCNV_UTF8];
-#else
- /* Call ucnv_getDefaultName first to query the name from the OS. */
- pArgs->name = ucnv_getDefaultName();
- if (pArgs->name == NULL) {
- *err = U_MISSING_RESOURCE_ERROR;
- return NULL;
- }
- mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData;
- checkForAlgorithmic = FALSE;
- mayContainOption = gDefaultConverterContainsOption;
- /* the default converter name is already canonical */
-#endif
- }
- else if(UCNV_FAST_IS_UTF8(converterName)) {
- /* fastpath for UTF-8 */
- pArgs->name = "UTF-8";
- return (UConverterSharedData *)converterData[UCNV_UTF8];
- }
- else {
- /* separate the converter name from the options */
- parseConverterOptions(converterName, pPieces, pArgs, err);
- if (U_FAILURE(*err)) {
- /* Very bad name used. */
- return NULL;
- }
-
- /* get the canonical converter name */
- pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode);
- if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) {
- /*
- * set the input name in case the converter was added
- * without updating the alias table, or when there is no alias table
- */
- pArgs->name = pPieces->cnvName;
- } else if (internalErrorCode == U_AMBIGUOUS_ALIAS_WARNING) {
- *err = U_AMBIGUOUS_ALIAS_WARNING;
- }
- }
-
- /* separate the converter name from the options */
- if(mayContainOption && pArgs->name != pPieces->cnvName) {
- parseConverterOptions(pArgs->name, pPieces, pArgs, err);
- }
-
- /* get the shared data for an algorithmic converter, if it is one */
- if (checkForAlgorithmic) {
- mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name);
- }
- if (mySharedConverterData == NULL)
- {
- /* it is a data-based converter, get its shared data. */
- /* Hold the cnvCacheMutex through the whole process of checking the */
- /* converter data cache, and adding new entries to the cache */
- /* to prevent other threads from modifying the cache during the */
- /* process. */
- pArgs->nestedLoads=1;
- pArgs->pkg=NULL;
-
- umtx_lock(&cnvCacheMutex);
- mySharedConverterData = ucnv_load(pArgs, err);
- umtx_unlock(&cnvCacheMutex);
- if (U_FAILURE (*err) || (mySharedConverterData == NULL))
- {
- return NULL;
- }
- }
-
- return mySharedConverterData;
-}
-
-U_CAPI UConverter *
-ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err)
-{
- UConverterNamePieces stackPieces;
- UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
- UConverterSharedData *mySharedConverterData;
-
- UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN);
-
- if(U_SUCCESS(*err)) {
- UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName);
-
- mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err);
-
- myUConverter = ucnv_createConverterFromSharedData(
- myUConverter, mySharedConverterData,
- &stackArgs,
- err);
-
- if(U_SUCCESS(*err)) {
- UTRACE_EXIT_PTR_STATUS(myUConverter, *err);
- return myUConverter;
- }
- }
-
- /* exit with error */
- UTRACE_EXIT_STATUS(*err);
- return NULL;
-}
-
-U_CFUNC UBool
-ucnv_canCreateConverter(const char *converterName, UErrorCode *err) {
- UConverter myUConverter;
- UConverterNamePieces stackPieces;
- UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
- UConverterSharedData *mySharedConverterData;
-
- UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN);
-
- if(U_SUCCESS(*err)) {
- UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName);
-
- stackArgs.onlyTestIsLoadable=TRUE;
- mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err);
- ucnv_createConverterFromSharedData(
- &myUConverter, mySharedConverterData,
- &stackArgs,
- err);
- ucnv_unloadSharedDataIfReady(mySharedConverterData);
- }
-
- UTRACE_EXIT_STATUS(*err);
- return U_SUCCESS(*err);
-}
-
-UConverter *
-ucnv_createAlgorithmicConverter(UConverter *myUConverter,
- UConverterType type,
- const char *locale, uint32_t options,
- UErrorCode *err) {
- UConverter *cnv;
- const UConverterSharedData *sharedData;
- UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
-
- UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC);
- UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type);
-
- if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR);
- return NULL;
- }
-
- sharedData = converterData[type];
- if(sharedData == NULL || sharedData->isReferenceCounted) {
- /* not a valid type, or not an algorithmic converter */
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR);
- return NULL;
- }
-
- stackArgs.name = "";
- stackArgs.options = options;
- stackArgs.locale=locale;
- cnv = ucnv_createConverterFromSharedData(
- myUConverter, (UConverterSharedData *)sharedData,
- &stackArgs, err);
-
- UTRACE_EXIT_PTR_STATUS(cnv, *err);
- return cnv;
-}
-
-U_CFUNC UConverter*
-ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err)
-{
- UConverter *myUConverter;
- UConverterSharedData *mySharedConverterData;
- UConverterNamePieces stackPieces;
- UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
-
- UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE);
-
- if(U_FAILURE(*err)) {
- UTRACE_EXIT_STATUS(*err);
- return NULL;
- }
-
- UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName);
-
- /* first, get the options out of the converterName string */
- stackPieces.cnvName[0] = 0;
- stackPieces.locale[0] = 0;
- stackPieces.options = 0;
- parseConverterOptions(converterName, &stackPieces, &stackArgs, err);
- if (U_FAILURE(*err)) {
- /* Very bad name used. */
- UTRACE_EXIT_STATUS(*err);
- return NULL;
- }
- stackArgs.nestedLoads=1;
- stackArgs.pkg=packageName;
-
- /* open the data, unflatten the shared structure */
- mySharedConverterData = createConverterFromFile(&stackArgs, err);
-
- if (U_FAILURE(*err)) {
- UTRACE_EXIT_STATUS(*err);
- return NULL;
- }
-
- /* create the actual converter */
- myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err);
-
- if (U_FAILURE(*err)) {
- ucnv_close(myUConverter);
- UTRACE_EXIT_STATUS(*err);
- return NULL;
- }
-
- UTRACE_EXIT_PTR_STATUS(myUConverter, *err);
- return myUConverter;
-}
-
-
-U_CFUNC UConverter*
-ucnv_createConverterFromSharedData(UConverter *myUConverter,
- UConverterSharedData *mySharedConverterData,
- UConverterLoadArgs *pArgs,
- UErrorCode *err)
-{
- UBool isCopyLocal;
-
- if(U_FAILURE(*err)) {
- ucnv_unloadSharedDataIfReady(mySharedConverterData);
- return myUConverter;
- }
- if(myUConverter == NULL)
- {
- myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
- if(myUConverter == NULL)
- {
- *err = U_MEMORY_ALLOCATION_ERROR;
- ucnv_unloadSharedDataIfReady(mySharedConverterData);
- return NULL;
- }
- isCopyLocal = FALSE;
- } else {
- isCopyLocal = TRUE;
- }
-
- /* initialize the converter */
- uprv_memset(myUConverter, 0, sizeof(UConverter));
- myUConverter->isCopyLocal = isCopyLocal;
- /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */
- myUConverter->sharedData = mySharedConverterData;
- myUConverter->options = pArgs->options;
- if(!pArgs->onlyTestIsLoadable) {
- myUConverter->preFromUFirstCP = U_SENTINEL;
- myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK;
- myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK;
- myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus;
- myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar;
- myUConverter->subChar1 = mySharedConverterData->staticData->subChar1;
- myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen;
- myUConverter->subChars = (uint8_t *)myUConverter->subUChars;
- uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen);
- myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */
- }
-
- if(mySharedConverterData->impl->open != NULL) {
- mySharedConverterData->impl->open(myUConverter, pArgs, err);
- if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) {
- /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */
- ucnv_close(myUConverter);
- return NULL;
- }
- }
-
- return myUConverter;
-}
-
-/*Frees all shared immutable objects that aren't referred to (reference count = 0)
- */
-U_CAPI int32_t U_EXPORT2
-ucnv_flushCache ()
-{
- UConverterSharedData *mySharedData = NULL;
- int32_t pos;
- int32_t tableDeletedNum = 0;
- const UHashElement *e;
- /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/
- int32_t i, remaining;
-
- UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE);
-
- /* Close the default converter without creating a new one so that everything will be flushed. */
- u_flushDefaultConverter();
-
- /*if shared data hasn't even been lazy evaluated yet
- * return 0
- */
- if (SHARED_DATA_HASHTABLE == NULL) {
- UTRACE_EXIT_VALUE((int32_t)0);
- return 0;
- }
-
- /*creates an enumeration to iterate through every element in the
- * table
- *
- * Synchronization: holding cnvCacheMutex will prevent any other thread from
- * accessing or modifying the hash table during the iteration.
- * The reference count of an entry may be decremented by
- * ucnv_close while the iteration is in process, but this is
- * benign. It can't be incremented (in ucnv_createConverter())
- * because the sequence of looking up in the cache + incrementing
- * is protected by cnvCacheMutex.
- */
- umtx_lock(&cnvCacheMutex);
- /*
- * double loop: A delta/extension-only converter has a pointer to its base table's
- * shared data; the first iteration of the outer loop may see the delta converter
- * before the base converter, and unloading the delta converter may get the base
- * converter's reference counter down to 0.
- */
- i = 0;
- do {
- remaining = 0;
- pos = UHASH_FIRST;
- while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL)
- {
- mySharedData = (UConverterSharedData *) e->value.pointer;
- /*deletes only if reference counter == 0 */
- if (mySharedData->referenceCounter == 0)
- {
- tableDeletedNum++;
-
- UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData);
-
- uhash_removeElement(SHARED_DATA_HASHTABLE, e);
- mySharedData->sharedDataCached = FALSE;
- ucnv_deleteSharedConverterData (mySharedData);
- } else {
- ++remaining;
- }
- }
- } while(++i == 1 && remaining > 0);
- umtx_unlock(&cnvCacheMutex);
-
- UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining);
-
- UTRACE_EXIT_VALUE(tableDeletedNum);
- return tableDeletedNum;
-}
-
-/* available converters list --------------------------------------------------- */
-
-static void U_CALLCONV initAvailableConvertersList(UErrorCode &errCode) {
- U_ASSERT(gAvailableConverterCount == 0);
- U_ASSERT(gAvailableConverters == NULL);
-
- ucnv_enableCleanup();
- UEnumeration *allConvEnum = ucnv_openAllNames(&errCode);
- int32_t allConverterCount = uenum_count(allConvEnum, &errCode);
- if (U_FAILURE(errCode)) {
- return;
- }
-
- /* We can't have more than "*converterTable" converters to open */
- gAvailableConverters = (const char **) uprv_malloc(allConverterCount * sizeof(char*));
- if (!gAvailableConverters) {
- errCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- /* Open the default converter to make sure that it has first dibs in the hash table. */
- UErrorCode localStatus = U_ZERO_ERROR;
- UConverter tempConverter;
- ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus));
-
- gAvailableConverterCount = 0;
-
- for (int32_t idx = 0; idx < allConverterCount; idx++) {
- localStatus = U_ZERO_ERROR;
- const char *converterName = uenum_next(allConvEnum, NULL, &localStatus);
- if (ucnv_canCreateConverter(converterName, &localStatus)) {
- gAvailableConverters[gAvailableConverterCount++] = converterName;
- }
- }
-
- uenum_close(allConvEnum);
-}
-
-
-static UBool haveAvailableConverterList(UErrorCode *pErrorCode) {
- umtx_initOnce(gAvailableConvertersInitOnce, &initAvailableConvertersList, *pErrorCode);
- return U_SUCCESS(*pErrorCode);
-}
-
-U_CFUNC uint16_t
-ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) {
- if (haveAvailableConverterList(pErrorCode)) {
- return gAvailableConverterCount;
- }
- return 0;
-}
-
-U_CFUNC const char *
-ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) {
- if (haveAvailableConverterList(pErrorCode)) {
- if (n < gAvailableConverterCount) {
- return gAvailableConverters[n];
- }
- *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- }
- return NULL;
-}
-
-/* default converter name --------------------------------------------------- */
-
-#if !U_CHARSET_IS_UTF8
-/*
-Copy the canonical converter name.
-ucnv_getDefaultName must be thread safe, which can call this function.
-
-ucnv_setDefaultName calls this function and it doesn't have to be
-thread safe because there is no reliable/safe way to reset the
-converter in use in all threads. If you did reset the converter, you
-would not be sure that retrieving a default converter for one string
-would be the same type of default converter for a successive string.
-Since the name is a returned via ucnv_getDefaultName without copying,
-you shouldn't be modifying or deleting the string from a separate thread.
-*/
-static inline void
-internalSetName(const char *name, UErrorCode *status) {
- UConverterNamePieces stackPieces;
- UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
- int32_t length=(int32_t)(uprv_strlen(name));
- UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL);
- const UConverterSharedData *algorithmicSharedData;
-
- stackArgs.name = name;
- if(containsOption) {
- stackPieces.cnvName[0] = 0;
- stackPieces.locale[0] = 0;
- stackPieces.options = 0;
- parseConverterOptions(name, &stackPieces, &stackArgs, status);
- if(U_FAILURE(*status)) {
- return;
- }
- }
- algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name);
-
- umtx_lock(&cnvCacheMutex);
-
- gDefaultAlgorithmicSharedData = algorithmicSharedData;
- gDefaultConverterContainsOption = containsOption;
- uprv_memcpy(gDefaultConverterNameBuffer, name, length);
- gDefaultConverterNameBuffer[length]=0;
-
- /* gDefaultConverterName MUST be the last global var set by this function. */
- /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */
- // But there is nothing here preventing that from being reordered, either by the compiler
- // or hardware. I'm adding the mutex to ucnv_getDefaultName for now. UMTX_CHECK is not enough.
- // -- Andy
- gDefaultConverterName = gDefaultConverterNameBuffer;
-
- ucnv_enableCleanup();
-
- umtx_unlock(&cnvCacheMutex);
-}
-#endif
-
-/*
- * In order to be really thread-safe, the get function would have to take
- * a buffer parameter and copy the current string inside a mutex block.
- * This implementation only tries to be really thread-safe while
- * setting the name.
- * It assumes that setting a pointer is atomic.
- */
-
-U_CAPI const char* U_EXPORT2
-ucnv_getDefaultName() {
-#if U_CHARSET_IS_UTF8
- return "UTF-8";
-#else
- /* local variable to be thread-safe */
- const char *name;
-
- /*
- Concurrent calls to ucnv_getDefaultName must be thread safe,
- but ucnv_setDefaultName is not thread safe.
- */
- {
- icu::Mutex lock(&cnvCacheMutex);
- name = gDefaultConverterName;
- }
- if(name==NULL) {
- UErrorCode errorCode = U_ZERO_ERROR;
- UConverter *cnv = NULL;
-
- name = uprv_getDefaultCodepage();
-
- /* if the name is there, test it out and get the canonical name with options */
- if(name != NULL) {
- cnv = ucnv_open(name, &errorCode);
- if(U_SUCCESS(errorCode) && cnv != NULL) {
- name = ucnv_getName(cnv, &errorCode);
- }
- }
-
- if(name == NULL || name[0] == 0
- || U_FAILURE(errorCode) || cnv == NULL
- || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer))
- {
- /* Panic time, let's use a fallback. */
-#if (U_CHARSET_FAMILY == U_ASCII_FAMILY)
- name = "US-ASCII";
- /* there is no 'algorithmic' converter for EBCDIC */
-#elif U_PLATFORM == U_PF_OS390
- name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING;
-#else
- name = "ibm-37_P100-1995";
-#endif
- }
-
- internalSetName(name, &errorCode);
-
- /* The close may make the current name go away. */
- ucnv_close(cnv);
- }
-
- return name;
-#endif
-}
-
-#if U_CHARSET_IS_UTF8
-U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) {}
-#else
-/*
-This function is not thread safe, and it can't be thread safe.
-See internalSetName or the API reference for details.
-*/
-U_CAPI void U_EXPORT2
-ucnv_setDefaultName(const char *converterName) {
- if(converterName==NULL) {
- /* reset to the default codepage */
- gDefaultConverterName=NULL;
- } else {
- UErrorCode errorCode = U_ZERO_ERROR;
- UConverter *cnv = NULL;
- const char *name = NULL;
-
- /* if the name is there, test it out and get the canonical name with options */
- cnv = ucnv_open(converterName, &errorCode);
- if(U_SUCCESS(errorCode) && cnv != NULL) {
- name = ucnv_getName(cnv, &errorCode);
- }
-
- if(U_SUCCESS(errorCode) && name!=NULL) {
- internalSetName(name, &errorCode);
- }
- /* else this converter is bad to use. Don't change it to a bad value. */
-
- /* The close may make the current name go away. */
- ucnv_close(cnv);
-
- /* reset the converter cache */
- u_flushDefaultConverter();
- }
-}
-#endif
-
-/* data swapping ------------------------------------------------------------ */
-
-/* most of this might belong more properly into ucnvmbcs.c, but that is so large */
-
-#if !UCONFIG_NO_LEGACY_CONVERSION
-
-U_CAPI int32_t U_EXPORT2
-ucnv_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const UDataInfo *pInfo;
- int32_t headerSize;
-
- const uint8_t *inBytes;
- uint8_t *outBytes;
-
- uint32_t offset, count, staticDataSize;
- int32_t size;
-
- const UConverterStaticData *inStaticData;
- UConverterStaticData *outStaticData;
-
- const _MBCSHeader *inMBCSHeader;
- _MBCSHeader *outMBCSHeader;
- _MBCSHeader mbcsHeader;
- uint32_t mbcsHeaderLength;
- UBool noFromU=FALSE;
-
- uint8_t outputType;
-
- int32_t maxFastUChar, mbcsIndexLength;
-
- const int32_t *inExtIndexes;
- int32_t extOffset;
-
- /* udata_swapDataHeader checks the arguments */
- headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /* check data format and format version */
- pInfo=(const UDataInfo *)((const char *)inData+4);
- if(!(
- pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */
- pInfo->dataFormat[1]==0x6e &&
- pInfo->dataFormat[2]==0x76 &&
- pInfo->dataFormat[3]==0x74 &&
- pInfo->formatVersion[0]==6 &&
- pInfo->formatVersion[1]>=2
- )) {
- udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n",
- pInfo->dataFormat[0], pInfo->dataFormat[1],
- pInfo->dataFormat[2], pInfo->dataFormat[3],
- pInfo->formatVersion[0], pInfo->formatVersion[1]);
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- inBytes=(const uint8_t *)inData+headerSize;
- outBytes=(uint8_t *)outData+headerSize;
-
- /* read the initial UConverterStaticData structure after the UDataInfo header */
- inStaticData=(const UConverterStaticData *)inBytes;
- outStaticData=(UConverterStaticData *)outBytes;
-
- if(length<0) {
- staticDataSize=ds->readUInt32(inStaticData->structSize);
- } else {
- length-=headerSize;
- if( length<(int32_t)sizeof(UConverterStaticData) ||
- (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize))
- ) {
- udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
- length);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- }
-
- if(length>=0) {
- /* swap the static data */
- if(inStaticData!=outStaticData) {
- uprv_memcpy(outStaticData, inStaticData, staticDataSize);
- }
-
- ds->swapArray32(ds, &inStaticData->structSize, 4,
- &outStaticData->structSize, pErrorCode);
- ds->swapArray32(ds, &inStaticData->codepage, 4,
- &outStaticData->codepage, pErrorCode);
-
- ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name),
- outStaticData->name, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "ucnv_swap(): error swapping converter name\n");
- return 0;
- }
- }
-
- inBytes+=staticDataSize;
- outBytes+=staticDataSize;
- if(length>=0) {
- length-=(int32_t)staticDataSize;
- }
-
- /* check for supported conversionType values */
- if(inStaticData->conversionType==UCNV_MBCS) {
- /* swap MBCS data */
- inMBCSHeader=(const _MBCSHeader *)inBytes;
- outMBCSHeader=(_MBCSHeader *)outBytes;
-
- if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) {
- udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
- length);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) {
- mbcsHeaderLength=MBCS_HEADER_V4_LENGTH;
- } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 &&
- ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))&
- MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0
- ) {
- mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK;
- noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0);
- } else {
- udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n",
- inMBCSHeader->version[0], inMBCSHeader->version[1]);
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4);
- mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates);
- mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks);
- mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits);
- mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable);
- mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes);
- mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags);
- mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength);
- /* mbcsHeader.options have been read above */
-
- extOffset=(int32_t)(mbcsHeader.flags>>8);
- outputType=(uint8_t)mbcsHeader.flags;
- if(noFromU && outputType==MBCS_OUTPUT_1) {
- udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n");
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- /* make sure that the output type is known */
- switch(outputType) {
- case MBCS_OUTPUT_1:
- case MBCS_OUTPUT_2:
- case MBCS_OUTPUT_3:
- case MBCS_OUTPUT_4:
- case MBCS_OUTPUT_3_EUC:
- case MBCS_OUTPUT_4_EUC:
- case MBCS_OUTPUT_2_SISO:
- case MBCS_OUTPUT_EXT_ONLY:
- /* OK */
- break;
- default:
- udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n",
- outputType);
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- /* calculate the length of the MBCS data */
-
- /*
- * utf8Friendly MBCS files (mbcsHeader.version 4.3)
- * contain an additional mbcsIndex table:
- * uint16_t[(maxFastUChar+1)>>6];
- * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff).
- */
- maxFastUChar=0;
- mbcsIndexLength=0;
- if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 &&
- mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0
- ) {
- maxFastUChar=(maxFastUChar<<8)|0xff;
- mbcsIndexLength=((maxFastUChar+1)>>6)*2; /* number of bytes */
- }
-
- if(extOffset==0) {
- size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength);
- if(!noFromU) {
- size+=(int32_t)mbcsHeader.fromUBytesLength;
- }
-
- /* avoid compiler warnings - not otherwise necessary, and the value does not matter */
- inExtIndexes=NULL;
- } else {
- /* there is extension data after the base data, see ucnv_ext.h */
- if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) {
- udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
- length);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- inExtIndexes=(const int32_t *)(inBytes+extOffset);
- size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]);
- }
-
- if(length>=0) {
- if(length<size) {
- udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
- length);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- /* copy the data for inaccessible bytes */
- if(inBytes!=outBytes) {
- uprv_memcpy(outBytes, inBytes, size);
- }
-
- /* swap the MBCSHeader, except for the version field */
- count=mbcsHeaderLength*4;
- ds->swapArray32(ds, &inMBCSHeader->countStates, count-4,
- &outMBCSHeader->countStates, pErrorCode);
-
- if(outputType==MBCS_OUTPUT_EXT_ONLY) {
- /*
- * extension-only file,
- * contains a base name instead of normal base table data
- */
-
- /* swap the base name, between the header and the extension data */
- const char *inBaseName=(const char *)inBytes+count;
- char *outBaseName=(char *)outBytes+count;
- ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName),
- outBaseName, pErrorCode);
- } else {
- /* normal file with base table data */
-
- /* swap the state table, 1kB per state */
- offset=count;
- count=mbcsHeader.countStates*1024;
- ds->swapArray32(ds, inBytes+offset, (int32_t)count,
- outBytes+offset, pErrorCode);
-
- /* swap the toUFallbacks[] */
- offset+=count;
- count=mbcsHeader.countToUFallbacks*8;
- ds->swapArray32(ds, inBytes+offset, (int32_t)count,
- outBytes+offset, pErrorCode);
-
- /* swap the unicodeCodeUnits[] */
- offset=mbcsHeader.offsetToUCodeUnits;
- count=mbcsHeader.offsetFromUTable-offset;
- ds->swapArray16(ds, inBytes+offset, (int32_t)count,
- outBytes+offset, pErrorCode);
-
- /* offset to the stage 1 table, independent of the outputType */
- offset=mbcsHeader.offsetFromUTable;
-
- if(outputType==MBCS_OUTPUT_1) {
- /* SBCS: swap the fromU tables, all 16 bits wide */
- count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength;
- ds->swapArray16(ds, inBytes+offset, (int32_t)count,
- outBytes+offset, pErrorCode);
- } else {
- /* otherwise: swap the stage tables separately */
-
- /* stage 1 table: uint16_t[0x440 or 0x40] */
- if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
- count=0x440*2; /* for all of Unicode */
- } else {
- count=0x40*2; /* only BMP */
- }
- ds->swapArray16(ds, inBytes+offset, (int32_t)count,
- outBytes+offset, pErrorCode);
-
- /* stage 2 table: uint32_t[] */
- offset+=count;
- count=mbcsHeader.offsetFromUBytes-offset;
- ds->swapArray32(ds, inBytes+offset, (int32_t)count,
- outBytes+offset, pErrorCode);
-
- /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */
- offset=mbcsHeader.offsetFromUBytes;
- count= noFromU ? 0 : mbcsHeader.fromUBytesLength;
- switch(outputType) {
- case MBCS_OUTPUT_2:
- case MBCS_OUTPUT_3_EUC:
- case MBCS_OUTPUT_2_SISO:
- ds->swapArray16(ds, inBytes+offset, (int32_t)count,
- outBytes+offset, pErrorCode);
- break;
- case MBCS_OUTPUT_4:
- ds->swapArray32(ds, inBytes+offset, (int32_t)count,
- outBytes+offset, pErrorCode);
- break;
- default:
- /* just uint8_t[], nothing to swap */
- break;
- }
-
- if(mbcsIndexLength!=0) {
- offset+=count;
- count=mbcsIndexLength;
- ds->swapArray16(ds, inBytes+offset, (int32_t)count,
- outBytes+offset, pErrorCode);
- }
- }
- }
-
- if(extOffset!=0) {
- /* swap the extension data */
- inBytes+=extOffset;
- outBytes+=extOffset;
-
- /* swap toUTable[] */
- offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]);
- length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]);
- ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);
-
- /* swap toUUChars[] */
- offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]);
- length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]);
- ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
-
- /* swap fromUTableUChars[] */
- offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]);
- length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]);
- ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
-
- /* swap fromUTableValues[] */
- offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]);
- /* same length as for fromUTableUChars[] */
- ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);
-
- /* no need to swap fromUBytes[] */
-
- /* swap fromUStage12[] */
- offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]);
- length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]);
- ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
-
- /* swap fromUStage3[] */
- offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]);
- length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]);
- ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
-
- /* swap fromUStage3b[] */
- offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]);
- length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]);
- ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);
-
- /* swap indexes[] */
- length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]);
- ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode);
- }
- }
- } else {
- udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n",
- inStaticData->conversionType);
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- return headerSize+(int32_t)staticDataSize+size;
-}
-
-#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
-
-#endif
diff --git a/contrib/libs/icu/common/ucnv_bld.h b/contrib/libs/icu/common/ucnv_bld.h
deleted file mode 100644
index 18b3795e4d8..00000000000
--- a/contrib/libs/icu/common/ucnv_bld.h
+++ /dev/null
@@ -1,296 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1999-2015 International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*
-*
-* ucnv_bld.h:
-* Contains internal data structure definitions
-* Created by Bertrand A. Damiba
-*
-* Change history:
-*
-* 06/29/2000 helena Major rewrite of the callback APIs.
-*/
-
-#ifndef UCNV_BLD_H
-#define UCNV_BLD_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "unicode/ucnv_err.h"
-#include "unicode/utf16.h"
-#include "ucnv_cnv.h"
-#include "ucnvmbcs.h"
-#include "ucnv_ext.h"
-#include "udataswp.h"
-
-/* size of the overflow buffers in UConverter, enough for escaping callbacks */
-#define UCNV_ERROR_BUFFER_LENGTH 32
-
-/* at most 4 bytes per substitution character (part of .cnv file format! see UConverterStaticData) */
-#define UCNV_MAX_SUBCHAR_LEN 4
-
-/* at most 8 bytes per character in toUBytes[] (UTF-8 uses up to 6) */
-#define UCNV_MAX_CHAR_LEN 8
-
-/* converter options bits */
-#define UCNV_OPTION_VERSION 0xf
-#define UCNV_OPTION_SWAP_LFNL 0x10
-
-#define UCNV_GET_VERSION(cnv) ((cnv)->options&UCNV_OPTION_VERSION)
-
-U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv
- itself is compiled under C++, the linkage of the funcptrs will
- work.
- */
-
-union UConverterTable {
- UConverterMBCSTable mbcs;
-};
-
-typedef union UConverterTable UConverterTable;
-
-struct UConverterImpl;
-typedef struct UConverterImpl UConverterImpl;
-
-/** values for the unicodeMask */
-#define UCNV_HAS_SUPPLEMENTARY 1
-#define UCNV_HAS_SURROGATES 2
-
-typedef struct UConverterStaticData { /* +offset: size */
- uint32_t structSize; /* +0: 4 Size of this structure */
-
- char name
- [UCNV_MAX_CONVERTER_NAME_LENGTH]; /* +4: 60 internal name of the converter- invariant chars */
-
- int32_t codepage; /* +64: 4 codepage # (now IBM-$codepage) */
-
- int8_t platform; /* +68: 1 platform of the converter (only IBM now) */
- int8_t conversionType; /* +69: 1 conversion type */
-
- int8_t minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */
- int8_t maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */
-
- uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* +72: 4 [note: 4 and 8 byte boundary] */
- int8_t subCharLen; /* +76: 1 */
-
- uint8_t hasToUnicodeFallback; /* +77: 1 UBool needs to be changed to UBool to be consistent across platform */
- uint8_t hasFromUnicodeFallback; /* +78: 1 */
- uint8_t unicodeMask; /* +79: 1 bit 0: has supplementary bit 1: has single surrogates */
- uint8_t subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */
- uint8_t reserved[19]; /* +81: 19 to round out the structure */
- /* total size: 100 */
-} UConverterStaticData;
-
-/*
- * Defines the UConverterSharedData struct,
- * the immutable, shared part of UConverter.
- */
-struct UConverterSharedData {
- uint32_t structSize; /* Size of this structure */
- uint32_t referenceCounter; /* used to count number of clients, unused for static/immutable SharedData */
-
- const void *dataMemory; /* from udata_openChoice() - for cleanup */
-
- const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */
-
- UBool sharedDataCached; /* TRUE: shared data is in cache, don't destroy on ucnv_close() if 0 ref. FALSE: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */
- /** If FALSE, then referenceCounter is not used. Must not change after initialization. */
- UBool isReferenceCounted;
-
- const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */
-
- /*initial values of some members of the mutable part of object */
- uint32_t toUnicodeStatus;
-
- /*
- * Shared data structures currently come in two flavors:
- * - readonly for built-in algorithmic converters
- * - allocated for MBCS, with a pointer to an allocated UConverterTable
- * which always has a UConverterMBCSTable
- *
- * To eliminate one allocation, I am making the UConverterMBCSTable
- * a member of the shared data.
- *
- * markus 2003-nov-07
- */
- UConverterMBCSTable mbcs;
-};
-
-/** UConverterSharedData initializer for static, non-reference-counted converters. */
-#define UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(pStaticData, pImpl) \
- { \
- sizeof(UConverterSharedData), ~((uint32_t)0), \
- NULL, pStaticData, FALSE, FALSE, pImpl, \
- 0, UCNV_MBCS_TABLE_INITIALIZER \
- }
-
-/* Defines a UConverter, the lightweight mutable part the user sees */
-
-struct UConverter {
- /*
- * Error function pointer called when conversion issues
- * occur during a ucnv_fromUnicode call
- */
- void (U_EXPORT2 *fromUCharErrorBehaviour) (const void *context,
- UConverterFromUnicodeArgs *args,
- const UChar *codeUnits,
- int32_t length,
- UChar32 codePoint,
- UConverterCallbackReason reason,
- UErrorCode *);
- /*
- * Error function pointer called when conversion issues
- * occur during a ucnv_toUnicode call
- */
- void (U_EXPORT2 *fromCharErrorBehaviour) (const void *context,
- UConverterToUnicodeArgs *args,
- const char *codeUnits,
- int32_t length,
- UConverterCallbackReason reason,
- UErrorCode *);
-
- /*
- * Pointer to additional data that depends on the converter type.
- * Used by ISO 2022, SCSU, GB 18030 converters, possibly more.
- */
- void *extraInfo;
-
- const void *fromUContext;
- const void *toUContext;
-
- /*
- * Pointer to charset bytes for substitution string if subCharLen>0,
- * or pointer to Unicode string (UChar *) if subCharLen<0.
- * subCharLen==0 is equivalent to using a skip callback.
- * If the pointer is !=subUChars then it is allocated with
- * UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR bytes.
- * The subUChars field is declared as UChar[] not uint8_t[] to
- * guarantee alignment for UChars.
- */
- uint8_t *subChars;
-
- UConverterSharedData *sharedData; /* Pointer to the shared immutable part of the converter object */
-
- uint32_t options; /* options flags from UConverterOpen, may contain additional bits */
-
- UBool sharedDataIsCached; /* TRUE: shared data is in cache, don't destroy on ucnv_close() if 0 ref. FALSE: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */
- UBool isCopyLocal; /* TRUE if UConverter is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */
- UBool isExtraLocal; /* TRUE if extraInfo is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */
-
- UBool useFallback;
- int8_t toULength; /* number of bytes in toUBytes */
- uint8_t toUBytes[UCNV_MAX_CHAR_LEN-1];/* more "toU status"; keeps the bytes of the current character */
- uint32_t toUnicodeStatus; /* Used to internalize stream status information */
- int32_t mode;
- uint32_t fromUnicodeStatus;
-
- /*
- * More fromUnicode() status. Serves 3 purposes:
- * - keeps a lead surrogate between buffers (similar to toUBytes[])
- * - keeps a lead surrogate at the end of the stream,
- * which the framework handles as truncated input
- * - if the fromUnicode() implementation returns to the framework
- * (ucnv.c ucnv_fromUnicode()), then the framework calls the callback
- * for this code point
- */
- UChar32 fromUChar32;
-
- /*
- * value for ucnv_getMaxCharSize()
- *
- * usually simply copied from the static data, but ucnvmbcs.c modifies
- * the value depending on the converter type and options
- */
- int8_t maxBytesPerUChar;
-
- int8_t subCharLen; /* length of the codepage specific character sequence */
- int8_t invalidCharLength;
- int8_t charErrorBufferLength; /* number of valid bytes in charErrorBuffer */
-
- int8_t invalidUCharLength;
- int8_t UCharErrorBufferLength; /* number of valid UChars in charErrorBuffer */
-
- uint8_t subChar1; /* single-byte substitution character if different from subChar */
- UBool useSubChar1;
- char invalidCharBuffer[UCNV_MAX_CHAR_LEN]; /* bytes from last error/callback situation */
- uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* codepage output from Error functions */
- UChar subUChars[UCNV_MAX_SUBCHAR_LEN/U_SIZEOF_UCHAR]; /* see subChars documentation */
-
- UChar invalidUCharBuffer[U16_MAX_LENGTH]; /* UChars from last error/callback situation */
- UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* unicode output from Error functions */
-
- /* fields for conversion extension */
-
- /* store previous UChars/chars to continue partial matches */
- UChar32 preFromUFirstCP; /* >=0: partial match */
- UChar preFromU[UCNV_EXT_MAX_UCHARS];
- char preToU[UCNV_EXT_MAX_BYTES];
- int8_t preFromULength, preToULength; /* negative: replay */
- int8_t preToUFirstLength; /* length of first character */
-
- /* new fields for ICU 4.0 */
- UConverterCallbackReason toUCallbackReason; /* (*fromCharErrorBehaviour) reason, set when error is detected */
-};
-
-U_CDECL_END /* end of UConverter */
-
-#define CONVERTER_FILE_EXTENSION ".cnv"
-
-
-/**
- * Return the number of all converter names.
- * @param pErrorCode The error code
- * @return the number of all converter names
- */
-U_CFUNC uint16_t
-ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode);
-
-/**
- * Return the (n)th converter name in mixed case, or NULL
- * if there is none (typically, if the data cannot be loaded).
- * 0<=index<ucnv_io_countAvailableConverters().
- * @param n The number specifies which converter name to get
- * @param pErrorCode The error code
- * @return the (n)th converter name in mixed case, or NULL if there is none.
- */
-U_CFUNC const char *
-ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode);
-
-/**
- * Load a non-algorithmic converter.
- * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex).
- */
-U_CAPI UConverterSharedData *
-ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err);
-
-/**
- * Unload a non-algorithmic converter.
- * It must be sharedData->isReferenceCounted
- * and this function must be called inside umtx_lock(&cnvCacheMutex).
- */
-U_CAPI void
-ucnv_unload(UConverterSharedData *sharedData);
-
-/**
- * Swap ICU .cnv conversion tables. See udataswp.h.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-ucnv_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-U_CAPI void U_EXPORT2
-ucnv_enableCleanup();
-
-#endif
-
-#endif /* _UCNV_BLD */
diff --git a/contrib/libs/icu/common/ucnv_cb.cpp b/contrib/libs/icu/common/ucnv_cb.cpp
deleted file mode 100644
index 1bb00120149..00000000000
--- a/contrib/libs/icu/common/ucnv_cb.cpp
+++ /dev/null
@@ -1,261 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2000-2006, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
- * ucnv_cb.c:
- * External APIs for the ICU's codeset conversion library
- * Helena Shih
- *
- * Modification History:
- *
- * Date Name Description
- * 7/28/2000 srl Implementation
- */
-
-/**
- * @name Character Conversion C API
- *
- */
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/ucnv_cb.h"
-#include "ucnv_bld.h"
-#include "ucnv_cnv.h"
-#include "cmemory.h"
-
-/* need to update the offsets when the target moves. */
-/* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly
-if you don't use ucnv_cbXXX functions. Make sure you don't use the same callback within
-the same call stack if the complexity arises. */
-U_CAPI void U_EXPORT2
-ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
- const char* source,
- int32_t length,
- int32_t offsetIndex,
- UErrorCode * err)
-{
- if(U_FAILURE(*err)) {
- return;
- }
-
- ucnv_fromUWriteBytes(
- args->converter,
- source, length,
- &args->target, args->targetLimit,
- &args->offsets, offsetIndex,
- err);
-}
-
-U_CAPI void U_EXPORT2
-ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
- const UChar** source,
- const UChar* sourceLimit,
- int32_t offsetIndex,
- UErrorCode * err)
-{
- /*
- This is a fun one. Recursion can occur - we're basically going to
- just retry shoving data through the same converter. Note, if you got
- here through some kind of invalid sequence, you maybe should emit a
- reset sequence of some kind and/or call ucnv_reset(). Since this
- IS an actual conversion, take care that you've changed the callback
- or the data, or you'll get an infinite loop.
-
- Please set the err value to something reasonable before calling
- into this.
- */
-
- char *oldTarget;
-
- if(U_FAILURE(*err))
- {
- return;
- }
-
- oldTarget = args->target;
-
- ucnv_fromUnicode(args->converter,
- &args->target,
- args->targetLimit,
- source,
- sourceLimit,
- NULL, /* no offsets */
- FALSE, /* no flush */
- err);
-
- if(args->offsets)
- {
- while (args->target != oldTarget) /* if it moved at all.. */
- {
- *(args->offsets)++ = offsetIndex;
- oldTarget++;
- }
- }
-
- /*
- Note, if you did something like used a Stop subcallback, things would get interesting.
- In fact, here's where we want to return the partially consumed in-source!
- */
- if(*err == U_BUFFER_OVERFLOW_ERROR)
- /* && (*source < sourceLimit && args->target >= args->targetLimit)
- -- S. Hrcek */
- {
- /* Overflowed the target. Now, we'll write into the charErrorBuffer.
- It's a fixed size. If we overflow it... Hmm */
- char *newTarget;
- const char *newTargetLimit;
- UErrorCode err2 = U_ZERO_ERROR;
-
- int8_t errBuffLen;
-
- errBuffLen = args->converter->charErrorBufferLength;
-
- /* start the new target at the first free slot in the errbuff.. */
- newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
-
- newTargetLimit = (char *)(args->converter->charErrorBuffer +
- sizeof(args->converter->charErrorBuffer));
-
- if(newTarget >= newTargetLimit)
- {
- *err = U_INTERNAL_PROGRAM_ERROR;
- return;
- }
-
- /* We're going to tell the converter that the errbuff len is empty.
- This prevents the existing errbuff from being 'flushed' out onto
- itself. If the errbuff is needed by the converter this time,
- we're hosed - we're out of space! */
-
- args->converter->charErrorBufferLength = 0;
-
- ucnv_fromUnicode(args->converter,
- &newTarget,
- newTargetLimit,
- source,
- sourceLimit,
- NULL,
- FALSE,
- &err2);
-
- /* We can go ahead and overwrite the length here. We know just how
- to recalculate it. */
-
- args->converter->charErrorBufferLength = (int8_t)(
- newTarget - (char*)args->converter->charErrorBuffer);
-
- if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
- {
- /* now we're REALLY in trouble.
- Internal program error - callback shouldn't have written this much
- data!
- */
- *err = U_INTERNAL_PROGRAM_ERROR;
- return;
- }
- /*else {*/
- /* sub errs could be invalid/truncated/illegal chars or w/e.
- These might want to be passed on up.. But the problem is, we already
- need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
- other errs.. */
-
- /*
- if(U_FAILURE(err2))
- ??
- */
- /*}*/
- }
-}
-
-U_CAPI void U_EXPORT2
-ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
- int32_t offsetIndex,
- UErrorCode * err)
-{
- UConverter *converter;
- int32_t length;
-
- if(U_FAILURE(*err)) {
- return;
- }
- converter = args->converter;
- length = converter->subCharLen;
-
- if(length == 0) {
- return;
- }
-
- if(length < 0) {
- /*
- * Write/convert the substitution string. Its real length is -length.
- * Unlike the escape callback, we need not change the converter's
- * callback function because ucnv_setSubstString() verified that
- * the string can be converted, so we will not get a conversion error
- * and will not recurse.
- * At worst we should get a U_BUFFER_OVERFLOW_ERROR.
- */
- const UChar *source = (const UChar *)converter->subChars;
- ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
- return;
- }
-
- if(converter->sharedData->impl->writeSub!=NULL) {
- converter->sharedData->impl->writeSub(args, offsetIndex, err);
- }
- else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
- /*
- TODO: Is this untestable because the MBCS converter has a writeSub function to call
- and the other converters don't use subChar1?
- */
- ucnv_cbFromUWriteBytes(args,
- (const char *)&converter->subChar1, 1,
- offsetIndex, err);
- }
- else {
- ucnv_cbFromUWriteBytes(args,
- (const char *)converter->subChars, length,
- offsetIndex, err);
- }
-}
-
-U_CAPI void U_EXPORT2
-ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
- const UChar* source,
- int32_t length,
- int32_t offsetIndex,
- UErrorCode * err)
-{
- if(U_FAILURE(*err)) {
- return;
- }
-
- ucnv_toUWriteUChars(
- args->converter,
- source, length,
- &args->target, args->targetLimit,
- &args->offsets, offsetIndex,
- err);
-}
-
-U_CAPI void U_EXPORT2
-ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
- int32_t offsetIndex,
- UErrorCode * err)
-{
- static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
-
- /* could optimize this case, just one uchar */
- if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
- ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err);
- } else {
- ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
- }
-}
-
-#endif
diff --git a/contrib/libs/icu/common/ucnv_cnv.cpp b/contrib/libs/icu/common/ucnv_cnv.cpp
deleted file mode 100644
index ea71acf92c7..00000000000
--- a/contrib/libs/icu/common/ucnv_cnv.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1999-2004, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* uconv_cnv.c:
-* Implements all the low level conversion functions
-* T_UnicodeConverter_{to,from}Unicode_$ConversionType
-*
-* Change history:
-*
-* 06/29/2000 helena Major rewrite of the callback APIs.
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/ucnv_err.h"
-#include "unicode/ucnv.h"
-#include "unicode/uset.h"
-#include "ucnv_cnv.h"
-#include "ucnv_bld.h"
-#include "cmemory.h"
-
-U_CFUNC void
-ucnv_getCompleteUnicodeSet(const UConverter *cnv,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode) {
- (void)cnv;
- (void)which;
- (void)pErrorCode;
- sa->addRange(sa->set, 0, 0x10ffff);
-}
-
-U_CFUNC void
-ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode) {
- (void)cnv;
- (void)which;
- (void)pErrorCode;
- sa->addRange(sa->set, 0, 0xd7ff);
- sa->addRange(sa->set, 0xe000, 0x10ffff);
-}
-
-U_CFUNC void
-ucnv_fromUWriteBytes(UConverter *cnv,
- const char *bytes, int32_t length,
- char **target, const char *targetLimit,
- int32_t **offsets,
- int32_t sourceIndex,
- UErrorCode *pErrorCode) {
- char *t=*target;
- int32_t *o;
-
- /* write bytes */
- if(offsets==NULL || (o=*offsets)==NULL) {
- while(length>0 && t<targetLimit) {
- *t++=*bytes++;
- --length;
- }
- } else {
- /* output with offsets */
- while(length>0 && t<targetLimit) {
- *t++=*bytes++;
- *o++=sourceIndex;
- --length;
- }
- *offsets=o;
- }
- *target=t;
-
- /* write overflow */
- if(length>0) {
- if(cnv!=NULL) {
- t=(char *)cnv->charErrorBuffer;
- cnv->charErrorBufferLength=(int8_t)length;
- do {
- *t++=(uint8_t)*bytes++;
- } while(--length>0);
- }
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
-}
-
-U_CFUNC void
-ucnv_toUWriteUChars(UConverter *cnv,
- const UChar *uchars, int32_t length,
- UChar **target, const UChar *targetLimit,
- int32_t **offsets,
- int32_t sourceIndex,
- UErrorCode *pErrorCode) {
- UChar *t=*target;
- int32_t *o;
-
- /* write UChars */
- if(offsets==NULL || (o=*offsets)==NULL) {
- while(length>0 && t<targetLimit) {
- *t++=*uchars++;
- --length;
- }
- } else {
- /* output with offsets */
- while(length>0 && t<targetLimit) {
- *t++=*uchars++;
- *o++=sourceIndex;
- --length;
- }
- *offsets=o;
- }
- *target=t;
-
- /* write overflow */
- if(length>0) {
- if(cnv!=NULL) {
- t=cnv->UCharErrorBuffer;
- cnv->UCharErrorBufferLength=(int8_t)length;
- do {
- *t++=*uchars++;
- } while(--length>0);
- }
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
-}
-
-U_CFUNC void
-ucnv_toUWriteCodePoint(UConverter *cnv,
- UChar32 c,
- UChar **target, const UChar *targetLimit,
- int32_t **offsets,
- int32_t sourceIndex,
- UErrorCode *pErrorCode) {
- UChar *t;
- int32_t *o;
-
- t=*target;
-
- if(t<targetLimit) {
- if(c<=0xffff) {
- *t++=(UChar)c;
- c=U_SENTINEL;
- } else /* c is a supplementary code point */ {
- *t++=U16_LEAD(c);
- c=U16_TRAIL(c);
- if(t<targetLimit) {
- *t++=(UChar)c;
- c=U_SENTINEL;
- }
- }
-
- /* write offsets */
- if(offsets!=NULL && (o=*offsets)!=NULL) {
- *o++=sourceIndex;
- if((*target+1)<t) {
- *o++=sourceIndex;
- }
- *offsets=o;
- }
- }
-
- *target=t;
-
- /* write overflow from c */
- if(c>=0) {
- if(cnv!=NULL) {
- int8_t i=0;
- U16_APPEND_UNSAFE(cnv->UCharErrorBuffer, i, c);
- cnv->UCharErrorBufferLength=i;
- }
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
-}
-
-#endif
diff --git a/contrib/libs/icu/common/ucnv_cnv.h b/contrib/libs/icu/common/ucnv_cnv.h
deleted file mode 100644
index 2eed2c60df6..00000000000
--- a/contrib/libs/icu/common/ucnv_cnv.h
+++ /dev/null
@@ -1,323 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1999-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*
-* ucnv_cnv.h:
-* Definitions for converter implementations.
-*
-* Modification History:
-*
-* Date Name Description
-* 05/09/00 helena Added implementation to handle fallback mappings.
-* 06/29/2000 helena Major rewrite of the callback APIs.
-*/
-
-#ifndef UCNV_CNV_H
-#define UCNV_CNV_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "unicode/ucnv_err.h"
-#include "unicode/uset.h"
-#include "uset_imp.h"
-
-U_CDECL_BEGIN
-
-/* this is used in fromUnicode DBCS tables as an "unassigned" marker */
-#define missingCharMarker 0xFFFF
-
-/*
- * #define missingUCharMarker 0xfffe
- *
- * commented out because there are actually two values used in toUnicode tables:
- * U+fffe "unassigned"
- * U+ffff "illegal"
- */
-
-/** Forward declaration, see ucnv_bld.h */
-struct UConverterSharedData;
-typedef struct UConverterSharedData UConverterSharedData;
-
-/* function types for UConverterImpl ---------------------------------------- */
-
-/* struct with arguments for UConverterLoad and ucnv_load() */
-typedef struct {
- int32_t size; /* sizeof(UConverterLoadArgs) */
- int32_t nestedLoads; /* count nested ucnv_load() calls */
- UBool onlyTestIsLoadable; /* input: don't actually load */
- UBool reserved0; /* reserved - for good alignment of the pointers */
- int16_t reserved; /* reserved - for good alignment of the pointers */
- uint32_t options;
- const char *pkg, *name, *locale;
-} UConverterLoadArgs;
-
-#define UCNV_LOAD_ARGS_INITIALIZER \
- { (int32_t)sizeof(UConverterLoadArgs), 0, FALSE, FALSE, 0, 0, NULL, NULL, NULL }
-
-typedef void (*UConverterLoad) (UConverterSharedData *sharedData,
- UConverterLoadArgs *pArgs,
- const uint8_t *raw, UErrorCode *pErrorCode);
-typedef void (*UConverterUnload) (UConverterSharedData *sharedData);
-
-typedef void (*UConverterOpen) (UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *pErrorCode);
-typedef void (*UConverterClose) (UConverter *cnv);
-
-typedef enum UConverterResetChoice {
- UCNV_RESET_BOTH,
- UCNV_RESET_TO_UNICODE,
- UCNV_RESET_FROM_UNICODE
-} UConverterResetChoice;
-
-typedef void (*UConverterReset) (UConverter *cnv, UConverterResetChoice choice);
-
-/*
- * Converter implementation function(s) for ucnv_toUnicode().
- * If the toUnicodeWithOffsets function pointer is NULL,
- * then the toUnicode function will be used and the offsets will be set to -1.
- *
- * Must maintain state across buffers. Use toUBytes[toULength] for partial input
- * sequences; it will be checked in ucnv.c at the end of the input stream
- * to detect truncated input.
- * Some converters may need additional detection and may then set U_TRUNCATED_CHAR_FOUND.
- *
- * The toUnicodeWithOffsets must write exactly as many offset values as target
- * units. Write offset values of -1 for when the source index corresponding to
- * the output unit is not known (e.g., the character started in an earlier buffer).
- * The pArgs->offsets pointer need not be moved forward.
- *
- * At function return, either one of the following conditions must be true:
- * - U_BUFFER_OVERFLOW_ERROR and the target is full: target==targetLimit
- * - another error code with toUBytes[toULength] set to the offending input
- * - no error, and the source is consumed: source==sourceLimit
- *
- * The ucnv.c code will handle the end of the input (reset)
- * (reset, and truncation detection) and callbacks.
- */
-typedef void (*UConverterToUnicode) (UConverterToUnicodeArgs *, UErrorCode *);
-
-/*
- * Same rules as for UConverterToUnicode.
- * A lead surrogate is kept in fromUChar32 across buffers, and if an error
- * occurs, then the offending input code point must be put into fromUChar32
- * as well.
- */
-typedef void (*UConverterFromUnicode) (UConverterFromUnicodeArgs *, UErrorCode *);
-
-/*
- * Converter implementation function for ucnv_convertEx(), for direct conversion
- * between two charsets without pivoting through UTF-16.
- * The rules are the same as for UConverterToUnicode and UConverterFromUnicode.
- * In addition,
- * - The toUnicode side must behave and keep state exactly like the
- * UConverterToUnicode implementation for the same source charset.
- * - A U_USING_DEFAULT_WARNING can be set to request to temporarily fall back
- * to pivoting. When this function is called, the conversion framework makes
- * sure that this warning is not set on input.
- * - Continuing a partial match and flushing the toUnicode replay buffer
- * are handled by pivoting, using the toUnicode and fromUnicode functions.
- */
-typedef void (*UConverterConvert) (UConverterFromUnicodeArgs *pFromUArgs,
- UConverterToUnicodeArgs *pToUArgs,
- UErrorCode *pErrorCode);
-
-/*
- * Converter implementation function for ucnv_getNextUChar().
- * If the function pointer is NULL, then the toUnicode function will be used.
- *
- * Will be called at a character boundary (toULength==0).
- * May return with
- * - U_INDEX_OUTOFBOUNDS_ERROR if there was no output for the input
- * (the return value will be ignored)
- * - U_TRUNCATED_CHAR_FOUND or another error code (never U_BUFFER_OVERFLOW_ERROR!)
- * with toUBytes[toULength] set to the offending input
- * (the return value will be ignored)
- * - return UCNV_GET_NEXT_UCHAR_USE_TO_U, without moving the source pointer,
- * to indicate that the ucnv.c code shall call the toUnicode function instead
- * - return a real code point result
- *
- * Unless UCNV_GET_NEXT_UCHAR_USE_TO_U is returned, the source bytes must be consumed.
- *
- * The ucnv.c code will handle the end of the input (reset)
- * (except for truncation detection!) and callbacks.
- */
-typedef UChar32 (*UConverterGetNextUChar) (UConverterToUnicodeArgs *, UErrorCode *);
-
-typedef void (*UConverterGetStarters)(const UConverter* converter,
- UBool starters[256],
- UErrorCode *pErrorCode);
-
-/* If this function pointer is null or if the function returns null
- * the name field in static data struct should be returned by
- * ucnv_getName() API function
- */
-typedef const char * (*UConverterGetName) (const UConverter *cnv);
-
-/**
- * Write the codepage substitution character.
- * If this function is not set, then ucnv_cbFromUWriteSub() writes
- * the substitution character from UConverter.
- * For stateful converters, it is typically necessary to handle this
- * specificially for the converter in order to properly maintain the state.
- */
-typedef void (*UConverterWriteSub) (UConverterFromUnicodeArgs *pArgs, int32_t offsetIndex, UErrorCode *pErrorCode);
-
-/**
- * For converter-specific safeClone processing
- * If this function is not set, then ucnv_safeClone assumes that the converter has no private data that changes
- * after the converter is done opening.
- * If this function is set, then it is called just after a memcpy() of
- * converter data to the new, empty converter, and is expected to set up
- * the initial state of the converter. It is not expected to increment the
- * reference counts of the standard data types such as the shared data.
- */
-typedef UConverter * (*UConverterSafeClone) (const UConverter *cnv,
- void *stackBuffer,
- int32_t *pBufferSize,
- UErrorCode *status);
-
-/**
- * Filters for some ucnv_getUnicodeSet() implementation code.
- */
-typedef enum UConverterSetFilter {
- UCNV_SET_FILTER_NONE,
- UCNV_SET_FILTER_DBCS_ONLY,
- UCNV_SET_FILTER_2022_CN,
- UCNV_SET_FILTER_SJIS,
- UCNV_SET_FILTER_GR94DBCS,
- UCNV_SET_FILTER_HZ,
- UCNV_SET_FILTER_COUNT
-} UConverterSetFilter;
-
-/**
- * Fills the set of Unicode code points that can be converted by an ICU converter.
- * The API function ucnv_getUnicodeSet() clears the USet before calling
- * the converter's getUnicodeSet() implementation; the converter should only
- * add the appropriate code points to allow recursive use.
- * For example, the ISO-2022-JP converter will call each subconverter's
- * getUnicodeSet() implementation to consecutively add code points to
- * the same USet, which will result in a union of the sets of all subconverters.
- *
- * For more documentation, see ucnv_getUnicodeSet() in ucnv.h.
- */
-typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode);
-
-UBool CONVERSION_U_SUCCESS (UErrorCode err);
-
-/**
- * UConverterImpl contains all the data and functions for a converter type.
- * Its function pointers work much like a C++ vtable.
- * Many converter types need to define only a subset of the functions;
- * when a function pointer is NULL, then a default action will be performed.
- *
- * Every converter type must implement toUnicode, fromUnicode, and getNextUChar,
- * otherwise the converter may crash.
- * Every converter type that has variable-length codepage sequences should
- * also implement toUnicodeWithOffsets and fromUnicodeWithOffsets for
- * correct offset handling.
- * All other functions may or may not be implemented - it depends only on
- * whether the converter type needs them.
- *
- * When open() fails, then close() will be called, if present.
- */
-struct UConverterImpl {
- UConverterType type;
-
- UConverterLoad load;
- UConverterUnload unload;
-
- UConverterOpen open;
- UConverterClose close;
- UConverterReset reset;
-
- UConverterToUnicode toUnicode;
- UConverterToUnicode toUnicodeWithOffsets;
- UConverterFromUnicode fromUnicode;
- UConverterFromUnicode fromUnicodeWithOffsets;
- UConverterGetNextUChar getNextUChar;
-
- UConverterGetStarters getStarters;
- UConverterGetName getName;
- UConverterWriteSub writeSub;
- UConverterSafeClone safeClone;
- UConverterGetUnicodeSet getUnicodeSet;
-
- UConverterConvert toUTF8;
- UConverterConvert fromUTF8;
-};
-
-extern const UConverterSharedData
- _MBCSData, _Latin1Data,
- _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData,
- _ISO2022Data,
- _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
- _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,
- _HZData,_ISCIIData, _SCSUData, _ASCIIData,
- _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData;
-
-U_CDECL_END
-
-/** Always use fallbacks from codepage to Unicode */
-#define TO_U_USE_FALLBACK(useFallback) TRUE
-#define UCNV_TO_U_USE_FALLBACK(cnv) TRUE
-
-/** Use fallbacks from Unicode to codepage when cnv->useFallback or for private-use code points */
-#define IS_PRIVATE_USE(c) ((uint32_t)((c)-0xe000)<0x1900 || (uint32_t)((c)-0xf0000)<0x20000)
-#define FROM_U_USE_FALLBACK(useFallback, c) ((useFallback) || IS_PRIVATE_USE(c))
-#define UCNV_FROM_U_USE_FALLBACK(cnv, c) FROM_U_USE_FALLBACK((cnv)->useFallback, c)
-
-/**
- * Magic number for ucnv_getNextUChar(), returned by a
- * getNextUChar() implementation to indicate to use the converter's toUnicode()
- * instead of the native function.
- * @internal
- */
-#define UCNV_GET_NEXT_UCHAR_USE_TO_U -9
-
-U_CFUNC void
-ucnv_getCompleteUnicodeSet(const UConverter *cnv,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode);
-
-U_CFUNC void
-ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode);
-
-U_CFUNC void
-ucnv_fromUWriteBytes(UConverter *cnv,
- const char *bytes, int32_t length,
- char **target, const char *targetLimit,
- int32_t **offsets,
- int32_t sourceIndex,
- UErrorCode *pErrorCode);
-U_CFUNC void
-ucnv_toUWriteUChars(UConverter *cnv,
- const UChar *uchars, int32_t length,
- UChar **target, const UChar *targetLimit,
- int32_t **offsets,
- int32_t sourceIndex,
- UErrorCode *pErrorCode);
-
-U_CFUNC void
-ucnv_toUWriteCodePoint(UConverter *cnv,
- UChar32 c,
- UChar **target, const UChar *targetLimit,
- int32_t **offsets,
- int32_t sourceIndex,
- UErrorCode *pErrorCode);
-
-#endif
-
-#endif /* UCNV_CNV */
diff --git a/contrib/libs/icu/common/ucnv_ct.cpp b/contrib/libs/icu/common/ucnv_ct.cpp
deleted file mode 100644
index b40e1b2c970..00000000000
--- a/contrib/libs/icu/common/ucnv_ct.cpp
+++ /dev/null
@@ -1,646 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2010-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* file name: ucnv_ct.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010Dec09
-* created by: Michael Ow
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "unicode/uset.h"
-#include "unicode/ucnv_err.h"
-#include "unicode/ucnv_cb.h"
-#include "unicode/utf16.h"
-#include "ucnv_imp.h"
-#include "ucnv_bld.h"
-#include "ucnv_cnv.h"
-#include "ucnvmbcs.h"
-#include "cstring.h"
-#include "cmemory.h"
-
-typedef enum {
- INVALID = -2,
- DO_SEARCH = -1,
-
- COMPOUND_TEXT_SINGLE_0 = 0,
- COMPOUND_TEXT_SINGLE_1 = 1,
- COMPOUND_TEXT_SINGLE_2 = 2,
- COMPOUND_TEXT_SINGLE_3 = 3,
-
- COMPOUND_TEXT_DOUBLE_1 = 4,
- COMPOUND_TEXT_DOUBLE_2 = 5,
- COMPOUND_TEXT_DOUBLE_3 = 6,
- COMPOUND_TEXT_DOUBLE_4 = 7,
- COMPOUND_TEXT_DOUBLE_5 = 8,
- COMPOUND_TEXT_DOUBLE_6 = 9,
- COMPOUND_TEXT_DOUBLE_7 = 10,
-
- COMPOUND_TEXT_TRIPLE_DOUBLE = 11,
-
- IBM_915 = 12,
- IBM_916 = 13,
- IBM_914 = 14,
- IBM_874 = 15,
- IBM_912 = 16,
- IBM_913 = 17,
- ISO_8859_14 = 18,
- IBM_923 = 19,
- NUM_OF_CONVERTERS = 20
-} COMPOUND_TEXT_CONVERTERS;
-
-#define SEARCH_LENGTH 12
-
-static const uint8_t escSeqCompoundText[NUM_OF_CONVERTERS][5] = {
- /* Single */
- { 0x1B, 0x2D, 0x41, 0, 0 },
- { 0x1B, 0x2D, 0x4D, 0, 0 },
- { 0x1B, 0x2D, 0x46, 0, 0 },
- { 0x1B, 0x2D, 0x47, 0, 0 },
-
- /* Double */
- { 0x1B, 0x24, 0x29, 0x41, 0 },
- { 0x1B, 0x24, 0x29, 0x42, 0 },
- { 0x1B, 0x24, 0x29, 0x43, 0 },
- { 0x1B, 0x24, 0x29, 0x44, 0 },
- { 0x1B, 0x24, 0x29, 0x47, 0 },
- { 0x1B, 0x24, 0x29, 0x48, 0 },
- { 0x1B, 0x24, 0x29, 0x49, 0 },
-
- /* Triple/Double */
- { 0x1B, 0x25, 0x47, 0, 0 },
-
- /*IBM-915*/
- { 0x1B, 0x2D, 0x4C, 0, 0 },
- /*IBM-916*/
- { 0x1B, 0x2D, 0x48, 0, 0 },
- /*IBM-914*/
- { 0x1B, 0x2D, 0x44, 0, 0 },
- /*IBM-874*/
- { 0x1B, 0x2D, 0x54, 0, 0 },
- /*IBM-912*/
- { 0x1B, 0x2D, 0x42, 0, 0 },
- /* IBM-913 */
- { 0x1B, 0x2D, 0x43, 0, 0 },
- /* ISO-8859_14 */
- { 0x1B, 0x2D, 0x5F, 0, 0 },
- /* IBM-923 */
- { 0x1B, 0x2D, 0x62, 0, 0 },
-};
-
-#define ESC_START 0x1B
-
-#define isASCIIRange(codepoint) \
- ((codepoint == 0x0000) || (codepoint == 0x0009) || (codepoint == 0x000A) || \
- (codepoint >= 0x0020 && codepoint <= 0x007f) || (codepoint >= 0x00A0 && codepoint <= 0x00FF))
-
-#define isIBM915(codepoint) \
- ((codepoint >= 0x0401 && codepoint <= 0x045F) || (codepoint == 0x2116))
-
-#define isIBM916(codepoint) \
- ((codepoint >= 0x05D0 && codepoint <= 0x05EA) || (codepoint == 0x2017) || (codepoint == 0x203E))
-
-#define isCompoundS3(codepoint) \
- ((codepoint == 0x060C) || (codepoint == 0x061B) || (codepoint == 0x061F) || (codepoint >= 0x0621 && codepoint <= 0x063A) || \
- (codepoint >= 0x0640 && codepoint <= 0x0652) || (codepoint >= 0x0660 && codepoint <= 0x066D) || (codepoint == 0x200B) || \
- (codepoint >= 0x0FE70 && codepoint <= 0x0FE72) || (codepoint == 0x0FE74) || (codepoint >= 0x0FE76 && codepoint <= 0x0FEBE))
-
-#define isCompoundS2(codepoint) \
- ((codepoint == 0x02BC) || (codepoint == 0x02BD) || (codepoint >= 0x0384 && codepoint <= 0x03CE) || (codepoint == 0x2015))
-
-#define isIBM914(codepoint) \
- ((codepoint == 0x0100) || (codepoint == 0x0101) || (codepoint == 0x0112) || (codepoint == 0x0113) || (codepoint == 0x0116) || (codepoint == 0x0117) || \
- (codepoint == 0x0122) || (codepoint == 0x0123) || (codepoint >= 0x0128 && codepoint <= 0x012B) || (codepoint == 0x012E) || (codepoint == 0x012F) || \
- (codepoint >= 0x0136 && codepoint <= 0x0138) || (codepoint == 0x013B) || (codepoint == 0x013C) || (codepoint == 0x0145) || (codepoint == 0x0146) || \
- (codepoint >= 0x014A && codepoint <= 0x014D) || (codepoint == 0x0156) || (codepoint == 0x0157) || (codepoint >= 0x0166 && codepoint <= 0x016B) || \
- (codepoint == 0x0172) || (codepoint == 0x0173))
-
-#define isIBM874(codepoint) \
- ((codepoint >= 0x0E01 && codepoint <= 0x0E3A) || (codepoint >= 0x0E3F && codepoint <= 0x0E5B))
-
-#define isIBM912(codepoint) \
- ((codepoint >= 0x0102 && codepoint <= 0x0107) || (codepoint >= 0x010C && codepoint <= 0x0111) || (codepoint >= 0x0118 && codepoint <= 0x011B) || \
- (codepoint == 0x0139) || (codepoint == 0x013A) || (codepoint == 0x013D) || (codepoint == 0x013E) || (codepoint >= 0x0141 && codepoint <= 0x0144) || \
- (codepoint == 0x0147) || (codepoint == 0x0147) || (codepoint == 0x0150) || (codepoint == 0x0151) || (codepoint == 0x0154) || (codepoint == 0x0155) || \
- (codepoint >= 0x0158 && codepoint <= 0x015B) || (codepoint == 0x015E) || (codepoint == 0x015F) || (codepoint >= 0x0160 && codepoint <= 0x0165) || \
- (codepoint == 0x016E) || (codepoint == 0x016F) || (codepoint == 0x0170) || (codepoint == 0x0171) || (codepoint >= 0x0179 && codepoint <= 0x017E) || \
- (codepoint == 0x02C7) || (codepoint == 0x02D8) || (codepoint == 0x02D9) || (codepoint == 0x02DB) || (codepoint == 0x02DD))
-
-#define isIBM913(codepoint) \
- ((codepoint >= 0x0108 && codepoint <= 0x010B) || (codepoint == 0x011C) || \
- (codepoint == 0x011D) || (codepoint == 0x0120) || (codepoint == 0x0121) || \
- (codepoint >= 0x0124 && codepoint <= 0x0127) || (codepoint == 0x0134) || (codepoint == 0x0135) || \
- (codepoint == 0x015C) || (codepoint == 0x015D) || (codepoint == 0x016C) || (codepoint == 0x016D))
-
-#define isCompoundS1(codepoint) \
- ((codepoint == 0x011E) || (codepoint == 0x011F) || (codepoint == 0x0130) || \
- (codepoint == 0x0131) || (codepoint >= 0x0218 && codepoint <= 0x021B))
-
-#define isISO8859_14(codepoint) \
- ((codepoint >= 0x0174 && codepoint <= 0x0177) || (codepoint == 0x1E0A) || \
- (codepoint == 0x1E0B) || (codepoint == 0x1E1E) || (codepoint == 0x1E1F) || \
- (codepoint == 0x1E40) || (codepoint == 0x1E41) || (codepoint == 0x1E56) || \
- (codepoint == 0x1E57) || (codepoint == 0x1E60) || (codepoint == 0x1E61) || \
- (codepoint == 0x1E6A) || (codepoint == 0x1E6B) || (codepoint == 0x1EF2) || \
- (codepoint == 0x1EF3) || (codepoint >= 0x1E80 && codepoint <= 0x1E85))
-
-#define isIBM923(codepoint) \
- ((codepoint == 0x0152) || (codepoint == 0x0153) || (codepoint == 0x0178) || (codepoint == 0x20AC))
-
-
-typedef struct{
- UConverterSharedData *myConverterArray[NUM_OF_CONVERTERS];
- COMPOUND_TEXT_CONVERTERS state;
-} UConverterDataCompoundText;
-
-/*********** Compound Text Converter Protos ***********/
-U_CDECL_BEGIN
-static void U_CALLCONV
-_CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
-
-static void U_CALLCONV
- _CompoundTextClose(UConverter *converter);
-
-static void U_CALLCONV
-_CompoundTextReset(UConverter *converter, UConverterResetChoice choice);
-
-static const char* U_CALLCONV
-_CompoundTextgetName(const UConverter* cnv);
-
-
-static int32_t findNextEsc(const char *source, const char *sourceLimit) {
- int32_t length = static_cast<int32_t>(sourceLimit - source);
- int32_t i;
- for (i = 1; i < length; i++) {
- if (*(source + i) == 0x1B) {
- return i;
- }
- }
-
- return length;
-}
-
-static COMPOUND_TEXT_CONVERTERS getState(int codepoint) {
- COMPOUND_TEXT_CONVERTERS state = DO_SEARCH;
-
- if (isASCIIRange(codepoint)) {
- state = COMPOUND_TEXT_SINGLE_0;
- } else if (isIBM912(codepoint)) {
- state = IBM_912;
- }else if (isIBM913(codepoint)) {
- state = IBM_913;
- } else if (isISO8859_14(codepoint)) {
- state = ISO_8859_14;
- } else if (isIBM923(codepoint)) {
- state = IBM_923;
- } else if (isIBM874(codepoint)) {
- state = IBM_874;
- } else if (isIBM914(codepoint)) {
- state = IBM_914;
- } else if (isCompoundS2(codepoint)) {
- state = COMPOUND_TEXT_SINGLE_2;
- } else if (isCompoundS3(codepoint)) {
- state = COMPOUND_TEXT_SINGLE_3;
- } else if (isIBM916(codepoint)) {
- state = IBM_916;
- } else if (isIBM915(codepoint)) {
- state = IBM_915;
- } else if (isCompoundS1(codepoint)) {
- state = COMPOUND_TEXT_SINGLE_1;
- }
-
- return state;
-}
-
-static COMPOUND_TEXT_CONVERTERS findStateFromEscSeq(const char* source, const char* sourceLimit, const uint8_t* toUBytesBuffer, int32_t toUBytesBufferLength, UErrorCode *err) {
- COMPOUND_TEXT_CONVERTERS state = INVALID;
- UBool matchFound = FALSE;
- int32_t i, n, offset = toUBytesBufferLength;
-
- for (i = 0; i < NUM_OF_CONVERTERS; i++) {
- matchFound = TRUE;
- for (n = 0; escSeqCompoundText[i][n] != 0; n++) {
- if (n < toUBytesBufferLength) {
- if (toUBytesBuffer[n] != escSeqCompoundText[i][n]) {
- matchFound = FALSE;
- break;
- }
- } else if ((source + (n - offset)) >= sourceLimit) {
- *err = U_TRUNCATED_CHAR_FOUND;
- matchFound = FALSE;
- break;
- } else if (*(source + (n - offset)) != escSeqCompoundText[i][n]) {
- matchFound = FALSE;
- break;
- }
- }
-
- if (matchFound) {
- break;
- }
- }
-
- if (matchFound) {
- state = (COMPOUND_TEXT_CONVERTERS)i;
- }
-
- return state;
-}
-
-static void U_CALLCONV
-_CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
- cnv->extraInfo = uprv_malloc (sizeof (UConverterDataCompoundText));
- if (cnv->extraInfo != NULL) {
- UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo;
-
- UConverterNamePieces stackPieces;
- UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
-
- myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_0] = NULL;
- myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_1] = ucnv_loadSharedData("icu-internal-compound-s1", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_2] = ucnv_loadSharedData("icu-internal-compound-s2", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_3] = ucnv_loadSharedData("icu-internal-compound-s3", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_1] = ucnv_loadSharedData("icu-internal-compound-d1", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_2] = ucnv_loadSharedData("icu-internal-compound-d2", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_3] = ucnv_loadSharedData("icu-internal-compound-d3", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_4] = ucnv_loadSharedData("icu-internal-compound-d4", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_5] = ucnv_loadSharedData("icu-internal-compound-d5", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_6] = ucnv_loadSharedData("icu-internal-compound-d6", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_7] = ucnv_loadSharedData("icu-internal-compound-d7", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[COMPOUND_TEXT_TRIPLE_DOUBLE] = ucnv_loadSharedData("icu-internal-compound-t", &stackPieces, &stackArgs, errorCode);
-
- myConverterData->myConverterArray[IBM_915] = ucnv_loadSharedData("ibm-915_P100-1995", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[IBM_916] = ucnv_loadSharedData("ibm-916_P100-1995", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[IBM_914] = ucnv_loadSharedData("ibm-914_P100-1995", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[IBM_874] = ucnv_loadSharedData("ibm-874_P100-1995", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[IBM_912] = ucnv_loadSharedData("ibm-912_P100-1995", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[IBM_913] = ucnv_loadSharedData("ibm-913_P100-2000", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[ISO_8859_14] = ucnv_loadSharedData("iso-8859_14-1998", &stackPieces, &stackArgs, errorCode);
- myConverterData->myConverterArray[IBM_923] = ucnv_loadSharedData("ibm-923_P100-1998", &stackPieces, &stackArgs, errorCode);
-
- if (U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) {
- _CompoundTextClose(cnv);
- return;
- }
-
- myConverterData->state = (COMPOUND_TEXT_CONVERTERS)0;
- } else {
- *errorCode = U_MEMORY_ALLOCATION_ERROR;
- }
-}
-
-
-static void U_CALLCONV
-_CompoundTextClose(UConverter *converter) {
- UConverterDataCompoundText* myConverterData = (UConverterDataCompoundText*)(converter->extraInfo);
- int32_t i;
-
- if (converter->extraInfo != NULL) {
- /*close the array of converter pointers and free the memory*/
- for (i = 0; i < NUM_OF_CONVERTERS; i++) {
- if (myConverterData->myConverterArray[i] != NULL) {
- ucnv_unloadSharedDataIfReady(myConverterData->myConverterArray[i]);
- }
- }
-
- uprv_free(converter->extraInfo);
- converter->extraInfo = NULL;
- }
-}
-
-static void U_CALLCONV
-_CompoundTextReset(UConverter *converter, UConverterResetChoice choice) {
- (void)converter;
- (void)choice;
-}
-
-static const char* U_CALLCONV
-_CompoundTextgetName(const UConverter* cnv){
- (void)cnv;
- return "x11-compound-text";
-}
-
-static void U_CALLCONV
-UConverter_fromUnicode_CompoundText_OFFSETS(UConverterFromUnicodeArgs* args, UErrorCode* err){
- UConverter *cnv = args->converter;
- uint8_t *target = (uint8_t *) args->target;
- const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
- const UChar* source = args->source;
- const UChar* sourceLimit = args->sourceLimit;
- /* int32_t* offsets = args->offsets; */
- UChar32 sourceChar;
- UBool useFallback = cnv->useFallback;
- uint8_t tmpTargetBuffer[7];
- int32_t tmpTargetBufferLength = 0;
- COMPOUND_TEXT_CONVERTERS currentState, tmpState;
- uint32_t pValue;
- int32_t pValueLength = 0;
- int32_t i, n, j;
-
- UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo;
-
- currentState = myConverterData->state;
-
- /* check if the last codepoint of previous buffer was a lead surrogate*/
- if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
- goto getTrail;
- }
-
- while( source < sourceLimit){
- if(target < targetLimit){
-
- sourceChar = *(source++);
- /*check if the char is a First surrogate*/
- if(U16_IS_SURROGATE(sourceChar)) {
- if(U16_IS_SURROGATE_LEAD(sourceChar)) {
-getTrail:
- /*look ahead to find the trail surrogate*/
- if(source < sourceLimit) {
- /* test the following code unit */
- UChar trail=(UChar) *source;
- if(U16_IS_TRAIL(trail)) {
- source++;
- sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
- cnv->fromUChar32=0x00;
- /* convert this supplementary code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- cnv->fromUChar32=sourceChar;
- break;
- }
- } else {
- /* no more input */
- cnv->fromUChar32=sourceChar;
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- cnv->fromUChar32=sourceChar;
- break;
- }
- }
-
- tmpTargetBufferLength = 0;
- tmpState = getState(sourceChar);
-
- if (tmpState != DO_SEARCH && currentState != tmpState) {
- /* Get escape sequence if necessary */
- currentState = tmpState;
- for (i = 0; escSeqCompoundText[currentState][i] != 0; i++) {
- tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][i];
- }
- }
-
- if (tmpState == DO_SEARCH) {
- /* Test all available converters */
- for (i = 1; i < SEARCH_LENGTH; i++) {
- pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[i], sourceChar, &pValue, useFallback);
- if (pValueLength > 0) {
- tmpState = (COMPOUND_TEXT_CONVERTERS)i;
- if (currentState != tmpState) {
- currentState = tmpState;
- for (j = 0; escSeqCompoundText[currentState][j] != 0; j++) {
- tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][j];
- }
- }
- for (n = (pValueLength - 1); n >= 0; n--) {
- tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8));
- }
- break;
- }
- }
- } else if (tmpState == COMPOUND_TEXT_SINGLE_0) {
- tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)sourceChar;
- } else {
- pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[currentState], sourceChar, &pValue, useFallback);
- if (pValueLength > 0) {
- for (n = (pValueLength - 1); n >= 0; n--) {
- tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8));
- }
- }
- }
-
- for (i = 0; i < tmpTargetBufferLength; i++) {
- if (target < targetLimit) {
- *target++ = tmpTargetBuffer[i];
- } else {
- *err = U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-
- if (*err == U_BUFFER_OVERFLOW_ERROR) {
- for (; i < tmpTargetBufferLength; i++) {
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = tmpTargetBuffer[i];
- }
- }
- } else {
- *err = U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-
- /*save the state and return */
- myConverterData->state = currentState;
- args->source = source;
- args->target = (char*)target;
-}
-
-
-static void U_CALLCONV
-UConverter_toUnicode_CompoundText_OFFSETS(UConverterToUnicodeArgs *args,
- UErrorCode* err){
- const char *mySource = (char *) args->source;
- UChar *myTarget = args->target;
- const char *mySourceLimit = args->sourceLimit;
- const char *tmpSourceLimit = mySourceLimit;
- uint32_t mySourceChar = 0x0000;
- COMPOUND_TEXT_CONVERTERS currentState, tmpState;
- int32_t sourceOffset = 0;
- UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) args->converter->extraInfo;
- UConverterSharedData* savedSharedData = NULL;
-
- UConverterToUnicodeArgs subArgs;
- int32_t minArgsSize;
-
- /* set up the subconverter arguments */
- if(args->size<sizeof(UConverterToUnicodeArgs)) {
- minArgsSize = args->size;
- } else {
- minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
- }
-
- uprv_memcpy(&subArgs, args, minArgsSize);
- subArgs.size = (uint16_t)minArgsSize;
-
- currentState = tmpState = myConverterData->state;
-
- while(mySource < mySourceLimit){
- if(myTarget < args->targetLimit){
- if (args->converter->toULength > 0) {
- mySourceChar = args->converter->toUBytes[0];
- } else {
- mySourceChar = (uint8_t)*mySource;
- }
-
- if (mySourceChar == ESC_START) {
- tmpState = findStateFromEscSeq(mySource, mySourceLimit, args->converter->toUBytes, args->converter->toULength, err);
-
- if (*err == U_TRUNCATED_CHAR_FOUND) {
- for (; mySource < mySourceLimit;) {
- args->converter->toUBytes[args->converter->toULength++] = *mySource++;
- }
- *err = U_ZERO_ERROR;
- break;
- } else if (tmpState == INVALID) {
- if (args->converter->toULength == 0) {
- mySource++; /* skip over the 0x1b byte */
- }
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
-
- if (tmpState != currentState) {
- currentState = tmpState;
- }
-
- sourceOffset = static_cast<int32_t>(uprv_strlen((char*)escSeqCompoundText[currentState]) - args->converter->toULength);
-
- mySource += sourceOffset;
-
- args->converter->toULength = 0;
- }
-
- if (currentState == COMPOUND_TEXT_SINGLE_0) {
- while (mySource < mySourceLimit) {
- if (*mySource == ESC_START) {
- break;
- }
- if (myTarget < args->targetLimit) {
- *myTarget++ = 0x00ff&(*mySource++);
- } else {
- *err = U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- } else if (mySource < mySourceLimit){
- sourceOffset = findNextEsc(mySource, mySourceLimit);
-
- tmpSourceLimit = mySource + sourceOffset;
-
- subArgs.source = mySource;
- subArgs.sourceLimit = tmpSourceLimit;
- subArgs.target = myTarget;
- savedSharedData = subArgs.converter->sharedData;
- subArgs.converter->sharedData = myConverterData->myConverterArray[currentState];
-
- ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
-
- subArgs.converter->sharedData = savedSharedData;
-
- mySource = subArgs.source;
- myTarget = subArgs.target;
-
- if (U_FAILURE(*err)) {
- if(*err == U_BUFFER_OVERFLOW_ERROR) {
- if(subArgs.converter->UCharErrorBufferLength > 0) {
- uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
- subArgs.converter->UCharErrorBufferLength);
- }
- args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
- subArgs.converter->UCharErrorBufferLength = 0;
- }
- break;
- }
- }
- } else {
- *err = U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- myConverterData->state = currentState;
- args->target = myTarget;
- args->source = mySource;
-}
-
-static void U_CALLCONV
-_CompoundText_GetUnicodeSet(const UConverter *cnv,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode) {
- UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *)cnv->extraInfo;
- int32_t i;
-
- for (i = 1; i < NUM_OF_CONVERTERS; i++) {
- ucnv_MBCSGetUnicodeSetForUnicode(myConverterData->myConverterArray[i], sa, which, pErrorCode);
- }
- sa->add(sa->set, 0x0000);
- sa->add(sa->set, 0x0009);
- sa->add(sa->set, 0x000A);
- sa->addRange(sa->set, 0x0020, 0x007F);
- sa->addRange(sa->set, 0x00A0, 0x00FF);
-}
-U_CDECL_END
-
-static const UConverterImpl _CompoundTextImpl = {
-
- UCNV_COMPOUND_TEXT,
-
- NULL,
- NULL,
-
- _CompoundTextOpen,
- _CompoundTextClose,
- _CompoundTextReset,
-
- UConverter_toUnicode_CompoundText_OFFSETS,
- UConverter_toUnicode_CompoundText_OFFSETS,
- UConverter_fromUnicode_CompoundText_OFFSETS,
- UConverter_fromUnicode_CompoundText_OFFSETS,
- NULL,
-
- NULL,
- _CompoundTextgetName,
- NULL,
- NULL,
- _CompoundText_GetUnicodeSet,
- NULL,
- NULL
-};
-
-static const UConverterStaticData _CompoundTextStaticData = {
- sizeof(UConverterStaticData),
- "COMPOUND_TEXT",
- 0,
- UCNV_IBM,
- UCNV_COMPOUND_TEXT,
- 1,
- 6,
- { 0xef, 0, 0, 0 },
- 1,
- FALSE,
- FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-const UConverterSharedData _CompoundTextData =
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CompoundTextStaticData, &_CompoundTextImpl);
-
-#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
diff --git a/contrib/libs/icu/common/ucnv_err.cpp b/contrib/libs/icu/common/ucnv_err.cpp
deleted file mode 100644
index 6b738face5e..00000000000
--- a/contrib/libs/icu/common/ucnv_err.cpp
+++ /dev/null
@@ -1,486 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- *****************************************************************************
- *
- * Copyright (C) 1998-2016, International Business Machines
- * Corporation and others. All Rights Reserved.
- *
- *****************************************************************************
- *
- * ucnv_err.c
- * Implements error behaviour functions called by T_UConverter_{from,to}Unicode
- *
- *
-* Change history:
-*
-* 06/29/2000 helena Major rewrite of the callback APIs.
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/ucnv_err.h"
-#include "unicode/ucnv_cb.h"
-#include "ucnv_cnv.h"
-#include "cmemory.h"
-#include "unicode/ucnv.h"
-#include "ustrfmt.h"
-
-#define VALUE_STRING_LENGTH 48
-/*Magic # 32 = 4(number of char in value string) * 8(max number of bytes per char for any converter) */
-#define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025
-#define UNICODE_U_CODEPOINT 0x0055
-#define UNICODE_X_CODEPOINT 0x0058
-#define UNICODE_RS_CODEPOINT 0x005C
-#define UNICODE_U_LOW_CODEPOINT 0x0075
-#define UNICODE_X_LOW_CODEPOINT 0x0078
-#define UNICODE_AMP_CODEPOINT 0x0026
-#define UNICODE_HASH_CODEPOINT 0x0023
-#define UNICODE_SEMICOLON_CODEPOINT 0x003B
-#define UNICODE_PLUS_CODEPOINT 0x002B
-#define UNICODE_LEFT_CURLY_CODEPOINT 0x007B
-#define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D
-#define UNICODE_SPACE_CODEPOINT 0x0020
-#define UCNV_PRV_ESCAPE_ICU 0
-#define UCNV_PRV_ESCAPE_C 'C'
-#define UCNV_PRV_ESCAPE_XML_DEC 'D'
-#define UCNV_PRV_ESCAPE_XML_HEX 'X'
-#define UCNV_PRV_ESCAPE_JAVA 'J'
-#define UCNV_PRV_ESCAPE_UNICODE 'U'
-#define UCNV_PRV_ESCAPE_CSS2 'S'
-#define UCNV_PRV_STOP_ON_ILLEGAL 'i'
-
-/*
- * IS_DEFAULT_IGNORABLE_CODE_POINT
- * This is to check if a code point has the default ignorable unicode property.
- * As such, this list needs to be updated if the ignorable code point list ever
- * changes.
- * To avoid dependency on other code, this list is hard coded here.
- * When an ignorable code point is found and is unmappable, the default callbacks
- * will ignore them.
- * For a list of the default ignorable code points, use this link:
- * https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i=
- *
- * This list should be sync with the one in CharsetCallback.java
- */
-#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) ( \
- (c == 0x00AD) || \
- (c == 0x034F) || \
- (c == 0x061C) || \
- (c == 0x115F) || \
- (c == 0x1160) || \
- (0x17B4 <= c && c <= 0x17B5) || \
- (0x180B <= c && c <= 0x180E) || \
- (0x200B <= c && c <= 0x200F) || \
- (0x202A <= c && c <= 0x202E) || \
- (0x2060 <= c && c <= 0x206F) || \
- (c == 0x3164) || \
- (0xFE00 <= c && c <= 0xFE0F) || \
- (c == 0xFEFF) || \
- (c == 0xFFA0) || \
- (0xFFF0 <= c && c <= 0xFFF8) || \
- (0x1BCA0 <= c && c <= 0x1BCA3) || \
- (0x1D173 <= c && c <= 0x1D17A) || \
- (0xE0000 <= c && c <= 0xE0FFF))
-
-
-/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
-U_CAPI void U_EXPORT2
-UCNV_FROM_U_CALLBACK_STOP (
- const void *context,
- UConverterFromUnicodeArgs *fromUArgs,
- const UChar* codeUnits,
- int32_t length,
- UChar32 codePoint,
- UConverterCallbackReason reason,
- UErrorCode * err)
-{
- (void)context;
- (void)fromUArgs;
- (void)codeUnits;
- (void)length;
- if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
- {
- /*
- * Skip if the codepoint has unicode property of default ignorable.
- */
- *err = U_ZERO_ERROR;
- }
- /* the caller must have set the error code accordingly */
- return;
-}
-
-
-/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
-U_CAPI void U_EXPORT2
-UCNV_TO_U_CALLBACK_STOP (
- const void *context,
- UConverterToUnicodeArgs *toUArgs,
- const char* codePoints,
- int32_t length,
- UConverterCallbackReason reason,
- UErrorCode * err)
-{
- /* the caller must have set the error code accordingly */
- (void)context; (void)toUArgs; (void)codePoints; (void)length; (void)reason; (void)err;
- return;
-}
-
-U_CAPI void U_EXPORT2
-UCNV_FROM_U_CALLBACK_SKIP (
- const void *context,
- UConverterFromUnicodeArgs *fromUArgs,
- const UChar* codeUnits,
- int32_t length,
- UChar32 codePoint,
- UConverterCallbackReason reason,
- UErrorCode * err)
-{
- (void)fromUArgs;
- (void)codeUnits;
- (void)length;
- if (reason <= UCNV_IRREGULAR)
- {
- if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
- {
- /*
- * Skip if the codepoint has unicode property of default ignorable.
- */
- *err = U_ZERO_ERROR;
- }
- else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
- {
- *err = U_ZERO_ERROR;
- }
- /* else the caller must have set the error code accordingly. */
- }
- /* else ignore the reset, close and clone calls. */
-}
-
-U_CAPI void U_EXPORT2
-UCNV_FROM_U_CALLBACK_SUBSTITUTE (
- const void *context,
- UConverterFromUnicodeArgs *fromArgs,
- const UChar* codeUnits,
- int32_t length,
- UChar32 codePoint,
- UConverterCallbackReason reason,
- UErrorCode * err)
-{
- (void)codeUnits;
- (void)length;
- if (reason <= UCNV_IRREGULAR)
- {
- if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
- {
- /*
- * Skip if the codepoint has unicode property of default ignorable.
- */
- *err = U_ZERO_ERROR;
- }
- else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
- {
- *err = U_ZERO_ERROR;
- ucnv_cbFromUWriteSub(fromArgs, 0, err);
- }
- /* else the caller must have set the error code accordingly. */
- }
- /* else ignore the reset, close and clone calls. */
-}
-
-/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
- *uses a clean copy (resetted) of the converter, to convert that unicode
- *escape sequence to the target codepage (if conversion failure happens then
- *we revert to substituting with subchar)
- */
-U_CAPI void U_EXPORT2
-UCNV_FROM_U_CALLBACK_ESCAPE (
- const void *context,
- UConverterFromUnicodeArgs *fromArgs,
- const UChar *codeUnits,
- int32_t length,
- UChar32 codePoint,
- UConverterCallbackReason reason,
- UErrorCode * err)
-{
-
- UChar valueString[VALUE_STRING_LENGTH];
- int32_t valueStringLength = 0;
- int32_t i = 0;
-
- const UChar *myValueSource = NULL;
- UErrorCode err2 = U_ZERO_ERROR;
- UConverterFromUCallback original = NULL;
- const void *originalContext;
-
- UConverterFromUCallback ignoredCallback = NULL;
- const void *ignoredContext;
-
- if (reason > UCNV_IRREGULAR)
- {
- return;
- }
- else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
- {
- /*
- * Skip if the codepoint has unicode property of default ignorable.
- */
- *err = U_ZERO_ERROR;
- return;
- }
-
- ucnv_setFromUCallBack (fromArgs->converter,
- (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
- NULL,
- &original,
- &originalContext,
- &err2);
-
- if (U_FAILURE (err2))
- {
- *err = err2;
- return;
- }
- if(context==NULL)
- {
- while (i < length)
- {
- valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
- valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
- }
- }
- else
- {
- switch(*((char*)context))
- {
- case UCNV_PRV_ESCAPE_JAVA:
- while (i < length)
- {
- valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
- valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
- }
- break;
-
- case UCNV_PRV_ESCAPE_C:
- valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
-
- if(length==2){
- valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
-
- }
- else{
- valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
- }
- break;
-
- case UCNV_PRV_ESCAPE_XML_DEC:
-
- valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
- valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
- if(length==2){
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
- }
- else{
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
- }
- valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
- break;
-
- case UCNV_PRV_ESCAPE_XML_HEX:
-
- valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
- valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
- valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
- if(length==2){
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
- }
- else{
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
- }
- valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
- break;
-
- case UCNV_PRV_ESCAPE_UNICODE:
- valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */
- valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
- valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
- if (length == 2) {
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
- } else {
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
- }
- valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
- break;
-
- case UCNV_PRV_ESCAPE_CSS2:
- valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
- /* Always add space character, becase the next character might be whitespace,
- which would erroneously be considered the termination of the escape sequence. */
- valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
- break;
-
- default:
- while (i < length)
- {
- valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
- valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
- }
- }
- }
- myValueSource = valueString;
-
- /* reset the error */
- *err = U_ZERO_ERROR;
-
- ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
-
- ucnv_setFromUCallBack (fromArgs->converter,
- original,
- originalContext,
- &ignoredCallback,
- &ignoredContext,
- &err2);
- if (U_FAILURE (err2))
- {
- *err = err2;
- return;
- }
-
- return;
-}
-
-
-
-U_CAPI void U_EXPORT2
-UCNV_TO_U_CALLBACK_SKIP (
- const void *context,
- UConverterToUnicodeArgs *toArgs,
- const char* codeUnits,
- int32_t length,
- UConverterCallbackReason reason,
- UErrorCode * err)
-{
- (void)toArgs;
- (void)codeUnits;
- (void)length;
- if (reason <= UCNV_IRREGULAR)
- {
- if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
- {
- *err = U_ZERO_ERROR;
- }
- /* else the caller must have set the error code accordingly. */
- }
- /* else ignore the reset, close and clone calls. */
-}
-
-U_CAPI void U_EXPORT2
-UCNV_TO_U_CALLBACK_SUBSTITUTE (
- const void *context,
- UConverterToUnicodeArgs *toArgs,
- const char* codeUnits,
- int32_t length,
- UConverterCallbackReason reason,
- UErrorCode * err)
-{
- (void)codeUnits;
- (void)length;
- if (reason <= UCNV_IRREGULAR)
- {
- if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
- {
- *err = U_ZERO_ERROR;
- ucnv_cbToUWriteSub(toArgs,0,err);
- }
- /* else the caller must have set the error code accordingly. */
- }
- /* else ignore the reset, close and clone calls. */
-}
-
-/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
- *and uses that as the substitution sequence
- */
-U_CAPI void U_EXPORT2
-UCNV_TO_U_CALLBACK_ESCAPE (
- const void *context,
- UConverterToUnicodeArgs *toArgs,
- const char* codeUnits,
- int32_t length,
- UConverterCallbackReason reason,
- UErrorCode * err)
-{
- UChar uniValueString[VALUE_STRING_LENGTH];
- int32_t valueStringLength = 0;
- int32_t i = 0;
-
- if (reason > UCNV_IRREGULAR)
- {
- return;
- }
-
- if(context==NULL)
- {
- while (i < length)
- {
- uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
- uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
- valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
- }
- }
- else
- {
- switch(*((char*)context))
- {
- case UCNV_PRV_ESCAPE_XML_DEC:
- while (i < length)
- {
- uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
- uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
- valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
- uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
- }
- break;
-
- case UCNV_PRV_ESCAPE_XML_HEX:
- while (i < length)
- {
- uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
- uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
- uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
- valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
- uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
- }
- break;
- case UCNV_PRV_ESCAPE_C:
- while (i < length)
- {
- uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
- uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
- valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
- }
- break;
- default:
- while (i < length)
- {
- uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
- uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
- uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
- valueStringLength += 2;
- }
- }
- }
- /* reset the error */
- *err = U_ZERO_ERROR;
-
- ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
-}
-
-#endif
diff --git a/contrib/libs/icu/common/ucnv_ext.cpp b/contrib/libs/icu/common/ucnv_ext.cpp
deleted file mode 100644
index 7dea4eef41a..00000000000
--- a/contrib/libs/icu/common/ucnv_ext.cpp
+++ /dev/null
@@ -1,1143 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2003-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: ucnv_ext.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2003jun13
-* created by: Markus W. Scherer
-*
-* Conversion extensions
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
-
-#include "unicode/uset.h"
-#include "unicode/ustring.h"
-#include "ucnv_bld.h"
-#include "ucnv_cnv.h"
-#include "ucnv_ext.h"
-#include "cmemory.h"
-#include "uassert.h"
-
-/* to Unicode --------------------------------------------------------------- */
-
-/*
- * @return lookup value for the byte, if found; else 0
- */
-static inline uint32_t
-ucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) {
- uint32_t word0, word;
- int32_t i, start, limit;
-
- /* check the input byte against the lowest and highest section bytes */
- start=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[0]);
- limit=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[length-1]);
- if(byte<start || limit<byte) {
- return 0; /* the byte is out of range */
- }
-
- if(length==((limit-start)+1)) {
- /* direct access on a linear array */
- return UCNV_EXT_TO_U_GET_VALUE(toUSection[byte-start]); /* could be 0 */
- }
-
- /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */
- word0=UCNV_EXT_TO_U_MAKE_WORD(byte, 0);
-
- /*
- * Shift byte once instead of each section word and add 0xffffff.
- * We will compare the shifted/added byte (bbffffff) against
- * section words which have byte values in the same bit position.
- * If and only if byte bb < section byte ss then bbffffff<ssvvvvvv
- * for all v=0..f
- * so we need not mask off the lower 24 bits of each section word.
- */
- word=word0|UCNV_EXT_TO_U_VALUE_MASK;
-
- /* binary search */
- start=0;
- limit=length;
- for(;;) {
- i=limit-start;
- if(i<=1) {
- break; /* done */
- }
- /* start<limit-1 */
-
- if(i<=4) {
- /* linear search for the last part */
- if(word0<=toUSection[start]) {
- break;
- }
- if(++start<limit && word0<=toUSection[start]) {
- break;
- }
- if(++start<limit && word0<=toUSection[start]) {
- break;
- }
- /* always break at start==limit-1 */
- ++start;
- break;
- }
-
- i=(start+limit)/2;
- if(word<toUSection[i]) {
- limit=i;
- } else {
- start=i;
- }
- }
-
- /* did we really find it? */
- if(start<limit && byte==UCNV_EXT_TO_U_GET_BYTE(word=toUSection[start])) {
- return UCNV_EXT_TO_U_GET_VALUE(word); /* never 0 */
- } else {
- return 0; /* not found */
- }
-}
-
-/*
- * TRUE if not an SI/SO stateful converter,
- * or if the match length fits with the current converter state
- */
-#define UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, match) \
- ((sisoState)<0 || ((sisoState)==0) == (match==1))
-
-/*
- * this works like ucnv_extMatchFromU() except
- * - the first character is in pre
- * - no trie is used
- * - the returned matchLength is not offset by 2
- */
-static int32_t
-ucnv_extMatchToU(const int32_t *cx, int8_t sisoState,
- const char *pre, int32_t preLength,
- const char *src, int32_t srcLength,
- uint32_t *pMatchValue,
- UBool /*useFallback*/, UBool flush) {
- const uint32_t *toUTable, *toUSection;
-
- uint32_t value, matchValue;
- int32_t i, j, idx, length, matchLength;
- uint8_t b;
-
- if(cx==NULL || cx[UCNV_EXT_TO_U_LENGTH]<=0) {
- return 0; /* no extension data, no match */
- }
-
- /* initialize */
- toUTable=UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_INDEX, uint32_t);
- idx=0;
-
- matchValue=0;
- i=j=matchLength=0;
-
- if(sisoState==0) {
- /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */
- if(preLength>1) {
- return 0; /* no match of a DBCS sequence in SBCS mode */
- } else if(preLength==1) {
- srcLength=0;
- } else /* preLength==0 */ {
- if(srcLength>1) {
- srcLength=1;
- }
- }
- flush=TRUE;
- }
-
- /* we must not remember fallback matches when not using fallbacks */
-
- /* match input units until there is a full match or the input is consumed */
- for(;;) {
- /* go to the next section */
- toUSection=toUTable+idx;
-
- /* read first pair of the section */
- value=*toUSection++;
- length=UCNV_EXT_TO_U_GET_BYTE(value);
- value=UCNV_EXT_TO_U_GET_VALUE(value);
- if( value!=0 &&
- (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) ||
- TO_U_USE_FALLBACK(useFallback)) &&
- UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j)
- ) {
- /* remember longest match so far */
- matchValue=value;
- matchLength=i+j;
- }
-
- /* match pre[] then src[] */
- if(i<preLength) {
- b=(uint8_t)pre[i++];
- } else if(j<srcLength) {
- b=(uint8_t)src[j++];
- } else {
- /* all input consumed, partial match */
- if(flush || (length=(i+j))>UCNV_EXT_MAX_BYTES) {
- /*
- * end of the entire input stream, stop with the longest match so far
- * or: partial match must not be longer than UCNV_EXT_MAX_BYTES
- * because it must fit into state buffers
- */
- break;
- } else {
- /* continue with more input next time */
- return -length;
- }
- }
-
- /* search for the current UChar */
- value=ucnv_extFindToU(toUSection, length, b);
- if(value==0) {
- /* no match here, stop with the longest match so far */
- break;
- } else {
- if(UCNV_EXT_TO_U_IS_PARTIAL(value)) {
- /* partial match, continue */
- idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value);
- } else {
- if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) ||
- TO_U_USE_FALLBACK(useFallback)) &&
- UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j)
- ) {
- /* full match, stop with result */
- matchValue=value;
- matchLength=i+j;
- } else {
- /* full match on fallback not taken, stop with the longest match so far */
- }
- break;
- }
- }
- }
-
- if(matchLength==0) {
- /* no match at all */
- return 0;
- }
-
- /* return result */
- *pMatchValue=UCNV_EXT_TO_U_MASK_ROUNDTRIP(matchValue);
- return matchLength;
-}
-
-static inline void
-ucnv_extWriteToU(UConverter *cnv, const int32_t *cx,
- uint32_t value,
- UChar **target, const UChar *targetLimit,
- int32_t **offsets, int32_t srcIndex,
- UErrorCode *pErrorCode) {
- /* output the result */
- if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) {
- /* output a single code point */
- ucnv_toUWriteCodePoint(
- cnv, UCNV_EXT_TO_U_GET_CODE_POINT(value),
- target, targetLimit,
- offsets, srcIndex,
- pErrorCode);
- } else {
- /* output a string - with correct data we have resultLength>0 */
- ucnv_toUWriteUChars(
- cnv,
- UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_UCHARS_INDEX, UChar)+
- UCNV_EXT_TO_U_GET_INDEX(value),
- UCNV_EXT_TO_U_GET_LENGTH(value),
- target, targetLimit,
- offsets, srcIndex,
- pErrorCode);
- }
-}
-
-/*
- * get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS),
- * or 1 for DBCS-only,
- * or -1 if the converter is not SI/SO stateful
- *
- * Note: For SI/SO stateful converters getting here,
- * cnv->mode==0 is equivalent to firstLength==1.
- */
-#define UCNV_SISO_STATE(cnv) \
- ((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO ? (int8_t)(cnv)->mode : \
- (cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1 : -1)
-
-/*
- * target<targetLimit; set error code for overflow
- */
-U_CFUNC UBool
-ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx,
- int32_t firstLength,
- const char **src, const char *srcLimit,
- UChar **target, const UChar *targetLimit,
- int32_t **offsets, int32_t srcIndex,
- UBool flush,
- UErrorCode *pErrorCode) {
- uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */
- int32_t match;
-
- /* try to match */
- match=ucnv_extMatchToU(cx, (int8_t)UCNV_SISO_STATE(cnv),
- (const char *)cnv->toUBytes, firstLength,
- *src, (int32_t)(srcLimit-*src),
- &value,
- cnv->useFallback, flush);
- if(match>0) {
- /* advance src pointer for the consumed input */
- *src+=match-firstLength;
-
- /* write result to target */
- ucnv_extWriteToU(cnv, cx,
- value,
- target, targetLimit,
- offsets, srcIndex,
- pErrorCode);
- return TRUE;
- } else if(match<0) {
- /* save state for partial match */
- const char *s;
- int32_t j;
-
- /* copy the first code point */
- s=(const char *)cnv->toUBytes;
- cnv->preToUFirstLength=(int8_t)firstLength;
- for(j=0; j<firstLength; ++j) {
- cnv->preToU[j]=*s++;
- }
-
- /* now copy the newly consumed input */
- s=*src;
- match=-match;
- for(; j<match; ++j) {
- cnv->preToU[j]=*s++;
- }
- *src=s; /* same as *src=srcLimit; because we reached the end of input */
- cnv->preToULength=(int8_t)match;
- return TRUE;
- } else /* match==0 no match */ {
- return FALSE;
- }
-}
-
-U_CFUNC UChar32
-ucnv_extSimpleMatchToU(const int32_t *cx,
- const char *source, int32_t length,
- UBool useFallback) {
- uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */
- int32_t match;
-
- if(length<=0) {
- return 0xffff;
- }
-
- /* try to match */
- match=ucnv_extMatchToU(cx, -1,
- source, length,
- NULL, 0,
- &value,
- useFallback, TRUE);
- if(match==length) {
- /* write result for simple, single-character conversion */
- if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) {
- return UCNV_EXT_TO_U_GET_CODE_POINT(value);
- }
- }
-
- /*
- * return no match because
- * - match>0 && value points to string: simple conversion cannot handle multiple code points
- * - match>0 && match!=length: not all input consumed, forbidden for this function
- * - match==0: no match found in the first place
- * - match<0: partial match, not supported for simple conversion (and flush==TRUE)
- */
- return 0xfffe;
-}
-
-/*
- * continue partial match with new input
- * never called for simple, single-character conversion
- */
-U_CFUNC void
-ucnv_extContinueMatchToU(UConverter *cnv,
- UConverterToUnicodeArgs *pArgs, int32_t srcIndex,
- UErrorCode *pErrorCode) {
- uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */
- int32_t match, length;
-
- match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv),
- cnv->preToU, cnv->preToULength,
- pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source),
- &value,
- cnv->useFallback, pArgs->flush);
- if(match>0) {
- if(match>=cnv->preToULength) {
- /* advance src pointer for the consumed input */
- pArgs->source+=match-cnv->preToULength;
- cnv->preToULength=0;
- } else {
- /* the match did not use all of preToU[] - keep the rest for replay */
- length=cnv->preToULength-match;
- uprv_memmove(cnv->preToU, cnv->preToU+match, length);
- cnv->preToULength=(int8_t)-length;
- }
-
- /* write result */
- ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes,
- value,
- &pArgs->target, pArgs->targetLimit,
- &pArgs->offsets, srcIndex,
- pErrorCode);
- } else if(match<0) {
- /* save state for partial match */
- const char *s;
- int32_t j;
-
- /* just _append_ the newly consumed input to preToU[] */
- s=pArgs->source;
- match=-match;
- for(j=cnv->preToULength; j<match; ++j) {
- cnv->preToU[j]=*s++;
- }
- pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */
- cnv->preToULength=(int8_t)match;
- } else /* match==0 */ {
- /*
- * no match
- *
- * We need to split the previous input into two parts:
- *
- * 1. The first codepage character is unmappable - that's how we got into
- * trying the extension data in the first place.
- * We need to move it from the preToU buffer
- * to the error buffer, set an error code,
- * and prepare the rest of the previous input for 2.
- *
- * 2. The rest of the previous input must be converted once we
- * come back from the callback for the first character.
- * At that time, we have to try again from scratch to convert
- * these input characters.
- * The replay will be handled by the ucnv.c conversion code.
- */
-
- /* move the first codepage character to the error field */
- uprv_memcpy(cnv->toUBytes, cnv->preToU, cnv->preToUFirstLength);
- cnv->toULength=cnv->preToUFirstLength;
-
- /* move the rest up inside the buffer */
- length=cnv->preToULength-cnv->preToUFirstLength;
- if(length>0) {
- uprv_memmove(cnv->preToU, cnv->preToU+cnv->preToUFirstLength, length);
- }
-
- /* mark preToU for replay */
- cnv->preToULength=(int8_t)-length;
-
- /* set the error code for unassigned */
- *pErrorCode=U_INVALID_CHAR_FOUND;
- }
-}
-
-/* from Unicode ------------------------------------------------------------- */
-
-// Use roundtrips, "good one-way" mappings, and some normal fallbacks.
-static inline UBool
-extFromUUseMapping(UBool useFallback, uint32_t value, UChar32 firstCP) {
- return
- ((value&UCNV_EXT_FROM_U_STATUS_MASK)!=0 ||
- FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
- (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0;
-}
-
-/*
- * @return index of the UChar, if found; else <0
- */
-static inline int32_t
-ucnv_extFindFromU(const UChar *fromUSection, int32_t length, UChar u) {
- int32_t i, start, limit;
-
- /* binary search */
- start=0;
- limit=length;
- for(;;) {
- i=limit-start;
- if(i<=1) {
- break; /* done */
- }
- /* start<limit-1 */
-
- if(i<=4) {
- /* linear search for the last part */
- if(u<=fromUSection[start]) {
- break;
- }
- if(++start<limit && u<=fromUSection[start]) {
- break;
- }
- if(++start<limit && u<=fromUSection[start]) {
- break;
- }
- /* always break at start==limit-1 */
- ++start;
- break;
- }
-
- i=(start+limit)/2;
- if(u<fromUSection[i]) {
- limit=i;
- } else {
- start=i;
- }
- }
-
- /* did we really find it? */
- if(start<limit && u==fromUSection[start]) {
- return start;
- } else {
- return -1; /* not found */
- }
-}
-
-/*
- * @param cx pointer to extension data; if NULL, returns 0
- * @param firstCP the first code point before all the other UChars
- * @param pre UChars that must match; !initialMatch: partial match with them
- * @param preLength length of pre, >=0
- * @param src UChars that can be used to complete a match
- * @param srcLength length of src, >=0
- * @param pMatchValue [out] output result value for the match from the data structure
- * @param useFallback "use fallback" flag, usually from cnv->useFallback
- * @param flush TRUE if the end of the input stream is reached
- * @return >1: matched, return value=total match length (number of input units matched)
- * 1: matched, no mapping but request for <subchar1>
- * (only for the first code point)
- * 0: no match
- * <0: partial match, return value=negative total match length
- * (partial matches are never returned for flush==TRUE)
- * (partial matches are never returned as being longer than UCNV_EXT_MAX_UCHARS)
- * the matchLength is 2 if only firstCP matched, and >2 if firstCP and
- * further code units matched
- */
-static int32_t
-ucnv_extMatchFromU(const int32_t *cx,
- UChar32 firstCP,
- const UChar *pre, int32_t preLength,
- const UChar *src, int32_t srcLength,
- uint32_t *pMatchValue,
- UBool useFallback, UBool flush) {
- const uint16_t *stage12, *stage3;
- const uint32_t *stage3b;
-
- const UChar *fromUTableUChars, *fromUSectionUChars;
- const uint32_t *fromUTableValues, *fromUSectionValues;
-
- uint32_t value, matchValue;
- int32_t i, j, idx, length, matchLength;
- UChar c;
-
- if(cx==NULL) {
- return 0; /* no extension data, no match */
- }
-
- /* trie lookup of firstCP */
- idx=firstCP>>10; /* stage 1 index */
- if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) {
- return 0; /* the first code point is outside the trie */
- }
-
- stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t);
- stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t);
- idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP);
-
- stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t);
- value=stage3b[idx];
- if(value==0) {
- return 0;
- }
-
- /*
- * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0:
- * Do not interpret values with reserved bits used, for forward compatibility,
- * and do not even remember intermediate results with reserved bits used.
- */
-
- if(UCNV_EXT_TO_U_IS_PARTIAL(value)) {
- /* partial match, enter the loop below */
- idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
-
- /* initialize */
- fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar);
- fromUTableValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t);
-
- matchValue=0;
- i=j=matchLength=0;
-
- /* we must not remember fallback matches when not using fallbacks */
-
- /* match input units until there is a full match or the input is consumed */
- for(;;) {
- /* go to the next section */
- fromUSectionUChars=fromUTableUChars+idx;
- fromUSectionValues=fromUTableValues+idx;
-
- /* read first pair of the section */
- length=*fromUSectionUChars++;
- value=*fromUSectionValues++;
- if(value!=0 && extFromUUseMapping(useFallback, value, firstCP)) {
- /* remember longest match so far */
- matchValue=value;
- matchLength=2+i+j;
- }
-
- /* match pre[] then src[] */
- if(i<preLength) {
- c=pre[i++];
- } else if(j<srcLength) {
- c=src[j++];
- } else {
- /* all input consumed, partial match */
- if(flush || (length=(i+j))>UCNV_EXT_MAX_UCHARS) {
- /*
- * end of the entire input stream, stop with the longest match so far
- * or: partial match must not be longer than UCNV_EXT_MAX_UCHARS
- * because it must fit into state buffers
- */
- break;
- } else {
- /* continue with more input next time */
- return -(2+length);
- }
- }
-
- /* search for the current UChar */
- idx=ucnv_extFindFromU(fromUSectionUChars, length, c);
- if(idx<0) {
- /* no match here, stop with the longest match so far */
- break;
- } else {
- value=fromUSectionValues[idx];
- if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
- /* partial match, continue */
- idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
- } else {
- if(extFromUUseMapping(useFallback, value, firstCP)) {
- /* full match, stop with result */
- matchValue=value;
- matchLength=2+i+j;
- } else {
- /* full match on fallback not taken, stop with the longest match so far */
- }
- break;
- }
- }
- }
-
- if(matchLength==0) {
- /* no match at all */
- return 0;
- }
- } else /* result from firstCP trie lookup */ {
- if(extFromUUseMapping(useFallback, value, firstCP)) {
- /* full match, stop with result */
- matchValue=value;
- matchLength=2;
- } else {
- /* fallback not taken */
- return 0;
- }
- }
-
- /* return result */
- if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) {
- return 1; /* assert matchLength==2 */
- }
-
- *pMatchValue=matchValue;
- return matchLength;
-}
-
-/*
- * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits
- */
-static inline void
-ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx,
- uint32_t value,
- char **target, const char *targetLimit,
- int32_t **offsets, int32_t srcIndex,
- UErrorCode *pErrorCode) {
- uint8_t buffer[1+UCNV_EXT_MAX_BYTES];
- const uint8_t *result;
- int32_t length, prevLength;
-
- length=UCNV_EXT_FROM_U_GET_LENGTH(value);
- value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value);
-
- /* output the result */
- if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
- /*
- * Generate a byte array and then write it below.
- * This is not the fastest possible way, but it should be ok for
- * extension mappings, and it is much simpler.
- * Offset and overflow handling are only done once this way.
- */
- uint8_t *p=buffer+1; /* reserve buffer[0] for shiftByte below */
- switch(length) {
- case 3:
- *p++=(uint8_t)(value>>16);
- U_FALLTHROUGH;
- case 2:
- *p++=(uint8_t)(value>>8);
- U_FALLTHROUGH;
- case 1:
- *p++=(uint8_t)value;
- U_FALLTHROUGH;
- default:
- break; /* will never occur */
- }
- result=buffer+1;
- } else {
- result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value;
- }
-
- /* with correct data we have length>0 */
-
- if((prevLength=cnv->fromUnicodeStatus)!=0) {
- /* handle SI/SO stateful output */
- uint8_t shiftByte;
-
- if(prevLength>1 && length==1) {
- /* change from double-byte mode to single-byte */
- shiftByte=(uint8_t)UCNV_SI;
- cnv->fromUnicodeStatus=1;
- } else if(prevLength==1 && length>1) {
- /* change from single-byte mode to double-byte */
- shiftByte=(uint8_t)UCNV_SO;
- cnv->fromUnicodeStatus=2;
- } else {
- shiftByte=0;
- }
-
- if(shiftByte!=0) {
- /* prepend the shift byte to the result bytes */
- buffer[0]=shiftByte;
- if(result!=buffer+1) {
- uprv_memcpy(buffer+1, result, length);
- }
- result=buffer;
- ++length;
- }
- }
-
- ucnv_fromUWriteBytes(cnv, (const char *)result, length,
- target, targetLimit,
- offsets, srcIndex,
- pErrorCode);
-}
-
-/*
- * target<targetLimit; set error code for overflow
- */
-U_CFUNC UBool
-ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx,
- UChar32 cp,
- const UChar **src, const UChar *srcLimit,
- char **target, const char *targetLimit,
- int32_t **offsets, int32_t srcIndex,
- UBool flush,
- UErrorCode *pErrorCode) {
- uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */
- int32_t match;
-
- /* try to match */
- match=ucnv_extMatchFromU(cx, cp,
- NULL, 0,
- *src, (int32_t)(srcLimit-*src),
- &value,
- cnv->useFallback, flush);
-
- /* reject a match if the result is a single byte for DBCS-only */
- if( match>=2 &&
- !(UCNV_EXT_FROM_U_GET_LENGTH(value)==1 &&
- cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY)
- ) {
- /* advance src pointer for the consumed input */
- *src+=match-2; /* remove 2 for the initial code point */
-
- /* write result to target */
- ucnv_extWriteFromU(cnv, cx,
- value,
- target, targetLimit,
- offsets, srcIndex,
- pErrorCode);
- return TRUE;
- } else if(match<0) {
- /* save state for partial match */
- const UChar *s;
- int32_t j;
-
- /* copy the first code point */
- cnv->preFromUFirstCP=cp;
-
- /* now copy the newly consumed input */
- s=*src;
- match=-match-2; /* remove 2 for the initial code point */
- for(j=0; j<match; ++j) {
- cnv->preFromU[j]=*s++;
- }
- *src=s; /* same as *src=srcLimit; because we reached the end of input */
- cnv->preFromULength=(int8_t)match;
- return TRUE;
- } else if(match==1) {
- /* matched, no mapping but request for <subchar1> */
- cnv->useSubChar1=TRUE;
- return FALSE;
- } else /* match==0 no match */ {
- return FALSE;
- }
-}
-
-/*
- * Used by ISO 2022 implementation.
- * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping
- */
-U_CFUNC int32_t
-ucnv_extSimpleMatchFromU(const int32_t *cx,
- UChar32 cp, uint32_t *pValue,
- UBool useFallback) {
- uint32_t value;
- int32_t match;
-
- /* try to match */
- match=ucnv_extMatchFromU(cx,
- cp,
- NULL, 0,
- NULL, 0,
- &value,
- useFallback, TRUE);
- if(match>=2) {
- /* write result for simple, single-character conversion */
- int32_t length;
- int isRoundtrip;
-
- isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value);
- length=UCNV_EXT_FROM_U_GET_LENGTH(value);
- value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value);
-
- if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
- *pValue=value;
- return isRoundtrip ? length : -length;
-#if 0 /* not currently used */
- } else if(length==4) {
- /* de-serialize a 4-byte result */
- const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value;
- *pValue=
- ((uint32_t)result[0]<<24)|
- ((uint32_t)result[1]<<16)|
- ((uint32_t)result[2]<<8)|
- result[3];
- return isRoundtrip ? 4 : -4;
-#endif
- }
- }
-
- /*
- * return no match because
- * - match>1 && resultLength>4: result too long for simple conversion
- * - match==1: no match found, <subchar1> preferred
- * - match==0: no match found in the first place
- * - match<0: partial match, not supported for simple conversion (and flush==TRUE)
- */
- return 0;
-}
-
-/*
- * continue partial match with new input, requires cnv->preFromUFirstCP>=0
- * never called for simple, single-character conversion
- */
-U_CFUNC void
-ucnv_extContinueMatchFromU(UConverter *cnv,
- UConverterFromUnicodeArgs *pArgs, int32_t srcIndex,
- UErrorCode *pErrorCode) {
- uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */
- int32_t match;
-
- match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes,
- cnv->preFromUFirstCP,
- cnv->preFromU, cnv->preFromULength,
- pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source),
- &value,
- cnv->useFallback, pArgs->flush);
- if(match>=2) {
- match-=2; /* remove 2 for the initial code point */
-
- if(match>=cnv->preFromULength) {
- /* advance src pointer for the consumed input */
- pArgs->source+=match-cnv->preFromULength;
- cnv->preFromULength=0;
- } else {
- /* the match did not use all of preFromU[] - keep the rest for replay */
- int32_t length=cnv->preFromULength-match;
- u_memmove(cnv->preFromU, cnv->preFromU+match, length);
- cnv->preFromULength=(int8_t)-length;
- }
-
- /* finish the partial match */
- cnv->preFromUFirstCP=U_SENTINEL;
-
- /* write result */
- ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes,
- value,
- &pArgs->target, pArgs->targetLimit,
- &pArgs->offsets, srcIndex,
- pErrorCode);
- } else if(match<0) {
- /* save state for partial match */
- const UChar *s;
- int32_t j;
-
- /* just _append_ the newly consumed input to preFromU[] */
- s=pArgs->source;
- match=-match-2; /* remove 2 for the initial code point */
- for(j=cnv->preFromULength; j<match; ++j) {
- U_ASSERT(j>=0);
- cnv->preFromU[j]=*s++;
- }
- pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */
- cnv->preFromULength=(int8_t)match;
- } else /* match==0 or 1 */ {
- /*
- * no match
- *
- * We need to split the previous input into two parts:
- *
- * 1. The first code point is unmappable - that's how we got into
- * trying the extension data in the first place.
- * We need to move it from the preFromU buffer
- * to the error buffer, set an error code,
- * and prepare the rest of the previous input for 2.
- *
- * 2. The rest of the previous input must be converted once we
- * come back from the callback for the first code point.
- * At that time, we have to try again from scratch to convert
- * these input characters.
- * The replay will be handled by the ucnv.c conversion code.
- */
-
- if(match==1) {
- /* matched, no mapping but request for <subchar1> */
- cnv->useSubChar1=TRUE;
- }
-
- /* move the first code point to the error field */
- cnv->fromUChar32=cnv->preFromUFirstCP;
- cnv->preFromUFirstCP=U_SENTINEL;
-
- /* mark preFromU for replay */
- cnv->preFromULength=-cnv->preFromULength;
-
- /* set the error code for unassigned */
- *pErrorCode=U_INVALID_CHAR_FOUND;
- }
-}
-
-static UBool
-extSetUseMapping(UConverterUnicodeSet which, int32_t minLength, uint32_t value) {
- if(which==UCNV_ROUNDTRIP_SET) {
- // Add only code points for which the roundtrip flag is set.
- // Do not add any fallbacks, even if ucnv_fromUnicode() would use them
- // (fallbacks from PUA). See the API docs for ucnv_getUnicodeSet().
- //
- // By analogy, also do not add "good one-way" mappings.
- //
- // Do not add entries with reserved bits set.
- if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))!=
- UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) {
- return FALSE;
- }
- } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ {
- // Do not add entries with reserved bits set.
- if((value&UCNV_EXT_FROM_U_RESERVED_MASK)!=0) {
- return FALSE;
- }
- }
- // Do not add <subchar1> entries or other (future?) pseudo-entries
- // with an output length of 0.
- return UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength;
-}
-
-static void
-ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
- const int32_t *cx,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- int32_t minLength,
- UChar32 firstCP,
- UChar s[UCNV_EXT_MAX_UCHARS], int32_t length,
- int32_t sectionIndex,
- UErrorCode *pErrorCode) {
- const UChar *fromUSectionUChars;
- const uint32_t *fromUSectionValues;
-
- uint32_t value;
- int32_t i, count;
-
- fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+sectionIndex;
- fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t)+sectionIndex;
-
- /* read first pair of the section */
- count=*fromUSectionUChars++;
- value=*fromUSectionValues++;
-
- if(extSetUseMapping(which, minLength, value)) {
- if(length==U16_LENGTH(firstCP)) {
- /* add the initial code point */
- sa->add(sa->set, firstCP);
- } else {
- /* add the string so far */
- sa->addString(sa->set, s, length);
- }
- }
-
- for(i=0; i<count; ++i) {
- /* append this code unit and recurse or add the string */
- s[length]=fromUSectionUChars[i];
- value=fromUSectionValues[i];
-
- if(value==0) {
- /* no mapping, do nothing */
- } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
- ucnv_extGetUnicodeSetString(
- sharedData, cx, sa, which, minLength,
- firstCP, s, length+1,
- (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
- pErrorCode);
- } else if(extSetUseMapping(which, minLength, value)) {
- sa->addString(sa->set, s, length+1);
- }
- }
-}
-
-U_CFUNC void
-ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UConverterSetFilter filter,
- UErrorCode *pErrorCode) {
- const int32_t *cx;
- const uint16_t *stage12, *stage3, *ps2, *ps3;
- const uint32_t *stage3b;
-
- uint32_t value;
- int32_t st1, stage1Length, st2, st3, minLength;
-
- UChar s[UCNV_EXT_MAX_UCHARS];
- UChar32 c;
- int32_t length;
-
- cx=sharedData->mbcs.extIndexes;
- if(cx==NULL) {
- return;
- }
-
- stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t);
- stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t);
- stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t);
-
- stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH];
-
- /* enumerate the from-Unicode trie table */
- c=0; /* keep track of the current code point while enumerating */
-
- if(filter==UCNV_SET_FILTER_2022_CN) {
- minLength=3;
- } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ||
- filter!=UCNV_SET_FILTER_NONE
- ) {
- /* DBCS-only, ignore single-byte results */
- minLength=2;
- } else {
- minLength=1;
- }
-
- /*
- * the trie enumeration is almost the same as
- * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1
- */
- for(st1=0; st1<stage1Length; ++st1) {
- st2=stage12[st1];
- if(st2>stage1Length) {
- ps2=stage12+st2;
- for(st2=0; st2<64; ++st2) {
- if((st3=(int32_t)ps2[st2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)!=0) {
- /* read the stage 3 block */
- ps3=stage3+st3;
-
- do {
- value=stage3b[*ps3++];
- if(value==0) {
- /* no mapping, do nothing */
- } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
- // Recurse for partial results.
- length=0;
- U16_APPEND_UNSAFE(s, length, c);
- ucnv_extGetUnicodeSetString(
- sharedData, cx, sa, which, minLength,
- c, s, length,
- (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
- pErrorCode);
- } else if(extSetUseMapping(which, minLength, value)) {
- switch(filter) {
- case UCNV_SET_FILTER_2022_CN:
- if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) {
- continue;
- }
- break;
- case UCNV_SET_FILTER_SJIS:
- if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) {
- continue;
- }
- break;
- case UCNV_SET_FILTER_GR94DBCS:
- if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
- (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) &&
- (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
- continue;
- }
- break;
- case UCNV_SET_FILTER_HZ:
- if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
- (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
- (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
- continue;
- }
- break;
- default:
- /*
- * UCNV_SET_FILTER_NONE,
- * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength
- */
- break;
- }
- sa->add(sa->set, c);
- }
- } while((++c&0xf)!=0);
- } else {
- c+=16; /* empty stage 3 block */
- }
- }
- } else {
- c+=1024; /* empty stage 2 block */
- }
- }
-}
-
-#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
diff --git a/contrib/libs/icu/common/ucnv_ext.h b/contrib/libs/icu/common/ucnv_ext.h
deleted file mode 100644
index dceea7ef126..00000000000
--- a/contrib/libs/icu/common/ucnv_ext.h
+++ /dev/null
@@ -1,481 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2003-2013, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: ucnv_ext.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2003jun13
-* created by: Markus W. Scherer
-*
-* Conversion extensions
-*/
-
-#ifndef __UCNV_EXT_H__
-#define __UCNV_EXT_H__
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "ucnv_cnv.h"
-
-/*
- * See icuhtml/design/conversion/conversion_extensions.html
- *
- * Conversion extensions serve three purposes:
- * 1. They support m:n mappings.
- * 2. They support extension-only conversion files that are used together
- * with the regular conversion data in base files.
- * 3. They support mappings with more complicated meta data,
- * for example "good one-way" mappings (|4).
- *
- * A base file may contain an extension table (explicitly requested or
- * implicitly generated for m:n mappings), but its extension table is not
- * used when an extension-only file is used.
- *
- * It is an error if a base file contains any regular (not extension) mapping
- * from the same sequence as a mapping in the extension file
- * because the base mapping would hide the extension mapping.
- *
- *
- * Data for conversion extensions:
- *
- * One set of data structures per conversion direction (to/from Unicode).
- * The data structures are sorted by input units to allow for binary search.
- * Input sequences of more than one unit are handled like contraction tables
- * in collation:
- * The lookup value of a unit points to another table that is to be searched
- * for the next unit, recursively.
- *
- * For conversion from Unicode, the initial code point is looked up in
- * a 3-stage trie for speed,
- * with an additional table of unique results to save space.
- *
- * Long output strings are stored in separate arrays, with length and index
- * in the lookup tables.
- * Output results also include a flag distinguishing roundtrip from
- * (reverse) fallback mappings.
- *
- * Input Unicode strings must not begin or end with unpaired surrogates
- * to avoid problems with matches on parts of surrogate pairs.
- *
- * Mappings from multiple characters (code points or codepage state
- * table sequences) must be searched preferring the longest match.
- * For this to work and be efficient, the variable-width table must contain
- * all mappings that contain prefixes of the multiple characters.
- * If an extension table is built on top of a base table in another file
- * and a base table entry is a prefix of a multi-character mapping, then
- * this is an error.
- *
- *
- * Implementation note:
- *
- * Currently, the parser and several checks in the code limit the number
- * of UChars or bytes in a mapping to
- * UCNV_EXT_MAX_UCHARS and UCNV_EXT_MAX_BYTES, respectively,
- * which are output value limits in the data structure.
- *
- * For input, this is not strictly necessary - it is a hard limit only for the
- * buffers in UConverter that are used to store partial matches.
- *
- * Input sequences could otherwise be arbitrarily long if partial matches
- * need not be stored (i.e., if a sequence does not span several buffers with too
- * many units before the last buffer), although then results would differ
- * depending on whether partial matches exceed the limits or not,
- * which depends on the pattern of buffer sizes.
- *
- *
- * Data structure:
- *
- * int32_t indexes[>=32];
- *
- * Array of indexes and lengths etc. The length of the array is at least 32.
- * The actual length is stored in indexes[0] to be forward compatible.
- *
- * Each index to another array is the number of bytes from indexes[].
- * Each length of an array is the number of array base units in that array.
- *
- * Some of the structures may not be present, in which case their indexes
- * and lengths are 0.
- *
- * Usage of indexes[i]:
- * [0] length of indexes[]
- *
- * // to Unicode table
- * [1] index of toUTable[] (array of uint32_t)
- * [2] length of toUTable[]
- * [3] index of toUUChars[] (array of UChar)
- * [4] length of toUUChars[]
- *
- * // from Unicode table, not for the initial code point
- * [5] index of fromUTableUChars[] (array of UChar)
- * [6] index of fromUTableValues[] (array of uint32_t)
- * [7] length of fromUTableUChars[] and fromUTableValues[]
- * [8] index of fromUBytes[] (array of char)
- * [9] length of fromUBytes[]
- *
- * // from Unicode trie for initial-code point lookup
- * [10] index of fromUStage12[] (combined array of uint16_t for stages 1 & 2)
- * [11] length of stage 1 portion of fromUStage12[]
- * [12] length of fromUStage12[]
- * [13] index of fromUStage3[] (array of uint16_t indexes into fromUStage3b[])
- * [14] length of fromUStage3[]
- * [15] index of fromUStage3b[] (array of uint32_t like fromUTableValues[])
- * [16] length of fromUStage3b[]
- *
- * [17] Bit field containing numbers of bytes:
- * 31..24 reserved, 0
- * 23..16 maximum input bytes
- * 15.. 8 maximum output bytes
- * 7.. 0 maximum bytes per UChar
- *
- * [18] Bit field containing numbers of UChars:
- * 31..24 reserved, 0
- * 23..16 maximum input UChars
- * 15.. 8 maximum output UChars
- * 7.. 0 maximum UChars per byte
- *
- * [19] Bit field containing flags:
- * (extension table unicodeMask)
- * 1 UCNV_HAS_SURROGATES flag for the extension table
- * 0 UCNV_HAS_SUPPLEMENTARY flag for the extension table
- *
- * [20]..[30] reserved, 0
- * [31] number of bytes for the entire extension structure
- * [>31] reserved; there are indexes[0] indexes
- *
- *
- * uint32_t toUTable[];
- *
- * Array of byte/value pairs for lookups for toUnicode conversion.
- * The array is partitioned into sections like collation contraction tables.
- * Each section contains one word with the number of following words and
- * a default value for when the lookup in this section yields no match.
- *
- * A section is sorted in ascending order of input bytes,
- * allowing for fast linear or binary searches.
- * The builder may store entries for a contiguous range of byte values
- * (compare difference between the first and last one with count),
- * which then allows for direct array access.
- * The builder should always do this for the initial table section.
- *
- * Entries may have 0 values, see below.
- * No two entries in a section have the same byte values.
- *
- * Each uint32_t contains an input byte value in bits 31..24 and the
- * corresponding lookup value in bits 23..0.
- * Interpret the value as follows:
- * if(value==0) {
- * no match, see below
- * } else if(value<0x1f0000) {
- * partial match - use value as index to the next toUTable section
- * and match the next unit; (value indexes toUTable[value])
- * } else {
- * if(bit 23 set) {
- * roundtrip;
- * } else {
- * fallback;
- * }
- * unset value bit 23;
- * if(value<=0x2fffff) {
- * (value-0x1f0000) is a code point; (BMP: value<=0x1fffff)
- * } else {
- * bits 17..0 (value&0x3ffff) is an index to
- * the result UChars in toUUChars[]; (0 indexes toUUChars[0])
- * length of the result=((value>>18)-12); (length=0..19)
- * }
- * }
- *
- * The first word in a section contains the number of following words in the
- * input byte position (bits 31..24, number=1..0xff).
- * The value of the initial word is used when the current byte is not found
- * in this section.
- * If the value is not 0, then it represents a result as above.
- * If the value is 0, then the search has to return a shorter match with an
- * earlier default value as the result, or result in "unmappable" even for the
- * initial bytes.
- * If the value is 0 for the initial toUTable entry, then the initial byte
- * does not start any mapping input.
- *
- *
- * UChar toUUChars[];
- *
- * Contains toUnicode mapping results, stored as sequences of UChars.
- * Indexes and lengths stored in the toUTable[].
- *
- *
- * UChar fromUTableUChars[];
- * uint32_t fromUTableValues[];
- *
- * The fromUTable is split into two arrays, but works otherwise much like
- * the toUTable. The array is partitioned into sections like collation
- * contraction tables and toUTable.
- * A row in the table consists of same-index entries in fromUTableUChars[]
- * and fromUTableValues[].
- *
- * Interpret a value as follows:
- * if(value==0) {
- * no match, see below
- * } else if(value<=0xffffff) { (bits 31..24 are 0)
- * partial match - use value as index to the next fromUTable section
- * and match the next unit; (value indexes fromUTable[value])
- * } else {
- * if(value==0x80000001) {
- * return no mapping, but request for <subchar1>;
- * }
- * if(bit 31 set) {
- * roundtrip (|0);
- * } else if(bit 30 set) {
- * "good one-way" mapping (|4); -- new in ICU4C 51, _MBCSHeader.version 5.4/4.4
- * } else {
- * normal fallback (|1);
- * }
- * // bit 29 reserved, 0
- * length=(value>>24)&0x1f; (bits 28..24)
- * if(length==1..3) {
- * bits 23..0 contain 1..3 bytes, padded with 00s on the left;
- * } else {
- * bits 23..0 (value&0xffffff) is an index to
- * the result bytes in fromUBytes[]; (0 indexes fromUBytes[0])
- * }
- * }
- *
- * The first pair in a section contains the number of following pairs in the
- * UChar position (16 bits, number=1..0xffff).
- * The value of the initial pair is used when the current UChar is not found
- * in this section.
- * If the value is not 0, then it represents a result as above.
- * If the value is 0, then the search has to return a shorter match with an
- * earlier default value as the result, or result in "unmappable" even for the
- * initial UChars.
- *
- * If the from Unicode trie is present, then the from Unicode search tables
- * are not used for initial code points.
- * In this case, the first entries (index 0) in the tables are not used
- * (reserved, set to 0) because a value of 0 is used in trie results
- * to indicate no mapping.
- *
- *
- * uint16_t fromUStage12[];
- *
- * Stages 1 & 2 of a trie that maps an initial code point.
- * Indexes in stage 1 are all offset by the length of stage 1 so that the
- * same array pointer can be used for both stages.
- * If (c>>10)>=(length of stage 1) then c does not start any mapping.
- * Same bit distribution as for regular conversion tries.
- *
- *
- * uint16_t fromUStage3[];
- * uint32_t fromUStage3b[];
- *
- * Stage 3 of the trie. The first array simply contains indexes to the second,
- * which contains words in the same format as fromUTableValues[].
- * Use a stage 3 granularity of 4, which allows for 256k stage 3 entries,
- * and 16-bit entries in stage 3 allow for 64k stage 3b entries.
- * The stage 3 granularity means that the stage 2 entry needs to be left-shifted.
- *
- * Two arrays are used because it is expected that more than half of the stage 3
- * entries will be zero. The 16-bit index stage 3 array saves space even
- * considering storing a total of 6 bytes per non-zero entry in both arrays
- * together.
- * Using a stage 3 granularity of >1 diminishes the compactability in that stage
- * but provides a larger effective addressing space in stage 2.
- * All but the final result stage use 16-bit entries to save space.
- *
- * fromUStage3b[] contains a zero for "no mapping" at its index 0,
- * and may contain UCNV_EXT_FROM_U_SUBCHAR1 at index 1 for "<subchar1> SUB mapping"
- * (i.e., "no mapping" with preference for <subchar1> rather than <subchar>),
- * and all other items are unique non-zero results.
- *
- * The default value of a fromUTableValues[] section that is referenced
- * _directly_ from a fromUStage3b[] item may also be UCNV_EXT_FROM_U_SUBCHAR1,
- * but this value must not occur anywhere else in fromUTableValues[]
- * because "no mapping" is always a property of a single code point,
- * never of multiple.
- *
- *
- * char fromUBytes[];
- *
- * Contains fromUnicode mapping results, stored as sequences of chars.
- * Indexes and lengths stored in the fromUTableValues[].
- */
-enum {
- UCNV_EXT_INDEXES_LENGTH, /* 0 */
-
- UCNV_EXT_TO_U_INDEX, /* 1 */
- UCNV_EXT_TO_U_LENGTH,
- UCNV_EXT_TO_U_UCHARS_INDEX,
- UCNV_EXT_TO_U_UCHARS_LENGTH,
-
- UCNV_EXT_FROM_U_UCHARS_INDEX, /* 5 */
- UCNV_EXT_FROM_U_VALUES_INDEX,
- UCNV_EXT_FROM_U_LENGTH,
- UCNV_EXT_FROM_U_BYTES_INDEX,
- UCNV_EXT_FROM_U_BYTES_LENGTH,
-
- UCNV_EXT_FROM_U_STAGE_12_INDEX, /* 10 */
- UCNV_EXT_FROM_U_STAGE_1_LENGTH,
- UCNV_EXT_FROM_U_STAGE_12_LENGTH,
- UCNV_EXT_FROM_U_STAGE_3_INDEX,
- UCNV_EXT_FROM_U_STAGE_3_LENGTH,
- UCNV_EXT_FROM_U_STAGE_3B_INDEX,
- UCNV_EXT_FROM_U_STAGE_3B_LENGTH,
-
- UCNV_EXT_COUNT_BYTES, /* 17 */
- UCNV_EXT_COUNT_UCHARS,
- UCNV_EXT_FLAGS,
-
- UCNV_EXT_RESERVED_INDEX, /* 20, moves with additional indexes */
-
- UCNV_EXT_SIZE=31,
- UCNV_EXT_INDEXES_MIN_LENGTH=32
-};
-
-/* get the pointer to an extension array from indexes[index] */
-#define UCNV_EXT_ARRAY(indexes, index, itemType) \
- ((const itemType *)((const char *)(indexes)+(indexes)[index]))
-
-#define UCNV_GET_MAX_BYTES_PER_UCHAR(indexes) \
- ((indexes)[UCNV_EXT_COUNT_BYTES]&0xff)
-
-/* internal API ------------------------------------------------------------- */
-
-U_CFUNC UBool
-ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx,
- int32_t firstLength,
- const char **src, const char *srcLimit,
- UChar **target, const UChar *targetLimit,
- int32_t **offsets, int32_t srcIndex,
- UBool flush,
- UErrorCode *pErrorCode);
-
-U_CFUNC UChar32
-ucnv_extSimpleMatchToU(const int32_t *cx,
- const char *source, int32_t length,
- UBool useFallback);
-
-U_CFUNC void
-ucnv_extContinueMatchToU(UConverter *cnv,
- UConverterToUnicodeArgs *pArgs, int32_t srcIndex,
- UErrorCode *pErrorCode);
-
-
-U_CFUNC UBool
-ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx,
- UChar32 cp,
- const UChar **src, const UChar *srcLimit,
- char **target, const char *targetLimit,
- int32_t **offsets, int32_t srcIndex,
- UBool flush,
- UErrorCode *pErrorCode);
-
-U_CFUNC int32_t
-ucnv_extSimpleMatchFromU(const int32_t *cx,
- UChar32 cp, uint32_t *pValue,
- UBool useFallback);
-
-U_CFUNC void
-ucnv_extContinueMatchFromU(UConverter *cnv,
- UConverterFromUnicodeArgs *pArgs, int32_t srcIndex,
- UErrorCode *pErrorCode);
-
-/*
- * Add code points and strings to the set according to the extension mappings.
- * Limitation on the UConverterSetFilter:
- * The filters currently assume that they are used with 1:1 mappings.
- * They only apply to single input code points, and then they pass through
- * only mappings with single-charset-code results.
- * For example, the Shift-JIS filter only works for 2-byte results and tests
- * that those 2 bytes are in the JIS X 0208 range of Shift-JIS.
- */
-U_CFUNC void
-ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UConverterSetFilter filter,
- UErrorCode *pErrorCode);
-
-/* toUnicode helpers -------------------------------------------------------- */
-
-#define UCNV_EXT_TO_U_BYTE_SHIFT 24
-#define UCNV_EXT_TO_U_VALUE_MASK 0xffffff
-#define UCNV_EXT_TO_U_MIN_CODE_POINT 0x1f0000
-#define UCNV_EXT_TO_U_MAX_CODE_POINT 0x2fffff
-#define UCNV_EXT_TO_U_ROUNDTRIP_FLAG ((uint32_t)1<<23)
-#define UCNV_EXT_TO_U_INDEX_MASK 0x3ffff
-#define UCNV_EXT_TO_U_LENGTH_SHIFT 18
-#define UCNV_EXT_TO_U_LENGTH_OFFSET 12
-
-/* maximum number of indexed UChars */
-#define UCNV_EXT_MAX_UCHARS 19
-
-#define UCNV_EXT_TO_U_MAKE_WORD(byte, value) (((uint32_t)(byte)<<UCNV_EXT_TO_U_BYTE_SHIFT)|(value))
-
-#define UCNV_EXT_TO_U_GET_BYTE(word) ((word)>>UCNV_EXT_TO_U_BYTE_SHIFT)
-#define UCNV_EXT_TO_U_GET_VALUE(word) ((word)&UCNV_EXT_TO_U_VALUE_MASK)
-
-#define UCNV_EXT_TO_U_IS_PARTIAL(value) ((value)<UCNV_EXT_TO_U_MIN_CODE_POINT)
-#define UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value) (value)
-
-#define UCNV_EXT_TO_U_IS_ROUNDTRIP(value) (((value)&UCNV_EXT_TO_U_ROUNDTRIP_FLAG)!=0)
-#define UCNV_EXT_TO_U_MASK_ROUNDTRIP(value) ((value)&~UCNV_EXT_TO_U_ROUNDTRIP_FLAG)
-
-/* use after masking off the roundtrip flag */
-#define UCNV_EXT_TO_U_IS_CODE_POINT(value) ((value)<=UCNV_EXT_TO_U_MAX_CODE_POINT)
-#define UCNV_EXT_TO_U_GET_CODE_POINT(value) ((value)-UCNV_EXT_TO_U_MIN_CODE_POINT)
-
-#define UCNV_EXT_TO_U_GET_INDEX(value) ((value)&UCNV_EXT_TO_U_INDEX_MASK)
-#define UCNV_EXT_TO_U_GET_LENGTH(value) (((value)>>UCNV_EXT_TO_U_LENGTH_SHIFT)-UCNV_EXT_TO_U_LENGTH_OFFSET)
-
-/* fromUnicode helpers ------------------------------------------------------ */
-
-/* most trie constants are shared with ucnvmbcs.h */
-
-/* see similar utrie.h UTRIE_INDEX_SHIFT and UTRIE_DATA_GRANULARITY */
-#define UCNV_EXT_STAGE_2_LEFT_SHIFT 2
-#define UCNV_EXT_STAGE_3_GRANULARITY 4
-
-/* trie access, returns the stage 3 value=index to stage 3b; s1Index=c>>10 */
-#define UCNV_EXT_FROM_U(stage12, stage3, s1Index, c) \
- (stage3)[ ((int32_t)(stage12)[ (stage12)[s1Index] +(((c)>>4)&0x3f) ]<<UCNV_EXT_STAGE_2_LEFT_SHIFT) +((c)&0xf) ]
-
-#define UCNV_EXT_FROM_U_LENGTH_SHIFT 24
-#define UCNV_EXT_FROM_U_ROUNDTRIP_FLAG ((uint32_t)1<<31)
-#define UCNV_EXT_FROM_U_GOOD_ONE_WAY_FLAG 0x40000000
-#define UCNV_EXT_FROM_U_STATUS_MASK 0xc0000000
-#define UCNV_EXT_FROM_U_RESERVED_MASK 0x20000000
-#define UCNV_EXT_FROM_U_DATA_MASK 0xffffff
-
-/* special value for "no mapping" to <subchar1> (impossible roundtrip to 0 bytes, value 01) */
-#define UCNV_EXT_FROM_U_SUBCHAR1 0x80000001
-
-/* at most 3 bytes in the lower part of the value */
-#define UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH 3
-
-/* maximum number of indexed bytes */
-#define UCNV_EXT_MAX_BYTES 0x1f
-
-#define UCNV_EXT_FROM_U_IS_PARTIAL(value) (((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)==0)
-#define UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value) (value)
-
-#define UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) (((value)&UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)!=0)
-#define UCNV_EXT_FROM_U_MASK_ROUNDTRIP(value) ((value)&~UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)
-
-/* get length; masks away all other bits */
-#define UCNV_EXT_FROM_U_GET_LENGTH(value) (int32_t)(((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)&UCNV_EXT_MAX_BYTES)
-
-/* get bytes or bytes index */
-#define UCNV_EXT_FROM_U_GET_DATA(value) ((value)&UCNV_EXT_FROM_U_DATA_MASK)
-
-#endif
-
-#endif
diff --git a/contrib/libs/icu/common/ucnv_imp.h b/contrib/libs/icu/common/ucnv_imp.h
deleted file mode 100644
index c5e6aeb47e4..00000000000
--- a/contrib/libs/icu/common/ucnv_imp.h
+++ /dev/null
@@ -1,139 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1999-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*
-*
-* ucnv_imp.h:
-* Contains all internal and external data structure definitions
-* Created & Maitained by Bertrand A. Damiba
-*
-*
-*
-* ATTENTION:
-* ---------
-* Although the data structures in this file are open and stack allocatable
-* we reserve the right to hide them in further releases.
-*/
-
-#ifndef UCNV_IMP_H
-#define UCNV_IMP_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/uloc.h"
-#include "ucnv_bld.h"
-
-/*
- * Fast check for whether a charset name is "UTF-8".
- * This does not recognize all of the variations that ucnv_open()
- * and other functions recognize, but it covers most cases.
- * @param name const char * charset name
- * @return
- */
-#define UCNV_FAST_IS_UTF8(name) \
- (((name[0]=='U' ? \
- ( name[1]=='T' && name[2]=='F') : \
- (name[0]=='u' && name[1]=='t' && name[2]=='f'))) \
- && (name[3]=='-' ? \
- (name[4]=='8' && name[5]==0) : \
- (name[3]=='8' && name[4]==0)))
-
-typedef struct {
- char cnvName[UCNV_MAX_CONVERTER_NAME_LENGTH];
- char locale[ULOC_FULLNAME_CAPACITY];
- uint32_t options;
-} UConverterNamePieces;
-
-U_CFUNC UBool
-ucnv_canCreateConverter(const char *converterName, UErrorCode *err);
-
-/* figures out if we need to go to file to read in the data tables.
- * @param converterName The name of the converter
- * @param err The error code
- * @return the newly created converter
- */
-U_CAPI UConverter *
-ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err);
-
-/*
- * Open a purely algorithmic converter, specified by a type constant.
- * @param myUConverter NULL, or pre-allocated UConverter structure to avoid
- * a memory allocation
- * @param type requested converter type
- * @param locale locale parameter, or ""
- * @param options converter options bit set (default 0)
- * @param err ICU error code, not tested for U_FAILURE on input
- * because this is an internal function
- * @internal
- */
-U_CFUNC UConverter *
-ucnv_createAlgorithmicConverter(UConverter *myUConverter,
- UConverterType type,
- const char *locale, uint32_t options,
- UErrorCode *err);
-
-/*
- * Creates a converter from shared data.
- * Adopts mySharedConverterData: No matter what happens, the caller must not
- * unload mySharedConverterData, except via ucnv_close(return value)
- * if this function is successful.
- */
-U_CFUNC UConverter *
-ucnv_createConverterFromSharedData(UConverter *myUConverter,
- UConverterSharedData *mySharedConverterData,
- UConverterLoadArgs *pArgs,
- UErrorCode *err);
-
-U_CFUNC UConverter *
-ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode *err);
-
-/**
- * Load a converter but do not create a UConverter object.
- * Simply return the UConverterSharedData.
- * Performs alias lookup etc.
- * The UConverterNamePieces need not be initialized
- * before calling this function.
- * The UConverterLoadArgs must be initialized
- * before calling this function.
- * If the args are passed in, then the pieces must be passed in too.
- * In other words, the following combinations are allowed:
- * - pieces==NULL && args==NULL
- * - pieces!=NULL && args==NULL
- * - pieces!=NULL && args!=NULL
- * @internal
- */
-U_CFUNC UConverterSharedData *
-ucnv_loadSharedData(const char *converterName,
- UConverterNamePieces *pieces,
- UConverterLoadArgs *pArgs,
- UErrorCode * err);
-
-/**
- * This may unload the shared data in a thread safe manner.
- * This will only unload the data if no other converters are sharing it.
- */
-U_CFUNC void
-ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData);
-
-/**
- * This is a thread safe way to increment the reference count.
- */
-U_CFUNC void
-ucnv_incrementRefCount(UConverterSharedData *sharedData);
-
-/**
- * These are the default error handling callbacks for the charset conversion framework.
- * For performance reasons, they are only called to handle an error (not normally called for a reset or close).
- */
-#define UCNV_TO_U_DEFAULT_CALLBACK ((UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE)
-#define UCNV_FROM_U_DEFAULT_CALLBACK ((UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE)
-
-#endif
-
-#endif /* _UCNV_IMP */
diff --git a/contrib/libs/icu/common/ucnv_io.cpp b/contrib/libs/icu/common/ucnv_io.cpp
deleted file mode 100644
index 7a95a3f1e61..00000000000
--- a/contrib/libs/icu/common/ucnv_io.cpp
+++ /dev/null
@@ -1,1360 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1999-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-*
-* ucnv_io.cpp:
-* initializes global variables and defines functions pertaining to converter
-* name resolution aspect of the conversion code.
-*
-* new implementation:
-*
-* created on: 1999nov22
-* created by: Markus W. Scherer
-*
-* Use the binary cnvalias.icu (created from convrtrs.txt) to work
-* with aliases for converter names.
-*
-* Date Name Description
-* 11/22/1999 markus Created
-* 06/28/2002 grhoten Major overhaul of the converter alias design.
-* Now an alias can map to different converters
-* depending on the specified standard.
-*******************************************************************************
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "unicode/udata.h"
-
-#include "umutex.h"
-#include "uarrsort.h"
-#include "uassert.h"
-#include "udataswp.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "ucnv_io.h"
-#include "uenumimp.h"
-#include "ucln_cmn.h"
-
-/* Format of cnvalias.icu -----------------------------------------------------
- *
- * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
- * This binary form contains several tables. All indexes are to uint16_t
- * units, and not to the bytes (uint8_t units). Addressing everything on
- * 16-bit boundaries allows us to store more information with small index
- * numbers, which are also 16-bit in size. The majority of the table (except
- * the string table) are 16-bit numbers.
- *
- * First there is the size of the Table of Contents (TOC). The TOC
- * entries contain the size of each section. In order to find the offset
- * you just need to sum up the previous offsets.
- * The TOC length and entries are an array of uint32_t values.
- * The first section after the TOC starts immediately after the TOC.
- *
- * 1) This section contains a list of converters. This list contains indexes
- * into the string table for the converter name. The index of this list is
- * also used by other sections, which are mentioned later on.
- * This list is not sorted.
- *
- * 2) This section contains a list of tags. This list contains indexes
- * into the string table for the tag name. The index of this list is
- * also used by other sections, which are mentioned later on.
- * This list is in priority order of standards.
- *
- * 3) This section contains a list of sorted unique aliases. This
- * list contains indexes into the string table for the alias name. The
- * index of this list is also used by other sections, like the 4th section.
- * The index for the 3rd and 4th section is used to get the
- * alias -> converter name mapping. Section 3 and 4 form a two column table.
- * Some of the most significant bits of each index may contain other
- * information (see findConverter for details).
- *
- * 4) This section contains a list of mapped converter names. Consider this
- * as a table that maps the 3rd section to the 1st section. This list contains
- * indexes into the 1st section. The index of this list is the same index in
- * the 3rd section. There is also some extra information in the high bits of
- * each converter index in this table. Currently it's only used to say that
- * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
- * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
- * the predigested form of the 5th section so that an alias lookup can be fast.
- *
- * 5) This section contains a 2D array with indexes to the 6th section. This
- * section is the full form of all alias mappings. The column index is the
- * index into the converter list (column header). The row index is the index
- * to tag list (row header). This 2D array is the top part a 3D array. The
- * third dimension is in the 6th section.
- *
- * 6) This is blob of variable length arrays. Each array starts with a size,
- * and is followed by indexes to alias names in the string table. This is
- * the third dimension to the section 5. No other section should be referencing
- * this section.
- *
- * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
- * presence indicates that a section 9 exists. UConverterAliasOptions specifies
- * what type of string normalization is used among other potential things in the
- * future.
- *
- * 8) This is the string table. All strings are indexed on an even address.
- * There are two reasons for this. First many chip architectures locate strings
- * faster on even address boundaries. Second, since all indexes are 16-bit
- * numbers, this string table can be 128KB in size instead of 64KB when we
- * only have strings starting on an even address.
- *
- * 9) When present this is a set of prenormalized strings from section 8. This
- * table contains normalized strings with the dashes and spaces stripped out,
- * and all strings lowercased. In the future, the options in section 7 may state
- * other types of normalization.
- *
- * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
- * has a unique alias among all converters. That same alias can
- * be mentioned in other standards on different converters,
- * but only one alias per tag can be unique.
- *
- *
- * Converter Names (Usually in TR22 form)
- * -------------------------------------------.
- * T / /|
- * a / / |
- * g / / |
- * s / / |
- * / / |
- * ------------------------------------------/ |
- * A | | |
- * l | | |
- * i | | /
- * a | | /
- * s | | /
- * e | | /
- * s | |/
- * -------------------------------------------
- *
- *
- *
- * Here is what it really looks like. It's like swiss cheese.
- * There are holes. Some converters aren't recognized by
- * a standard, or they are really old converters that the
- * standard doesn't recognize anymore.
- *
- * Converter Names (Usually in TR22 form)
- * -------------------------------------------.
- * T /##########################################/|
- * a / # # /#
- * g / # ## ## ### # ### ### ### #/
- * s / # ##### #### ## ## #/#
- * / ### # # ## # # # ### # # #/##
- * ------------------------------------------/# #
- * A |### # # ## # # # ### # # #|# #
- * l |# # # # # ## # #|# #
- * i |# # # # # # #|#
- * a |# #|#
- * s | #|#
- * e
- * s
- *
- */
-
-/**
- * Used by the UEnumeration API
- */
-typedef struct UAliasContext {
- uint32_t listOffset;
- uint32_t listIdx;
-} UAliasContext;
-
-static const char DATA_NAME[] = "cnvalias";
-static const char DATA_TYPE[] = "icu";
-
-static UDataMemory *gAliasData=NULL;
-static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER;
-
-enum {
- tocLengthIndex=0,
- converterListIndex=1,
- tagListIndex=2,
- aliasListIndex=3,
- untaggedConvArrayIndex=4,
- taggedAliasArrayIndex=5,
- taggedAliasListsIndex=6,
- tableOptionsIndex=7,
- stringTableIndex=8,
- normalizedStringTableIndex=9,
- offsetsCount, /* length of the swapper's temporary offsets[] */
- minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! */
-};
-
-static const UConverterAliasOptions defaultTableOptions = {
- UCNV_IO_UNNORMALIZED,
- 0 /* containsCnvOptionInfo */
-};
-static UConverterAlias gMainTable;
-
-#define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
-#define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
-
-static UBool U_CALLCONV
-isAcceptable(void * /*context*/,
- const char * /*type*/, const char * /*name*/,
- const UDataInfo *pInfo) {
- return (UBool)(
- pInfo->size>=20 &&
- pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
- pInfo->charsetFamily==U_CHARSET_FAMILY &&
- pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */
- pInfo->dataFormat[1]==0x76 &&
- pInfo->dataFormat[2]==0x41 &&
- pInfo->dataFormat[3]==0x6c &&
- pInfo->formatVersion[0]==3);
-}
-
-static UBool U_CALLCONV ucnv_io_cleanup(void)
-{
- if (gAliasData) {
- udata_close(gAliasData);
- gAliasData = NULL;
- }
- gAliasDataInitOnce.reset();
-
- uprv_memset(&gMainTable, 0, sizeof(gMainTable));
-
- return TRUE; /* Everything was cleaned up */
-}
-
-static void U_CALLCONV initAliasData(UErrorCode &errCode) {
- UDataMemory *data;
- const uint16_t *table;
- const uint32_t *sectionSizes;
- uint32_t tableStart;
- uint32_t currOffset;
-
- ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
-
- U_ASSERT(gAliasData == NULL);
- data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode);
- if(U_FAILURE(errCode)) {
- return;
- }
-
- sectionSizes = (const uint32_t *)udata_getMemory(data);
- table = (const uint16_t *)sectionSizes;
-
- tableStart = sectionSizes[0];
- if (tableStart < minTocLength) {
- errCode = U_INVALID_FORMAT_ERROR;
- udata_close(data);
- return;
- }
- gAliasData = data;
-
- gMainTable.converterListSize = sectionSizes[1];
- gMainTable.tagListSize = sectionSizes[2];
- gMainTable.aliasListSize = sectionSizes[3];
- gMainTable.untaggedConvArraySize = sectionSizes[4];
- gMainTable.taggedAliasArraySize = sectionSizes[5];
- gMainTable.taggedAliasListsSize = sectionSizes[6];
- gMainTable.optionTableSize = sectionSizes[7];
- gMainTable.stringTableSize = sectionSizes[8];
-
- if (tableStart > 8) {
- gMainTable.normalizedStringTableSize = sectionSizes[9];
- }
-
- currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
- gMainTable.converterList = table + currOffset;
-
- currOffset += gMainTable.converterListSize;
- gMainTable.tagList = table + currOffset;
-
- currOffset += gMainTable.tagListSize;
- gMainTable.aliasList = table + currOffset;
-
- currOffset += gMainTable.aliasListSize;
- gMainTable.untaggedConvArray = table + currOffset;
-
- currOffset += gMainTable.untaggedConvArraySize;
- gMainTable.taggedAliasArray = table + currOffset;
-
- /* aliasLists is a 1's based array, but it has a padding character */
- currOffset += gMainTable.taggedAliasArraySize;
- gMainTable.taggedAliasLists = table + currOffset;
-
- currOffset += gMainTable.taggedAliasListsSize;
- if (gMainTable.optionTableSize > 0
- && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
- {
- /* Faster table */
- gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
- }
- else {
- /* Smaller table, or I can't handle this normalization mode!
- Use the original slower table lookup. */
- gMainTable.optionTable = &defaultTableOptions;
- }
-
- currOffset += gMainTable.optionTableSize;
- gMainTable.stringTable = table + currOffset;
-
- currOffset += gMainTable.stringTableSize;
- gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
- ? gMainTable.stringTable : (table + currOffset));
-}
-
-
-static UBool
-haveAliasData(UErrorCode *pErrorCode) {
- umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode);
- return U_SUCCESS(*pErrorCode);
-}
-
-static inline UBool
-isAlias(const char *alias, UErrorCode *pErrorCode) {
- if(alias==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- return (UBool)(*alias!=0);
-}
-
-static uint32_t getTagNumber(const char *tagname) {
- if (gMainTable.tagList) {
- uint32_t tagNum;
- for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
- if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
- return tagNum;
- }
- }
- }
-
- return UINT32_MAX;
-}
-
-/* character types relevant for ucnv_compareNames() */
-enum {
- UIGNORE,
- ZERO,
- NONZERO,
- MINLETTER /* any values from here on are lowercase letter mappings */
-};
-
-/* character types for ASCII 00..7F */
-static const uint8_t asciiTypes[128] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
- 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
- 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
- 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
- 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
-};
-
-#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)c] : (uint8_t)UIGNORE)
-
-/* character types for EBCDIC 80..FF */
-static const uint8_t ebcdicTypes[128] = {
- 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
- 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
- 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
- 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
- 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
- ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
-};
-
-#define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE)
-
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
-# define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
-# define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
-#else
-# error U_CHARSET_FAMILY is not valid
-#endif
-
-
-/* @see ucnv_compareNames */
-U_CAPI char * U_CALLCONV
-ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
- char *dstItr = dst;
- uint8_t type, nextType;
- char c1;
- UBool afterDigit = FALSE;
-
- while ((c1 = *name++) != 0) {
- type = GET_ASCII_TYPE(c1);
- switch (type) {
- case UIGNORE:
- afterDigit = FALSE;
- continue; /* ignore all but letters and digits */
- case ZERO:
- if (!afterDigit) {
- nextType = GET_ASCII_TYPE(*name);
- if (nextType == ZERO || nextType == NONZERO) {
- continue; /* ignore leading zero before another digit */
- }
- }
- break;
- case NONZERO:
- afterDigit = TRUE;
- break;
- default:
- c1 = (char)type; /* lowercased letter */
- afterDigit = FALSE;
- break;
- }
- *dstItr++ = c1;
- }
- *dstItr = 0;
- return dst;
-}
-
-U_CAPI char * U_CALLCONV
-ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
- char *dstItr = dst;
- uint8_t type, nextType;
- char c1;
- UBool afterDigit = FALSE;
-
- while ((c1 = *name++) != 0) {
- type = GET_EBCDIC_TYPE(c1);
- switch (type) {
- case UIGNORE:
- afterDigit = FALSE;
- continue; /* ignore all but letters and digits */
- case ZERO:
- if (!afterDigit) {
- nextType = GET_EBCDIC_TYPE(*name);
- if (nextType == ZERO || nextType == NONZERO) {
- continue; /* ignore leading zero before another digit */
- }
- }
- break;
- case NONZERO:
- afterDigit = TRUE;
- break;
- default:
- c1 = (char)type; /* lowercased letter */
- afterDigit = FALSE;
- break;
- }
- *dstItr++ = c1;
- }
- *dstItr = 0;
- return dst;
-}
-
-/**
- * Do a fuzzy compare of two converter/alias names.
- * The comparison is case-insensitive, ignores leading zeroes if they are not
- * followed by further digits, and ignores all but letters and digits.
- * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
- * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
- * at http://www.unicode.org/reports/tr22/
- *
- * This is a symmetrical (commutative) operation; order of arguments
- * is insignificant. This is an important property for sorting the
- * list (when the list is preprocessed into binary form) and for
- * performing binary searches on it at run time.
- *
- * @param name1 a converter name or alias, zero-terminated
- * @param name2 a converter name or alias, zero-terminated
- * @return 0 if the names match, or a negative value if the name1
- * lexically precedes name2, or a positive value if the name1
- * lexically follows name2.
- *
- * @see ucnv_io_stripForCompare
- */
-U_CAPI int U_EXPORT2
-ucnv_compareNames(const char *name1, const char *name2) {
- int rc;
- uint8_t type, nextType;
- char c1, c2;
- UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
-
- for (;;) {
- while ((c1 = *name1++) != 0) {
- type = GET_CHAR_TYPE(c1);
- switch (type) {
- case UIGNORE:
- afterDigit1 = FALSE;
- continue; /* ignore all but letters and digits */
- case ZERO:
- if (!afterDigit1) {
- nextType = GET_CHAR_TYPE(*name1);
- if (nextType == ZERO || nextType == NONZERO) {
- continue; /* ignore leading zero before another digit */
- }
- }
- break;
- case NONZERO:
- afterDigit1 = TRUE;
- break;
- default:
- c1 = (char)type; /* lowercased letter */
- afterDigit1 = FALSE;
- break;
- }
- break; /* deliver c1 */
- }
- while ((c2 = *name2++) != 0) {
- type = GET_CHAR_TYPE(c2);
- switch (type) {
- case UIGNORE:
- afterDigit2 = FALSE;
- continue; /* ignore all but letters and digits */
- case ZERO:
- if (!afterDigit2) {
- nextType = GET_CHAR_TYPE(*name2);
- if (nextType == ZERO || nextType == NONZERO) {
- continue; /* ignore leading zero before another digit */
- }
- }
- break;
- case NONZERO:
- afterDigit2 = TRUE;
- break;
- default:
- c2 = (char)type; /* lowercased letter */
- afterDigit2 = FALSE;
- break;
- }
- break; /* deliver c2 */
- }
-
- /* If we reach the ends of both strings then they match */
- if ((c1|c2)==0) {
- return 0;
- }
-
- /* Case-insensitive comparison */
- rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
- if (rc != 0) {
- return rc;
- }
- }
-}
-
-/*
- * search for an alias
- * return the converter number index for gConverterList
- */
-static inline uint32_t
-findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
- uint32_t mid, start, limit;
- uint32_t lastMid;
- int result;
- int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
- char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
-
- if (!isUnnormalized) {
- if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
- *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
- return UINT32_MAX;
- }
-
- /* Lower case and remove ignoreable characters. */
- ucnv_io_stripForCompare(strippedName, alias);
- alias = strippedName;
- }
-
- /* do a binary search for the alias */
- start = 0;
- limit = gMainTable.untaggedConvArraySize;
- mid = limit;
- lastMid = UINT32_MAX;
-
- for (;;) {
- mid = (uint32_t)((start + limit) / 2);
- if (lastMid == mid) { /* Have we moved? */
- break; /* We haven't moved, and it wasn't found. */
- }
- lastMid = mid;
- if (isUnnormalized) {
- result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
- }
- else {
- result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
- }
-
- if (result < 0) {
- limit = mid;
- } else if (result > 0) {
- start = mid;
- } else {
- /* Since the gencnval tool folds duplicates into one entry,
- * this alias in gAliasList is unique, but different standards
- * may map an alias to different converters.
- */
- if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
- *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
- }
- /* State whether the canonical converter name contains an option.
- This information is contained in this list in order to maintain backward & forward compatibility. */
- if (containsOption) {
- UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
- *containsOption = (UBool)((containsCnvOptionInfo
- && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
- || !containsCnvOptionInfo);
- }
- return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
- }
- }
-
- return UINT32_MAX;
-}
-
-/*
- * Is this alias in this list?
- * alias and listOffset should be non-NULL.
- */
-static inline UBool
-isAliasInList(const char *alias, uint32_t listOffset) {
- if (listOffset) {
- uint32_t currAlias;
- uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
- /* +1 to skip listCount */
- const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
- for (currAlias = 0; currAlias < listCount; currAlias++) {
- if (currList[currAlias]
- && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
- {
- return TRUE;
- }
- }
- }
- return FALSE;
-}
-
-/*
- * Search for an standard name of an alias (what is the default name
- * that this standard uses?)
- * return the listOffset for gTaggedAliasLists. If it's 0,
- * the it couldn't be found, but the parameters are valid.
- */
-static uint32_t
-findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
- uint32_t idx;
- uint32_t listOffset;
- uint32_t convNum;
- UErrorCode myErr = U_ZERO_ERROR;
- uint32_t tagNum = getTagNumber(standard);
-
- /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
- convNum = findConverter(alias, NULL, &myErr);
- if (myErr != U_ZERO_ERROR) {
- *pErrorCode = myErr;
- }
-
- if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
- listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
- if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
- return listOffset;
- }
- if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
- /* Uh Oh! They used an ambiguous alias.
- We have to search the whole swiss cheese starting
- at the highest standard affinity.
- This may take a while.
- */
- for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
- listOffset = gMainTable.taggedAliasArray[idx];
- if (listOffset && isAliasInList(alias, listOffset)) {
- uint32_t currTagNum = idx/gMainTable.converterListSize;
- uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
- uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
- if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
- return tempListOffset;
- }
- /* else keep on looking */
- /* We could speed this up by starting on the next row
- because an alias is unique per row, right now.
- This would change if alias versioning appears. */
- }
- }
- /* The standard doesn't know about the alias */
- }
- /* else no default name */
- return 0;
- }
- /* else converter or tag not found */
-
- return UINT32_MAX;
-}
-
-/* Return the canonical name */
-static uint32_t
-findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
- uint32_t idx;
- uint32_t listOffset;
- uint32_t convNum;
- UErrorCode myErr = U_ZERO_ERROR;
- uint32_t tagNum = getTagNumber(standard);
-
- /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
- convNum = findConverter(alias, NULL, &myErr);
- if (myErr != U_ZERO_ERROR) {
- *pErrorCode = myErr;
- }
-
- if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
- listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
- if (listOffset && isAliasInList(alias, listOffset)) {
- return convNum;
- }
- if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
- /* Uh Oh! They used an ambiguous alias.
- We have to search one slice of the swiss cheese.
- We search only in the requested tag, not the whole thing.
- This may take a while.
- */
- uint32_t convStart = (tagNum)*gMainTable.converterListSize;
- uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
- for (idx = convStart; idx < convLimit; idx++) {
- listOffset = gMainTable.taggedAliasArray[idx];
- if (listOffset && isAliasInList(alias, listOffset)) {
- return idx-convStart;
- }
- }
- /* The standard doesn't know about the alias */
- }
- /* else no canonical name */
- }
- /* else converter or tag not found */
-
- return UINT32_MAX;
-}
-
-U_CAPI const char *
-ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
- const char *aliasTmp = alias;
- int32_t i = 0;
- for (i = 0; i < 2; i++) {
- if (i == 1) {
- /*
- * After the first unsuccess converter lookup, check to see if
- * the name begins with 'x-'. If it does, strip it off and try
- * again. This behaviour is similar to how ICU4J does it.
- */
- if (aliasTmp[0] == 'x' && aliasTmp[1] == '-') {
- aliasTmp = aliasTmp+2;
- } else {
- break;
- }
- }
- if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
- uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
- if (convNum < gMainTable.converterListSize) {
- return GET_STRING(gMainTable.converterList[convNum]);
- }
- /* else converter not found */
- } else {
- break;
- }
- }
-
- return NULL;
-}
-
-U_CDECL_BEGIN
-
-
-static int32_t U_CALLCONV
-ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
- int32_t value = 0;
- UAliasContext *myContext = (UAliasContext *)(enumerator->context);
- uint32_t listOffset = myContext->listOffset;
-
- if (listOffset) {
- value = gMainTable.taggedAliasLists[listOffset];
- }
- return value;
-}
-
-static const char * U_CALLCONV
-ucnv_io_nextStandardAliases(UEnumeration *enumerator,
- int32_t* resultLength,
- UErrorCode * /*pErrorCode*/)
-{
- UAliasContext *myContext = (UAliasContext *)(enumerator->context);
- uint32_t listOffset = myContext->listOffset;
-
- if (listOffset) {
- uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
- const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
-
- if (myContext->listIdx < listCount) {
- const char *myStr = GET_STRING(currList[myContext->listIdx++]);
- if (resultLength) {
- *resultLength = (int32_t)uprv_strlen(myStr);
- }
- return myStr;
- }
- }
- /* Either we accessed a zero length list, or we enumerated too far. */
- if (resultLength) {
- *resultLength = 0;
- }
- return NULL;
-}
-
-static void U_CALLCONV
-ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
- ((UAliasContext *)(enumerator->context))->listIdx = 0;
-}
-
-static void U_CALLCONV
-ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
- uprv_free(enumerator->context);
- uprv_free(enumerator);
-}
-
-U_CDECL_END
-
-/* Enumerate the aliases for the specified converter and standard tag */
-static const UEnumeration gEnumAliases = {
- NULL,
- NULL,
- ucnv_io_closeUEnumeration,
- ucnv_io_countStandardAliases,
- uenum_unextDefault,
- ucnv_io_nextStandardAliases,
- ucnv_io_resetStandardAliases
-};
-
-U_CAPI UEnumeration * U_EXPORT2
-ucnv_openStandardNames(const char *convName,
- const char *standard,
- UErrorCode *pErrorCode)
-{
- UEnumeration *myEnum = NULL;
- if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
- uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
-
- /* When listOffset == 0, we want to acknowledge that the
- converter name and standard are okay, but there
- is nothing to enumerate. */
- if (listOffset < gMainTable.taggedAliasListsSize) {
- UAliasContext *myContext;
-
- myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
- if (myEnum == NULL) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
- myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
- if (myContext == NULL) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- uprv_free(myEnum);
- return NULL;
- }
- myContext->listOffset = listOffset;
- myContext->listIdx = 0;
- myEnum->context = myContext;
- }
- /* else converter or tag not found */
- }
- return myEnum;
-}
-
-static uint16_t
-ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
- if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
- uint32_t convNum = findConverter(alias, NULL, pErrorCode);
- if (convNum < gMainTable.converterListSize) {
- /* tagListNum - 1 is the ALL tag */
- int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
-
- if (listOffset) {
- return gMainTable.taggedAliasLists[listOffset];
- }
- /* else this shouldn't happen. internal program error */
- }
- /* else converter not found */
- }
- return 0;
-}
-
-static uint16_t
-ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
- if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
- uint32_t currAlias;
- uint32_t convNum = findConverter(alias, NULL, pErrorCode);
- if (convNum < gMainTable.converterListSize) {
- /* tagListNum - 1 is the ALL tag */
- int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
-
- if (listOffset) {
- uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
- /* +1 to skip listCount */
- const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
-
- for (currAlias = start; currAlias < listCount; currAlias++) {
- aliases[currAlias] = GET_STRING(currList[currAlias]);
- }
- }
- /* else this shouldn't happen. internal program error */
- }
- /* else converter not found */
- }
- return 0;
-}
-
-static const char *
-ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
- if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
- uint32_t convNum = findConverter(alias, NULL, pErrorCode);
- if (convNum < gMainTable.converterListSize) {
- /* tagListNum - 1 is the ALL tag */
- int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
-
- if (listOffset) {
- uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
- /* +1 to skip listCount */
- const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
-
- if (n < listCount) {
- return GET_STRING(currList[n]);
- }
- *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- }
- /* else this shouldn't happen. internal program error */
- }
- /* else converter not found */
- }
- return NULL;
-}
-
-static uint16_t
-ucnv_io_countStandards(UErrorCode *pErrorCode) {
- if (haveAliasData(pErrorCode)) {
- /* Don't include the empty list */
- return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
- }
-
- return 0;
-}
-
-U_CAPI const char * U_EXPORT2
-ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
- if (haveAliasData(pErrorCode)) {
- if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
- return GET_STRING(gMainTable.tagList[n]);
- }
- *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- }
-
- return NULL;
-}
-
-U_CAPI const char * U_EXPORT2
-ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
- if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
- uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
-
- if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
- const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
-
- /* Get the preferred name from this list */
- if (currList[0]) {
- return GET_STRING(currList[0]);
- }
- /* else someone screwed up the alias table. */
- /* *pErrorCode = U_INVALID_FORMAT_ERROR */
- }
- }
-
- return NULL;
-}
-
-U_CAPI uint16_t U_EXPORT2
-ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
-{
- return ucnv_io_countAliases(alias, pErrorCode);
-}
-
-
-U_CAPI const char* U_EXPORT2
-ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
-{
- return ucnv_io_getAlias(alias, n, pErrorCode);
-}
-
-U_CAPI void U_EXPORT2
-ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
-{
- ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
-}
-
-U_CAPI uint16_t U_EXPORT2
-ucnv_countStandards(void)
-{
- UErrorCode err = U_ZERO_ERROR;
- return ucnv_io_countStandards(&err);
-}
-
-U_CAPI const char * U_EXPORT2
-ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
- if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
- uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
-
- if (convNum < gMainTable.converterListSize) {
- return GET_STRING(gMainTable.converterList[convNum]);
- }
- }
-
- return NULL;
-}
-
-U_CDECL_BEGIN
-
-
-static int32_t U_CALLCONV
-ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
- return gMainTable.converterListSize;
-}
-
-static const char * U_CALLCONV
-ucnv_io_nextAllConverters(UEnumeration *enumerator,
- int32_t* resultLength,
- UErrorCode * /*pErrorCode*/)
-{
- uint16_t *myContext = (uint16_t *)(enumerator->context);
-
- if (*myContext < gMainTable.converterListSize) {
- const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
- if (resultLength) {
- *resultLength = (int32_t)uprv_strlen(myStr);
- }
- return myStr;
- }
- /* Either we accessed a zero length list, or we enumerated too far. */
- if (resultLength) {
- *resultLength = 0;
- }
- return NULL;
-}
-
-static void U_CALLCONV
-ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
- *((uint16_t *)(enumerator->context)) = 0;
-}
-U_CDECL_END
-static const UEnumeration gEnumAllConverters = {
- NULL,
- NULL,
- ucnv_io_closeUEnumeration,
- ucnv_io_countAllConverters,
- uenum_unextDefault,
- ucnv_io_nextAllConverters,
- ucnv_io_resetAllConverters
-};
-
-U_CAPI UEnumeration * U_EXPORT2
-ucnv_openAllNames(UErrorCode *pErrorCode) {
- UEnumeration *myEnum = NULL;
- if (haveAliasData(pErrorCode)) {
- uint16_t *myContext;
-
- myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
- if (myEnum == NULL) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
- myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
- if (myContext == NULL) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- uprv_free(myEnum);
- return NULL;
- }
- *myContext = 0;
- myEnum->context = myContext;
- }
- return myEnum;
-}
-
-U_CAPI uint16_t
-ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
- if (haveAliasData(pErrorCode)) {
- return (uint16_t)gMainTable.converterListSize;
- }
- return 0;
-}
-
-/* alias table swapping ----------------------------------------------------- */
-
-U_CDECL_BEGIN
-
-typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
-U_CDECL_END
-
-
-/*
- * row of a temporary array
- *
- * gets platform-endian charset string indexes and sorting indexes;
- * after sorting this array by strings, the actual arrays are permutated
- * according to the sorting indexes
- */
-typedef struct TempRow {
- uint16_t strIndex, sortIndex;
-} TempRow;
-
-typedef struct TempAliasTable {
- const char *chars;
- TempRow *rows;
- uint16_t *resort;
- StripForCompareFn *stripForCompare;
-} TempAliasTable;
-
-enum {
- STACK_ROW_CAPACITY=500
-};
-
-static int32_t U_CALLCONV
-io_compareRows(const void *context, const void *left, const void *right) {
- char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
- strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
-
- TempAliasTable *tempTable=(TempAliasTable *)context;
- const char *chars=tempTable->chars;
-
- return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
- tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
-}
-
-U_CAPI int32_t U_EXPORT2
-ucnv_swapAliases(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const UDataInfo *pInfo;
- int32_t headerSize;
-
- const uint16_t *inTable;
- const uint32_t *inSectionSizes;
- uint32_t toc[offsetsCount];
- uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
- uint32_t i, count, tocLength, topOffset;
-
- TempRow rows[STACK_ROW_CAPACITY];
- uint16_t resort[STACK_ROW_CAPACITY];
- TempAliasTable tempTable;
-
- /* udata_swapDataHeader checks the arguments */
- headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /* check data format and format version */
- pInfo=(const UDataInfo *)((const char *)inData+4);
- if(!(
- pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */
- pInfo->dataFormat[1]==0x76 &&
- pInfo->dataFormat[2]==0x41 &&
- pInfo->dataFormat[3]==0x6c &&
- pInfo->formatVersion[0]==3
- )) {
- udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
- pInfo->dataFormat[0], pInfo->dataFormat[1],
- pInfo->dataFormat[2], pInfo->dataFormat[3],
- pInfo->formatVersion[0]);
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- /* an alias table must contain at least the table of contents array */
- if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
- udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
- length-headerSize);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
- inTable=(const uint16_t *)inSectionSizes;
- uprv_memset(toc, 0, sizeof(toc));
- toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
- if(tocLength<minTocLength || offsetsCount<=tocLength) {
- udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
-
- /* read the known part of the table of contents */
- for(i=converterListIndex; i<=tocLength; ++i) {
- toc[i]=ds->readUInt32(inSectionSizes[i]);
- }
-
- /* compute offsets */
- uprv_memset(offsets, 0, sizeof(offsets));
- offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
- for(i=tagListIndex; i<=tocLength; ++i) {
- offsets[i]=offsets[i-1]+toc[i-1];
- }
-
- /* compute the overall size of the after-header data, in numbers of 16-bit units */
- topOffset=offsets[i-1]+toc[i-1];
-
- if(length>=0) {
- uint16_t *outTable;
- const uint16_t *p, *p2;
- uint16_t *q, *q2;
- uint16_t oldIndex;
-
- if((length-headerSize)<(2*(int32_t)topOffset)) {
- udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
- length-headerSize);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- outTable=(uint16_t *)((char *)outData+headerSize);
-
- /* swap the entire table of contents */
- ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
-
- /* swap unormalized strings & normalized strings */
- ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
- outTable+offsets[stringTableIndex], pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
- return 0;
- }
-
- if(ds->inCharset==ds->outCharset) {
- /* no need to sort, just swap all 16-bit values together */
- ds->swapArray16(ds,
- inTable+offsets[converterListIndex],
- 2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
- outTable+offsets[converterListIndex],
- pErrorCode);
- } else {
- /* allocate the temporary table for sorting */
- count=toc[aliasListIndex];
-
- tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
-
- if(count<=STACK_ROW_CAPACITY) {
- tempTable.rows=rows;
- tempTable.resort=resort;
- } else {
- tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
- if(tempTable.rows==NULL) {
- udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
- count);
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- tempTable.resort=(uint16_t *)(tempTable.rows+count);
- }
-
- if(ds->outCharset==U_ASCII_FAMILY) {
- tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
- } else /* U_EBCDIC_FAMILY */ {
- tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
- }
-
- /*
- * Sort unique aliases+mapped names.
- *
- * We need to sort the list again by outCharset strings because they
- * sort differently for different charset families.
- * First we set up a temporary table with the string indexes and
- * sorting indexes and sort that.
- * Then we permutate and copy/swap the actual values.
- */
- p=inTable+offsets[aliasListIndex];
- q=outTable+offsets[aliasListIndex];
-
- p2=inTable+offsets[untaggedConvArrayIndex];
- q2=outTable+offsets[untaggedConvArrayIndex];
-
- for(i=0; i<count; ++i) {
- tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
- tempTable.rows[i].sortIndex=(uint16_t)i;
- }
-
- uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
- io_compareRows, &tempTable,
- FALSE, pErrorCode);
-
- if(U_SUCCESS(*pErrorCode)) {
- /* copy/swap/permutate items */
- if(p!=q) {
- for(i=0; i<count; ++i) {
- oldIndex=tempTable.rows[i].sortIndex;
- ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
- ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
- }
- } else {
- /*
- * If we swap in-place, then the permutation must use another
- * temporary array (tempTable.resort)
- * before the results are copied to the outBundle.
- */
- uint16_t *r=tempTable.resort;
-
- for(i=0; i<count; ++i) {
- oldIndex=tempTable.rows[i].sortIndex;
- ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
- }
- uprv_memcpy(q, r, 2*(size_t)count);
-
- for(i=0; i<count; ++i) {
- oldIndex=tempTable.rows[i].sortIndex;
- ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
- }
- uprv_memcpy(q2, r, 2*(size_t)count);
- }
- }
-
- if(tempTable.rows!=rows) {
- uprv_free(tempTable.rows);
- }
-
- if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
- count);
- return 0;
- }
-
- /* swap remaining 16-bit values */
- ds->swapArray16(ds,
- inTable+offsets[converterListIndex],
- 2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
- outTable+offsets[converterListIndex],
- pErrorCode);
- ds->swapArray16(ds,
- inTable+offsets[taggedAliasArrayIndex],
- 2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
- outTable+offsets[taggedAliasArrayIndex],
- pErrorCode);
- }
- }
-
- return headerSize+2*(int32_t)topOffset;
-}
-
-#endif
-
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/contrib/libs/icu/common/ucnv_io.h b/contrib/libs/icu/common/ucnv_io.h
deleted file mode 100644
index 8f2d7b5a02b..00000000000
--- a/contrib/libs/icu/common/ucnv_io.h
+++ /dev/null
@@ -1,127 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- **********************************************************************
- * Copyright (C) 1999-2006, International Business Machines
- * Corporation and others. All Rights Reserved.
- **********************************************************************
- *
- *
- * ucnv_io.h:
- * defines variables and functions pertaining to converter name resolution
- * aspect of the conversion code
- */
-
-#ifndef UCNV_IO_H
-#define UCNV_IO_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "udataswp.h"
-
-#define UCNV_AMBIGUOUS_ALIAS_MAP_BIT 0x8000
-#define UCNV_CONTAINS_OPTION_BIT 0x4000
-#define UCNV_CONVERTER_INDEX_MASK 0xFFF
-#define UCNV_NUM_RESERVED_TAGS 2
-#define UCNV_NUM_HIDDEN_TAGS 1
-
-enum {
- UCNV_IO_UNNORMALIZED,
- UCNV_IO_STD_NORMALIZED,
- UCNV_IO_NORM_TYPE_COUNT
-};
-
-typedef struct {
- uint16_t stringNormalizationType;
- uint16_t containsCnvOptionInfo;
-} UConverterAliasOptions;
-
-typedef struct UConverterAlias {
- const uint16_t *converterList;
- const uint16_t *tagList;
- const uint16_t *aliasList;
- const uint16_t *untaggedConvArray;
- const uint16_t *taggedAliasArray;
- const uint16_t *taggedAliasLists;
- const UConverterAliasOptions *optionTable;
- const uint16_t *stringTable;
- const uint16_t *normalizedStringTable;
-
- uint32_t converterListSize;
- uint32_t tagListSize;
- uint32_t aliasListSize;
- uint32_t untaggedConvArraySize;
- uint32_t taggedAliasArraySize;
- uint32_t taggedAliasListsSize;
- uint32_t optionTableSize;
- uint32_t stringTableSize;
- uint32_t normalizedStringTableSize;
-} UConverterAlias;
-
-/**
- * \var ucnv_io_stripForCompare
- * Remove the underscores, dashes and spaces from the name, and convert
- * the name to lower case.
- * @param dst The destination buffer, which is <= the buffer of name.
- * @param dst The destination buffer, which is <= the buffer of name.
- * @see ucnv_compareNames
- * @return the destination buffer.
- */
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
-# define ucnv_io_stripForCompare ucnv_io_stripASCIIForCompare
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
-# define ucnv_io_stripForCompare ucnv_io_stripEBCDICForCompare
-#else
-# error U_CHARSET_FAMILY is not valid
-#endif
-
-U_CAPI char * U_CALLCONV
-ucnv_io_stripASCIIForCompare(char *dst, const char *name);
-
-U_CAPI char * U_CALLCONV
-ucnv_io_stripEBCDICForCompare(char *dst, const char *name);
-
-/**
- * Map a converter alias name to a canonical converter name.
- * The alias is searched for case-insensitively, the converter name
- * is returned in mixed-case.
- * Returns NULL if the alias is not found.
- * @param alias The alias name to be searched.
- * @param containsOption A return value stating whether the returned converter name contains an option (a comma)
- * @param pErrorCode The error code
- * @return the converter name in mixed-case, return NULL if the alias is not found.
- */
-U_CAPI const char *
-ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode);
-
-/**
- * Return the number of all known converter names (no aliases).
- * @param pErrorCode The error code
- * @return the number of all aliases
- */
-U_CAPI uint16_t
-ucnv_io_countKnownConverters(UErrorCode *pErrorCode);
-
-/**
- * Swap an ICU converter alias table. See implementation for details.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-ucnv_swapAliases(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-#endif
-
-#endif /* _UCNV_IO */
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/contrib/libs/icu/common/ucnv_lmb.cpp b/contrib/libs/icu/common/ucnv_lmb.cpp
deleted file mode 100644
index 168392837b5..00000000000
--- a/contrib/libs/icu/common/ucnv_lmb.cpp
+++ /dev/null
@@ -1,1388 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2000-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* file name: ucnv_lmb.cpp
-* encoding: UTF-8
-* tab size: 4 (not used)
-* indentation:4
-*
-* created on: 2000feb09
-* created by: Brendan Murray
-* extensively hacked up by: Jim Snyder-Grant
-*
-* Modification History:
-*
-* Date Name Description
-*
-* 06/20/2000 helena OS/400 port changes; mostly typecast.
-* 06/27/2000 Jim Snyder-Grant Deal with partial characters and small buffers.
-* Add comments to document LMBCS format and implementation
-* restructured order & breakdown of functions
-* 06/28/2000 helena Major rewrite for the callback API changes.
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
-
-#include "unicode/ucnv_err.h"
-#include "unicode/ucnv.h"
-#include "unicode/uset.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "uassert.h"
-#include "ucnv_imp.h"
-#include "ucnv_bld.h"
-#include "ucnv_cnv.h"
-
-#ifdef EBCDIC_RTL
- #include "ascii_a.h"
-#endif
-
-/*
- LMBCS
-
- (Lotus Multi-Byte Character Set)
-
- LMBCS was invented in the late 1980's and is primarily used in Lotus Notes
- databases and in Lotus 1-2-3 files. Programmers who work with the APIs
- into these products will sometimes need to deal with strings in this format.
-
- The code in this file provides an implementation for an ICU converter of
- LMBCS to and from Unicode.
-
- Since the LMBCS character set is only sparsely documented in existing
- printed or online material, we have added extensive annotation to this
- file to serve as a guide to understanding LMBCS.
-
- LMBCS was originally designed with these four sometimes-competing design goals:
-
- -Provide encodings for the characters in 12 existing national standards
- (plus a few other characters)
- -Minimal memory footprint
- -Maximal speed of conversion into the existing national character sets
- -No need to track a changing state as you interpret a string.
-
-
- All of the national character sets LMBCS was trying to encode are 'ANSI'
- based, in that the bytes from 0x20 - 0x7F are almost exactly the
- same common Latin unaccented characters and symbols in all character sets.
-
- So, in order to help meet the speed & memory design goals, the common ANSI
- bytes from 0x20-0x7F are represented by the same single-byte values in LMBCS.
-
- The general LMBCS code unit is from 1-3 bytes. We can describe the 3 bytes as
- follows:
-
- [G] D1 [D2]
-
- That is, a sometimes-optional 'group' byte, followed by 1 and sometimes 2
- data bytes. The maximum size of a LMBCS chjaracter is 3 bytes:
-*/
-#define ULMBCS_CHARSIZE_MAX 3
-/*
- The single-byte values from 0x20 to 0x7F are examples of single D1 bytes.
- We often have to figure out if byte values are below or above this, so we
- use the ANSI nomenclature 'C0' and 'C1' to refer to the range of control
- characters just above & below the common lower-ANSI range */
-#define ULMBCS_C0END 0x1F
-#define ULMBCS_C1START 0x80
-/*
- Since LMBCS is always dealing in byte units. we create a local type here for
- dealing with these units of LMBCS code units:
-
-*/
-typedef uint8_t ulmbcs_byte_t;
-
-/*
- Most of the values less than 0x20 are reserved in LMBCS to announce
- which national character standard is being used for the 'D' bytes.
- In the comments we show the common name and the IBM character-set ID
- for these character-set announcers:
-*/
-
-#define ULMBCS_GRP_L1 0x01 /* Latin-1 :ibm-850 */
-#define ULMBCS_GRP_GR 0x02 /* Greek :ibm-851 */
-#define ULMBCS_GRP_HE 0x03 /* Hebrew :ibm-1255 */
-#define ULMBCS_GRP_AR 0x04 /* Arabic :ibm-1256 */
-#define ULMBCS_GRP_RU 0x05 /* Cyrillic :ibm-1251 */
-#define ULMBCS_GRP_L2 0x06 /* Latin-2 :ibm-852 */
-#define ULMBCS_GRP_TR 0x08 /* Turkish :ibm-1254 */
-#define ULMBCS_GRP_TH 0x0B /* Thai :ibm-874 */
-#define ULMBCS_GRP_JA 0x10 /* Japanese :ibm-943 */
-#define ULMBCS_GRP_KO 0x11 /* Korean :ibm-1261 */
-#define ULMBCS_GRP_TW 0x12 /* Chinese SC :ibm-950 */
-#define ULMBCS_GRP_CN 0x13 /* Chinese TC :ibm-1386 */
-
-/*
- So, the beginning of understanding LMBCS is that IF the first byte of a LMBCS
- character is one of those 12 values, you can interpret the remaining bytes of
- that character as coming from one of those character sets. Since the lower
- ANSI bytes already are represented in single bytes, using one of the character
- set announcers is used to announce a character that starts with a byte of
- 0x80 or greater.
-
- The character sets are arranged so that the single byte sets all appear
- before the multi-byte character sets. When we need to tell whether a
- group byte is for a single byte char set or not we use this define: */
-
-#define ULMBCS_DOUBLEOPTGROUP_START 0x10
-
-/*
-However, to fully understand LMBCS, you must also understand a series of
-exceptions & optimizations made in service of the design goals.
-
-First, those of you who are character set mavens may have noticed that
-the 'double-byte' character sets are actually multi-byte character sets
-that can have 1 or two bytes, even in the upper-ascii range. To force
-each group byte to introduce a fixed-width encoding (to make it faster to
-count characters), we use a convention of doubling up on the group byte
-to introduce any single-byte character > 0x80 in an otherwise double-byte
-character set. So, for example, the LMBCS sequence x10 x10 xAE is the
-same as '0xAE' in the Japanese code page 943.
-
-Next, you will notice that the list of group bytes has some gaps.
-These are used in various ways.
-
-We reserve a few special single byte values for common control
-characters. These are in the same place as their ANSI eqivalents for speed.
-*/
-
-#define ULMBCS_HT 0x09 /* Fixed control char - Horizontal Tab */
-#define ULMBCS_LF 0x0A /* Fixed control char - Line Feed */
-#define ULMBCS_CR 0x0D /* Fixed control char - Carriage Return */
-
-/* Then, 1-2-3 reserved a special single-byte character to put at the
-beginning of internal 'system' range names: */
-
-#define ULMBCS_123SYSTEMRANGE 0x19
-
-/* Then we needed a place to put all the other ansi control characters
-that must be moved to different values because LMBCS reserves those
-values for other purposes. To represent the control characters, we start
-with a first byte of 0xF & add the control chaarcter value as the
-second byte */
-#define ULMBCS_GRP_CTRL 0x0F
-
-/* For the C0 controls (less than 0x20), we add 0x20 to preserve the
-useful doctrine that any byte less than 0x20 in a LMBCS char must be
-the first byte of a character:*/
-#define ULMBCS_CTRLOFFSET 0x20
-
-/*
-Where to put the characters that aren't part of any of the 12 national
-character sets? The first thing that was done, in the earlier years of
-LMBCS, was to use up the spaces of the form
-
- [G] D1,
-
- where 'G' was one of the single-byte character groups, and
- D1 was less than 0x80. These sequences are gathered together
- into a Lotus-invented doublebyte character set to represent a
- lot of stray values. Internally, in this implementation, we track this
- as group '0', as a place to tuck this exceptions list.*/
-
-#define ULMBCS_GRP_EXCEPT 0x00
-/*
- Finally, as the durability and usefulness of UNICODE became clear,
- LOTUS added a new group 0x14 to hold Unicode values not otherwise
- represented in LMBCS: */
-#define ULMBCS_GRP_UNICODE 0x14
-/* The two bytes appearing after a 0x14 are intrepreted as UFT-16 BE
-(Big-Endian) characters. The exception comes when the UTF16
-representation would have a zero as the second byte. In that case,
-'F6' is used in its place, and the bytes are swapped. (This prevents
-LMBCS from encoding any Unicode values of the form U+F6xx, but that's OK:
-0xF6xx is in the middle of the Private Use Area.)*/
-#define ULMBCS_UNICOMPATZERO 0xF6
-
-/* It is also useful in our code to have a constant for the size of
-a LMBCS char that holds a literal Unicode value */
-#define ULMBCS_UNICODE_SIZE 3
-
-/*
-To squish the LMBCS representations down even further, and to make
-translations even faster,sometimes the optimization group byte can be dropped
-from a LMBCS character. This is decided on a process-by-process basis. The
-group byte that is dropped is called the 'optimization group'.
-
-For Notes, the optimzation group is always 0x1.*/
-#define ULMBCS_DEFAULTOPTGROUP 0x1
-/* For 1-2-3 files, the optimzation group is stored in the header of the 1-2-3
-file.
-
- In any case, when using ICU, you either pass in the
-optimization group as part of the name of the converter (LMBCS-1, LMBCS-2,
-etc.). Using plain 'LMBCS' as the name of the converter will give you
-LMBCS-1.
-
-
-*** Implementation strategy ***
-
-
-Because of the extensive use of other character sets, the LMBCS converter
-keeps a mapping between optimization groups and IBM character sets, so that
-ICU converters can be created and used as needed. */
-
-/* As you can see, even though any byte below 0x20 could be an optimization
-byte, only those at 0x13 or below can map to an actual converter. To limit
-some loops and searches, we define a value for that last group converter:*/
-
-#define ULMBCS_GRP_LAST 0x13 /* last LMBCS group that has a converter */
-
-static const char * const OptGroupByteToCPName[ULMBCS_GRP_LAST + 1] = {
- /* 0x0000 */ "lmb-excp", /* internal home for the LOTUS exceptions list */
- /* 0x0001 */ "ibm-850",
- /* 0x0002 */ "ibm-851",
- /* 0x0003 */ "windows-1255",
- /* 0x0004 */ "windows-1256",
- /* 0x0005 */ "windows-1251",
- /* 0x0006 */ "ibm-852",
- /* 0x0007 */ NULL, /* Unused */
- /* 0x0008 */ "windows-1254",
- /* 0x0009 */ NULL, /* Control char HT */
- /* 0x000A */ NULL, /* Control char LF */
- /* 0x000B */ "windows-874",
- /* 0x000C */ NULL, /* Unused */
- /* 0x000D */ NULL, /* Control char CR */
- /* 0x000E */ NULL, /* Unused */
- /* 0x000F */ NULL, /* Control chars: 0x0F20 + C0/C1 character: algorithmic */
- /* 0x0010 */ "windows-932",
- /* 0x0011 */ "windows-949",
- /* 0x0012 */ "windows-950",
- /* 0x0013 */ "windows-936"
-
- /* The rest are null, including the 0x0014 Unicode compatibility region
- and 0x0019, the 1-2-3 system range control char */
-};
-
-
-/* That's approximately all the data that's needed for translating
- LMBCS to Unicode.
-
-
-However, to translate Unicode to LMBCS, we need some more support.
-
-That's because there are often more than one possible mappings from a Unicode
-code point back into LMBCS. The first thing we do is look up into a table
-to figure out if there are more than one possible mappings. This table,
-arranged by Unicode values (including ranges) either lists which group
-to use, or says that it could go into one or more of the SBCS sets, or
-into one or more of the DBCS sets. (If the character exists in both DBCS &
-SBCS, the table will place it in the SBCS sets, to make the LMBCS code point
-length as small as possible. Here's the two special markers we use to indicate
-ambiguous mappings: */
-
-#define ULMBCS_AMBIGUOUS_SBCS 0x80 /* could fit in more than one
- LMBCS sbcs native encoding
- (example: most accented latin) */
-#define ULMBCS_AMBIGUOUS_MBCS 0x81 /* could fit in more than one
- LMBCS mbcs native encoding
- (example: Unihan) */
-#define ULMBCS_AMBIGUOUS_ALL 0x82
-/* And here's a simple way to see if a group falls in an appropriate range */
-#define ULMBCS_AMBIGUOUS_MATCH(agroup, xgroup) \
- ((((agroup) == ULMBCS_AMBIGUOUS_SBCS) && \
- (xgroup) < ULMBCS_DOUBLEOPTGROUP_START) || \
- (((agroup) == ULMBCS_AMBIGUOUS_MBCS) && \
- (xgroup) >= ULMBCS_DOUBLEOPTGROUP_START)) || \
- ((agroup) == ULMBCS_AMBIGUOUS_ALL)
-
-
-/* The table & some code to use it: */
-
-
-static const struct _UniLMBCSGrpMap
-{
- const UChar uniStartRange;
- const UChar uniEndRange;
- const ulmbcs_byte_t GrpType;
-} UniLMBCSGrpMap[]
-=
-{
-
- {0x0001, 0x001F, ULMBCS_GRP_CTRL},
- {0x0080, 0x009F, ULMBCS_GRP_CTRL},
- {0x00A0, 0x00A6, ULMBCS_AMBIGUOUS_SBCS},
- {0x00A7, 0x00A8, ULMBCS_AMBIGUOUS_ALL},
- {0x00A9, 0x00AF, ULMBCS_AMBIGUOUS_SBCS},
- {0x00B0, 0x00B1, ULMBCS_AMBIGUOUS_ALL},
- {0x00B2, 0x00B3, ULMBCS_AMBIGUOUS_SBCS},
- {0x00B4, 0x00B4, ULMBCS_AMBIGUOUS_ALL},
- {0x00B5, 0x00B5, ULMBCS_AMBIGUOUS_SBCS},
- {0x00B6, 0x00B6, ULMBCS_AMBIGUOUS_ALL},
- {0x00B7, 0x00D6, ULMBCS_AMBIGUOUS_SBCS},
- {0x00D7, 0x00D7, ULMBCS_AMBIGUOUS_ALL},
- {0x00D8, 0x00F6, ULMBCS_AMBIGUOUS_SBCS},
- {0x00F7, 0x00F7, ULMBCS_AMBIGUOUS_ALL},
- {0x00F8, 0x01CD, ULMBCS_AMBIGUOUS_SBCS},
- {0x01CE, 0x01CE, ULMBCS_GRP_TW },
- {0x01CF, 0x02B9, ULMBCS_AMBIGUOUS_SBCS},
- {0x02BA, 0x02BA, ULMBCS_GRP_CN},
- {0x02BC, 0x02C8, ULMBCS_AMBIGUOUS_SBCS},
- {0x02C9, 0x02D0, ULMBCS_AMBIGUOUS_MBCS},
- {0x02D8, 0x02DD, ULMBCS_AMBIGUOUS_SBCS},
- {0x0384, 0x0390, ULMBCS_AMBIGUOUS_SBCS},
- {0x0391, 0x03A9, ULMBCS_AMBIGUOUS_ALL},
- {0x03AA, 0x03B0, ULMBCS_AMBIGUOUS_SBCS},
- {0x03B1, 0x03C9, ULMBCS_AMBIGUOUS_ALL},
- {0x03CA, 0x03CE, ULMBCS_AMBIGUOUS_SBCS},
- {0x0400, 0x0400, ULMBCS_GRP_RU},
- {0x0401, 0x0401, ULMBCS_AMBIGUOUS_ALL},
- {0x0402, 0x040F, ULMBCS_GRP_RU},
- {0x0410, 0x0431, ULMBCS_AMBIGUOUS_ALL},
- {0x0432, 0x044E, ULMBCS_GRP_RU},
- {0x044F, 0x044F, ULMBCS_AMBIGUOUS_ALL},
- {0x0450, 0x0491, ULMBCS_GRP_RU},
- {0x05B0, 0x05F2, ULMBCS_GRP_HE},
- {0x060C, 0x06AF, ULMBCS_GRP_AR},
- {0x0E01, 0x0E5B, ULMBCS_GRP_TH},
- {0x200C, 0x200F, ULMBCS_AMBIGUOUS_SBCS},
- {0x2010, 0x2010, ULMBCS_AMBIGUOUS_MBCS},
- {0x2013, 0x2014, ULMBCS_AMBIGUOUS_SBCS},
- {0x2015, 0x2015, ULMBCS_AMBIGUOUS_MBCS},
- {0x2016, 0x2016, ULMBCS_AMBIGUOUS_MBCS},
- {0x2017, 0x2017, ULMBCS_AMBIGUOUS_SBCS},
- {0x2018, 0x2019, ULMBCS_AMBIGUOUS_ALL},
- {0x201A, 0x201B, ULMBCS_AMBIGUOUS_SBCS},
- {0x201C, 0x201D, ULMBCS_AMBIGUOUS_ALL},
- {0x201E, 0x201F, ULMBCS_AMBIGUOUS_SBCS},
- {0x2020, 0x2021, ULMBCS_AMBIGUOUS_ALL},
- {0x2022, 0x2024, ULMBCS_AMBIGUOUS_SBCS},
- {0x2025, 0x2025, ULMBCS_AMBIGUOUS_MBCS},
- {0x2026, 0x2026, ULMBCS_AMBIGUOUS_ALL},
- {0x2027, 0x2027, ULMBCS_GRP_TW},
- {0x2030, 0x2030, ULMBCS_AMBIGUOUS_ALL},
- {0x2031, 0x2031, ULMBCS_AMBIGUOUS_SBCS},
- {0x2032, 0x2033, ULMBCS_AMBIGUOUS_MBCS},
- {0x2035, 0x2035, ULMBCS_AMBIGUOUS_MBCS},
- {0x2039, 0x203A, ULMBCS_AMBIGUOUS_SBCS},
- {0x203B, 0x203B, ULMBCS_AMBIGUOUS_MBCS},
- {0x203C, 0x203C, ULMBCS_GRP_EXCEPT},
- {0x2074, 0x2074, ULMBCS_GRP_KO},
- {0x207F, 0x207F, ULMBCS_GRP_EXCEPT},
- {0x2081, 0x2084, ULMBCS_GRP_KO},
- {0x20A4, 0x20AC, ULMBCS_AMBIGUOUS_SBCS},
- {0x2103, 0x2109, ULMBCS_AMBIGUOUS_MBCS},
- {0x2111, 0x2120, ULMBCS_AMBIGUOUS_SBCS},
- /*zhujin: upgrade, for regressiont test, spr HKIA4YHTSU*/
- {0x2121, 0x2121, ULMBCS_AMBIGUOUS_MBCS},
- {0x2122, 0x2126, ULMBCS_AMBIGUOUS_SBCS},
- {0x212B, 0x212B, ULMBCS_AMBIGUOUS_MBCS},
- {0x2135, 0x2135, ULMBCS_AMBIGUOUS_SBCS},
- {0x2153, 0x2154, ULMBCS_GRP_KO},
- {0x215B, 0x215E, ULMBCS_GRP_EXCEPT},
- {0x2160, 0x2179, ULMBCS_AMBIGUOUS_MBCS},
- {0x2190, 0x2193, ULMBCS_AMBIGUOUS_ALL},
- {0x2194, 0x2195, ULMBCS_GRP_EXCEPT},
- {0x2196, 0x2199, ULMBCS_AMBIGUOUS_MBCS},
- {0x21A8, 0x21A8, ULMBCS_GRP_EXCEPT},
- {0x21B8, 0x21B9, ULMBCS_GRP_CN},
- {0x21D0, 0x21D1, ULMBCS_GRP_EXCEPT},
- {0x21D2, 0x21D2, ULMBCS_AMBIGUOUS_MBCS},
- {0x21D3, 0x21D3, ULMBCS_GRP_EXCEPT},
- {0x21D4, 0x21D4, ULMBCS_AMBIGUOUS_MBCS},
- {0x21D5, 0x21D5, ULMBCS_GRP_EXCEPT},
- {0x21E7, 0x21E7, ULMBCS_GRP_CN},
- {0x2200, 0x2200, ULMBCS_AMBIGUOUS_MBCS},
- {0x2201, 0x2201, ULMBCS_GRP_EXCEPT},
- {0x2202, 0x2202, ULMBCS_AMBIGUOUS_MBCS},
- {0x2203, 0x2203, ULMBCS_AMBIGUOUS_MBCS},
- {0x2204, 0x2206, ULMBCS_GRP_EXCEPT},
- {0x2207, 0x2208, ULMBCS_AMBIGUOUS_MBCS},
- {0x2209, 0x220A, ULMBCS_GRP_EXCEPT},
- {0x220B, 0x220B, ULMBCS_AMBIGUOUS_MBCS},
- {0x220F, 0x2215, ULMBCS_AMBIGUOUS_MBCS},
- {0x2219, 0x2219, ULMBCS_GRP_EXCEPT},
- {0x221A, 0x221A, ULMBCS_AMBIGUOUS_MBCS},
- {0x221B, 0x221C, ULMBCS_GRP_EXCEPT},
- {0x221D, 0x221E, ULMBCS_AMBIGUOUS_MBCS},
- {0x221F, 0x221F, ULMBCS_GRP_EXCEPT},
- {0x2220, 0x2220, ULMBCS_AMBIGUOUS_MBCS},
- {0x2223, 0x222A, ULMBCS_AMBIGUOUS_MBCS},
- {0x222B, 0x223D, ULMBCS_AMBIGUOUS_MBCS},
- {0x2245, 0x2248, ULMBCS_GRP_EXCEPT},
- {0x224C, 0x224C, ULMBCS_GRP_TW},
- {0x2252, 0x2252, ULMBCS_AMBIGUOUS_MBCS},
- {0x2260, 0x2261, ULMBCS_AMBIGUOUS_MBCS},
- {0x2262, 0x2265, ULMBCS_GRP_EXCEPT},
- {0x2266, 0x226F, ULMBCS_AMBIGUOUS_MBCS},
- {0x2282, 0x2283, ULMBCS_AMBIGUOUS_MBCS},
- {0x2284, 0x2285, ULMBCS_GRP_EXCEPT},
- {0x2286, 0x2287, ULMBCS_AMBIGUOUS_MBCS},
- {0x2288, 0x2297, ULMBCS_GRP_EXCEPT},
- {0x2299, 0x22BF, ULMBCS_AMBIGUOUS_MBCS},
- {0x22C0, 0x22C0, ULMBCS_GRP_EXCEPT},
- {0x2310, 0x2310, ULMBCS_GRP_EXCEPT},
- {0x2312, 0x2312, ULMBCS_AMBIGUOUS_MBCS},
- {0x2318, 0x2321, ULMBCS_GRP_EXCEPT},
- {0x2318, 0x2321, ULMBCS_GRP_CN},
- {0x2460, 0x24E9, ULMBCS_AMBIGUOUS_MBCS},
- {0x2500, 0x2500, ULMBCS_AMBIGUOUS_SBCS},
- {0x2501, 0x2501, ULMBCS_AMBIGUOUS_MBCS},
- {0x2502, 0x2502, ULMBCS_AMBIGUOUS_ALL},
- {0x2503, 0x2503, ULMBCS_AMBIGUOUS_MBCS},
- {0x2504, 0x2505, ULMBCS_GRP_TW},
- {0x2506, 0x2665, ULMBCS_AMBIGUOUS_ALL},
- {0x2666, 0x2666, ULMBCS_GRP_EXCEPT},
- {0x2667, 0x2669, ULMBCS_AMBIGUOUS_SBCS},
- {0x266A, 0x266A, ULMBCS_AMBIGUOUS_ALL},
- {0x266B, 0x266C, ULMBCS_AMBIGUOUS_SBCS},
- {0x266D, 0x266D, ULMBCS_AMBIGUOUS_MBCS},
- {0x266E, 0x266E, ULMBCS_AMBIGUOUS_SBCS},
- {0x266F, 0x266F, ULMBCS_GRP_JA},
- {0x2670, 0x2E7F, ULMBCS_AMBIGUOUS_SBCS},
- {0x2E80, 0xF861, ULMBCS_AMBIGUOUS_MBCS},
- {0xF862, 0xF8FF, ULMBCS_GRP_EXCEPT},
- {0xF900, 0xFA2D, ULMBCS_AMBIGUOUS_MBCS},
- {0xFB00, 0xFEFF, ULMBCS_AMBIGUOUS_SBCS},
- {0xFF01, 0xFFEE, ULMBCS_AMBIGUOUS_MBCS},
- {0xFFFF, 0xFFFF, ULMBCS_GRP_UNICODE}
-};
-
-static ulmbcs_byte_t
-FindLMBCSUniRange(UChar uniChar)
-{
- const struct _UniLMBCSGrpMap * pTable = UniLMBCSGrpMap;
-
- while (uniChar > pTable->uniEndRange)
- {
- pTable++;
- }
-
- if (uniChar >= pTable->uniStartRange)
- {
- return pTable->GrpType;
- }
- return ULMBCS_GRP_UNICODE;
-}
-
-/*
-We also ask the creator of a converter to send in a preferred locale
-that we can use in resolving ambiguous mappings. They send the locale
-in as a string, and we map it, if possible, to one of the
-LMBCS groups. We use this table, and the associated code, to
-do the lookup: */
-
-/**************************************************
- This table maps locale ID's to LMBCS opt groups.
- The default return is group 0x01. Note that for
- performance reasons, the table is sorted in
- increasing alphabetic order, with the notable
- exception of zhTW. This is to force the check
- for Traditonal Chinese before dropping back to
- Simplified.
-
- Note too that the Latin-1 groups have been
- commented out because it's the default, and
- this shortens the table, allowing a serial
- search to go quickly.
- *************************************************/
-
-static const struct _LocaleLMBCSGrpMap
-{
- const char *LocaleID;
- const ulmbcs_byte_t OptGroup;
-} LocaleLMBCSGrpMap[] =
-{
- {"ar", ULMBCS_GRP_AR},
- {"be", ULMBCS_GRP_RU},
- {"bg", ULMBCS_GRP_L2},
- /* {"ca", ULMBCS_GRP_L1}, */
- {"cs", ULMBCS_GRP_L2},
- /* {"da", ULMBCS_GRP_L1}, */
- /* {"de", ULMBCS_GRP_L1}, */
- {"el", ULMBCS_GRP_GR},
- /* {"en", ULMBCS_GRP_L1}, */
- /* {"es", ULMBCS_GRP_L1}, */
- /* {"et", ULMBCS_GRP_L1}, */
- /* {"fi", ULMBCS_GRP_L1}, */
- /* {"fr", ULMBCS_GRP_L1}, */
- {"he", ULMBCS_GRP_HE},
- {"hu", ULMBCS_GRP_L2},
- /* {"is", ULMBCS_GRP_L1}, */
- /* {"it", ULMBCS_GRP_L1}, */
- {"iw", ULMBCS_GRP_HE},
- {"ja", ULMBCS_GRP_JA},
- {"ko", ULMBCS_GRP_KO},
- /* {"lt", ULMBCS_GRP_L1}, */
- /* {"lv", ULMBCS_GRP_L1}, */
- {"mk", ULMBCS_GRP_RU},
- /* {"nl", ULMBCS_GRP_L1}, */
- /* {"no", ULMBCS_GRP_L1}, */
- {"pl", ULMBCS_GRP_L2},
- /* {"pt", ULMBCS_GRP_L1}, */
- {"ro", ULMBCS_GRP_L2},
- {"ru", ULMBCS_GRP_RU},
- {"sh", ULMBCS_GRP_L2},
- {"sk", ULMBCS_GRP_L2},
- {"sl", ULMBCS_GRP_L2},
- {"sq", ULMBCS_GRP_L2},
- {"sr", ULMBCS_GRP_RU},
- /* {"sv", ULMBCS_GRP_L1}, */
- {"th", ULMBCS_GRP_TH},
- {"tr", ULMBCS_GRP_TR},
- {"uk", ULMBCS_GRP_RU},
- /* {"vi", ULMBCS_GRP_L1}, */
- {"zhTW", ULMBCS_GRP_TW},
- {"zh", ULMBCS_GRP_CN},
- {NULL, ULMBCS_GRP_L1}
-};
-
-
-static ulmbcs_byte_t
-FindLMBCSLocale(const char *LocaleID)
-{
- const struct _LocaleLMBCSGrpMap *pTable = LocaleLMBCSGrpMap;
-
- if ((!LocaleID) || (!*LocaleID))
- {
- return 0;
- }
-
- while (pTable->LocaleID)
- {
- if (*pTable->LocaleID == *LocaleID) /* Check only first char for speed */
- {
- /* First char matches - check whole name, for entry-length */
- if (uprv_strncmp(pTable->LocaleID, LocaleID, strlen(pTable->LocaleID)) == 0)
- return pTable->OptGroup;
- }
- else
- if (*pTable->LocaleID > *LocaleID) /* Sorted alphabetically - exit */
- break;
- pTable++;
- }
- return ULMBCS_GRP_L1;
-}
-
-
-/*
- Before we get to the main body of code, here's how we hook up to the rest
- of ICU. ICU converters are required to define a structure that includes
- some function pointers, and some common data, in the style of a C++
- vtable. There is also room in there for converter-specific data. LMBCS
- uses that converter-specific data to keep track of the 12 subconverters
- we use, the optimization group, and the group (if any) that matches the
- locale. We have one structure instantiated for each of the 12 possible
- optimization groups. To avoid typos & to avoid boring the reader, we
- put the declarations of these structures and functions into macros. To see
- the definitions of these structures, see unicode\ucnv_bld.h
-*/
-
-typedef struct
- {
- UConverterSharedData *OptGrpConverter[ULMBCS_GRP_LAST+1]; /* Converter per Opt. grp. */
- uint8_t OptGroup; /* default Opt. grp. for this LMBCS session */
- uint8_t localeConverterIndex; /* reasonable locale match for index */
- }
-UConverterDataLMBCS;
-
-U_CDECL_BEGIN
-static void U_CALLCONV _LMBCSClose(UConverter * _this);
-U_CDECL_END
-
-#define DECLARE_LMBCS_DATA(n) \
-static const UConverterImpl _LMBCSImpl##n={\
- UCNV_LMBCS_##n,\
- NULL,NULL,\
- _LMBCSOpen##n,\
- _LMBCSClose,\
- NULL,\
- _LMBCSToUnicodeWithOffsets,\
- _LMBCSToUnicodeWithOffsets,\
- _LMBCSFromUnicode,\
- _LMBCSFromUnicode,\
- NULL,\
- NULL,\
- NULL,\
- NULL,\
- _LMBCSSafeClone,\
- ucnv_getCompleteUnicodeSet,\
- NULL,\
- NULL\
-};\
-static const UConverterStaticData _LMBCSStaticData##n={\
- sizeof(UConverterStaticData),\
- "LMBCS-" #n,\
- 0, UCNV_IBM, UCNV_LMBCS_##n, 1, 3,\
- { 0x3f, 0, 0, 0 },1,FALSE,FALSE,0,0,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} \
-};\
-const UConverterSharedData _LMBCSData##n= \
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_LMBCSStaticData##n, &_LMBCSImpl##n);
-
- /* The only function we needed to duplicate 12 times was the 'open'
-function, which will do basically the same thing except set a different
-optimization group. So, we put the common stuff into a worker function,
-and set up another macro to stamp out the 12 open functions:*/
-#define DEFINE_LMBCS_OPEN(n) \
-static void U_CALLCONV \
- _LMBCSOpen##n(UConverter* _this, UConverterLoadArgs* pArgs, UErrorCode* err) \
-{ _LMBCSOpenWorker(_this, pArgs, err, n); }
-
-
-
-/* Here's the open worker & the common close function */
-static void
-_LMBCSOpenWorker(UConverter* _this,
- UConverterLoadArgs *pArgs,
- UErrorCode* err,
- ulmbcs_byte_t OptGroup)
-{
- UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS*)uprv_malloc (sizeof (UConverterDataLMBCS));
- _this->extraInfo = extraInfo;
- if(extraInfo != NULL)
- {
- UConverterNamePieces stackPieces;
- UConverterLoadArgs stackArgs= UCNV_LOAD_ARGS_INITIALIZER;
- ulmbcs_byte_t i;
-
- uprv_memset(extraInfo, 0, sizeof(UConverterDataLMBCS));
-
- stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
-
- for (i=0; i <= ULMBCS_GRP_LAST && U_SUCCESS(*err); i++)
- {
- if(OptGroupByteToCPName[i] != NULL) {
- extraInfo->OptGrpConverter[i] = ucnv_loadSharedData(OptGroupByteToCPName[i], &stackPieces, &stackArgs, err);
- }
- }
-
- if(U_FAILURE(*err) || pArgs->onlyTestIsLoadable) {
- _LMBCSClose(_this);
- return;
- }
- extraInfo->OptGroup = OptGroup;
- extraInfo->localeConverterIndex = FindLMBCSLocale(pArgs->locale);
- }
- else
- {
- *err = U_MEMORY_ALLOCATION_ERROR;
- }
-}
-
-U_CDECL_BEGIN
-static void U_CALLCONV
-_LMBCSClose(UConverter * _this)
-{
- if (_this->extraInfo != NULL)
- {
- ulmbcs_byte_t Ix;
- UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo;
-
- for (Ix=0; Ix <= ULMBCS_GRP_LAST; Ix++)
- {
- if (extraInfo->OptGrpConverter[Ix] != NULL)
- ucnv_unloadSharedDataIfReady(extraInfo->OptGrpConverter[Ix]);
- }
- if (!_this->isExtraLocal) {
- uprv_free (_this->extraInfo);
- _this->extraInfo = NULL;
- }
- }
-}
-
-typedef struct LMBCSClone {
- UConverter cnv;
- UConverterDataLMBCS lmbcs;
-} LMBCSClone;
-
-static UConverter * U_CALLCONV
-_LMBCSSafeClone(const UConverter *cnv,
- void *stackBuffer,
- int32_t *pBufferSize,
- UErrorCode *status) {
- (void)status;
- LMBCSClone *newLMBCS;
- UConverterDataLMBCS *extraInfo;
- int32_t i;
-
- if(*pBufferSize<=0) {
- *pBufferSize=(int32_t)sizeof(LMBCSClone);
- return NULL;
- }
-
- extraInfo=(UConverterDataLMBCS *)cnv->extraInfo;
- newLMBCS=(LMBCSClone *)stackBuffer;
-
- /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
-
- uprv_memcpy(&newLMBCS->lmbcs, extraInfo, sizeof(UConverterDataLMBCS));
-
- /* share the subconverters */
- for(i = 0; i <= ULMBCS_GRP_LAST; ++i) {
- if(extraInfo->OptGrpConverter[i] != NULL) {
- ucnv_incrementRefCount(extraInfo->OptGrpConverter[i]);
- }
- }
-
- newLMBCS->cnv.extraInfo = &newLMBCS->lmbcs;
- newLMBCS->cnv.isExtraLocal = TRUE;
- return &newLMBCS->cnv;
-}
-
-/*
- * There used to be a _LMBCSGetUnicodeSet() function here (up to svn revision 20117)
- * which added all code points except for U+F6xx
- * because those cannot be represented in the Unicode group.
- * However, it turns out that windows-950 has roundtrips for all of U+F6xx
- * which means that LMBCS can convert all Unicode code points after all.
- * We now simply use ucnv_getCompleteUnicodeSet().
- *
- * This may need to be looked at again as Lotus uses _LMBCSGetUnicodeSet(). (091216)
- */
-
-/*
- Here's the basic helper function that we use when converting from
- Unicode to LMBCS, and we suspect that a Unicode character will fit into
- one of the 12 groups. The return value is the number of bytes written
- starting at pStartLMBCS (if any).
-*/
-
-static size_t
-LMBCSConversionWorker (
- UConverterDataLMBCS * extraInfo, /* subconverters, opt & locale groups */
- ulmbcs_byte_t group, /* The group to try */
- ulmbcs_byte_t * pStartLMBCS, /* where to put the results */
- UChar * pUniChar, /* The input unicode character */
- ulmbcs_byte_t * lastConverterIndex, /* output: track last successful group used */
- UBool * groups_tried /* output: track any unsuccessful groups */
-)
-{
- ulmbcs_byte_t * pLMBCS = pStartLMBCS;
- UConverterSharedData * xcnv = extraInfo->OptGrpConverter[group];
-
- int bytesConverted;
- uint32_t value;
- ulmbcs_byte_t firstByte;
-
- U_ASSERT(xcnv);
- U_ASSERT(group<ULMBCS_GRP_UNICODE);
-
- bytesConverted = ucnv_MBCSFromUChar32(xcnv, *pUniChar, &value, FALSE);
-
- /* get the first result byte */
- if(bytesConverted > 0) {
- firstByte = (ulmbcs_byte_t)(value >> ((bytesConverted - 1) * 8));
- } else {
- /* most common failure mode is an unassigned character */
- groups_tried[group] = TRUE;
- return 0;
- }
-
- *lastConverterIndex = group;
-
- /* All initial byte values in lower ascii range should have been caught by now,
- except with the exception group.
- */
- U_ASSERT((firstByte <= ULMBCS_C0END) || (firstByte >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT));
-
- /* use converted data: first write 0, 1 or two group bytes */
- if (group != ULMBCS_GRP_EXCEPT && extraInfo->OptGroup != group)
- {
- *pLMBCS++ = group;
- if (bytesConverted == 1 && group >= ULMBCS_DOUBLEOPTGROUP_START)
- {
- *pLMBCS++ = group;
- }
- }
-
- /* don't emit control chars */
- if ( bytesConverted == 1 && firstByte < 0x20 )
- return 0;
-
-
- /* then move over the converted data */
- switch(bytesConverted)
- {
- case 4:
- *pLMBCS++ = (ulmbcs_byte_t)(value >> 24);
- U_FALLTHROUGH;
- case 3:
- *pLMBCS++ = (ulmbcs_byte_t)(value >> 16);
- U_FALLTHROUGH;
- case 2:
- *pLMBCS++ = (ulmbcs_byte_t)(value >> 8);
- U_FALLTHROUGH;
- case 1:
- *pLMBCS++ = (ulmbcs_byte_t)value;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
-
- return (pLMBCS - pStartLMBCS);
-}
-
-
-/* This is a much simpler version of above, when we
-know we are writing LMBCS using the Unicode group
-*/
-static size_t
-LMBCSConvertUni(ulmbcs_byte_t * pLMBCS, UChar uniChar)
-{
- /* encode into LMBCS Unicode range */
- uint8_t LowCh = (uint8_t)(uniChar & 0x00FF);
- uint8_t HighCh = (uint8_t)(uniChar >> 8);
-
- *pLMBCS++ = ULMBCS_GRP_UNICODE;
-
- if (LowCh == 0)
- {
- *pLMBCS++ = ULMBCS_UNICOMPATZERO;
- *pLMBCS++ = HighCh;
- }
- else
- {
- *pLMBCS++ = HighCh;
- *pLMBCS++ = LowCh;
- }
- return ULMBCS_UNICODE_SIZE;
-}
-
-
-
-/* The main Unicode to LMBCS conversion function */
-static void U_CALLCONV
-_LMBCSFromUnicode(UConverterFromUnicodeArgs* args,
- UErrorCode* err)
-{
- ulmbcs_byte_t lastConverterIndex = 0;
- UChar uniChar;
- ulmbcs_byte_t LMBCS[ULMBCS_CHARSIZE_MAX];
- ulmbcs_byte_t * pLMBCS;
- int32_t bytes_written;
- UBool groups_tried[ULMBCS_GRP_LAST+1];
- UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
- int sourceIndex = 0;
-
- /* Basic strategy: attempt to fill in local LMBCS 1-char buffer.(LMBCS)
- If that succeeds, see if it will all fit into the target & copy it over
- if it does.
-
- We try conversions in the following order:
-
- 1. Single-byte ascii & special fixed control chars (&null)
- 2. Look up group in table & try that (could be
- A) Unicode group
- B) control group,
- C) national encoding,
- or ambiguous SBCS or MBCS group (on to step 4...)
-
- 3. If its ambiguous, try this order:
- A) The optimization group
- B) The locale group
- C) The last group that succeeded with this string.
- D) every other group that's relevent (single or double)
- E) If its single-byte ambiguous, try the exceptions group
-
- 4. And as a grand fallback: Unicode
- */
-
- /*Fix for SPR#DJOE66JFN3 (Lotus)*/
- ulmbcs_byte_t OldConverterIndex = 0;
-
- while (args->source < args->sourceLimit && !U_FAILURE(*err))
- {
- /*Fix for SPR#DJOE66JFN3 (Lotus)*/
- OldConverterIndex = extraInfo->localeConverterIndex;
-
- if (args->target >= args->targetLimit)
- {
- *err = U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- uniChar = *(args->source);
- bytes_written = 0;
- pLMBCS = LMBCS;
-
- /* check cases in rough order of how common they are, for speed */
-
- /* single byte matches: strategy 1 */
- /*Fix for SPR#DJOE66JFN3 (Lotus)*/
- if((uniChar>=0x80) && (uniChar<=0xff)
- /*Fix for SPR#JUYA6XAERU and TSAO7GL5NK (Lotus)*/ &&(uniChar!=0xB1) &&(uniChar!=0xD7) &&(uniChar!=0xF7)
- &&(uniChar!=0xB0) &&(uniChar!=0xB4) &&(uniChar!=0xB6) &&(uniChar!=0xA7) &&(uniChar!=0xA8))
- {
- extraInfo->localeConverterIndex = ULMBCS_GRP_L1;
- }
- if (((uniChar > ULMBCS_C0END) && (uniChar < ULMBCS_C1START)) ||
- uniChar == 0 || uniChar == ULMBCS_HT || uniChar == ULMBCS_CR ||
- uniChar == ULMBCS_LF || uniChar == ULMBCS_123SYSTEMRANGE
- )
- {
- *pLMBCS++ = (ulmbcs_byte_t ) uniChar;
- bytes_written = 1;
- }
-
-
- if (!bytes_written)
- {
- /* Check by UNICODE range (Strategy 2) */
- ulmbcs_byte_t group = FindLMBCSUniRange(uniChar);
-
- if (group == ULMBCS_GRP_UNICODE) /* (Strategy 2A) */
- {
- pLMBCS += LMBCSConvertUni(pLMBCS,uniChar);
-
- bytes_written = (int32_t)(pLMBCS - LMBCS);
- }
- else if (group == ULMBCS_GRP_CTRL) /* (Strategy 2B) */
- {
- /* Handle control characters here */
- if (uniChar <= ULMBCS_C0END)
- {
- *pLMBCS++ = ULMBCS_GRP_CTRL;
- *pLMBCS++ = (ulmbcs_byte_t)(ULMBCS_CTRLOFFSET + uniChar);
- }
- else if (uniChar >= ULMBCS_C1START && uniChar <= ULMBCS_C1START + ULMBCS_CTRLOFFSET)
- {
- *pLMBCS++ = ULMBCS_GRP_CTRL;
- *pLMBCS++ = (ulmbcs_byte_t ) (uniChar & 0x00FF);
- }
- bytes_written = (int32_t)(pLMBCS - LMBCS);
- }
- else if (group < ULMBCS_GRP_UNICODE) /* (Strategy 2C) */
- {
- /* a specific converter has been identified - use it */
- bytes_written = (int32_t)LMBCSConversionWorker (
- extraInfo, group, pLMBCS, &uniChar,
- &lastConverterIndex, groups_tried);
- }
- if (!bytes_written) /* the ambiguous group cases (Strategy 3) */
- {
- uprv_memset(groups_tried, 0, sizeof(groups_tried));
-
- /* check for non-default optimization group (Strategy 3A )*/
- if ((extraInfo->OptGroup != 1) && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->OptGroup)))
- {
- /*zhujin: upgrade, merge #39299 here (Lotus) */
- /*To make R5 compatible translation, look for exceptional group first for non-DBCS*/
-
- if(extraInfo->localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START)
- {
- bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
- ULMBCS_GRP_L1, pLMBCS, &uniChar,
- &lastConverterIndex, groups_tried);
-
- if(!bytes_written)
- {
- bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
- ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar,
- &lastConverterIndex, groups_tried);
- }
- if(!bytes_written)
- {
- bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
- extraInfo->localeConverterIndex, pLMBCS, &uniChar,
- &lastConverterIndex, groups_tried);
- }
- }
- else
- {
- bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
- extraInfo->localeConverterIndex, pLMBCS, &uniChar,
- &lastConverterIndex, groups_tried);
- }
- }
- /* check for locale optimization group (Strategy 3B) */
- if (!bytes_written && (extraInfo->localeConverterIndex) && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->localeConverterIndex)))
- {
- bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
- extraInfo->localeConverterIndex, pLMBCS, &uniChar, &lastConverterIndex, groups_tried);
- }
- /* check for last optimization group used for this string (Strategy 3C) */
- if (!bytes_written && (lastConverterIndex) && (ULMBCS_AMBIGUOUS_MATCH(group, lastConverterIndex)))
- {
- bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
- lastConverterIndex, pLMBCS, &uniChar, &lastConverterIndex, groups_tried);
- }
- if (!bytes_written)
- {
- /* just check every possible matching converter (Strategy 3D) */
- ulmbcs_byte_t grp_start;
- ulmbcs_byte_t grp_end;
- ulmbcs_byte_t grp_ix;
- grp_start = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS)
- ? ULMBCS_DOUBLEOPTGROUP_START
- : ULMBCS_GRP_L1);
- grp_end = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS)
- ? ULMBCS_GRP_LAST
- : ULMBCS_GRP_TH);
- if(group == ULMBCS_AMBIGUOUS_ALL)
- {
- grp_start = ULMBCS_GRP_L1;
- grp_end = ULMBCS_GRP_LAST;
- }
- for (grp_ix = grp_start;
- grp_ix <= grp_end && !bytes_written;
- grp_ix++)
- {
- if (extraInfo->OptGrpConverter [grp_ix] && !groups_tried [grp_ix])
- {
- bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
- grp_ix, pLMBCS, &uniChar,
- &lastConverterIndex, groups_tried);
- }
- }
- /* a final conversion fallback to the exceptions group if its likely
- to be single byte (Strategy 3E) */
- if (!bytes_written && grp_start == ULMBCS_GRP_L1)
- {
- bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
- ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar,
- &lastConverterIndex, groups_tried);
- }
- }
- /* all of our other strategies failed. Fallback to Unicode. (Strategy 4)*/
- if (!bytes_written)
- {
-
- pLMBCS += LMBCSConvertUni(pLMBCS, uniChar);
- bytes_written = (int32_t)(pLMBCS - LMBCS);
- }
- }
- }
-
- /* we have a translation. increment source and write as much as posible to target */
- args->source++;
- pLMBCS = LMBCS;
- while (args->target < args->targetLimit && bytes_written--)
- {
- *(args->target)++ = *pLMBCS++;
- if (args->offsets)
- {
- *(args->offsets)++ = sourceIndex;
- }
- }
- sourceIndex++;
- if (bytes_written > 0)
- {
- /* write any bytes that didn't fit in target to the error buffer,
- common code will move this to target if we get called back with
- enough target room
- */
- uint8_t * pErrorBuffer = args->converter->charErrorBuffer;
- *err = U_BUFFER_OVERFLOW_ERROR;
- args->converter->charErrorBufferLength = (int8_t)bytes_written;
- while (bytes_written--)
- {
- *pErrorBuffer++ = *pLMBCS++;
- }
- }
- /*Fix for SPR#DJOE66JFN3 (Lotus)*/
- extraInfo->localeConverterIndex = OldConverterIndex;
- }
-}
-
-
-/* Now, the Unicode from LMBCS section */
-
-
-/* A function to call when we are looking at the Unicode group byte in LMBCS */
-static UChar
-GetUniFromLMBCSUni(char const ** ppLMBCSin) /* Called with LMBCS-style Unicode byte stream */
-{
- uint8_t HighCh = *(*ppLMBCSin)++; /* Big-endian Unicode in LMBCS compatibility group*/
- uint8_t LowCh = *(*ppLMBCSin)++;
-
- if (HighCh == ULMBCS_UNICOMPATZERO )
- {
- HighCh = LowCh;
- LowCh = 0; /* zero-byte in LSB special character */
- }
- return (UChar)((HighCh << 8) | LowCh);
-}
-
-
-
-/* CHECK_SOURCE_LIMIT: Helper macro to verify that there are at least'index'
- bytes left in source up to sourceLimit.Errors appropriately if not.
- If we reach the limit, then update the source pointer to there to consume
- all input as required by ICU converter semantics.
-*/
-
-#define CHECK_SOURCE_LIMIT(index) UPRV_BLOCK_MACRO_BEGIN { \
- if (args->source+index > args->sourceLimit) { \
- *err = U_TRUNCATED_CHAR_FOUND; \
- args->source = args->sourceLimit; \
- return 0xffff; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/* Return the Unicode representation for the current LMBCS character */
-
-static UChar32 U_CALLCONV
-_LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
- UErrorCode* err)
-{
- UChar32 uniChar = 0; /* an output UNICODE char */
- ulmbcs_byte_t CurByte; /* A byte from the input stream */
-
- /* error check */
- if (args->source >= args->sourceLimit)
- {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- return 0xffff;
- }
- /* Grab first byte & save address for error recovery */
- CurByte = *((ulmbcs_byte_t *) (args->source++));
-
- /*
- * at entry of each if clause:
- * 1. 'CurByte' points at the first byte of a LMBCS character
- * 2. '*source'points to the next byte of the source stream after 'CurByte'
- *
- * the job of each if clause is:
- * 1. set '*source' to point at the beginning of next char (nop if LMBCS char is only 1 byte)
- * 2. set 'uniChar' up with the right Unicode value, or set 'err' appropriately
- */
-
- /* First lets check the simple fixed values. */
-
- if(((CurByte > ULMBCS_C0END) && (CurByte < ULMBCS_C1START)) /* ascii range */
- || (CurByte == 0)
- || CurByte == ULMBCS_HT || CurByte == ULMBCS_CR
- || CurByte == ULMBCS_LF || CurByte == ULMBCS_123SYSTEMRANGE)
- {
- uniChar = CurByte;
- }
- else
- {
- UConverterDataLMBCS * extraInfo;
- ulmbcs_byte_t group;
- UConverterSharedData *cnv;
-
- if (CurByte == ULMBCS_GRP_CTRL) /* Control character group - no opt group update */
- {
- ulmbcs_byte_t C0C1byte;
- CHECK_SOURCE_LIMIT(1);
- C0C1byte = *(args->source)++;
- uniChar = (C0C1byte < ULMBCS_C1START) ? C0C1byte - ULMBCS_CTRLOFFSET : C0C1byte;
- }
- else
- if (CurByte == ULMBCS_GRP_UNICODE) /* Unicode compatibility group: BigEndian UTF16 */
- {
- CHECK_SOURCE_LIMIT(2);
-
- /* don't check for error indicators fffe/ffff below */
- return GetUniFromLMBCSUni(&(args->source));
- }
- else if (CurByte <= ULMBCS_CTRLOFFSET)
- {
- group = CurByte; /* group byte is in the source */
- extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
- if (group > ULMBCS_GRP_LAST || (cnv = extraInfo->OptGrpConverter[group]) == NULL)
- {
- /* this is not a valid group byte - no converter*/
- *err = U_INVALID_CHAR_FOUND;
- }
- else if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */
- {
-
- CHECK_SOURCE_LIMIT(2);
-
- /* check for LMBCS doubled-group-byte case */
- if (*args->source == group) {
- /* single byte */
- ++args->source;
- uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 1, FALSE);
- ++args->source;
- } else {
- /* double byte */
- uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 2, FALSE);
- args->source += 2;
- }
- }
- else { /* single byte conversion */
- CHECK_SOURCE_LIMIT(1);
- CurByte = *(args->source)++;
-
- if (CurByte >= ULMBCS_C1START)
- {
- uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte);
- }
- else
- {
- /* The non-optimizable oddballs where there is an explicit byte
- * AND the second byte is not in the upper ascii range
- */
- char bytes[2];
-
- extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
- cnv = extraInfo->OptGrpConverter [ULMBCS_GRP_EXCEPT];
-
- /* Lookup value must include opt group */
- bytes[0] = group;
- bytes[1] = CurByte;
- uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, bytes, 2, FALSE);
- }
- }
- }
- else if (CurByte >= ULMBCS_C1START) /* group byte is implicit */
- {
- extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
- group = extraInfo->OptGroup;
- cnv = extraInfo->OptGrpConverter[group];
- if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */
- {
- if (!ucnv_MBCSIsLeadByte(cnv, CurByte))
- {
- CHECK_SOURCE_LIMIT(0);
-
- /* let the MBCS conversion consume CurByte again */
- uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 1, FALSE);
- }
- else
- {
- CHECK_SOURCE_LIMIT(1);
- /* let the MBCS conversion consume CurByte again */
- uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 2, FALSE);
- ++args->source;
- }
- }
- else /* single byte conversion */
- {
- uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte);
- }
- }
- }
- return uniChar;
-}
-
-
-/* The exported function that converts lmbcs to one or more
- UChars - currently UTF-16
-*/
-static void U_CALLCONV
-_LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
- UErrorCode* err)
-{
- char LMBCS [ULMBCS_CHARSIZE_MAX];
- UChar uniChar; /* one output UNICODE char */
- const char * saveSource; /* beginning of current code point */
- const char * pStartLMBCS = args->source; /* beginning of whole string */
- const char * errSource = NULL; /* pointer to actual input in case an error occurs */
- int8_t savebytes = 0;
-
- /* Process from source to limit, or until error */
- while (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit > args->target)
- {
- saveSource = args->source; /* beginning of current code point */
-
- if (args->converter->toULength) /* reassemble char from previous call */
- {
- const char *saveSourceLimit;
- size_t size_old = args->converter->toULength;
-
- /* limit from source is either remainder of temp buffer, or user limit on source */
- size_t size_new_maybe_1 = sizeof(LMBCS) - size_old;
- size_t size_new_maybe_2 = args->sourceLimit - args->source;
- size_t size_new = (size_new_maybe_1 < size_new_maybe_2) ? size_new_maybe_1 : size_new_maybe_2;
-
-
- uprv_memcpy(LMBCS, args->converter->toUBytes, size_old);
- uprv_memcpy(LMBCS + size_old, args->source, size_new);
- saveSourceLimit = args->sourceLimit;
- args->source = errSource = LMBCS;
- args->sourceLimit = LMBCS+size_old+size_new;
- savebytes = (int8_t)(size_old+size_new);
- uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err);
- args->source = saveSource + ((args->source - LMBCS) - size_old);
- args->sourceLimit = saveSourceLimit;
-
- if (*err == U_TRUNCATED_CHAR_FOUND)
- {
- /* evil special case: source buffers so small a char spans more than 2 buffers */
- args->converter->toULength = savebytes;
- uprv_memcpy(args->converter->toUBytes, LMBCS, savebytes);
- args->source = args->sourceLimit;
- *err = U_ZERO_ERROR;
- return;
- }
- else
- {
- /* clear the partial-char marker */
- args->converter->toULength = 0;
- }
- }
- else
- {
- errSource = saveSource;
- uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err);
- savebytes = (int8_t)(args->source - saveSource);
- }
- if (U_SUCCESS(*err))
- {
- if (uniChar < 0xfffe)
- {
- *(args->target)++ = uniChar;
- if(args->offsets)
- {
- *(args->offsets)++ = (int32_t)(saveSource - pStartLMBCS);
- }
- }
- else if (uniChar == 0xfffe)
- {
- *err = U_INVALID_CHAR_FOUND;
- }
- else /* if (uniChar == 0xffff) */
- {
- *err = U_ILLEGAL_CHAR_FOUND;
- }
- }
- }
- /* if target ran out before source, return U_BUFFER_OVERFLOW_ERROR */
- if (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit <= args->target)
- {
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- else if (U_FAILURE(*err))
- {
- /* If character incomplete or unmappable/illegal, store it in toUBytes[] */
- args->converter->toULength = savebytes;
- if (savebytes > 0) {
- uprv_memcpy(args->converter->toUBytes, errSource, savebytes);
- }
- if (*err == U_TRUNCATED_CHAR_FOUND) {
- *err = U_ZERO_ERROR;
- }
- }
-}
-
-/* And now, the macroized declarations of data & functions: */
-DEFINE_LMBCS_OPEN(1)
-DEFINE_LMBCS_OPEN(2)
-DEFINE_LMBCS_OPEN(3)
-DEFINE_LMBCS_OPEN(4)
-DEFINE_LMBCS_OPEN(5)
-DEFINE_LMBCS_OPEN(6)
-DEFINE_LMBCS_OPEN(8)
-DEFINE_LMBCS_OPEN(11)
-DEFINE_LMBCS_OPEN(16)
-DEFINE_LMBCS_OPEN(17)
-DEFINE_LMBCS_OPEN(18)
-DEFINE_LMBCS_OPEN(19)
-
-
-DECLARE_LMBCS_DATA(1)
-DECLARE_LMBCS_DATA(2)
-DECLARE_LMBCS_DATA(3)
-DECLARE_LMBCS_DATA(4)
-DECLARE_LMBCS_DATA(5)
-DECLARE_LMBCS_DATA(6)
-DECLARE_LMBCS_DATA(8)
-DECLARE_LMBCS_DATA(11)
-DECLARE_LMBCS_DATA(16)
-DECLARE_LMBCS_DATA(17)
-DECLARE_LMBCS_DATA(18)
-DECLARE_LMBCS_DATA(19)
-
-U_CDECL_END
-
-#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
diff --git a/contrib/libs/icu/common/ucnv_set.cpp b/contrib/libs/icu/common/ucnv_set.cpp
deleted file mode 100644
index 926cee0de81..00000000000
--- a/contrib/libs/icu/common/ucnv_set.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2003-2007, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: ucnv_set.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004sep07
-* created by: Markus W. Scherer
-*
-* Conversion API functions using USet (ucnv_getUnicodeSet())
-* moved here from ucnv.c for removing the dependency of other ucnv_
-* implementation functions on the USet implementation.
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/uset.h"
-#include "unicode/ucnv.h"
-#include "ucnv_bld.h"
-#include "uset_imp.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-U_CAPI void U_EXPORT2
-ucnv_getUnicodeSet(const UConverter *cnv,
- USet *setFillIn,
- UConverterUnicodeSet whichSet,
- UErrorCode *pErrorCode) {
- /* argument checking */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
- if(cnv==NULL || setFillIn==NULL || whichSet<UCNV_ROUNDTRIP_SET || UCNV_SET_COUNT<=whichSet) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- /* does this converter support this function? */
- if(cnv->sharedData->impl->getUnicodeSet==NULL) {
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return;
- }
-
- {
- USetAdder sa={
- NULL,
- uset_add,
- uset_addRange,
- uset_addString,
- uset_remove,
- uset_removeRange
- };
- sa.set=setFillIn;
-
- /* empty the set */
- uset_clear(setFillIn);
-
- /* call the converter to add the code points it supports */
- cnv->sharedData->impl->getUnicodeSet(cnv, &sa, whichSet, pErrorCode);
- }
-}
-
-#endif
diff --git a/contrib/libs/icu/common/ucnv_u16.cpp b/contrib/libs/icu/common/ucnv_u16.cpp
deleted file mode 100644
index a5e8367400a..00000000000
--- a/contrib/libs/icu/common/ucnv_u16.cpp
+++ /dev/null
@@ -1,1579 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2002-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* file name: ucnv_u16.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002jul01
-* created by: Markus W. Scherer
-*
-* UTF-16 converter implementation. Used to be in ucnv_utf.c.
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "unicode/uversion.h"
-#include "ucnv_bld.h"
-#include "ucnv_cnv.h"
-#include "cmemory.h"
-
-enum {
- UCNV_NEED_TO_WRITE_BOM=1
-};
-
-U_CDECL_BEGIN
-/*
- * The UTF-16 toUnicode implementation is also used for the Java-specific
- * "with BOM" variants of UTF-16BE and UTF-16LE.
- */
-static void U_CALLCONV
-_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode);
-
-/* UTF-16BE ----------------------------------------------------------------- */
-
-#if U_IS_BIG_ENDIAN
-# define _UTF16PEFromUnicodeWithOffsets _UTF16BEFromUnicodeWithOffsets
-#else
-# define _UTF16PEFromUnicodeWithOffsets _UTF16LEFromUnicodeWithOffsets
-#endif
-
-
-static void U_CALLCONV
-_UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const UChar *source;
- char *target;
- int32_t *offsets;
-
- uint32_t targetCapacity, length, sourceIndex;
- UChar c, trail;
- char overflow[4];
-
- source=pArgs->source;
- length=(int32_t)(pArgs->sourceLimit-source);
- if(length<=0) {
- /* no input, nothing to do */
- return;
- }
-
- cnv=pArgs->converter;
-
- /* write the BOM if necessary */
- if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
- static const char bom[]={ (char)0xfeu, (char)0xffu };
- ucnv_fromUWriteBytes(cnv,
- bom, 2,
- &pArgs->target, pArgs->targetLimit,
- &pArgs->offsets, -1,
- pErrorCode);
- cnv->fromUnicodeStatus=0;
- }
-
- target=pArgs->target;
- if(target >= pArgs->targetLimit) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- return;
- }
-
- targetCapacity=(uint32_t)(pArgs->targetLimit-target);
- offsets=pArgs->offsets;
- sourceIndex=0;
-
- /* c!=0 indicates in several places outside the main loops that a surrogate was found */
-
- if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) {
- /* the last buffer ended with a lead surrogate, output the surrogate pair */
- ++source;
- --length;
- target[0]=(uint8_t)(c>>8);
- target[1]=(uint8_t)c;
- target[2]=(uint8_t)(trail>>8);
- target[3]=(uint8_t)trail;
- target+=4;
- targetCapacity-=4;
- if(offsets!=NULL) {
- *offsets++=-1;
- *offsets++=-1;
- *offsets++=-1;
- *offsets++=-1;
- }
- sourceIndex=1;
- cnv->fromUChar32=c=0;
- }
-
- if(c==0) {
- /* copy an even number of bytes for complete UChars */
- uint32_t count=2*length;
- if(count>targetCapacity) {
- count=targetCapacity&~1;
- }
- /* count is even */
- targetCapacity-=count;
- count>>=1;
- length-=count;
-
- if(offsets==NULL) {
- while(count>0) {
- c=*source++;
- if(U16_IS_SINGLE(c)) {
- target[0]=(uint8_t)(c>>8);
- target[1]=(uint8_t)c;
- target+=2;
- } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
- ++source;
- --count;
- target[0]=(uint8_t)(c>>8);
- target[1]=(uint8_t)c;
- target[2]=(uint8_t)(trail>>8);
- target[3]=(uint8_t)trail;
- target+=4;
- } else {
- break;
- }
- --count;
- }
- } else {
- while(count>0) {
- c=*source++;
- if(U16_IS_SINGLE(c)) {
- target[0]=(uint8_t)(c>>8);
- target[1]=(uint8_t)c;
- target+=2;
- *offsets++=sourceIndex;
- *offsets++=sourceIndex++;
- } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
- ++source;
- --count;
- target[0]=(uint8_t)(c>>8);
- target[1]=(uint8_t)c;
- target[2]=(uint8_t)(trail>>8);
- target[3]=(uint8_t)trail;
- target+=4;
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- sourceIndex+=2;
- } else {
- break;
- }
- --count;
- }
- }
-
- if(count==0) {
- /* done with the loop for complete UChars */
- if(length>0 && targetCapacity>0) {
- /*
- * there is more input and some target capacity -
- * it must be targetCapacity==1 because otherwise
- * the above would have copied more;
- * prepare for overflow output
- */
- if(U16_IS_SINGLE(c=*source++)) {
- overflow[0]=(char)(c>>8);
- overflow[1]=(char)c;
- length=2; /* 2 bytes to output */
- c=0;
- /* } else { keep c for surrogate handling, length will be set there */
- }
- } else {
- length=0;
- c=0;
- }
- } else {
- /* keep c for surrogate handling, length will be set there */
- targetCapacity+=2*count;
- }
- } else {
- length=0; /* from here on, length counts the bytes in overflow[] */
- }
-
- if(c!=0) {
- /*
- * c is a surrogate, and
- * - source or target too short
- * - or the surrogate is unmatched
- */
- length=0;
- if(U16_IS_SURROGATE_LEAD(c)) {
- if(source<pArgs->sourceLimit) {
- if(U16_IS_TRAIL(trail=*source)) {
- /* output the surrogate pair, will overflow (see conditions comment above) */
- ++source;
- overflow[0]=(char)(c>>8);
- overflow[1]=(char)c;
- overflow[2]=(char)(trail>>8);
- overflow[3]=(char)trail;
- length=4; /* 4 bytes to output */
- c=0;
- } else {
- /* unmatched lead surrogate */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- }
- } else {
- /* see if the trail surrogate is in the next buffer */
- }
- } else {
- /* unmatched trail surrogate */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- }
- cnv->fromUChar32=c;
- }
-
- if(length>0) {
- /* output length bytes with overflow (length>targetCapacity>0) */
- ucnv_fromUWriteBytes(cnv,
- overflow, length,
- (char **)&target, pArgs->targetLimit,
- &offsets, sourceIndex,
- pErrorCode);
- targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
- }
-
- if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
-
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- pArgs->offsets=offsets;
-}
-
-static void U_CALLCONV
-_UTF16BEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const uint8_t *source;
- UChar *target;
- int32_t *offsets;
-
- uint32_t targetCapacity, length, count, sourceIndex;
- UChar c, trail;
-
- if(pArgs->converter->mode<8) {
- _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode);
- return;
- }
-
- cnv=pArgs->converter;
- source=(const uint8_t *)pArgs->source;
- length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
- if(length<=0 && cnv->toUnicodeStatus==0) {
- /* no input, nothing to do */
- return;
- }
-
- target=pArgs->target;
- if(target >= pArgs->targetLimit) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- return;
- }
-
- targetCapacity=(uint32_t)(pArgs->targetLimit-target);
- offsets=pArgs->offsets;
- sourceIndex=0;
- c=0;
-
- /* complete a partial UChar or pair from the last call */
- if(cnv->toUnicodeStatus!=0) {
- /*
- * special case: single byte from a previous buffer,
- * where the byte turned out not to belong to a trail surrogate
- * and the preceding, unmatched lead surrogate was put into toUBytes[]
- * for error handling
- */
- cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus;
- cnv->toULength=1;
- cnv->toUnicodeStatus=0;
- }
- if((count=cnv->toULength)!=0) {
- uint8_t *p=cnv->toUBytes;
- do {
- p[count++]=*source++;
- ++sourceIndex;
- --length;
- if(count==2) {
- c=((UChar)p[0]<<8)|p[1];
- if(U16_IS_SINGLE(c)) {
- /* output the BMP code point */
- *target++=c;
- if(offsets!=NULL) {
- *offsets++=-1;
- }
- --targetCapacity;
- count=0;
- c=0;
- break;
- } else if(U16_IS_SURROGATE_LEAD(c)) {
- /* continue collecting bytes for the trail surrogate */
- c=0; /* avoid unnecessary surrogate handling below */
- } else {
- /* fall through to error handling for an unmatched trail surrogate */
- break;
- }
- } else if(count==4) {
- c=((UChar)p[0]<<8)|p[1];
- trail=((UChar)p[2]<<8)|p[3];
- if(U16_IS_TRAIL(trail)) {
- /* output the surrogate pair */
- *target++=c;
- if(targetCapacity>=2) {
- *target++=trail;
- if(offsets!=NULL) {
- *offsets++=-1;
- *offsets++=-1;
- }
- targetCapacity-=2;
- } else /* targetCapacity==1 */ {
- targetCapacity=0;
- cnv->UCharErrorBuffer[0]=trail;
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- count=0;
- c=0;
- break;
- } else {
- /* unmatched lead surrogate, handle here for consistent toUBytes[] */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-
- /* back out reading the code unit after it */
- if(((const uint8_t *)pArgs->source-source)>=2) {
- source-=2;
- } else {
- /*
- * if the trail unit's first byte was in a previous buffer, then
- * we need to put it into a special place because toUBytes[] will be
- * used for the lead unit's bytes
- */
- cnv->toUnicodeStatus=0x100|p[2];
- --source;
- }
- cnv->toULength=2;
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- pArgs->offsets=offsets;
- return;
- }
- }
- } while(length>0);
- cnv->toULength=(int8_t)count;
- }
-
- /* copy an even number of bytes for complete UChars */
- count=2*targetCapacity;
- if(count>length) {
- count=length&~1;
- }
- if(c==0 && count>0) {
- length-=count;
- count>>=1;
- targetCapacity-=count;
- if(offsets==NULL) {
- do {
- c=((UChar)source[0]<<8)|source[1];
- source+=2;
- if(U16_IS_SINGLE(c)) {
- *target++=c;
- } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
- U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])
- ) {
- source+=2;
- --count;
- *target++=c;
- *target++=trail;
- } else {
- break;
- }
- } while(--count>0);
- } else {
- do {
- c=((UChar)source[0]<<8)|source[1];
- source+=2;
- if(U16_IS_SINGLE(c)) {
- *target++=c;
- *offsets++=sourceIndex;
- sourceIndex+=2;
- } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
- U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])
- ) {
- source+=2;
- --count;
- *target++=c;
- *target++=trail;
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- sourceIndex+=4;
- } else {
- break;
- }
- } while(--count>0);
- }
-
- if(count==0) {
- /* done with the loop for complete UChars */
- c=0;
- } else {
- /* keep c for surrogate handling, trail will be set there */
- length+=2*(count-1); /* one more byte pair was consumed than count decremented */
- targetCapacity+=count;
- }
- }
-
- if(c!=0) {
- /*
- * c is a surrogate, and
- * - source or target too short
- * - or the surrogate is unmatched
- */
- cnv->toUBytes[0]=(uint8_t)(c>>8);
- cnv->toUBytes[1]=(uint8_t)c;
- cnv->toULength=2;
-
- if(U16_IS_SURROGATE_LEAD(c)) {
- if(length>=2) {
- if(U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])) {
- /* output the surrogate pair, will overflow (see conditions comment above) */
- source+=2;
- length-=2;
- *target++=c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- cnv->UCharErrorBuffer[0]=trail;
- cnv->UCharErrorBufferLength=1;
- cnv->toULength=0;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- } else {
- /* unmatched lead surrogate */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- }
- } else {
- /* see if the trail surrogate is in the next buffer */
- }
- } else {
- /* unmatched trail surrogate */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- }
- }
-
- if(U_SUCCESS(*pErrorCode)) {
- /* check for a remaining source byte */
- if(length>0) {
- if(targetCapacity==0) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- } else {
- /* it must be length==1 because otherwise the above would have copied more */
- cnv->toUBytes[cnv->toULength++]=*source++;
- }
- }
- }
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- pArgs->offsets=offsets;
-}
-
-static UChar32 U_CALLCONV
-_UTF16BEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
- const uint8_t *s, *sourceLimit;
- UChar32 c;
-
- if(pArgs->converter->mode<8) {
- return UCNV_GET_NEXT_UCHAR_USE_TO_U;
- }
-
- s=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
-
- if(s>=sourceLimit) {
- /* no input */
- *err=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0xffff;
- }
-
- if(s+2>sourceLimit) {
- /* only one byte: truncated UChar */
- pArgs->converter->toUBytes[0]=*s++;
- pArgs->converter->toULength=1;
- pArgs->source=(const char *)s;
- *err = U_TRUNCATED_CHAR_FOUND;
- return 0xffff;
- }
-
- /* get one UChar */
- c=((UChar32)*s<<8)|s[1];
- s+=2;
-
- /* check for a surrogate pair */
- if(U_IS_SURROGATE(c)) {
- if(U16_IS_SURROGATE_LEAD(c)) {
- if(s+2<=sourceLimit) {
- UChar trail;
-
- /* get a second UChar and see if it is a trail surrogate */
- trail=((UChar)*s<<8)|s[1];
- if(U16_IS_TRAIL(trail)) {
- c=U16_GET_SUPPLEMENTARY(c, trail);
- s+=2;
- } else {
- /* unmatched lead surrogate */
- c=-2;
- }
- } else {
- /* too few (2 or 3) bytes for a surrogate pair: truncated code point */
- uint8_t *bytes=pArgs->converter->toUBytes;
- s-=2;
- pArgs->converter->toULength=(int8_t)(sourceLimit-s);
- do {
- *bytes++=*s++;
- } while(s<sourceLimit);
-
- c=0xffff;
- *err=U_TRUNCATED_CHAR_FOUND;
- }
- } else {
- /* unmatched trail surrogate */
- c=-2;
- }
-
- if(c<0) {
- /* write the unmatched surrogate */
- uint8_t *bytes=pArgs->converter->toUBytes;
- pArgs->converter->toULength=2;
- *bytes=*(s-2);
- bytes[1]=*(s-1);
-
- c=0xffff;
- *err=U_ILLEGAL_CHAR_FOUND;
- }
- }
-
- pArgs->source=(const char *)s;
- return c;
-}
-
-static void U_CALLCONV
-_UTF16BEReset(UConverter *cnv, UConverterResetChoice choice) {
- if(choice<=UCNV_RESET_TO_UNICODE) {
- /* reset toUnicode state */
- if(UCNV_GET_VERSION(cnv)==0) {
- cnv->mode=8; /* no BOM handling */
- } else {
- cnv->mode=0; /* Java-specific "UnicodeBig" requires BE BOM or no BOM */
- }
- }
- if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) {
- /* reset fromUnicode for "UnicodeBig": prepare to output the UTF-16BE BOM */
- cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
- }
-}
-
-static void U_CALLCONV
-_UTF16BEOpen(UConverter *cnv,
- UConverterLoadArgs *pArgs,
- UErrorCode *pErrorCode) {
- (void)pArgs;
- if(UCNV_GET_VERSION(cnv)<=1) {
- _UTF16BEReset(cnv, UCNV_RESET_BOTH);
- } else {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- }
-}
-
-static const char * U_CALLCONV
-_UTF16BEGetName(const UConverter *cnv) {
- if(UCNV_GET_VERSION(cnv)==0) {
- return "UTF-16BE";
- } else {
- return "UTF-16BE,version=1";
- }
-}
-U_CDECL_END
-
-static const UConverterImpl _UTF16BEImpl={
- UCNV_UTF16_BigEndian,
-
- NULL,
- NULL,
-
- _UTF16BEOpen,
- NULL,
- _UTF16BEReset,
-
- _UTF16BEToUnicodeWithOffsets,
- _UTF16BEToUnicodeWithOffsets,
- _UTF16BEFromUnicodeWithOffsets,
- _UTF16BEFromUnicodeWithOffsets,
- _UTF16BEGetNextUChar,
-
- NULL,
- _UTF16BEGetName,
- NULL,
- NULL,
- ucnv_getNonSurrogateUnicodeSet,
-
- NULL,
- NULL
-};
-
-static const UConverterStaticData _UTF16BEStaticData={
- sizeof(UConverterStaticData),
- "UTF-16BE",
- 1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2,
- { 0xff, 0xfd, 0, 0 },2,FALSE,FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-
-const UConverterSharedData _UTF16BEData=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16BEStaticData, &_UTF16BEImpl);
-
-/* UTF-16LE ----------------------------------------------------------------- */
-U_CDECL_BEGIN
-static void U_CALLCONV
-_UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const UChar *source;
- char *target;
- int32_t *offsets;
-
- uint32_t targetCapacity, length, sourceIndex;
- UChar c, trail;
- char overflow[4];
-
- source=pArgs->source;
- length=(int32_t)(pArgs->sourceLimit-source);
- if(length<=0) {
- /* no input, nothing to do */
- return;
- }
-
- cnv=pArgs->converter;
-
- /* write the BOM if necessary */
- if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
- static const char bom[]={ (char)0xffu, (char)0xfeu };
- ucnv_fromUWriteBytes(cnv,
- bom, 2,
- &pArgs->target, pArgs->targetLimit,
- &pArgs->offsets, -1,
- pErrorCode);
- cnv->fromUnicodeStatus=0;
- }
-
- target=pArgs->target;
- if(target >= pArgs->targetLimit) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- return;
- }
-
- targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
- offsets=pArgs->offsets;
- sourceIndex=0;
-
- /* c!=0 indicates in several places outside the main loops that a surrogate was found */
-
- if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) {
- /* the last buffer ended with a lead surrogate, output the surrogate pair */
- ++source;
- --length;
- target[0]=(uint8_t)c;
- target[1]=(uint8_t)(c>>8);
- target[2]=(uint8_t)trail;
- target[3]=(uint8_t)(trail>>8);
- target+=4;
- targetCapacity-=4;
- if(offsets!=NULL) {
- *offsets++=-1;
- *offsets++=-1;
- *offsets++=-1;
- *offsets++=-1;
- }
- sourceIndex=1;
- cnv->fromUChar32=c=0;
- }
-
- if(c==0) {
- /* copy an even number of bytes for complete UChars */
- uint32_t count=2*length;
- if(count>targetCapacity) {
- count=targetCapacity&~1;
- }
- /* count is even */
- targetCapacity-=count;
- count>>=1;
- length-=count;
-
- if(offsets==NULL) {
- while(count>0) {
- c=*source++;
- if(U16_IS_SINGLE(c)) {
- target[0]=(uint8_t)c;
- target[1]=(uint8_t)(c>>8);
- target+=2;
- } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
- ++source;
- --count;
- target[0]=(uint8_t)c;
- target[1]=(uint8_t)(c>>8);
- target[2]=(uint8_t)trail;
- target[3]=(uint8_t)(trail>>8);
- target+=4;
- } else {
- break;
- }
- --count;
- }
- } else {
- while(count>0) {
- c=*source++;
- if(U16_IS_SINGLE(c)) {
- target[0]=(uint8_t)c;
- target[1]=(uint8_t)(c>>8);
- target+=2;
- *offsets++=sourceIndex;
- *offsets++=sourceIndex++;
- } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
- ++source;
- --count;
- target[0]=(uint8_t)c;
- target[1]=(uint8_t)(c>>8);
- target[2]=(uint8_t)trail;
- target[3]=(uint8_t)(trail>>8);
- target+=4;
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- sourceIndex+=2;
- } else {
- break;
- }
- --count;
- }
- }
-
- if(count==0) {
- /* done with the loop for complete UChars */
- if(length>0 && targetCapacity>0) {
- /*
- * there is more input and some target capacity -
- * it must be targetCapacity==1 because otherwise
- * the above would have copied more;
- * prepare for overflow output
- */
- if(U16_IS_SINGLE(c=*source++)) {
- overflow[0]=(char)c;
- overflow[1]=(char)(c>>8);
- length=2; /* 2 bytes to output */
- c=0;
- /* } else { keep c for surrogate handling, length will be set there */
- }
- } else {
- length=0;
- c=0;
- }
- } else {
- /* keep c for surrogate handling, length will be set there */
- targetCapacity+=2*count;
- }
- } else {
- length=0; /* from here on, length counts the bytes in overflow[] */
- }
-
- if(c!=0) {
- /*
- * c is a surrogate, and
- * - source or target too short
- * - or the surrogate is unmatched
- */
- length=0;
- if(U16_IS_SURROGATE_LEAD(c)) {
- if(source<pArgs->sourceLimit) {
- if(U16_IS_TRAIL(trail=*source)) {
- /* output the surrogate pair, will overflow (see conditions comment above) */
- ++source;
- overflow[0]=(char)c;
- overflow[1]=(char)(c>>8);
- overflow[2]=(char)trail;
- overflow[3]=(char)(trail>>8);
- length=4; /* 4 bytes to output */
- c=0;
- } else {
- /* unmatched lead surrogate */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- }
- } else {
- /* see if the trail surrogate is in the next buffer */
- }
- } else {
- /* unmatched trail surrogate */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- }
- cnv->fromUChar32=c;
- }
-
- if(length>0) {
- /* output length bytes with overflow (length>targetCapacity>0) */
- ucnv_fromUWriteBytes(cnv,
- overflow, length,
- &target, pArgs->targetLimit,
- &offsets, sourceIndex,
- pErrorCode);
- targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
- }
-
- if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
-
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=target;
- pArgs->offsets=offsets;
-}
-
-static void U_CALLCONV
-_UTF16LEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const uint8_t *source;
- UChar *target;
- int32_t *offsets;
-
- uint32_t targetCapacity, length, count, sourceIndex;
- UChar c, trail;
-
- if(pArgs->converter->mode<8) {
- _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode);
- return;
- }
-
- cnv=pArgs->converter;
- source=(const uint8_t *)pArgs->source;
- length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
- if(length<=0 && cnv->toUnicodeStatus==0) {
- /* no input, nothing to do */
- return;
- }
-
- target=pArgs->target;
- if(target >= pArgs->targetLimit) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- return;
- }
-
- targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
- offsets=pArgs->offsets;
- sourceIndex=0;
- c=0;
-
- /* complete a partial UChar or pair from the last call */
- if(cnv->toUnicodeStatus!=0) {
- /*
- * special case: single byte from a previous buffer,
- * where the byte turned out not to belong to a trail surrogate
- * and the preceding, unmatched lead surrogate was put into toUBytes[]
- * for error handling
- */
- cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus;
- cnv->toULength=1;
- cnv->toUnicodeStatus=0;
- }
- if((count=cnv->toULength)!=0) {
- uint8_t *p=cnv->toUBytes;
- do {
- p[count++]=*source++;
- ++sourceIndex;
- --length;
- if(count==2) {
- c=((UChar)p[1]<<8)|p[0];
- if(U16_IS_SINGLE(c)) {
- /* output the BMP code point */
- *target++=c;
- if(offsets!=NULL) {
- *offsets++=-1;
- }
- --targetCapacity;
- count=0;
- c=0;
- break;
- } else if(U16_IS_SURROGATE_LEAD(c)) {
- /* continue collecting bytes for the trail surrogate */
- c=0; /* avoid unnecessary surrogate handling below */
- } else {
- /* fall through to error handling for an unmatched trail surrogate */
- break;
- }
- } else if(count==4) {
- c=((UChar)p[1]<<8)|p[0];
- trail=((UChar)p[3]<<8)|p[2];
- if(U16_IS_TRAIL(trail)) {
- /* output the surrogate pair */
- *target++=c;
- if(targetCapacity>=2) {
- *target++=trail;
- if(offsets!=NULL) {
- *offsets++=-1;
- *offsets++=-1;
- }
- targetCapacity-=2;
- } else /* targetCapacity==1 */ {
- targetCapacity=0;
- cnv->UCharErrorBuffer[0]=trail;
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- count=0;
- c=0;
- break;
- } else {
- /* unmatched lead surrogate, handle here for consistent toUBytes[] */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-
- /* back out reading the code unit after it */
- if(((const uint8_t *)pArgs->source-source)>=2) {
- source-=2;
- } else {
- /*
- * if the trail unit's first byte was in a previous buffer, then
- * we need to put it into a special place because toUBytes[] will be
- * used for the lead unit's bytes
- */
- cnv->toUnicodeStatus=0x100|p[2];
- --source;
- }
- cnv->toULength=2;
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- pArgs->offsets=offsets;
- return;
- }
- }
- } while(length>0);
- cnv->toULength=(int8_t)count;
- }
-
- /* copy an even number of bytes for complete UChars */
- count=2*targetCapacity;
- if(count>length) {
- count=length&~1;
- }
- if(c==0 && count>0) {
- length-=count;
- count>>=1;
- targetCapacity-=count;
- if(offsets==NULL) {
- do {
- c=((UChar)source[1]<<8)|source[0];
- source+=2;
- if(U16_IS_SINGLE(c)) {
- *target++=c;
- } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
- U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])
- ) {
- source+=2;
- --count;
- *target++=c;
- *target++=trail;
- } else {
- break;
- }
- } while(--count>0);
- } else {
- do {
- c=((UChar)source[1]<<8)|source[0];
- source+=2;
- if(U16_IS_SINGLE(c)) {
- *target++=c;
- *offsets++=sourceIndex;
- sourceIndex+=2;
- } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
- U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])
- ) {
- source+=2;
- --count;
- *target++=c;
- *target++=trail;
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- sourceIndex+=4;
- } else {
- break;
- }
- } while(--count>0);
- }
-
- if(count==0) {
- /* done with the loop for complete UChars */
- c=0;
- } else {
- /* keep c for surrogate handling, trail will be set there */
- length+=2*(count-1); /* one more byte pair was consumed than count decremented */
- targetCapacity+=count;
- }
- }
-
- if(c!=0) {
- /*
- * c is a surrogate, and
- * - source or target too short
- * - or the surrogate is unmatched
- */
- cnv->toUBytes[0]=(uint8_t)c;
- cnv->toUBytes[1]=(uint8_t)(c>>8);
- cnv->toULength=2;
-
- if(U16_IS_SURROGATE_LEAD(c)) {
- if(length>=2) {
- if(U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])) {
- /* output the surrogate pair, will overflow (see conditions comment above) */
- source+=2;
- length-=2;
- *target++=c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- cnv->UCharErrorBuffer[0]=trail;
- cnv->UCharErrorBufferLength=1;
- cnv->toULength=0;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- } else {
- /* unmatched lead surrogate */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- }
- } else {
- /* see if the trail surrogate is in the next buffer */
- }
- } else {
- /* unmatched trail surrogate */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- }
- }
-
- if(U_SUCCESS(*pErrorCode)) {
- /* check for a remaining source byte */
- if(length>0) {
- if(targetCapacity==0) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- } else {
- /* it must be length==1 because otherwise the above would have copied more */
- cnv->toUBytes[cnv->toULength++]=*source++;
- }
- }
- }
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- pArgs->offsets=offsets;
-}
-
-static UChar32 U_CALLCONV
-_UTF16LEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
- const uint8_t *s, *sourceLimit;
- UChar32 c;
-
- if(pArgs->converter->mode<8) {
- return UCNV_GET_NEXT_UCHAR_USE_TO_U;
- }
-
- s=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
-
- if(s>=sourceLimit) {
- /* no input */
- *err=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0xffff;
- }
-
- if(s+2>sourceLimit) {
- /* only one byte: truncated UChar */
- pArgs->converter->toUBytes[0]=*s++;
- pArgs->converter->toULength=1;
- pArgs->source=(const char *)s;
- *err = U_TRUNCATED_CHAR_FOUND;
- return 0xffff;
- }
-
- /* get one UChar */
- c=((UChar32)s[1]<<8)|*s;
- s+=2;
-
- /* check for a surrogate pair */
- if(U_IS_SURROGATE(c)) {
- if(U16_IS_SURROGATE_LEAD(c)) {
- if(s+2<=sourceLimit) {
- UChar trail;
-
- /* get a second UChar and see if it is a trail surrogate */
- trail=((UChar)s[1]<<8)|*s;
- if(U16_IS_TRAIL(trail)) {
- c=U16_GET_SUPPLEMENTARY(c, trail);
- s+=2;
- } else {
- /* unmatched lead surrogate */
- c=-2;
- }
- } else {
- /* too few (2 or 3) bytes for a surrogate pair: truncated code point */
- uint8_t *bytes=pArgs->converter->toUBytes;
- s-=2;
- pArgs->converter->toULength=(int8_t)(sourceLimit-s);
- do {
- *bytes++=*s++;
- } while(s<sourceLimit);
-
- c=0xffff;
- *err=U_TRUNCATED_CHAR_FOUND;
- }
- } else {
- /* unmatched trail surrogate */
- c=-2;
- }
-
- if(c<0) {
- /* write the unmatched surrogate */
- uint8_t *bytes=pArgs->converter->toUBytes;
- pArgs->converter->toULength=2;
- *bytes=*(s-2);
- bytes[1]=*(s-1);
-
- c=0xffff;
- *err=U_ILLEGAL_CHAR_FOUND;
- }
- }
-
- pArgs->source=(const char *)s;
- return c;
-}
-
-static void U_CALLCONV
-_UTF16LEReset(UConverter *cnv, UConverterResetChoice choice) {
- if(choice<=UCNV_RESET_TO_UNICODE) {
- /* reset toUnicode state */
- if(UCNV_GET_VERSION(cnv)==0) {
- cnv->mode=8; /* no BOM handling */
- } else {
- cnv->mode=0; /* Java-specific "UnicodeLittle" requires LE BOM or no BOM */
- }
- }
- if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) {
- /* reset fromUnicode for "UnicodeLittle": prepare to output the UTF-16LE BOM */
- cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
- }
-}
-
-static void U_CALLCONV
-_UTF16LEOpen(UConverter *cnv,
- UConverterLoadArgs *pArgs,
- UErrorCode *pErrorCode) {
- (void)pArgs;
- if(UCNV_GET_VERSION(cnv)<=1) {
- _UTF16LEReset(cnv, UCNV_RESET_BOTH);
- } else {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- }
-}
-
-static const char * U_CALLCONV
-_UTF16LEGetName(const UConverter *cnv) {
- if(UCNV_GET_VERSION(cnv)==0) {
- return "UTF-16LE";
- } else {
- return "UTF-16LE,version=1";
- }
-}
-U_CDECL_END
-
-static const UConverterImpl _UTF16LEImpl={
- UCNV_UTF16_LittleEndian,
-
- NULL,
- NULL,
-
- _UTF16LEOpen,
- NULL,
- _UTF16LEReset,
-
- _UTF16LEToUnicodeWithOffsets,
- _UTF16LEToUnicodeWithOffsets,
- _UTF16LEFromUnicodeWithOffsets,
- _UTF16LEFromUnicodeWithOffsets,
- _UTF16LEGetNextUChar,
-
- NULL,
- _UTF16LEGetName,
- NULL,
- NULL,
- ucnv_getNonSurrogateUnicodeSet,
-
- NULL,
- NULL
-};
-
-
-static const UConverterStaticData _UTF16LEStaticData={
- sizeof(UConverterStaticData),
- "UTF-16LE",
- 1202, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2,
- { 0xfd, 0xff, 0, 0 },2,FALSE,FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-
-const UConverterSharedData _UTF16LEData=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16LEStaticData, &_UTF16LEImpl);
-
-/* UTF-16 (Detect BOM) ------------------------------------------------------ */
-
-/*
- * Detect a BOM at the beginning of the stream and select UTF-16BE or UTF-16LE
- * accordingly.
- * This is a simpler version of the UTF-32 converter, with
- * fewer states for shorter BOMs.
- *
- * State values:
- * 0 initial state
- * 1 saw first byte
- * 2..5 -
- * 6..7 see _UTF16ToUnicodeWithOffsets() comments in state 1
- * 8 UTF-16BE mode
- * 9 UTF-16LE mode
- *
- * During detection: state==number of initial bytes seen so far.
- *
- * On output, emit U+FEFF as the first code point.
- *
- * Variants:
- * - UTF-16,version=1 (Java "Unicode" encoding) treats a missing BOM as an error.
- * - UTF-16BE,version=1 (Java "UnicodeBig" encoding) and
- * UTF-16LE,version=1 (Java "UnicodeLittle" encoding) treat a reverse BOM as an error.
- */
-U_CDECL_BEGIN
-static void U_CALLCONV
-_UTF16Reset(UConverter *cnv, UConverterResetChoice choice) {
- if(choice<=UCNV_RESET_TO_UNICODE) {
- /* reset toUnicode: state=0 */
- cnv->mode=0;
- }
- if(choice!=UCNV_RESET_TO_UNICODE) {
- /* reset fromUnicode: prepare to output the UTF-16PE BOM */
- cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
- }
-}
-U_CDECL_END
-extern const UConverterSharedData _UTF16v2Data;
-U_CDECL_BEGIN
-static void U_CALLCONV
-_UTF16Open(UConverter *cnv,
- UConverterLoadArgs *pArgs,
- UErrorCode *pErrorCode) {
- if(UCNV_GET_VERSION(cnv)<=2) {
- if(UCNV_GET_VERSION(cnv)==2 && !pArgs->onlyTestIsLoadable) {
- /*
- * Switch implementation, and switch the staticData that's different
- * and was copied into the UConverter.
- * (See ucnv_createConverterFromSharedData() in ucnv_bld.c.)
- * UTF-16,version=2 fromUnicode() always writes a big-endian byte stream.
- */
- cnv->sharedData=(UConverterSharedData*)&_UTF16v2Data;
- uprv_memcpy(cnv->subChars, _UTF16v2Data.staticData->subChar, UCNV_MAX_SUBCHAR_LEN);
- }
- _UTF16Reset(cnv, UCNV_RESET_BOTH);
- } else {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- }
-}
-
-static const char * U_CALLCONV
-_UTF16GetName(const UConverter *cnv) {
- if(UCNV_GET_VERSION(cnv)==0) {
- return "UTF-16";
- } else if(UCNV_GET_VERSION(cnv)==1) {
- return "UTF-16,version=1";
- } else {
- return "UTF-16,version=2";
- }
-}
-U_CDECL_END
-extern const UConverterSharedData _UTF16Data;
-
-static inline bool IS_UTF16BE(const UConverter *cnv) {
- return ((cnv)->sharedData == &_UTF16BEData);
-}
-
-static inline bool IS_UTF16LE(const UConverter *cnv) {
- return ((cnv)->sharedData == &_UTF16LEData);
-}
-
-static inline bool IS_UTF16(const UConverter *cnv) {
- return ((cnv)->sharedData==&_UTF16Data) || ((cnv)->sharedData == &_UTF16v2Data);
-}
-
-U_CDECL_BEGIN
-static void U_CALLCONV
-_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv=pArgs->converter;
- const char *source=pArgs->source;
- const char *sourceLimit=pArgs->sourceLimit;
- int32_t *offsets=pArgs->offsets;
-
- int32_t state, offsetDelta;
- uint8_t b;
-
- state=cnv->mode;
-
- /*
- * If we detect a BOM in this buffer, then we must add the BOM size to the
- * offsets because the actual converter function will not see and count the BOM.
- * offsetDelta will have the number of the BOM bytes that are in the current buffer.
- */
- offsetDelta=0;
-
- while(source<sourceLimit && U_SUCCESS(*pErrorCode)) {
- switch(state) {
- case 0:
- cnv->toUBytes[0]=(uint8_t)*source++;
- cnv->toULength=1;
- state=1;
- break;
- case 1:
- /*
- * Only inside this switch case can the state variable
- * temporarily take two additional values:
- * 6: BOM error, continue with BE
- * 7: BOM error, continue with LE
- */
- b=*source;
- if(cnv->toUBytes[0]==0xfe && b==0xff) {
- if(IS_UTF16LE(cnv)) {
- state=7; /* illegal reverse BOM for Java "UnicodeLittle" */
- } else {
- state=8; /* detect UTF-16BE */
- }
- } else if(cnv->toUBytes[0]==0xff && b==0xfe) {
- if(IS_UTF16BE(cnv)) {
- state=6; /* illegal reverse BOM for Java "UnicodeBig" */
- } else {
- state=9; /* detect UTF-16LE */
- }
- } else if((IS_UTF16(cnv) && UCNV_GET_VERSION(cnv)==1)) {
- state=6; /* illegal missing BOM for Java "Unicode" */
- }
- if(state>=8) {
- /* BOM detected, consume it */
- ++source;
- cnv->toULength=0;
- offsetDelta=(int32_t)(source-pArgs->source);
- } else if(state<6) {
- /* ok: no BOM, and not a reverse BOM */
- if(source!=pArgs->source) {
- /* reset the source for a correct first offset */
- source=pArgs->source;
- cnv->toULength=0;
- }
- if(IS_UTF16LE(cnv)) {
- /* Make Java "UnicodeLittle" default to LE. */
- state=9;
- } else {
- /* Make standard UTF-16 and Java "UnicodeBig" default to BE. */
- state=8;
- }
- } else {
- /*
- * error: missing BOM, or reverse BOM
- * UTF-16,version=1: Java-specific "Unicode" requires a BOM.
- * UTF-16BE,version=1: Java-specific "UnicodeBig" requires a BE BOM or no BOM.
- * UTF-16LE,version=1: Java-specific "UnicodeLittle" requires an LE BOM or no BOM.
- */
- /* report the non-BOM or reverse BOM as an illegal sequence */
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- pArgs->source=source+1;
- /* continue with conversion if the callback resets the error */
- /*
- * Make Java "Unicode" default to BE like standard UTF-16.
- * Make Java "UnicodeBig" and "UnicodeLittle" default
- * to their normal endiannesses.
- */
- cnv->mode=state+2;
- *pErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;
- return;
- }
- /* convert the rest of the stream */
- cnv->mode=state;
- continue;
- case 8:
- /* call UTF-16BE */
- pArgs->source=source;
- _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
- source=pArgs->source;
- break;
- case 9:
- /* call UTF-16LE */
- pArgs->source=source;
- _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode);
- source=pArgs->source;
- break;
- default:
- break; /* does not occur */
- }
- }
-
- /* add BOM size to offsets - see comment at offsetDelta declaration */
- if(offsets!=NULL && offsetDelta!=0) {
- int32_t *offsetsLimit=pArgs->offsets;
- while(offsets<offsetsLimit) {
- *offsets++ += offsetDelta;
- }
- }
-
- pArgs->source=source;
-
- if(source==sourceLimit && pArgs->flush) {
- /* handle truncated input */
- switch(state) {
- case 0:
- break; /* no input at all, nothing to do */
- case 8:
- _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
- break;
- case 9:
- _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode);
- break;
- default:
- /* 0<state<8: framework will report truncation, nothing to do here */
- break;
- }
- }
-
- cnv->mode=state;
-}
-
-static UChar32 U_CALLCONV
-_UTF16GetNextUChar(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- switch(pArgs->converter->mode) {
- case 8:
- return _UTF16BEGetNextUChar(pArgs, pErrorCode);
- case 9:
- return _UTF16LEGetNextUChar(pArgs, pErrorCode);
- default:
- return UCNV_GET_NEXT_UCHAR_USE_TO_U;
- }
-}
-U_CDECL_END
-
-static const UConverterImpl _UTF16Impl = {
- UCNV_UTF16,
-
- NULL,
- NULL,
-
- _UTF16Open,
- NULL,
- _UTF16Reset,
-
- _UTF16ToUnicodeWithOffsets,
- _UTF16ToUnicodeWithOffsets,
- _UTF16PEFromUnicodeWithOffsets,
- _UTF16PEFromUnicodeWithOffsets,
- _UTF16GetNextUChar,
-
- NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
- _UTF16GetName,
- NULL,
- NULL,
- ucnv_getNonSurrogateUnicodeSet,
-
- NULL,
- NULL
-};
-
-static const UConverterStaticData _UTF16StaticData = {
- sizeof(UConverterStaticData),
- "UTF-16",
- 1204, /* CCSID for BOM sensitive UTF-16 */
- UCNV_IBM, UCNV_UTF16, 2, 2,
-#if U_IS_BIG_ENDIAN
- { 0xff, 0xfd, 0, 0 }, 2,
-#else
- { 0xfd, 0xff, 0, 0 }, 2,
-#endif
- FALSE, FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-const UConverterSharedData _UTF16Data =
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16StaticData, &_UTF16Impl);
-
-static const UConverterImpl _UTF16v2Impl = {
- UCNV_UTF16,
-
- NULL,
- NULL,
-
- _UTF16Open,
- NULL,
- _UTF16Reset,
-
- _UTF16ToUnicodeWithOffsets,
- _UTF16ToUnicodeWithOffsets,
- _UTF16BEFromUnicodeWithOffsets,
- _UTF16BEFromUnicodeWithOffsets,
- _UTF16GetNextUChar,
-
- NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
- _UTF16GetName,
- NULL,
- NULL,
- ucnv_getNonSurrogateUnicodeSet,
-
- NULL,
- NULL
-};
-
-static const UConverterStaticData _UTF16v2StaticData = {
- sizeof(UConverterStaticData),
- "UTF-16,version=2",
- 1204, /* CCSID for BOM sensitive UTF-16 */
- UCNV_IBM, UCNV_UTF16, 2, 2,
- { 0xff, 0xfd, 0, 0 }, 2,
- FALSE, FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-const UConverterSharedData _UTF16v2Data =
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16v2StaticData, &_UTF16v2Impl);
-
-#endif
diff --git a/contrib/libs/icu/common/ucnv_u32.cpp b/contrib/libs/icu/common/ucnv_u32.cpp
deleted file mode 100644
index 9f98914b9d7..00000000000
--- a/contrib/libs/icu/common/ucnv_u32.cpp
+++ /dev/null
@@ -1,1253 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2002-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* file name: ucnv_u32.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002jul01
-* created by: Markus W. Scherer
-*
-* UTF-32 converter implementation. Used to be in ucnv_utf.c.
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "unicode/utf.h"
-#include "ucnv_bld.h"
-#include "ucnv_cnv.h"
-#include "cmemory.h"
-
-#define MAXIMUM_UCS2 0x0000FFFF
-#define MAXIMUM_UTF 0x0010FFFF
-#define HALF_SHIFT 10
-#define HALF_BASE 0x0010000
-#define HALF_MASK 0x3FF
-#define SURROGATE_HIGH_START 0xD800
-#define SURROGATE_LOW_START 0xDC00
-
-/* -SURROGATE_LOW_START + HALF_BASE */
-#define SURROGATE_LOW_BASE 9216
-
-enum {
- UCNV_NEED_TO_WRITE_BOM=1
-};
-
-/* UTF-32BE ----------------------------------------------------------------- */
-U_CDECL_BEGIN
-static void U_CALLCONV
-T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args,
- UErrorCode * err)
-{
- const unsigned char *mySource = (unsigned char *) args->source;
- UChar *myTarget = args->target;
- const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
- const UChar *targetLimit = args->targetLimit;
- unsigned char *toUBytes = args->converter->toUBytes;
- uint32_t ch, i;
-
- /* Restore state of current sequence */
- if (args->converter->toULength > 0 && myTarget < targetLimit) {
- i = args->converter->toULength; /* restore # of bytes consumed */
- args->converter->toULength = 0;
-
- ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/
- args->converter->toUnicodeStatus = 0;
- goto morebytes;
- }
-
- while (mySource < sourceLimit && myTarget < targetLimit) {
- i = 0;
- ch = 0;
-morebytes:
- while (i < sizeof(uint32_t)) {
- if (mySource < sourceLimit) {
- ch = (ch << 8) | (uint8_t)(*mySource);
- toUBytes[i++] = (char) *(mySource++);
- }
- else {
- /* stores a partially calculated target*/
- /* + 1 to make 0 a valid character */
- args->converter->toUnicodeStatus = ch + 1;
- args->converter->toULength = (int8_t) i;
- goto donefornow;
- }
- }
-
- if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
- /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
- if (ch <= MAXIMUM_UCS2)
- {
- /* fits in 16 bits */
- *(myTarget++) = (UChar) ch;
- }
- else {
- /* write out the surrogates */
- *(myTarget++) = U16_LEAD(ch);
- ch = U16_TRAIL(ch);
- if (myTarget < targetLimit) {
- *(myTarget++) = (UChar)ch;
- }
- else {
- /* Put in overflow buffer (not handled here) */
- args->converter->UCharErrorBuffer[0] = (UChar) ch;
- args->converter->UCharErrorBufferLength = 1;
- *err = U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- }
- else {
- args->converter->toULength = (int8_t)i;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
-
-donefornow:
- if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
- /* End of target buffer */
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
-
- args->target = myTarget;
- args->source = (const char *) mySource;
-}
-
-static void U_CALLCONV
-T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
- UErrorCode * err)
-{
- const unsigned char *mySource = (unsigned char *) args->source;
- UChar *myTarget = args->target;
- int32_t *myOffsets = args->offsets;
- const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
- const UChar *targetLimit = args->targetLimit;
- unsigned char *toUBytes = args->converter->toUBytes;
- uint32_t ch, i;
- int32_t offsetNum = 0;
-
- /* Restore state of current sequence */
- if (args->converter->toULength > 0 && myTarget < targetLimit) {
- i = args->converter->toULength; /* restore # of bytes consumed */
- args->converter->toULength = 0;
-
- ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/
- args->converter->toUnicodeStatus = 0;
- goto morebytes;
- }
-
- while (mySource < sourceLimit && myTarget < targetLimit) {
- i = 0;
- ch = 0;
-morebytes:
- while (i < sizeof(uint32_t)) {
- if (mySource < sourceLimit) {
- ch = (ch << 8) | (uint8_t)(*mySource);
- toUBytes[i++] = (char) *(mySource++);
- }
- else {
- /* stores a partially calculated target*/
- /* + 1 to make 0 a valid character */
- args->converter->toUnicodeStatus = ch + 1;
- args->converter->toULength = (int8_t) i;
- goto donefornow;
- }
- }
-
- if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
- /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
- if (ch <= MAXIMUM_UCS2) {
- /* fits in 16 bits */
- *(myTarget++) = (UChar) ch;
- *(myOffsets++) = offsetNum;
- }
- else {
- /* write out the surrogates */
- *(myTarget++) = U16_LEAD(ch);
- *myOffsets++ = offsetNum;
- ch = U16_TRAIL(ch);
- if (myTarget < targetLimit)
- {
- *(myTarget++) = (UChar)ch;
- *(myOffsets++) = offsetNum;
- }
- else {
- /* Put in overflow buffer (not handled here) */
- args->converter->UCharErrorBuffer[0] = (UChar) ch;
- args->converter->UCharErrorBufferLength = 1;
- *err = U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- }
- else {
- args->converter->toULength = (int8_t)i;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- offsetNum += i;
- }
-
-donefornow:
- if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
- {
- /* End of target buffer */
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
-
- args->target = myTarget;
- args->source = (const char *) mySource;
- args->offsets = myOffsets;
-}
-
-static void U_CALLCONV
-T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,
- UErrorCode * err)
-{
- const UChar *mySource = args->source;
- unsigned char *myTarget;
- const UChar *sourceLimit = args->sourceLimit;
- const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
- UChar32 ch, ch2;
- unsigned int indexToWrite;
- unsigned char temp[sizeof(uint32_t)];
-
- if(mySource >= sourceLimit) {
- /* no input, nothing to do */
- return;
- }
-
- /* write the BOM if necessary */
- if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
- static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu };
- ucnv_fromUWriteBytes(args->converter,
- bom, 4,
- &args->target, args->targetLimit,
- &args->offsets, -1,
- err);
- args->converter->fromUnicodeStatus=0;
- }
-
- myTarget = (unsigned char *) args->target;
- temp[0] = 0;
-
- if (args->converter->fromUChar32) {
- ch = args->converter->fromUChar32;
- args->converter->fromUChar32 = 0;
- goto lowsurogate;
- }
-
- while (mySource < sourceLimit && myTarget < targetLimit) {
- ch = *(mySource++);
-
- if (U_IS_SURROGATE(ch)) {
- if (U_IS_LEAD(ch)) {
-lowsurogate:
- if (mySource < sourceLimit) {
- ch2 = *mySource;
- if (U_IS_TRAIL(ch2)) {
- ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
- mySource++;
- }
- else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- args->converter->fromUChar32 = ch;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
- else {
- /* ran out of source */
- args->converter->fromUChar32 = ch;
- if (args->flush) {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *err = U_ILLEGAL_CHAR_FOUND;
- }
- break;
- }
- }
- else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- args->converter->fromUChar32 = ch;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
-
- /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
- temp[1] = (uint8_t) (ch >> 16 & 0x1F);
- temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
- temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
-
- for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) {
- if (myTarget < targetLimit) {
- *(myTarget++) = temp[indexToWrite];
- }
- else {
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }
- }
-
- if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
-
- args->target = (char *) myTarget;
- args->source = mySource;
-}
-
-static void U_CALLCONV
-T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
- UErrorCode * err)
-{
- const UChar *mySource = args->source;
- unsigned char *myTarget;
- int32_t *myOffsets;
- const UChar *sourceLimit = args->sourceLimit;
- const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
- UChar32 ch, ch2;
- int32_t offsetNum = 0;
- unsigned int indexToWrite;
- unsigned char temp[sizeof(uint32_t)];
-
- if(mySource >= sourceLimit) {
- /* no input, nothing to do */
- return;
- }
-
- /* write the BOM if necessary */
- if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
- static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu };
- ucnv_fromUWriteBytes(args->converter,
- bom, 4,
- &args->target, args->targetLimit,
- &args->offsets, -1,
- err);
- args->converter->fromUnicodeStatus=0;
- }
-
- myTarget = (unsigned char *) args->target;
- myOffsets = args->offsets;
- temp[0] = 0;
-
- if (args->converter->fromUChar32) {
- ch = args->converter->fromUChar32;
- args->converter->fromUChar32 = 0;
- goto lowsurogate;
- }
-
- while (mySource < sourceLimit && myTarget < targetLimit) {
- ch = *(mySource++);
-
- if (U_IS_SURROGATE(ch)) {
- if (U_IS_LEAD(ch)) {
-lowsurogate:
- if (mySource < sourceLimit) {
- ch2 = *mySource;
- if (U_IS_TRAIL(ch2)) {
- ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
- mySource++;
- }
- else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- args->converter->fromUChar32 = ch;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
- else {
- /* ran out of source */
- args->converter->fromUChar32 = ch;
- if (args->flush) {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *err = U_ILLEGAL_CHAR_FOUND;
- }
- break;
- }
- }
- else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- args->converter->fromUChar32 = ch;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
-
- /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
- temp[1] = (uint8_t) (ch >> 16 & 0x1F);
- temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
- temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
-
- for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) {
- if (myTarget < targetLimit) {
- *(myTarget++) = temp[indexToWrite];
- *(myOffsets++) = offsetNum;
- }
- else {
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }
- offsetNum = offsetNum + 1 + (temp[1] != 0);
- }
-
- if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
-
- args->target = (char *) myTarget;
- args->source = mySource;
- args->offsets = myOffsets;
-}
-
-static UChar32 U_CALLCONV
-T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args,
- UErrorCode* err)
-{
- const uint8_t *mySource;
- UChar32 myUChar;
- int32_t length;
-
- mySource = (const uint8_t *)args->source;
- if (mySource >= (const uint8_t *)args->sourceLimit)
- {
- /* no input */
- *err = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0xffff;
- }
-
- length = (int32_t)((const uint8_t *)args->sourceLimit - mySource);
- if (length < 4)
- {
- /* got a partial character */
- uprv_memcpy(args->converter->toUBytes, mySource, length);
- args->converter->toULength = (int8_t)length;
- args->source = (const char *)(mySource + length);
- *err = U_TRUNCATED_CHAR_FOUND;
- return 0xffff;
- }
-
- /* Don't even try to do a direct cast because the value may be on an odd address. */
- myUChar = ((UChar32)mySource[0] << 24)
- | ((UChar32)mySource[1] << 16)
- | ((UChar32)mySource[2] << 8)
- | ((UChar32)mySource[3]);
-
- args->source = (const char *)(mySource + 4);
- if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) {
- return myUChar;
- }
-
- uprv_memcpy(args->converter->toUBytes, mySource, 4);
- args->converter->toULength = 4;
-
- *err = U_ILLEGAL_CHAR_FOUND;
- return 0xffff;
-}
-U_CDECL_END
-static const UConverterImpl _UTF32BEImpl = {
- UCNV_UTF32_BigEndian,
-
- NULL,
- NULL,
-
- NULL,
- NULL,
- NULL,
-
- T_UConverter_toUnicode_UTF32_BE,
- T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC,
- T_UConverter_fromUnicode_UTF32_BE,
- T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC,
- T_UConverter_getNextUChar_UTF32_BE,
-
- NULL,
- NULL,
- NULL,
- NULL,
- ucnv_getNonSurrogateUnicodeSet,
-
- NULL,
- NULL
-};
-
-/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */
-static const UConverterStaticData _UTF32BEStaticData = {
- sizeof(UConverterStaticData),
- "UTF-32BE",
- 1232,
- UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4,
- { 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-const UConverterSharedData _UTF32BEData =
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32BEStaticData, &_UTF32BEImpl);
-
-/* UTF-32LE ---------------------------------------------------------- */
-U_CDECL_BEGIN
-static void U_CALLCONV
-T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args,
- UErrorCode * err)
-{
- const unsigned char *mySource = (unsigned char *) args->source;
- UChar *myTarget = args->target;
- const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
- const UChar *targetLimit = args->targetLimit;
- unsigned char *toUBytes = args->converter->toUBytes;
- uint32_t ch, i;
-
- /* Restore state of current sequence */
- if (args->converter->toULength > 0 && myTarget < targetLimit)
- {
- i = args->converter->toULength; /* restore # of bytes consumed */
- args->converter->toULength = 0;
-
- /* Stores the previously calculated ch from a previous call*/
- ch = args->converter->toUnicodeStatus - 1;
- args->converter->toUnicodeStatus = 0;
- goto morebytes;
- }
-
- while (mySource < sourceLimit && myTarget < targetLimit)
- {
- i = 0;
- ch = 0;
-morebytes:
- while (i < sizeof(uint32_t))
- {
- if (mySource < sourceLimit)
- {
- ch |= ((uint8_t)(*mySource)) << (i * 8);
- toUBytes[i++] = (char) *(mySource++);
- }
- else
- {
- /* stores a partially calculated target*/
- /* + 1 to make 0 a valid character */
- args->converter->toUnicodeStatus = ch + 1;
- args->converter->toULength = (int8_t) i;
- goto donefornow;
- }
- }
-
- if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
- /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
- if (ch <= MAXIMUM_UCS2) {
- /* fits in 16 bits */
- *(myTarget++) = (UChar) ch;
- }
- else {
- /* write out the surrogates */
- *(myTarget++) = U16_LEAD(ch);
- ch = U16_TRAIL(ch);
- if (myTarget < targetLimit) {
- *(myTarget++) = (UChar)ch;
- }
- else {
- /* Put in overflow buffer (not handled here) */
- args->converter->UCharErrorBuffer[0] = (UChar) ch;
- args->converter->UCharErrorBufferLength = 1;
- *err = U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- }
- else {
- args->converter->toULength = (int8_t)i;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
-
-donefornow:
- if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
- {
- /* End of target buffer */
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
-
- args->target = myTarget;
- args->source = (const char *) mySource;
-}
-
-static void U_CALLCONV
-T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
- UErrorCode * err)
-{
- const unsigned char *mySource = (unsigned char *) args->source;
- UChar *myTarget = args->target;
- int32_t *myOffsets = args->offsets;
- const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
- const UChar *targetLimit = args->targetLimit;
- unsigned char *toUBytes = args->converter->toUBytes;
- uint32_t ch, i;
- int32_t offsetNum = 0;
-
- /* Restore state of current sequence */
- if (args->converter->toULength > 0 && myTarget < targetLimit)
- {
- i = args->converter->toULength; /* restore # of bytes consumed */
- args->converter->toULength = 0;
-
- /* Stores the previously calculated ch from a previous call*/
- ch = args->converter->toUnicodeStatus - 1;
- args->converter->toUnicodeStatus = 0;
- goto morebytes;
- }
-
- while (mySource < sourceLimit && myTarget < targetLimit)
- {
- i = 0;
- ch = 0;
-morebytes:
- while (i < sizeof(uint32_t))
- {
- if (mySource < sourceLimit)
- {
- ch |= ((uint8_t)(*mySource)) << (i * 8);
- toUBytes[i++] = (char) *(mySource++);
- }
- else
- {
- /* stores a partially calculated target*/
- /* + 1 to make 0 a valid character */
- args->converter->toUnicodeStatus = ch + 1;
- args->converter->toULength = (int8_t) i;
- goto donefornow;
- }
- }
-
- if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch))
- {
- /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
- if (ch <= MAXIMUM_UCS2)
- {
- /* fits in 16 bits */
- *(myTarget++) = (UChar) ch;
- *(myOffsets++) = offsetNum;
- }
- else {
- /* write out the surrogates */
- *(myTarget++) = U16_LEAD(ch);
- *(myOffsets++) = offsetNum;
- ch = U16_TRAIL(ch);
- if (myTarget < targetLimit)
- {
- *(myTarget++) = (UChar)ch;
- *(myOffsets++) = offsetNum;
- }
- else
- {
- /* Put in overflow buffer (not handled here) */
- args->converter->UCharErrorBuffer[0] = (UChar) ch;
- args->converter->UCharErrorBufferLength = 1;
- *err = U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- }
- else
- {
- args->converter->toULength = (int8_t)i;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- offsetNum += i;
- }
-
-donefornow:
- if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
- {
- /* End of target buffer */
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
-
- args->target = myTarget;
- args->source = (const char *) mySource;
- args->offsets = myOffsets;
-}
-
-static void U_CALLCONV
-T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,
- UErrorCode * err)
-{
- const UChar *mySource = args->source;
- unsigned char *myTarget;
- const UChar *sourceLimit = args->sourceLimit;
- const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
- UChar32 ch, ch2;
- unsigned int indexToWrite;
- unsigned char temp[sizeof(uint32_t)];
-
- if(mySource >= sourceLimit) {
- /* no input, nothing to do */
- return;
- }
-
- /* write the BOM if necessary */
- if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
- static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 };
- ucnv_fromUWriteBytes(args->converter,
- bom, 4,
- &args->target, args->targetLimit,
- &args->offsets, -1,
- err);
- args->converter->fromUnicodeStatus=0;
- }
-
- myTarget = (unsigned char *) args->target;
- temp[3] = 0;
-
- if (args->converter->fromUChar32)
- {
- ch = args->converter->fromUChar32;
- args->converter->fromUChar32 = 0;
- goto lowsurogate;
- }
-
- while (mySource < sourceLimit && myTarget < targetLimit)
- {
- ch = *(mySource++);
-
- if (U16_IS_SURROGATE(ch)) {
- if (U16_IS_LEAD(ch))
- {
-lowsurogate:
- if (mySource < sourceLimit)
- {
- ch2 = *mySource;
- if (U16_IS_TRAIL(ch2)) {
- ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
- mySource++;
- }
- else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- args->converter->fromUChar32 = ch;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
- else {
- /* ran out of source */
- args->converter->fromUChar32 = ch;
- if (args->flush) {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *err = U_ILLEGAL_CHAR_FOUND;
- }
- break;
- }
- }
- else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- args->converter->fromUChar32 = ch;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
-
- /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
- temp[2] = (uint8_t) (ch >> 16 & 0x1F);
- temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
- temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
-
- for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++)
- {
- if (myTarget < targetLimit)
- {
- *(myTarget++) = temp[indexToWrite];
- }
- else
- {
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }
- }
-
- if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
- {
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
-
- args->target = (char *) myTarget;
- args->source = mySource;
-}
-
-static void U_CALLCONV
-T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
- UErrorCode * err)
-{
- const UChar *mySource = args->source;
- unsigned char *myTarget;
- int32_t *myOffsets;
- const UChar *sourceLimit = args->sourceLimit;
- const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
- UChar32 ch, ch2;
- unsigned int indexToWrite;
- unsigned char temp[sizeof(uint32_t)];
- int32_t offsetNum = 0;
-
- if(mySource >= sourceLimit) {
- /* no input, nothing to do */
- return;
- }
-
- /* write the BOM if necessary */
- if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
- static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 };
- ucnv_fromUWriteBytes(args->converter,
- bom, 4,
- &args->target, args->targetLimit,
- &args->offsets, -1,
- err);
- args->converter->fromUnicodeStatus=0;
- }
-
- myTarget = (unsigned char *) args->target;
- myOffsets = args->offsets;
- temp[3] = 0;
-
- if (args->converter->fromUChar32)
- {
- ch = args->converter->fromUChar32;
- args->converter->fromUChar32 = 0;
- goto lowsurogate;
- }
-
- while (mySource < sourceLimit && myTarget < targetLimit)
- {
- ch = *(mySource++);
-
- if (U16_IS_SURROGATE(ch)) {
- if (U16_IS_LEAD(ch))
- {
-lowsurogate:
- if (mySource < sourceLimit)
- {
- ch2 = *mySource;
- if (U16_IS_TRAIL(ch2))
- {
- ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
- mySource++;
- }
- else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- args->converter->fromUChar32 = ch;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
- else {
- /* ran out of source */
- args->converter->fromUChar32 = ch;
- if (args->flush) {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *err = U_ILLEGAL_CHAR_FOUND;
- }
- break;
- }
- }
- else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- args->converter->fromUChar32 = ch;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
-
- /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
- temp[2] = (uint8_t) (ch >> 16 & 0x1F);
- temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
- temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
-
- for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++)
- {
- if (myTarget < targetLimit)
- {
- *(myTarget++) = temp[indexToWrite];
- *(myOffsets++) = offsetNum;
- }
- else
- {
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }
- offsetNum = offsetNum + 1 + (temp[2] != 0);
- }
-
- if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
- {
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
-
- args->target = (char *) myTarget;
- args->source = mySource;
- args->offsets = myOffsets;
-}
-
-static UChar32 U_CALLCONV
-T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args,
- UErrorCode* err)
-{
- const uint8_t *mySource;
- UChar32 myUChar;
- int32_t length;
-
- mySource = (const uint8_t *)args->source;
- if (mySource >= (const uint8_t *)args->sourceLimit)
- {
- /* no input */
- *err = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0xffff;
- }
-
- length = (int32_t)((const uint8_t *)args->sourceLimit - mySource);
- if (length < 4)
- {
- /* got a partial character */
- uprv_memcpy(args->converter->toUBytes, mySource, length);
- args->converter->toULength = (int8_t)length;
- args->source = (const char *)(mySource + length);
- *err = U_TRUNCATED_CHAR_FOUND;
- return 0xffff;
- }
-
- /* Don't even try to do a direct cast because the value may be on an odd address. */
- myUChar = ((UChar32)mySource[3] << 24)
- | ((UChar32)mySource[2] << 16)
- | ((UChar32)mySource[1] << 8)
- | ((UChar32)mySource[0]);
-
- args->source = (const char *)(mySource + 4);
- if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) {
- return myUChar;
- }
-
- uprv_memcpy(args->converter->toUBytes, mySource, 4);
- args->converter->toULength = 4;
-
- *err = U_ILLEGAL_CHAR_FOUND;
- return 0xffff;
-}
-U_CDECL_END
-static const UConverterImpl _UTF32LEImpl = {
- UCNV_UTF32_LittleEndian,
-
- NULL,
- NULL,
-
- NULL,
- NULL,
- NULL,
-
- T_UConverter_toUnicode_UTF32_LE,
- T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC,
- T_UConverter_fromUnicode_UTF32_LE,
- T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC,
- T_UConverter_getNextUChar_UTF32_LE,
-
- NULL,
- NULL,
- NULL,
- NULL,
- ucnv_getNonSurrogateUnicodeSet,
-
- NULL,
- NULL
-};
-
-/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */
-static const UConverterStaticData _UTF32LEStaticData = {
- sizeof(UConverterStaticData),
- "UTF-32LE",
- 1234,
- UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4,
- { 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-
-const UConverterSharedData _UTF32LEData =
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32LEStaticData, &_UTF32LEImpl);
-
-/* UTF-32 (Detect BOM) ------------------------------------------------------ */
-
-/*
- * Detect a BOM at the beginning of the stream and select UTF-32BE or UTF-32LE
- * accordingly.
- *
- * State values:
- * 0 initial state
- * 1 saw 00
- * 2 saw 00 00
- * 3 saw 00 00 FE
- * 4 -
- * 5 saw FF
- * 6 saw FF FE
- * 7 saw FF FE 00
- * 8 UTF-32BE mode
- * 9 UTF-32LE mode
- *
- * During detection: state&3==number of matching bytes so far.
- *
- * On output, emit U+FEFF as the first code point.
- */
-U_CDECL_BEGIN
-static void U_CALLCONV
-_UTF32Reset(UConverter *cnv, UConverterResetChoice choice) {
- if(choice<=UCNV_RESET_TO_UNICODE) {
- /* reset toUnicode: state=0 */
- cnv->mode=0;
- }
- if(choice!=UCNV_RESET_TO_UNICODE) {
- /* reset fromUnicode: prepare to output the UTF-32PE BOM */
- cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
- }
-}
-
-static void U_CALLCONV
-_UTF32Open(UConverter *cnv,
- UConverterLoadArgs *pArgs,
- UErrorCode *pErrorCode) {
- (void)pArgs;
- (void)pErrorCode;
- _UTF32Reset(cnv, UCNV_RESET_BOTH);
-}
-
-static const char utf32BOM[8]={ 0, 0, (char)0xfeu, (char)0xffu, (char)0xffu, (char)0xfeu, 0, 0 };
-
-static void U_CALLCONV
-_UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv=pArgs->converter;
- const char *source=pArgs->source;
- const char *sourceLimit=pArgs->sourceLimit;
- int32_t *offsets=pArgs->offsets;
-
- int32_t state, offsetDelta;
- char b;
-
- state=cnv->mode;
-
- /*
- * If we detect a BOM in this buffer, then we must add the BOM size to the
- * offsets because the actual converter function will not see and count the BOM.
- * offsetDelta will have the number of the BOM bytes that are in the current buffer.
- */
- offsetDelta=0;
-
- while(source<sourceLimit && U_SUCCESS(*pErrorCode)) {
- switch(state) {
- case 0:
- b=*source;
- if(b==0) {
- state=1; /* could be 00 00 FE FF */
- } else if(b==(char)0xffu) {
- state=5; /* could be FF FE 00 00 */
- } else {
- state=8; /* default to UTF-32BE */
- continue;
- }
- ++source;
- break;
- case 1:
- case 2:
- case 3:
- case 5:
- case 6:
- case 7:
- if(*source==utf32BOM[state]) {
- ++state;
- ++source;
- if(state==4) {
- state=8; /* detect UTF-32BE */
- offsetDelta=(int32_t)(source-pArgs->source);
- } else if(state==8) {
- state=9; /* detect UTF-32LE */
- offsetDelta=(int32_t)(source-pArgs->source);
- }
- } else {
- /* switch to UTF-32BE and pass the previous bytes */
- int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */
-
- /* reset the source */
- source=pArgs->source;
-
- if(count==(state&3)) {
- /* simple: all in the same buffer, just reset source */
- } else {
- UBool oldFlush=pArgs->flush;
-
- /* some of the bytes are from a previous buffer, replay those first */
- pArgs->source=utf32BOM+(state&4); /* select the correct BOM */
- pArgs->sourceLimit=pArgs->source+((state&3)-count); /* replay previous bytes */
- pArgs->flush=FALSE; /* this sourceLimit is not the real source stream limit */
-
- /* no offsets: bytes from previous buffer, and not enough for output */
- T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
-
- /* restore real pointers; pArgs->source will be set in case 8/9 */
- pArgs->sourceLimit=sourceLimit;
- pArgs->flush=oldFlush;
- }
- state=8;
- continue;
- }
- break;
- case 8:
- /* call UTF-32BE */
- pArgs->source=source;
- if(offsets==NULL) {
- T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
- } else {
- T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(pArgs, pErrorCode);
- }
- source=pArgs->source;
- break;
- case 9:
- /* call UTF-32LE */
- pArgs->source=source;
- if(offsets==NULL) {
- T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode);
- } else {
- T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(pArgs, pErrorCode);
- }
- source=pArgs->source;
- break;
- default:
- break; /* does not occur */
- }
- }
-
- /* add BOM size to offsets - see comment at offsetDelta declaration */
- if(offsets!=NULL && offsetDelta!=0) {
- int32_t *offsetsLimit=pArgs->offsets;
- while(offsets<offsetsLimit) {
- *offsets++ += offsetDelta;
- }
- }
-
- pArgs->source=source;
-
- if(source==sourceLimit && pArgs->flush) {
- /* handle truncated input */
- switch(state) {
- case 0:
- break; /* no input at all, nothing to do */
- case 8:
- T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
- break;
- case 9:
- T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode);
- break;
- default:
- /* handle 0<state<8: call UTF-32BE with too-short input */
- pArgs->source=utf32BOM+(state&4); /* select the correct BOM */
- pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */
-
- /* no offsets: not enough for output */
- T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
- pArgs->source=source;
- pArgs->sourceLimit=sourceLimit;
- state=8;
- break;
- }
- }
-
- cnv->mode=state;
-}
-
-static UChar32 U_CALLCONV
-_UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- switch(pArgs->converter->mode) {
- case 8:
- return T_UConverter_getNextUChar_UTF32_BE(pArgs, pErrorCode);
- case 9:
- return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode);
- default:
- return UCNV_GET_NEXT_UCHAR_USE_TO_U;
- }
-}
-U_CDECL_END
-static const UConverterImpl _UTF32Impl = {
- UCNV_UTF32,
-
- NULL,
- NULL,
-
- _UTF32Open,
- NULL,
- _UTF32Reset,
-
- _UTF32ToUnicodeWithOffsets,
- _UTF32ToUnicodeWithOffsets,
-#if U_IS_BIG_ENDIAN
- T_UConverter_fromUnicode_UTF32_BE,
- T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC,
-#else
- T_UConverter_fromUnicode_UTF32_LE,
- T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC,
-#endif
- _UTF32GetNextUChar,
-
- NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
- NULL,
- NULL,
- NULL,
- ucnv_getNonSurrogateUnicodeSet,
-
- NULL,
- NULL
-};
-
-/* The 1236 CCSID refers to any version of Unicode with a BOM sensitive endianess of UTF-32 */
-static const UConverterStaticData _UTF32StaticData = {
- sizeof(UConverterStaticData),
- "UTF-32",
- 1236,
- UCNV_IBM, UCNV_UTF32, 4, 4,
-#if U_IS_BIG_ENDIAN
- { 0, 0, 0xff, 0xfd }, 4,
-#else
- { 0xfd, 0xff, 0, 0 }, 4,
-#endif
- FALSE, FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-const UConverterSharedData _UTF32Data =
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32StaticData, &_UTF32Impl);
-
-#endif
diff --git a/contrib/libs/icu/common/ucnv_u7.cpp b/contrib/libs/icu/common/ucnv_u7.cpp
deleted file mode 100644
index 87ba8cf37ec..00000000000
--- a/contrib/libs/icu/common/ucnv_u7.cpp
+++ /dev/null
@@ -1,1491 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2002-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* file name: ucnv_u7.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002jul01
-* created by: Markus W. Scherer
-*
-* UTF-7 converter implementation. Used to be in ucnv_utf.c.
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
-
-#include "cmemory.h"
-#include "unicode/ucnv.h"
-#include "ucnv_bld.h"
-#include "ucnv_cnv.h"
-#include "uassert.h"
-
-/* UTF-7 -------------------------------------------------------------------- */
-
-/*
- * UTF-7 is a stateful encoding of Unicode.
- * It is defined in RFC 2152. (http://www.ietf.org/rfc/rfc2152.txt)
- * It was intended for use in Internet email systems, using in its bytewise
- * encoding only a subset of 7-bit US-ASCII.
- * UTF-7 is deprecated in favor of UTF-8/16/32 and SCSU, but still
- * occasionally used.
- *
- * For converting Unicode to UTF-7, the RFC allows to encode some US-ASCII
- * characters directly or in base64. Especially, the characters in set O
- * as defined in the RFC (see below) may be encoded directly but are not
- * allowed in, e.g., email headers.
- * By default, the ICU UTF-7 converter encodes set O directly.
- * By choosing the option "version=1", set O will be escaped instead.
- * For example:
- * utf7Converter=ucnv_open("UTF-7,version=1");
- *
- * For details about email headers see RFC 2047.
- */
-
-/*
- * Tests for US-ASCII characters belonging to character classes
- * defined in UTF-7.
- *
- * Set D (directly encoded characters) consists of the following
- * characters: the upper and lower case letters A through Z
- * and a through z, the 10 digits 0-9, and the following nine special
- * characters (note that "+" and "=" are omitted):
- * '(),-./:?
- *
- * Set O (optional direct characters) consists of the following
- * characters (note that "\" and "~" are omitted):
- * !"#$%&*;<=>@[]^_`{|}
- *
- * According to the rules in RFC 2152, the byte values for the following
- * US-ASCII characters are not used in UTF-7 and are therefore illegal:
- * - all C0 control codes except for CR LF TAB
- * - BACKSLASH
- * - TILDE
- * - DEL
- * - all codes beyond US-ASCII, i.e. all >127
- */
-#define inSetD(c) \
- ((uint8_t)((c)-97)<26 || (uint8_t)((c)-65)<26 || /* letters */ \
- (uint8_t)((c)-48)<10 || /* digits */ \
- (uint8_t)((c)-39)<3 || /* '() */ \
- (uint8_t)((c)-44)<4 || /* ,-./ */ \
- (c)==58 || (c)==63 /* :? */ \
- )
-
-#define inSetO(c) \
- ((uint8_t)((c)-33)<6 || /* !"#$%& */ \
- (uint8_t)((c)-59)<4 || /* ;<=> */ \
- (uint8_t)((c)-93)<4 || /* ]^_` */ \
- (uint8_t)((c)-123)<3 || /* {|} */ \
- (c)==42 || (c)==64 || (c)==91 /* *@[ */ \
- )
-
-#define isCRLFTAB(c) ((c)==13 || (c)==10 || (c)==9)
-#define isCRLFSPTAB(c) ((c)==32 || (c)==13 || (c)==10 || (c)==9)
-
-#define PLUS 43
-#define MINUS 45
-#define BACKSLASH 92
-#define TILDE 126
-
-/* legal byte values: all US-ASCII graphic characters from space to before tilde, and CR LF TAB */
-#define isLegalUTF7(c) (((uint8_t)((c)-32)<94 && (c)!=BACKSLASH) || isCRLFTAB(c))
-
-/* encode directly sets D and O and CR LF SP TAB */
-static const UBool encodeDirectlyMaximum[128]={
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
-
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0
-};
-
-/* encode directly set D and CR LF SP TAB but not set O */
-static const UBool encodeDirectlyRestricted[128]={
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
-
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
-
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
-};
-
-static const uint8_t
-toBase64[64]={
- /* A-Z */
- 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
- 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
- /* a-z */
- 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
- 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
- /* 0-9 */
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
- /* +/ */
- 43, 47
-};
-
-static const int8_t
-fromBase64[128]={
- /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */
- -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3,
- -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
-
- /* general punctuation with + and / and a special value (-2) for - */
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63,
- /* digits */
- 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
-
- /* A-Z */
- -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1,
-
- /* a-z */
- -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3
-};
-
-/*
- * converter status values:
- *
- * toUnicodeStatus:
- * 24 inDirectMode (boolean)
- * 23..16 base64Counter (-1..7)
- * 15..0 bits (up to 14 bits incoming base64)
- *
- * fromUnicodeStatus:
- * 31..28 version (0: set O direct 1: set O escaped)
- * 24 inDirectMode (boolean)
- * 23..16 base64Counter (0..2)
- * 7..0 bits (6 bits outgoing base64)
- *
- */
-
-U_CDECL_BEGIN
-static void U_CALLCONV
-_UTF7Reset(UConverter *cnv, UConverterResetChoice choice) {
- if(choice<=UCNV_RESET_TO_UNICODE) {
- /* reset toUnicode */
- cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */
- cnv->toULength=0;
- }
- if(choice!=UCNV_RESET_TO_UNICODE) {
- /* reset fromUnicode */
- cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
- }
-}
-
-static void U_CALLCONV
-_UTF7Open(UConverter *cnv,
- UConverterLoadArgs *pArgs,
- UErrorCode *pErrorCode) {
- (void)pArgs;
- if(UCNV_GET_VERSION(cnv)<=1) {
- /* TODO(markus): Should just use cnv->options rather than copying the version number. */
- cnv->fromUnicodeStatus=UCNV_GET_VERSION(cnv)<<28;
- _UTF7Reset(cnv, UCNV_RESET_BOTH);
- } else {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- }
-}
-
-static void U_CALLCONV
-_UTF7ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const uint8_t *source, *sourceLimit;
- UChar *target;
- const UChar *targetLimit;
- int32_t *offsets;
-
- uint8_t *bytes;
- uint8_t byteIndex;
-
- int32_t length, targetCapacity;
-
- /* UTF-7 state */
- uint16_t bits;
- int8_t base64Counter;
- UBool inDirectMode;
-
- int8_t base64Value;
-
- int32_t sourceIndex, nextSourceIndex;
-
- uint8_t b;
- /* set up the local pointers */
- cnv=pArgs->converter;
-
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=pArgs->target;
- targetLimit=pArgs->targetLimit;
- offsets=pArgs->offsets;
- /* get the state machine state */
- {
- uint32_t status=cnv->toUnicodeStatus;
- inDirectMode=(UBool)((status>>24)&1);
- base64Counter=(int8_t)(status>>16);
- bits=(uint16_t)status;
- }
- bytes=cnv->toUBytes;
- byteIndex=cnv->toULength;
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex=byteIndex==0 ? 0 : -1;
- nextSourceIndex=0;
-
- if(inDirectMode) {
-directMode:
- /*
- * In Direct Mode, most US-ASCII characters are encoded directly, i.e.,
- * with their US-ASCII byte values.
- * Backslash and Tilde and most control characters are not allowed in UTF-7.
- * A plus sign starts Unicode (or "escape") Mode.
- *
- * In Direct Mode, only the sourceIndex is used.
- */
- byteIndex=0;
- length=(int32_t)(sourceLimit-source);
- targetCapacity=(int32_t)(targetLimit-target);
- if(length>targetCapacity) {
- length=targetCapacity;
- }
- while(length>0) {
- b=*source++;
- if(!isLegalUTF7(b)) {
- /* illegal */
- bytes[0]=b;
- byteIndex=1;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- } else if(b!=PLUS) {
- /* write directly encoded character */
- *target++=b;
- if(offsets!=NULL) {
- *offsets++=sourceIndex++;
- }
- } else /* PLUS */ {
- /* switch to Unicode mode */
- nextSourceIndex=++sourceIndex;
- inDirectMode=FALSE;
- byteIndex=0;
- bits=0;
- base64Counter=-1;
- goto unicodeMode;
- }
- --length;
- }
- if(source<sourceLimit && target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- } else {
-unicodeMode:
- /*
- * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
- * The base64 sequence ends with any character that is not in the base64 alphabet.
- * A terminating minus sign is consumed.
- *
- * In Unicode Mode, the sourceIndex has the index to the start of the current
- * base64 bytes, while nextSourceIndex is precisely parallel to source,
- * keeping the index to the following byte.
- * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
- */
- while(source<sourceLimit) {
- if(target<targetLimit) {
- bytes[byteIndex++]=b=*source++;
- ++nextSourceIndex;
- base64Value = -3; /* initialize as illegal */
- if(b>=126 || (base64Value=fromBase64[b])==-3 || base64Value==-1) {
- /* either
- * base64Value==-1 for any legal character except base64 and minus sign, or
- * base64Value==-3 for illegal characters:
- * 1. In either case, leave Unicode mode.
- * 2.1. If we ended with an incomplete UChar or none after the +, then
- * generate an error for the preceding erroneous sequence and deal with
- * the current (possibly illegal) character next time through.
- * 2.2. Else the current char comes after a complete UChar, which was already
- * pushed to the output buf, so:
- * 2.2.1. If the current char is legal, just save it for processing next time.
- * It may be for example, a plus which we need to deal with in direct mode.
- * 2.2.2. Else if the current char is illegal, we might as well deal with it here.
- */
- inDirectMode=TRUE;
- if(base64Counter==-1) {
- /* illegal: + immediately followed by something other than base64 or minus sign */
- /* include the plus sign in the reported sequence, but not the subsequent char */
- --source;
- bytes[0]=PLUS;
- byteIndex=1;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- } else if(bits!=0) {
- /* bits are illegally left over, a UChar is incomplete */
- /* don't include current char (legal or illegal) in error seq */
- --source;
- --byteIndex;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- } else {
- /* previous UChar was complete */
- if(base64Value==-3) {
- /* current character is illegal, deal with it here */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- } else {
- /* un-read the current character in case it is a plus sign */
- --source;
- sourceIndex=nextSourceIndex-1;
- goto directMode;
- }
- }
- } else if(base64Value>=0) {
- /* collect base64 bytes into UChars */
- switch(base64Counter) {
- case -1: /* -1 is immediately after the + */
- case 0:
- bits=base64Value;
- base64Counter=1;
- break;
- case 1:
- case 3:
- case 4:
- case 6:
- bits=(uint16_t)((bits<<6)|base64Value);
- ++base64Counter;
- break;
- case 2:
- *target++=(UChar)((bits<<4)|(base64Value>>2));
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- sourceIndex=nextSourceIndex-1;
- }
- bytes[0]=b; /* keep this byte in case an error occurs */
- byteIndex=1;
- bits=(uint16_t)(base64Value&3);
- base64Counter=3;
- break;
- case 5:
- *target++=(UChar)((bits<<2)|(base64Value>>4));
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- sourceIndex=nextSourceIndex-1;
- }
- bytes[0]=b; /* keep this byte in case an error occurs */
- byteIndex=1;
- bits=(uint16_t)(base64Value&15);
- base64Counter=6;
- break;
- case 7:
- *target++=(UChar)((bits<<6)|base64Value);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- sourceIndex=nextSourceIndex;
- }
- byteIndex=0;
- bits=0;
- base64Counter=0;
- break;
- default:
- /* will never occur */
- break;
- }
- } else /*base64Value==-2*/ {
- /* minus sign terminates the base64 sequence */
- inDirectMode=TRUE;
- if(base64Counter==-1) {
- /* +- i.e. a minus immediately following a plus */
- *target++=PLUS;
- if(offsets!=NULL) {
- *offsets++=sourceIndex-1;
- }
- } else {
- /* absorb the minus and leave the Unicode Mode */
- if(bits!=0) {
- /* bits are illegally left over, a UChar is incomplete */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
- sourceIndex=nextSourceIndex;
- goto directMode;
- }
- } else {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- }
-
- if(U_SUCCESS(*pErrorCode) && pArgs->flush && source==sourceLimit && bits==0) {
- /*
- * if we are in Unicode mode, then the byteIndex might not be 0,
- * but that is ok if bits==0
- * -> we set byteIndex=0 at the end of the stream to avoid a truncated error
- * (not true for IMAP-mailbox-name where we must end in direct mode)
- */
- byteIndex=0;
- }
-
- /* set the converter state back into UConverter */
- cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
- cnv->toULength=byteIndex;
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- pArgs->offsets=offsets;
- return;
-}
-
-static void U_CALLCONV
-_UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const UChar *source, *sourceLimit;
- uint8_t *target, *targetLimit;
- int32_t *offsets;
-
- int32_t length, targetCapacity, sourceIndex;
- UChar c;
-
- /* UTF-7 state */
- const UBool *encodeDirectly;
- uint8_t bits;
- int8_t base64Counter;
- UBool inDirectMode;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
-
- /* set up the local pointers */
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetLimit=(uint8_t *)pArgs->targetLimit;
- offsets=pArgs->offsets;
-
- /* get the state machine state */
- {
- uint32_t status=cnv->fromUnicodeStatus;
- encodeDirectly= status<0x10000000 ? encodeDirectlyMaximum : encodeDirectlyRestricted;
- inDirectMode=(UBool)((status>>24)&1);
- base64Counter=(int8_t)(status>>16);
- bits=(uint8_t)status;
- U_ASSERT(bits<=UPRV_LENGTHOF(toBase64));
- }
-
- /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
- sourceIndex=0;
-
- if(inDirectMode) {
-directMode:
- length=(int32_t)(sourceLimit-source);
- targetCapacity=(int32_t)(targetLimit-target);
- if(length>targetCapacity) {
- length=targetCapacity;
- }
- while(length>0) {
- c=*source++;
- /* currently always encode CR LF SP TAB directly */
- if(c<=127 && encodeDirectly[c]) {
- /* encode directly */
- *target++=(uint8_t)c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex++;
- }
- } else if(c==PLUS) {
- /* output +- for + */
- *target++=PLUS;
- if(target<targetLimit) {
- *target++=MINUS;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex++;
- }
- /* realign length and targetCapacity */
- goto directMode;
- } else {
- if(offsets!=NULL) {
- *offsets++=sourceIndex++;
- }
- cnv->charErrorBuffer[0]=MINUS;
- cnv->charErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- } else {
- /* un-read this character and switch to Unicode Mode */
- --source;
- *target++=PLUS;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- inDirectMode=FALSE;
- base64Counter=0;
- goto unicodeMode;
- }
- --length;
- }
- if(source<sourceLimit && target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- } else {
-unicodeMode:
- while(source<sourceLimit) {
- if(target<targetLimit) {
- c=*source++;
- if(c<=127 && encodeDirectly[c]) {
- /* encode directly */
- inDirectMode=TRUE;
-
- /* trick: back out this character to make this easier */
- --source;
-
- /* terminate the base64 sequence */
- if(base64Counter!=0) {
- /* write remaining bits for the previous character */
- *target++=toBase64[bits];
- if(offsets!=NULL) {
- *offsets++=sourceIndex-1;
- }
- }
- if(fromBase64[c]!=-1) {
- /* need to terminate with a minus */
- if(target<targetLimit) {
- *target++=MINUS;
- if(offsets!=NULL) {
- *offsets++=sourceIndex-1;
- }
- } else {
- cnv->charErrorBuffer[0]=MINUS;
- cnv->charErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- goto directMode;
- } else {
- /*
- * base64 this character:
- * Output 2 or 3 base64 bytes for the remaining bits of the previous character
- * and the bits of this character, each implicitly in UTF-16BE.
- *
- * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
- * character to the next. The actual 2 or 4 bits are shifted to the left edge
- * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
- */
- switch(base64Counter) {
- case 0:
- *target++=toBase64[c>>10];
- if(target<targetLimit) {
- *target++=toBase64[(c>>4)&0x3f];
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex++;
- }
- } else {
- if(offsets!=NULL) {
- *offsets++=sourceIndex++;
- }
- cnv->charErrorBuffer[0]=toBase64[(c>>4)&0x3f];
- cnv->charErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- bits=(uint8_t)((c&15)<<2);
- base64Counter=1;
- break;
- case 1:
- *target++=toBase64[bits|(c>>14)];
- if(target<targetLimit) {
- *target++=toBase64[(c>>8)&0x3f];
- if(target<targetLimit) {
- *target++=toBase64[(c>>2)&0x3f];
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- *offsets++=sourceIndex++;
- }
- } else {
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex++;
- }
- cnv->charErrorBuffer[0]=toBase64[(c>>2)&0x3f];
- cnv->charErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- } else {
- if(offsets!=NULL) {
- *offsets++=sourceIndex++;
- }
- cnv->charErrorBuffer[0]=toBase64[(c>>8)&0x3f];
- cnv->charErrorBuffer[1]=toBase64[(c>>2)&0x3f];
- cnv->charErrorBufferLength=2;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- bits=(uint8_t)((c&3)<<4);
- base64Counter=2;
- break;
- case 2:
- *target++=toBase64[bits|(c>>12)];
- if(target<targetLimit) {
- *target++=toBase64[(c>>6)&0x3f];
- if(target<targetLimit) {
- *target++=toBase64[c&0x3f];
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- *offsets++=sourceIndex++;
- }
- } else {
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex++;
- }
- cnv->charErrorBuffer[0]=toBase64[c&0x3f];
- cnv->charErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- } else {
- if(offsets!=NULL) {
- *offsets++=sourceIndex++;
- }
- cnv->charErrorBuffer[0]=toBase64[(c>>6)&0x3f];
- cnv->charErrorBuffer[1]=toBase64[c&0x3f];
- cnv->charErrorBufferLength=2;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- bits=0;
- base64Counter=0;
- break;
- default:
- /* will never occur */
- break;
- }
- }
- } else {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- }
-
- if(pArgs->flush && source>=sourceLimit) {
- /* flush remaining bits to the target */
- if(!inDirectMode) {
- if (base64Counter!=0) {
- if(target<targetLimit) {
- *target++=toBase64[bits];
- if(offsets!=NULL) {
- *offsets++=sourceIndex-1;
- }
- } else {
- cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits];
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- }
- /* Add final MINUS to terminate unicodeMode */
- if(target<targetLimit) {
- *target++=MINUS;
- if(offsets!=NULL) {
- *offsets++=sourceIndex-1;
- }
- } else {
- cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- }
- /* reset the state for the next conversion */
- cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
- } else {
- /* set the converter state back into UConverter */
- cnv->fromUnicodeStatus=
- (cnv->fromUnicodeStatus&0xf0000000)| /* keep version*/
- ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
- }
-
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- pArgs->offsets=offsets;
- return;
-}
-
-static const char * U_CALLCONV
-_UTF7GetName(const UConverter *cnv) {
- switch(cnv->fromUnicodeStatus>>28) {
- case 1:
- return "UTF-7,version=1";
- default:
- return "UTF-7";
- }
-}
-U_CDECL_END
-
-static const UConverterImpl _UTF7Impl={
- UCNV_UTF7,
-
- NULL,
- NULL,
-
- _UTF7Open,
- NULL,
- _UTF7Reset,
-
- _UTF7ToUnicodeWithOffsets,
- _UTF7ToUnicodeWithOffsets,
- _UTF7FromUnicodeWithOffsets,
- _UTF7FromUnicodeWithOffsets,
- NULL,
-
- NULL,
- _UTF7GetName,
- NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
- NULL,
- ucnv_getCompleteUnicodeSet,
-
- NULL,
- NULL
-};
-
-static const UConverterStaticData _UTF7StaticData={
- sizeof(UConverterStaticData),
- "UTF-7",
- 0, /* TODO CCSID for UTF-7 */
- UCNV_IBM, UCNV_UTF7,
- 1, 4,
- { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
- FALSE, FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-const UConverterSharedData _UTF7Data=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF7StaticData, &_UTF7Impl);
-
-/* IMAP mailbox name encoding ----------------------------------------------- */
-
-/*
- * RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1
- * http://www.ietf.org/rfc/rfc2060.txt
- *
- * 5.1.3. Mailbox International Naming Convention
- *
- * By convention, international mailbox names are specified using a
- * modified version of the UTF-7 encoding described in [UTF-7]. The
- * purpose of these modifications is to correct the following problems
- * with UTF-7:
- *
- * 1) UTF-7 uses the "+" character for shifting; this conflicts with
- * the common use of "+" in mailbox names, in particular USENET
- * newsgroup names.
- *
- * 2) UTF-7's encoding is BASE64 which uses the "/" character; this
- * conflicts with the use of "/" as a popular hierarchy delimiter.
- *
- * 3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with
- * the use of "\" as a popular hierarchy delimiter.
- *
- * 4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with
- * the use of "~" in some servers as a home directory indicator.
- *
- * 5) UTF-7 permits multiple alternate forms to represent the same
- * string; in particular, printable US-ASCII chararacters can be
- * represented in encoded form.
- *
- * In modified UTF-7, printable US-ASCII characters except for "&"
- * represent themselves; that is, characters with octet values 0x20-0x25
- * and 0x27-0x7e. The character "&" (0x26) is represented by the two-
- * octet sequence "&-".
- *
- * All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all
- * Unicode 16-bit octets) are represented in modified BASE64, with a
- * further modification from [UTF-7] that "," is used instead of "/".
- * Modified BASE64 MUST NOT be used to represent any printing US-ASCII
- * character which can represent itself.
- *
- * "&" is used to shift to modified BASE64 and "-" to shift back to US-
- * ASCII. All names start in US-ASCII, and MUST end in US-ASCII (that
- * is, a name that ends with a Unicode 16-bit octet MUST end with a "-
- * ").
- *
- * For example, here is a mailbox name which mixes English, Japanese,
- * and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw-
- */
-
-/*
- * Tests for US-ASCII characters belonging to character classes
- * defined in UTF-7.
- *
- * Set D (directly encoded characters) consists of the following
- * characters: the upper and lower case letters A through Z
- * and a through z, the 10 digits 0-9, and the following nine special
- * characters (note that "+" and "=" are omitted):
- * '(),-./:?
- *
- * Set O (optional direct characters) consists of the following
- * characters (note that "\" and "~" are omitted):
- * !"#$%&*;<=>@[]^_`{|}
- *
- * According to the rules in RFC 2152, the byte values for the following
- * US-ASCII characters are not used in UTF-7 and are therefore illegal:
- * - all C0 control codes except for CR LF TAB
- * - BACKSLASH
- * - TILDE
- * - DEL
- * - all codes beyond US-ASCII, i.e. all >127
- */
-
-/* uses '&' not '+' to start a base64 sequence */
-#define AMPERSAND 0x26
-#define COMMA 0x2c
-#define SLASH 0x2f
-
-/* legal byte values: all US-ASCII graphic characters 0x20..0x7e */
-#define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e)
-
-/* direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26 */
-#define inSetDIMAP(c) (isLegalIMAP(c) && c!=AMPERSAND)
-
-#define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[n] : COMMA)
-#define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? -1 : fromBase64[c])
-
-/*
- * converter status values:
- *
- * toUnicodeStatus:
- * 24 inDirectMode (boolean)
- * 23..16 base64Counter (-1..7)
- * 15..0 bits (up to 14 bits incoming base64)
- *
- * fromUnicodeStatus:
- * 24 inDirectMode (boolean)
- * 23..16 base64Counter (0..2)
- * 7..0 bits (6 bits outgoing base64)
- *
- * ignore bits 31..25
- */
-
-U_CDECL_BEGIN
-static void U_CALLCONV
-_IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const uint8_t *source, *sourceLimit;
- UChar *target;
- const UChar *targetLimit;
- int32_t *offsets;
-
- uint8_t *bytes;
- uint8_t byteIndex;
-
- int32_t length, targetCapacity;
-
- /* UTF-7 state */
- uint16_t bits;
- int8_t base64Counter;
- UBool inDirectMode;
-
- int8_t base64Value;
-
- int32_t sourceIndex, nextSourceIndex;
-
- UChar c;
- uint8_t b;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
-
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=pArgs->target;
- targetLimit=pArgs->targetLimit;
- offsets=pArgs->offsets;
- /* get the state machine state */
- {
- uint32_t status=cnv->toUnicodeStatus;
- inDirectMode=(UBool)((status>>24)&1);
- base64Counter=(int8_t)(status>>16);
- bits=(uint16_t)status;
- }
- bytes=cnv->toUBytes;
- byteIndex=cnv->toULength;
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex=byteIndex==0 ? 0 : -1;
- nextSourceIndex=0;
-
- if(inDirectMode) {
-directMode:
- /*
- * In Direct Mode, US-ASCII characters are encoded directly, i.e.,
- * with their US-ASCII byte values.
- * An ampersand starts Unicode (or "escape") Mode.
- *
- * In Direct Mode, only the sourceIndex is used.
- */
- byteIndex=0;
- length=(int32_t)(sourceLimit-source);
- targetCapacity=(int32_t)(targetLimit-target);
- if(length>targetCapacity) {
- length=targetCapacity;
- }
- while(length>0) {
- b=*source++;
- if(!isLegalIMAP(b)) {
- /* illegal */
- bytes[0]=b;
- byteIndex=1;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- } else if(b!=AMPERSAND) {
- /* write directly encoded character */
- *target++=b;
- if(offsets!=NULL) {
- *offsets++=sourceIndex++;
- }
- } else /* AMPERSAND */ {
- /* switch to Unicode mode */
- nextSourceIndex=++sourceIndex;
- inDirectMode=FALSE;
- byteIndex=0;
- bits=0;
- base64Counter=-1;
- goto unicodeMode;
- }
- --length;
- }
- if(source<sourceLimit && target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- } else {
-unicodeMode:
- /*
- * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
- * The base64 sequence ends with any character that is not in the base64 alphabet.
- * A terminating minus sign is consumed.
- * US-ASCII must not be base64-ed.
- *
- * In Unicode Mode, the sourceIndex has the index to the start of the current
- * base64 bytes, while nextSourceIndex is precisely parallel to source,
- * keeping the index to the following byte.
- * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
- */
- while(source<sourceLimit) {
- if(target<targetLimit) {
- bytes[byteIndex++]=b=*source++;
- ++nextSourceIndex;
- if(b>0x7e) {
- /* illegal - test other illegal US-ASCII values by base64Value==-3 */
- inDirectMode=TRUE;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- } else if((base64Value=FROM_BASE64_IMAP(b))>=0) {
- /* collect base64 bytes into UChars */
- switch(base64Counter) {
- case -1: /* -1 is immediately after the & */
- case 0:
- bits=base64Value;
- base64Counter=1;
- break;
- case 1:
- case 3:
- case 4:
- case 6:
- bits=(uint16_t)((bits<<6)|base64Value);
- ++base64Counter;
- break;
- case 2:
- c=(UChar)((bits<<4)|(base64Value>>2));
- if(isLegalIMAP(c)) {
- /* illegal */
- inDirectMode=TRUE;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- *target++=c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- sourceIndex=nextSourceIndex-1;
- }
- bytes[0]=b; /* keep this byte in case an error occurs */
- byteIndex=1;
- bits=(uint16_t)(base64Value&3);
- base64Counter=3;
- break;
- case 5:
- c=(UChar)((bits<<2)|(base64Value>>4));
- if(isLegalIMAP(c)) {
- /* illegal */
- inDirectMode=TRUE;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- *target++=c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- sourceIndex=nextSourceIndex-1;
- }
- bytes[0]=b; /* keep this byte in case an error occurs */
- byteIndex=1;
- bits=(uint16_t)(base64Value&15);
- base64Counter=6;
- break;
- case 7:
- c=(UChar)((bits<<6)|base64Value);
- if(isLegalIMAP(c)) {
- /* illegal */
- inDirectMode=TRUE;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- *target++=c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- sourceIndex=nextSourceIndex;
- }
- byteIndex=0;
- bits=0;
- base64Counter=0;
- break;
- default:
- /* will never occur */
- break;
- }
- } else if(base64Value==-2) {
- /* minus sign terminates the base64 sequence */
- inDirectMode=TRUE;
- if(base64Counter==-1) {
- /* &- i.e. a minus immediately following an ampersand */
- *target++=AMPERSAND;
- if(offsets!=NULL) {
- *offsets++=sourceIndex-1;
- }
- } else {
- /* absorb the minus and leave the Unicode Mode */
- if(bits!=0 || (base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) {
- /* bits are illegally left over, a UChar is incomplete */
- /* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
- sourceIndex=nextSourceIndex;
- goto directMode;
- } else {
- if(base64Counter==-1) {
- /* illegal: & immediately followed by something other than base64 or minus sign */
- /* include the ampersand in the reported sequence */
- --sourceIndex;
- bytes[0]=AMPERSAND;
- bytes[1]=b;
- byteIndex=2;
- }
- /* base64Value==-1 for characters that are illegal only in Unicode mode */
- /* base64Value==-3 for illegal characters */
- /* illegal */
- inDirectMode=TRUE;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- } else {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- }
-endloop:
-
- /*
- * the end of the input stream and detection of truncated input
- * are handled by the framework, but here we must check if we are in Unicode
- * mode and byteIndex==0 because we must end in direct mode
- *
- * conditions:
- * successful
- * in Unicode mode and byteIndex==0
- * end of input and no truncated input
- */
- if( U_SUCCESS(*pErrorCode) &&
- !inDirectMode && byteIndex==0 &&
- pArgs->flush && source>=sourceLimit
- ) {
- if(base64Counter==-1) {
- /* & at the very end of the input */
- /* make the ampersand the reported sequence */
- bytes[0]=AMPERSAND;
- byteIndex=1;
- }
- /* else if(base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */
-
- inDirectMode=TRUE; /* avoid looping */
- *pErrorCode=U_TRUNCATED_CHAR_FOUND;
- }
-
- /* set the converter state back into UConverter */
- cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
- cnv->toULength=byteIndex;
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- pArgs->offsets=offsets;
- return;
-}
-
-static void U_CALLCONV
-_IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const UChar *source, *sourceLimit;
- uint8_t *target, *targetLimit;
- int32_t *offsets;
-
- int32_t length, targetCapacity, sourceIndex;
- UChar c;
- uint8_t b;
-
- /* UTF-7 state */
- uint8_t bits;
- int8_t base64Counter;
- UBool inDirectMode;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
-
- /* set up the local pointers */
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetLimit=(uint8_t *)pArgs->targetLimit;
- offsets=pArgs->offsets;
-
- /* get the state machine state */
- {
- uint32_t status=cnv->fromUnicodeStatus;
- inDirectMode=(UBool)((status>>24)&1);
- base64Counter=(int8_t)(status>>16);
- bits=(uint8_t)status;
- }
-
- /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
- sourceIndex=0;
-
- if(inDirectMode) {
-directMode:
- length=(int32_t)(sourceLimit-source);
- targetCapacity=(int32_t)(targetLimit-target);
- if(length>targetCapacity) {
- length=targetCapacity;
- }
- while(length>0) {
- c=*source++;
- /* encode 0x20..0x7e except '&' directly */
- if(inSetDIMAP(c)) {
- /* encode directly */
- *target++=(uint8_t)c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex++;
- }
- } else if(c==AMPERSAND) {
- /* output &- for & */
- *target++=AMPERSAND;
- if(target<targetLimit) {
- *target++=MINUS;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex++;
- }
- /* realign length and targetCapacity */
- goto directMode;
- } else {
- if(offsets!=NULL) {
- *offsets++=sourceIndex++;
- }
- cnv->charErrorBuffer[0]=MINUS;
- cnv->charErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- } else {
- /* un-read this character and switch to Unicode Mode */
- --source;
- *target++=AMPERSAND;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- inDirectMode=FALSE;
- base64Counter=0;
- goto unicodeMode;
- }
- --length;
- }
- if(source<sourceLimit && target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- } else {
-unicodeMode:
- while(source<sourceLimit) {
- if(target<targetLimit) {
- c=*source++;
- if(isLegalIMAP(c)) {
- /* encode directly */
- inDirectMode=TRUE;
-
- /* trick: back out this character to make this easier */
- --source;
-
- /* terminate the base64 sequence */
- if(base64Counter!=0) {
- /* write remaining bits for the previous character */
- *target++=TO_BASE64_IMAP(bits);
- if(offsets!=NULL) {
- *offsets++=sourceIndex-1;
- }
- }
- /* need to terminate with a minus */
- if(target<targetLimit) {
- *target++=MINUS;
- if(offsets!=NULL) {
- *offsets++=sourceIndex-1;
- }
- } else {
- cnv->charErrorBuffer[0]=MINUS;
- cnv->charErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- goto directMode;
- } else {
- /*
- * base64 this character:
- * Output 2 or 3 base64 bytes for the remaining bits of the previous character
- * and the bits of this character, each implicitly in UTF-16BE.
- *
- * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
- * character to the next. The actual 2 or 4 bits are shifted to the left edge
- * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
- */
- switch(base64Counter) {
- case 0:
- b=(uint8_t)(c>>10);
- *target++=TO_BASE64_IMAP(b);
- if(target<targetLimit) {
- b=(uint8_t)((c>>4)&0x3f);
- *target++=TO_BASE64_IMAP(b);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex++;
- }
- } else {
- if(offsets!=NULL) {
- *offsets++=sourceIndex++;
- }
- b=(uint8_t)((c>>4)&0x3f);
- cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
- cnv->charErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- bits=(uint8_t)((c&15)<<2);
- base64Counter=1;
- break;
- case 1:
- b=(uint8_t)(bits|(c>>14));
- *target++=TO_BASE64_IMAP(b);
- if(target<targetLimit) {
- b=(uint8_t)((c>>8)&0x3f);
- *target++=TO_BASE64_IMAP(b);
- if(target<targetLimit) {
- b=(uint8_t)((c>>2)&0x3f);
- *target++=TO_BASE64_IMAP(b);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- *offsets++=sourceIndex++;
- }
- } else {
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex++;
- }
- b=(uint8_t)((c>>2)&0x3f);
- cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
- cnv->charErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- } else {
- if(offsets!=NULL) {
- *offsets++=sourceIndex++;
- }
- b=(uint8_t)((c>>8)&0x3f);
- cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
- b=(uint8_t)((c>>2)&0x3f);
- cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
- cnv->charErrorBufferLength=2;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- bits=(uint8_t)((c&3)<<4);
- base64Counter=2;
- break;
- case 2:
- b=(uint8_t)(bits|(c>>12));
- *target++=TO_BASE64_IMAP(b);
- if(target<targetLimit) {
- b=(uint8_t)((c>>6)&0x3f);
- *target++=TO_BASE64_IMAP(b);
- if(target<targetLimit) {
- b=(uint8_t)(c&0x3f);
- *target++=TO_BASE64_IMAP(b);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- *offsets++=sourceIndex++;
- }
- } else {
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex++;
- }
- b=(uint8_t)(c&0x3f);
- cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
- cnv->charErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- } else {
- if(offsets!=NULL) {
- *offsets++=sourceIndex++;
- }
- b=(uint8_t)((c>>6)&0x3f);
- cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
- b=(uint8_t)(c&0x3f);
- cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
- cnv->charErrorBufferLength=2;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- bits=0;
- base64Counter=0;
- break;
- default:
- /* will never occur */
- break;
- }
- }
- } else {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- }
-
- if(pArgs->flush && source>=sourceLimit) {
- /* flush remaining bits to the target */
- if(!inDirectMode) {
- if(base64Counter!=0) {
- if(target<targetLimit) {
- *target++=TO_BASE64_IMAP(bits);
- if(offsets!=NULL) {
- *offsets++=sourceIndex-1;
- }
- } else {
- cnv->charErrorBuffer[cnv->charErrorBufferLength++]=TO_BASE64_IMAP(bits);
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- }
- /* need to terminate with a minus */
- if(target<targetLimit) {
- *target++=MINUS;
- if(offsets!=NULL) {
- *offsets++=sourceIndex-1;
- }
- } else {
- cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- }
- /* reset the state for the next conversion */
- cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
- } else {
- /* set the converter state back into UConverter */
- cnv->fromUnicodeStatus=
- (cnv->fromUnicodeStatus&0xf0000000)| /* keep version*/
- ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
- }
-
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- pArgs->offsets=offsets;
- return;
-}
-U_CDECL_END
-
-static const UConverterImpl _IMAPImpl={
- UCNV_IMAP_MAILBOX,
-
- NULL,
- NULL,
-
- _UTF7Open,
- NULL,
- _UTF7Reset,
-
- _IMAPToUnicodeWithOffsets,
- _IMAPToUnicodeWithOffsets,
- _IMAPFromUnicodeWithOffsets,
- _IMAPFromUnicodeWithOffsets,
- NULL,
-
- NULL,
- NULL,
- NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
- NULL,
- ucnv_getCompleteUnicodeSet,
- NULL,
- NULL
-};
-
-static const UConverterStaticData _IMAPStaticData={
- sizeof(UConverterStaticData),
- "IMAP-mailbox-name",
- 0, /* TODO CCSID for IMAP-mailbox-name */
- UCNV_IBM, UCNV_IMAP_MAILBOX,
- 1, 4,
- { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
- FALSE, FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-const UConverterSharedData _IMAPData=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_IMAPStaticData, &_IMAPImpl);
-
-#endif
diff --git a/contrib/libs/icu/common/ucnv_u8.cpp b/contrib/libs/icu/common/ucnv_u8.cpp
deleted file mode 100644
index 9b518e08df6..00000000000
--- a/contrib/libs/icu/common/ucnv_u8.cpp
+++ /dev/null
@@ -1,944 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2002-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* file name: ucnv_u8.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002jul01
-* created by: Markus W. Scherer
-*
-* UTF-8 converter implementation. Used to be in ucnv_utf.c.
-*
-* Also, CESU-8 implementation, see UTR 26.
-* The CESU-8 converter uses all the same functions as the
-* UTF-8 converter, with a branch for converting supplementary code points.
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "unicode/utf.h"
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-#include "uassert.h"
-#include "ucnv_bld.h"
-#include "ucnv_cnv.h"
-#include "cmemory.h"
-#include "ustr_imp.h"
-
-/* Prototypes --------------------------------------------------------------- */
-
-/* Keep these here to make finicky compilers happy */
-
-U_CFUNC void ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs *args,
- UErrorCode *err);
-U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs *args,
- UErrorCode *err);
-
-
-/* UTF-8 -------------------------------------------------------------------- */
-
-#define MAXIMUM_UCS2 0x0000FFFF
-
-static const uint32_t offsetsFromUTF8[5] = {0,
- (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080,
- (uint32_t) 0x03C82080
-};
-
-static UBool hasCESU8Data(const UConverter *cnv)
-{
-#if UCONFIG_ONLY_HTML_CONVERSION
- return FALSE;
-#else
- return (UBool)(cnv->sharedData == &_CESU8Data);
-#endif
-}
-U_CDECL_BEGIN
-static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
- UErrorCode * err)
-{
- UConverter *cnv = args->converter;
- const unsigned char *mySource = (unsigned char *) args->source;
- UChar *myTarget = args->target;
- const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
- const UChar *targetLimit = args->targetLimit;
- unsigned char *toUBytes = cnv->toUBytes;
- UBool isCESU8 = hasCESU8Data(cnv);
- uint32_t ch, ch2 = 0;
- int32_t i, inBytes;
-
- /* Restore size of current sequence */
- if (cnv->toULength > 0 && myTarget < targetLimit)
- {
- inBytes = cnv->mode; /* restore # of bytes to consume */
- i = cnv->toULength; /* restore # of bytes consumed */
- cnv->toULength = 0;
-
- ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/
- cnv->toUnicodeStatus = 0;
- goto morebytes;
- }
-
-
- while (mySource < sourceLimit && myTarget < targetLimit)
- {
- ch = *(mySource++);
- if (U8_IS_SINGLE(ch)) /* Simple case */
- {
- *(myTarget++) = (UChar) ch;
- }
- else
- {
- /* store the first char */
- toUBytes[0] = (char)ch;
- inBytes = U8_COUNT_BYTES_NON_ASCII(ch); /* lookup current sequence length */
- i = 1;
-
-morebytes:
- while (i < inBytes)
- {
- if (mySource < sourceLimit)
- {
- toUBytes[i] = (char) (ch2 = *mySource);
- if (!icu::UTF8::isValidTrail(ch, static_cast<uint8_t>(ch2), i, inBytes) &&
- !(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
- {
- break; /* i < inBytes */
- }
- ch = (ch << 6) + ch2;
- ++mySource;
- i++;
- }
- else
- {
- /* stores a partially calculated target*/
- cnv->toUnicodeStatus = ch;
- cnv->mode = inBytes;
- cnv->toULength = (int8_t) i;
- goto donefornow;
- }
- }
-
- // In CESU-8, only surrogates, not supplementary code points, are encoded directly.
- if (i == inBytes && (!isCESU8 || i <= 3))
- {
- /* Remove the accumulated high bits */
- ch -= offsetsFromUTF8[inBytes];
-
- /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
- if (ch <= MAXIMUM_UCS2)
- {
- /* fits in 16 bits */
- *(myTarget++) = (UChar) ch;
- }
- else
- {
- /* write out the surrogates */
- *(myTarget++) = U16_LEAD(ch);
- ch = U16_TRAIL(ch);
- if (myTarget < targetLimit)
- {
- *(myTarget++) = (UChar)ch;
- }
- else
- {
- /* Put in overflow buffer (not handled here) */
- cnv->UCharErrorBuffer[0] = (UChar) ch;
- cnv->UCharErrorBufferLength = 1;
- *err = U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- }
- else
- {
- cnv->toULength = (int8_t)i;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
- }
-
-donefornow:
- if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
- {
- /* End of target buffer */
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
-
- args->target = myTarget;
- args->source = (const char *) mySource;
-}
-
-static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
- UErrorCode * err)
-{
- UConverter *cnv = args->converter;
- const unsigned char *mySource = (unsigned char *) args->source;
- UChar *myTarget = args->target;
- int32_t *myOffsets = args->offsets;
- int32_t offsetNum = 0;
- const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
- const UChar *targetLimit = args->targetLimit;
- unsigned char *toUBytes = cnv->toUBytes;
- UBool isCESU8 = hasCESU8Data(cnv);
- uint32_t ch, ch2 = 0;
- int32_t i, inBytes;
-
- /* Restore size of current sequence */
- if (cnv->toULength > 0 && myTarget < targetLimit)
- {
- inBytes = cnv->mode; /* restore # of bytes to consume */
- i = cnv->toULength; /* restore # of bytes consumed */
- cnv->toULength = 0;
-
- ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/
- cnv->toUnicodeStatus = 0;
- goto morebytes;
- }
-
- while (mySource < sourceLimit && myTarget < targetLimit)
- {
- ch = *(mySource++);
- if (U8_IS_SINGLE(ch)) /* Simple case */
- {
- *(myTarget++) = (UChar) ch;
- *(myOffsets++) = offsetNum++;
- }
- else
- {
- toUBytes[0] = (char)ch;
- inBytes = U8_COUNT_BYTES_NON_ASCII(ch);
- i = 1;
-
-morebytes:
- while (i < inBytes)
- {
- if (mySource < sourceLimit)
- {
- toUBytes[i] = (char) (ch2 = *mySource);
- if (!icu::UTF8::isValidTrail(ch, static_cast<uint8_t>(ch2), i, inBytes) &&
- !(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
- {
- break; /* i < inBytes */
- }
- ch = (ch << 6) + ch2;
- ++mySource;
- i++;
- }
- else
- {
- cnv->toUnicodeStatus = ch;
- cnv->mode = inBytes;
- cnv->toULength = (int8_t)i;
- goto donefornow;
- }
- }
-
- // In CESU-8, only surrogates, not supplementary code points, are encoded directly.
- if (i == inBytes && (!isCESU8 || i <= 3))
- {
- /* Remove the accumulated high bits */
- ch -= offsetsFromUTF8[inBytes];
-
- /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
- if (ch <= MAXIMUM_UCS2)
- {
- /* fits in 16 bits */
- *(myTarget++) = (UChar) ch;
- *(myOffsets++) = offsetNum;
- }
- else
- {
- /* write out the surrogates */
- *(myTarget++) = U16_LEAD(ch);
- *(myOffsets++) = offsetNum;
- ch = U16_TRAIL(ch);
- if (myTarget < targetLimit)
- {
- *(myTarget++) = (UChar)ch;
- *(myOffsets++) = offsetNum;
- }
- else
- {
- cnv->UCharErrorBuffer[0] = (UChar) ch;
- cnv->UCharErrorBufferLength = 1;
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }
- offsetNum += i;
- }
- else
- {
- cnv->toULength = (int8_t)i;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
- }
-
-donefornow:
- if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
- { /* End of target buffer */
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
-
- args->target = myTarget;
- args->source = (const char *) mySource;
- args->offsets = myOffsets;
-}
-U_CDECL_END
-
-U_CFUNC void U_CALLCONV ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
- UErrorCode * err)
-{
- UConverter *cnv = args->converter;
- const UChar *mySource = args->source;
- const UChar *sourceLimit = args->sourceLimit;
- uint8_t *myTarget = (uint8_t *) args->target;
- const uint8_t *targetLimit = (uint8_t *) args->targetLimit;
- uint8_t *tempPtr;
- UChar32 ch;
- uint8_t tempBuf[4];
- int32_t indexToWrite;
- UBool isNotCESU8 = !hasCESU8Data(cnv);
-
- if (cnv->fromUChar32 && myTarget < targetLimit)
- {
- ch = cnv->fromUChar32;
- cnv->fromUChar32 = 0;
- goto lowsurrogate;
- }
-
- while (mySource < sourceLimit && myTarget < targetLimit)
- {
- ch = *(mySource++);
-
- if (ch < 0x80) /* Single byte */
- {
- *(myTarget++) = (uint8_t) ch;
- }
- else if (ch < 0x800) /* Double byte */
- {
- *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0);
- if (myTarget < targetLimit)
- {
- *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80);
- }
- else
- {
- cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80);
- cnv->charErrorBufferLength = 1;
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }
- else {
- /* Check for surrogates */
- if(U16_IS_SURROGATE(ch) && isNotCESU8) {
-lowsurrogate:
- if (mySource < sourceLimit) {
- /* test both code units */
- if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) {
- /* convert and consume this supplementary code point */
- ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
- ++mySource;
- /* exit this condition tree */
- }
- else {
- /* this is an unpaired trail or lead code unit */
- /* callback(illegal) */
- cnv->fromUChar32 = ch;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
- else {
- /* no more input */
- cnv->fromUChar32 = ch;
- break;
- }
- }
-
- /* Do we write the buffer directly for speed,
- or do we have to be careful about target buffer space? */
- tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf);
-
- if (ch <= MAXIMUM_UCS2) {
- indexToWrite = 2;
- tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0);
- }
- else {
- indexToWrite = 3;
- tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0);
- tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80);
- }
- tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80);
- tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80);
-
- if (tempPtr == myTarget) {
- /* There was enough space to write the codepoint directly. */
- myTarget += (indexToWrite + 1);
- }
- else {
- /* We might run out of room soon. Write it slowly. */
- for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) {
- if (myTarget < targetLimit) {
- *(myTarget++) = *tempPtr;
- }
- else {
- cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr;
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }
- }
- }
- }
-
- if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
- {
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
-
- args->target = (char *) myTarget;
- args->source = mySource;
-}
-
-U_CFUNC void U_CALLCONV ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
- UErrorCode * err)
-{
- UConverter *cnv = args->converter;
- const UChar *mySource = args->source;
- int32_t *myOffsets = args->offsets;
- const UChar *sourceLimit = args->sourceLimit;
- uint8_t *myTarget = (uint8_t *) args->target;
- const uint8_t *targetLimit = (uint8_t *) args->targetLimit;
- uint8_t *tempPtr;
- UChar32 ch;
- int32_t offsetNum, nextSourceIndex;
- int32_t indexToWrite;
- uint8_t tempBuf[4];
- UBool isNotCESU8 = !hasCESU8Data(cnv);
-
- if (cnv->fromUChar32 && myTarget < targetLimit)
- {
- ch = cnv->fromUChar32;
- cnv->fromUChar32 = 0;
- offsetNum = -1;
- nextSourceIndex = 0;
- goto lowsurrogate;
- } else {
- offsetNum = 0;
- }
-
- while (mySource < sourceLimit && myTarget < targetLimit)
- {
- ch = *(mySource++);
-
- if (ch < 0x80) /* Single byte */
- {
- *(myOffsets++) = offsetNum++;
- *(myTarget++) = (char) ch;
- }
- else if (ch < 0x800) /* Double byte */
- {
- *(myOffsets++) = offsetNum;
- *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0);
- if (myTarget < targetLimit)
- {
- *(myOffsets++) = offsetNum++;
- *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80);
- }
- else
- {
- cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80);
- cnv->charErrorBufferLength = 1;
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }
- else
- /* Check for surrogates */
- {
- nextSourceIndex = offsetNum + 1;
-
- if(U16_IS_SURROGATE(ch) && isNotCESU8) {
-lowsurrogate:
- if (mySource < sourceLimit) {
- /* test both code units */
- if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) {
- /* convert and consume this supplementary code point */
- ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
- ++mySource;
- ++nextSourceIndex;
- /* exit this condition tree */
- }
- else {
- /* this is an unpaired trail or lead code unit */
- /* callback(illegal) */
- cnv->fromUChar32 = ch;
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
- else {
- /* no more input */
- cnv->fromUChar32 = ch;
- break;
- }
- }
-
- /* Do we write the buffer directly for speed,
- or do we have to be careful about target buffer space? */
- tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf);
-
- if (ch <= MAXIMUM_UCS2) {
- indexToWrite = 2;
- tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0);
- }
- else {
- indexToWrite = 3;
- tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0);
- tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80);
- }
- tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80);
- tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80);
-
- if (tempPtr == myTarget) {
- /* There was enough space to write the codepoint directly. */
- myTarget += (indexToWrite + 1);
- myOffsets[0] = offsetNum;
- myOffsets[1] = offsetNum;
- myOffsets[2] = offsetNum;
- if (indexToWrite >= 3) {
- myOffsets[3] = offsetNum;
- }
- myOffsets += (indexToWrite + 1);
- }
- else {
- /* We might run out of room soon. Write it slowly. */
- for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) {
- if (myTarget < targetLimit)
- {
- *(myOffsets++) = offsetNum;
- *(myTarget++) = *tempPtr;
- }
- else
- {
- cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr;
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }
- }
- offsetNum = nextSourceIndex;
- }
- }
-
- if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
- {
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
-
- args->target = (char *) myTarget;
- args->source = mySource;
- args->offsets = myOffsets;
-}
-
-U_CDECL_BEGIN
-static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
- UErrorCode *err) {
- UConverter *cnv;
- const uint8_t *sourceInitial;
- const uint8_t *source;
- uint8_t myByte;
- UChar32 ch;
- int8_t i;
-
- /* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */
-
- cnv = args->converter;
- sourceInitial = source = (const uint8_t *)args->source;
- if (source >= (const uint8_t *)args->sourceLimit)
- {
- /* no input */
- *err = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0xffff;
- }
-
- myByte = (uint8_t)*(source++);
- if (U8_IS_SINGLE(myByte))
- {
- args->source = (const char *)source;
- return (UChar32)myByte;
- }
-
- uint16_t countTrailBytes = U8_COUNT_TRAIL_BYTES(myByte);
- if (countTrailBytes == 0) {
- cnv->toUBytes[0] = myByte;
- cnv->toULength = 1;
- *err = U_ILLEGAL_CHAR_FOUND;
- args->source = (const char *)source;
- return 0xffff;
- }
-
- /*The byte sequence is longer than the buffer area passed*/
- if (((const char *)source + countTrailBytes) > args->sourceLimit)
- {
- /* check if all of the remaining bytes are trail bytes */
- uint16_t extraBytesToWrite = countTrailBytes + 1;
- cnv->toUBytes[0] = myByte;
- i = 1;
- *err = U_TRUNCATED_CHAR_FOUND;
- while(source < (const uint8_t *)args->sourceLimit) {
- uint8_t b = *source;
- if(icu::UTF8::isValidTrail(myByte, b, i, extraBytesToWrite)) {
- cnv->toUBytes[i++] = b;
- ++source;
- } else {
- /* error even before we run out of input */
- *err = U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
- cnv->toULength = i;
- args->source = (const char *)source;
- return 0xffff;
- }
-
- ch = myByte << 6;
- if(countTrailBytes == 2) {
- uint8_t t1 = *source, t2;
- if(U8_IS_VALID_LEAD3_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source)) {
- args->source = (const char *)(source + 1);
- return (((ch + t1) << 6) + t2) - offsetsFromUTF8[3];
- }
- } else if(countTrailBytes == 1) {
- uint8_t t1 = *source;
- if(U8_IS_TRAIL(t1)) {
- args->source = (const char *)(source + 1);
- return (ch + t1) - offsetsFromUTF8[2];
- }
- } else { // countTrailBytes == 3
- uint8_t t1 = *source, t2, t3;
- if(U8_IS_VALID_LEAD4_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source) &&
- U8_IS_TRAIL(t3 = *++source)) {
- args->source = (const char *)(source + 1);
- return (((((ch + t1) << 6) + t2) << 6) + t3) - offsetsFromUTF8[4];
- }
- }
- args->source = (const char *)source;
-
- for(i = 0; sourceInitial < source; ++i) {
- cnv->toUBytes[i] = *sourceInitial++;
- }
- cnv->toULength = i;
- *err = U_ILLEGAL_CHAR_FOUND;
- return 0xffff;
-}
-U_CDECL_END
-
-/* UTF-8-from-UTF-8 conversion functions ------------------------------------ */
-
-U_CDECL_BEGIN
-/* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). Source bytes are read through pToUArgs, validated bytes are written through pFromUArgs->target, and *pErrorCode reports target overflow, illegal input, or (via U_USING_DEFAULT_WARNING) a request to let the standard converter handle the case. */
-static void U_CALLCONV
-ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
- UConverterToUnicodeArgs *pToUArgs,
- UErrorCode *pErrorCode) {
- UConverter *utf8; /* pToUArgs->converter; carries the partial-sequence state between calls */
- const uint8_t *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
- int32_t count; /* single budget for the main loop: bytes that may be both read and written */
-
- int8_t oldToULength, toULength, toULimit; /* bytes buffered by a previous call, bytes of the sequence seen so far, expected sequence length */
-
- UChar32 c; /* accumulator for the sequence being validated; the code below treats c!=0 as "partial sequence pending" */
- uint8_t b, t1, t2;
-
- /* set up the local pointers */
- utf8=pToUArgs->converter;
- source=(uint8_t *)pToUArgs->source;
- sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
- target=(uint8_t *)pFromUArgs->target;
- targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
-
- /* get the converter state from the UTF-8 UConverter */
- if(utf8->toULength > 0) {
- toULength=oldToULength=utf8->toULength;
- toULimit=(int8_t)utf8->mode; /* mode holds the expected length stored at the partial-sequence exits below */
- c=(UChar32)utf8->toUnicodeStatus;
- } else {
- toULength=oldToULength=toULimit=0;
- c = 0;
- }
-
- count=(int32_t)(sourceLimit-source)+oldToULength; /* input bytes available, counting those already buffered */
- if(count<toULimit) {
- /*
- * Not enough input to complete the partial character.
- * Jump to moreBytes below - it will not output to target.
- */
- } else if(targetCapacity<toULimit) {
- /*
- * Not enough target capacity to output the partial character.
- * Let the standard converter handle this.
- */
- *pErrorCode=U_USING_DEFAULT_WARNING;
- return;
- } else {
- // Use a single counter for source and target, counting the minimum of
- // the source length and the target capacity.
- // Let the standard converter handle edge cases.
- if(count>targetCapacity) {
- count=targetCapacity;
- }
-
- // The conversion loop checks count>0 only once per character.
- // If the buffer ends with a truncated sequence,
- // then we reduce the count to stop before that,
- // and collect the remaining bytes after the conversion loop.
-
- // Do not go back into the bytes that will be read for finishing a partial
- // sequence from the previous buffer.
- int32_t length=count-toULimit;
- U8_TRUNCATE_IF_INCOMPLETE(source, 0, length);
- count=toULimit+length;
- }
-
- if(c!=0) { /* a sequence was left unfinished by the previous call: clear the saved state and resume it */
- utf8->toUnicodeStatus=0;
- utf8->toULength=0;
- goto moreBytes;
- /* See note in ucnv_SBCSFromUTF8() about this goto. */
- }
-
- /* conversion loop */
- while(count>0) {
- b=*source++;
- if(U8_IS_SINGLE(b)) {
- /* convert ASCII */
- *target++=b;
- --count;
- continue;
- } else {
- if(b>=0xe0) {
- if( /* handle U+0800..U+FFFF inline */
- b<0xf0 &&
- U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) &&
- U8_IS_TRAIL(t2=source[1])
- ) {
- source+=2;
- *target++=b;
- *target++=t1;
- *target++=t2;
- count-=3;
- continue;
- }
- } else {
- if( /* handle U+0080..U+07FF inline */
- b>=0xc2 &&
- U8_IS_TRAIL(t1=*source)
- ) {
- ++source;
- *target++=b;
- *target++=t1;
- count-=2;
- continue;
- }
- }
-
- /* handle "complicated" and error cases, and continuing partial characters */
- oldToULength=0;
- toULength=1;
- toULimit=U8_COUNT_BYTES_NON_ASCII(b);
- c=b;
-moreBytes:
- while(toULength<toULimit) {
- if(source<sourceLimit) {
- b=*source;
- if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
- ++source;
- ++toULength;
- c=(c<<6)+b;
- } else {
- break; /* sequence too short, stop with toULength<toULimit */
- }
- } else {
- /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
- source-=(toULength-oldToULength); /* rewind over the bytes of this sequence that were read from the current buffer */
- while(oldToULength<toULength) {
- utf8->toUBytes[oldToULength++]=*source++;
- }
- utf8->toUnicodeStatus=c;
- utf8->toULength=toULength;
- utf8->mode=toULimit;
- pToUArgs->source=(char *)source;
- pFromUArgs->target=(char *)target;
- return;
- }
- }
-
- if(toULength!=toULimit) {
- /* error handling: illegal UTF-8 byte sequence */
- source-=(toULength-oldToULength); /* rewind, then copy the offending bytes into toUBytes */
- while(oldToULength<toULength) {
- utf8->toUBytes[oldToULength++]=*source++;
- }
- utf8->toULength=toULength;
- pToUArgs->source=(char *)source;
- pFromUArgs->target=(char *)target;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- return;
- }
-
- /* copy the legal byte sequence to the target */
- {
- int8_t i;
-
- for(i=0; i<oldToULength; ++i) { /* first the bytes buffered in toUBytes by a previous call */
- *target++=utf8->toUBytes[i];
- }
- source-=(toULength-oldToULength);
- for(; i<toULength; ++i) { /* then the bytes read from the current source buffer */
- *target++=*source++;
- }
- count-=toULength;
- }
- }
- }
- U_ASSERT(count>=0);
-
- if(U_SUCCESS(*pErrorCode) && source<sourceLimit) { /* input remains after the counted loop */
- if(target==(const uint8_t *)pFromUArgs->targetLimit) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- } else {
- b=*source;
- toULimit=U8_COUNT_BYTES(b);
- if(toULimit>(sourceLimit-source)) {
- /* collect a truncated byte sequence */
- toULength=0;
- c=b;
- for(;;) {
- utf8->toUBytes[toULength++]=b;
- if(++source==sourceLimit) {
- /* partial byte sequence at end of source */
- utf8->toUnicodeStatus=c;
- utf8->toULength=toULength;
- utf8->mode=toULimit;
- break;
- } else if(!icu::UTF8::isValidTrail(c, b=*source, toULength, toULimit)) {
- utf8->toULength=toULength; /* the bytes collected so far are reported as the illegal sequence */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- c=(c<<6)+b;
- }
- } else {
- /* partial-sequence target overflow: fall back to the pivoting implementation */
- *pErrorCode=U_USING_DEFAULT_WARNING;
- }
- }
- }
-
- /* write back the updated pointers */
- pToUArgs->source=(char *)source;
- pFromUArgs->target=(char *)target;
-}
-
-U_CDECL_END
-
-/* UTF-8 converter data ----------------------------------------------------- */
-
-static const UConverterImpl _UTF8Impl={ /* function table for the UTF-8 converter; NOTE(review): slot meanings are defined by the UConverterImpl declaration, which is not shown here */
- UCNV_UTF8,
-
- NULL,
- NULL,
-
- NULL,
- NULL,
- NULL,
-
- ucnv_toUnicode_UTF8,
- ucnv_toUnicode_UTF8_OFFSETS_LOGIC,
- ucnv_fromUnicode_UTF8,
- ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
- ucnv_getNextUChar_UTF8,
-
- NULL,
- NULL,
- NULL,
- NULL,
- ucnv_getNonSurrogateUnicodeSet,
-
- ucnv_UTF8FromUTF8, /* the same validate-and-copy function fills both of the last two slots */
- ucnv_UTF8FromUTF8
-};
-
-/* The 1208 CCSID refers to any version of Unicode of UTF-8 */
-static const UConverterStaticData _UTF8StaticData={ /* static descriptor for the "UTF-8" converter */
- sizeof(UConverterStaticData),
- "UTF-8",
- 1208, UCNV_IBM, UCNV_UTF8,
- 1, 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
- { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, /* EF BF BD is U+FFFD in UTF-8; presumably the substitution bytes and their length (3) - confirm against UConverterStaticData */
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-
-const UConverterSharedData _UTF8Data= /* non-static shared-data object built from the UTF-8 static data and function table above */
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF8StaticData, &_UTF8Impl);
-
-/* CESU-8 converter data ---------------------------------------------------- */
-
-static const UConverterImpl _CESU8Impl={ /* function table for CESU-8; reuses the UTF-8 toUnicode/fromUnicode functions */
- UCNV_CESU8,
-
- NULL,
- NULL,
-
- NULL,
- NULL,
- NULL,
-
- ucnv_toUnicode_UTF8,
- ucnv_toUnicode_UTF8_OFFSETS_LOGIC,
- ucnv_fromUnicode_UTF8,
- ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
- NULL, /* _UTF8Impl has ucnv_getNextUChar_UTF8 in this slot */
-
- NULL,
- NULL,
- NULL,
- NULL,
- ucnv_getCompleteUnicodeSet, /* _UTF8Impl uses ucnv_getNonSurrogateUnicodeSet in this slot */
-
- NULL, /* unlike _UTF8Impl, the last two slots do not use ucnv_UTF8FromUTF8 */
- NULL
-};
-
-static const UConverterStaticData _CESU8StaticData={ /* static descriptor for the "CESU-8" converter */
- sizeof(UConverterStaticData),
- "CESU-8",
- 9400, /* CCSID for CESU-8 */
- UCNV_UNKNOWN, UCNV_CESU8, 1, 3, /* same 1, 3 pair as in _UTF8StaticData, where it is commented as max 3 bytes per UChar */
- { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, /* same EF BF BD (U+FFFD) bytes and length 3 as in _UTF8StaticData */
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-
-const UConverterSharedData _CESU8Data= /* non-static shared-data object built from the CESU-8 static data and function table above */
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CESU8StaticData, &_CESU8Impl);
-
-#endif
diff --git a/contrib/libs/icu/common/ucnvbocu.cpp b/contrib/libs/icu/common/ucnvbocu.cpp
deleted file mode 100644
index 7c2aab56558..00000000000
--- a/contrib/libs/icu/common/ucnvbocu.cpp
+++ /dev/null
@@ -1,1413 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2002-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: ucnvbocu.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002mar27
-* created by: Markus W. Scherer
-*
-* This is an implementation of the Binary Ordered Compression for Unicode,
-* in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "unicode/ucnv_cb.h"
-#include "unicode/utf16.h"
-#include "putilimp.h"
-#include "ucnv_bld.h"
-#include "ucnv_cnv.h"
-#include "uassert.h"
-
-/* BOCU-1 constants and macros ---------------------------------------------- */
-
-/*
- * BOCU-1 encodes the code points of a Unicode string as
- * a sequence of byte-encoded differences (slope detection),
- * preserving lexical order.
- *
- * Optimize the difference-taking for runs of Unicode text within
- * small scripts:
- *
- * Most small scripts are allocated within aligned 128-blocks of Unicode
- * code points. Lexical order is preserved if the "previous code point" state
- * is always moved into the middle of such a block.
- *
- * Additionally, "prev" is moved from anywhere in the Unihan and Hangul
- * areas into the middle of those areas.
- *
- * C0 control codes and space are encoded with their US-ASCII bytes.
- * "prev" is reset for C0 controls but not for space.
- */
-
-/* initial value for "prev": middle of the ASCII range */
-#define BOCU1_ASCII_PREV 0x40
-
-/* bounding byte values for differences */
-#define BOCU1_MIN 0x21 /* lowest difference byte; packDiff() uses it as the lead of four-byte negative differences */
-#define BOCU1_MIDDLE 0x90 /* byte that encodes difference 0, see PACK_SINGLE_DIFF */
-#define BOCU1_MAX_LEAD 0xfe
-#define BOCU1_MAX_TRAIL 0xff
-#define BOCU1_RESET 0xff
-
-/* number of lead bytes */
-#define BOCU1_COUNT (BOCU1_MAX_LEAD-BOCU1_MIN+1) /* = 222 */
-
-/* adjust trail byte counts for the use of some C0 control byte values */
-#define BOCU1_TRAIL_CONTROLS_COUNT 20
-#define BOCU1_TRAIL_BYTE_OFFSET (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT) /* = 13 */
-
-/* number of trail bytes */
-#define BOCU1_TRAIL_COUNT ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT) /* = 223+20 = 243 */
-
-/*
- * number of positive and negative single-byte codes
- * (counting 0==BOCU1_MIDDLE among the positive ones)
- */
-#define BOCU1_SINGLE 64
-
-/* number of lead bytes for positive and negative 2/3/4-byte sequences */
-#define BOCU1_LEAD_2 43
-#define BOCU1_LEAD_3 3
-#define BOCU1_LEAD_4 1
-
-/* The difference value range for single-byters. */
-#define BOCU1_REACH_POS_1 (BOCU1_SINGLE-1) /* = 63 */
-#define BOCU1_REACH_NEG_1 (-BOCU1_SINGLE) /* = -64 */
-
-/* The difference value range for double-byters. */
-#define BOCU1_REACH_POS_2 (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT) /* = 63+43*243 = 10512 */
-#define BOCU1_REACH_NEG_2 (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT) /* = -64-43*243 = -10513 */
-
-/* The difference value range for 3-byters. */
-#define BOCU1_REACH_POS_3 \
- (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT) /* = 10512+3*243*243 = 187659 */
-
-#define BOCU1_REACH_NEG_3 (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT) /* = -10513-3*243*243 = -187660 */
-
-/* The lead byte start values. */
-#define BOCU1_START_POS_2 (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1) /* = 0xd0 */
-#define BOCU1_START_POS_3 (BOCU1_START_POS_2+BOCU1_LEAD_2) /* = 0xfb */
-#define BOCU1_START_POS_4 (BOCU1_START_POS_3+BOCU1_LEAD_3) /* = 0xfe */
- /* ==BOCU1_MAX_LEAD */
-
-#define BOCU1_START_NEG_2 (BOCU1_MIDDLE+BOCU1_REACH_NEG_1) /* = 0x50 */
-#define BOCU1_START_NEG_3 (BOCU1_START_NEG_2-BOCU1_LEAD_2) /* = 0x25 */
-#define BOCU1_START_NEG_4 (BOCU1_START_NEG_3-BOCU1_LEAD_3) /* = 0x22 */
- /* ==BOCU1_MIN+1 */
-
-/* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */
-#define BOCU1_LENGTH_FROM_LEAD(lead) \
- ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 : \
- (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 : \
- (BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4) /* tests widen outward from the single-byte range; note that (lead) is evaluated several times */
-
-/* The length of a byte sequence, according to its packed form. */
-#define BOCU1_LENGTH_FROM_PACKED(packed) \
- ((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4) /* a top byte of 1..3 is the length itself; a larger top byte is the lead of a four-byte sequence (see packDiff) */
-
-/*
- * 12 commonly used C0 control codes (and space) are only used to encode
- * themselves directly,
- * which makes BOCU-1 MIME-usable and reasonably safe for
- * ASCII-oriented software.
- *
- * These controls are
- * 0 NUL
- *
- * 7 BEL
- * 8 BS
- *
- * 9 TAB
- * a LF
- * b VT
- * c FF
- * d CR
- *
- * e SO
- * f SI
- *
- * 1a SUB
- * 1b ESC
- *
- * The other 20 C0 controls are also encoded directly (to preserve order)
- * but are also used as trail bytes in difference encoding
- * (for better compression).
- */
-#define BOCU1_TRAIL_TO_BYTE(t) ((t)>=BOCU1_TRAIL_CONTROLS_COUNT ? (t)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[t]) /* trail values 0..19 go through the control-code table, larger ones are shifted by the offset; (t) is evaluated more than once */
-
-/*
- * Byte value map for control codes,
- * from external byte values 0x00..0x20
- * to trail byte values 0..19 (0..0x13) as used in the difference calculation.
- * External byte values that are illegal as trail bytes are mapped to -1.
- */
-static const int8_t
-bocu1ByteToTrail[BOCU1_MIN]={ /* BOCU1_MIN (0x21) entries, indexed by external byte value 0x00..0x20 */
-/* 0 1 2 3 4 5 6 7 */
- -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
-
-/* 8 9 a b c d e f */
- -1, -1, -1, -1, -1, -1, -1, -1,
-
-/* 10 11 12 13 14 15 16 17 */
- 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
-
-/* 18 19 1a 1b 1c 1d 1e 1f */
- 0x0e, 0x0f, -1, -1, 0x10, 0x11, 0x12, 0x13,
-
-/* 20 (space is never a trail byte) */
- -1
-};
-
-/*
- * Byte value map for control codes,
- * from trail byte values 0..19 (0..0x13) as used in the difference calculation
- * to external byte values 0x00..0x20.
- */
-static const int8_t
-bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={ /* inverse of the non-negative entries of bocu1ByteToTrail; used by BOCU1_TRAIL_TO_BYTE */
-/* 0 1 2 3 4 5 6 7 */
- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
-
-/* 8 9 a b c d e f */
- 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
-
-/* 10 11 12 13 */
- 0x1c, 0x1d, 0x1e, 0x1f
-};
-
-/**
- * Integer division and modulo with negative numerators
- * yields negative modulo results and quotients that are one more than
- * what we need here.
- * This macro adjusts the results so that the modulo-value m is always >=0.
- *
- * For positive n, the if() condition is always FALSE.
- *
- * @param n Number to be split into quotient and rest.
- * Will be modified to contain the quotient.
- * @param d Divisor.
- * @param m Output variable for the rest (modulo result).
- */
-#define NEGDIVMOD(n, d, m) UPRV_BLOCK_MACRO_BEGIN { \
- (m)=(n)%(d); \
- (n)/=(d); \
- if((m)<0) { \
- --(n); \
- (m)+=(d); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/* Faster versions of packDiff() for single-byte-encoded diff values. */
-
-/** Is a diff value encodable in a single byte? True for -64..63 (BOCU1_REACH_NEG_1..BOCU1_REACH_POS_1). */
-#define DIFF_IS_SINGLE(diff) (BOCU1_REACH_NEG_1<=(diff) && (diff)<=BOCU1_REACH_POS_1)
-
-/** Encode a diff value in a single byte. The caller is expected to have checked DIFF_IS_SINGLE(diff); the macro itself does not. */
-#define PACK_SINGLE_DIFF(diff) (BOCU1_MIDDLE+(diff))
-
-/** Is a diff value encodable in two bytes? The range test also accepts single-byte diffs, so callers test DIFF_IS_SINGLE first. */
-#define DIFF_IS_DOUBLE(diff) (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2)
-
-/* BOCU-1 implementation functions ------------------------------------------ */
-
-#define BOCU1_SIMPLE_PREV(c) (((c)&~0x7f)+BOCU1_ASCII_PREV) /* clears the low 7 bits of c and adds 0x40: the middle of c's aligned 128-block */
-
-/**
- * Compute the next "previous" value for differencing
- * from the current code point.
- *
- * @param c current code point, 0x3040..0xd7a3 (rest handled by macro below)
- * @return "previous code point" state value
- */
-static inline int32_t
-bocu1Prev(int32_t c) {
- /* compute new prev; the commented-out bounds are guaranteed by the BOCU1_PREV macro, which only calls this for 0x3040..0xd7a3 */
- if(/* 0x3040<=c && */ c<=0x309f) {
- /* Hiragana is not 128-aligned */
- return 0x3070;
- } else if(0x4e00<=c && c<=0x9fa5) {
- /* CJK Unihan */
- return 0x4e00-BOCU1_REACH_NEG_2; /* = 0x4e00+10513 = 0x7711 */
- } else if(0xac00<=c /* && c<=0xd7a3 */) {
- /* Korean Hangul */
- return (0xd7a3+0xac00)/2; /* midpoint of the range, = 0xc1d1 */
- } else {
- /* mostly small scripts */
- return BOCU1_SIMPLE_PREV(c);
- }
-}
-
-/** Fast version of bocu1Prev() for most scripts: code points outside 0x3040..0xd7a3 use BOCU1_SIMPLE_PREV directly, the rest call bocu1Prev(). Evaluates (c) more than once. */
-#define BOCU1_PREV(c) ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c))
-
-/*
- * The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c.
- * The UConverter fields are used as follows:
- *
- * fromUnicodeStatus encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
- *
- * toUnicodeStatus decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
- * mode decoder's incomplete (diff<<2)|count (ignored when toULength==0)
- */
-
-/* BOCU-1-from-Unicode conversion functions --------------------------------- */
-
-/**
- * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
- * and return a packed integer with them.
- * In this version the single-byte case is compiled out (#if 0) and asserted
- * against, so only 2..4-byte results are produced here.
- *
- * The encoding favors small absolute differences with short encodings
- * to compress runs of same-script characters.
- *
- * Optimized version with unrolled loops and fewer floating-point operations
- * than the standard packDiff(). (NOTE(review): the body uses only integer / and %.)
- *
- * @param diff difference value -0x10ffff..0x10ffff
- * @return
- * 0x010000zz for 1-byte sequence zz
- * 0x0200yyzz for 2-byte sequence yy zz
- * 0x03xxyyzz for 3-byte sequence xx yy zz
- * 0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
- */
-static int32_t
-packDiff(int32_t diff) {
- int32_t result, m; /* result: packed bytes being assembled; m: trail value from the latest modulo step */
-
- U_ASSERT(!DIFF_IS_SINGLE(diff)); /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */
- if(diff>=BOCU1_REACH_NEG_1) {
- /* mostly positive differences, and single-byte negative ones */
-#if 0 /* single-byte case handled in macros, see below */
- if(diff<=BOCU1_REACH_POS_1) {
- /* single byte */
- return 0x01000000|(BOCU1_MIDDLE+diff);
- } else
-#endif
- if(diff<=BOCU1_REACH_POS_2) {
- /* two bytes */
- diff-=BOCU1_REACH_POS_1+1; /* rebase so the smallest two-byte difference becomes 0 */
- result=0x02000000;
-
- m=diff%BOCU1_TRAIL_COUNT;
- diff/=BOCU1_TRAIL_COUNT;
- result|=BOCU1_TRAIL_TO_BYTE(m);
-
- result|=(BOCU1_START_POS_2+diff)<<8; /* the remaining quotient selects the lead byte */
- } else if(diff<=BOCU1_REACH_POS_3) {
- /* three bytes */
- diff-=BOCU1_REACH_POS_2+1;
- result=0x03000000;
-
- m=diff%BOCU1_TRAIL_COUNT;
- diff/=BOCU1_TRAIL_COUNT;
- result|=BOCU1_TRAIL_TO_BYTE(m);
-
- m=diff%BOCU1_TRAIL_COUNT;
- diff/=BOCU1_TRAIL_COUNT;
- result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
-
- result|=(BOCU1_START_POS_3+diff)<<16;
- } else {
- /* four bytes */
- diff-=BOCU1_REACH_POS_3+1;
-
- m=diff%BOCU1_TRAIL_COUNT;
- diff/=BOCU1_TRAIL_COUNT;
- result=BOCU1_TRAIL_TO_BYTE(m);
-
- m=diff%BOCU1_TRAIL_COUNT;
- diff/=BOCU1_TRAIL_COUNT;
- result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
-
- /*
- * We know that / and % would deliver quotient 0 and rest=diff.
- * Avoid division and modulo for performance.
- */
- result|=BOCU1_TRAIL_TO_BYTE(diff)<<16;
-
- result|=((uint32_t)BOCU1_START_POS_4)<<24; /* the top byte is the lead itself, so no separate length tag is stored */
- }
- } else {
- /* two- to four-byte negative differences */
- if(diff>=BOCU1_REACH_NEG_2) {
- /* two bytes */
- diff-=BOCU1_REACH_NEG_1;
- result=0x02000000;
-
- NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
- result|=BOCU1_TRAIL_TO_BYTE(m);
-
- result|=(BOCU1_START_NEG_2+diff)<<8;
- } else if(diff>=BOCU1_REACH_NEG_3) {
- /* three bytes */
- diff-=BOCU1_REACH_NEG_2;
- result=0x03000000;
-
- NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
- result|=BOCU1_TRAIL_TO_BYTE(m);
-
- NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
- result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
-
- result|=(BOCU1_START_NEG_3+diff)<<16;
- } else {
- /* four bytes */
- diff-=BOCU1_REACH_NEG_3;
-
- NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
- result=BOCU1_TRAIL_TO_BYTE(m);
-
- NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
- result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
-
- /*
- * We know that NEGDIVMOD would deliver
- * quotient -1 and rest=diff+BOCU1_TRAIL_COUNT.
- * Avoid division and modulo for performance.
- */
- m=diff+BOCU1_TRAIL_COUNT;
- result|=BOCU1_TRAIL_TO_BYTE(m)<<16;
-
- result|=BOCU1_MIN<<24; /* BOCU1_MIN (0x21) is the lead byte of every four-byte negative difference */
- }
- }
- return result;
-}
-
-
-static void U_CALLCONV
-_Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, /* encodes UTF-16 from pArgs->source as BOCU-1 bytes into pArgs->target, writing one source offset per output byte */
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const UChar *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
- int32_t *offsets; /* written unconditionally below; presumably non-NULL for this variant - confirm against the caller */
-
- int32_t prev, c, diff; /* prev: differencing base; c: current code unit or code point; diff: c-prev, later reused for the packed bytes */
-
- int32_t sourceIndex, nextSourceIndex; /* offset of the current character, and offset of the next unread code unit */
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
- offsets=pArgs->offsets;
-
- /* get the converter state from UConverter */
- c=cnv->fromUChar32; /* nonzero when the previous call ended on a lead surrogate (see the state write-back at the end) */
- prev=(int32_t)cnv->fromUnicodeStatus;
- if(prev==0) {
- prev=BOCU1_ASCII_PREV;
- }
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex= c==0 ? 0 : -1;
- nextSourceIndex=0;
-
- /* conversion loop */
- if(c!=0 && targetCapacity>0) {
- goto getTrail;
- }
-
-fastSingle:
- /* fast loop for single-byte differences */
- /* use only one loop counter variable, targetCapacity, not also source */
- diff=(int32_t)(sourceLimit-source);
- if(targetCapacity>diff) {
- targetCapacity=diff;
- }
- while(targetCapacity>0 && (c=*source)<0x3000) { /* peek only; source advances once the unit has been encoded */
- if(c<=0x20) {
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(uint8_t)c;
- *offsets++=nextSourceIndex++;
- ++source;
- --targetCapacity;
- } else {
- diff=c-prev;
- if(DIFF_IS_SINGLE(diff)) {
- prev=BOCU1_SIMPLE_PREV(c);
- *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
- *offsets++=nextSourceIndex++;
- ++source;
- --targetCapacity;
- } else {
- break; /* needs more than one byte: leave it to the regular loop */
- }
- }
- }
- /* restore real values */
- targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
- sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
-
- /* regular loop for all cases */
- while(source<sourceLimit) {
- if(targetCapacity>0) {
- c=*source++;
- ++nextSourceIndex;
-
- if(c<=0x20) {
- /*
- * ISO C0 control & space:
- * Encode directly for MIME compatibility,
- * and reset state except for space, to not disrupt compression.
- */
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(uint8_t)c;
- *offsets++=sourceIndex;
- --targetCapacity;
-
- sourceIndex=nextSourceIndex;
- continue;
- }
-
- if(U16_IS_LEAD(c)) {
-getTrail:
- if(source<sourceLimit) {
- /* test the following code unit; if it is not a trail surrogate, c stays the unpaired lead and is encoded as it is below */
- UChar trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- ++nextSourceIndex;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- }
- } else {
- /* no more input */
- c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
- break;
- }
- }
-
- /*
- * all other Unicode code points c==U+0021..U+10ffff
- * are encoded with the difference c-prev
- *
- * a new prev is computed from c,
- * placed in the middle of a 0x80-block (for most small scripts) or
- * in the middle of the Unihan and Hangul blocks
- * to statistically minimize the following difference
- */
- diff=c-prev;
- prev=BOCU1_PREV(c);
- if(DIFF_IS_SINGLE(diff)) {
- *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
- *offsets++=sourceIndex;
- --targetCapacity;
- sourceIndex=nextSourceIndex;
- if(c<0x3000) {
- goto fastSingle;
- }
- } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
- /* optimize 2-byte case */
- int32_t m;
-
- if(diff>=0) {
- diff-=BOCU1_REACH_POS_1+1;
- m=diff%BOCU1_TRAIL_COUNT;
- diff/=BOCU1_TRAIL_COUNT;
- diff+=BOCU1_START_POS_2;
- } else {
- diff-=BOCU1_REACH_NEG_1;
- NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
- diff+=BOCU1_START_NEG_2;
- }
- *target++=(uint8_t)diff;
- *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- targetCapacity-=2;
- sourceIndex=nextSourceIndex;
- } else {
- int32_t length; /* will be 2..4 */
-
- diff=packDiff(diff); /* from here on diff holds the packed output bytes, not a difference */
- length=BOCU1_LENGTH_FROM_PACKED(diff);
-
- /* write the output character bytes from diff and length */
- /* from the first if in the loop we know that targetCapacity>0 */
- if(length<=targetCapacity) {
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *target++=(uint8_t)(diff>>24);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 3:
- *target++=(uint8_t)(diff>>16);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(diff>>8);
- *offsets++=sourceIndex;
- /* case 1: handled above */
- *target++=(uint8_t)diff;
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- targetCapacity-=length;
- sourceIndex=nextSourceIndex;
- } else {
- uint8_t *charErrorBuffer;
-
- /*
- * We actually do this backwards here:
- * In order to save an intermediate variable, we output
- * first to the overflow buffer what does not fit into the
- * regular target.
- */
- /* we know that 1<=targetCapacity<length<=4 */
- length-=targetCapacity; /* length is now the number of bytes that overflow */
- charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
- switch(length) {
- /* each branch falls through to the next one */
- case 3:
- *charErrorBuffer++=(uint8_t)(diff>>16);
- U_FALLTHROUGH;
- case 2:
- *charErrorBuffer++=(uint8_t)(diff>>8);
- U_FALLTHROUGH;
- case 1:
- *charErrorBuffer=(uint8_t)diff;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- cnv->charErrorBufferLength=(int8_t)length;
-
- /* now output what fits into the regular target */
- diff>>=8*length; /* length was reduced by targetCapacity */
- switch(targetCapacity) {
- /* each branch falls through to the next one */
- case 3:
- *target++=(uint8_t)(diff>>16);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(diff>>8);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)diff;
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
-
- /* target overflow */
- targetCapacity=0;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- } else {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-
- /* set the converter state back into UConverter */
- cnv->fromUChar32= c<0 ? -c : 0; /* c<0 only when a lead surrogate was negated above for lack of input */
- cnv->fromUnicodeStatus=(uint32_t)prev;
-
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- pArgs->offsets=offsets;
-}
-
-/*
- * Identical to _Bocu1FromUnicodeWithOffsets but without offset handling.
- * If a change is made in the original function, then either
- * change this function the same way or
- * re-copy the original function and remove the variables
- * offsets, sourceIndex, and nextSourceIndex.
- */
-static void U_CALLCONV
-_Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs, /* encodes UTF-16 from pArgs->source as BOCU-1 bytes into pArgs->target; no offsets are written */
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const UChar *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
-
- int32_t prev, c, diff; /* prev: differencing base; c: current code unit or code point; diff: c-prev, later reused for the packed bytes */
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
-
- /* get the converter state from UConverter */
- c=cnv->fromUChar32; /* nonzero when the previous call ended on a lead surrogate (see the state write-back at the end) */
- prev=(int32_t)cnv->fromUnicodeStatus;
- if(prev==0) {
- prev=BOCU1_ASCII_PREV;
- }
-
- /* conversion loop */
- if(c!=0 && targetCapacity>0) {
- goto getTrail;
- }
-
-fastSingle:
- /* fast loop for single-byte differences */
- /* use only one loop counter variable, targetCapacity, not also source */
- diff=(int32_t)(sourceLimit-source);
- if(targetCapacity>diff) {
- targetCapacity=diff;
- }
- while(targetCapacity>0 && (c=*source)<0x3000) { /* peek only; source advances at the bottom of the loop */
- if(c<=0x20) {
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(uint8_t)c;
- } else {
- diff=c-prev;
- if(DIFF_IS_SINGLE(diff)) {
- prev=BOCU1_SIMPLE_PREV(c);
- *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
- } else {
- break; /* needs more than one byte: leave it to the regular loop */
- }
- }
- ++source;
- --targetCapacity;
- }
- /* restore real values */
- targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
-
- /* regular loop for all cases */
- while(source<sourceLimit) {
- if(targetCapacity>0) {
- c=*source++;
-
- if(c<=0x20) {
- /*
- * ISO C0 control & space:
- * Encode directly for MIME compatibility,
- * and reset state except for space, to not disrupt compression.
- */
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(uint8_t)c;
- --targetCapacity;
- continue;
- }
-
- if(U16_IS_LEAD(c)) {
-getTrail:
- if(source<sourceLimit) {
- /* test the following code unit; if it is not a trail surrogate, c stays the unpaired lead and is encoded as it is below */
- UChar trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- }
- } else {
- /* no more input */
- c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
- break;
- }
- }
-
- /*
- * all other Unicode code points c==U+0021..U+10ffff
- * are encoded with the difference c-prev
- *
- * a new prev is computed from c,
- * placed in the middle of a 0x80-block (for most small scripts) or
- * in the middle of the Unihan and Hangul blocks
- * to statistically minimize the following difference
- */
- diff=c-prev;
- prev=BOCU1_PREV(c);
- if(DIFF_IS_SINGLE(diff)) {
- *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
- --targetCapacity;
- if(c<0x3000) {
- goto fastSingle;
- }
- } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
- /* optimize 2-byte case */
- int32_t m;
-
- if(diff>=0) {
- diff-=BOCU1_REACH_POS_1+1;
- m=diff%BOCU1_TRAIL_COUNT;
- diff/=BOCU1_TRAIL_COUNT;
- diff+=BOCU1_START_POS_2;
- } else {
- diff-=BOCU1_REACH_NEG_1;
- NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
- diff+=BOCU1_START_NEG_2;
- }
- *target++=(uint8_t)diff;
- *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
- targetCapacity-=2;
- } else {
- int32_t length; /* will be 2..4 */
-
- diff=packDiff(diff); /* from here on diff holds the packed output bytes, not a difference */
- length=BOCU1_LENGTH_FROM_PACKED(diff);
-
- /* write the output character bytes from diff and length */
- /* from the first if in the loop we know that targetCapacity>0 */
- if(length<=targetCapacity) {
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *target++=(uint8_t)(diff>>24);
- U_FALLTHROUGH;
- case 3:
- *target++=(uint8_t)(diff>>16);
- /* case 2: handled above (a two-byte diff reaches this branch only when targetCapacity<2, which fails the length check) */
- *target++=(uint8_t)(diff>>8);
- /* case 1: handled above */
- *target++=(uint8_t)diff;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- targetCapacity-=length;
- } else {
- uint8_t *charErrorBuffer;
-
- /*
- * We actually do this backwards here:
- * In order to save an intermediate variable, we output
- * first to the overflow buffer what does not fit into the
- * regular target.
- */
- /* we know that 1<=targetCapacity<length<=4 */
- length-=targetCapacity; /* length is now the number of bytes that overflow */
- charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
- switch(length) {
- /* each branch falls through to the next one */
- case 3:
- *charErrorBuffer++=(uint8_t)(diff>>16);
- U_FALLTHROUGH;
- case 2:
- *charErrorBuffer++=(uint8_t)(diff>>8);
- U_FALLTHROUGH;
- case 1:
- *charErrorBuffer=(uint8_t)diff;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- cnv->charErrorBufferLength=(int8_t)length;
-
- /* now output what fits into the regular target */
- diff>>=8*length; /* length was reduced by targetCapacity */
- switch(targetCapacity) {
- /* each branch falls through to the next one */
- case 3:
- *target++=(uint8_t)(diff>>16);
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(diff>>8);
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)diff;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
-
- /* target overflow */
- targetCapacity=0;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- } else {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-
- /* set the converter state back into UConverter */
- cnv->fromUChar32= c<0 ? -c : 0; /* c<0 only when a lead surrogate was negated above for lack of input */
- cnv->fromUnicodeStatus=(uint32_t)prev;
-
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
-}
-
-/* BOCU-1-to-Unicode conversion functions ----------------------------------- */
-
-/**
- * Function for BOCU-1 decoder; handles multi-byte lead bytes.
- *
- * @param b lead byte;
- * BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<=BOCU1_MAX_LEAD
- * @return (diff<<2)|count, where diff is the lead byte's base difference and count (1..3) is the number of trail bytes still to come
- */
-static inline int32_t
-decodeBocu1LeadByte(int32_t b) {
- int32_t diff, count;
-
- if(b>=BOCU1_START_NEG_2) {
- /* positive difference (single-byte leads are excluded by the parameter contract, so b>=BOCU1_START_POS_2 here) */
- if(b<BOCU1_START_POS_3) {
- /* two bytes */
- diff=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
- count=1;
- } else if(b<BOCU1_START_POS_4) {
- /* three bytes */
- diff=((int32_t)b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1;
- count=2;
- } else {
- /* four bytes */
- diff=BOCU1_REACH_POS_3+1;
- count=3;
- }
- } else {
- /* negative difference */
- if(b>=BOCU1_START_NEG_3) {
- /* two bytes */
- diff=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
- count=1;
- } else if(b>BOCU1_MIN) {
- /* three bytes */
- diff=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2;
- count=2;
- } else {
- /* four bytes */
- diff=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3;
- count=3;
- }
- }
-
- /* return the state for decoding the trail byte(s) */
- return (diff<<2)|count;
-}
-
-/**
- * Function for BOCU-1 decoder; handles multi-byte trail bytes.
- *
- * @param count number of remaining trail bytes including this one
- * @param b trail byte
- * @return new delta for diff including b - <0 indicates an error
- *
- * @see decodeBocu1
- */
-static inline int32_t
-decodeBocu1TrailByte(int32_t count, int32_t b) {
- if(b<=0x20) {
- /* skip some C0 controls and make the trail byte range contiguous */
- b=bocu1ByteToTrail[b];
- /* b<0 for an illegal trail byte value will result in return<0 below */
-#if BOCU1_MAX_TRAIL<0xff
- } else if(b>BOCU1_MAX_TRAIL) {
- return -99;
-#endif
- } else {
- b-=BOCU1_TRAIL_BYTE_OFFSET;
- }
-
- /* weight the trail byte value by its position; this function only returns the weighted value, it neither adds it to a difference nor changes count */
- if(count==1) {
- return b;
- } else if(count==2) {
- return b*BOCU1_TRAIL_COUNT;
- } else /* count==3 */ {
- return b*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT);
- }
-}
-
-static void U_CALLCONV
-_Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const uint8_t *source, *sourceLimit;
- UChar *target;
- const UChar *targetLimit;
- int32_t *offsets;
-
- int32_t prev, count, diff, c;
-
- int8_t byteIndex;
- uint8_t *bytes;
-
- int32_t sourceIndex, nextSourceIndex;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=pArgs->target;
- targetLimit=pArgs->targetLimit;
- offsets=pArgs->offsets;
-
- /* get the converter state from UConverter */
- prev=(int32_t)cnv->toUnicodeStatus;
- if(prev==0) {
- prev=BOCU1_ASCII_PREV;
- }
- diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
- count=diff&3;
- diff>>=2;
-
- byteIndex=cnv->toULength;
- bytes=cnv->toUBytes;
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex=byteIndex==0 ? 0 : -1;
- nextSourceIndex=0;
-
- /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
- if(count>0 && byteIndex>0 && target<targetLimit) {
- goto getTrail;
- }
-
-fastSingle:
- /* fast loop for single-byte differences */
- /* use count as the only loop counter variable */
- diff=(int32_t)(sourceLimit-source);
- count=(int32_t)(pArgs->targetLimit-target);
- if(count>diff) {
- count=diff;
- }
- while(count>0) {
- if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
- c=prev+(c-BOCU1_MIDDLE);
- if(c<0x3000) {
- *target++=(UChar)c;
- *offsets++=nextSourceIndex++;
- prev=BOCU1_SIMPLE_PREV(c);
- } else {
- break;
- }
- } else if(c<=0x20) {
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(UChar)c;
- *offsets++=nextSourceIndex++;
- } else {
- break;
- }
- ++source;
- --count;
- }
- sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
-
- /* decode a sequence of single and lead bytes */
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
-
- ++nextSourceIndex;
- c=*source++;
- if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
- /* Write a code point directly from a single-byte difference. */
- c=prev+(c-BOCU1_MIDDLE);
- if(c<0x3000) {
- *target++=(UChar)c;
- *offsets++=sourceIndex;
- prev=BOCU1_SIMPLE_PREV(c);
- sourceIndex=nextSourceIndex;
- goto fastSingle;
- }
- } else if(c<=0x20) {
- /*
- * Direct-encoded C0 control code or space.
- * Reset prev for C0 control codes but not for space.
- */
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(UChar)c;
- *offsets++=sourceIndex;
- sourceIndex=nextSourceIndex;
- continue;
- } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
- /* Optimize two-byte case. */
- if(c>=BOCU1_MIDDLE) {
- diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
- } else {
- diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
- }
-
- /* trail byte */
- ++nextSourceIndex;
- c=decodeBocu1TrailByte(1, *source++);
- if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
- bytes[0]=source[-2];
- bytes[1]=source[-1];
- byteIndex=2;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- } else if(c==BOCU1_RESET) {
- /* only reset the state, no code point */
- prev=BOCU1_ASCII_PREV;
- sourceIndex=nextSourceIndex;
- continue;
- } else {
- /*
- * For multi-byte difference lead bytes, set the decoder state
- * with the partial difference value from the lead byte and
- * with the number of trail bytes.
- */
- bytes[0]=(uint8_t)c;
- byteIndex=1;
-
- diff=decodeBocu1LeadByte(c);
- count=diff&3;
- diff>>=2;
-getTrail:
- for(;;) {
- if(source>=sourceLimit) {
- goto endloop;
- }
- ++nextSourceIndex;
- c=bytes[byteIndex++]=*source++;
-
- /* trail byte in any position */
- c=decodeBocu1TrailByte(count, c);
- if(c<0) {
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
-
- diff+=c;
- if(--count==0) {
- /* final trail byte, deliver a code point */
- byteIndex=0;
- c=prev+diff;
- if((uint32_t)c>0x10ffff) {
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- break;
- }
- }
- }
-
- /* calculate the next prev and output c */
- prev=BOCU1_PREV(c);
- if(c<=0xffff) {
- *target++=(UChar)c;
- *offsets++=sourceIndex;
- } else {
- /* output surrogate pair */
- *target++=U16_LEAD(c);
- if(target<targetLimit) {
- *target++=U16_TRAIL(c);
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- } else {
- /* target overflow */
- *offsets++=sourceIndex;
- cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- sourceIndex=nextSourceIndex;
- }
-endloop:
-
- if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
- /* set the converter state in UConverter to deal with the next character */
- cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
- cnv->mode=0;
- } else {
- /* set the converter state back into UConverter */
- cnv->toUnicodeStatus=(uint32_t)prev;
- cnv->mode=(diff<<2)|count;
- }
- cnv->toULength=byteIndex;
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- pArgs->offsets=offsets;
- return;
-}
-
-/*
- * Identical to _Bocu1ToUnicodeWithOffsets but without offset handling.
- * If a change is made in the original function, then either
- * change this function the same way or
- * re-copy the original function and remove the variables
- * offsets, sourceIndex, and nextSourceIndex.
- */
-static void U_CALLCONV
-_Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const uint8_t *source, *sourceLimit;
- UChar *target;
- const UChar *targetLimit;
-
- int32_t prev, count, diff, c;
-
- int8_t byteIndex;
- uint8_t *bytes;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=pArgs->target;
- targetLimit=pArgs->targetLimit;
-
- /* get the converter state from UConverter */
- prev=(int32_t)cnv->toUnicodeStatus;
- if(prev==0) {
- prev=BOCU1_ASCII_PREV;
- }
- diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
- count=diff&3;
- diff>>=2;
-
- byteIndex=cnv->toULength;
- bytes=cnv->toUBytes;
-
- /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
- if(count>0 && byteIndex>0 && target<targetLimit) {
- goto getTrail;
- }
-
-fastSingle:
- /* fast loop for single-byte differences */
- /* use count as the only loop counter variable */
- diff=(int32_t)(sourceLimit-source);
- count=(int32_t)(pArgs->targetLimit-target);
- if(count>diff) {
- count=diff;
- }
- while(count>0) {
- if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
- c=prev+(c-BOCU1_MIDDLE);
- if(c<0x3000) {
- *target++=(UChar)c;
- prev=BOCU1_SIMPLE_PREV(c);
- } else {
- break;
- }
- } else if(c<=0x20) {
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(UChar)c;
- } else {
- break;
- }
- ++source;
- --count;
- }
-
- /* decode a sequence of single and lead bytes */
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
-
- c=*source++;
- if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
- /* Write a code point directly from a single-byte difference. */
- c=prev+(c-BOCU1_MIDDLE);
- if(c<0x3000) {
- *target++=(UChar)c;
- prev=BOCU1_SIMPLE_PREV(c);
- goto fastSingle;
- }
- } else if(c<=0x20) {
- /*
- * Direct-encoded C0 control code or space.
- * Reset prev for C0 control codes but not for space.
- */
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(UChar)c;
- continue;
- } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
- /* Optimize two-byte case. */
- if(c>=BOCU1_MIDDLE) {
- diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
- } else {
- diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
- }
-
- /* trail byte */
- c=decodeBocu1TrailByte(1, *source++);
- if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
- bytes[0]=source[-2];
- bytes[1]=source[-1];
- byteIndex=2;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- } else if(c==BOCU1_RESET) {
- /* only reset the state, no code point */
- prev=BOCU1_ASCII_PREV;
- continue;
- } else {
- /*
- * For multi-byte difference lead bytes, set the decoder state
- * with the partial difference value from the lead byte and
- * with the number of trail bytes.
- */
- bytes[0]=(uint8_t)c;
- byteIndex=1;
-
- diff=decodeBocu1LeadByte(c);
- count=diff&3;
- diff>>=2;
-getTrail:
- for(;;) {
- if(source>=sourceLimit) {
- goto endloop;
- }
- c=bytes[byteIndex++]=*source++;
-
- /* trail byte in any position */
- c=decodeBocu1TrailByte(count, c);
- if(c<0) {
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
-
- diff+=c;
- if(--count==0) {
- /* final trail byte, deliver a code point */
- byteIndex=0;
- c=prev+diff;
- if((uint32_t)c>0x10ffff) {
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- break;
- }
- }
- }
-
- /* calculate the next prev and output c */
- prev=BOCU1_PREV(c);
- if(c<=0xffff) {
- *target++=(UChar)c;
- } else {
- /* output surrogate pair */
- *target++=U16_LEAD(c);
- if(target<targetLimit) {
- *target++=U16_TRAIL(c);
- } else {
- /* target overflow */
- cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- }
-endloop:
-
- if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
- /* set the converter state in UConverter to deal with the next character */
- cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
- cnv->mode=0;
- } else {
- /* set the converter state back into UConverter */
- cnv->toUnicodeStatus=(uint32_t)prev;
- cnv->mode=(diff<<2)|count;
- }
- cnv->toULength=byteIndex;
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- return;
-}
-
-/* miscellaneous ------------------------------------------------------------ */
-
-static const UConverterImpl _Bocu1Impl={
- UCNV_BOCU1,
-
- NULL,
- NULL,
-
- NULL,
- NULL,
- NULL,
-
- _Bocu1ToUnicode,
- _Bocu1ToUnicodeWithOffsets,
- _Bocu1FromUnicode,
- _Bocu1FromUnicodeWithOffsets,
- NULL,
-
- NULL,
- NULL,
- NULL,
- NULL,
- ucnv_getCompleteUnicodeSet,
-
- NULL,
- NULL
-};
-
-static const UConverterStaticData _Bocu1StaticData={
- sizeof(UConverterStaticData),
- "BOCU-1",
- 1214, /* CCSID for BOCU-1 */
- UCNV_IBM, UCNV_BOCU1,
- 1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
- { 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */
- FALSE, FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-const UConverterSharedData _Bocu1Data=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Bocu1StaticData, &_Bocu1Impl);
-
-#endif
diff --git a/contrib/libs/icu/common/ucnvdisp.cpp b/contrib/libs/icu/common/ucnvdisp.cpp
deleted file mode 100644
index ac86b985970..00000000000
--- a/contrib/libs/icu/common/ucnvdisp.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1998-2004, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* ucnvdisp.c:
-* Implements APIs for the ICU's codeset conversion library display names.
-*
-* Modification History:
-*
-* Date Name Description
-* 04/04/99 helena Fixed internal header inclusion.
-* 05/09/00 helena Added implementation to handle fallback mappings.
-* 06/20/2000 helena OS/400 port changes; mostly typecast.
-* 09/08/2004 grhoten split from ucnv.c
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/ustring.h"
-#include "unicode/ures.h"
-#include "unicode/ucnv.h"
-#include "cstring.h"
-#include "ustr_imp.h"
-#include "ucnv_imp.h"
-#include "putilimp.h"
-
-U_CAPI int32_t U_EXPORT2
-ucnv_getDisplayName(const UConverter *cnv,
- const char *displayLocale,
- UChar *displayName, int32_t displayNameCapacity,
- UErrorCode *pErrorCode) {
- UResourceBundle *rb;
- const UChar *name;
- int32_t length;
- UErrorCode localStatus = U_ZERO_ERROR;
-
- /* check arguments */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- if(cnv==NULL || displayNameCapacity<0 || (displayNameCapacity>0 && displayName==NULL)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* open the resource bundle and get the display name string */
- rb=ures_open(NULL, displayLocale, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /* use the internal name as the key */
- name=ures_getStringByKey(rb, cnv->sharedData->staticData->name, &length, &localStatus);
- ures_close(rb);
-
- if(U_SUCCESS(localStatus)) {
- /* copy the string */
- if (*pErrorCode == U_ZERO_ERROR) {
- *pErrorCode = localStatus;
- }
- u_memcpy(displayName, name, uprv_min(length, displayNameCapacity)*U_SIZEOF_UCHAR);
- } else {
- /* convert the internal name into a Unicode string */
- length=(int32_t)uprv_strlen(cnv->sharedData->staticData->name);
- u_charsToUChars(cnv->sharedData->staticData->name, displayName, uprv_min(length, displayNameCapacity));
- }
- return u_terminateUChars(displayName, displayNameCapacity, length, pErrorCode);
-}
-
-#endif
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/contrib/libs/icu/common/ucnvhz.cpp b/contrib/libs/icu/common/ucnvhz.cpp
deleted file mode 100644
index 6b2f5faaf0a..00000000000
--- a/contrib/libs/icu/common/ucnvhz.cpp
+++ /dev/null
@@ -1,625 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2000-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* file name: ucnvhz.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2000oct16
-* created by: Ram Viswanadha
-* 10/31/2000 Ram Implemented offsets logic function
-*
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
-
-#include "cmemory.h"
-#include "unicode/ucnv.h"
-#include "unicode/ucnv_cb.h"
-#include "unicode/uset.h"
-#include "unicode/utf16.h"
-#include "ucnv_bld.h"
-#include "ucnv_cnv.h"
-#include "ucnv_imp.h"
-
-#define UCNV_TILDE 0x7E /* ~ */
-#define UCNV_OPEN_BRACE 0x7B /* { */
-#define UCNV_CLOSE_BRACE 0x7D /* } */
-#define SB_ESCAPE "\x7E\x7D"
-#define DB_ESCAPE "\x7E\x7B"
-#define TILDE_ESCAPE "\x7E\x7E"
-#define ESC_LEN 2
-
-
-#define CONCAT_ESCAPE_MACRO(args, targetIndex,targetLength,strToAppend, err, len,sourceIndex) UPRV_BLOCK_MACRO_BEGIN { \
- while(len-->0){ \
- if(targetIndex < targetLength){ \
- args->target[targetIndex] = (unsigned char) *strToAppend; \
- if(args->offsets!=NULL){ \
- *(offsets++) = sourceIndex-1; \
- } \
- targetIndex++; \
- } \
- else{ \
- args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; \
- *err =U_BUFFER_OVERFLOW_ERROR; \
- } \
- strToAppend++; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-
-typedef struct{
- UConverter* gbConverter;
- int32_t targetIndex;
- int32_t sourceIndex;
- UBool isEscapeAppended;
- UBool isStateDBCS;
- UBool isTargetUCharDBCS;
- UBool isEmptySegment;
-}UConverterDataHZ;
-
-
-U_CDECL_BEGIN
-static void U_CALLCONV
-_HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
- UConverter *gbConverter;
- if(pArgs->onlyTestIsLoadable) {
- ucnv_canCreateConverter("GBK", errorCode); /* errorCode carries result */
- return;
- }
- gbConverter = ucnv_open("GBK", errorCode);
- if(U_FAILURE(*errorCode)) {
- return;
- }
- cnv->toUnicodeStatus = 0;
- cnv->fromUnicodeStatus= 0;
- cnv->mode=0;
- cnv->fromUChar32=0x0000;
- cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ));
- if(cnv->extraInfo != NULL){
- ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter;
- }
- else {
- ucnv_close(gbConverter);
- *errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-}
-
-static void U_CALLCONV
-_HZClose(UConverter *cnv){
- if(cnv->extraInfo != NULL) {
- ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter);
- if(!cnv->isExtraLocal) {
- uprv_free(cnv->extraInfo);
- }
- cnv->extraInfo = NULL;
- }
-}
-
-static void U_CALLCONV
-_HZReset(UConverter *cnv, UConverterResetChoice choice){
- if(choice<=UCNV_RESET_TO_UNICODE) {
- cnv->toUnicodeStatus = 0;
- cnv->mode=0;
- if(cnv->extraInfo != NULL){
- ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE;
- ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE;
- }
- }
- if(choice!=UCNV_RESET_TO_UNICODE) {
- cnv->fromUnicodeStatus= 0;
- cnv->fromUChar32=0x0000;
- if(cnv->extraInfo != NULL){
- ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE;
- ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0;
- ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0;
- ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE;
- }
- }
-}
-
-/**************************************HZ Encoding*************************************************
-* Rules for HZ encoding
-*
-* In ASCII mode, a byte is interpreted as an ASCII character, unless a
-* '~' is encountered. The character '~' is an escape character. By
-* convention, it must be immediately followed ONLY by '~', '{' or '\n'
-* (<LF>), with the following special meaning.
-
-* 1. The escape sequence '~~' is interpreted as a '~'.
-* 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
-* 3. The escape sequence '~\n' is a line-continuation marker to be
-* consumed with no output produced.
-* In GB mode, characters are interpreted two bytes at a time as (pure)
-* GB codes until the escape-from-GB code '~}' is read. This code
-* switches the mode from GB back to ASCII. (Note that the escape-
-* from-GB code '~}' ($7E7D) is outside the defined GB range.)
-*
-* Source: RFC 1842
-*
-* Note that the formal syntax in RFC 1842 is invalid. I assume that the
-* intended definition of single-byte-segment is as follows (pedberg):
-* single-byte-segment = single-byte-seq 1*single-byte-char
-*/
-
-
-static void U_CALLCONV
-UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
- UErrorCode* err){
- char tempBuf[2];
- const char *mySource = ( char *) args->source;
- UChar *myTarget = args->target;
- const char *mySourceLimit = args->sourceLimit;
- UChar32 targetUniChar = 0x0000;
- int32_t mySourceChar = 0x0000;
- UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo);
- tempBuf[0]=0;
- tempBuf[1]=0;
-
- /* Calling code already handles this situation. */
- /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }*/
-
- while(mySource< mySourceLimit){
-
- if(myTarget < args->targetLimit){
-
- mySourceChar= (unsigned char) *mySource++;
-
- if(args->converter->mode == UCNV_TILDE) {
- /* second byte after ~ */
- args->converter->mode=0;
- switch(mySourceChar) {
- case 0x0A:
- /* no output for ~\n (line-continuation marker) */
- continue;
- case UCNV_TILDE:
- if(args->offsets) {
- args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2);
- }
- *(myTarget++)=(UChar)mySourceChar;
- myData->isEmptySegment = FALSE;
- continue;
- case UCNV_OPEN_BRACE:
- case UCNV_CLOSE_BRACE:
- myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE);
- if (myData->isEmptySegment) {
- myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
- *err = U_ILLEGAL_ESCAPE_SEQUENCE;
- args->converter->toUCallbackReason = UCNV_IRREGULAR;
- args->converter->toUBytes[0] = UCNV_TILDE;
- args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar);
- args->converter->toULength = 2;
- args->target = myTarget;
- args->source = mySource;
- return;
- }
- myData->isEmptySegment = TRUE;
- continue;
- default:
- /* if the first byte is equal to TILDE and the trail byte
- * is not a valid byte then it is an error condition
- */
- /*
- * Ticket 5691: consistent illegal sequences:
- * - We include at least the first byte in the illegal sequence.
- * - If any of the non-initial bytes could be the start of a character,
- * we stop the illegal sequence before the first one of those.
- */
- myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
- *err = U_ILLEGAL_ESCAPE_SEQUENCE;
- args->converter->toUBytes[0] = UCNV_TILDE;
- if( myData->isStateDBCS ?
- (0x21 <= mySourceChar && mySourceChar <= 0x7e) :
- mySourceChar <= 0x7f
- ) {
- /* The current byte could be the start of a character: Back it out. */
- args->converter->toULength = 1;
- --mySource;
- } else {
- /* Include the current byte in the illegal sequence. */
- args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar);
- args->converter->toULength = 2;
- }
- args->target = myTarget;
- args->source = mySource;
- return;
- }
- } else if(myData->isStateDBCS) {
- if(args->converter->toUnicodeStatus == 0x00){
- /* lead byte */
- if(mySourceChar == UCNV_TILDE) {
- args->converter->mode = UCNV_TILDE;
- } else {
- /* add another bit to distinguish a 0 byte from not having seen a lead byte */
- args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100);
- myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */
- }
- continue;
- }
- else{
- /* trail byte */
- int leadIsOk, trailIsOk;
- uint32_t leadByte = args->converter->toUnicodeStatus & 0xff;
- targetUniChar = 0xffff;
- /*
- * Ticket 5691: consistent illegal sequences:
- * - We include at least the first byte in the illegal sequence.
- * - If any of the non-initial bytes could be the start of a character,
- * we stop the illegal sequence before the first one of those.
- *
- * In HZ DBCS, if the second byte is in the 21..7e range,
- * we report only the first byte as the illegal sequence.
- * Otherwise we convert or report the pair of bytes.
- */
- leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21);
- trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
- if (leadIsOk && trailIsOk) {
- tempBuf[0] = (char) (leadByte+0x80) ;
- tempBuf[1] = (char) (mySourceChar+0x80);
- targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
- tempBuf, 2, args->converter->useFallback);
- mySourceChar= (leadByte << 8) | mySourceChar;
- } else if (trailIsOk) {
- /* report a single illegal byte and continue with the following DBCS starter byte */
- --mySource;
- mySourceChar = (int32_t)leadByte;
- } else {
- /* report a pair of illegal bytes if the second byte is not a DBCS starter */
- /* add another bit so that the code below writes 2 bytes in case of error */
- mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar;
- }
- args->converter->toUnicodeStatus =0x00;
- }
- }
- else{
- if(mySourceChar == UCNV_TILDE) {
- args->converter->mode = UCNV_TILDE;
- continue;
- } else if(mySourceChar <= 0x7f) {
- targetUniChar = (UChar)mySourceChar; /* ASCII */
- myData->isEmptySegment = FALSE; /* the segment has something valid */
- } else {
- targetUniChar = 0xffff;
- myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
- }
- }
- if(targetUniChar < 0xfffe){
- if(args->offsets) {
- args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS));
- }
-
- *(myTarget++)=(UChar)targetUniChar;
- }
- else /* targetUniChar>=0xfffe */ {
- if(targetUniChar == 0xfffe){
- *err = U_INVALID_CHAR_FOUND;
- }
- else{
- *err = U_ILLEGAL_CHAR_FOUND;
- }
- if(mySourceChar > 0xff){
- args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8);
- args->converter->toUBytes[1] = (uint8_t)mySourceChar;
- args->converter->toULength=2;
- }
- else{
- args->converter->toUBytes[0] = (uint8_t)mySourceChar;
- args->converter->toULength=1;
- }
- break;
- }
- }
- else{
- *err =U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-
- args->target = myTarget;
- args->source = mySource;
-}
-
-
-static void U_CALLCONV
-UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
- UErrorCode * err){
- const UChar *mySource = args->source;
- char *myTarget = args->target;
- int32_t* offsets = args->offsets;
- int32_t mySourceIndex = 0;
- int32_t myTargetIndex = 0;
- int32_t targetLength = (int32_t)(args->targetLimit - myTarget);
- int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source);
- uint32_t targetUniChar = 0x0000;
- UChar32 mySourceChar = 0x0000;
- UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo;
- UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;
- UBool oldIsTargetUCharDBCS;
- int len =0;
- const char* escSeq=NULL;
-
- /* Calling code already handles this situation. */
- /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }*/
- if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) {
- goto getTrail;
- }
- /*writing the char to the output stream */
- while (mySourceIndex < mySourceLength){
- targetUniChar = missingCharMarker;
- if (myTargetIndex < targetLength){
-
- mySourceChar = (UChar) mySource[mySourceIndex++];
-
-
- oldIsTargetUCharDBCS = isTargetUCharDBCS;
- if(mySourceChar ==UCNV_TILDE){
- /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/
- len = ESC_LEN;
- escSeq = TILDE_ESCAPE;
- CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
- continue;
- } else if(mySourceChar <= 0x7f) {
- targetUniChar = mySourceChar;
- } else {
- int32_t length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData,
- mySourceChar,&targetUniChar,args->converter->useFallback);
- /* we can only use lead bytes 21..7D and trail bytes 21..7E */
- if( length == 2 &&
- (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) &&
- (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1)
- ) {
- targetUniChar -= 0x8080;
- } else {
- targetUniChar = missingCharMarker;
- }
- }
- if (targetUniChar != missingCharMarker){
- myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
- if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){
- /*Shifting from a double byte to single byte mode*/
- if(!isTargetUCharDBCS){
- len =ESC_LEN;
- escSeq = SB_ESCAPE;
- CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
- myConverterData->isEscapeAppended = TRUE;
- }
- else{ /* Shifting from a single byte to double byte mode*/
- len =ESC_LEN;
- escSeq = DB_ESCAPE;
- CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
- myConverterData->isEscapeAppended = TRUE;
-
- }
- }
-
- if(isTargetUCharDBCS){
- if( myTargetIndex <targetLength){
- myTarget[myTargetIndex++] =(char) (targetUniChar >> 8);
- if(offsets){
- *(offsets++) = mySourceIndex-1;
- }
- if(myTargetIndex < targetLength){
- myTarget[myTargetIndex++] =(char) targetUniChar;
- if(offsets){
- *(offsets++) = mySourceIndex-1;
- }
- }else{
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }else{
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8);
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
-
- }else{
- if( myTargetIndex <targetLength){
- myTarget[myTargetIndex++] = (char) (targetUniChar );
- if(offsets){
- *(offsets++) = mySourceIndex-1;
- }
-
- }else{
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }
-
- }
- else{
- /* oops.. the code point is unassigned */
- /*Handle surrogates */
- /*check if the char is a First surrogate*/
- if(U16_IS_SURROGATE(mySourceChar)) {
- if(U16_IS_SURROGATE_LEAD(mySourceChar)) {
- args->converter->fromUChar32=mySourceChar;
-getTrail:
- /*look ahead to find the trail surrogate*/
- if(mySourceIndex < mySourceLength) {
- /* test the following code unit */
- UChar trail=(UChar) args->source[mySourceIndex];
- if(U16_IS_TRAIL(trail)) {
- ++mySourceIndex;
- mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail);
- args->converter->fromUChar32=0x00;
- /* there are no surrogates in GB2312*/
- *err = U_INVALID_CHAR_FOUND;
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- }
- } else {
- /* no more input */
- *err = U_ZERO_ERROR;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- }
- } else {
- /* callback(unassigned) for a BMP code point */
- *err = U_INVALID_CHAR_FOUND;
- }
-
- args->converter->fromUChar32=mySourceChar;
- break;
- }
- }
- else{
- *err = U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- targetUniChar=missingCharMarker;
- }
-
- args->target += myTargetIndex;
- args->source += mySourceIndex;
- myConverterData->isTargetUCharDBCS = isTargetUCharDBCS;
-}
-
-static void U_CALLCONV
-_HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
- UConverter *cnv = args->converter;
- UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo;
- char *p;
- char buffer[4];
- p = buffer;
-
- if( convData->isTargetUCharDBCS){
- *p++= UCNV_TILDE;
- *p++= UCNV_CLOSE_BRACE;
- convData->isTargetUCharDBCS=FALSE;
- }
- *p++= (char)cnv->subChars[0];
-
- ucnv_cbFromUWriteBytes(args,
- buffer, (int32_t)(p - buffer),
- offsetIndex, err);
-}
-
-/*
- * Structure for cloning an HZ converter into a single memory block.
- */
-struct cloneHZStruct
-{
- UConverter cnv;
- UConverter subCnv;
- UConverterDataHZ mydata;
-};
-
-
-static UConverter * U_CALLCONV
-_HZ_SafeClone(const UConverter *cnv,
- void *stackBuffer,
- int32_t *pBufferSize,
- UErrorCode *status)
-{
- struct cloneHZStruct * localClone;
- int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct);
-
- if (U_FAILURE(*status)){
- return nullptr;
- }
-
- if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
- *pBufferSize = bufferSizeNeeded;
- return nullptr;
- }
-
- localClone = (struct cloneHZStruct *)stackBuffer;
- /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
-
- uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ));
- localClone->cnv.extraInfo = &localClone->mydata;
- localClone->cnv.isExtraLocal = TRUE;
-
- /* deep-clone the sub-converter */
- size = (int32_t)sizeof(UConverter);
- ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter =
- ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status);
-
- return &localClone->cnv;
-}
-
-static void U_CALLCONV
-_HZ_GetUnicodeSet(const UConverter *cnv,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode) {
- /* HZ converts all of ASCII */
- sa->addRange(sa->set, 0, 0x7f);
-
- /* add all of the code points that the sub-converter handles */
- ucnv_MBCSGetFilteredUnicodeSetForUnicode(
- ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData,
- sa, which, UCNV_SET_FILTER_HZ,
- pErrorCode);
-}
-U_CDECL_END
-static const UConverterImpl _HZImpl={
-
- UCNV_HZ,
-
- NULL,
- NULL,
-
- _HZOpen,
- _HZClose,
- _HZReset,
-
- UConverter_toUnicode_HZ_OFFSETS_LOGIC,
- UConverter_toUnicode_HZ_OFFSETS_LOGIC,
- UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
- UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
- NULL,
-
- NULL,
- NULL,
- _HZ_WriteSub,
- _HZ_SafeClone,
- _HZ_GetUnicodeSet,
- NULL,
- NULL
-};
-
-static const UConverterStaticData _HZStaticData={
- sizeof(UConverterStaticData),
- "HZ",
- 0,
- UCNV_IBM,
- UCNV_HZ,
- 1,
- 4,
- { 0x1a, 0, 0, 0 },
- 1,
- FALSE,
- FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
-
-};
-
-const UConverterSharedData _HZData=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_HZStaticData, &_HZImpl);
-
-#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */
diff --git a/contrib/libs/icu/common/ucnvisci.cpp b/contrib/libs/icu/common/ucnvisci.cpp
deleted file mode 100644
index 44a7c05a3c8..00000000000
--- a/contrib/libs/icu/common/ucnvisci.cpp
+++ /dev/null
@@ -1,1635 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2000-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* file name: ucnvisci.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2001JUN26
-* created by: Ram Viswanadha
-*
-* Date Name Description
-* 24/7/2001 Ram Added support for EXT character handling
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "unicode/ucnv_cb.h"
-#include "unicode/utf16.h"
-#include "cmemory.h"
-#include "ucnv_bld.h"
-#include "ucnv_cnv.h"
-#include "cstring.h"
-#include "uassert.h"
-
-#define UCNV_OPTIONS_VERSION_MASK 0xf
-#define NUKTA 0x093c
-#define HALANT 0x094d
-#define ZWNJ 0x200c /* Zero Width Non Joiner */
-#define ZWJ 0x200d /* Zero width Joiner */
-#define INVALID_CHAR 0xffff
-#define ATR 0xEF /* Attribute code */
-#define EXT 0xF0 /* Extension code */
-#define DANDA 0x0964
-#define DOUBLE_DANDA 0x0965
-#define ISCII_NUKTA 0xE9
-#define ISCII_HALANT 0xE8
-#define ISCII_DANDA 0xEA
-#define ISCII_INV 0xD9
-#define ISCII_VOWEL_SIGN_E 0xE0
-#define INDIC_BLOCK_BEGIN 0x0900
-#define INDIC_BLOCK_END 0x0D7F
-#define INDIC_RANGE (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
-#define VOCALLIC_RR 0x0931
-#define LF 0x0A
-#define ASCII_END 0xA0
-#define NO_CHAR_MARKER 0xFFFE
-#define TELUGU_DELTA DELTA * TELUGU
-#define DEV_ABBR_SIGN 0x0970
-#define DEV_ANUDATTA 0x0952
-#define EXT_RANGE_BEGIN 0xA1
-#define EXT_RANGE_END 0xEE
-
-#define PNJ_DELTA 0x0100
-#define PNJ_BINDI 0x0A02
-#define PNJ_TIPPI 0x0A70
-#define PNJ_SIGN_VIRAMA 0x0A4D
-#define PNJ_ADHAK 0x0A71
-#define PNJ_HA 0x0A39
-#define PNJ_RRA 0x0A5C
-
-typedef enum {
- DEVANAGARI =0,
- BENGALI,
- GURMUKHI,
- GUJARATI,
- ORIYA,
- TAMIL,
- TELUGU,
- KANNADA,
- MALAYALAM,
- DELTA=0x80
-}UniLang;
-
-/**
- * Enumeration for switching code pages if <ATR>+<one of below values>
- * is encountered
- */
-typedef enum {
- DEF = 0x40,
- RMN = 0x41,
- DEV = 0x42,
- BNG = 0x43,
- TML = 0x44,
- TLG = 0x45,
- ASM = 0x46,
- ORI = 0x47,
- KND = 0x48,
- MLM = 0x49,
- GJR = 0x4A,
- PNJ = 0x4B,
- ARB = 0x71,
- PES = 0x72,
- URD = 0x73,
- SND = 0x74,
- KSM = 0x75,
- PST = 0x76
-}ISCIILang;
-
-typedef enum {
- DEV_MASK =0x80,
- PNJ_MASK =0x40,
- GJR_MASK =0x20,
- ORI_MASK =0x10,
- BNG_MASK =0x08,
- KND_MASK =0x04,
- MLM_MASK =0x02,
- TML_MASK =0x01,
- ZERO =0x00
-}MaskEnum;
-
-#define ISCII_CNV_PREFIX "ISCII,version="
-
-typedef struct {
- UChar contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */
- UChar contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */
- uint16_t defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */
- uint16_t currentDeltaFromUnicode; /* current delta in Indic block */
- uint16_t currentDeltaToUnicode; /* current delta in Indic block */
- MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */
- MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode */
- MaskEnum defMaskToUnicode; /* mask for default state in toUnicode */
- UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */
- UBool resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered*/
- char name[sizeof(ISCII_CNV_PREFIX) + 1];
- UChar32 prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
-} UConverterDataISCII;
-
-typedef struct LookupDataStruct {
- UniLang uniLang;
- MaskEnum maskEnum;
- ISCIILang isciiLang;
-} LookupDataStruct;
-
-static const LookupDataStruct lookupInitialData[]={
- { DEVANAGARI, DEV_MASK, DEV },
- { BENGALI, BNG_MASK, BNG },
- { GURMUKHI, PNJ_MASK, PNJ },
- { GUJARATI, GJR_MASK, GJR },
- { ORIYA, ORI_MASK, ORI },
- { TAMIL, TML_MASK, TML },
- { TELUGU, KND_MASK, TLG },
- { KANNADA, KND_MASK, KND },
- { MALAYALAM, MLM_MASK, MLM }
-};
-
-/*
- * For special handling of certain Gurmukhi characters.
- * Bit 0 (value 1): PNJ consonant
- * Bit 1 (value 2): PNJ Bindi Tippi
- */
-static const uint8_t pnjMap[80] = {
- /* 0A00..0A0F */
- 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0A10..0A1F */
- 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- /* 0A20..0A2F */
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3,
- /* 0A30..0A3F */
- 3, 0, 0, 0, 0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 0, 2,
- /* 0A40..0A4F */
- 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-static UBool
-isPNJConsonant(UChar32 c) {
- if (c < 0xa00 || 0xa50 <= c) {
- return FALSE;
- } else {
- return (UBool)(pnjMap[c - 0xa00] & 1);
- }
-}
-
-static UBool
-isPNJBindiTippi(UChar32 c) {
- if (c < 0xa00 || 0xa50 <= c) {
- return FALSE;
- } else {
- return (UBool)(pnjMap[c - 0xa00] >> 1);
- }
-}
-U_CDECL_BEGIN
-static void U_CALLCONV
-_ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
- if(pArgs->onlyTestIsLoadable) {
- return;
- }
-
- cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
-
- if (cnv->extraInfo != NULL) {
- int32_t len=0;
- UConverterDataISCII *converterData=
- (UConverterDataISCII *) cnv->extraInfo;
- converterData->contextCharToUnicode=NO_CHAR_MARKER;
- cnv->toUnicodeStatus = missingCharMarker;
- converterData->contextCharFromUnicode=0x0000;
- converterData->resetToDefaultToUnicode=FALSE;
- /* check if the version requested is supported */
- if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) {
- /* initialize state variables */
- converterData->currentDeltaFromUnicode
- = converterData->currentDeltaToUnicode
- = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA);
-
- converterData->currentMaskFromUnicode
- = converterData->currentMaskToUnicode
- = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum;
-
- converterData->isFirstBuffer=TRUE;
- (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX);
- len = (int32_t)uprv_strlen(converterData->name);
- converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0');
- converterData->name[len+1]=0;
-
- converterData->prevToUnicodeStatus = 0x0000;
- } else {
- uprv_free(cnv->extraInfo);
- cnv->extraInfo = NULL;
- *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- }
-
- } else {
- *errorCode =U_MEMORY_ALLOCATION_ERROR;
- }
-}
-
-static void U_CALLCONV
-_ISCIIClose(UConverter *cnv) {
- if (cnv->extraInfo!=NULL) {
- if (!cnv->isExtraLocal) {
- uprv_free(cnv->extraInfo);
- }
- cnv->extraInfo=NULL;
- }
-}
-
-static const char* U_CALLCONV
-_ISCIIgetName(const UConverter* cnv) {
- if (cnv->extraInfo) {
- UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;
- return myData->name;
- }
- return NULL;
-}
-
-static void U_CALLCONV
-_ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {
- UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);
- if (choice<=UCNV_RESET_TO_UNICODE) {
- cnv->toUnicodeStatus = missingCharMarker;
- cnv->mode=0;
- data->currentDeltaToUnicode=data->defDeltaToUnicode;
- data->currentMaskToUnicode = data->defMaskToUnicode;
- data->contextCharToUnicode=NO_CHAR_MARKER;
- data->prevToUnicodeStatus = 0x0000;
- }
- if (choice!=UCNV_RESET_TO_UNICODE) {
- cnv->fromUChar32=0x0000;
- data->contextCharFromUnicode=0x00;
- data->currentMaskFromUnicode=data->defMaskToUnicode;
- data->currentDeltaFromUnicode=data->defDeltaToUnicode;
- data->isFirstBuffer=TRUE;
- data->resetToDefaultToUnicode=FALSE;
- }
-}
-
-/**
- * The values in validity table are indexed by the lower bits of Unicode
- * range 0x0900 - 0x09ff. The values have a structure like:
- * ---------------------------------------------------------------
- * | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML |
- * | | | | | ASM | KND | | |
- * ---------------------------------------------------------------
- * If a code point is valid in a particular script
- * then that bit is turned on
- *
- * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for
- * to represent these languages
- *
- * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case
- * and combine and use 1 bit to represent these languages.
- *
- * TODO: It is probably easier to understand and maintain to change this
- * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
- */
-
-static const uint8_t validityTable[128] = {
-/* This state table is tool generated please do not edit unless you know exactly what you are doing */
-/* Note: This table was edited to mirror the Windows XP implementation */
-/*ISCII:Valid:Unicode */
-/*0xa0 : 0x00: 0x900 */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0xa1 : 0xb8: 0x901 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
-/*0xa2 : 0xfe: 0x902 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xa3 : 0xbf: 0x903 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0x00 : 0x00: 0x904 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0xa4 : 0xff: 0x905 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xa5 : 0xff: 0x906 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xa6 : 0xff: 0x907 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xa7 : 0xff: 0x908 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xa8 : 0xff: 0x909 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xa9 : 0xff: 0x90a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xaa : 0xfe: 0x90b */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0x00 : 0x00: 0x90c */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xae : 0x80: 0x90d */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0xab : 0x87: 0x90e */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xac : 0xff: 0x90f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xad : 0xff: 0x910 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xb2 : 0x80: 0x911 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0xaf : 0x87: 0x912 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xb0 : 0xff: 0x913 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xb1 : 0xff: 0x914 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xb3 : 0xff: 0x915 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xb4 : 0xfe: 0x916 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xb5 : 0xfe: 0x917 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xb6 : 0xfe: 0x918 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xb7 : 0xff: 0x919 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xb8 : 0xff: 0x91a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xb9 : 0xfe: 0x91b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xba : 0xff: 0x91c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xbb : 0xfe: 0x91d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xbc : 0xff: 0x91e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xbd : 0xff: 0x91f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xbe : 0xfe: 0x920 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xbf : 0xfe: 0x921 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xc0 : 0xfe: 0x922 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xc1 : 0xff: 0x923 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xc2 : 0xff: 0x924 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xc3 : 0xfe: 0x925 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xc4 : 0xfe: 0x926 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xc5 : 0xfe: 0x927 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xc6 : 0xff: 0x928 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xc7 : 0x81: 0x929 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK ,
-/*0xc8 : 0xff: 0x92a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xc9 : 0xfe: 0x92b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xca : 0xfe: 0x92c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xcb : 0xfe: 0x92d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xcc : 0xfe: 0x92e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xcd : 0xff: 0x92f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xcf : 0xff: 0x930 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xd0 : 0x87: 0x931 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK ,
-/*0xd1 : 0xff: 0x932 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xd2 : 0xb7: 0x933 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xd3 : 0x83: 0x934 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK ,
-/*0xd4 : 0xff: 0x935 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xd5 : 0xfe: 0x936 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0xd6 : 0xbf: 0x937 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xd7 : 0xff: 0x938 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xd8 : 0xff: 0x939 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0x00 : 0x00: 0x93A */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x93B */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0xe9 : 0xda: 0x93c */ DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x93d */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0xda : 0xff: 0x93e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xdb : 0xff: 0x93f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xdc : 0xff: 0x940 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xdd : 0xff: 0x941 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xde : 0xff: 0x942 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xdf : 0xbe: 0x943 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0x00 : 0x00: 0x944 */ DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO ,
-/*0xe3 : 0x80: 0x945 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0xe0 : 0x87: 0x946 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xe1 : 0xff: 0x947 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xe2 : 0xff: 0x948 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xe7 : 0x80: 0x949 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0xe4 : 0x87: 0x94a */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xe5 : 0xff: 0x94b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xe6 : 0xff: 0x94c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xe8 : 0xff: 0x94d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xec : 0x00: 0x94e */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0xed : 0x00: 0x94f */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x950 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x951 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x952 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x953 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x954 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x955 */ ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO ,
-/*0x00 : 0x00: 0x956 */ ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO ,
-/*0x00 : 0x00: 0x957 */ ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO ,
-/*0x00 : 0x00: 0x958 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x959 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x95a */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x95b */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x95c */ DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x95d */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x95e */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0xce : 0x98: 0x95f */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x960 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0x00 : 0x00: 0x961 */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
-/*0x00 : 0x00: 0x962 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,
-/*0x00 : 0x00: 0x963 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,
-/*0xea : 0xf8: 0x964 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*0xf1 : 0xff: 0x966 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xf2 : 0xff: 0x967 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xf3 : 0xff: 0x968 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xf4 : 0xff: 0x969 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xf5 : 0xff: 0x96a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xf6 : 0xff: 0x96b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xf7 : 0xff: 0x96c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xf8 : 0xff: 0x96d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xf9 : 0xff: 0x96e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0xfa : 0xff: 0x96f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
-/*0x00 : 0x80: 0x970 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
-/*
- * The length of the array is 128 to provide values for 0x900..0x97f.
- * The last 15 entries for 0x971..0x97f of the validity table are all zero
- * because no Indic script uses such Unicode code points.
- */
-/*0x00 : 0x00: 0x9yz */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO
-};
-
-static const uint16_t fromUnicodeTable[128]={
- 0x00a0 ,/* 0x0900 */
- 0x00a1 ,/* 0x0901 */
- 0x00a2 ,/* 0x0902 */
- 0x00a3 ,/* 0x0903 */
- 0xa4e0 ,/* 0x0904 */
- 0x00a4 ,/* 0x0905 */
- 0x00a5 ,/* 0x0906 */
- 0x00a6 ,/* 0x0907 */
- 0x00a7 ,/* 0x0908 */
- 0x00a8 ,/* 0x0909 */
- 0x00a9 ,/* 0x090a */
- 0x00aa ,/* 0x090b */
- 0xA6E9 ,/* 0x090c */
- 0x00ae ,/* 0x090d */
- 0x00ab ,/* 0x090e */
- 0x00ac ,/* 0x090f */
- 0x00ad ,/* 0x0910 */
- 0x00b2 ,/* 0x0911 */
- 0x00af ,/* 0x0912 */
- 0x00b0 ,/* 0x0913 */
- 0x00b1 ,/* 0x0914 */
- 0x00b3 ,/* 0x0915 */
- 0x00b4 ,/* 0x0916 */
- 0x00b5 ,/* 0x0917 */
- 0x00b6 ,/* 0x0918 */
- 0x00b7 ,/* 0x0919 */
- 0x00b8 ,/* 0x091a */
- 0x00b9 ,/* 0x091b */
- 0x00ba ,/* 0x091c */
- 0x00bb ,/* 0x091d */
- 0x00bc ,/* 0x091e */
- 0x00bd ,/* 0x091f */
- 0x00be ,/* 0x0920 */
- 0x00bf ,/* 0x0921 */
- 0x00c0 ,/* 0x0922 */
- 0x00c1 ,/* 0x0923 */
- 0x00c2 ,/* 0x0924 */
- 0x00c3 ,/* 0x0925 */
- 0x00c4 ,/* 0x0926 */
- 0x00c5 ,/* 0x0927 */
- 0x00c6 ,/* 0x0928 */
- 0x00c7 ,/* 0x0929 */
- 0x00c8 ,/* 0x092a */
- 0x00c9 ,/* 0x092b */
- 0x00ca ,/* 0x092c */
- 0x00cb ,/* 0x092d */
- 0x00cc ,/* 0x092e */
- 0x00cd ,/* 0x092f */
- 0x00cf ,/* 0x0930 */
- 0x00d0 ,/* 0x0931 */
- 0x00d1 ,/* 0x0932 */
- 0x00d2 ,/* 0x0933 */
- 0x00d3 ,/* 0x0934 */
- 0x00d4 ,/* 0x0935 */
- 0x00d5 ,/* 0x0936 */
- 0x00d6 ,/* 0x0937 */
- 0x00d7 ,/* 0x0938 */
- 0x00d8 ,/* 0x0939 */
- 0xFFFF ,/* 0x093A */
- 0xFFFF ,/* 0x093B */
- 0x00e9 ,/* 0x093c */
- 0xEAE9 ,/* 0x093d */
- 0x00da ,/* 0x093e */
- 0x00db ,/* 0x093f */
- 0x00dc ,/* 0x0940 */
- 0x00dd ,/* 0x0941 */
- 0x00de ,/* 0x0942 */
- 0x00df ,/* 0x0943 */
- 0xDFE9 ,/* 0x0944 */
- 0x00e3 ,/* 0x0945 */
- 0x00e0 ,/* 0x0946 */
- 0x00e1 ,/* 0x0947 */
- 0x00e2 ,/* 0x0948 */
- 0x00e7 ,/* 0x0949 */
- 0x00e4 ,/* 0x094a */
- 0x00e5 ,/* 0x094b */
- 0x00e6 ,/* 0x094c */
- 0x00e8 ,/* 0x094d */
- 0x00ec ,/* 0x094e */
- 0x00ed ,/* 0x094f */
- 0xA1E9 ,/* 0x0950 */ /* OM Symbol */
- 0xFFFF ,/* 0x0951 */
- 0xF0B8 ,/* 0x0952 */
- 0xFFFF ,/* 0x0953 */
- 0xFFFF ,/* 0x0954 */
- 0xFFFF ,/* 0x0955 */
- 0xFFFF ,/* 0x0956 */
- 0xFFFF ,/* 0x0957 */
- 0xb3e9 ,/* 0x0958 */
- 0xb4e9 ,/* 0x0959 */
- 0xb5e9 ,/* 0x095a */
- 0xbae9 ,/* 0x095b */
- 0xbfe9 ,/* 0x095c */
- 0xC0E9 ,/* 0x095d */
- 0xc9e9 ,/* 0x095e */
- 0x00ce ,/* 0x095f */
- 0xAAe9 ,/* 0x0960 */
- 0xA7E9 ,/* 0x0961 */
- 0xDBE9 ,/* 0x0962 */
- 0xDCE9 ,/* 0x0963 */
- 0x00ea ,/* 0x0964 */
- 0xeaea ,/* 0x0965 */
- 0x00f1 ,/* 0x0966 */
- 0x00f2 ,/* 0x0967 */
- 0x00f3 ,/* 0x0968 */
- 0x00f4 ,/* 0x0969 */
- 0x00f5 ,/* 0x096a */
- 0x00f6 ,/* 0x096b */
- 0x00f7 ,/* 0x096c */
- 0x00f8 ,/* 0x096d */
- 0x00f9 ,/* 0x096e */
- 0x00fa ,/* 0x096f */
- 0xF0BF ,/* 0x0970 */
- 0xFFFF ,/* 0x0971 */
- 0xFFFF ,/* 0x0972 */
- 0xFFFF ,/* 0x0973 */
- 0xFFFF ,/* 0x0974 */
- 0xFFFF ,/* 0x0975 */
- 0xFFFF ,/* 0x0976 */
- 0xFFFF ,/* 0x0977 */
- 0xFFFF ,/* 0x0978 */
- 0xFFFF ,/* 0x0979 */
- 0xFFFF ,/* 0x097a */
- 0xFFFF ,/* 0x097b */
- 0xFFFF ,/* 0x097c */
- 0xFFFF ,/* 0x097d */
- 0xFFFF ,/* 0x097e */
- 0xFFFF ,/* 0x097f */
-};
-static const uint16_t toUnicodeTable[256]={
- 0x0000,/* 0x00 */
- 0x0001,/* 0x01 */
- 0x0002,/* 0x02 */
- 0x0003,/* 0x03 */
- 0x0004,/* 0x04 */
- 0x0005,/* 0x05 */
- 0x0006,/* 0x06 */
- 0x0007,/* 0x07 */
- 0x0008,/* 0x08 */
- 0x0009,/* 0x09 */
- 0x000a,/* 0x0a */
- 0x000b,/* 0x0b */
- 0x000c,/* 0x0c */
- 0x000d,/* 0x0d */
- 0x000e,/* 0x0e */
- 0x000f,/* 0x0f */
- 0x0010,/* 0x10 */
- 0x0011,/* 0x11 */
- 0x0012,/* 0x12 */
- 0x0013,/* 0x13 */
- 0x0014,/* 0x14 */
- 0x0015,/* 0x15 */
- 0x0016,/* 0x16 */
- 0x0017,/* 0x17 */
- 0x0018,/* 0x18 */
- 0x0019,/* 0x19 */
- 0x001a,/* 0x1a */
- 0x001b,/* 0x1b */
- 0x001c,/* 0x1c */
- 0x001d,/* 0x1d */
- 0x001e,/* 0x1e */
- 0x001f,/* 0x1f */
- 0x0020,/* 0x20 */
- 0x0021,/* 0x21 */
- 0x0022,/* 0x22 */
- 0x0023,/* 0x23 */
- 0x0024,/* 0x24 */
- 0x0025,/* 0x25 */
- 0x0026,/* 0x26 */
- 0x0027,/* 0x27 */
- 0x0028,/* 0x28 */
- 0x0029,/* 0x29 */
- 0x002a,/* 0x2a */
- 0x002b,/* 0x2b */
- 0x002c,/* 0x2c */
- 0x002d,/* 0x2d */
- 0x002e,/* 0x2e */
- 0x002f,/* 0x2f */
- 0x0030,/* 0x30 */
- 0x0031,/* 0x31 */
- 0x0032,/* 0x32 */
- 0x0033,/* 0x33 */
- 0x0034,/* 0x34 */
- 0x0035,/* 0x35 */
- 0x0036,/* 0x36 */
- 0x0037,/* 0x37 */
- 0x0038,/* 0x38 */
- 0x0039,/* 0x39 */
- 0x003A,/* 0x3A */
- 0x003B,/* 0x3B */
- 0x003c,/* 0x3c */
- 0x003d,/* 0x3d */
- 0x003e,/* 0x3e */
- 0x003f,/* 0x3f */
- 0x0040,/* 0x40 */
- 0x0041,/* 0x41 */
- 0x0042,/* 0x42 */
- 0x0043,/* 0x43 */
- 0x0044,/* 0x44 */
- 0x0045,/* 0x45 */
- 0x0046,/* 0x46 */
- 0x0047,/* 0x47 */
- 0x0048,/* 0x48 */
- 0x0049,/* 0x49 */
- 0x004a,/* 0x4a */
- 0x004b,/* 0x4b */
- 0x004c,/* 0x4c */
- 0x004d,/* 0x4d */
- 0x004e,/* 0x4e */
- 0x004f,/* 0x4f */
- 0x0050,/* 0x50 */
- 0x0051,/* 0x51 */
- 0x0052,/* 0x52 */
- 0x0053,/* 0x53 */
- 0x0054,/* 0x54 */
- 0x0055,/* 0x55 */
- 0x0056,/* 0x56 */
- 0x0057,/* 0x57 */
- 0x0058,/* 0x58 */
- 0x0059,/* 0x59 */
- 0x005a,/* 0x5a */
- 0x005b,/* 0x5b */
- 0x005c,/* 0x5c */
- 0x005d,/* 0x5d */
- 0x005e,/* 0x5e */
- 0x005f,/* 0x5f */
- 0x0060,/* 0x60 */
- 0x0061,/* 0x61 */
- 0x0062,/* 0x62 */
- 0x0063,/* 0x63 */
- 0x0064,/* 0x64 */
- 0x0065,/* 0x65 */
- 0x0066,/* 0x66 */
- 0x0067,/* 0x67 */
- 0x0068,/* 0x68 */
- 0x0069,/* 0x69 */
- 0x006a,/* 0x6a */
- 0x006b,/* 0x6b */
- 0x006c,/* 0x6c */
- 0x006d,/* 0x6d */
- 0x006e,/* 0x6e */
- 0x006f,/* 0x6f */
- 0x0070,/* 0x70 */
- 0x0071,/* 0x71 */
- 0x0072,/* 0x72 */
- 0x0073,/* 0x73 */
- 0x0074,/* 0x74 */
- 0x0075,/* 0x75 */
- 0x0076,/* 0x76 */
- 0x0077,/* 0x77 */
- 0x0078,/* 0x78 */
- 0x0079,/* 0x79 */
- 0x007a,/* 0x7a */
- 0x007b,/* 0x7b */
- 0x007c,/* 0x7c */
- 0x007d,/* 0x7d */
- 0x007e,/* 0x7e */
- 0x007f,/* 0x7f */
- 0x0080,/* 0x80 */
- 0x0081,/* 0x81 */
- 0x0082,/* 0x82 */
- 0x0083,/* 0x83 */
- 0x0084,/* 0x84 */
- 0x0085,/* 0x85 */
- 0x0086,/* 0x86 */
- 0x0087,/* 0x87 */
- 0x0088,/* 0x88 */
- 0x0089,/* 0x89 */
- 0x008a,/* 0x8a */
- 0x008b,/* 0x8b */
- 0x008c,/* 0x8c */
- 0x008d,/* 0x8d */
- 0x008e,/* 0x8e */
- 0x008f,/* 0x8f */
- 0x0090,/* 0x90 */
- 0x0091,/* 0x91 */
- 0x0092,/* 0x92 */
- 0x0093,/* 0x93 */
- 0x0094,/* 0x94 */
- 0x0095,/* 0x95 */
- 0x0096,/* 0x96 */
- 0x0097,/* 0x97 */
- 0x0098,/* 0x98 */
- 0x0099,/* 0x99 */
- 0x009a,/* 0x9a */
- 0x009b,/* 0x9b */
- 0x009c,/* 0x9c */
- 0x009d,/* 0x9d */
- 0x009e,/* 0x9e */
- 0x009f,/* 0x9f */
- 0x00A0,/* 0xa0 */
- 0x0901,/* 0xa1 */
- 0x0902,/* 0xa2 */
- 0x0903,/* 0xa3 */
- 0x0905,/* 0xa4 */
- 0x0906,/* 0xa5 */
- 0x0907,/* 0xa6 */
- 0x0908,/* 0xa7 */
- 0x0909,/* 0xa8 */
- 0x090a,/* 0xa9 */
- 0x090b,/* 0xaa */
- 0x090e,/* 0xab */
- 0x090f,/* 0xac */
- 0x0910,/* 0xad */
- 0x090d,/* 0xae */
- 0x0912,/* 0xaf */
- 0x0913,/* 0xb0 */
- 0x0914,/* 0xb1 */
- 0x0911,/* 0xb2 */
- 0x0915,/* 0xb3 */
- 0x0916,/* 0xb4 */
- 0x0917,/* 0xb5 */
- 0x0918,/* 0xb6 */
- 0x0919,/* 0xb7 */
- 0x091a,/* 0xb8 */
- 0x091b,/* 0xb9 */
- 0x091c,/* 0xba */
- 0x091d,/* 0xbb */
- 0x091e,/* 0xbc */
- 0x091f,/* 0xbd */
- 0x0920,/* 0xbe */
- 0x0921,/* 0xbf */
- 0x0922,/* 0xc0 */
- 0x0923,/* 0xc1 */
- 0x0924,/* 0xc2 */
- 0x0925,/* 0xc3 */
- 0x0926,/* 0xc4 */
- 0x0927,/* 0xc5 */
- 0x0928,/* 0xc6 */
- 0x0929,/* 0xc7 */
- 0x092a,/* 0xc8 */
- 0x092b,/* 0xc9 */
- 0x092c,/* 0xca */
- 0x092d,/* 0xcb */
- 0x092e,/* 0xcc */
- 0x092f,/* 0xcd */
- 0x095f,/* 0xce */
- 0x0930,/* 0xcf */
- 0x0931,/* 0xd0 */
- 0x0932,/* 0xd1 */
- 0x0933,/* 0xd2 */
- 0x0934,/* 0xd3 */
- 0x0935,/* 0xd4 */
- 0x0936,/* 0xd5 */
- 0x0937,/* 0xd6 */
- 0x0938,/* 0xd7 */
- 0x0939,/* 0xd8 */
- 0x200D,/* 0xd9 */
- 0x093e,/* 0xda */
- 0x093f,/* 0xdb */
- 0x0940,/* 0xdc */
- 0x0941,/* 0xdd */
- 0x0942,/* 0xde */
- 0x0943,/* 0xdf */
- 0x0946,/* 0xe0 */
- 0x0947,/* 0xe1 */
- 0x0948,/* 0xe2 */
- 0x0945,/* 0xe3 */
- 0x094a,/* 0xe4 */
- 0x094b,/* 0xe5 */
- 0x094c,/* 0xe6 */
- 0x0949,/* 0xe7 */
- 0x094d,/* 0xe8 */
- 0x093c,/* 0xe9 */
- 0x0964,/* 0xea */
- 0xFFFF,/* 0xeb */
- 0xFFFF,/* 0xec */
- 0xFFFF,/* 0xed */
- 0xFFFF,/* 0xee */
- 0xFFFF,/* 0xef */
- 0xFFFF,/* 0xf0 */
- 0x0966,/* 0xf1 */
- 0x0967,/* 0xf2 */
- 0x0968,/* 0xf3 */
- 0x0969,/* 0xf4 */
- 0x096a,/* 0xf5 */
- 0x096b,/* 0xf6 */
- 0x096c,/* 0xf7 */
- 0x096d,/* 0xf8 */
- 0x096e,/* 0xf9 */
- 0x096f,/* 0xfa */
- 0xFFFF,/* 0xfb */
- 0xFFFF,/* 0xfc */
- 0xFFFF,/* 0xfd */
- 0xFFFF,/* 0xfe */
- 0xFFFF /* 0xff */
-};
-
-static const uint16_t vowelSignESpecialCases[][2]={
- { 2 /*length of array*/ , 0 },
- { 0xA4 , 0x0904 },
-};
-
-static const uint16_t nuktaSpecialCases[][2]={
- { 16 /*length of array*/ , 0 },
- { 0xA6 , 0x090c },
- { 0xEA , 0x093D },
- { 0xDF , 0x0944 },
- { 0xA1 , 0x0950 },
- { 0xb3 , 0x0958 },
- { 0xb4 , 0x0959 },
- { 0xb5 , 0x095a },
- { 0xba , 0x095b },
- { 0xbf , 0x095c },
- { 0xC0 , 0x095d },
- { 0xc9 , 0x095e },
- { 0xAA , 0x0960 },
- { 0xA7 , 0x0961 },
- { 0xDB , 0x0962 },
- { 0xDC , 0x0963 },
-};
-
-
-#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err) UPRV_BLOCK_MACRO_BEGIN { \
- int32_t offset = (int32_t)(source - args->source-1); \
- /* write the targetUniChar to target */ \
- if(target < targetLimit){ \
- if(targetByteUnit <= 0xFF){ \
- *(target)++ = (uint8_t)(targetByteUnit); \
- if(offsets){ \
- *(offsets++) = offset; \
- } \
- }else{ \
- if (targetByteUnit > 0xFFFF) { \
- *(target)++ = (uint8_t)(targetByteUnit>>16); \
- if (offsets) { \
- --offset; \
- *(offsets++) = offset; \
- } \
- } \
- if (!(target < targetLimit)) { \
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
- (uint8_t)(targetByteUnit >> 8); \
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
- (uint8_t)targetByteUnit; \
- *err = U_BUFFER_OVERFLOW_ERROR; \
- } else { \
- *(target)++ = (uint8_t)(targetByteUnit>>8); \
- if(offsets){ \
- *(offsets++) = offset; \
- } \
- if(target < targetLimit){ \
- *(target)++ = (uint8_t) targetByteUnit; \
- if(offsets){ \
- *(offsets++) = offset ; \
- } \
- }else{ \
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\
- (uint8_t) (targetByteUnit); \
- *err = U_BUFFER_OVERFLOW_ERROR; \
- } \
- } \
- } \
- }else{ \
- if (targetByteUnit & 0xFF0000) { \
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
- (uint8_t) (targetByteUnit >>16); \
- } \
- if(targetByteUnit & 0xFF00){ \
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
- (uint8_t) (targetByteUnit >>8); \
- } \
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
- (uint8_t) (targetByteUnit); \
- *err = U_BUFFER_OVERFLOW_ERROR; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/* Rules:
- * Explicit Halant :
- * <HALANT> + <ZWNJ>
- * Soft Halant :
- * <HALANT> + <ZWJ>
- */
-static void U_CALLCONV
-UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
- UConverterFromUnicodeArgs * args, UErrorCode * err) {
- const UChar *source = args->source;
- const UChar *sourceLimit = args->sourceLimit;
- unsigned char *target = (unsigned char *) args->target;
- unsigned char *targetLimit = (unsigned char *) args->targetLimit;
- int32_t* offsets = args->offsets;
- uint32_t targetByteUnit = 0x0000;
- UChar32 sourceChar = 0x0000;
- UChar32 tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */
- UConverterDataISCII *converterData;
- uint16_t newDelta=0;
- uint16_t range = 0;
- UBool deltaChanged = FALSE;
-
- if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- /* initialize data */
- converterData=(UConverterDataISCII*)args->converter->extraInfo;
- newDelta=converterData->currentDeltaFromUnicode;
- range = (uint16_t)(newDelta/DELTA);
-
- if ((sourceChar = args->converter->fromUChar32)!=0) {
- goto getTrail;
- }
-
- /*writing the char to the output stream */
- while (source < sourceLimit) {
- /* Write the language code following LF only if LF is not the last character. */
- if (args->converter->fromUnicodeStatus == LF) {
- targetByteUnit = ATR<<8;
- targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
- args->converter->fromUnicodeStatus = 0x0000;
- /* now append ATR and language code */
- WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
- if (U_FAILURE(*err)) {
- break;
- }
- }
-
- sourceChar = *source++;
- tempContextFromUnicode = converterData->contextCharFromUnicode;
-
- targetByteUnit = missingCharMarker;
-
- /*check if input is in ASCII and C0 control codes range*/
- if (sourceChar <= ASCII_END) {
- args->converter->fromUnicodeStatus = sourceChar;
- WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
- if (U_FAILURE(*err)) {
- break;
- }
- continue;
- }
- switch (sourceChar) {
- case ZWNJ:
- /* contextChar has HALANT */
- if (converterData->contextCharFromUnicode) {
- converterData->contextCharFromUnicode = 0x00;
- targetByteUnit = ISCII_HALANT;
- } else {
- /* consume ZWNJ and continue */
- converterData->contextCharFromUnicode = 0x00;
- continue;
- }
- break;
- case ZWJ:
- /* contextChar has HALANT */
- if (converterData->contextCharFromUnicode) {
- targetByteUnit = ISCII_NUKTA;
- } else {
- targetByteUnit =ISCII_INV;
- }
- converterData->contextCharFromUnicode = 0x00;
- break;
- default:
- /* is the sourceChar in the INDIC_RANGE? */
- if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {
- /* Danda and Double Danda are valid in Northern scripts.. since Unicode
- * does not include these codepoints in all Northern scrips we need to
- * filter them out
- */
- if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {
- /* find out to which block the souceChar belongs*/
- range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
- newDelta =(uint16_t)(range*DELTA);
-
- /* Now are we in the same block as the previous? */
- if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) {
- converterData->currentDeltaFromUnicode = newDelta;
- converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
- deltaChanged =TRUE;
- converterData->isFirstBuffer=FALSE;
- }
-
- if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {
- if (sourceChar == PNJ_TIPPI) {
- /* Make sure Tippi is converterd to Bindi. */
- sourceChar = PNJ_BINDI;
- } else if (sourceChar == PNJ_ADHAK) {
- /* This is for consonant cluster handling. */
- converterData->contextCharFromUnicode = PNJ_ADHAK;
- }
-
- }
- /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
- /* now subtract the new delta from sourceChar*/
- sourceChar -= converterData->currentDeltaFromUnicode;
- }
-
- /* get the target byte unit */
- targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];
-
- /* is the code point valid in current script? */
- if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) {
- /* Vocallic RR is assigned in ISCII Telugu and Unicode */
- if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) {
- targetByteUnit=missingCharMarker;
- }
- }
-
- if (deltaChanged) {
- /* we are in a script block which is different than
- * previous sourceChar's script block write ATR and language codes
- */
- uint32_t temp=0;
- temp =(uint16_t)(ATR<<8);
- temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
- /* reset */
- deltaChanged=FALSE;
- /* now append ATR and language code */
- WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
- if (U_FAILURE(*err)) {
- break;
- }
- }
-
- if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
- continue;
- }
- }
- /* reset context char */
- converterData->contextCharFromUnicode = 0x00;
- break;
- }
- if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
- /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
- /* reset context char */
- converterData->contextCharFromUnicode = 0x0000;
- targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;
- /* write targetByteUnit to target */
- WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err);
- if (U_FAILURE(*err)) {
- break;
- }
- } else if (targetByteUnit != missingCharMarker) {
- if (targetByteUnit==ISCII_HALANT) {
- converterData->contextCharFromUnicode = (UChar)targetByteUnit;
- }
- /* write targetByteUnit to target*/
- WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
- if (U_FAILURE(*err)) {
- break;
- }
- } else {
- /* oops.. the code point is unassigned */
- /*check if the char is a First surrogate*/
- if (U16_IS_SURROGATE(sourceChar)) {
- if (U16_IS_SURROGATE_LEAD(sourceChar)) {
-getTrail:
- /*look ahead to find the trail surrogate*/
- if (source < sourceLimit) {
- /* test the following code unit */
- UChar trail= (*source);
- if (U16_IS_TRAIL(trail)) {
- source++;
- sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
- *err =U_INVALID_CHAR_FOUND;
- /* convert this surrogate code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- }
- } else {
- /* no more input */
- *err = U_ZERO_ERROR;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- }
- } else {
- /* callback(unassigned) for a BMP code point */
- *err = U_INVALID_CHAR_FOUND;
- }
-
- args->converter->fromUChar32=sourceChar;
- break;
- }
- }/* end while(mySourceIndex<mySourceLength) */
-
- /*save the state and return */
- args->source = source;
- args->target = (char*)target;
-}
-
-static const uint16_t lookupTable[][2]={
- { ZERO, ZERO }, /*DEFALT*/
- { ZERO, ZERO }, /*ROMAN*/
- { DEVANAGARI, DEV_MASK },
- { BENGALI, BNG_MASK },
- { TAMIL, TML_MASK },
- { TELUGU, KND_MASK },
- { BENGALI, BNG_MASK },
- { ORIYA, ORI_MASK },
- { KANNADA, KND_MASK },
- { MALAYALAM, MLM_MASK },
- { GUJARATI, GJR_MASK },
- { GURMUKHI, PNJ_MASK }
-};
-
-#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err) UPRV_BLOCK_MACRO_BEGIN { \
- /* add offset to current Indic Block */ \
- if(targetUniChar>ASCII_END && \
- targetUniChar != ZWJ && \
- targetUniChar != ZWNJ && \
- targetUniChar != DANDA && \
- targetUniChar != DOUBLE_DANDA){ \
- \
- targetUniChar+=(uint16_t)(delta); \
- } \
- /* now write the targetUniChar */ \
- if(target<args->targetLimit){ \
- *(target)++ = (UChar)targetUniChar; \
- if(offsets){ \
- *(offsets)++ = (int32_t)(offset); \
- } \
- }else{ \
- args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \
- (UChar)targetUniChar; \
- *err = U_BUFFER_OVERFLOW_ERROR; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-#define GET_MAPPING(sourceChar,targetUniChar,data) UPRV_BLOCK_MACRO_BEGIN { \
- targetUniChar = toUnicodeTable[(sourceChar)] ; \
- /* is the code point valid in current script? */ \
- if(sourceChar> ASCII_END && \
- (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \
- /* Vocallic RR is assigne in ISCII Telugu and Unicode */ \
- if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \
- targetUniChar!=VOCALLIC_RR){ \
- targetUniChar=missingCharMarker; \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/***********
- * Rules for ISCII to Unicode converter
- * ISCII is stateful encoding. To convert ISCII bytes to Unicode,
- * which has both precomposed and decomposed forms characters
- * pre-context and post-context need to be considered.
- *
- * Post context
- * i) ATR : Attribute code is used to declare the font and script switching.
- * Currently we only switch scripts and font codes consumed without generating an error
- * ii) EXT : Extention code is used to declare switching to Sanskrit and for obscure,
- * obsolete characters
- * Pre context
- * i) Halant: if preceeded by a halant then it is a explicit halant
- * ii) Nukta :
- * a) if preceeded by a halant then it is a soft halant
- * b) if preceeded by specific consonants and the ligatures have pre-composed
- * characters in Unicode then convert to pre-composed characters
- * iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda
- *
- */
-
-static void U_CALLCONV
-UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) {
- const char *source = ( char *) args->source;
- UChar *target = args->target;
- const char *sourceLimit = args->sourceLimit;
- const UChar* targetLimit = args->targetLimit;
- uint32_t targetUniChar = 0x0000;
- uint8_t sourceChar = 0x0000;
- UConverterDataISCII* data;
- UChar32* toUnicodeStatus=NULL;
- UChar32 tempTargetUniChar = 0x0000;
- UChar* contextCharToUnicode= NULL;
- UBool found;
- int i;
- int offset = 0;
-
- if ((args->converter == NULL) || (target < args->target) || (source < args->source)) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- data = (UConverterDataISCII*)(args->converter->extraInfo);
- contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
- toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/
-
- while (U_SUCCESS(*err) && source<sourceLimit) {
-
- targetUniChar = missingCharMarker;
-
- if (target < targetLimit) {
- sourceChar = (unsigned char)*(source)++;
-
- /* look at the post-context preform special processing */
- if (*contextCharToUnicode==ATR) {
-
- /* If we have ATR in *contextCharToUnicode then we need to change our
- * state to the Indic Script specified by sourceChar
- */
-
- /* check if the sourceChar is supported script range*/
- if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {
- data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
- data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1];
- } else if (sourceChar==DEF) {
- /* switch back to default */
- data->currentDeltaToUnicode = data->defDeltaToUnicode;
- data->currentMaskToUnicode = data->defMaskToUnicode;
- } else {
- if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
- /* these are display codes consume and continue */
- } else {
- *err =U_ILLEGAL_CHAR_FOUND;
- /* reset */
- *contextCharToUnicode=NO_CHAR_MARKER;
- goto CALLBACK;
- }
- }
-
- /* reset */
- *contextCharToUnicode=NO_CHAR_MARKER;
-
- continue;
-
- } else if (*contextCharToUnicode==EXT) {
- /* check if sourceChar is in 0xA1-0xEE range */
- if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
- /* We currently support only Anudatta and Devanagari abbreviation sign */
- if (sourceChar==0xBF || sourceChar == 0xB8) {
- targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
-
- /* find out if the mapping is valid in this state */
- if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
- *contextCharToUnicode= NO_CHAR_MARKER;
-
- /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
- if (data->prevToUnicodeStatus) {
- WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
- data->prevToUnicodeStatus = 0x0000;
- }
- /* write to target */
- WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
-
- continue;
- }
- }
- /* byte unit is unassigned */
- targetUniChar = missingCharMarker;
- *err= U_INVALID_CHAR_FOUND;
- } else {
- /* only 0xA1 - 0xEE are legal after EXT char */
- *contextCharToUnicode= NO_CHAR_MARKER;
- *err = U_ILLEGAL_CHAR_FOUND;
- }
- goto CALLBACK;
- } else if (*contextCharToUnicode==ISCII_INV) {
- if (sourceChar==ISCII_HALANT) {
- targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
- } else {
- targetUniChar = ZWJ;
- }
-
- /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
- if (data->prevToUnicodeStatus) {
- WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
- data->prevToUnicodeStatus = 0x0000;
- }
- /* write to target */
- WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
- /* reset */
- *contextCharToUnicode=NO_CHAR_MARKER;
- }
-
- /* look at the pre-context and perform special processing */
- switch (sourceChar) {
- case ISCII_INV:
- case EXT:
- case ATR:
- *contextCharToUnicode = (UChar)sourceChar;
-
- if (*toUnicodeStatus != missingCharMarker) {
- /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
- if (data->prevToUnicodeStatus) {
- WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
- data->prevToUnicodeStatus = 0x0000;
- }
- WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
- *toUnicodeStatus = missingCharMarker;
- }
- continue;
- case ISCII_DANDA:
- /* handle double danda*/
- if (*contextCharToUnicode== ISCII_DANDA) {
- targetUniChar = DOUBLE_DANDA;
- /* clear the context */
- *contextCharToUnicode = NO_CHAR_MARKER;
- *toUnicodeStatus = missingCharMarker;
- } else {
- GET_MAPPING(sourceChar,targetUniChar,data);
- *contextCharToUnicode = sourceChar;
- }
- break;
- case ISCII_HALANT:
- /* handle explicit halant */
- if (*contextCharToUnicode == ISCII_HALANT) {
- targetUniChar = ZWNJ;
- /* clear the context */
- *contextCharToUnicode = NO_CHAR_MARKER;
- } else {
- GET_MAPPING(sourceChar,targetUniChar,data);
- *contextCharToUnicode = sourceChar;
- }
- break;
- case 0x0A:
- case 0x0D:
- data->resetToDefaultToUnicode = TRUE;
- GET_MAPPING(sourceChar,targetUniChar,data)
- ;
- *contextCharToUnicode = sourceChar;
- break;
-
- case ISCII_VOWEL_SIGN_E:
- i=1;
- found=FALSE;
- for (; i<vowelSignESpecialCases[0][0]; i++) {
- U_ASSERT(i<UPRV_LENGTHOF(vowelSignESpecialCases));
- if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {
- targetUniChar=vowelSignESpecialCases[i][1];
- found=TRUE;
- break;
- }
- }
- if (found) {
- /* find out if the mapping is valid in this state */
- if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
- /*targetUniChar += data->currentDeltaToUnicode ;*/
- *contextCharToUnicode= NO_CHAR_MARKER;
- *toUnicodeStatus = missingCharMarker;
- break;
- }
- }
- GET_MAPPING(sourceChar,targetUniChar,data);
- *contextCharToUnicode = sourceChar;
- break;
-
- case ISCII_NUKTA:
- /* handle soft halant */
- if (*contextCharToUnicode == ISCII_HALANT) {
- targetUniChar = ZWJ;
- /* clear the context */
- *contextCharToUnicode = NO_CHAR_MARKER;
- break;
- } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) {
- /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
- if (data->prevToUnicodeStatus) {
- WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
- data->prevToUnicodeStatus = 0x0000;
- }
- /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.
- * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
- */
- targetUniChar = PNJ_RRA;
- WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
- if (U_SUCCESS(*err)) {
- targetUniChar = PNJ_SIGN_VIRAMA;
- WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
- if (U_SUCCESS(*err)) {
- targetUniChar = PNJ_HA;
- WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
- } else {
- args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
- }
- } else {
- args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA;
- args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
- }
- *toUnicodeStatus = missingCharMarker;
- data->contextCharToUnicode = NO_CHAR_MARKER;
- continue;
- } else {
- /* try to handle <CHAR> + ISCII_NUKTA special mappings */
- i=1;
- found =FALSE;
- for (; i<nuktaSpecialCases[0][0]; i++) {
- if (nuktaSpecialCases[i][0]==(uint8_t)
- *contextCharToUnicode) {
- targetUniChar=nuktaSpecialCases[i][1];
- found =TRUE;
- break;
- }
- }
- if (found) {
- /* find out if the mapping is valid in this state */
- if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
- /*targetUniChar += data->currentDeltaToUnicode ;*/
- *contextCharToUnicode= NO_CHAR_MARKER;
- *toUnicodeStatus = missingCharMarker;
- if (data->currentDeltaToUnicode == PNJ_DELTA) {
- /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
- if (data->prevToUnicodeStatus) {
- WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
- data->prevToUnicodeStatus = 0x0000;
- }
- WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
- continue;
- }
- break;
- }
- /* else fall through to default */
- }
- /* else fall through to default */
- U_FALLTHROUGH;
- }
- default:GET_MAPPING(sourceChar,targetUniChar,data)
- ;
- *contextCharToUnicode = sourceChar;
- break;
- }
-
- if (*toUnicodeStatus != missingCharMarker) {
- /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
- if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&
- (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && ((UChar32)(targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus)) {
- /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
- offset = (int)(source-args->source - 3);
- tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */
- WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err);
- WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err);
- data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */
- *toUnicodeStatus = missingCharMarker;
- continue;
- } else {
- /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
- if (data->prevToUnicodeStatus) {
- WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
- data->prevToUnicodeStatus = 0x0000;
- }
- /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.
- * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
- */
- if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
- targetUniChar = PNJ_TIPPI - PNJ_DELTA;
- WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
- } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
- /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
- data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
- } else {
- /* write the previously mapped codepoint */
- WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
- }
- }
- *toUnicodeStatus = missingCharMarker;
- }
-
- if (targetUniChar != missingCharMarker) {
- /* now save the targetUniChar for delayed write */
- *toUnicodeStatus = (UChar) targetUniChar;
- if (data->resetToDefaultToUnicode==TRUE) {
- data->currentDeltaToUnicode = data->defDeltaToUnicode;
- data->currentMaskToUnicode = data->defMaskToUnicode;
- data->resetToDefaultToUnicode=FALSE;
- }
- } else {
-
- /* we reach here only if targetUniChar == missingCharMarker
- * so assign codes to reason and err
- */
- *err = U_INVALID_CHAR_FOUND;
-CALLBACK:
- args->converter->toUBytes[0] = (uint8_t) sourceChar;
- args->converter->toULength = 1;
- break;
- }
-
- } else {
- *err =U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-
- if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {
- /* end of the input stream */
- UConverter *cnv = args->converter;
-
- if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) {
- /* set toUBytes[] */
- cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
- cnv->toULength = 1;
-
- /* avoid looping on truncated sequences */
- *contextCharToUnicode = NO_CHAR_MARKER;
- } else {
- cnv->toULength = 0;
- }
-
- if (*toUnicodeStatus != missingCharMarker) {
- /* output a remaining target character */
- WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err);
- *toUnicodeStatus = missingCharMarker;
- }
- }
-
- args->target = target;
- args->source = source;
-}
-
-/* structure for SafeClone calculations */
-struct cloneISCIIStruct {
- UConverter cnv;
- UConverterDataISCII mydata;
-};
-
-static UConverter * U_CALLCONV
-_ISCII_SafeClone(const UConverter *cnv,
- void *stackBuffer,
- int32_t *pBufferSize,
- UErrorCode *status)
-{
- struct cloneISCIIStruct * localClone;
- int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);
-
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
- *pBufferSize = bufferSizeNeeded;
- return 0;
- }
-
- localClone = (struct cloneISCIIStruct *)stackBuffer;
- /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
-
- uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));
- localClone->cnv.extraInfo = &localClone->mydata;
- localClone->cnv.isExtraLocal = TRUE;
-
- return &localClone->cnv;
-}
-
-static void U_CALLCONV
-_ISCIIGetUnicodeSet(const UConverter *cnv,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode)
-{
- (void)cnv;
- (void)which;
- (void)pErrorCode;
- int32_t idx, script;
- uint8_t mask;
-
- /* Since all ISCII versions allow switching to other ISCII
- scripts, we add all roundtrippable characters to this set. */
- sa->addRange(sa->set, 0, ASCII_END);
- for (script = DEVANAGARI; script <= MALAYALAM; script++) {
- mask = (uint8_t)(lookupInitialData[script].maskEnum);
- for (idx = 0; idx < DELTA; idx++) {
- /* added check for TELUGU character */
- if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) {
- sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
- }
- }
- }
- sa->add(sa->set, DANDA);
- sa->add(sa->set, DOUBLE_DANDA);
- sa->add(sa->set, ZWNJ);
- sa->add(sa->set, ZWJ);
-}
-U_CDECL_END
-static const UConverterImpl _ISCIIImpl={
-
- UCNV_ISCII,
-
- NULL,
- NULL,
-
- _ISCIIOpen,
- _ISCIIClose,
- _ISCIIReset,
-
- UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
- UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
- UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
- UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
- NULL,
-
- NULL,
- _ISCIIgetName,
- NULL,
- _ISCII_SafeClone,
- _ISCIIGetUnicodeSet,
- NULL,
- NULL
-};
-
-static const UConverterStaticData _ISCIIStaticData={
- sizeof(UConverterStaticData),
- "ISCII",
- 0,
- UCNV_IBM,
- UCNV_ISCII,
- 1,
- 4,
- { 0x1a, 0, 0, 0 },
- 0x1,
- FALSE,
- FALSE,
- 0x0,
- 0x0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
-
-};
-
-const UConverterSharedData _ISCIIData=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISCIIStaticData, &_ISCIIImpl);
-
-#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
diff --git a/contrib/libs/icu/common/ucnvlat1.cpp b/contrib/libs/icu/common/ucnvlat1.cpp
deleted file mode 100644
index 358bc0caa25..00000000000
--- a/contrib/libs/icu/common/ucnvlat1.cpp
+++ /dev/null
@@ -1,756 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2000-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* file name: ucnvlat1.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2000feb07
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "unicode/uset.h"
-#include "unicode/utf8.h"
-#include "ucnv_bld.h"
-#include "ucnv_cnv.h"
-#include "ustr_imp.h"
-
-/* control optimizations according to the platform */
-#define LATIN1_UNROLL_FROM_UNICODE 1
-
-/* ISO 8859-1 --------------------------------------------------------------- */
-
-/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
-U_CDECL_BEGIN
-static void U_CALLCONV
-_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- const uint8_t *source;
- UChar *target;
- int32_t targetCapacity, length;
- int32_t *offsets;
-
- int32_t sourceIndex;
-
- /* set up the local pointers */
- source=(const uint8_t *)pArgs->source;
- target=pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
- offsets=pArgs->offsets;
-
- sourceIndex=0;
-
- /*
- * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
- * for the minimum of the sourceLength and targetCapacity
- */
- length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
- if(length<=targetCapacity) {
- targetCapacity=length;
- } else {
- /* target will be full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- length=targetCapacity;
- }
-
- if(targetCapacity>=8) {
- /* This loop is unrolled for speed and improved pipelining. */
- int32_t count, loops;
-
- loops=count=targetCapacity>>3;
- length=targetCapacity&=0x7;
- do {
- target[0]=source[0];
- target[1]=source[1];
- target[2]=source[2];
- target[3]=source[3];
- target[4]=source[4];
- target[5]=source[5];
- target[6]=source[6];
- target[7]=source[7];
- target+=8;
- source+=8;
- } while(--count>0);
-
- if(offsets!=NULL) {
- do {
- offsets[0]=sourceIndex++;
- offsets[1]=sourceIndex++;
- offsets[2]=sourceIndex++;
- offsets[3]=sourceIndex++;
- offsets[4]=sourceIndex++;
- offsets[5]=sourceIndex++;
- offsets[6]=sourceIndex++;
- offsets[7]=sourceIndex++;
- offsets+=8;
- } while(--loops>0);
- }
- }
-
- /* conversion loop */
- while(targetCapacity>0) {
- *target++=*source++;
- --targetCapacity;
- }
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
-
- /* set offsets */
- if(offsets!=NULL) {
- while(length>0) {
- *offsets++=sourceIndex++;
- --length;
- }
- pArgs->offsets=offsets;
- }
-}
-
-/* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
-static UChar32 U_CALLCONV
-_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- const uint8_t *source=(const uint8_t *)pArgs->source;
- if(source<(const uint8_t *)pArgs->sourceLimit) {
- pArgs->source=(const char *)(source+1);
- return *source;
- }
-
- /* no output because of empty input */
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0xffff;
-}
-
-/* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */
-static void U_CALLCONV
-_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const UChar *source, *sourceLimit;
- uint8_t *target, *oldTarget;
- int32_t targetCapacity, length;
- int32_t *offsets;
-
- UChar32 cp;
- UChar c, max;
-
- int32_t sourceIndex;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=oldTarget=(uint8_t *)pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
- offsets=pArgs->offsets;
-
- if(cnv->sharedData==&_Latin1Data) {
- max=0xff; /* Latin-1 */
- } else {
- max=0x7f; /* US-ASCII */
- }
-
- /* get the converter state from UConverter */
- cp=cnv->fromUChar32;
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex= cp==0 ? 0 : -1;
-
- /*
- * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
- * for the minimum of the sourceLength and targetCapacity
- */
- length=(int32_t)(sourceLimit-source);
- if(length<targetCapacity) {
- targetCapacity=length;
- }
-
- /* conversion loop */
- if(cp!=0 && targetCapacity>0) {
- goto getTrail;
- }
-
-#if LATIN1_UNROLL_FROM_UNICODE
- /* unroll the loop with the most common case */
- if(targetCapacity>=16) {
- int32_t count, loops;
- UChar u, oredChars;
-
- loops=count=targetCapacity>>4;
- do {
- oredChars=u=*source++;
- *target++=(uint8_t)u;
- oredChars|=u=*source++;
- *target++=(uint8_t)u;
- oredChars|=u=*source++;
- *target++=(uint8_t)u;
- oredChars|=u=*source++;
- *target++=(uint8_t)u;
- oredChars|=u=*source++;
- *target++=(uint8_t)u;
- oredChars|=u=*source++;
- *target++=(uint8_t)u;
- oredChars|=u=*source++;
- *target++=(uint8_t)u;
- oredChars|=u=*source++;
- *target++=(uint8_t)u;
- oredChars|=u=*source++;
- *target++=(uint8_t)u;
- oredChars|=u=*source++;
- *target++=(uint8_t)u;
- oredChars|=u=*source++;
- *target++=(uint8_t)u;
- oredChars|=u=*source++;
- *target++=(uint8_t)u;
- oredChars|=u=*source++;
- *target++=(uint8_t)u;
- oredChars|=u=*source++;
- *target++=(uint8_t)u;
- oredChars|=u=*source++;
- *target++=(uint8_t)u;
- oredChars|=u=*source++;
- *target++=(uint8_t)u;
-
- /* were all 16 entries really valid? */
- if(oredChars>max) {
- /* no, return to the first of these 16 */
- source-=16;
- target-=16;
- break;
- }
- } while(--count>0);
- count=loops-count;
- targetCapacity-=16*count;
-
- if(offsets!=NULL) {
- oldTarget+=16*count;
- while(count>0) {
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- --count;
- }
- }
- }
-#endif
-
- /* conversion loop */
- c=0;
- while(targetCapacity>0 && (c=*source++)<=max) {
- /* convert the Unicode code point */
- *target++=(uint8_t)c;
- --targetCapacity;
- }
-
- if(c>max) {
- cp=c;
- if(!U_IS_SURROGATE(cp)) {
- /* callback(unassigned) */
- } else if(U_IS_SURROGATE_LEAD(cp)) {
-getTrail:
- if(source<sourceLimit) {
- /* test the following code unit */
- UChar trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- cp=U16_GET_SUPPLEMENTARY(cp, trail);
- /* this codepage does not map supplementary code points */
- /* callback(unassigned) */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- }
- } else {
- /* no more input */
- cnv->fromUChar32=cp;
- goto noMoreInput;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- }
-
- *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
- cnv->fromUChar32=cp;
- }
-noMoreInput:
-
- /* set offsets since the start */
- if(offsets!=NULL) {
- size_t count=target-oldTarget;
- while(count>0) {
- *offsets++=sourceIndex++;
- --count;
- }
- }
-
- if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
-
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- pArgs->offsets=offsets;
-}
-
-/* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). */
-static void U_CALLCONV
-ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
- UConverterToUnicodeArgs *pToUArgs,
- UErrorCode *pErrorCode) {
- UConverter *utf8;
- const uint8_t *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
-
- UChar32 c;
- uint8_t b, t1;
-
- /* set up the local pointers */
- utf8=pToUArgs->converter;
- source=(uint8_t *)pToUArgs->source;
- sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
- target=(uint8_t *)pFromUArgs->target;
- targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
-
- /* get the converter state from the UTF-8 UConverter */
- if (utf8->toULength > 0) {
- c=(UChar32)utf8->toUnicodeStatus;
- } else {
- c = 0;
- }
- if(c!=0 && source<sourceLimit) {
- if(targetCapacity==0) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- return;
- } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
- ++source;
- *target++=(uint8_t)(((c&3)<<6)|t1);
- --targetCapacity;
-
- utf8->toUnicodeStatus=0;
- utf8->toULength=0;
- } else {
- /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
- *pErrorCode=U_USING_DEFAULT_WARNING;
- return;
- }
- }
-
- /*
- * Make sure that the last byte sequence before sourceLimit is complete
- * or runs into a lead byte.
- * In the conversion loop compare source with sourceLimit only once
- * per multi-byte character.
- * For Latin-1, adjust sourceLimit only for 1 trail byte because
- * the conversion loop handles at most 2-byte sequences.
- */
- if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
- --sourceLimit;
- }
-
- /* conversion loop */
- while(source<sourceLimit) {
- if(targetCapacity>0) {
- b=*source++;
- if(U8_IS_SINGLE(b)) {
- /* convert ASCII */
- *target++=(uint8_t)b;
- --targetCapacity;
- } else if( /* handle U+0080..U+00FF inline */
- b>=0xc2 && b<=0xc3 &&
- (t1=(uint8_t)(*source-0x80)) <= 0x3f
- ) {
- ++source;
- *target++=(uint8_t)(((b&3)<<6)|t1);
- --targetCapacity;
- } else {
- /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
- pToUArgs->source=(char *)(source-1);
- pFromUArgs->target=(char *)target;
- *pErrorCode=U_USING_DEFAULT_WARNING;
- return;
- }
- } else {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-
- /*
- * The sourceLimit may have been adjusted before the conversion loop
- * to stop before a truncated sequence.
- * If so, then collect the truncated sequence now.
- * For Latin-1, there is at most one lead byte because of the
- * smaller sourceLimit adjustment logic.
- */
- if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
- utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
- utf8->toULength=1;
- utf8->mode=U8_COUNT_BYTES(b);
- }
-
- /* write back the updated pointers */
- pToUArgs->source=(char *)source;
- pFromUArgs->target=(char *)target;
-}
-
-static void U_CALLCONV
-_Latin1GetUnicodeSet(const UConverter *cnv,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode) {
- (void)cnv;
- (void)which;
- (void)pErrorCode;
- sa->addRange(sa->set, 0, 0xff);
-}
-U_CDECL_END
-
-
-static const UConverterImpl _Latin1Impl={
- UCNV_LATIN_1,
-
- NULL,
- NULL,
-
- NULL,
- NULL,
- NULL,
-
- _Latin1ToUnicodeWithOffsets,
- _Latin1ToUnicodeWithOffsets,
- _Latin1FromUnicodeWithOffsets,
- _Latin1FromUnicodeWithOffsets,
- _Latin1GetNextUChar,
-
- NULL,
- NULL,
- NULL,
- NULL,
- _Latin1GetUnicodeSet,
-
- NULL,
- ucnv_Latin1FromUTF8
-};
-
-static const UConverterStaticData _Latin1StaticData={
- sizeof(UConverterStaticData),
- "ISO-8859-1",
- 819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
- { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-const UConverterSharedData _Latin1Data=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl);
-
-/* US-ASCII ----------------------------------------------------------------- */
-
-U_CDECL_BEGIN
-/* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
-static void U_CALLCONV
-_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- const uint8_t *source, *sourceLimit;
- UChar *target, *oldTarget;
- int32_t targetCapacity, length;
- int32_t *offsets;
-
- int32_t sourceIndex;
-
- uint8_t c;
-
- /* set up the local pointers */
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=oldTarget=pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
- offsets=pArgs->offsets;
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex=0;
-
- /*
- * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
- * for the minimum of the sourceLength and targetCapacity
- */
- length=(int32_t)(sourceLimit-source);
- if(length<targetCapacity) {
- targetCapacity=length;
- }
-
- if(targetCapacity>=8) {
- /* This loop is unrolled for speed and improved pipelining. */
- int32_t count, loops;
- UChar oredChars;
-
- loops=count=targetCapacity>>3;
- do {
- oredChars=target[0]=source[0];
- oredChars|=target[1]=source[1];
- oredChars|=target[2]=source[2];
- oredChars|=target[3]=source[3];
- oredChars|=target[4]=source[4];
- oredChars|=target[5]=source[5];
- oredChars|=target[6]=source[6];
- oredChars|=target[7]=source[7];
-
- /* were all 8 entries really valid? */
- if(oredChars>0x7f) {
- /* no, stop at the first of these 8 */
- break;
- }
- source+=8;
- target+=8;
- } while(--count>0);
- count=loops-count;
- targetCapacity-=count*8;
-
- if(offsets!=NULL) {
- oldTarget+=count*8;
- while(count>0) {
- offsets[0]=sourceIndex++;
- offsets[1]=sourceIndex++;
- offsets[2]=sourceIndex++;
- offsets[3]=sourceIndex++;
- offsets[4]=sourceIndex++;
- offsets[5]=sourceIndex++;
- offsets[6]=sourceIndex++;
- offsets[7]=sourceIndex++;
- offsets+=8;
- --count;
- }
- }
- }
-
- /* conversion loop */
- c=0;
- while(targetCapacity>0 && (c=*source++)<=0x7f) {
- *target++=c;
- --targetCapacity;
- }
-
- if(c>0x7f) {
- /* callback(illegal); copy the current bytes to toUBytes[] */
- UConverter *cnv=pArgs->converter;
- cnv->toUBytes[0]=c;
- cnv->toULength=1;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- } else if(source<sourceLimit && target>=pArgs->targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
-
- /* set offsets since the start */
- if(offsets!=NULL) {
- size_t count=target-oldTarget;
- while(count>0) {
- *offsets++=sourceIndex++;
- --count;
- }
- }
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- pArgs->offsets=offsets;
-}
-
-/* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
-static UChar32 U_CALLCONV
-_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- const uint8_t *source;
- uint8_t b;
-
- source=(const uint8_t *)pArgs->source;
- if(source<(const uint8_t *)pArgs->sourceLimit) {
- b=*source++;
- pArgs->source=(const char *)source;
- if(b<=0x7f) {
- return b;
- } else {
- UConverter *cnv=pArgs->converter;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- return 0xffff;
- }
- }
-
- /* no output because of empty input */
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0xffff;
-}
-
-/* "Convert" UTF-8 to US-ASCII: Validate and copy. */
-static void U_CALLCONV
-ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
- UConverterToUnicodeArgs *pToUArgs,
- UErrorCode *pErrorCode) {
- const uint8_t *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity, length;
-
- uint8_t c;
-
- if(pToUArgs->converter->toULength > 0) {
- /* no handling of partial UTF-8 characters here, fall back to pivoting */
- *pErrorCode=U_USING_DEFAULT_WARNING;
- return;
- }
-
- /* set up the local pointers */
- source=(const uint8_t *)pToUArgs->source;
- sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
- target=(uint8_t *)pFromUArgs->target;
- targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
-
- /*
- * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
- * for the minimum of the sourceLength and targetCapacity
- */
- length=(int32_t)(sourceLimit-source);
- if(length<targetCapacity) {
- targetCapacity=length;
- }
-
- /* unroll the loop with the most common case */
- if(targetCapacity>=16) {
- int32_t count, loops;
- uint8_t oredChars;
-
- loops=count=targetCapacity>>4;
- do {
- oredChars=*target++=*source++;
- oredChars|=*target++=*source++;
- oredChars|=*target++=*source++;
- oredChars|=*target++=*source++;
- oredChars|=*target++=*source++;
- oredChars|=*target++=*source++;
- oredChars|=*target++=*source++;
- oredChars|=*target++=*source++;
- oredChars|=*target++=*source++;
- oredChars|=*target++=*source++;
- oredChars|=*target++=*source++;
- oredChars|=*target++=*source++;
- oredChars|=*target++=*source++;
- oredChars|=*target++=*source++;
- oredChars|=*target++=*source++;
- oredChars|=*target++=*source++;
-
- /* were all 16 entries really valid? */
- if(oredChars>0x7f) {
- /* no, return to the first of these 16 */
- source-=16;
- target-=16;
- break;
- }
- } while(--count>0);
- count=loops-count;
- targetCapacity-=16*count;
- }
-
- /* conversion loop */
- c=0;
- while(targetCapacity>0 && (c=*source)<=0x7f) {
- ++source;
- *target++=c;
- --targetCapacity;
- }
-
- if(c>0x7f) {
- /* non-ASCII character, handle in standard converter */
- *pErrorCode=U_USING_DEFAULT_WARNING;
- } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
-
- /* write back the updated pointers */
- pToUArgs->source=(const char *)source;
- pFromUArgs->target=(char *)target;
-}
-
-static void U_CALLCONV
-_ASCIIGetUnicodeSet(const UConverter *cnv,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode) {
- (void)cnv;
- (void)which;
- (void)pErrorCode;
- sa->addRange(sa->set, 0, 0x7f);
-}
-U_CDECL_END
-
-static const UConverterImpl _ASCIIImpl={
- UCNV_US_ASCII,
-
- NULL,
- NULL,
-
- NULL,
- NULL,
- NULL,
-
- _ASCIIToUnicodeWithOffsets,
- _ASCIIToUnicodeWithOffsets,
- _Latin1FromUnicodeWithOffsets,
- _Latin1FromUnicodeWithOffsets,
- _ASCIIGetNextUChar,
-
- NULL,
- NULL,
- NULL,
- NULL,
- _ASCIIGetUnicodeSet,
-
- NULL,
- ucnv_ASCIIFromUTF8
-};
-
-static const UConverterStaticData _ASCIIStaticData={
- sizeof(UConverterStaticData),
- "US-ASCII",
- 367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
- { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-const UConverterSharedData _ASCIIData=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl);
-
-#endif
diff --git a/contrib/libs/icu/common/ucnvmbcs.cpp b/contrib/libs/icu/common/ucnvmbcs.cpp
deleted file mode 100644
index ca9b0a335ad..00000000000
--- a/contrib/libs/icu/common/ucnvmbcs.cpp
+++ /dev/null
@@ -1,5723 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2000-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: ucnvmbcs.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2000jul03
-* created by: Markus W. Scherer
-*
-* The current code in this file replaces the previous implementation
-* of conversion code from multi-byte codepages to Unicode and back.
-* This implementation supports the following:
-* - legacy variable-length codepages with up to 4 bytes per character
-* - all Unicode code points (up to 0x10ffff)
-* - efficient distinction of unassigned vs. illegal byte sequences
-* - it is possible in fromUnicode() to directly deal with simple
-* stateful encodings (used for EBCDIC_STATEFUL)
-* - it is possible to convert Unicode code points
-* to a single zero byte (but not as a fallback except for SBCS)
-*
-* Remaining limitations in fromUnicode:
-* - byte sequences must not have leading zero bytes
-* - except for SBCS codepages: no fallback mapping from Unicode to a zero byte
-* - limitation to up to 4 bytes per character
-*
-* ICU 2.8 (late 2003) adds a secondary data structure which lifts some of these
-* limitations and adds m:n character mappings and other features.
-* See ucnv_ext.h for details.
-*
-* Change history:
-*
-* 5/6/2001 Ram Moved MBCS_SINGLE_RESULT_FROM_U,MBCS_STAGE_2_FROM_U,
-* MBCS_VALUE_2_FROM_STAGE_2, MBCS_VALUE_4_FROM_STAGE_2
-* macros to ucnvmbcs.h file
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "unicode/ucnv_cb.h"
-#include "unicode/udata.h"
-#include "unicode/uset.h"
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-#include "ucnv_bld.h"
-#include "ucnvmbcs.h"
-#include "ucnv_ext.h"
-#include "ucnv_cnv.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "umutex.h"
-#include "ustr_imp.h"
-
-/* control optimizations according to the platform */
-#define MBCS_UNROLL_SINGLE_TO_BMP 1
-#define MBCS_UNROLL_SINGLE_FROM_BMP 0
-
-/*
- * _MBCSHeader versions 5.3 & 4.3
- * (Note that the _MBCSHeader version is in addition to the converter formatVersion.)
- *
- * This version is optional. Version 5 is used for incompatible data format changes.
- * makeconv will continue to generate version 4 files if possible.
- *
- * Changes from version 4:
- *
- * The main difference is an additional _MBCSHeader field with
- * - the length (number of uint32_t) of the _MBCSHeader
- * - flags for further incompatible data format changes
- * - flags for further, backward compatible data format changes
- *
- * The MBCS_OPT_FROM_U flag indicates that most of the fromUnicode data is omitted from
- * the file and needs to be reconstituted at load time.
- * This requires a utf8Friendly format with an additional mbcsIndex table for fast
- * (and UTF-8-friendly) fromUnicode conversion for Unicode code points up to maxFastUChar.
- * (For details about these structures see below, and see ucnvmbcs.h.)
- *
- * utf8Friendly also implies that the fromUnicode mappings are stored in ascending order
- * of the Unicode code points. (This requires that the .ucm file has the |0 etc.
- * precision markers for all mappings.)
- *
- * All fallbacks have been moved to the extension table, leaving only roundtrips in the
- * omitted data that can be reconstituted from the toUnicode data.
- *
- * Of the stage 2 table, the part corresponding to maxFastUChar and below is omitted.
- * With only roundtrip mappings in the base fromUnicode data, this part is fully
- * redundant with the mbcsIndex and will be reconstituted from that (also using the
- * stage 1 table which contains the information about how stage 2 was compacted).
- *
- * The rest of the stage 2 table, the part for code points above maxFastUChar,
- * is stored in the file and will be appended to the reconstituted part.
- *
- * The entire fromUBytes array is omitted from the file and will be reconstituted.
- * This is done by enumerating all toUnicode roundtrip mappings, performing
- * each mapping (using the stage 1 and reconstituted stage 2 tables) and
- * writing instead of reading the byte values.
- *
- * _MBCSHeader version 4.3
- *
- * Change from version 4.2:
- * - Optional utf8Friendly data structures, with 64-entry stage 3 block
- * allocation for parts of the BMP, and an additional mbcsIndex in non-SBCS
- * files which can be used instead of stages 1 & 2.
- * Faster lookups for roundtrips from most commonly used characters,
- * and lookups from UTF-8 byte sequences with a natural bit distribution.
- * See ucnvmbcs.h for more details.
- *
- * Change from version 4.1:
- * - Added an optional extension table structure at the end of the .cnv file.
- * It is present if the upper bits of the header flags field contains a non-zero
- * byte offset to it.
- * Files that contain only a conversion table and no base table
- * use the special outputType MBCS_OUTPUT_EXT_ONLY.
- * These contain the base table name between the MBCS header and the extension
- * data.
- *
- * Change from version 4.0:
- * - Replace header.reserved with header.fromUBytesLength so that all
- * fields in the data have length.
- *
- * Changes from version 3 (for performance improvements):
- * - new bit distribution for state table entries
- * - reordered action codes
- * - new data structure for single-byte fromUnicode
- * + stage 2 only contains indexes
- * + stage 3 stores 16 bits per character with classification bits 15..8
- * - no multiplier for stage 1 entries
- * - stage 2 for non-single-byte codepages contains the index and the flags in
- * one 32-bit value
- * - 2-byte and 4-byte fromUnicode results are stored directly as 16/32-bit integers
- *
- * For more details about old versions of the MBCS data structure, see
- * the corresponding versions of this file.
- *
- * Converting stateless codepage data ---------------------------------------***
- * (or codepage data with simple states) to Unicode.
- *
- * Data structure and algorithm for converting from complex legacy codepages
- * to Unicode. (Designed before 2000-may-22.)
- *
- * The basic idea is that the structure of legacy codepages can be described
- * with state tables.
- * When reading a byte stream, each input byte causes a state transition.
- * Some transitions result in the output of a code point, some result in
- * "unassigned" or "illegal" output.
- * This is used here for character conversion.
- *
- * The data structure begins with a state table consisting of a row
- * per state, with 256 entries (columns) per row for each possible input
- * byte value.
- * Each entry is 32 bits wide, with two formats distinguished by
- * the sign bit (bit 31):
- *
- * One format for transitional entries (bit 31 not set) for non-final bytes, and
- * one format for final entries (bit 31 set).
- * Both formats contain the number of the next state in the same bit
- * positions.
- * State 0 is the initial state.
- *
- * Most of the time, the offset values of subsequent states are added
- * up to a scalar value. This value will eventually be the index of
- * the Unicode code point in a table that follows the state table.
- * The effect is that the code points for final state table rows
- * are contiguous. The code points of final state rows follow each other
- * in the order of the references to those final states by previous
- * states, etc.
- *
- * For some terminal states, the offset is itself the output Unicode
- * code point (16 bits for a BMP code point or 20 bits for a supplementary
- * code point (stored as code point minus 0x10000 so that 20 bits are enough)).
- * For others, the code point in the Unicode table is stored with either
- * one or two code units: one for BMP code points, two for a pair of
- * surrogates.
- * All code points for a final state entry take up the same number of code
- * units, regardless of whether they all actually _use_ the same number
- * of code units. This is necessary for simple array access.
- *
- * An additional feature comes in with what in ICU is called "fallback"
- * mappings:
- *
- * In addition to round-trippable, precise, 1:1 mappings, there are often
- * mappings defined between similar, though not the same, characters.
- * Typically, such mappings occur only in fromUnicode mapping tables because
- * Unicode has a superset repertoire of most other codepages. However, it
- * is possible to provide such mappings in the toUnicode tables, too.
- * In this case, the fallback mappings are partly integrated into the
- * general state tables because the structure of the encoding includes their
- * byte sequences.
- * For final entries in an initial state, fallback mappings are stored in
- * the entry itself like with roundtrip mappings.
- * For other final entries, they are stored in the code units table if
- * the entry is for a pair of code units.
- * For single-unit results in the code units table, there is no space to
- * alternatively hold a fallback mapping; in this case, the code unit
- * is stored as U+fffe (unassigned), and the fallback mapping needs to
- * be looked up by the scalar offset value in a separate table.
- *
- * "Unassigned" state entries really mean "structurally unassigned",
- * i.e., such a byte sequence will never have a mapping result.
- *
- * The interpretation of the bits in each entry is as follows:
- *
- * Bit 31 not set, not a terminal entry ("transitional"):
- * 30..24 next state
- * 23..0 offset delta, to be added up
- *
- * Bit 31 set, terminal ("final") entry:
- * 30..24 next state (regardless of action code)
- * 23..20 action code:
- * action codes 0 and 1 result in precise-mapping Unicode code points
- * 0 valid byte sequence
- * 19..16 not used, 0
- * 15..0 16-bit Unicode BMP code point
- * never U+fffe or U+ffff
- * 1 valid byte sequence
- * 19..0 20-bit Unicode supplementary code point
- * never U+fffe or U+ffff
- *
- * action codes 2 and 3 result in fallback (unidirectional-mapping) Unicode code points
- * 2 valid byte sequence (fallback)
- * 19..16 not used, 0
- * 15..0 16-bit Unicode BMP code point as fallback result
- * 3 valid byte sequence (fallback)
- * 19..0 20-bit Unicode supplementary code point as fallback result
- *
- * action codes 4 and 5 may result in roundtrip/fallback/unassigned/illegal results
- * depending on the code units they result in
- * 4 valid byte sequence
- * 19..9 not used, 0
- * 8..0 final offset delta
- * pointing to one 16-bit code unit which may be
- * fffe unassigned -- look for a fallback for this offset
- * ffff illegal
- * 5 valid byte sequence
- * 19..9 not used, 0
- * 8..0 final offset delta
- * pointing to two 16-bit code units
- * (typically UTF-16 surrogates)
- * the result depends on the first code unit as follows:
- * 0000..d7ff roundtrip BMP code point (1st alone)
- * d800..dbff roundtrip surrogate pair (1st, 2nd)
- * dc00..dfff fallback surrogate pair (1st-400, 2nd)
- * e000 roundtrip BMP code point (2nd alone)
- * e001 fallback BMP code point (2nd alone)
- * fffe unassigned
- * ffff illegal
- * (the final offset deltas are at most 255 * 2,
- * times 2 because of storing code unit pairs)
- *
- * 6 unassigned byte sequence
- * 19..16 not used, 0
- * 15..0 16-bit Unicode BMP code point U+fffe (new with version 2)
- * this does not contain a final offset delta because the main
- * purpose of this action code is to save scalar offset values;
- * therefore, fallback values cannot be assigned to byte
- * sequences that result in this action code
- * 7 illegal byte sequence
- * 19..16 not used, 0
- * 15..0 16-bit Unicode BMP code point U+ffff (new with version 2)
- * 8 state change only
- * 19..0 not used, 0
- * useful for state changes in simple stateful encodings,
- * at Shift-In/Shift-Out codes
- *
- *
- * 9..15 reserved for future use
- * current implementations will only perform a state change
- * and ignore bits 19..0
- *
- * An encoding with contiguous ranges of unassigned byte sequences, like
- * Shift-JIS and especially EUC-TW, can be stored efficiently by having
- * at least two states for the trail bytes:
- * One trail byte state that results in code points, and one that only
- * has "unassigned" and "illegal" terminal states.
- *
- * Note: partly by accident, this data structure supports simple stateful
- * encodings without any additional logic.
- * Currently, only simple Shift-In/Shift-Out schemes are handled with
- * appropriate state tables (especially EBCDIC_STATEFUL!).
- *
- * MBCS version 2 added:
- * unassigned and illegal action codes have U+fffe and U+ffff
- * instead of unused bits; this is useful for _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP()
- *
- * Converting from Unicode to codepage bytes --------------------------------***
- *
- * The conversion data structure for fromUnicode is designed for the known
- * structure of Unicode. It maps from 21-bit code points (0..0x10ffff) to
- * a sequence of 1..4 bytes, in addition to a flag that indicates if there is
- * a roundtrip mapping.
- *
- * The lookup is done with a 3-stage trie, using 11/6/4 bits for stage 1/2/3
- * like in the character properties table.
- * The beginning of the trie is at offsetFromUTable, the beginning of stage 3
- * with the resulting bytes is at offsetFromUBytes.
- *
- * Beginning with version 4, single-byte codepages have a significantly different
- * trie compared to other codepages.
- * In all cases, the entry in stage 1 is directly the index of the block of
- * 64 entries in stage 2.
- *
- * Single-byte lookup:
- *
- * Stage 2 only contains 16-bit indexes directly to the 16-blocks in stage 3.
- * Stage 3 contains one 16-bit word per result:
- * Bits 15..8 indicate the kind of result:
- * f roundtrip result
- * c fallback result from private-use code point
- * 8 fallback result from other code points
- * 0 unassigned
- * Bits 7..0 contain the codepage byte. A zero byte is always possible.
- *
- * In version 4.3, the runtime code can build an sbcsIndex for a utf8Friendly
- * file. For 2-byte UTF-8 byte sequences and some 3-byte sequences the lookup
- * becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
- * ASCII code points can be looked up with a linear array access into stage 3.
- * See maxFastUChar and other details in ucnvmbcs.h.
- *
- * Multi-byte lookup:
- *
- * Stage 2 contains a 32-bit word for each 16-block in stage 3:
- * Bits 31..16 contain flags for which stage 3 entries contain roundtrip results
- * test: MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)
- * If this test is false, then a non-zero result will be interpreted as
- * a fallback mapping.
- * Bits 15..0 contain the index to stage 3, which must be multiplied by 16*(bytes per char)
- *
- * Stage 3 contains 2, 3, or 4 bytes per result.
- * 2 or 4 bytes are stored as uint16_t/uint32_t in platform endianness,
- * while 3 bytes are stored as bytes in big-endian order.
- * Leading zero bytes are ignored, and the number of bytes is counted.
- * A zero byte mapping result is possible as a roundtrip result.
- * For some output types, the actual result is processed from this;
- * see ucnv_MBCSFromUnicodeWithOffsets().
- *
- * Note that stage 1 always contains 0x440=1088 entries (0x440==0x110000>>10),
- * or (version 3 and up) for BMP-only codepages, it contains 64 entries.
- *
- * In version 4.3, a utf8Friendly file contains an mbcsIndex table.
- * For 2-byte UTF-8 byte sequences and most 3-byte sequences the lookup
- * becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
- * ASCII code points can be looked up with a linear array access into stage 3.
- * See maxFastUChar, mbcsIndex and other details in ucnvmbcs.h.
- *
- * In version 3, stage 2 blocks may overlap by multiples of the multiplier
- * for compaction.
- * In version 4, stage 2 blocks (and for single-byte codepages, stage 3 blocks)
- * may overlap by any number of entries.
- *
- * MBCS version 2 added:
- * the converter checks for known output types, which allows
- * adding new ones without crashing an unaware converter
- */
-
-/**
- * Callback from ucnv_MBCSEnumToUnicode(), takes 32 mappings from
- * consecutive sequences of bytes, starting from the one encoded in value,
- * to Unicode code points. (Multiple mappings to reduce per-function call overhead.)
- * Does not currently support m:n mappings or reverse fallbacks.
- * This function will not be called for sequences of bytes with leading zeros.
- *
- * @param context an opaque pointer, as passed into ucnv_MBCSEnumToUnicode()
- * @param value contains 1..4 bytes of the first byte sequence, right-aligned
- * @param codePoints resulting Unicode code points, or negative if a byte sequence does
- * not map to anything
- * @return TRUE to continue enumeration, FALSE to stop
- */
-typedef UBool U_CALLCONV
-UConverterEnumToUCallback(const void *context, uint32_t value, UChar32 codePoints[32]);
-
-static void U_CALLCONV
-ucnv_MBCSLoad(UConverterSharedData *sharedData,
- UConverterLoadArgs *pArgs,
- const uint8_t *raw,
- UErrorCode *pErrorCode);
-
-static void U_CALLCONV
-ucnv_MBCSUnload(UConverterSharedData *sharedData);
-
-static void U_CALLCONV
-ucnv_MBCSOpen(UConverter *cnv,
- UConverterLoadArgs *pArgs,
- UErrorCode *pErrorCode);
-
-static UChar32 U_CALLCONV
-ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode);
-
-static void U_CALLCONV
-ucnv_MBCSGetStarters(const UConverter* cnv,
- UBool starters[256],
- UErrorCode *pErrorCode);
-
-U_CDECL_BEGIN
-static const char* U_CALLCONV
-ucnv_MBCSGetName(const UConverter *cnv);
-U_CDECL_END
-
-static void U_CALLCONV
-ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
- int32_t offsetIndex,
- UErrorCode *pErrorCode);
-
-static UChar32 U_CALLCONV
-ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode);
-
-static void U_CALLCONV
-ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
- UConverterToUnicodeArgs *pToUArgs,
- UErrorCode *pErrorCode);
-
-static void U_CALLCONV
-ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode);
-
-static void U_CALLCONV
-ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
- UConverterToUnicodeArgs *pToUArgs,
- UErrorCode *pErrorCode);
-
-static const UConverterImpl _SBCSUTF8Impl={
- UCNV_MBCS,
-
- ucnv_MBCSLoad,
- ucnv_MBCSUnload,
-
- ucnv_MBCSOpen,
- NULL,
- NULL,
-
- ucnv_MBCSToUnicodeWithOffsets,
- ucnv_MBCSToUnicodeWithOffsets,
- ucnv_MBCSFromUnicodeWithOffsets,
- ucnv_MBCSFromUnicodeWithOffsets,
- ucnv_MBCSGetNextUChar,
-
- ucnv_MBCSGetStarters,
- ucnv_MBCSGetName,
- ucnv_MBCSWriteSub,
- NULL,
- ucnv_MBCSGetUnicodeSet,
-
- NULL,
- ucnv_SBCSFromUTF8
-};
-
-static const UConverterImpl _DBCSUTF8Impl={
- UCNV_MBCS,
-
- ucnv_MBCSLoad,
- ucnv_MBCSUnload,
-
- ucnv_MBCSOpen,
- NULL,
- NULL,
-
- ucnv_MBCSToUnicodeWithOffsets,
- ucnv_MBCSToUnicodeWithOffsets,
- ucnv_MBCSFromUnicodeWithOffsets,
- ucnv_MBCSFromUnicodeWithOffsets,
- ucnv_MBCSGetNextUChar,
-
- ucnv_MBCSGetStarters,
- ucnv_MBCSGetName,
- ucnv_MBCSWriteSub,
- NULL,
- ucnv_MBCSGetUnicodeSet,
-
- NULL,
- ucnv_DBCSFromUTF8
-};
-
-static const UConverterImpl _MBCSImpl={
- UCNV_MBCS,
-
- ucnv_MBCSLoad,
- ucnv_MBCSUnload,
-
- ucnv_MBCSOpen,
- NULL,
- NULL,
-
- ucnv_MBCSToUnicodeWithOffsets,
- ucnv_MBCSToUnicodeWithOffsets,
- ucnv_MBCSFromUnicodeWithOffsets,
- ucnv_MBCSFromUnicodeWithOffsets,
- ucnv_MBCSGetNextUChar,
-
- ucnv_MBCSGetStarters,
- ucnv_MBCSGetName,
- ucnv_MBCSWriteSub,
- NULL,
- ucnv_MBCSGetUnicodeSet,
- NULL,
- NULL
-};
-
-/* Static data is in tools/makeconv/ucnvstat.c for data-based
- * converters. Be sure to update it as well.
- */
-
-const UConverterSharedData _MBCSData={
- sizeof(UConverterSharedData), 1,
- NULL, NULL, FALSE, TRUE, &_MBCSImpl,
- 0, UCNV_MBCS_TABLE_INITIALIZER
-};
-
-
-/* GB 18030 data ------------------------------------------------------------ */
-
-/* helper macros for linear values for GB 18030 four-byte sequences */
-#define LINEAR_18030(a, b, c, d) ((((a)*10+(b))*126L+(c))*10L+(d))
-
-#define LINEAR_18030_BASE LINEAR_18030(0x81, 0x30, 0x81, 0x30)
-
-#define LINEAR(x) LINEAR_18030(x>>24, (x>>16)&0xff, (x>>8)&0xff, x&0xff)
-
-/*
- * Some ranges of GB 18030 where both the Unicode code points and the
- * GB four-byte sequences are contiguous and are handled algorithmically by
- * the special callback functions below.
- * The values are start & end of Unicode & GB codes.
- * Each row is {first code point, last code point, first GB code, last GB code},
- * with the GB codes stored as LINEAR() values.
- *
- * Note that single surrogates are not mapped by GB 18030
- * as of the re-released mapping tables from 2000-nov-30.
- */
-static const uint32_t
-gb18030Ranges[14][4]={
- {0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35)},
- {0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738)},
- {0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436)},
- {0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531)},
- {0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534)},
- {0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38)},
- {0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537)},
- {0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32)},
- {0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237)},
- {0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733)},
- {0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837)},
- {0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638)},
- {0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931)},
- {0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439)}
-};
-
-/* bit flag for UConverter.options indicating GB 18030 special handling */
-#define _MBCS_OPTION_GB18030 0x8000
-
-/* bit flag for UConverter.options indicating KEIS,JEF,JIPS special handling */
-#define _MBCS_OPTION_KEIS 0x01000
-#define _MBCS_OPTION_JEF 0x02000
-#define _MBCS_OPTION_JIPS 0x04000
-
-#define KEIS_SO_CHAR_1 0x0A /* KEIS: SO and SI are two-byte sequences, see getSISOBytes() */
-#define KEIS_SO_CHAR_2 0x42
-#define KEIS_SI_CHAR_1 0x0A
-#define KEIS_SI_CHAR_2 0x41
-
-#define JEF_SO_CHAR 0x28 /* JEF: SO and SI are single bytes */
-#define JEF_SI_CHAR 0x29
-
-#define JIPS_SO_CHAR_1 0x1A /* JIPS: SO and SI are two-byte sequences */
-#define JIPS_SO_CHAR_2 0x70
-#define JIPS_SI_CHAR_1 0x1A
-#define JIPS_SI_CHAR_2 0x71
-
-enum SISO_Option { /* selects which byte sequence getSISOBytes() writes */
- SI, /* the SI sequence */
- SO /* the SO sequence */
-};
-typedef enum SISO_Option SISO_Option;
-/*
- * Writes the SI or SO byte sequence for a converter into value[].
- *
- * The sequence is chosen from the option bits in cnvOption, tested in the
- * order KEIS, JEF, JIPS; if none of them is set, the single byte UCNV_SI or
- * UCNV_SO is written.
- * value must have room for two bytes.
- *
- * @return the number of bytes written (1 or 2), or 0 if option is neither SI nor SO
- */
-static int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) {
- int32_t SISOLength = 0;
-
- switch (option) {
- case SI:
- if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
- value[0] = KEIS_SI_CHAR_1;
- value[1] = KEIS_SI_CHAR_2;
- SISOLength = 2;
- } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
- value[0] = JEF_SI_CHAR;
- SISOLength = 1;
- } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
- value[0] = JIPS_SI_CHAR_1;
- value[1] = JIPS_SI_CHAR_2;
- SISOLength = 2;
- } else {
- value[0] = UCNV_SI;
- SISOLength = 1;
- }
- break;
- case SO:
- if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
- value[0] = KEIS_SO_CHAR_1;
- value[1] = KEIS_SO_CHAR_2;
- SISOLength = 2;
- } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
- value[0] = JEF_SO_CHAR;
- SISOLength = 1;
- } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
- value[0] = JIPS_SO_CHAR_1;
- value[1] = JIPS_SO_CHAR_2;
- SISOLength = 2;
- } else {
- value[0] = UCNV_SO;
- SISOLength = 1;
- }
- break;
- default:
- /* Should never happen. */
- break;
- }
-
- return SISOLength;
-}
-
-/* Miscellaneous ------------------------------------------------------------ */
-
-/*
- * similar to ucnv_MBCSGetNextUChar() but recursive
- *
- * Walks the state table row for state over the byte range that is encoded in
- * stateProps[state] and collects one code point per byte in blocks of 32.
- * A transition entry recurses into its next state unless stateProps[] of that
- * state is negative; a final entry is decoded right here.
- * After each block of 32 bytes the callback is called if at least one
- * code point of the block is non-negative.
- *
- * value holds the bytes of the sequence so far, offset the sum of the
- * transition offsets so far. pErrorCode is only passed on to recursive calls.
- *
- * @return FALSE as soon as the callback returns FALSE (here or in a recursive call),
- * otherwise TRUE
- */
-static UBool
-enumToU(UConverterMBCSTable *mbcsTable, int8_t stateProps[],
- int32_t state, uint32_t offset,
- uint32_t value,
- UConverterEnumToUCallback *callback, const void *context,
- UErrorCode *pErrorCode) {
- UChar32 codePoints[32];
- const int32_t *row;
- const uint16_t *unicodeCodeUnits;
- UChar32 anyCodePoints;
- int32_t b, limit;
-
- row=mbcsTable->stateTable[state];
- unicodeCodeUnits=mbcsTable->unicodeCodeUnits;
-
- value<<=8; /* make room for the byte of this state */
- anyCodePoints=-1; /* becomes non-negative if there is a mapping */
-
- b=(stateProps[state]&0x38)<<2; /* first byte: bits 5..3 of the properties, times 32 */
- if(b==0 && stateProps[state]>=0x40) {
- /* skip byte sequences with leading zeros because they are not stored in the fromUnicode table */
- codePoints[0]=U_SENTINEL;
- b=1;
- }
- limit=((stateProps[state]&7)+1)<<5; /* one past the last byte: (bits 2..0 + 1), times 32 */
- while(b<limit) {
- int32_t entry=row[b];
- if(MBCS_ENTRY_IS_TRANSITION(entry)) {
- int32_t nextState=MBCS_ENTRY_TRANSITION_STATE(entry);
- if(stateProps[nextState]>=0) {
- /* recurse to a state with non-ignorable actions */
- if(!enumToU(
- mbcsTable, stateProps, nextState,
- offset+MBCS_ENTRY_TRANSITION_OFFSET(entry),
- value|(uint32_t)b,
- callback, context,
- pErrorCode)) {
- return FALSE;
- }
- }
- codePoints[b&0x1f]=U_SENTINEL; /* a transition itself yields no code point for this byte */
- } else {
- UChar32 c;
- int32_t action;
-
- /*
- * An if-else-if chain provides more reliable performance for
- * the most common cases compared to a switch.
- */
- action=MBCS_ENTRY_FINAL_ACTION(entry);
- if(action==MBCS_STATE_VALID_DIRECT_16) {
- /* output BMP code point */
- c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- } else if(action==MBCS_STATE_VALID_16) {
- int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
- c=unicodeCodeUnits[finalOffset];
- if(c<0xfffe) {
- /* output BMP code point */
- } else {
- c=U_SENTINEL;
- }
- } else if(action==MBCS_STATE_VALID_16_PAIR) {
- int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
- c=unicodeCodeUnits[finalOffset++];
- if(c<0xd800) {
- /* output BMP code point below 0xd800 */
- } else if(c<=0xdbff) {
- /* output roundtrip or fallback supplementary code point */
- c=((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00);
- } else if(c==0xe000) {
- /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
- c=unicodeCodeUnits[finalOffset];
- } else {
- c=U_SENTINEL;
- }
- } else if(action==MBCS_STATE_VALID_DIRECT_20) {
- /* output supplementary code point */
- c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
- } else {
- c=U_SENTINEL;
- }
-
- codePoints[b&0x1f]=c;
- anyCodePoints&=c; /* the sign bit stays set only while every c so far was negative */
- }
- if(((++b)&0x1f)==0) {
- /* end of a block of 32 bytes; b-0x20 is the first byte of that block */
- if(anyCodePoints>=0) {
- if(!callback(context, value|(uint32_t)(b-0x20), codePoints)) {
- return FALSE;
- }
- anyCodePoints=-1;
- }
- }
- }
- return TRUE;
-}
-
-/*
- * Only called if stateProps[state]==-1.
- * A recursive call may do stateProps[state]|=0x40 if this state is the target of an
- * MBCS_STATE_CHANGE_ONLY.
- *
- * Computes and stores the properties of state (for the bit layout see the
- * comment in ucnv_MBCSEnumToUnicode()) and returns the stored value.
- * A state in which all 256 entries are ignorable gets the value -0x40.
- * States that are referenced from this row and are still at -1 are handled
- * by recursive calls.
- */
-static int8_t
-getStateProp(const int32_t (*stateTable)[256], int8_t stateProps[], int state) {
- const int32_t *row;
- int32_t min, max, entry, nextState;
-
- row=stateTable[state];
- stateProps[state]=0; /* no longer -1, so recursive calls will not process this state again */
-
- /* find first non-ignorable state */
- for(min=0;; ++min) {
- entry=row[min];
- nextState=MBCS_ENTRY_STATE(entry);
- if(stateProps[nextState]==-1) {
- getStateProp(stateTable, stateProps, nextState);
- }
- if(MBCS_ENTRY_IS_TRANSITION(entry)) {
- if(stateProps[nextState]>=0) {
- break;
- }
- } else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) {
- break;
- }
- if(min==0xff) {
- /* none of the 256 entries is non-ignorable */
- stateProps[state]=-0x40; /* (int8_t)0xc0 */
- return stateProps[state];
- }
- }
- stateProps[state]|=(int8_t)((min>>5)<<3); /* bits 5..3: index of the 32-byte block that contains min */
-
- /* find last non-ignorable state */
- for(max=0xff; min<max; --max) {
- entry=row[max];
- nextState=MBCS_ENTRY_STATE(entry);
- if(stateProps[nextState]==-1) {
- getStateProp(stateTable, stateProps, nextState);
- }
- if(MBCS_ENTRY_IS_TRANSITION(entry)) {
- if(stateProps[nextState]>=0) {
- break;
- }
- } else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) {
- break;
- }
- }
- stateProps[state]|=(int8_t)(max>>5); /* bits 2..0: index of the 32-byte block that contains max */
-
- /* recurse further and collect direct-state information */
- while(min<=max) {
- entry=row[min];
- nextState=MBCS_ENTRY_STATE(entry);
- if(stateProps[nextState]==-1) {
- getStateProp(stateTable, stateProps, nextState);
- }
- if(MBCS_ENTRY_IS_FINAL(entry)) {
- /* the state after a final entry is marked as a direct state */
- stateProps[nextState]|=0x40;
- if(MBCS_ENTRY_FINAL_ACTION(entry)<=MBCS_STATE_FALLBACK_DIRECT_20) {
- stateProps[state]|=0x40;
- }
- }
- ++min;
- }
- return stateProps[state];
-}
-
-/*
- * Internal function enumerating the toUnicode data of an MBCS converter.
- * Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U
- * table, but could also be used for a future ucnv_getUnicodeSet() option
- * that includes reverse fallbacks (after updating this function's implementation).
- * Currently only handles roundtrip mappings.
- * Does not currently handle extensions.
- *
- * The callback is called via enumToU() once per block of 32 byte values
- * that contains at least one mapping.
- * The return value of enumToU() is not checked here, so a callback that
- * returns FALSE only ends the enumeration from the current direct state.
- */
-static void
-ucnv_MBCSEnumToUnicode(UConverterMBCSTable *mbcsTable,
- UConverterEnumToUCallback *callback, const void *context,
- UErrorCode *pErrorCode) {
- /*
- * Properties for each state, to speed up the enumeration.
- * Ignorable actions are unassigned/illegal/state-change-only:
- * They do not lead to mappings.
- *
- * Bits 7..6:
- * 1 direct/initial state (stateful converters have multiple)
- * 0 non-initial state with transitions or with non-ignorable result actions
- * -1 final state with only ignorable actions
- *
- * Bits 5..3:
- * The lowest byte value with non-ignorable actions is
- * value<<5 (rounded down).
- *
- * Bits 2..0:
- * The highest byte value with non-ignorable actions is
- * (value<<5)|0x1f (rounded up).
- */
- int8_t stateProps[MBCS_MAX_STATE_COUNT];
- int32_t state;
-
- uprv_memset(stateProps, -1, sizeof(stateProps)); /* -1 means "not computed yet" for getStateProp() */
-
- /* recurse from state 0 and set all stateProps */
- getStateProp(mbcsTable->stateTable, stateProps, 0);
-
- for(state=0; state<mbcsTable->countStates; ++state) {
- /*if(stateProps[state]==-1) {
- printf("unused/unreachable <icu:state> %d\n", state);
- }*/
- if(stateProps[state]>=0x40) {
- /* start from each direct state */
- enumToU(
- mbcsTable, stateProps, state, 0, 0,
- callback, context,
- pErrorCode);
- }
- }
-}
-/*
- * Enumerates the from-Unicode trie of an MBCS table and adds the mapped
- * code points to the set behind sa, then lets ucnv_extGetUnicodeSet() handle
- * the extension data.
- *
- * which selects roundtrip mappings only (UCNV_ROUNDTRIP_SET) or roundtrips
- * and fallbacks. filter restricts the result by the bytes a code point maps to;
- * it is only looked at for tables whose outputType is not MBCS_OUTPUT_1.
- * An unknown filter value sets U_INTERNAL_PROGRAM_ERROR and returns
- * without calling ucnv_extGetUnicodeSet().
- */
-U_CFUNC void
-ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UConverterSetFilter filter,
- UErrorCode *pErrorCode) {
- const UConverterMBCSTable *mbcsTable;
- const uint16_t *table;
-
- uint32_t st3;
- uint16_t st1, maxStage1, st2;
-
- UChar32 c;
-
- /* enumerate the from-Unicode trie table */
- mbcsTable=&sharedData->mbcs;
- table=mbcsTable->fromUnicodeTable;
- if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
- maxStage1=0x440;
- } else {
- maxStage1=0x40;
- }
-
- c=0; /* keep track of the current code point while enumerating */
-
- if(mbcsTable->outputType==MBCS_OUTPUT_1) {
- const uint16_t *stage2, *stage3, *results;
- uint16_t minValue;
-
- results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
-
- /*
- * Set a threshold variable for selecting which mappings to use.
- * See ucnv_MBCSSingleFromBMPWithOffsets() and
- * MBCS_SINGLE_RESULT_FROM_U() for details.
- */
- if(which==UCNV_ROUNDTRIP_SET) {
- /* use only roundtrips */
- minValue=0xf00;
- } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ {
- /* use all roundtrip and fallback results */
- minValue=0x800;
- }
-
- for(st1=0; st1<maxStage1; ++st1) {
- st2=table[st1];
- if(st2>maxStage1) {
- stage2=table+st2;
- for(st2=0; st2<64; ++st2) {
- if((st3=stage2[st2])!=0) {
- /* read the stage 3 block */
- stage3=results+st3;
-
- do {
- if(*stage3++>=minValue) {
- sa->add(sa->set, c);
- }
- } while((++c&0xf)!=0);
- } else {
- c+=16; /* empty stage 3 block */
- }
- }
- } else {
- c+=1024; /* empty stage 2 block */
- }
- }
- } else {
- const uint32_t *stage2;
- const uint8_t *stage3, *bytes;
- uint32_t st3Multiplier;
- uint32_t value;
- UBool useFallback;
-
- bytes=mbcsTable->fromUnicodeBytes;
-
- useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
-
- /* number of stage 3 bytes per code point */
- switch(mbcsTable->outputType) {
- case MBCS_OUTPUT_3:
- case MBCS_OUTPUT_4_EUC:
- st3Multiplier=3;
- break;
- case MBCS_OUTPUT_4:
- st3Multiplier=4;
- break;
- default:
- st3Multiplier=2;
- break;
- }
-
- for(st1=0; st1<maxStage1; ++st1) {
- st2=table[st1];
- if(st2>(maxStage1>>1)) {
- stage2=(const uint32_t *)table+st2;
- for(st2=0; st2<64; ++st2) {
- if((st3=stage2[st2])!=0) {
- /* read the stage 3 block */
- stage3=bytes+st3Multiplier*16*(uint32_t)(uint16_t)st3;
-
- /* get the roundtrip flags for the stage 3 block */
- st3>>=16;
-
- /*
- * Add code points for which the roundtrip flag is set,
- * or which map to non-zero bytes if we use fallbacks.
- * See ucnv_MBCSFromUnicodeWithOffsets() for details.
- */
- switch(filter) {
- case UCNV_SET_FILTER_NONE:
- do {
- if(st3&1) {
- sa->add(sa->set, c);
- stage3+=st3Multiplier;
- } else if(useFallback) {
- uint8_t b=0;
- /* OR together all bytes of this entry; the cases fall through on purpose */
- switch(st3Multiplier) {
- case 4:
- b|=*stage3++;
- U_FALLTHROUGH;
- case 3:
- b|=*stage3++;
- U_FALLTHROUGH;
- case 2:
- b|=stage3[0]|stage3[1];
- stage3+=2;
- U_FALLTHROUGH;
- default:
- break;
- }
- if(b!=0) {
- sa->add(sa->set, c);
- }
- }
- st3>>=1;
- } while((++c&0xf)!=0);
- break;
- case UCNV_SET_FILTER_DBCS_ONLY:
- /* Ignore single-byte results (<0x100). */
- do {
- if(((st3&1)!=0 || useFallback) && *((const uint16_t *)stage3)>=0x100) {
- sa->add(sa->set, c);
- }
- st3>>=1;
- stage3+=2; /* +=st3Multiplier */
- } while((++c&0xf)!=0);
- break;
- case UCNV_SET_FILTER_2022_CN:
- /* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */
- do {
- if(((st3&1)!=0 || useFallback) && ((value=*stage3)==0x81 || value==0x82)) {
- sa->add(sa->set, c);
- }
- st3>>=1;
- stage3+=3; /* +=st3Multiplier */
- } while((++c&0xf)!=0);
- break;
- case UCNV_SET_FILTER_SJIS:
- /* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
- do {
- if(((st3&1)!=0 || useFallback) && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) {
- sa->add(sa->set, c);
- }
- st3>>=1;
- stage3+=2; /* +=st3Multiplier */
- } while((++c&0xf)!=0);
- break;
- case UCNV_SET_FILTER_GR94DBCS:
- /* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). */
- do {
- if( ((st3&1)!=0 || useFallback) &&
- (uint16_t)((value=*((const uint16_t *)stage3)) - 0xa1a1)<=(0xfefe - 0xa1a1) &&
- (uint8_t)(value-0xa1)<=(0xfe - 0xa1)
- ) {
- sa->add(sa->set, c);
- }
- st3>>=1;
- stage3+=2; /* +=st3Multiplier */
- } while((++c&0xf)!=0);
- break;
- case UCNV_SET_FILTER_HZ:
- /* Only add code points that are suitable for HZ DBCS (lead byte A1..FD). */
- do {
- if( ((st3&1)!=0 || useFallback) &&
- (uint16_t)((value=*((const uint16_t *)stage3))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
- (uint8_t)(value-0xa1)<=(0xfe - 0xa1)
- ) {
- sa->add(sa->set, c);
- }
- st3>>=1;
- stage3+=2; /* +=st3Multiplier */
- } while((++c&0xf)!=0);
- break;
- default:
- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
- return;
- }
- } else {
- c+=16; /* empty stage 3 block */
- }
- }
- } else {
- c+=1024; /* empty stage 2 block */
- }
- }
- }
-
- ucnv_extGetUnicodeSet(sharedData, sa, which, filter, pErrorCode);
-}
-/*
- * Calls ucnv_MBCSGetFilteredUnicodeSetForUnicode() with the filter
- * UCNV_SET_FILTER_DBCS_ONLY if the table's outputType is
- * MBCS_OUTPUT_DBCS_ONLY, and with UCNV_SET_FILTER_NONE otherwise.
- */
-U_CFUNC void
-ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode) {
- ucnv_MBCSGetFilteredUnicodeSetForUnicode(
- sharedData, sa, which,
- sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ?
- UCNV_SET_FILTER_DBCS_ONLY :
- UCNV_SET_FILTER_NONE,
- pErrorCode);
-}
-/*
- * Unicode set function stored in the converter implementation tables.
- * A converter with the GB 18030 option gets the fixed ranges 0..0xd7ff and
- * 0xe000..0x10ffff (which is not looked at in this case);
- * every other converter uses ucnv_MBCSGetUnicodeSetForUnicode().
- */
-static void U_CALLCONV
-ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode) {
- if(cnv->options&_MBCS_OPTION_GB18030) {
- sa->addRange(sa->set, 0, 0xd7ff);
- sa->addRange(sa->set, 0xe000, 0x10ffff);
- } else {
- ucnv_MBCSGetUnicodeSetForUnicode(cnv->sharedData, sa, which, pErrorCode);
- }
-}
-
-/* conversion extensions for input not in the main table -------------------- */
-
-/*
- * Hardcoded extension handling for GB 18030.
- * For the definitions of the LINEAR macros and gb18030Ranges, see near the beginning of the file.
- *
- * In the future, conversion extensions may handle m:n mappings and delta tables,
- * see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/conversion_extensions.html
- *
- * If an input character cannot be mapped, then these functions set an error
- * code. The framework will then call the callback function.
- */
-
-/*
- * Handles a code point for which the main table has no mapping:
- * first tries the extension data of the table (if there is any),
- * then, for a converter with the GB 18030 option, the gb18030Ranges.
- * If neither applies, *pErrorCode is set to U_INVALID_CHAR_FOUND.
- * cnv->useSubChar1 is always reset to FALSE.
- *
- * @return if(U_FAILURE) return the code point for cnv->fromUChar32
- * else return 0 after output has been written to the target
- */
-static UChar32
-_extFromU(UConverter *cnv, const UConverterSharedData *sharedData,
- UChar32 cp,
- const UChar **source, const UChar *sourceLimit,
- uint8_t **target, const uint8_t *targetLimit,
- int32_t **offsets, int32_t sourceIndex,
- UBool flush,
- UErrorCode *pErrorCode) {
- const int32_t *cx;
-
- cnv->useSubChar1=FALSE;
-
- if( (cx=sharedData->mbcs.extIndexes)!=NULL &&
- ucnv_extInitialMatchFromU(
- cnv, cx,
- cp, source, sourceLimit,
- (char **)target, (char *)targetLimit,
- offsets, sourceIndex,
- flush,
- pErrorCode)
- ) {
- return 0; /* an extension mapping handled the input */
- }
-
- /* GB 18030 */
- if((cnv->options&_MBCS_OPTION_GB18030)!=0) {
- const uint32_t *range;
- int32_t i;
-
- range=gb18030Ranges[0];
- for(i=0; i<UPRV_LENGTHOF(gb18030Ranges); range+=4, ++i) {
- if(range[0]<=(uint32_t)cp && (uint32_t)cp<=range[1]) {
- /* found the Unicode code point, output the four-byte sequence for it */
- uint32_t linear;
- char bytes[4];
-
- /* get the linear value of the first GB 18030 code in this range */
- linear=range[2]-LINEAR_18030_BASE;
-
- /* add the offset from the beginning of the range */
- linear+=((uint32_t)cp-range[0]);
-
- /* turn this into a four-byte sequence */
- bytes[3]=(char)(0x30+linear%10); linear/=10;
- bytes[2]=(char)(0x81+linear%126); linear/=126;
- bytes[1]=(char)(0x30+linear%10); linear/=10;
- bytes[0]=(char)(0x81+linear);
-
- /* output this sequence */
- ucnv_fromUWriteBytes(cnv,
- bytes, 4, (char **)target, (char *)targetLimit,
- offsets, sourceIndex, pErrorCode);
- return 0;
- }
- }
- }
-
- /* no mapping */
- *pErrorCode=U_INVALID_CHAR_FOUND;
- return cp;
-}
-
-/*
- * Counterpart of _extFromU() for the toUnicode direction:
- * first tries the extension data of the table (if there is any),
- * then, for a four-byte input and a converter with the GB 18030 option,
- * the gb18030Ranges.
- * If neither applies, *pErrorCode is set to U_INVALID_CHAR_FOUND.
- *
- * Input sequence: cnv->toUBytes[0..length[
- * @return if(U_FAILURE) return the length (toULength, byteIndex) for the input
- * else return 0 after output has been written to the target
- */
-static int8_t
-_extToU(UConverter *cnv, const UConverterSharedData *sharedData,
- int8_t length,
- const uint8_t **source, const uint8_t *sourceLimit,
- UChar **target, const UChar *targetLimit,
- int32_t **offsets, int32_t sourceIndex,
- UBool flush,
- UErrorCode *pErrorCode) {
- const int32_t *cx;
-
- if( (cx=sharedData->mbcs.extIndexes)!=NULL &&
- ucnv_extInitialMatchToU(
- cnv, cx,
- length, (const char **)source, (const char *)sourceLimit,
- target, targetLimit,
- offsets, sourceIndex,
- flush,
- pErrorCode)
- ) {
- return 0; /* an extension mapping handled the input */
- }
-
- /* GB 18030 */
- if(length==4 && (cnv->options&_MBCS_OPTION_GB18030)!=0) {
- const uint32_t *range;
- uint32_t linear;
- int32_t i;
-
- linear=LINEAR_18030(cnv->toUBytes[0], cnv->toUBytes[1], cnv->toUBytes[2], cnv->toUBytes[3]);
- range=gb18030Ranges[0];
- for(i=0; i<UPRV_LENGTHOF(gb18030Ranges); range+=4, ++i) {
- if(range[2]<=linear && linear<=range[3]) {
- /* found the sequence, output the Unicode code point for it */
- *pErrorCode=U_ZERO_ERROR;
-
- /* add the linear difference between the input and start sequences to the start code point */
- linear=range[0]+(linear-range[2]);
-
- /* output this code point */
- ucnv_toUWriteCodePoint(cnv, linear, target, targetLimit, offsets, sourceIndex, pErrorCode);
-
- return 0;
- }
- }
- }
-
- /* no mapping */
- *pErrorCode=U_INVALID_CHAR_FOUND;
- return length;
-}
-
-/* EBCDIC swap LF<->NL ------------------------------------------------------ */
-
-/*
- * This code modifies a standard EBCDIC<->Unicode mapping table for
- * OS/390 (z/OS) Unix System Services (Open Edition).
- * The difference is in the mapping of Line Feed and New Line control codes:
- * Standard EBCDIC maps
- *
- * <U000A> \x25 |0
- * <U0085> \x15 |0
- *
- * but OS/390 USS EBCDIC swaps the control codes for LF and NL,
- * mapping
- *
- * <U000A> \x15 |0
- * <U0085> \x25 |0
- *
- * This code modifies a loaded standard EBCDIC<->Unicode mapping table
- * by copying it into allocated memory and swapping the LF and NL values.
- * This makes it possible to support the same EBCDIC charset in both versions without
- * duplicating the entire installed table.
- */
-
-/* standard EBCDIC codes */
-#define EBCDIC_LF 0x25
-#define EBCDIC_NL 0x15
-
-/* standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables */
-#define EBCDIC_RT_LF 0xf25
-#define EBCDIC_RT_NL 0xf15
-
-/* Unicode code points */
-#define U_LF 0x0a
-#define U_NL 0x85
-/*
- * Builds the swapped LF/NL variant of the tables in sharedData->mbcs and
- * stores it in the swapLFNL* fields of that table.
- *
- * The modified state table, the modified from-Unicode bytes and the new
- * converter name live in a single allocation, in this order.
- *
- * @return FALSE if the table does not have the expected standard EBCDIC
- * LF and NL mappings (no error code is set in this case),
- * if the size of the from-Unicode bytes is unknown (U_INVALID_FORMAT_ERROR),
- * or if the allocation fails (U_MEMORY_ALLOCATION_ERROR);
- * TRUE otherwise, also if the swapLFNL* fields had already been set
- * by the time the lock was taken
- */
-static UBool
-_EBCDICSwapLFNL(UConverterSharedData *sharedData, UErrorCode *pErrorCode) {
- UConverterMBCSTable *mbcsTable;
-
- const uint16_t *table, *results;
- const uint8_t *bytes;
-
- int32_t (*newStateTable)[256];
- uint16_t *newResults;
- uint8_t *p;
- char *name;
-
- uint32_t stage2Entry;
- uint32_t size, sizeofFromUBytes;
-
- mbcsTable=&sharedData->mbcs;
-
- table=mbcsTable->fromUnicodeTable;
- bytes=mbcsTable->fromUnicodeBytes;
- results=(const uint16_t *)bytes;
-
- /*
- * Check that this is an EBCDIC table with SBCS portion -
- * SBCS or EBCDIC_STATEFUL with standard EBCDIC LF and NL mappings.
- *
- * If not, ignore the option. Options are always ignored if they do not apply.
- */
- if(!(
- (mbcsTable->outputType==MBCS_OUTPUT_1 || mbcsTable->outputType==MBCS_OUTPUT_2_SISO) &&
- mbcsTable->stateTable[0][EBCDIC_LF]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) &&
- mbcsTable->stateTable[0][EBCDIC_NL]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL)
- )) {
- return FALSE;
- }
-
- /* the from-Unicode direction must have the standard roundtrip mappings as well */
- if(mbcsTable->outputType==MBCS_OUTPUT_1) {
- if(!(
- EBCDIC_RT_LF==MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) &&
- EBCDIC_RT_NL==MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL)
- )) {
- return FALSE;
- }
- } else /* MBCS_OUTPUT_2_SISO */ {
- stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF);
- if(!(
- MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF)!=0 &&
- EBCDIC_LF==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF)
- )) {
- return FALSE;
- }
-
- stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL);
- if(!(
- MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL)!=0 &&
- EBCDIC_NL==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL)
- )) {
- return FALSE;
- }
- }
-
- if(mbcsTable->fromUBytesLength>0) {
- /*
- * We _know_ the number of bytes in the fromUnicodeBytes array
- * starting with header.version 4.1.
- */
- sizeofFromUBytes=mbcsTable->fromUBytesLength;
- } else {
- /*
- * Otherwise:
- * There used to be code to enumerate the fromUnicode
- * trie and find the highest entry, but it was removed in ICU 3.2
- * because it was not tested and caused a low code coverage number.
- * See Jitterbug 3674.
- * This affects only some .cnv file formats with a header.version
- * below 4.1, and only when swaplfnl is requested.
- *
- * ucnvmbcs.c revision 1.99 is the last one with the
- * ucnv_MBCSSizeofFromUBytes() function.
- */
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return FALSE;
- }
-
- /*
- * The table has an appropriate format.
- * Allocate and build
- * - a modified to-Unicode state table
- * - a modified from-Unicode output array
- * - a converter name string with the swap option appended
- */
- size=
- mbcsTable->countStates*1024+
- sizeofFromUBytes+
- UCNV_MAX_CONVERTER_NAME_LENGTH+20;
- p=(uint8_t *)uprv_malloc(size);
- if(p==NULL) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
- }
-
- /* copy and modify the to-Unicode state table */
- newStateTable=(int32_t (*)[256])p;
- uprv_memcpy(newStateTable, mbcsTable->stateTable, mbcsTable->countStates*1024);
-
- newStateTable[0][EBCDIC_LF]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL);
- newStateTable[0][EBCDIC_NL]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF);
-
- /* copy and modify the from-Unicode result table */
- newResults=(uint16_t *)newStateTable[mbcsTable->countStates]; /* starts right behind the copied state table */
- uprv_memcpy(newResults, bytes, sizeofFromUBytes);
-
- /* conveniently, the table access macros work on the left side of expressions */
- if(mbcsTable->outputType==MBCS_OUTPUT_1) {
- MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_LF)=EBCDIC_RT_NL;
- MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_NL)=EBCDIC_RT_LF;
- } else /* MBCS_OUTPUT_2_SISO */ {
- stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF);
- MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_LF)=EBCDIC_NL;
-
- stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL);
- MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_NL)=EBCDIC_LF;
- }
-
- /* set the canonical converter name */
- name=(char *)newResults+sizeofFromUBytes; /* starts right behind the copied result bytes */
- uprv_strcpy(name, sharedData->staticData->name);
- uprv_strcat(name, UCNV_SWAP_LFNL_OPTION_STRING);
-
- /* set the pointers */
- icu::umtx_lock(NULL);
- if(mbcsTable->swapLFNLStateTable==NULL) {
- mbcsTable->swapLFNLStateTable=newStateTable;
- mbcsTable->swapLFNLFromUnicodeBytes=(uint8_t *)newResults;
- mbcsTable->swapLFNLName=name;
-
- newStateTable=NULL; /* the table now owns the allocation */
- }
- icu::umtx_unlock(NULL);
-
- /* release the allocated memory if another thread beat us to it */
- if(newStateTable!=NULL) {
- uprv_free(newStateTable);
- }
- return TRUE;
-}
-
-/* reconstitute omitted fromUnicode data ------------------------------------ */
-
-/*
- * for details, compare with genmbcs.c MBCSAddFromUnicode() and transformEUC()
- *
- * Callback for ucnv_MBCSEnumToUnicode(), see reconstituteData().
- * context is the UConverterMBCSTable whose from-Unicode data is written.
- * value is the byte sequence for codePoints[0]; the sequence for codePoints[i]
- * is value+i. Negative entries of codePoints are skipped.
- * For each other code point, the bytes are written into stage 3 and the
- * roundtrip flag is set in the stage 2 entry.
- *
- * @return always TRUE
- */
-static UBool U_CALLCONV
-writeStage3Roundtrip(const void *context, uint32_t value, UChar32 codePoints[32]) {
- UConverterMBCSTable *mbcsTable=(UConverterMBCSTable *)context;
- const uint16_t *table;
- uint32_t *stage2;
- uint8_t *bytes, *p;
- UChar32 c;
- int32_t i, st3;
-
- table=mbcsTable->fromUnicodeTable;
- bytes=(uint8_t *)mbcsTable->fromUnicodeBytes;
-
- /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */
- switch(mbcsTable->outputType) {
- case MBCS_OUTPUT_3_EUC:
- if(value<=0xffff) {
- /* short sequences are stored directly */
- /* code set 0 or 1 */
- } else if(value<=0x8effff) {
- /* code set 2 */
- value&=0x7fff;
- } else /* first byte is 0x8f */ {
- /* code set 3 */
- value&=0xff7f;
- }
- break;
- case MBCS_OUTPUT_4_EUC:
- if(value<=0xffffff) {
- /* short sequences are stored directly */
- /* code set 0 or 1 */
- } else if(value<=0x8effffff) {
- /* code set 2 */
- value&=0x7fffff;
- } else /* first byte is 0x8f */ {
- /* code set 3 */
- value&=0xff7fff;
- }
- break;
- default:
- break;
- }
-
- for(i=0; i<=0x1f; ++value, ++i) {
- c=codePoints[i];
- if(c<0) {
- continue;
- }
-
- /* locate the stage 2 & 3 data */
- stage2=((uint32_t *)table)+table[c>>10]+((c>>4)&0x3f);
- p=bytes;
- st3=(int32_t)(uint16_t)*stage2*16+(c&0xf); /* the lower 16 bits of the stage 2 entry index a stage 3 16-block */
-
- /* write the codepage bytes into stage 3 */
- switch(mbcsTable->outputType) {
- case MBCS_OUTPUT_3:
- case MBCS_OUTPUT_4_EUC:
- p+=st3*3;
- p[0]=(uint8_t)(value>>16);
- p[1]=(uint8_t)(value>>8);
- p[2]=(uint8_t)value;
- break;
- case MBCS_OUTPUT_4:
- ((uint32_t *)p)[st3]=value;
- break;
- default:
- /* 2 bytes per character */
- ((uint16_t *)p)[st3]=(uint16_t)value;
- break;
- }
-
- /* set the roundtrip flag */
- *stage2|=(1UL<<(16+(c&0xf)));
- }
- return TRUE;
- }
-/*
- * Rebuilds the from-Unicode data of mbcsTable in newly allocated memory.
- *
- * The allocation (mbcsTable->reconstitutedData) holds stage 1, the full
- * stage 2 and the from-Unicode bytes, in this order, and starts out zeroed.
- * Stage 1 and the existing part of stage 2 are copied from the old
- * fromUnicodeTable, the existing part going to the end of the full stage 2.
- * The initial part of stage 2 is then filled in from mbcsTable->mbcsIndex,
- * and the bytes and roundtrip flags are written by enumerating the
- * toUnicode data with writeStage3Roundtrip().
- * fromUnicodeTable and fromUnicodeBytes are changed to point into the new memory.
- *
- * On allocation failure, *pErrorCode is set to U_MEMORY_ALLOCATION_ERROR
- * and nothing else is changed.
- */
-static void
-reconstituteData(UConverterMBCSTable *mbcsTable,
- uint32_t stage1Length, uint32_t stage2Length,
- uint32_t fullStage2Length, /* lengths are numbers of units, not bytes */
- UErrorCode *pErrorCode) {
- uint16_t *stage1;
- uint32_t *stage2;
- uint32_t dataLength=stage1Length*2+fullStage2Length*4+mbcsTable->fromUBytesLength;
- mbcsTable->reconstitutedData=(uint8_t *)uprv_malloc(dataLength);
- if(mbcsTable->reconstitutedData==NULL) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- uprv_memset(mbcsTable->reconstitutedData, 0, dataLength);
-
- /* copy existing data and reroute the pointers */
- stage1=(uint16_t *)mbcsTable->reconstitutedData;
- uprv_memcpy(stage1, mbcsTable->fromUnicodeTable, stage1Length*2);
-
- stage2=(uint32_t *)(stage1+stage1Length);
- uprv_memcpy(stage2+(fullStage2Length-stage2Length),
- mbcsTable->fromUnicodeTable+stage1Length,
- stage2Length*4);
-
- mbcsTable->fromUnicodeTable=stage1;
- mbcsTable->fromUnicodeBytes=(uint8_t *)(stage2+fullStage2Length);
-
- /* indexes into stage 2 count from the bottom of the fromUnicodeTable */
- stage2=(uint32_t *)stage1;
-
- /* reconstitute the initial part of stage 2 from the mbcsIndex */
- {
- int32_t stageUTF8Length=((int32_t)mbcsTable->maxFastUChar+1)>>6;
- int32_t stageUTF8Index=0;
- int32_t st1, st2, st3, i;
-
- for(st1=0; stageUTF8Index<stageUTF8Length; ++st1) {
- st2=stage1[st1];
- if(st2!=(int32_t)stage1Length/2) {
- /* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */
- for(i=0; i<16; ++i) {
- st3=mbcsTable->mbcsIndex[stageUTF8Index++];
- if(st3!=0) {
- /* a stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */
- st3>>=4;
- /*
- * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are
- * allocated together as a single 64-block for access from the mbcsIndex
- */
- stage2[st2++]=st3++;
- stage2[st2++]=st3++;
- stage2[st2++]=st3++;
- stage2[st2++]=st3;
- } else {
- /* no stage 3 block, skip */
- st2+=4;
- }
- }
- } else {
- /* no stage 2 block, skip */
- stageUTF8Index+=16;
- }
- }
- }
-
- /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */
- ucnv_MBCSEnumToUnicode(mbcsTable, writeStage3Roundtrip, mbcsTable, pErrorCode);
-}
-
-/* MBCS setup functions ----------------------------------------------------- */
-
-static void U_CALLCONV
-ucnv_MBCSLoad(UConverterSharedData *sharedData,
- UConverterLoadArgs *pArgs,
- const uint8_t *raw,
- UErrorCode *pErrorCode) {
- UDataInfo info;
- UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
- _MBCSHeader *header=(_MBCSHeader *)raw;
- uint32_t offset;
- uint32_t headerLength;
- UBool noFromU=FALSE;
-
- if(header->version[0]==4) {
- headerLength=MBCS_HEADER_V4_LENGTH;
- } else if(header->version[0]==5 && header->version[1]>=3 &&
- (header->options&MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0) {
- headerLength=header->options&MBCS_OPT_LENGTH_MASK;
- noFromU=(UBool)((header->options&MBCS_OPT_NO_FROM_U)!=0);
- } else {
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- return;
- }
-
- mbcsTable->outputType=(uint8_t)header->flags;
- if(noFromU && mbcsTable->outputType==MBCS_OUTPUT_1) {
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- return;
- }
-
- /* extension data, header version 4.2 and higher */
- offset=header->flags>>8;
- if(offset!=0) {
- mbcsTable->extIndexes=(const int32_t *)(raw+offset);
- }
-
- if(mbcsTable->outputType==MBCS_OUTPUT_EXT_ONLY) {
- UConverterLoadArgs args=UCNV_LOAD_ARGS_INITIALIZER;
- UConverterSharedData *baseSharedData;
- const int32_t *extIndexes;
- const char *baseName;
-
- /* extension-only file, load the base table and set values appropriately */
- if((extIndexes=mbcsTable->extIndexes)==NULL) {
- /* extension-only file without extension */
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- return;
- }
-
- if(pArgs->nestedLoads!=1) {
- /* an extension table must not be loaded as a base table */
- *pErrorCode=U_INVALID_TABLE_FILE;
- return;
- }
-
- /* load the base table */
- baseName=(const char *)header+headerLength*4;
- if(0==uprv_strcmp(baseName, sharedData->staticData->name)) {
- /* forbid loading this same extension-only file */
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- return;
- }
-
- /* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
- args.size=sizeof(UConverterLoadArgs);
- args.nestedLoads=2;
- args.onlyTestIsLoadable=pArgs->onlyTestIsLoadable;
- args.reserved=pArgs->reserved;
- args.options=pArgs->options;
- args.pkg=pArgs->pkg;
- args.name=baseName;
- baseSharedData=ucnv_load(&args, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- if( baseSharedData->staticData->conversionType!=UCNV_MBCS ||
- baseSharedData->mbcs.baseSharedData!=NULL
- ) {
- ucnv_unload(baseSharedData);
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- return;
- }
- if(pArgs->onlyTestIsLoadable) {
- /*
- * Exit as soon as we know that we can load the converter
- * and the format is valid and supported.
- * The worst that can happen in the following code is a memory
- * allocation error.
- */
- ucnv_unload(baseSharedData);
- return;
- }
-
- /* copy the base table data */
- uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable));
-
- /* overwrite values with relevant ones for the extension converter */
- mbcsTable->baseSharedData=baseSharedData;
- mbcsTable->extIndexes=extIndexes;
-
- /*
- * It would be possible to share the swapLFNL data with a base converter,
- * but the generated name would have to be different, and the memory
- * would have to be free'd only once.
- * It is easier to just create the data for the extension converter
- * separately when it is requested.
- */
- mbcsTable->swapLFNLStateTable=NULL;
- mbcsTable->swapLFNLFromUnicodeBytes=NULL;
- mbcsTable->swapLFNLName=NULL;
-
- /*
- * The reconstitutedData must be deleted only when the base converter
- * is unloaded.
- */
- mbcsTable->reconstitutedData=NULL;
-
- /*
- * Set a special, runtime-only outputType if the extension converter
- * is a DBCS version of a base converter that also maps single bytes.
- */
- if( sharedData->staticData->conversionType==UCNV_DBCS ||
- (sharedData->staticData->conversionType==UCNV_MBCS &&
- sharedData->staticData->minBytesPerChar>=2)
- ) {
- if(baseSharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO) {
- /* the base converter is SI/SO-stateful */
- int32_t entry;
-
- /* get the dbcs state from the state table entry for SO=0x0e */
- entry=mbcsTable->stateTable[0][0xe];
- if( MBCS_ENTRY_IS_FINAL(entry) &&
- MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_CHANGE_ONLY &&
- MBCS_ENTRY_FINAL_STATE(entry)!=0
- ) {
- mbcsTable->dbcsOnlyState=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry);
-
- mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
- }
- } else if(
- baseSharedData->staticData->conversionType==UCNV_MBCS &&
- baseSharedData->staticData->minBytesPerChar==1 &&
- baseSharedData->staticData->maxBytesPerChar==2 &&
- mbcsTable->countStates<=127
- ) {
- /* non-stateful base converter, need to modify the state table */
- int32_t (*newStateTable)[256];
- int32_t *state;
- int32_t i, count;
-
- /* allocate a new state table and copy the base state table contents */
- count=mbcsTable->countStates;
- newStateTable=(int32_t (*)[256])uprv_malloc((count+1)*1024);
- if(newStateTable==NULL) {
- ucnv_unload(baseSharedData);
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- uprv_memcpy(newStateTable, mbcsTable->stateTable, count*1024);
-
- /* change all final single-byte entries to go to a new all-illegal state */
- state=newStateTable[0];
- for(i=0; i<256; ++i) {
- if(MBCS_ENTRY_IS_FINAL(state[i])) {
- state[i]=MBCS_ENTRY_TRANSITION(count, 0);
- }
- }
-
- /* build the new all-illegal state */
- state=newStateTable[count];
- for(i=0; i<256; ++i) {
- state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0);
- }
- mbcsTable->stateTable=(const int32_t (*)[256])newStateTable;
- mbcsTable->countStates=(uint8_t)(count+1);
- mbcsTable->stateTableOwned=TRUE;
-
- mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
- }
- }
-
- /*
- * unlike below for files with base tables, do not get the unicodeMask
- * from the sharedData; instead, use the base table's unicodeMask,
- * which we copied in the memcpy above;
- * this is necessary because the static data unicodeMask, especially
- * the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data
- */
- } else {
- /* conversion file with a base table; an additional extension table is optional */
- /* make sure that the output type is known */
- switch(mbcsTable->outputType) {
- case MBCS_OUTPUT_1:
- case MBCS_OUTPUT_2:
- case MBCS_OUTPUT_3:
- case MBCS_OUTPUT_4:
- case MBCS_OUTPUT_3_EUC:
- case MBCS_OUTPUT_4_EUC:
- case MBCS_OUTPUT_2_SISO:
- /* OK */
- break;
- default:
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- return;
- }
- if(pArgs->onlyTestIsLoadable) {
- /*
- * Exit as soon as we know that we can load the converter
- * and the format is valid and supported.
- * The worst that can happen in the following code is a memory
- * allocation error.
- */
- return;
- }
-
- mbcsTable->countStates=(uint8_t)header->countStates;
- mbcsTable->countToUFallbacks=header->countToUFallbacks;
- mbcsTable->stateTable=(const int32_t (*)[256])(raw+headerLength*4);
- mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates);
- mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits);
-
- mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable);
- mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes);
- mbcsTable->fromUBytesLength=header->fromUBytesLength;
-
- /*
- * converter versions 6.1 and up contain a unicodeMask that is
- * used here to select the most efficient function implementations
- */
- info.size=sizeof(UDataInfo);
- udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);
- if(info.formatVersion[0]>6 || (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) {
- /* mask off possible future extensions to be safe */
- mbcsTable->unicodeMask=(uint8_t)(sharedData->staticData->unicodeMask&3);
- } else {
- /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
- mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES;
- }
-
- /*
- * _MBCSHeader.version 4.3 adds utf8Friendly data structures.
- * Check for the header version, SBCS vs. MBCS, and for whether the
- * data structures are optimized for code points as high as what the
- * runtime code is designed for.
- * The implementation does not handle mapping tables with entries for
- * unpaired surrogates.
- */
- if( header->version[1]>=3 &&
- (mbcsTable->unicodeMask&UCNV_HAS_SURROGATES)==0 &&
- (mbcsTable->countStates==1 ?
- (header->version[2]>=(SBCS_FAST_MAX>>8)) :
- (header->version[2]>=(MBCS_FAST_MAX>>8))
- )
- ) {
- mbcsTable->utf8Friendly=TRUE;
-
- if(mbcsTable->countStates==1) {
- /*
- * SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher.
- * Build a table with indexes to each block, to be used instead of
- * the regular stage 1/2 table.
- */
- int32_t i;
- for(i=0; i<(SBCS_FAST_LIMIT>>6); ++i) {
- mbcsTable->sbcsIndex[i]=mbcsTable->fromUnicodeTable[mbcsTable->fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
- }
- /* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header->version[2]>(SBCS_FAST_MAX>>8) */
- mbcsTable->maxFastUChar=SBCS_FAST_MAX;
- } else {
- /*
- * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.
- * The .cnv file is prebuilt with an additional stage table with indexes
- * to each block.
- */
- mbcsTable->mbcsIndex=(const uint16_t *)
- (mbcsTable->fromUnicodeBytes+
- (noFromU ? 0 : mbcsTable->fromUBytesLength));
- mbcsTable->maxFastUChar=(((UChar)header->version[2])<<8)|0xff;
- }
- }
-
- /* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */
- {
- uint32_t asciiRoundtrips=0xffffffff;
- int32_t i;
-
- for(i=0; i<0x80; ++i) {
- if(mbcsTable->stateTable[0][i]!=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) {
- asciiRoundtrips&=~((uint32_t)1<<(i>>2));
- }
- }
- mbcsTable->asciiRoundtrips=asciiRoundtrips;
- }
-
- if(noFromU) {
- uint32_t stage1Length=
- mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY ?
- 0x440 : 0x40;
- uint32_t stage2Length=
- (header->offsetFromUBytes-header->offsetFromUTable)/4-
- stage1Length/2;
- reconstituteData(mbcsTable, stage1Length, stage2Length, header->fullStage2Length, pErrorCode);
- }
- }
-
- /* Set the impl pointer here so that it is set for both extension-only and base tables. */
- if(mbcsTable->utf8Friendly) {
- if(mbcsTable->countStates==1) {
- sharedData->impl=&_SBCSUTF8Impl;
- } else {
- if(mbcsTable->outputType==MBCS_OUTPUT_2) {
- sharedData->impl=&_DBCSUTF8Impl;
- }
- }
- }
-
- if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY || mbcsTable->outputType==MBCS_OUTPUT_2_SISO) {
- /*
- * MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip.
- * MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly.
- */
- mbcsTable->asciiRoundtrips=0;
- }
-}
-
-static void U_CALLCONV
-ucnv_MBCSUnload(UConverterSharedData *sharedData) { /* releases what sharedData->mbcs holds: swapLFNL state table, owned state table, base converter, reconstituted data */
- UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
-
- if(mbcsTable->swapLFNLStateTable!=NULL) { /* free the swapLFNL state table if one is set */
- uprv_free(mbcsTable->swapLFNLStateTable);
- }
- if(mbcsTable->stateTableOwned) { /* the flag marks a state table that has to be freed here; the cast only drops const for uprv_free() */
- uprv_free((void *)mbcsTable->stateTable);
- }
- if(mbcsTable->baseSharedData!=NULL) { /* unload the base converter that this table points to */
- ucnv_unload(mbcsTable->baseSharedData);
- }
- if(mbcsTable->reconstitutedData!=NULL) { /* free the reconstituted data if present */
- uprv_free(mbcsTable->reconstitutedData);
- }
-}
-
-static void U_CALLCONV
-ucnv_MBCSOpen(UConverter *cnv, /* per-converter setup: adjusts cnv->options and cnv->maxBytesPerUChar from the shared MBCS table and pArgs */
- UConverterLoadArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverterMBCSTable *mbcsTable;
- const int32_t *extIndexes;
- uint8_t outputType;
- int8_t maxBytesPerUChar;
-
- if(pArgs->onlyTestIsLoadable) { /* nothing to set up when the caller only tests loadability */
- return;
- }
-
- mbcsTable=&cnv->sharedData->mbcs;
- outputType=mbcsTable->outputType;
-
- if(outputType==MBCS_OUTPUT_DBCS_ONLY) {
- /*
-  * the swaplfnl option does not apply, remove it
-  * (the flag is cleared in pArgs->options itself and the result is stored in cnv->options)
-  */
- cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
- }
-
- if((pArgs->options&UCNV_OPTION_SWAP_LFNL)!=0) {
- /* do this because double-checked locking is broken */
- UBool isCached;
-
- icu::umtx_lock(NULL); /* read the cached pointer under the lock */
- isCached=mbcsTable->swapLFNLStateTable!=NULL;
- icu::umtx_unlock(NULL);
-
- if(!isCached) {
- if(!_EBCDICSwapLFNL(cnv->sharedData, pErrorCode)) { /* FALSE with a success code is handled below as "option does not apply" */
- if(U_FAILURE(*pErrorCode)) {
- return; /* something went wrong */
- }
-
- /* the option does not apply, remove it */
- cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
- }
- }
- }
-
- /*
-  * Name-based option flags. This is one if/else-if chain: a name containing "18030"
-  * never reaches the KEIS/JEF/JIPS tests, even if it does not contain "gb18030"/"GB18030".
-  * Each test checks only the all-lowercase and all-uppercase spelling.
-  */
- if(uprv_strstr(pArgs->name, "18030")!=NULL) {
- if(uprv_strstr(pArgs->name, "gb18030")!=NULL || uprv_strstr(pArgs->name, "GB18030")!=NULL) {
- /* set a flag for GB 18030 mode, which changes the callback behavior */
- cnv->options|=_MBCS_OPTION_GB18030;
- }
- } else if((uprv_strstr(pArgs->name, "KEIS")!=NULL) || (uprv_strstr(pArgs->name, "keis")!=NULL)) {
- /* set a flag for KEIS converter, which changes the SI/SO character sequence */
- cnv->options|=_MBCS_OPTION_KEIS;
- } else if((uprv_strstr(pArgs->name, "JEF")!=NULL) || (uprv_strstr(pArgs->name, "jef")!=NULL)) {
- /* set a flag for JEF converter, which changes the SI/SO character sequence */
- cnv->options|=_MBCS_OPTION_JEF;
- } else if((uprv_strstr(pArgs->name, "JIPS")!=NULL) || (uprv_strstr(pArgs->name, "jips")!=NULL)) {
- /* set a flag for JIPS converter, which changes the SI/SO character sequence */
- cnv->options|=_MBCS_OPTION_JIPS;
- }
-
- /* fix maxBytesPerUChar depending on outputType and options etc. */
- if(outputType==MBCS_OUTPUT_2_SISO) {
- cnv->maxBytesPerUChar=3; /* SO+DBCS */
- }
-
- extIndexes=mbcsTable->extIndexes;
- if(extIndexes!=NULL) { /* an extension table is present: its maximum may raise the value, it never lowers it */
- maxBytesPerUChar=(int8_t)UCNV_GET_MAX_BYTES_PER_UCHAR(extIndexes);
- if(outputType==MBCS_OUTPUT_2_SISO) {
- ++maxBytesPerUChar; /* SO + multiple DBCS */
- }
-
- if(maxBytesPerUChar>cnv->maxBytesPerUChar) {
- cnv->maxBytesPerUChar=maxBytesPerUChar;
- }
- }
-
-#if 0
- /*
- * documentation of UConverter fields used for status
- * all of these fields are (re)set to 0 by ucnv_bld.c and ucnv_reset()
- * (this block is compiled out; it only records how the fields are used)
- */
-
- /* toUnicode */
- cnv->toUnicodeStatus=0; /* offset */
- cnv->mode=0; /* state */
- cnv->toULength=0; /* byteIndex */
-
- /* fromUnicode */
- cnv->fromUChar32=0;
- cnv->fromUnicodeStatus=1; /* prevLength */
-#endif
-}
-
-U_CDECL_BEGIN
-
-static const char* U_CALLCONV
-ucnv_MBCSGetName(const UConverter *cnv) { /* returns the converter name; the returned pointer refers to shared data, nothing is copied */
- if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0 && cnv->sharedData->mbcs.swapLFNLName!=NULL) {
- return cnv->sharedData->mbcs.swapLFNLName; /* swapLFNL option set and a swapLFNL name exists: report that name */
- } else {
- return cnv->sharedData->staticData->name; /* otherwise the name stored in the static data */
- }
-}
-U_CDECL_END
-
-
-/* MBCS-to-Unicode conversion functions ------------------------------------- */
-
-static UChar32 U_CALLCONV
-ucnv_MBCSGetFallback(UConverterMBCSTable *mbcsTable, uint32_t offset) { /* looks up offset in mbcsTable->toUFallbacks; returns its codePoint, or 0xfffe if there is no entry */
- const _MBCSToUFallback *toUFallbacks;
- uint32_t i, start, limit;
-
- limit=mbcsTable->countToUFallbacks;
- if(limit>0) { /* also keeps the unsigned limit-1 below from wrapping around */
- /*
-  * do a binary search for the fallback mapping
-  * (presumes toUFallbacks[] is sorted by ascending offset -- not shown here, confirm against the data format);
-  * the candidate range is [start, limit) and shrinks until it holds a single entry
-  */
- toUFallbacks=mbcsTable->toUFallbacks;
- start=0;
- while(start<limit-1) {
- i=(start+limit)/2;
- if(offset<toUFallbacks[i].offset) {
- limit=i;
- } else {
- start=i;
- }
- }
-
- /* did we really find it? */
- if(offset==toUFallbacks[start].offset) {
- return toUFallbacks[start].codePoint;
- }
- }
-
- return 0xfffe; /* no fallback mapping for this offset */
-}
-
-/* This version of ucnv_MBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages.
- *
- * Each source byte is looked up in row 0 of the state table and the entry is handled as a final entry:
- * - VALID_DIRECT_16 writes one UChar;
- * - VALID_DIRECT_20, and FALLBACK_DIRECT_20 when fallbacks are enabled, write a surrogate pair;
- * - FALLBACK_DIRECT_16 writes one UChar when fallbacks are enabled;
- * - UNASSIGNED, and FALLBACK_DIRECT_16 with fallbacks disabled, go to the extension lookup _extToU();
- * - ILLEGAL sets U_ILLEGAL_CHAR_FOUND and ends the loop.
- * When pArgs->offsets is not NULL, one source index is written per output UChar.
- * On return pArgs->source, pArgs->target and pArgs->offsets are advanced; errors are reported via *pErrorCode.
- */
-static void
-ucnv_MBCSSingleToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const uint8_t *source, *sourceLimit;
- UChar *target;
- const UChar *targetLimit;
- int32_t *offsets;
-
- const int32_t (*stateTable)[256];
-
- int32_t sourceIndex;
-
- int32_t entry;
- UChar c;
- uint8_t action;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=pArgs->target;
- targetLimit=pArgs->targetLimit;
- offsets=pArgs->offsets;
-
- if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { /* with the swapLFNL option, use the alternate state table */
- stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
- } else {
- stateTable=cnv->sharedData->mbcs.stateTable;
- }
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex=0;
-
- /* conversion loop */
- while(source<sourceLimit) {
- /*
- * This following test is to see if available input would overflow the output.
- * It does not catch output of more than one code unit that
- * overflows as a result of a surrogate pair or callback output
- * from the last source byte.
- * Therefore, those situations also test for overflows and will
- * then break the loop, too.
- */
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
-
- entry=stateTable[0][*source++];
- /* MBCS_ENTRY_IS_FINAL(entry) */
-
- /* test the most common case first */
- if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
- /* output BMP code point */
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
-
- /* normal end of action codes: prepare for a new character */
- ++sourceIndex;
- continue;
- }
-
- /*
- * An if-else-if chain provides more reliable performance for
- * the most common cases compared to a switch.
- */
- action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
- if(action==MBCS_STATE_VALID_DIRECT_20 ||
- (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
- ) {
- entry=MBCS_ENTRY_FINAL_VALUE(entry); /* entry is reused for the 20-bit value: upper 10 bits -> lead, lower 10 bits -> trail */
- /* output surrogate pair */
- *target++=(UChar)(0xd800|(UChar)(entry>>10));
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- c=(UChar)(0xdc00|(UChar)(entry&0x3ff));
- if(target<targetLimit) {
- *target++=c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- } else {
- /* target overflow: the trail surrogate is parked in the converter's UCharErrorBuffer */
- cnv->UCharErrorBuffer[0]=c;
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
-
- ++sourceIndex;
- continue;
- } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
- if(UCNV_TO_U_USE_FALLBACK(cnv)) {
- /* output BMP code point */
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
-
- ++sourceIndex;
- continue;
- }
- /* fallbacks disabled: drops out of the chain to the extension lookup below */
- } else if(action==MBCS_STATE_UNASSIGNED) {
- /* just fall through */
- } else if(action==MBCS_STATE_ILLEGAL) {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- } else {
- /*
-  * reserved, must never occur
-  * NOTE(review): this branch is also taken for MBCS_STATE_FALLBACK_DIRECT_20 when
-  * fallbacks are disabled (the first test of the chain did not match);
-  * the byte is then consumed without output and without an extension lookup.
-  */
- ++sourceIndex;
- continue;
- }
-
- if(U_FAILURE(*pErrorCode)) {
- /* callback(illegal) */
- break;
- } else /* unassigned sequences indicated with byteIndex>0 */ {
- /* try an extension mapping */
- pArgs->source=(const char *)source; /* remember the position before _extToU() so consumed bytes can be counted below */
- cnv->toUBytes[0]=*(source-1);
- cnv->toULength=_extToU(cnv, cnv->sharedData,
- 1, &source, sourceLimit,
- &target, targetLimit,
- &offsets, sourceIndex,
- pArgs->flush,
- pErrorCode);
- sourceIndex+=1+(int32_t)(source-(const uint8_t *)pArgs->source); /* 1 for the byte read above plus the distance _extToU() moved source */
-
- if(U_FAILURE(*pErrorCode)) {
- /* not mappable or buffer overflow */
- break;
- }
- }
- }
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- pArgs->offsets=offsets;
-}
-
-/*
- * This version of ucnv_MBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages
- * that only map to and from the BMP.
- * In addition to single-byte optimizations, the offset calculations
- * become much easier.
- *
- * Bookkeeping used below:
- * - targetCapacity is the minimum of the remaining source bytes and the remaining target space,
- *   so the main loop needs a single counter;
- * - offsets are not written per character in the fast paths; they are filled in afterwards for
- *   the span from lastSource up to the current position (at a callback, after the unrolled
- *   section, and at the end of the function);
- * - after an extension mapping the capacity is recomputed, because _extToU() may move
- *   source and target by different amounts.
- * On return pArgs->source, pArgs->target and pArgs->offsets are advanced; errors are reported via *pErrorCode.
- */
-static void
-ucnv_MBCSSingleToBMPWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const uint8_t *source, *sourceLimit, *lastSource;
- UChar *target;
- int32_t targetCapacity, length;
- int32_t *offsets;
-
- const int32_t (*stateTable)[256];
-
- int32_t sourceIndex;
-
- int32_t entry;
- uint8_t action;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
- offsets=pArgs->offsets;
-
- if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { /* with the swapLFNL option, use the alternate state table */
- stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
- } else {
- stateTable=cnv->sharedData->mbcs.stateTable;
- }
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex=0;
- lastSource=source; /* start of the span whose offsets have not been written yet */
-
- /*
- * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
- * for the minimum of the sourceLength and targetCapacity
- */
- length=(int32_t)(sourceLimit-source);
- if(length<targetCapacity) {
- targetCapacity=length;
- }
-
-#if MBCS_UNROLL_SINGLE_TO_BMP
- /* unrolling makes it faster on Pentium III/Windows 2000 */
- /* unroll the loop with the most common case */
-unrolled:
- if(targetCapacity>=16) {
- int32_t count, loops, oredEntries;
-
- loops=count=targetCapacity>>4; /* number of full groups of 16 that fit */
- do {
- /* convert 16 bytes unconditionally; the entries are ORed together so that one test afterwards covers all 16 */
- oredEntries=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- oredEntries|=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- oredEntries|=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- oredEntries|=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- oredEntries|=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- oredEntries|=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- oredEntries|=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- oredEntries|=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- oredEntries|=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- oredEntries|=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- oredEntries|=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- oredEntries|=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- oredEntries|=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- oredEntries|=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- oredEntries|=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- oredEntries|=entry=stateTable[0][*source++];
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
-
- /* were all 16 entries really valid? */
- if(!MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(oredEntries)) {
- /* no, return to the first of these 16 (the regular loop below redoes them one by one) */
- source-=16;
- target-=16;
- break;
- }
- } while(--count>0);
- count=loops-count; /* number of groups that completed successfully */
- targetCapacity-=16*count;
-
- if(offsets!=NULL) {
- lastSource+=16*count; /* offsets for these groups are written right here */
- while(count>0) {
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- --count;
- }
- }
- }
-#endif
-
- /* conversion loop */
- while(targetCapacity > 0 && source < sourceLimit) {
- entry=stateTable[0][*source++];
- /* MBCS_ENTRY_IS_FINAL(entry) */
-
- /* test the most common case first */
- if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
- /* output BMP code point */
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- --targetCapacity;
- continue;
- }
-
- /*
- * An if-else-if chain provides more reliable performance for
- * the most common cases compared to a switch.
- */
- action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
- if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
- if(UCNV_TO_U_USE_FALLBACK(cnv)) {
- /* output BMP code point */
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- --targetCapacity;
- continue;
- }
- /* fallbacks disabled: drops out of the chain to the extension lookup below */
- } else if(action==MBCS_STATE_UNASSIGNED) {
- /* just fall through */
- } else if(action==MBCS_STATE_ILLEGAL) {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- } else {
- /* reserved, must never occur (the byte is consumed without output; targetCapacity is left unchanged) */
- continue;
- }
-
- /* set offsets since the start or the last extension */
- if(offsets!=NULL) {
- int32_t count=(int32_t)(source-lastSource);
-
- /* predecrement: do not set the offset for the callback-causing character */
- while(--count>0) {
- *offsets++=sourceIndex++;
- }
- /* offset and sourceIndex are now set for the current character */
- }
-
- if(U_FAILURE(*pErrorCode)) {
- /* callback(illegal) */
- break;
- } else /* unassigned sequences indicated with byteIndex>0 */ {
- /* try an extension mapping */
- lastSource=source; /* also serves as the reference point for counting bytes consumed by _extToU() */
- cnv->toUBytes[0]=*(source-1);
- cnv->toULength=_extToU(cnv, cnv->sharedData,
- 1, &source, sourceLimit,
- &target, pArgs->targetLimit,
- &offsets, sourceIndex,
- pArgs->flush,
- pErrorCode);
- sourceIndex+=1+(int32_t)(source-lastSource); /* 1 for the byte read above plus the distance _extToU() moved source */
-
- if(U_FAILURE(*pErrorCode)) {
- /* not mappable or buffer overflow */
- break;
- }
-
- /* recalculate the targetCapacity after an extension mapping */
- targetCapacity=(int32_t)(pArgs->targetLimit-target);
- length=(int32_t)(sourceLimit-source);
- if(length<targetCapacity) {
- targetCapacity=length;
- }
- }
-
-#if MBCS_UNROLL_SINGLE_TO_BMP
- /* unrolling makes it faster on Pentium III/Windows 2000 */
- goto unrolled;
-#endif
- }
-
- /* the loop also ends when targetCapacity reaches 0; that is an overflow only if input remains and the real target limit was hit */
- if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=pArgs->targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
-
- /* set offsets since the start or the last callback */
- if(offsets!=NULL) {
- size_t count=source-lastSource;
- while(count>0) {
- *offsets++=sourceIndex++;
- --count;
- }
- }
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- pArgs->offsets=offsets;
-}
-
-static UBool
-hasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) { /* TRUE if some byte sequence starting in this state reaches a final entry whose action is not MBCS_STATE_ILLEGAL */
- const int32_t *row=stateTable[state];
- int32_t b, entry;
- /*
-  * First test for final entries in this state for some commonly valid byte values.
-  * (0xa1 and 0x41 are probed before the full scan; the full scan below covers them again,
-  * so these two probes only shorten the search)
-  */
- entry=row[0xa1];
- if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
- MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
- ) {
- return TRUE;
- }
- entry=row[0x41];
- if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
- MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
- ) {
- return TRUE;
- }
- /* Then test for final entries in this state. */
- for(b=0; b<=0xff; ++b) {
- entry=row[b];
- if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
- MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
- ) {
- return TRUE;
- }
- }
- /*
-  * Then recurse for transition entries.
-  * NOTE(review): there is no guard against revisiting a state; this presumably relies on
-  * state tables having no transition cycles -- confirm against the table format.
-  */
- for(b=0; b<=0xff; ++b) {
- entry=row[b];
- if( MBCS_ENTRY_IS_TRANSITION(entry) &&
- hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry))
- ) {
- return TRUE;
- }
- }
- return FALSE; /* every path from this state ends in an illegal final entry */
-}
-
-/*
- * Is byte b a single/lead byte in this state?
- * Recurse for transition states, because here we don't want to say that
- * b is a lead byte if all byte sequences that start with b are illegal.
- *
- * stateTable/state select the row in which b is looked up.
- * isDBCSOnly: when TRUE, an entry with action MBCS_STATE_CHANGE_ONLY is not accepted.
- * Returns TRUE for a transition entry whose target state has valid trail bytes
- * (see hasValidTrailBytes()), or for a final entry whose action is not MBCS_STATE_ILLEGAL.
- */
-static UBool
-isSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) {
- const int32_t *row=stateTable[state];
- int32_t entry=row[b];
- if(MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */
- return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry));
- } else { /* final entry: b alone completes a sequence in this state */
- uint8_t action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
- if(action==MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {
- return FALSE; /* SI/SO are illegal for DBCS-only conversion */
- } else {
- return action!=MBCS_STATE_ILLEGAL;
- }
- }
-}
-
-U_CFUNC void
-ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const uint8_t *source, *sourceLimit;
- UChar *target;
- const UChar *targetLimit;
- int32_t *offsets;
-
- const int32_t (*stateTable)[256];
- const uint16_t *unicodeCodeUnits;
-
- uint32_t offset;
- uint8_t state;
- int8_t byteIndex;
- uint8_t *bytes;
-
- int32_t sourceIndex, nextSourceIndex;
-
- int32_t entry;
- UChar c;
- uint8_t action;
-
- /* use optimized function if possible */
- cnv=pArgs->converter;
-
- if(cnv->preToULength>0) {
- /*
- * pass sourceIndex=-1 because we continue from an earlier buffer
- * in the future, this may change with continuous offsets
- */
- ucnv_extContinueMatchToU(cnv, pArgs, -1, pErrorCode);
-
- if(U_FAILURE(*pErrorCode) || cnv->preToULength<0) {
- return;
- }
- }
-
- if(cnv->sharedData->mbcs.countStates==1) {
- if(!(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
- ucnv_MBCSSingleToBMPWithOffsets(pArgs, pErrorCode);
- } else {
- ucnv_MBCSSingleToUnicodeWithOffsets(pArgs, pErrorCode);
- }
- return;
- }
-
- /* set up the local pointers */
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=pArgs->target;
- targetLimit=pArgs->targetLimit;
- offsets=pArgs->offsets;
-
- if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
- stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
- } else {
- stateTable=cnv->sharedData->mbcs.stateTable;
- }
- unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
-
- /* get the converter state from UConverter */
- offset=cnv->toUnicodeStatus;
- byteIndex=cnv->toULength;
- bytes=cnv->toUBytes;
-
- /*
- * if we are in the SBCS state for a DBCS-only converter,
- * then load the DBCS state from the MBCS data
- * (dbcsOnlyState==0 if it is not a DBCS-only converter)
- */
- if((state=(uint8_t)(cnv->mode))==0) {
- state=cnv->sharedData->mbcs.dbcsOnlyState;
- }
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex=byteIndex==0 ? 0 : -1;
- nextSourceIndex=0;
-
- /* conversion loop */
- while(source<sourceLimit) {
- /*
- * This following test is to see if available input would overflow the output.
- * It does not catch output of more than one code unit that
- * overflows as a result of a surrogate pair or callback output
- * from the last source byte.
- * Therefore, those situations also test for overflows and will
- * then break the loop, too.
- */
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
-
- if(byteIndex==0) {
- /* optimized loop for 1/2-byte input and BMP output */
- if(offsets==NULL) {
- do {
- entry=stateTable[state][*source];
- if(MBCS_ENTRY_IS_TRANSITION(entry)) {
- state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
- offset=MBCS_ENTRY_TRANSITION_OFFSET(entry);
-
- ++source;
- if( source<sourceLimit &&
- MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
- MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
- (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
- ) {
- ++source;
- *target++=c;
- state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
- offset=0;
- } else {
- /* set the state and leave the optimized loop */
- bytes[0]=*(source-1);
- byteIndex=1;
- break;
- }
- } else {
- if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
- /* output BMP code point */
- ++source;
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
- } else {
- /* leave the optimized loop */
- break;
- }
- }
- } while(source<sourceLimit && target<targetLimit);
- } else /* offsets!=NULL */ {
- do {
- entry=stateTable[state][*source];
- if(MBCS_ENTRY_IS_TRANSITION(entry)) {
- state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
- offset=MBCS_ENTRY_TRANSITION_OFFSET(entry);
-
- ++source;
- if( source<sourceLimit &&
- MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
- MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
- (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
- ) {
- ++source;
- *target++=c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- sourceIndex=(nextSourceIndex+=2);
- }
- state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
- offset=0;
- } else {
- /* set the state and leave the optimized loop */
- ++nextSourceIndex;
- bytes[0]=*(source-1);
- byteIndex=1;
- break;
- }
- } else {
- if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
- /* output BMP code point */
- ++source;
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- sourceIndex=++nextSourceIndex;
- }
- state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
- } else {
- /* leave the optimized loop */
- break;
- }
- }
- } while(source<sourceLimit && target<targetLimit);
- }
-
- /*
- * these tests and break statements could be put inside the loop
- * if C had "break outerLoop" like Java
- */
- if(source>=sourceLimit) {
- break;
- }
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
-
- ++nextSourceIndex;
- bytes[byteIndex++]=*source++;
- } else /* byteIndex>0 */ {
- ++nextSourceIndex;
- entry=stateTable[state][bytes[byteIndex++]=*source++];
- }
-
- if(MBCS_ENTRY_IS_TRANSITION(entry)) {
- state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
- offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
- continue;
- }
-
- /* save the previous state for proper extension mapping with SI/SO-stateful converters */
- cnv->mode=state;
-
- /* set the next state early so that we can reuse the entry variable */
- state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
-
- /*
- * An if-else-if chain provides more reliable performance for
- * the most common cases compared to a switch.
- */
- action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
- if(action==MBCS_STATE_VALID_16) {
- offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
- c=unicodeCodeUnits[offset];
- if(c<0xfffe) {
- /* output BMP code point */
- *target++=c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- byteIndex=0;
- } else if(c==0xfffe) {
- if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
- /* output fallback BMP code point */
- *target++=(UChar)entry;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- byteIndex=0;
- }
- } else {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- }
- } else if(action==MBCS_STATE_VALID_DIRECT_16) {
- /* output BMP code point */
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- byteIndex=0;
- } else if(action==MBCS_STATE_VALID_16_PAIR) {
- offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
- c=unicodeCodeUnits[offset++];
- if(c<0xd800) {
- /* output BMP code point below 0xd800 */
- *target++=c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- byteIndex=0;
- } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
- /* output roundtrip or fallback surrogate pair */
- *target++=(UChar)(c&0xdbff);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- byteIndex=0;
- if(target<targetLimit) {
- *target++=unicodeCodeUnits[offset];
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- } else {
- /* target overflow */
- cnv->UCharErrorBuffer[0]=unicodeCodeUnits[offset];
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-
- offset=0;
- break;
- }
- } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
- /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
- *target++=unicodeCodeUnits[offset];
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- byteIndex=0;
- } else if(c==0xffff) {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- }
- } else if(action==MBCS_STATE_VALID_DIRECT_20 ||
- (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
- ) {
- entry=MBCS_ENTRY_FINAL_VALUE(entry);
- /* output surrogate pair */
- *target++=(UChar)(0xd800|(UChar)(entry>>10));
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- byteIndex=0;
- c=(UChar)(0xdc00|(UChar)(entry&0x3ff));
- if(target<targetLimit) {
- *target++=c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- } else {
- /* target overflow */
- cnv->UCharErrorBuffer[0]=c;
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-
- offset=0;
- break;
- }
- } else if(action==MBCS_STATE_CHANGE_ONLY) {
- /*
- * This serves as a state change without any output.
- * It is useful for reading simple stateful encodings,
- * for example using just Shift-In/Shift-Out codes.
- * The 21 unused bits may later be used for more sophisticated
- * state transitions.
- */
- if(cnv->sharedData->mbcs.dbcsOnlyState==0) {
- byteIndex=0;
- } else {
- /* SI/SO are illegal for DBCS-only conversion */
- state=(uint8_t)(cnv->mode); /* restore the previous state */
-
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- }
- } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
- if(UCNV_TO_U_USE_FALLBACK(cnv)) {
- /* output BMP code point */
- *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- byteIndex=0;
- }
- } else if(action==MBCS_STATE_UNASSIGNED) {
- /* just fall through */
- } else if(action==MBCS_STATE_ILLEGAL) {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- } else {
- /* reserved, must never occur */
- byteIndex=0;
- }
-
- /* end of action codes: prepare for a new character */
- offset=0;
-
- if(byteIndex==0) {
- sourceIndex=nextSourceIndex;
- } else if(U_FAILURE(*pErrorCode)) {
- /* callback(illegal) */
- if(byteIndex>1) {
- /*
- * Ticket 5691: consistent illegal sequences:
- * - We include at least the first byte in the illegal sequence.
- * - If any of the non-initial bytes could be the start of a character,
- * we stop the illegal sequence before the first one of those.
- */
- UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
- int8_t i;
- for(i=1;
- i<byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, bytes[i]);
- ++i) {}
- if(i<byteIndex) {
- /* Back out some bytes. */
- int8_t backOutDistance=byteIndex-i;
- int32_t bytesFromThisBuffer=(int32_t)(source-(const uint8_t *)pArgs->source);
- byteIndex=i; /* length of reported illegal byte sequence */
- if(backOutDistance<=bytesFromThisBuffer) {
- source-=backOutDistance;
- } else {
- /* Back out bytes from the previous buffer: Need to replay them. */
- cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
- /* preToULength is negative! */
- uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength);
- source=(const uint8_t *)pArgs->source;
- }
- }
- }
- break;
- } else /* unassigned sequences indicated with byteIndex>0 */ {
- /* try an extension mapping */
- pArgs->source=(const char *)source;
- byteIndex=_extToU(cnv, cnv->sharedData,
- byteIndex, &source, sourceLimit,
- &target, targetLimit,
- &offsets, sourceIndex,
- pArgs->flush,
- pErrorCode);
- sourceIndex=nextSourceIndex+=(int32_t)(source-(const uint8_t *)pArgs->source);
-
- if(U_FAILURE(*pErrorCode)) {
- /* not mappable or buffer overflow */
- break;
- }
- }
- }
-
- /* set the converter state back into UConverter */
- cnv->toUnicodeStatus=offset;
- cnv->mode=state;
- cnv->toULength=byteIndex;
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- pArgs->offsets=offsets;
-}
-
-/*
- * Fast path of ucnv_MBCSGetNextUChar() for single-byte, single-state codepages.
- *
- * Every byte is looked up in row 0 of the state table (the swapLFNL variant
- * when UCNV_OPTION_SWAP_LFNL is set); the entries are expected to be final.
- * A loop is still needed because reserved action codes are skipped.
- *
- * pArgs->source is updated after each byte is read. Outcomes:
- * - a BMP or supplementary code point is returned as soon as one is found;
- * - for an unassigned byte (or a FALLBACK_DIRECT_16 entry while fallbacks are
- *   off) with no failure in *pErrorCode, pArgs->source is moved back onto that
- *   byte and UCNV_GET_NEXT_UCHAR_USE_TO_U is returned so that the generic
- *   implementation handles it;
- * - otherwise the function ends with *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR
- *   and returns 0xffff.
- *   NOTE(review): this tail is also reached after an illegal entry, which
- *   replaces U_ILLEGAL_CHAR_FOUND - confirm that this is intended.
- */
-static UChar32
-ucnv_MBCSSingleGetNextUChar(UConverterToUnicodeArgs *pArgs,
-                            UErrorCode *pErrorCode) {
-    UConverter *cnv=pArgs->converter;
-    const uint8_t *source=(const uint8_t *)pArgs->source;
-    const uint8_t *sourceLimit=(const uint8_t *)pArgs->sourceLimit;
-    const int32_t (*stateTable)[256];
-
-    /* pick the state table that matches the LF/NL swap option */
-    if((cnv->options&UCNV_OPTION_SWAP_LFNL)==0) {
-        stateTable=cnv->sharedData->mbcs.stateTable;
-    } else {
-        stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
-    }
-
-    while(source<sourceLimit) {
-        const int32_t entry=stateTable[0][*source];
-        uint8_t action;
-
-        /* consume the byte and publish the new position before any return */
-        ++source;
-        pArgs->source=(const char *)source;
-
-        /* most common case first: direct BMP code point */
-        if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
-            return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
-        }
-
-        action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
-
-        /* supplementary code point, roundtrip or (if enabled) fallback */
-        if(action==MBCS_STATE_VALID_DIRECT_20) {
-            return (UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
-        }
-        if(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) {
-            return (UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
-        }
-
-        if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
-            if(UCNV_TO_U_USE_FALLBACK(cnv)) {
-                /* fallback BMP code point */
-                return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
-            }
-            /* fallbacks are off: treat like an unassigned byte below */
-        } else if(action==MBCS_STATE_ILLEGAL) {
-            /* callback(illegal) */
-            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-        } else if(action!=MBCS_STATE_UNASSIGNED) {
-            /* reserved, must never occur: ignore this byte */
-            continue;
-        }
-
-        if(U_FAILURE(*pErrorCode)) {
-            /* callback(illegal) */
-            break;
-        }
-
-        /* unassigned: step back onto the byte and defer to the generic implementation */
-        pArgs->source=(const char *)source-1;
-        return UCNV_GET_NEXT_UCHAR_USE_TO_U;
-    }
-
-    /* no output because of empty input or only state changes */
-    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-    return 0xffff;
-}
-
-/*
- * Version of _MBCSToUnicodeWithOffsets() optimized for single-character
- * conversion without offset handling.
- *
- * When a character does not have a mapping to Unicode, then we return to the
- * generic ucnv_getNextUChar() code for extension/GB 18030 and error/callback
- * handling.
- * We also defer to the generic code in other complicated cases and have them
- * ultimately handled by _MBCSToUnicodeWithOffsets() itself.
- *
- * All normal mappings and errors are handled here.
- *
- * Early exits, before any byte is read:
- * - cnv->preToULength>0, or a unicodeMask with UCNV_HAS_SURROGATES:
- *   returns UCNV_GET_NEXT_UCHAR_USE_TO_U.
- * - countStates==1: forwards to ucnv_MBCSSingleGetNextUChar().
- *
- * Results of the conversion loop:
- * - A code point was found: it is returned, pArgs->source points past the
- *   bytes that were read, cnv->mode receives the next state and
- *   cnv->toUnicodeStatus is reset to 0.
- * - Unassigned sequence without a failure status: pArgs->source is set to
- *   lastSource and UCNV_GET_NEXT_UCHAR_USE_TO_U is returned.
- * - The loop ended while c is still negative: 0xffff is returned and
- *   *pErrorCode is U_TRUNCATED_CHAR_FOUND (input ended inside a character,
- *   the pending bytes are copied to cnv->toUBytes), a failure code such as
- *   U_ILLEGAL_CHAR_FOUND (the reported bytes are copied to cnv->toUBytes and
- *   source may be moved back), or U_INDEX_OUTOFBOUNDS_ERROR (empty input or
- *   only state changes).
- */
-static UChar32 U_CALLCONV
-ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const uint8_t *source, *sourceLimit, *lastSource;
-
- const int32_t (*stateTable)[256];
- const uint16_t *unicodeCodeUnits;
-
- uint32_t offset;
- uint8_t state;
-
- int32_t entry;
- UChar32 c;
- uint8_t action;
-
- /* use optimized function if possible */
- cnv=pArgs->converter;
-
- if(cnv->preToULength>0) {
- /* use the generic code in ucnv_getNextUChar() to continue with a partial match */
- return UCNV_GET_NEXT_UCHAR_USE_TO_U;
- }
-
- if(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SURROGATES) {
- /*
- * Using the generic ucnv_getNextUChar() code lets us deal correctly
- * with the rare case of a codepage that maps single surrogates
- * without adding the complexity to this already complicated function here.
- */
- return UCNV_GET_NEXT_UCHAR_USE_TO_U;
- } else if(cnv->sharedData->mbcs.countStates==1) {
- return ucnv_MBCSSingleGetNextUChar(pArgs, pErrorCode);
- }
-
- /* set up the local pointers; lastSource marks where the bytes of the current character begin */
- source=lastSource=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
-
- if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
- stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
- } else {
- stateTable=cnv->sharedData->mbcs.stateTable;
- }
- unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
-
- /* get the converter state from UConverter */
- offset=cnv->toUnicodeStatus;
-
- /*
- * if we are in the SBCS state for a DBCS-only converter,
- * then load the DBCS state from the MBCS data
- * (dbcsOnlyState==0 if it is not a DBCS-only converter)
- */
- if((state=(uint8_t)(cnv->mode))==0) {
- state=cnv->sharedData->mbcs.dbcsOnlyState;
- }
-
- /* conversion loop; c stays negative (U_SENTINEL) until a value is stored in it */
- c=U_SENTINEL;
- while(source<sourceLimit) {
- entry=stateTable[state][*source++];
- if(MBCS_ENTRY_IS_TRANSITION(entry)) {
- state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
- offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
-
- /* optimization for 1/2-byte input and BMP output
-  * (peeks at the next byte; it is consumed only if all four tests pass) */
- if( source<sourceLimit &&
- MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
- MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
- (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
- ) {
- ++source;
- state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
- /* output BMP code point */
- break;
- }
- } else {
- /* save the previous state for proper extension mapping with SI/SO-stateful converters */
- cnv->mode=state;
-
- /* set the next state early so that we can reuse the entry variable */
- state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
-
- /*
- * An if-else-if chain provides more reliable performance for
- * the most common cases compared to a switch.
- */
- action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
- if(action==MBCS_STATE_VALID_DIRECT_16) {
- /* output BMP code point */
- c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- break;
- } else if(action==MBCS_STATE_VALID_16) {
- offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
- c=unicodeCodeUnits[offset];
- if(c<0xfffe) {
- /* output BMP code point */
- break;
- } else if(c==0xfffe) {
- if(UCNV_TO_U_USE_FALLBACK(cnv) && (c=ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
- break;
- }
- } else {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- }
- } else if(action==MBCS_STATE_VALID_16_PAIR) {
- offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
- c=unicodeCodeUnits[offset++];
- if(c<0xd800) {
- /* output BMP code point below 0xd800 */
- break;
- } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
- /* output roundtrip or fallback supplementary code point */
- c=((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00);
- break;
- } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
- /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
- c=unicodeCodeUnits[offset];
- break;
- } else if(c==0xffff) {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- }
- } else if(action==MBCS_STATE_VALID_DIRECT_20 ||
- (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
- ) {
- /* output supplementary code point */
- c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
- break;
- } else if(action==MBCS_STATE_CHANGE_ONLY) {
- /*
- * This serves as a state change without any output.
- * It is useful for reading simple stateful encodings,
- * for example using just Shift-In/Shift-Out codes.
- * The 21 unused bits may later be used for more sophisticated
- * state transitions.
- */
- if(cnv->sharedData->mbcs.dbcsOnlyState!=0) {
- /* SI/SO are illegal for DBCS-only conversion */
- state=(uint8_t)(cnv->mode); /* restore the previous state */
-
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- }
- } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
- if(UCNV_TO_U_USE_FALLBACK(cnv)) {
- /* output BMP code point */
- c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- break;
- }
- } else if(action==MBCS_STATE_UNASSIGNED) {
- /* just fall through */
- } else if(action==MBCS_STATE_ILLEGAL) {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- } else {
- /* reserved (must never occur), or only state change:
-  * skip these bytes and start the next character after them */
- offset=0;
- lastSource=source;
- continue;
- }
-
- /* end of action codes: prepare for a new character */
- offset=0;
-
- if(U_FAILURE(*pErrorCode)) {
- /* callback(illegal) */
- break;
- } else /* unassigned sequence */ {
- /* defer to the generic implementation
-  * (rewind to the first byte of this sequence so that it is read again) */
- cnv->toUnicodeStatus=0;
- cnv->mode=state;
- pArgs->source=(const char *)lastSource;
- return UCNV_GET_NEXT_UCHAR_USE_TO_U;
- }
- }
- }
-
- if(c<0) { /* the loop ended without storing a code point in c */
- if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) {
- /* incomplete character byte sequence */
- uint8_t *bytes=cnv->toUBytes;
- cnv->toULength=(int8_t)(source-lastSource);
- do {
- *bytes++=*lastSource++;
- } while(lastSource<source);
- *pErrorCode=U_TRUNCATED_CHAR_FOUND;
- } else if(U_FAILURE(*pErrorCode)) {
- /* callback(illegal) */
- /*
- * Ticket 5691: consistent illegal sequences:
- * - We include at least the first byte in the illegal sequence.
- * - If any of the non-initial bytes could be the start of a character,
- * we stop the illegal sequence before the first one of those.
- */
- UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
- uint8_t *bytes=cnv->toUBytes;
- *bytes++=*lastSource++; /* first byte */
- if(lastSource==source) {
- cnv->toULength=1;
- } else /* lastSource<source: multi-byte character */ {
- int8_t i;
- for(i=1;
- lastSource<source && !isSingleOrLead(stateTable, state, isDBCSOnly, *lastSource);
- ++i
- ) {
- *bytes++=*lastSource++;
- }
- cnv->toULength=i;
- source=lastSource; /* bytes not copied above are left unconsumed */
- }
- } else {
- /* no output because of empty input or only state changes */
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- }
- c=0xffff;
- }
-
- /* set the converter state back into UConverter, ready for a new character */
- cnv->toUnicodeStatus=0;
- cnv->mode=state;
-
- /* write back the updated pointer */
- pArgs->source=(const char *)source;
- return c;
-}
-
-#if 0
-/*
- * Disabled on 2002dec09 (ICU 2.4): nothing in ICU currently uses this code. markus
- * Leaving it out improves code coverage.
- */
-/**
- * Single-byte, single-state variant of ucnv_MBCSSimpleGetNextUChar().
- * The EBCDIC swaplfnl option (set in UConverter) is not handled.
- * Conversion extensions (_extToU()) are not handled.
- *
- * Returns the code point for byte b, 0xfffe when b is unassigned or maps only
- * through a fallback that is switched off, and 0xffff when b is illegal or
- * has a reserved action code.
- */
-U_CFUNC UChar32
-ucnv_MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
-                                  uint8_t b, UBool useFallback) {
-    /* row 0 is the only state; its entries are expected to be final */
-    const int32_t entry=sharedData->mbcs.stateTable[0][b];
-    uint8_t action;
-
-    /* most common case first: direct BMP code point */
-    if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
-        return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
-    }
-
-    action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
-
-    if(action==MBCS_STATE_VALID_DIRECT_20) {
-        /* roundtrip supplementary code point */
-        return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
-    }
-
-    if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
-        if(TO_U_USE_FALLBACK(useFallback)) {
-            /* fallback BMP code point */
-            return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
-        }
-        return 0xfffe;
-    }
-
-    if(action==MBCS_STATE_FALLBACK_DIRECT_20) {
-        if(TO_U_USE_FALLBACK(useFallback)) {
-            /* fallback supplementary code point */
-            return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
-        }
-        return 0xfffe;
-    }
-
-    if(action==MBCS_STATE_UNASSIGNED) {
-        return 0xfffe;
-    }
-
-    /* MBCS_STATE_ILLEGAL, or a reserved action code that must never occur */
-    return 0xffff;
-}
-#endif
-
-/*
- * This is a simple version of _MBCSGetNextUChar() that is used
- * by other converter implementations.
- * It only returns an "assigned" result if it consumes the entire input.
- * It does not use state from the converter, nor error codes.
- * It does not handle the EBCDIC swaplfnl option (set in UConverter).
- * It handles conversion extensions but not GB 18030.
- *
- * Parameters:
- * sharedData - supplies the state table, the Unicode code units,
- * dbcsOnlyState (the start state) and the extension indexes
- * source, length - the bytes of exactly one character; length<=0 is "illegal"
- * useFallback - passed to TO_U_USE_FALLBACK() for every fallback decision
- * and to ucnv_extSimpleMatchToU()
- *
- * Since there is no UConverter in this function, all fallback tests go
- * through TO_U_USE_FALLBACK(useFallback); none of them may name a converter.
- *
- * Return value:
- * U+fffe unassigned
- * U+ffff illegal
- * otherwise the Unicode code point
- *
- * U+ffff is also returned for truncated input, for input that is longer
- * than one character, and for state-change-only or reserved action codes.
- */
-U_CFUNC UChar32
-ucnv_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
- const char *source, int32_t length,
- UBool useFallback) {
- const int32_t (*stateTable)[256];
- const uint16_t *unicodeCodeUnits;
-
- uint32_t offset;
- uint8_t state, action;
-
- UChar32 c;
- int32_t i, entry;
-
- if(length<=0) {
- /* no input at all: "illegal" */
- return 0xffff;
- }
-
-#if 0
-/*
- * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
- * TODO In future releases, verify that this function is never called for SBCS
- * conversions, i.e., that sharedData->mbcs.countStates==1 is still true.
- * Removal improves code coverage.
- */
- /* use optimized function if possible */
- if(sharedData->mbcs.countStates==1) {
- if(length==1) {
- return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback);
- } else {
- return 0xffff; /* illegal: more than a single byte for an SBCS converter */
- }
- }
-#endif
-
- /* set up the local pointers */
- stateTable=sharedData->mbcs.stateTable;
- unicodeCodeUnits=sharedData->mbcs.unicodeCodeUnits;
-
- /* converter state */
- offset=0;
- state=sharedData->mbcs.dbcsOnlyState;
-
- /* conversion loop: ends at the first final entry or when the input runs out */
- for(i=0;;) {
- entry=stateTable[state][(uint8_t)source[i++]];
- if(MBCS_ENTRY_IS_TRANSITION(entry)) {
- state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
- offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
-
- if(i==length) {
- return 0xffff; /* truncated character */
- }
- } else {
- /*
- * An if-else-if chain provides more reliable performance for
- * the most common cases compared to a switch.
- */
- action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
- if(action==MBCS_STATE_VALID_16) {
- offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
- c=unicodeCodeUnits[offset];
- if(c!=0xfffe) {
- /* done */
- } else if(TO_U_USE_FALLBACK(useFallback)) {
- c=ucnv_MBCSGetFallback(&sharedData->mbcs, offset);
- /* else done with 0xfffe */
- }
- break;
- } else if(action==MBCS_STATE_VALID_DIRECT_16) {
- /* output BMP code point */
- c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- break;
- } else if(action==MBCS_STATE_VALID_16_PAIR) {
- offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
- c=unicodeCodeUnits[offset++];
- if(c<0xd800) {
- /* output BMP code point below 0xd800 */
- } else if(TO_U_USE_FALLBACK(useFallback) ? c<=0xdfff : c<=0xdbff) {
- /* output roundtrip or fallback supplementary code point */
- c=(UChar32)(((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00));
- } else if(TO_U_USE_FALLBACK(useFallback) ? (c&0xfffe)==0xe000 : c==0xe000) {
- /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
- c=unicodeCodeUnits[offset];
- } else if(c==0xffff) {
- return 0xffff;
- } else {
- c=0xfffe;
- }
- break;
- } else if(action==MBCS_STATE_VALID_DIRECT_20) {
- /* output supplementary code point */
- c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
- break;
- } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
- if(!TO_U_USE_FALLBACK(useFallback)) {
- c=0xfffe;
- break;
- }
- /* output BMP code point */
- c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
- break;
- } else if(action==MBCS_STATE_FALLBACK_DIRECT_20) {
- if(!TO_U_USE_FALLBACK(useFallback)) {
- c=0xfffe;
- break;
- }
- /* output supplementary code point */
- c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
- break;
- } else if(action==MBCS_STATE_UNASSIGNED) {
- c=0xfffe;
- break;
- }
-
- /*
- * forbid MBCS_STATE_CHANGE_ONLY for this function,
- * and MBCS_STATE_ILLEGAL and reserved action codes
- */
- return 0xffff;
- }
- }
-
- if(i!=length) {
- /* illegal for this function: not all input consumed */
- return 0xffff;
- }
-
- if(c==0xfffe) {
- /* try an extension mapping */
- const int32_t *cx=sharedData->mbcs.extIndexes;
- if(cx!=NULL) {
- return ucnv_extSimpleMatchToU(cx, source, length, useFallback);
- }
- }
-
- return c;
-}
-
-/* MBCS-from-Unicode conversion functions ----------------------------------- */
-
-/*
- * This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages.
- *
- * Converts UTF-16 code units from pArgs->source into one or two bytes each
- * at pArgs->target (one byte when the result value is <=0xff, otherwise two).
- * When pArgs->offsets is not NULL, one source index is stored per output byte.
- *
- * Lookup order for each code point:
- * - c<=0x7f with IS_ASCII_ROUNDTRIP(): the code unit is copied as one byte.
- * - c<=0xd7ff: DBCS_RESULT_FROM_MOST_BMP(); 0 means "no mapping".
- * - otherwise: surrogates are paired first (unless unicodeMask has
- *   UCNV_HAS_SURROGATES), then the stage 2 tables are used.
- * - code points without a usable result go to _extFromU().
- *
- * Errors reported through *pErrorCode:
- * - U_ILLEGAL_CHAR_FOUND for an unmatched lead or trail surrogate.
- * - U_BUFFER_OVERFLOW_ERROR when the target is full; if only the first of two
- *   bytes fit, the second byte is stored in cnv->charErrorBuffer.
- * - whatever _extFromU() sets.
- *
- * On return cnv->fromUChar32 holds the pending code point (a lead surrogate
- * if the input ended after it, otherwise usually 0), and pArgs->source,
- * pArgs->target and pArgs->offsets are updated.
- */
-static void
-ucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const UChar *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
- int32_t *offsets;
-
- const uint16_t *table;
- const uint16_t *mbcsIndex;
- const uint8_t *bytes;
-
- UChar32 c;
-
- int32_t sourceIndex, nextSourceIndex;
-
- uint32_t stage2Entry;
- uint32_t asciiRoundtrips;
- uint32_t value;
- uint8_t unicodeMask;
-
- /* use optimized function if possible */
- cnv=pArgs->converter;
- unicodeMask=cnv->sharedData->mbcs.unicodeMask;
-
- /* set up the local pointers */
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
- offsets=pArgs->offsets;
-
- table=cnv->sharedData->mbcs.fromUnicodeTable;
- mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
- if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
- bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
- } else {
- bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
- }
- asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
-
- /* get the converter state from UConverter */
- c=cnv->fromUChar32;
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex= c==0 ? 0 : -1;
- nextSourceIndex=0;
-
- /* conversion loop
-  * (a pending code point from the previous call resumes at getTrail) */
- if(c!=0 && targetCapacity>0) {
- goto getTrail;
- }
-
- while(source<sourceLimit) {
- /*
- * This following test is to see if available input would overflow the output.
- * It does not catch output of more than one byte that
- * overflows as a result of a multi-byte character or callback output
- * from the last source character.
- * Therefore, those situations also test for overflows and will
- * then break the loop, too.
- */
- if(targetCapacity>0) {
- /*
- * Get a correct Unicode code point:
- * a single UChar for a BMP code point or
- * a matched surrogate pair for a "supplementary code point".
- */
- c=*source++;
- ++nextSourceIndex;
- if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
- *target++=(uint8_t)c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- sourceIndex=nextSourceIndex;
- }
- --targetCapacity;
- c=0;
- continue;
- }
- /*
- * utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX
- * to avoid dealing with surrogates.
- * MBCS_FAST_MAX must be >=0xd7ff.
- */
- if(c<=0xd7ff) {
- value=DBCS_RESULT_FROM_MOST_BMP(mbcsIndex, (const uint16_t *)bytes, c);
- /* There are only roundtrips (!=0) and no-mapping (==0) entries. */
- if(value==0) {
- goto unassigned;
- }
- /* output the value */
- } else {
- /*
- * This also tests if the codepage maps single surrogates.
- * If it does, then surrogates are not paired but mapped separately.
- * Note that in this case unmatched surrogates are not detected.
- */
- if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
- if(U16_IS_SURROGATE_LEAD(c)) {
-getTrail:
- if(source<sourceLimit) {
- /* test the following code unit */
- UChar trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- ++nextSourceIndex;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
- /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
- /* callback(unassigned) */
- goto unassigned;
- }
- /* convert this supplementary code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- } else {
- /* no more input
-  * (c keeps its value and is stored in cnv->fromUChar32 after the loop) */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
-
- /* convert the Unicode code point in c into codepage bytes */
- stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
-
- /* get the bytes and the length for the output */
- /* MBCS_OUTPUT_2 */
- value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
-
- /* is this code point assigned, or do we use fallbacks? */
- if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ||
- (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
- ) {
- /*
- * We allow a 0 byte output if the "assigned" bit is set for this entry.
- * There is no way with this data structure for fallback output
- * to be a zero byte.
- */
-
-unassigned:
- /* try an extension mapping */
- pArgs->source=source;
- c=_extFromU(cnv, cnv->sharedData,
- c, &source, sourceLimit,
- &target, target+targetCapacity,
- &offsets, sourceIndex,
- pArgs->flush,
- pErrorCode);
- nextSourceIndex+=(int32_t)(source-pArgs->source);
-
- if(U_FAILURE(*pErrorCode)) {
- /* not mappable or buffer overflow */
- break;
- } else {
- /* a mapping was written to the target, continue */
-
- /* recalculate the targetCapacity after an extension mapping */
- targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
-
- /* normal end of conversion: prepare for a new character */
- sourceIndex=nextSourceIndex;
- continue;
- }
- }
- }
-
- /* write the output character bytes from value and length */
- /* from the first if in the loop we know that targetCapacity>0 */
- if(value<=0xff) {
- /* this is easy because we know that there is enough space */
- *target++=(uint8_t)value;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- } else /* length==2 */ {
- *target++=(uint8_t)(value>>8);
- if(2<=targetCapacity) {
- *target++=(uint8_t)value;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- }
- targetCapacity-=2;
- } else {
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- cnv->charErrorBuffer[0]=(char)value; /* the second byte did not fit: keep it for later */
- cnv->charErrorBufferLength=1;
-
- /* target overflow */
- targetCapacity=0;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- c=0;
- break;
- }
- }
-
- /* normal end of conversion: prepare for a new character */
- c=0;
- sourceIndex=nextSourceIndex;
- continue;
- } else {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-
- /* set the converter state back into UConverter */
- cnv->fromUChar32=c;
-
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- pArgs->offsets=offsets;
-}
-
-/*
- * This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages.
- *
- * Converts UTF-16 code units from pArgs->source into one byte per code point
- * at pArgs->target. When pArgs->offsets is not NULL, one source index is
- * stored per output byte.
- *
- * A table result is used when it is >=minValue; minValue is 0x800 when
- * cnv->useFallback is set and 0xc00 otherwise. Every other code point,
- * including a supplementary code point when the codepage has no
- * UCNV_HAS_SUPPLEMENTARY flag, is passed to _extFromU().
- *
- * Errors reported through *pErrorCode:
- * - U_ILLEGAL_CHAR_FOUND for an unmatched lead or trail surrogate.
- * - U_BUFFER_OVERFLOW_ERROR when input remains but targetCapacity is 0.
- * - whatever _extFromU() sets.
- *
- * On return cnv->fromUChar32 holds the pending code point (a lead surrogate
- * if the input ended after it, otherwise usually 0), and pArgs->source,
- * pArgs->target and pArgs->offsets are updated.
- */
-static void
-ucnv_MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const UChar *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
- int32_t *offsets;
-
- const uint16_t *table;
- const uint16_t *results;
-
- UChar32 c;
-
- int32_t sourceIndex, nextSourceIndex;
-
- uint16_t value, minValue;
- UBool hasSupplementary;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
- offsets=pArgs->offsets;
-
- table=cnv->sharedData->mbcs.fromUnicodeTable;
- if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
- results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
- } else {
- results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
- }
-
- if(cnv->useFallback) {
- /* use all roundtrip and fallback results */
- minValue=0x800;
- } else {
- /* use only roundtrips and fallbacks from private-use characters */
- minValue=0xc00;
- }
- hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
-
- /* get the converter state from UConverter */
- c=cnv->fromUChar32;
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex= c==0 ? 0 : -1;
- nextSourceIndex=0;
-
- /* conversion loop
-  * (a pending code point from the previous call resumes at getTrail) */
- if(c!=0 && targetCapacity>0) {
- goto getTrail;
- }
-
- while(source<sourceLimit) {
- /*
- * This following test is to see if available input would overflow the output.
- * It does not catch output of more than one byte that
- * overflows as a result of a multi-byte character or callback output
- * from the last source character.
- * Therefore, those situations also test for overflows and will
- * then break the loop, too.
- */
- if(targetCapacity>0) {
- /*
- * Get a correct Unicode code point:
- * a single UChar for a BMP code point or
- * a matched surrogate pair for a "supplementary code point".
- */
- c=*source++;
- ++nextSourceIndex;
- if(U16_IS_SURROGATE(c)) {
- if(U16_IS_SURROGATE_LEAD(c)) {
-getTrail:
- if(source<sourceLimit) {
- /* test the following code unit */
- UChar trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- ++nextSourceIndex;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- if(!hasSupplementary) {
- /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
- /* callback(unassigned) */
- goto unassigned;
- }
- /* convert this supplementary code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- } else {
- /* no more input
-  * (c keeps its value and is stored in cnv->fromUChar32 after the loop) */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
-
- /* convert the Unicode code point in c into codepage bytes */
- value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
-
- /* is this code point assigned, or do we use fallbacks? */
- if(value>=minValue) {
- /* assigned, write the output character bytes from value and length */
- /* length==1 */
- /* this is easy because we know that there is enough space */
- *target++=(uint8_t)value;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
-
- /* normal end of conversion: prepare for a new character */
- c=0;
- sourceIndex=nextSourceIndex;
- } else { /* unassigned */
-unassigned:
- /* try an extension mapping */
- pArgs->source=source;
- c=_extFromU(cnv, cnv->sharedData,
- c, &source, sourceLimit,
- &target, target+targetCapacity,
- &offsets, sourceIndex,
- pArgs->flush,
- pErrorCode);
- nextSourceIndex+=(int32_t)(source-pArgs->source);
-
- if(U_FAILURE(*pErrorCode)) {
- /* not mappable or buffer overflow */
- break;
- } else {
- /* a mapping was written to the target, continue */
-
- /* recalculate the targetCapacity after an extension mapping */
- targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
-
- /* normal end of conversion: prepare for a new character */
- sourceIndex=nextSourceIndex;
- }
- }
- } else {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-
- /* set the converter state back into UConverter */
- cnv->fromUChar32=c;
-
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- pArgs->offsets=offsets;
-}
-
-/*
- * This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages
- * that map only to and from the BMP.
- * In addition to single-byte/state optimizations, the offset calculations
- * become much easier.
- * It would be possible to use the sbcsIndex for UTF-8-friendly tables,
- * but measurements have shown that this diminishes performance
- * in more cases than it improves it.
- * See SVN revision 21013 (2007-feb-06) for the last version with #if switches
- * for various MBCS and SBCS optimizations.
- *
- * Summary of the behavior implemented below:
- * Input and output positions are read from pArgs (source, sourceLimit,
- * target, targetLimit, offsets) and written back before returning.
- * Each UTF-16 code unit whose table result is >=minValue produces exactly
- * one target byte; every other code point is passed to _extFromU().
- * A lead surrogate that is the last unit of the input is kept in
- * cnv->fromUChar32 so that the next call can resume at the getTrail label.
- * *pErrorCode is set to U_ILLEGAL_CHAR_FOUND for unpaired surrogates,
- * to U_TRUNCATED_CHAR_FOUND for a trailing lead surrogate when pArgs->flush
- * is set, and to U_BUFFER_OVERFLOW_ERROR when input remains but the target
- * is full; _extFromU() may set *pErrorCode as well.
- */
-static void
-ucnv_MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const UChar *source, *sourceLimit, *lastSource;
- uint8_t *target;
- int32_t targetCapacity, length;
- int32_t *offsets;
-
- const uint16_t *table;
- const uint16_t *results;
-
- UChar32 c;
-
- int32_t sourceIndex;
-
- uint32_t asciiRoundtrips;
- uint16_t value, minValue;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
- offsets=pArgs->offsets;
-
- table=cnv->sharedData->mbcs.fromUnicodeTable;
- if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
- results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
- } else {
- results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
- }
- asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
-
- if(cnv->useFallback) {
- /* use all roundtrip and fallback results */
- minValue=0x800;
- } else {
- /* use only roundtrips and fallbacks from private-use characters */
- minValue=0xc00;
- }
-
- /* get the converter state from UConverter */
- c=cnv->fromUChar32;
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex= c==0 ? 0 : -1;
- lastSource=source;
-
- /*
- * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
- * for the minimum of the sourceLength and targetCapacity
- */
- length=(int32_t)(sourceLimit-source);
- if(length<targetCapacity) {
- targetCapacity=length;
- }
-
- /* conversion loop */
- if(c!=0 && targetCapacity>0) {
- goto getTrail;
- }
-
-#if MBCS_UNROLL_SINGLE_FROM_BMP
- /* unrolling makes it slower on Pentium III/Windows 2000?! */
- /* unroll the loop with the most common case */
-unrolled:
- if(targetCapacity>=4) {
- int32_t count, loops;
- uint16_t andedValues;
-
- loops=count=targetCapacity>>2;
- do {
- c=*source++;
- andedValues=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
- *target++=(uint8_t)value;
- c=*source++;
- andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
- *target++=(uint8_t)value;
- c=*source++;
- andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
- *target++=(uint8_t)value;
- c=*source++;
- andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
- *target++=(uint8_t)value;
-
- /* were all 4 entries really valid? */
- if(andedValues<minValue) {
- /* no, return to the first of these 4 */
- source-=4;
- target-=4;
- break;
- }
- } while(--count>0);
- count=loops-count;
- targetCapacity-=4*count;
-
- if(offsets!=NULL) {
- lastSource+=4*count;
- while(count>0) {
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- *offsets++=sourceIndex++;
- --count;
- }
- }
-
- c=0;
- }
-#endif
-
- while(targetCapacity>0) {
- /*
- * Get a correct Unicode code point:
- * a single UChar for a BMP code point or
- * a matched surrogate pair for a "supplementary code point".
- */
- c=*source++;
- /*
- * Do not immediately check for single surrogates:
- * Assume that they are unassigned and check for them in that case.
- * This speeds up the conversion of assigned characters.
- */
- /* convert the Unicode code point in c into codepage bytes */
- if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
- *target++=(uint8_t)c;
- --targetCapacity;
- c=0;
- continue;
- }
- value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
- /* is this code point assigned, or do we use fallbacks? */
- if(value>=minValue) {
- /* assigned, write the output character bytes from value and length */
- /* length==1 */
- /* this is easy because we know that there is enough space */
- *target++=(uint8_t)value;
- --targetCapacity;
-
- /* normal end of conversion: prepare for a new character */
- c=0;
- continue;
- } else if(!U16_IS_SURROGATE(c)) {
- /* normal, unassigned BMP character */
- } else if(U16_IS_SURROGATE_LEAD(c)) {
-getTrail:
- if(source<sourceLimit) {
- /* test the following code unit */
- UChar trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- /* this codepage does not map supplementary code points */
- /* callback(unassigned) */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- } else {
- /* no more input */
- if (pArgs->flush) {
- *pErrorCode=U_TRUNCATED_CHAR_FOUND;
- }
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
-
- /* c does not have a mapping */
-
- /* get the number of code units for c to correctly advance sourceIndex */
- length=U16_LENGTH(c);
-
- /* set offsets since the start or the last extension */
- if(offsets!=NULL) {
- int32_t count=(int32_t)(source-lastSource);
-
- /* do not set the offset for this character */
- count-=length;
-
- while(count>0) {
- *offsets++=sourceIndex++;
- --count;
- }
- /* offsets and sourceIndex are now set for the current character */
- }
-
- /* try an extension mapping */
- lastSource=source;
- c=_extFromU(cnv, cnv->sharedData,
- c, &source, sourceLimit,
- &target, (const uint8_t *)(pArgs->targetLimit),
- &offsets, sourceIndex,
- pArgs->flush,
- pErrorCode);
- sourceIndex+=length+(int32_t)(source-lastSource);
- lastSource=source;
-
- if(U_FAILURE(*pErrorCode)) {
- /* not mappable or buffer overflow */
- break;
- } else {
- /* a mapping was written to the target, continue */
-
- /* recalculate the targetCapacity after an extension mapping */
- targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
- length=(int32_t)(sourceLimit-source);
- if(length<targetCapacity) {
- targetCapacity=length;
- }
- }
-
-#if MBCS_UNROLL_SINGLE_FROM_BMP
- /* unrolling makes it slower on Pentium III/Windows 2000?! */
- goto unrolled;
-#endif
- }
-
- if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
-
- /* set offsets since the start or the last callback */
- if(offsets!=NULL) {
- size_t count=source-lastSource;
- if (count > 0 && *pErrorCode == U_TRUNCATED_CHAR_FOUND) {
- /*
- Caller gave us a partial supplementary character,
- which this function couldn't convert in any case.
- The callback will handle the offset.
- */
- count--;
- }
- while(count>0) {
- *offsets++=sourceIndex++;
- --count;
- }
- }
-
- /* set the converter state back into UConverter */
- cnv->fromUChar32=c;
-
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- pArgs->offsets=offsets;
-}
-/*
- * General MBCS from-Unicode conversion with offsets.
- *
- * First continues a pending extension match (cnv->preFromUFirstCP>=0), then
- * dispatches to the single-byte or double-byte specializations when the
- * converter's outputType, unicodeMask and utf8Friendly flag allow it.
- * Otherwise it converts UTF-16 input from pArgs->source into 1..4 bytes per
- * code point in pArgs->target, using mbcsIndex for c<=0xd7ff when the table
- * is utf8Friendly and the stage 2 lookup otherwise; code points without a
- * usable mapping are passed to _extFromU().
- *
- * For MBCS_OUTPUT_2_SISO, prevLength tracks the current single-/double-byte
- * mode, the SI/SO bytes are put in front of the character bytes when the
- * mode changes, and an SI is emitted at the end of flushed input that
- * ended in double-byte mode.
- *
- * On return, pArgs->source/target/offsets are updated, cnv->fromUChar32
- * holds the pending lead surrogate (or 0), cnv->fromUnicodeStatus holds
- * prevLength, and *pErrorCode reports illegal input or a full target.
- */
-U_CFUNC void
-ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const UChar *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
- int32_t *offsets;
-
- const uint16_t *table;
- const uint16_t *mbcsIndex;
- const uint8_t *p, *bytes;
- uint8_t outputType;
-
- UChar32 c;
-
- int32_t prevSourceIndex, sourceIndex, nextSourceIndex;
-
- uint32_t stage2Entry;
- uint32_t asciiRoundtrips;
- uint32_t value;
- /* Shift-In and Shift-Out byte sequences differ by encoding scheme. */
- uint8_t siBytes[2] = {0, 0};
- uint8_t soBytes[2] = {0, 0};
- uint8_t siLength, soLength;
- int32_t length = 0, prevLength;
- uint8_t unicodeMask;
-
- cnv=pArgs->converter;
-
- if(cnv->preFromUFirstCP>=0) {
- /*
- * pass sourceIndex=-1 because we continue from an earlier buffer
- * in the future, this may change with continuous offsets
- */
- ucnv_extContinueMatchFromU(cnv, pArgs, -1, pErrorCode);
-
- if(U_FAILURE(*pErrorCode) || cnv->preFromULength<0) {
- return;
- }
- }
-
- /* use optimized function if possible */
- outputType=cnv->sharedData->mbcs.outputType;
- unicodeMask=cnv->sharedData->mbcs.unicodeMask;
- if(outputType==MBCS_OUTPUT_1 && !(unicodeMask&UCNV_HAS_SURROGATES)) {
- if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
- ucnv_MBCSSingleFromBMPWithOffsets(pArgs, pErrorCode);
- } else {
- ucnv_MBCSSingleFromUnicodeWithOffsets(pArgs, pErrorCode);
- }
- return;
- } else if(outputType==MBCS_OUTPUT_2 && cnv->sharedData->mbcs.utf8Friendly) {
- ucnv_MBCSDoubleFromUnicodeWithOffsets(pArgs, pErrorCode);
- return;
- }
-
- /* set up the local pointers */
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
- offsets=pArgs->offsets;
-
- table=cnv->sharedData->mbcs.fromUnicodeTable;
- if(cnv->sharedData->mbcs.utf8Friendly) {
- mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
- } else {
- mbcsIndex=NULL;
- }
- if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
- bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
- } else {
- bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
- }
- asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
-
- /* get the converter state from UConverter */
- c=cnv->fromUChar32;
-
- if(outputType==MBCS_OUTPUT_2_SISO) {
- prevLength=cnv->fromUnicodeStatus;
- if(prevLength==0) {
- /* set the real value */
- prevLength=1;
- }
- } else {
- /* prevent fromUnicodeStatus from being set to something non-0 */
- prevLength=0;
- }
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- prevSourceIndex=-1;
- sourceIndex= c==0 ? 0 : -1;
- nextSourceIndex=0;
-
- /* Get the SI/SO character for the converter */
- siLength = static_cast<uint8_t>(getSISOBytes(SI, cnv->options, siBytes));
- soLength = static_cast<uint8_t>(getSISOBytes(SO, cnv->options, soBytes));
-
- /* conversion loop */
- /*
- * This is another piece of ugly code:
- * A goto into the loop if the converter state contains a first surrogate
- * from the previous function call.
- * It saves a check of if(c==0) in each loop iteration
- * and avoids duplicating the trail-surrogate-handling code in the else
- * branch of that check.
- * I could not find any other way to get around this other than
- * using a function call for the conversion and callback, which would
- * be even more inefficient.
- *
- * Markus Scherer 2000-jul-19
- */
- if(c!=0 && targetCapacity>0) {
- goto getTrail;
- }
-
- while(source<sourceLimit) {
- /*
- * This following test is to see if available input would overflow the output.
- * It does not catch output of more than one byte that
- * overflows as a result of a multi-byte character or callback output
- * from the last source character.
- * Therefore, those situations also test for overflows and will
- * then break the loop, too.
- */
- if(targetCapacity>0) {
- /*
- * Get a correct Unicode code point:
- * a single UChar for a BMP code point or
- * a matched surrogate pair for a "supplementary code point".
- */
- c=*source++;
- ++nextSourceIndex;
- if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
- *target++=(uint8_t)c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- prevSourceIndex=sourceIndex;
- sourceIndex=nextSourceIndex;
- }
- --targetCapacity;
- c=0;
- continue;
- }
- /*
- * utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX
- * to avoid dealing with surrogates.
- * MBCS_FAST_MAX must be >=0xd7ff.
- */
- if(c<=0xd7ff && mbcsIndex!=NULL) {
- value=mbcsIndex[c>>6];
-
- /* get the bytes and the length for the output (copied from below and adapted for utf8Friendly data) */
- /* There are only roundtrips (!=0) and no-mapping (==0) entries. */
- switch(outputType) {
- case MBCS_OUTPUT_2:
- value=((const uint16_t *)bytes)[value +(c&0x3f)];
- if(value<=0xff) {
- if(value==0) {
- goto unassigned;
- } else {
- length=1;
- }
- } else {
- length=2;
- }
- break;
- case MBCS_OUTPUT_2_SISO:
- /* 1/2-byte stateful with Shift-In/Shift-Out */
- /*
- * Save the old state in the converter object
- * right here, then change the local prevLength state variable if necessary.
- * Then, if this character turns out to be unassigned or a fallback that
- * is not taken, the callback code must not save the new state in the converter
- * because the new state is for a character that is not output.
- * However, the callback must still restore the state from the converter
- * in case the callback function changed it for its output.
- */
- cnv->fromUnicodeStatus=prevLength; /* save the old state */
- value=((const uint16_t *)bytes)[value +(c&0x3f)];
- if(value<=0xff) {
- if(value==0) {
- goto unassigned;
- } else if(prevLength<=1) {
- length=1;
- } else {
- /* change from double-byte mode to single-byte */
- if (siLength == 1) {
- value|=(uint32_t)siBytes[0]<<8;
- length = 2;
- } else if (siLength == 2) {
- value|=(uint32_t)siBytes[1]<<8;
- value|=(uint32_t)siBytes[0]<<16;
- length = 3;
- }
- prevLength=1;
- }
- } else {
- if(prevLength==2) {
- length=2;
- } else {
- /* change from single-byte mode to double-byte */
- if (soLength == 1) {
- value|=(uint32_t)soBytes[0]<<16;
- length = 3;
- } else if (soLength == 2) {
- value|=(uint32_t)soBytes[1]<<16;
- value|=(uint32_t)soBytes[0]<<24;
- length = 4;
- }
- prevLength=2;
- }
- }
- break;
- case MBCS_OUTPUT_DBCS_ONLY:
- /* table with single-byte results, but only DBCS mappings used */
- value=((const uint16_t *)bytes)[value +(c&0x3f)];
- if(value<=0xff) {
- /* no mapping or SBCS result, not taken for DBCS-only */
- goto unassigned;
- } else {
- length=2;
- }
- break;
- case MBCS_OUTPUT_3:
- p=bytes+(value+(c&0x3f))*3;
- value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
- if(value<=0xff) {
- if(value==0) {
- goto unassigned;
- } else {
- length=1;
- }
- } else if(value<=0xffff) {
- length=2;
- } else {
- length=3;
- }
- break;
- case MBCS_OUTPUT_4:
- value=((const uint32_t *)bytes)[value +(c&0x3f)];
- if(value<=0xff) {
- if(value==0) {
- goto unassigned;
- } else {
- length=1;
- }
- } else if(value<=0xffff) {
- length=2;
- } else if(value<=0xffffff) {
- length=3;
- } else {
- length=4;
- }
- break;
- case MBCS_OUTPUT_3_EUC:
- value=((const uint16_t *)bytes)[value +(c&0x3f)];
- /* EUC 16-bit fixed-length representation */
- if(value<=0xff) {
- if(value==0) {
- goto unassigned;
- } else {
- length=1;
- }
- } else if((value&0x8000)==0) {
- value|=0x8e8000;
- length=3;
- } else if((value&0x80)==0) {
- value|=0x8f0080;
- length=3;
- } else {
- length=2;
- }
- break;
- case MBCS_OUTPUT_4_EUC:
- p=bytes+(value+(c&0x3f))*3;
- value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
- /* EUC 16-bit fixed-length representation applied to the first two bytes */
- if(value<=0xff) {
- if(value==0) {
- goto unassigned;
- } else {
- length=1;
- }
- } else if(value<=0xffff) {
- length=2;
- } else if((value&0x800000)==0) {
- value|=0x8e800000;
- length=4;
- } else if((value&0x8000)==0) {
- value|=0x8f008000;
- length=4;
- } else {
- length=3;
- }
- break;
- default:
- /* must not occur */
- /*
- * To avoid compiler warnings that value & length may be
- * used without having been initialized, we set them here.
- * In reality, this is unreachable code.
- * Not having a default branch also causes warnings with
- * some compilers.
- */
- value=0;
- length=0;
- break;
- }
- /* output the value */
- } else {
- /*
- * This also tests if the codepage maps single surrogates.
- * If it does, then surrogates are not paired but mapped separately.
- * Note that in this case unmatched surrogates are not detected.
- */
- if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
- if(U16_IS_SURROGATE_LEAD(c)) {
-getTrail:
- if(source<sourceLimit) {
- /* test the following code unit */
- UChar trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- ++nextSourceIndex;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
- /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
- cnv->fromUnicodeStatus=prevLength; /* save the old state */
- /* callback(unassigned) */
- goto unassigned;
- }
- /* convert this supplementary code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- } else {
- /* no more input */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- }
-
- /* convert the Unicode code point in c into codepage bytes */
-
- /*
- * The basic lookup is a triple-stage compact array (trie) lookup.
- * For details see the beginning of this file.
- *
- * Single-byte codepages are handled with a different data structure
- * by _MBCSSingle... functions.
- *
- * The result consists of a 32-bit value from stage 2 and
- * a pointer to as many bytes as are stored per character.
- * The pointer points to the character's bytes in stage 3.
- * Bits 15..0 of the stage 2 entry contain the stage 3 index
- * for that pointer, while bits 31..16 are flags for which of
- * the 16 characters in the block are roundtrip-assigned.
- *
- * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t
- * respectively as uint32_t, in the platform encoding.
- * For 3-byte codepages, the bytes are always stored in big-endian order.
- *
- * For EUC encodings that use only either 0x8e or 0x8f as the first
- * byte of their longest byte sequences, the first two bytes in
- * this third stage indicate with their 7th bits whether these bytes
- * are to be written directly or actually need to be preceded by
- * one of the two Single-Shift codes. With this, the third stage
- * stores one byte fewer per character than the actual maximum length of
- * EUC byte sequences.
- *
- * Other than that, leading zero bytes are removed and the other
- * bytes output. A single zero byte may be output if the "assigned"
- * bit in stage 2 was on.
- * The data structure does not support zero byte output as a fallback,
- * and also does not allow output of leading zeros.
- */
- stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
-
- /* get the bytes and the length for the output */
- switch(outputType) {
- case MBCS_OUTPUT_2:
- value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
- if(value<=0xff) {
- length=1;
- } else {
- length=2;
- }
- break;
- case MBCS_OUTPUT_2_SISO:
- /* 1/2-byte stateful with Shift-In/Shift-Out */
- /*
- * Save the old state in the converter object
- * right here, then change the local prevLength state variable if necessary.
- * Then, if this character turns out to be unassigned or a fallback that
- * is not taken, the callback code must not save the new state in the converter
- * because the new state is for a character that is not output.
- * However, the callback must still restore the state from the converter
- * in case the callback function changed it for its output.
- */
- cnv->fromUnicodeStatus=prevLength; /* save the old state */
- value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
- if(value<=0xff) {
- if(value==0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)==0) {
- /* no mapping, leave value==0 */
- length=0;
- } else if(prevLength<=1) {
- length=1;
- } else {
- /* change from double-byte mode to single-byte */
- if (siLength == 1) {
- value|=(uint32_t)siBytes[0]<<8;
- length = 2;
- } else if (siLength == 2) {
- value|=(uint32_t)siBytes[1]<<8;
- value|=(uint32_t)siBytes[0]<<16;
- length = 3;
- }
- prevLength=1;
- }
- } else {
- if(prevLength==2) {
- length=2;
- } else {
- /* change from single-byte mode to double-byte */
- if (soLength == 1) {
- value|=(uint32_t)soBytes[0]<<16;
- length = 3;
- } else if (soLength == 2) {
- value|=(uint32_t)soBytes[1]<<16;
- value|=(uint32_t)soBytes[0]<<24;
- length = 4;
- }
- prevLength=2;
- }
- }
- break;
- case MBCS_OUTPUT_DBCS_ONLY:
- /* table with single-byte results, but only DBCS mappings used */
- value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
- if(value<=0xff) {
- /* no mapping or SBCS result, not taken for DBCS-only */
- value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
- length=0;
- } else {
- length=2;
- }
- break;
- case MBCS_OUTPUT_3:
- p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
- value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
- if(value<=0xff) {
- length=1;
- } else if(value<=0xffff) {
- length=2;
- } else {
- length=3;
- }
- break;
- case MBCS_OUTPUT_4:
- value=MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c);
- if(value<=0xff) {
- length=1;
- } else if(value<=0xffff) {
- length=2;
- } else if(value<=0xffffff) {
- length=3;
- } else {
- length=4;
- }
- break;
- case MBCS_OUTPUT_3_EUC:
- value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
- /* EUC 16-bit fixed-length representation */
- if(value<=0xff) {
- length=1;
- } else if((value&0x8000)==0) {
- value|=0x8e8000;
- length=3;
- } else if((value&0x80)==0) {
- value|=0x8f0080;
- length=3;
- } else {
- length=2;
- }
- break;
- case MBCS_OUTPUT_4_EUC:
- p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
- value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
- /* EUC 16-bit fixed-length representation applied to the first two bytes */
- if(value<=0xff) {
- length=1;
- } else if(value<=0xffff) {
- length=2;
- } else if((value&0x800000)==0) {
- value|=0x8e800000;
- length=4;
- } else if((value&0x8000)==0) {
- value|=0x8f008000;
- length=4;
- } else {
- length=3;
- }
- break;
- default:
- /* must not occur */
- /*
- * To avoid compiler warnings that value & length may be
- * used without having been initialized, we set them here.
- * In reality, this is unreachable code.
- * Not having a default branch also causes warnings with
- * some compilers.
- */
- value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
- length=0;
- break;
- }
-
- /* is this code point assigned, or do we use fallbacks? */
- if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)!=0 ||
- (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
- ) {
- /*
- * We allow a 0 byte output if the "assigned" bit is set for this entry.
- * There is no way with this data structure for fallback output
- * to be a zero byte.
- */
-
-unassigned:
- /* try an extension mapping */
- pArgs->source=source;
- c=_extFromU(cnv, cnv->sharedData,
- c, &source, sourceLimit,
- &target, target+targetCapacity,
- &offsets, sourceIndex,
- pArgs->flush,
- pErrorCode);
- nextSourceIndex+=(int32_t)(source-pArgs->source);
- prevLength=cnv->fromUnicodeStatus; /* restore SISO state */
-
- if(U_FAILURE(*pErrorCode)) {
- /* not mappable or buffer overflow */
- break;
- } else {
- /* a mapping was written to the target, continue */
-
- /* recalculate the targetCapacity after an extension mapping */
- targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
-
- /* normal end of conversion: prepare for a new character */
- if(offsets!=NULL) {
- prevSourceIndex=sourceIndex;
- sourceIndex=nextSourceIndex;
- }
- continue;
- }
- }
- }
-
- /* write the output character bytes from value and length */
- /* from the first if in the loop we know that targetCapacity>0 */
- if(length<=targetCapacity) {
- if(offsets==NULL) {
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *target++=(uint8_t)(value>>24);
- U_FALLTHROUGH;
- case 3:
- *target++=(uint8_t)(value>>16);
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(value>>8);
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)value;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- } else {
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *target++=(uint8_t)(value>>24);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 3:
- *target++=(uint8_t)(value>>16);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(value>>8);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)value;
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- }
- targetCapacity-=length;
- } else {
- uint8_t *charErrorBuffer;
-
- /*
- * We actually do this backwards here:
- * In order to save an intermediate variable, we output
- * first to the overflow buffer what does not fit into the
- * regular target.
- */
- /* we know that 1<=targetCapacity<length<=4 */
- length-=targetCapacity;
- charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
- switch(length) {
- /* each branch falls through to the next one */
- case 3:
- *charErrorBuffer++=(uint8_t)(value>>16);
- U_FALLTHROUGH;
- case 2:
- *charErrorBuffer++=(uint8_t)(value>>8);
- U_FALLTHROUGH;
- case 1:
- *charErrorBuffer=(uint8_t)value;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- cnv->charErrorBufferLength=(int8_t)length;
-
- /* now output what fits into the regular target */
- value>>=8*length; /* length was reduced by targetCapacity */
- switch(targetCapacity) {
- /* each branch falls through to the next one */
- case 3:
- *target++=(uint8_t)(value>>16);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(value>>8);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)value;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
-
- /* target overflow */
- targetCapacity=0;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- c=0;
- break;
- }
-
- /* normal end of conversion: prepare for a new character */
- c=0;
- if(offsets!=NULL) {
- prevSourceIndex=sourceIndex;
- sourceIndex=nextSourceIndex;
- }
- continue;
- } else {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-
- /*
- * the end of the input stream and detection of truncated input
- * are handled by the framework, but for EBCDIC_STATEFUL conversion
- * we need to emit an SI at the very end
- *
- * conditions:
- * successful
- * EBCDIC_STATEFUL in DBCS mode
- * end of input and no truncated input
- */
- if( U_SUCCESS(*pErrorCode) &&
- outputType==MBCS_OUTPUT_2_SISO && prevLength==2 &&
- pArgs->flush && source>=sourceLimit && c==0
- ) {
- /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
- if(targetCapacity>0) {
- *target++=(uint8_t)siBytes[0];
- if (siLength == 2) {
- if (targetCapacity<2) {
- cnv->charErrorBuffer[0]=(uint8_t)siBytes[1];
- cnv->charErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- } else {
- *target++=(uint8_t)siBytes[1];
- }
- }
- if(offsets!=NULL) {
- /* set the last source character's index (sourceIndex points at sourceLimit now) */
- *offsets++=prevSourceIndex;
- }
- } else {
- /* target is full */
- cnv->charErrorBuffer[0]=(uint8_t)siBytes[0];
- if (siLength == 2) {
- cnv->charErrorBuffer[1]=(uint8_t)siBytes[1];
- }
- cnv->charErrorBufferLength=siLength;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- prevLength=1; /* we switched into SBCS */
- }
-
- /* set the converter state back into UConverter */
- cnv->fromUChar32=c;
- cnv->fromUnicodeStatus=prevLength;
-
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- pArgs->offsets=offsets;
-}
-
-/*
- * This is another simple conversion function for internal use by other
- * conversion implementations.
- * It does not use the converter state nor call callbacks.
- * It does not handle the EBCDIC swaplfnl option (set in UConverter).
- * It handles conversion extensions but not GB 18030.
- *
- * It converts one single Unicode code point into codepage bytes, encoded
- * as one 32-bit value. The function returns the number of bytes in *pValue:
- * 1..4 the number of bytes in *pValue
- * 0 unassigned (*pValue undefined)
- * -1 illegal (currently not used, *pValue undefined)
- *
- * *pValue will contain the resulting bytes with the last byte in bits 7..0,
- * the second to last byte in bits 15..8, etc.
- * Currently, the function assumes but does not check that 0<=c<=0x10ffff.
- *
- * Additional notes from the code below:
- * -1 is returned when sharedData->mbcs.outputType is not one of the types
- * handled here (only MBCS_OUTPUT_1 and MBCS_OUTPUT_2 are compiled in;
- * the other cases are inside #if 0).
- * If the table lookup yields no usable mapping and
- * sharedData->mbcs.extIndexes is not NULL, the result of
- * ucnv_extSimpleMatchFromU() is used and its absolute value is returned.
- */
-U_CFUNC int32_t
-ucnv_MBCSFromUChar32(UConverterSharedData *sharedData,
- UChar32 c, uint32_t *pValue,
- UBool useFallback) {
- const int32_t *cx;
- const uint16_t *table;
-#if 0
-/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
- const uint8_t *p;
-#endif
- uint32_t stage2Entry;
- uint32_t value;
- int32_t length;
-
- /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
- if(c<=0xffff || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
- table=sharedData->mbcs.fromUnicodeTable;
-
- /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
- if(sharedData->mbcs.outputType==MBCS_OUTPUT_1) {
- value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
- /* is this code point assigned, or do we use fallbacks? */
- if(useFallback ? value>=0x800 : value>=0xc00) {
- *pValue=value&0xff;
- return 1;
- }
- } else /* outputType!=MBCS_OUTPUT_1 */ {
- stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
-
- /* get the bytes and the length for the output */
- switch(sharedData->mbcs.outputType) {
- case MBCS_OUTPUT_2:
- value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
- if(value<=0xff) {
- length=1;
- } else {
- length=2;
- }
- break;
-#if 0
-/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
- case MBCS_OUTPUT_DBCS_ONLY:
- /* table with single-byte results, but only DBCS mappings used */
- value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
- if(value<=0xff) {
- /* no mapping or SBCS result, not taken for DBCS-only */
- value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
- length=0;
- } else {
- length=2;
- }
- break;
- case MBCS_OUTPUT_3:
- p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
- value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
- if(value<=0xff) {
- length=1;
- } else if(value<=0xffff) {
- length=2;
- } else {
- length=3;
- }
- break;
- case MBCS_OUTPUT_4:
- value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
- if(value<=0xff) {
- length=1;
- } else if(value<=0xffff) {
- length=2;
- } else if(value<=0xffffff) {
- length=3;
- } else {
- length=4;
- }
- break;
- case MBCS_OUTPUT_3_EUC:
- value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
- /* EUC 16-bit fixed-length representation */
- if(value<=0xff) {
- length=1;
- } else if((value&0x8000)==0) {
- value|=0x8e8000;
- length=3;
- } else if((value&0x80)==0) {
- value|=0x8f0080;
- length=3;
- } else {
- length=2;
- }
- break;
- case MBCS_OUTPUT_4_EUC:
- p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
- value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
- /* EUC 16-bit fixed-length representation applied to the first two bytes */
- if(value<=0xff) {
- length=1;
- } else if(value<=0xffff) {
- length=2;
- } else if((value&0x800000)==0) {
- value|=0x8e800000;
- length=4;
- } else if((value&0x8000)==0) {
- value|=0x8f008000;
- length=4;
- } else {
- length=3;
- }
- break;
-#endif
- default:
- /* must not occur */
- return -1;
- }
-
- /* is this code point assigned, or do we use fallbacks? */
- if( MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ||
- (FROM_U_USE_FALLBACK(useFallback, c) && value!=0)
- ) {
- /*
- * We allow a 0 byte output if the "assigned" bit is set for this entry.
- * There is no way with this data structure for fallback output
- * to be a zero byte.
- */
- /* assigned */
- *pValue=value;
- return length;
- }
- }
- }
-
- cx=sharedData->mbcs.extIndexes;
- if(cx!=NULL) {
- length=ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback);
- return length>=0 ? length : -length; /* return abs(length); */
- }
-
- /* unassigned */
- return 0;
-}
-
-
-#if 0
-/*
- * This function has been moved to ucnv2022.c for inlining.
- * This implementation is here only for documentation purposes
- */
-
-/**
- * This version of ucnv_MBCSFromUChar32() is optimized for single-byte codepages.
- * It does not handle the EBCDIC swaplfnl option (set in UConverter).
- * It does not handle conversion extensions (_extFromU()).
- *
- * It returns the codepage byte for the code point, or -1 if it is unassigned.
- *
- * useFallback selects the smallest table result that is accepted:
- * 0x800 when fallbacks are used, 0xc00 otherwise.
- * Supplementary code points (c>=0x10000) yield -1 unless the table's
- * unicodeMask has UCNV_HAS_SUPPLEMENTARY set.
- */
-U_CFUNC int32_t
-ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData,
- UChar32 c,
- UBool useFallback) {
- const uint16_t *table;
- int32_t value;
-
- /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
- if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
- return -1;
- }
-
- /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
- table=sharedData->mbcs.fromUnicodeTable;
-
- /* get the byte for the output */
- value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
- /* is this code point assigned, or do we use fallbacks? */
- if(useFallback ? value>=0x800 : value>=0xc00) {
- return value&0xff;
- } else {
- return -1;
- }
-}
-#endif
-
-/* MBCS-from-UTF-8 conversion functions ------------------------------------- */
-
-/*
- * offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail...
- * For example, 0x3080==(0xC0<<6)+0x80, 0xE2080==(((0xE0<<6)+0x80)<<6)+0x80 and
- * 0x3C82080==(((((0xF0<<6)+0x80)<<6)+0x80)<<6)+0x80.
- * NOTE(review): presumably indexed by the sequence length n (entries 0 and 1 are 0) - confirm at the use sites.
- */
-static const UChar32
-utf8_offsets[5]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
-/* Converts UTF-8 input (pToUArgs) directly to single-byte charset output (pFromUArgs) without a UTF-16 pivot; a truncated trailing sequence is saved in the UTF-8 converter for the next call. */
-static void U_CALLCONV
-ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
- UConverterToUnicodeArgs *pToUArgs,
- UErrorCode *pErrorCode) {
- UConverter *utf8, *cnv;
- const uint8_t *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
-
- const uint16_t *table, *sbcsIndex;
- const uint16_t *results;
-
- int8_t oldToULength, toULength, toULimit;
-
- UChar32 c;
- uint8_t b, t1, t2;
-
- uint32_t asciiRoundtrips;
- uint16_t value, minValue = 0;
- UBool hasSupplementary;
-
- /* set up the local pointers */
- utf8=pToUArgs->converter;
- cnv=pFromUArgs->converter;
- source=(uint8_t *)pToUArgs->source;
- sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
- target=(uint8_t *)pFromUArgs->target;
- targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
-
- table=cnv->sharedData->mbcs.fromUnicodeTable;
- sbcsIndex=cnv->sharedData->mbcs.sbcsIndex;
- if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
- results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
- } else {
- results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
- }
- asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
-
- if(cnv->useFallback) {
- /* use all roundtrip and fallback results */
- minValue=0x800;
- } else {
- /* use only roundtrips and fallbacks from private-use characters */
- minValue=0xc00;
- }
- hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
-
- /* resume state saved in the UTF-8 UConverter by a previous call: bytes collected so far (toULength), expected sequence length (mode), accumulated value (toUnicodeStatus) */
- if(utf8->toULength > 0) {
- toULength=oldToULength=utf8->toULength;
- toULimit=(int8_t)utf8->mode;
- c=(UChar32)utf8->toUnicodeStatus;
- } else {
- toULength=oldToULength=toULimit=0;
- c = 0;
- }
-
- // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
- // If the buffer ends with a truncated 2- or 3-byte sequence,
- // then we reduce the sourceLimit to before that,
- // and collect the remaining bytes after the conversion loop.
- {
- // Do not go back into the bytes that will be read for finishing a partial
- // sequence from the previous buffer.
- int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
- if(length>0) {
- uint8_t b1=*(sourceLimit-1);
- if(U8_IS_SINGLE(b1)) {
- // common ASCII character
- } else if(U8_IS_TRAIL(b1) && length>=2) {
- uint8_t b2=*(sourceLimit-2);
- if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
- // truncated 3-byte sequence
- sourceLimit-=2;
- }
- } else if(0xc2<=b1 && b1<0xf0) {
- // truncated 2- or 3-byte sequence
- --sourceLimit;
- }
- }
- }
-
- if(c!=0 && targetCapacity>0) {
- utf8->toUnicodeStatus=0;
- utf8->toULength=0;
- goto moreBytes;
- /*
- * Note: We could avoid the goto by duplicating some of the moreBytes
- * code, but only up to the point of collecting a complete UTF-8
- * sequence; then recurse for the toUBytes[toULength]
- * and then continue with normal conversion.
- *
- * If so, move this code to just after initializing the minimum
- * set of local variables for reading the UTF-8 input
- * (utf8, source, target, limits but not cnv, table, minValue, etc.).
- *
- * Potential advantages:
- * - avoid the goto
- * - oldToULength could become a local variable in just those code blocks
- * that deal with buffer boundaries
- * - possibly faster if the goto prevents some compiler optimizations
- * (this would need measuring to confirm)
- * Disadvantage:
- * - code duplication
- */
- }
-
- /* conversion loop */
- while(source<sourceLimit) {
- if(targetCapacity>0) {
- b=*source++;
- if(U8_IS_SINGLE(b)) {
- /* convert ASCII */
- if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
- *target++=(uint8_t)b;
- --targetCapacity;
- continue;
- } else {
- c=b;
- value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, 0, c);
- }
- } else {
- if(b<0xe0) {
- if( /* handle U+0080..U+07FF inline */
- b>=0xc2 &&
- (t1=(uint8_t)(*source-0x80)) <= 0x3f
- ) {
- c=b&0x1f;
- ++source;
- value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t1);
- if(value>=minValue) {
- *target++=(uint8_t)value;
- --targetCapacity;
- continue;
- } else {
- c=(c<<6)|t1;
- }
- } else {
- c=-1;
- }
- } else if(b==0xe0) {
- if( /* handle U+0800..U+0FFF inline */
- (t1=(uint8_t)(source[0]-0x80)) <= 0x3f && t1 >= 0x20 &&
- (t2=(uint8_t)(source[1]-0x80)) <= 0x3f
- ) {
- c=t1;
- source+=2;
- value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t2);
- if(value>=minValue) {
- *target++=(uint8_t)value;
- --targetCapacity;
- continue;
- } else {
- c=(c<<6)|t2;
- }
- } else {
- c=-1;
- }
- } else {
- c=-1;
- }
-
- if(c<0) {
- /* handle "complicated" and error cases, and continuing partial characters */
- oldToULength=0;
- toULength=1;
- toULimit=U8_COUNT_BYTES_NON_ASCII(b);
- c=b;
-moreBytes:
- while(toULength<toULimit) {
- /*
- * The sourceLimit may have been adjusted before the conversion loop
- * to stop before a truncated sequence.
- * Here we need to use the real limit in case we have two truncated
- * sequences at the end.
- * See ticket #7492.
- */
- if(source<(uint8_t *)pToUArgs->sourceLimit) {
- b=*source;
- if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
- ++source;
- ++toULength;
- c=(c<<6)+b;
- } else {
- break; /* sequence too short, stop with toULength<toULimit */
- }
- } else {
- /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
- source-=(toULength-oldToULength);
- while(oldToULength<toULength) {
- utf8->toUBytes[oldToULength++]=*source++;
- }
- utf8->toUnicodeStatus=c;
- utf8->toULength=toULength;
- utf8->mode=toULimit;
- pToUArgs->source=(char *)source;
- pFromUArgs->target=(char *)target;
- return;
- }
- }
-
- if(toULength==toULimit) {
- c-=utf8_offsets[toULength];
- if(toULength<=3) { /* BMP */
- value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
- } else {
- /* supplementary code point */
- if(!hasSupplementary) {
- /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
- value=0; /* 0<minValue, so c is handled as unassigned below */
- } else {
- value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
- }
- }
- } else {
- /* error handling: illegal UTF-8 byte sequence */
- source-=(toULength-oldToULength);
- while(oldToULength<toULength) {
- utf8->toUBytes[oldToULength++]=*source++;
- }
- utf8->toULength=toULength;
- pToUArgs->source=(char *)source;
- pFromUArgs->target=(char *)target;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- return;
- }
- }
- }
-
- if(value>=minValue) {
- /* output the mapping for c */
- *target++=(uint8_t)value;
- --targetCapacity;
- } else {
- /* value<minValue means c is unassigned (unmappable) */
- /*
- * Try an extension mapping.
- * Pass in no source because we don't have UTF-16 input.
- * If we have a partial match on c, we will return and revert
- * to UTF-8->UTF-16->charset conversion.
- */
- static const UChar nul=0;
- const UChar *noSource=&nul;
- c=_extFromU(cnv, cnv->sharedData,
- c, &noSource, noSource,
- &target, target+targetCapacity,
- NULL, -1,
- pFromUArgs->flush,
- pErrorCode);
-
- if(U_FAILURE(*pErrorCode)) {
- /* not mappable or buffer overflow */
- cnv->fromUChar32=c;
- break;
- } else if(cnv->preFromUFirstCP>=0) {
- /*
- * Partial match, return and revert to pivoting.
- * In normal from-UTF-16 conversion, we would just continue
- * but then exit the loop because the extension match would
- * have consumed the source.
- */
- *pErrorCode=U_USING_DEFAULT_WARNING;
- break;
- } else {
- /* a mapping was written to the target, continue */
-
- /* recalculate the targetCapacity after an extension mapping */
- targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target);
- }
- }
- } else {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-
- /*
- * The sourceLimit may have been adjusted before the conversion loop
- * to stop before a truncated sequence.
- * If so, then collect the truncated sequence now.
- */
- if(U_SUCCESS(*pErrorCode) &&
- cnv->preFromUFirstCP<0 &&
- source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
- c=utf8->toUBytes[0]=b=*source++;
- toULength=1;
- toULimit=U8_COUNT_BYTES(b);
- while(source<sourceLimit) {
- utf8->toUBytes[toULength++]=b=*source++;
- c=(c<<6)+b;
- }
- utf8->toUnicodeStatus=c;
- utf8->toULength=toULength;
- utf8->mode=toULimit;
- }
-
- /* write back the updated pointers */
- pToUArgs->source=(char *)source;
- pFromUArgs->target=(char *)target;
-}
-/* Converts UTF-8 input (pToUArgs) directly to charset output of one or two bytes per character (pFromUArgs) without a UTF-16 pivot; a truncated trailing sequence is saved in the UTF-8 converter for the next call. */
-static void U_CALLCONV
-ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
- UConverterToUnicodeArgs *pToUArgs,
- UErrorCode *pErrorCode) {
- UConverter *utf8, *cnv;
- const uint8_t *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
-
- const uint16_t *table, *mbcsIndex;
- const uint16_t *results;
-
- int8_t oldToULength, toULength, toULimit;
-
- UChar32 c;
- uint8_t b, t1, t2;
-
- uint32_t stage2Entry;
- uint32_t asciiRoundtrips;
- uint16_t value = 0;
- UBool hasSupplementary;
-
- /* set up the local pointers */
- utf8=pToUArgs->converter;
- cnv=pFromUArgs->converter;
- source=(uint8_t *)pToUArgs->source;
- sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
- target=(uint8_t *)pFromUArgs->target;
- targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
-
- table=cnv->sharedData->mbcs.fromUnicodeTable;
- mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
- if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
- results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
- } else {
- results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
- }
- asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
-
- hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
-
- /* resume state saved in the UTF-8 UConverter by a previous call: bytes collected so far (toULength), expected sequence length (mode), accumulated value (toUnicodeStatus) */
- if(utf8->toULength > 0) {
- toULength=oldToULength=utf8->toULength;
- toULimit=(int8_t)utf8->mode;
- c=(UChar32)utf8->toUnicodeStatus;
- } else {
- toULength=oldToULength=toULimit=0;
- c = 0;
- }
-
- // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
- // If the buffer ends with a truncated 2- or 3-byte sequence,
- // then we reduce the sourceLimit to before that,
- // and collect the remaining bytes after the conversion loop.
- {
- // Do not go back into the bytes that will be read for finishing a partial
- // sequence from the previous buffer.
- int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
- if(length>0) {
- uint8_t b1=*(sourceLimit-1);
- if(U8_IS_SINGLE(b1)) {
- // common ASCII character
- } else if(U8_IS_TRAIL(b1) && length>=2) {
- uint8_t b2=*(sourceLimit-2);
- if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
- // truncated 3-byte sequence
- sourceLimit-=2;
- }
- } else if(0xc2<=b1 && b1<0xf0) {
- // truncated 2- or 3-byte sequence
- --sourceLimit;
- }
- }
- }
-
- if(c!=0 && targetCapacity>0) {
- utf8->toUnicodeStatus=0;
- utf8->toULength=0;
- goto moreBytes;
- /* See note in ucnv_SBCSFromUTF8() about this goto. */
- }
-
- /* conversion loop */
- while(source<sourceLimit) {
- if(targetCapacity>0) {
- b=*source++;
- if(U8_IS_SINGLE(b)) {
- /* convert ASCII */
- if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
- *target++=b;
- --targetCapacity;
- continue;
- } else {
- value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, 0, b);
- if(value==0) {
- c=b;
- goto unassigned;
- }
- }
- } else {
- if(b>=0xe0) {
- if( /* handle U+0800..U+D7FF inline */
- b<=0xed && // do not assume maxFastUChar>0xd7ff
- U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) &&
- (t2=(uint8_t)(source[1]-0x80)) <= 0x3f
- ) {
- c=((b&0xf)<<6)|(t1&0x3f);
- source+=2;
- value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2);
- if(value==0) {
- c=(c<<6)|t2;
- goto unassigned;
- }
- } else {
- c=-1;
- }
- } else {
- if( /* handle U+0080..U+07FF inline */
- b>=0xc2 &&
- (t1=(uint8_t)(*source-0x80)) <= 0x3f
- ) {
- c=b&0x1f;
- ++source;
- value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t1);
- if(value==0) {
- c=(c<<6)|t1;
- goto unassigned;
- }
- } else {
- c=-1;
- }
- }
-
- if(c<0) {
- /* handle "complicated" and error cases, and continuing partial characters */
- oldToULength=0;
- toULength=1;
- toULimit=U8_COUNT_BYTES_NON_ASCII(b);
- c=b;
-moreBytes:
- while(toULength<toULimit) {
- /*
- * The sourceLimit may have been adjusted before the conversion loop
- * to stop before a truncated sequence.
- * Here we need to use the real limit in case we have two truncated
- * sequences at the end.
- * See ticket #7492.
- */
- if(source<(uint8_t *)pToUArgs->sourceLimit) {
- b=*source;
- if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
- ++source;
- ++toULength;
- c=(c<<6)+b;
- } else {
- break; /* sequence too short, stop with toULength<toULimit */
- }
- } else {
- /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
- source-=(toULength-oldToULength);
- while(oldToULength<toULength) {
- utf8->toUBytes[oldToULength++]=*source++;
- }
- utf8->toUnicodeStatus=c;
- utf8->toULength=toULength;
- utf8->mode=toULimit;
- pToUArgs->source=(char *)source;
- pFromUArgs->target=(char *)target;
- return;
- }
- }
-
- if(toULength==toULimit) {
- c-=utf8_offsets[toULength];
- if(toULength<=3) { /* BMP */
- stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
- } else {
- /* supplementary code point */
- if(!hasSupplementary) {
- /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
- stage2Entry=0;
- } else {
- stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
- }
- }
- } else {
- /* error handling: illegal UTF-8 byte sequence */
- source-=(toULength-oldToULength);
- while(oldToULength<toULength) {
- utf8->toUBytes[oldToULength++]=*source++;
- }
- utf8->toULength=toULength;
- pToUArgs->source=(char *)source;
- pFromUArgs->target=(char *)target;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- return;
- }
-
- /* get the bytes and the length for the output */
- /* MBCS_OUTPUT_2 */
- value=MBCS_VALUE_2_FROM_STAGE_2(results, stage2Entry, c);
-
- /* is this code point assigned, or do we use fallbacks? */
- if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ||
- (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
- ) {
- goto unassigned;
- }
- }
- }
-
- /* write the output character bytes from value and length */
- /* from the first if in the loop we know that targetCapacity>0 */
- if(value<=0xff) {
- /* this is easy because we know that there is enough space */
- *target++=(uint8_t)value;
- --targetCapacity;
- } else /* length==2 */ {
- *target++=(uint8_t)(value>>8);
- if(2<=targetCapacity) {
- *target++=(uint8_t)value;
- targetCapacity-=2;
- } else {
- cnv->charErrorBuffer[0]=(char)value; /* no room for the second byte: keep it in cnv->charErrorBuffer */
- cnv->charErrorBufferLength=1;
-
- /* target overflow */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- continue;
-
-unassigned:
- {
- /*
- * Try an extension mapping.
- * Pass in no source because we don't have UTF-16 input.
- * If we have a partial match on c, we will return and revert
- * to UTF-8->UTF-16->charset conversion.
- */
- static const UChar nul=0;
- const UChar *noSource=&nul;
- c=_extFromU(cnv, cnv->sharedData,
- c, &noSource, noSource,
- &target, target+targetCapacity,
- NULL, -1,
- pFromUArgs->flush,
- pErrorCode);
-
- if(U_FAILURE(*pErrorCode)) {
- /* not mappable or buffer overflow */
- cnv->fromUChar32=c;
- break;
- } else if(cnv->preFromUFirstCP>=0) {
- /*
- * Partial match, return and revert to pivoting.
- * In normal from-UTF-16 conversion, we would just continue
- * but then exit the loop because the extension match would
- * have consumed the source.
- */
- *pErrorCode=U_USING_DEFAULT_WARNING;
- break;
- } else {
- /* a mapping was written to the target, continue */
-
- /* recalculate the targetCapacity after an extension mapping */
- targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target);
- continue;
- }
- }
- } else {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-
- /*
- * The sourceLimit may have been adjusted before the conversion loop
- * to stop before a truncated sequence.
- * If so, then collect the truncated sequence now.
- */
- if(U_SUCCESS(*pErrorCode) &&
- cnv->preFromUFirstCP<0 &&
- source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
- c=utf8->toUBytes[0]=b=*source++;
- toULength=1;
- toULimit=U8_COUNT_BYTES(b);
- while(source<sourceLimit) {
- utf8->toUBytes[toULength++]=b=*source++;
- c=(c<<6)+b;
- }
- utf8->toUnicodeStatus=c;
- utf8->toULength=toULength;
- utf8->mode=toULimit;
- }
-
- /* write back the updated pointers */
- pToUArgs->source=(char *)source;
- pFromUArgs->target=(char *)target;
-}
-
-/* miscellaneous ------------------------------------------------------------ */
-/* Fills starters[0..255]: an entry is set when that byte's entry in the dbcsOnlyState row of the state table is a transition. The unnamed UErrorCode parameter is not used. */
-static void U_CALLCONV
-ucnv_MBCSGetStarters(const UConverter* cnv,
- UBool starters[256],
- UErrorCode *) {
- const int32_t *state0;
- int i;
-
- state0=cnv->sharedData->mbcs.stateTable[cnv->sharedData->mbcs.dbcsOnlyState];
- for(i=0; i<256; ++i) {
- /* all bytes that cause a state transition from state 0 are lead bytes */
- starters[i]= (UBool)MBCS_ENTRY_IS_TRANSITION(state0[i]);
- }
-}
-
-/*
- * Internal function that lets other converter implementations check whether a byte
- * is a lead byte, that is, whether its entry in stateTable[0] is a transition entry.
- */
-U_CFUNC UBool
-ucnv_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte) {
- return (UBool)MBCS_ENTRY_IS_TRANSITION(sharedData->mbcs.stateTable[0][(uint8_t)byte]);
-}
-/* Writes the substitution bytes (subChar1 or subChars) via ucnv_cbFromUWriteBytes(); for MBCS_OUTPUT_2_SISO tables a UCNV_SI/UCNV_SO byte is written first when the sub char length changes the current SBCS/DBCS mode. */
-static void U_CALLCONV
-ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
- int32_t offsetIndex,
- UErrorCode *pErrorCode) {
- UConverter *cnv=pArgs->converter;
- char *p, *subchar;
- char buffer[4]; /* holds at most one shift byte plus a 2-byte sub char */
- int32_t length;
-
- /* first, select between subChar and subChar1 */
- if( cnv->subChar1!=0 &&
- (cnv->sharedData->mbcs.extIndexes!=NULL ?
- cnv->useSubChar1 :
- (cnv->invalidUCharBuffer[0]<=0xff))
- ) {
- /* select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS behavior) */
- subchar=(char *)&cnv->subChar1;
- length=1;
- } else {
- /* select subChar in all other cases */
- subchar=(char *)cnv->subChars;
- length=cnv->subCharLen;
- }
-
- /* reset the selector for the next code point */
- cnv->useSubChar1=FALSE;
-
- if (cnv->sharedData->mbcs.outputType == MBCS_OUTPUT_2_SISO) {
- p=buffer;
-
- /* fromUnicodeStatus contains prevLength */
- switch(length) {
- case 1:
- if(cnv->fromUnicodeStatus==2) {
- /* DBCS mode and SBCS sub char: change to SBCS */
- cnv->fromUnicodeStatus=1;
- *p++=UCNV_SI;
- }
- *p++=subchar[0];
- break;
- case 2:
- if(cnv->fromUnicodeStatus<=1) {
- /* SBCS mode and DBCS sub char: change to DBCS */
- cnv->fromUnicodeStatus=2;
- *p++=UCNV_SO;
- }
- *p++=subchar[0];
- *p++=subchar[1];
- break;
- default:
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; /* only sub char lengths 1 and 2 are handled for SI/SO tables */
- return;
- }
- subchar=buffer;
- length=(int32_t)(p-buffer);
- }
-
- ucnv_cbFromUWriteBytes(pArgs, subchar, length, offsetIndex, pErrorCode);
-}
-/* Returns UCNV_SBCS for a single-state table, UCNV_EBCDIC_STATEFUL for MBCS_OUTPUT_2_SISO output, UCNV_DBCS when min and max bytes per char are both 2, and UCNV_MBCS otherwise. */
-U_CFUNC UConverterType
-ucnv_MBCSGetType(const UConverter* converter) {
- /* SBCS, DBCS, and EBCDIC_STATEFUL are replaced by MBCS, but here we cheat a little */
- if(converter->sharedData->mbcs.countStates==1) {
- return (UConverterType)UCNV_SBCS;
- } else if((converter->sharedData->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) {
- return (UConverterType)UCNV_EBCDIC_STATEFUL;
- } else if(converter->sharedData->staticData->minBytesPerChar==2 && converter->sharedData->staticData->maxBytesPerChar==2) {
- return (UConverterType)UCNV_DBCS;
- }
- return (UConverterType)UCNV_MBCS;
-}
-
-#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
diff --git a/contrib/libs/icu/common/ucnvmbcs.h b/contrib/libs/icu/common/ucnvmbcs.h
deleted file mode 100644
index 209cdc532d4..00000000000
--- a/contrib/libs/icu/common/ucnvmbcs.h
+++ /dev/null
@@ -1,605 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2000-2013, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: ucnvmbcs.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2000jul07
-* created by: Markus W. Scherer
-*/
-
-#ifndef __UCNVMBCS_H__
-#define __UCNVMBCS_H__
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "ucnv_cnv.h"
-#include "ucnv_ext.h"
-
-/**
- * ICU conversion (.cnv) data file structure, following the usual UDataInfo
- * header.
- *
- * Format version: 6.2
- *
- * struct UConverterStaticData -- struct containing the converter name, IBM CCSID,
- * min/max bytes per character, etc.
- * see ucnv_bld.h
- *
- * --------------------
- *
- * The static data is followed by conversionType-specific data structures.
- * At the moment, there are only variations of MBCS converters. They all have
- * the same toUnicode structures, while the fromUnicode structures for SBCS
- * differ from those for other MBCS-style converters.
- *
- * _MBCSHeader.version 5 is optional and not backward-compatible
- * (as usual for changes in the major version field).
- *
- * Versions 5.m work like versions 4.m except:
- * - The _MBCSHeader has variable length (and is always longer than in version 4).
- * See the further description of struct _MBCSHeader below.
- * - There is a set of flags which indicate further incompatible changes.
- * (Reader code must reject the file if it does not recognize them all.)
- * - In particular, one of these flags indicates that most of the fromUnicode
- * data is missing and must be reconstituted from the toUnicode data
- * and from the utf8Friendly mbcsIndex at load time.
- * (This only works with a utf8Friendly table.)
- * In this case, makeconv may increase maxFastUChar automatically to U+FFFF.
- *
- * The first of these versions is 5.3, which is like 4.3 except for the differences above.
- *
- * When possible, makeconv continues to generate version 4.m files.
- *
- * _MBCSHeader.version 5.4/4.4 supports "good one-way" mappings (|4)
- * in the extension tables (fromUTableValues bit 30). See ucnv_ext.h for details.
- *
- * _MBCSHeader.version 4.3 optionally modifies the fromUnicode data structures
- * slightly and optionally adds a table for conversion to MBCS (non-SBCS)
- * charsets.
- *
- * The modifications are to make the data utf8Friendly. Not every 4.3 file
- * contains utf8Friendly data.
- * It is utf8Friendly if _MBCSHeader.version[2]!=0.
- * In this case, the data structures are utf8Friendly up to the code point
- * maxFastUChar=((_MBCSHeader.version[2]<<8)|0xff)
- *
- * A utf8Friendly file has fromUnicode stage 3 entries for code points up to
- * maxFastUChar allocated in blocks of 64 for indexing with the 6 bits from
- * a UTF-8 trail byte. ASCII is allocated linearly with 128 contiguous entries.
- *
- * In addition, a utf8Friendly MBCS file contains an additional
- * uint16_t mbcsIndex[(maxFastUChar+1)>>6];
- * which replaces the stage 1 and 2 tables for indexing with bits from the
- * UTF-8 lead byte and middle trail byte. Unlike the older MBCS stage 2 table,
- * the mbcsIndex does not contain roundtrip flags. Therefore, all fallbacks
- * from code points up to maxFastUChar (and roundtrips to 0x00) are moved to
- * the extension data structure. This also allows for faster roundtrip
- * conversion from UTF-16.
- *
- * SBCS files do not contain an additional sbcsIndex[] array because the
- * proportional size increase would be noticeable, but the runtime
- * code builds one for the code point range for which the runtime conversion
- * code is optimized.
- *
- * For SBCS, maxFastUChar should be at least U+0FFF. The initial makeconv
- * implementation sets it to U+1FFF. Because the sbcsIndex is not stored in
- * the file, a larger maxFastUChar only affects stage 3 block allocation size
- * and is free in empty blocks. (Larger blocks with sparse contents cause larger
- * files.) U+1FFF includes almost all of the small scripts.
- * U+0FFF covers UTF-8 two-byte sequences and three-byte sequences starting with
- * 0xe0. This includes most scripts with legacy SBCS charsets.
- * The initial runtime implementation using 4.3 files only builds an sbcsIndex
- * for code points up to U+0FFF.
- *
- * For MBCS, maxFastUChar should be at least U+D7FF (=initial value).
- * This boundary is convenient because practically all of the commonly used
- * characters are below it, and because it is the boundary to surrogate
- * code points, above which special handling is necessary anyway.
- * (Surrogate pair assembly for UTF-16, validity checking for UTF-8.)
- *
- * maxFastUChar could be up to U+FFFF to cover the whole BMP, which could be
- * useful especially for conversion from UTF-8 when the input can be assumed
- * to be valid, because the surrogate range would then not have to be
- * checked.
- * (With maxFastUChar=0xffff, makeconv would have to check for mbcsIndex value
- * overflow because with the all-unassigned block 0 and nearly full mappings
- * from the BMP it is theoretically possible that an index into stage 3
- * exceeds 16 bits.)
- *
- * _MBCSHeader.version 4.2 adds an optional conversion extension data structure.
- * If it is present, then an ICU version reading header versions 4.0 or 4.1
- * will be able to use the base table and ignore the extension.
- *
- * The unicodeMask in the static data is part of the base table data structure.
- * Especially, the UCNV_HAS_SUPPLEMENTARY flag determines the length of the
- * fromUnicode stage 1 array.
- * The static data unicodeMask refers only to the base table's properties if
- * a base table is included.
- * In an extension-only file, the static data unicodeMask is 0.
- * The extension data indexes have a separate field with the unicodeMask flags.
- *
- * MBCS-style data structure following the static data.
- * Offsets are counted in bytes from the beginning of the MBCS header structure.
- * Details about usage in comments in ucnvmbcs.c.
- *
- * struct _MBCSHeader (see the definition in this header file below)
- * contains 32-bit fields as follows:
- * 8 values:
- * 0 uint8_t[4] MBCS version in UVersionInfo format (currently 4.3.x.0)
- * 1 uint32_t countStates
- * 2 uint32_t countToUFallbacks
- * 3 uint32_t offsetToUCodeUnits
- * 4 uint32_t offsetFromUTable
- * 5 uint32_t offsetFromUBytes
- * 6 uint32_t flags, bits:
- * 31.. 8 offsetExtension -- _MBCSHeader.version 4.2 (ICU 2.8) and higher
- * 0 for older versions and if
- * there is no extension structure
- * 7.. 0 outputType
- * 7 uint32_t fromUBytesLength -- _MBCSHeader.version 4.1 (ICU 2.4) and higher
- * counts bytes in fromUBytes[]
- *
- * New and required in version 5:
- * 8 uint32_t options, bits:
- * 31..16 reserved for flags that can be added without breaking
- * backward compatibility
- * 15.. 6 reserved for flags whose addition will break
- * backward compatibility
- * 6 MBCS_OPT_NO_FROM_U -- if set,
- * then most of the fromUnicode data is omitted;
- * fullStage2Length is present and the missing
- * bottom part of stage 2 must be reconstituted from
- * the toUnicode data;
- * stage 3 is missing completely as well;
- * not used for SBCS tables
- * 5.. 0 length of the _MBCSHeader (number of uint32_t)
- *
- * New and optional in version 5:
- * 9 uint32_t fullStage2Length: used if MBCS_OPT_NO_FROM_U is set
- * specifies the full length of stage 2
- * including the omitted part
- *
- * if(outputType==MBCS_OUTPUT_EXT_ONLY) {
- * -- base table name for extension-only table
- * char baseTableName[variable]; -- with NUL plus padding for 4-alignment
- *
- * -- all _MBCSHeader fields except for version and flags are 0
- * } else {
- * -- normal base table with optional extension
- *
- * int32_t stateTable[countStates][256];
- *
- * struct _MBCSToUFallback { (fallbacks are sorted by offset)
- * uint32_t offset;
- * UChar32 codePoint;
- * } toUFallbacks[countToUFallbacks];
- *
- * uint16_t unicodeCodeUnits[(offsetFromUTable-offsetToUCodeUnits)/2];
- * (padded to an even number of units)
- *
- * -- stage 1 tables
- * if(staticData.unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
- * -- stage 1 table for all of Unicode
- * uint16_t fromUTable[0x440]; (32-bit-aligned)
- * } else {
- * -- BMP-only tables have a smaller stage 1 table
- * uint16_t fromUTable[0x40]; (32-bit-aligned)
- * }
- *
- * -- stage 2 tables
- * length determined by top of stage 1 and bottom of stage 3 tables
- * if(outputType==MBCS_OUTPUT_1) {
- * -- SBCS: pure indexes
- * uint16_t stage 2 indexes[?];
- * } else {
- * -- DBCS, MBCS, EBCDIC_STATEFUL, ...: roundtrip flags and indexes
- * uint32_t stage 2 flags and indexes[?];
- * if(options&MBCS_OPT_NO_FROM_U) {
- * stage 2 really has length fullStage2Length
- * and the omitted lower part must be reconstituted from
- * the toUnicode data
- * }
- * }
- *
- * -- stage 3 tables with byte results
- * if(outputType==MBCS_OUTPUT_1) {
- * -- SBCS: each 16-bit result contains flags and the result byte, see ucnvmbcs.c
- * uint16_t fromUBytes[fromUBytesLength/2];
- * } else if(!(options&MBCS_OPT_NO_FROM_U)) {
- * -- DBCS, MBCS, EBCDIC_STATEFUL, ... 2/3/4 bytes result, see ucnvmbcs.c
- * uint8_t fromUBytes[fromUBytesLength]; or
- * uint16_t fromUBytes[fromUBytesLength/2]; or
- * uint32_t fromUBytes[fromUBytesLength/4];
- * } else {
- * fromUBytes[] must be reconstituted from the toUnicode data
- * }
- *
- * -- optional utf8Friendly mbcsIndex -- _MBCSHeader.version 4.3 (ICU 3.8) and higher
- * if(outputType!=MBCS_OUTPUT_1 &&
- * _MBCSHeader.version[1]>=3 &&
- * (maxFastUChar=_MBCSHeader.version[2])!=0
- * ) {
- * maxFastUChar=(maxFastUChar<<8)|0xff;
- * uint16_t mbcsIndex[(maxFastUChar+1)>>6];
- * }
- * }
- *
- * -- extension table, details see ucnv_ext.h
- * int32_t indexes[>=32]; ...
- */
-
-/* MBCS converter data and state -------------------------------------------- */
-
-enum {
- MBCS_MAX_STATE_COUNT=128 /* 128 state numbers (0..127) fit the 7-bit state field extracted by MBCS_ENTRY_STATE() */
-};
-
-/**
- * MBCS action codes for conversions to Unicode.
- * These values (0..8) are in bits 23..20 of the final state table entries.
- */
-enum {
- MBCS_STATE_VALID_DIRECT_16, /* 0 */
- MBCS_STATE_VALID_DIRECT_20, /* 1 */
-
- MBCS_STATE_FALLBACK_DIRECT_16, /* 2 */
- MBCS_STATE_FALLBACK_DIRECT_20, /* 3 */
-
- MBCS_STATE_VALID_16, /* 4 */
- MBCS_STATE_VALID_16_PAIR, /* 5 */
-
- MBCS_STATE_UNASSIGNED, /* 6 */
- MBCS_STATE_ILLEGAL, /* 7 */
-
- MBCS_STATE_CHANGE_ONLY /* 8 */
-};
-
-/* Macros for state table entries: bit 31 = final flag, bits 30..24 = state; final entries have the action in bits 23..20 and a value in bits 19..0, transition entries have an offset in bits 23..0 */
-#define MBCS_ENTRY_TRANSITION(state, offset) (int32_t)(((int32_t)(state)<<24L)|(offset))
-#define MBCS_ENTRY_TRANSITION_SET_OFFSET(entry, offset) (int32_t)(((entry)&0xff000000)|(offset))
-#define MBCS_ENTRY_TRANSITION_ADD_OFFSET(entry, offset) (int32_t)((entry)+(offset))
-
-#define MBCS_ENTRY_FINAL(state, action, value) (int32_t)(0x80000000|((int32_t)(state)<<24L)|((action)<<20L)|(value))
-#define MBCS_ENTRY_SET_FINAL(entry) (int32_t)((entry)|0x80000000)
-#define MBCS_ENTRY_FINAL_SET_ACTION(entry, action) (int32_t)(((entry)&0xff0fffff)|((int32_t)(action)<<20L))
-#define MBCS_ENTRY_FINAL_SET_VALUE(entry, value) (int32_t)(((entry)&0xfff00000)|(value))
-#define MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, action, value) (int32_t)(((entry)&0xff000000)|((int32_t)(action)<<20L)|(value))
-
-#define MBCS_ENTRY_SET_STATE(entry, state) (int32_t)(((entry)&0x80ffffff)|((int32_t)(state)<<24L))
-
-#define MBCS_ENTRY_STATE(entry) ((((uint32_t)entry)>>24)&0x7f)
-/* the sign of the int32_t entry (bit 31) distinguishes transition entries from final entries */
-#define MBCS_ENTRY_IS_TRANSITION(entry) ((entry)>=0)
-#define MBCS_ENTRY_IS_FINAL(entry) ((entry)<0)
-
-#define MBCS_ENTRY_TRANSITION_STATE(entry) (((uint32_t)entry)>>24)
-#define MBCS_ENTRY_TRANSITION_OFFSET(entry) ((entry)&0xffffff)
-
-#define MBCS_ENTRY_FINAL_STATE(entry) ((((uint32_t)entry)>>24)&0x7f)
-#define MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry) ((entry)<(int32_t)0x80100000)
-#define MBCS_ENTRY_FINAL_ACTION(entry) ((((uint32_t)entry)>>20)&0xf)
-#define MBCS_ENTRY_FINAL_VALUE(entry) ((entry)&0xfffff)
-#define MBCS_ENTRY_FINAL_VALUE_16(entry) (uint16_t)(entry)
-/* asciiRoundtrips has one bit per group of 4 consecutive byte values: bit (b>>2) is tested for byte b */
-#define IS_ASCII_ROUNDTRIP(b, asciiRoundtrips) (((asciiRoundtrips) & (1<<((b)>>2)))!=0)
-
-/* single-byte fromUnicode: get the 16-bit result word via stage 1 (c>>10), stage 2 ((c>>4)&0x3f) and stage 3 (c&0xf) */
-#define MBCS_SINGLE_RESULT_FROM_U(table, results, c) (results)[ (table)[ (table)[(c)>>10] +(((c)>>4)&0x3f) ] +((c)&0xf) ]
-
-/* single-byte fromUnicode using the sbcsIndex */
-#define SBCS_RESULT_FROM_LOW_BMP(table, results, c) (results)[ (table)[(c)>>6] +((c)&0x3f) ]
-
-/* single-byte fromUTF8 using the sbcsIndex; l and t must be masked externally; can be l=0 and t<=0x7f */
-#define SBCS_RESULT_FROM_UTF8(table, results, l, t) (results)[ (table)[l] +(t) ]
-
-/* multi-byte fromUnicode: get the 32-bit stage 2 entry; its bits 31..16 are roundtrip flags, one per (c)&0xf */
-#define MBCS_STAGE_2_FROM_U(table, c) ((const uint32_t *)(table))[ (table)[(c)>>10] +(((c)>>4)&0x3f) ]
-#define MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ( ((stage2Entry) & ((uint32_t)1<< (16+((c)&0xf)) )) !=0)
-/* stage 3 lookup: the low 16 bits of the stage 2 entry select a block of 16 results, (c)&0xf selects within the block */
-#define MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c) ((uint16_t *)(bytes))[16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf)]
-#define MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c) ((uint32_t *)(bytes))[16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf)]
-/* same indexing for 3-byte results; yields a pointer to the first of the 3 bytes */
-#define MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c) ((bytes)+(16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf))*3)
-
-/* double-byte fromUnicode using the mbcsIndex */
-#define DBCS_RESULT_FROM_MOST_BMP(table, results, c) (results)[ (table)[(c)>>6] +((c)&0x3f) ]
-
-/* double-byte fromUTF8 using the mbcsIndex; l and t1 combined into lt1; lt1 and t2 must be masked externally */
-#define DBCS_RESULT_FROM_UTF8(table, results, lt1, t2) (results)[ (table)[lt1] +(t2) ]
-
-
-/**
- * MBCS output types for conversions from Unicode.
- * These per-converter types determine the storage method in stage 3 of the lookup table,
- * mostly how many bytes are stored per entry.
- *
- * The values are not contiguous: the EUC types start at 8 and the SISO/HZ
- * types at 12, which makes MBCS_OUTPUT_EXT_ONLY 0xe and MBCS_OUTPUT_COUNT 0xf.
- * MBCS_OUTPUT_DBCS_ONLY (0xdb) lies outside the range counted by MBCS_OUTPUT_COUNT.
- */
-enum {
- MBCS_OUTPUT_1, /* 0 */
- MBCS_OUTPUT_2, /* 1 */
- MBCS_OUTPUT_3, /* 2 */
- MBCS_OUTPUT_4, /* 3 */
-
- MBCS_OUTPUT_3_EUC=8, /* 8 */
- MBCS_OUTPUT_4_EUC, /* 9 */
-
- MBCS_OUTPUT_2_SISO=12, /* c */
- MBCS_OUTPUT_2_HZ, /* d */
-
- MBCS_OUTPUT_EXT_ONLY, /* e */
-
- MBCS_OUTPUT_COUNT,
-
- MBCS_OUTPUT_DBCS_ONLY=0xdb /* runtime-only type for DBCS-only handling of SISO tables */
-};
-
-/**
- * Fallbacks to Unicode are stored outside the normal state table and code point structures
- * in a vector of items of this type. They are sorted by offset.
- */
-typedef struct {
- uint32_t offset; /* sort key of the fallback vector (see above) */
- UChar32 codePoint; /* presumably the code point that the fallback maps to -- TODO confirm */
-} _MBCSToUFallback;
-
-/**
- * Constants for fast and UTF-8-friendly conversion.
- * Each *_LIMIT is the matching *_MAX plus one, i.e. an exclusive upper bound.
- */
-enum {
- SBCS_FAST_MAX=0x0fff, /* maximum code point with UTF-8-friendly SBCS runtime code, see makeconv SBCS_UTF8_MAX */
- SBCS_FAST_LIMIT=SBCS_FAST_MAX+1, /* =0x1000 */
- MBCS_FAST_MAX=0xd7ff, /* maximum code point with UTF-8-friendly MBCS runtime code, see makeconv MBCS_UTF8_MAX */
- MBCS_FAST_LIMIT=MBCS_FAST_MAX+1 /* =0xd800 */
-};
-
-/**
- * This is the MBCS part of the UConverterTable union (a runtime data structure).
- * It keeps all the per-converter data and points into the loaded mapping tables.
- *
- * utf8Friendly data structures added with _MBCSHeader.version 4.3
- *
- * Members are grouped by conversion direction. UCNV_MBCS_TABLE_INITIALIZER
- * lists one initializer per member in declaration order, so the struct and
- * that macro have to be changed together.
- */
-typedef struct UConverterMBCSTable {
- /* toUnicode */
- uint8_t countStates, dbcsOnlyState, stateTableOwned;
- uint32_t countToUFallbacks;
-
- const int32_t (*stateTable)/*[countStates]*/[256]; /* one row of 256 int32_t entries per state */
- int32_t (*swapLFNLStateTable)/*[countStates]*/[256]; /* for swaplfnl */
- const uint16_t *unicodeCodeUnits/*[countUnicodeResults]*/;
- const _MBCSToUFallback *toUFallbacks; /* presumably countToUFallbacks items -- TODO confirm */
-
- /* fromUnicode */
- const uint16_t *fromUnicodeTable;
- const uint16_t *mbcsIndex; /* for fast conversion from most of BMP to MBCS (utf8Friendly data) */
- uint16_t sbcsIndex[SBCS_FAST_LIMIT>>6]; /* for fast conversion from low BMP to SBCS (utf8Friendly data); 0x1000>>6 = 64 entries, stored inline */
- const uint8_t *fromUnicodeBytes;
- uint8_t *swapLFNLFromUnicodeBytes; /* for swaplfnl */
- uint32_t fromUBytesLength;
- uint8_t outputType, unicodeMask; /* outputType presumably holds one of the MBCS_OUTPUT_ values -- TODO confirm */
- UBool utf8Friendly; /* for utf8Friendly data */
- UChar maxFastUChar; /* for utf8Friendly data */
-
- /* roundtrips */
- uint32_t asciiRoundtrips; /* bit set tested by IS_ASCII_ROUNDTRIP */
-
- /* reconstituted data that was omitted from the .cnv file */
- uint8_t *reconstitutedData;
-
- /* converter name for swaplfnl */
- char *swapLFNLName;
-
- /* extension data */
- struct UConverterSharedData *baseSharedData;
- const int32_t *extIndexes;
-} UConverterMBCSTable;
-/*
- * Aggregate initializer for UConverterMBCSTable: 0, NULL, FALSE or { 0 } for
- * every member, written in member declaration order. The grouping comments
- * inside the macro mirror the ones in the struct.
- */
-#define UCNV_MBCS_TABLE_INITIALIZER { \
- /* toUnicode */ \
- 0, 0, 0, \
- 0, \
- \
- NULL, \
- NULL, \
- NULL, \
- NULL, \
- \
- /* fromUnicode */ \
- NULL, \
- NULL, \
- { 0 }, \
- NULL, \
- NULL, \
- 0, \
- 0, 0, \
- FALSE, \
- 0, \
- \
- /* roundtrips */ \
- 0, \
- \
- /* reconstituted data that was omitted from the .cnv file */ \
- NULL, \
- \
- /* converter name for swaplfnl */ \
- NULL, \
- \
- /* extension data */ \
- NULL, \
- NULL \
-}
-
-enum {
- MBCS_OPT_LENGTH_MASK=0x3f, /* bits 5..0; presumably the header length stored in the options word -- TODO confirm */
- MBCS_OPT_NO_FROM_U=0x40, /* bit 6 */
- /*
- * If any of the following options bits are set,
- * then the file must be rejected.
- * (bits 15..6, which includes MBCS_OPT_NO_FROM_U)
- */
- MBCS_OPT_INCOMPATIBLE_MASK=0xffc0,
- /*
- * Remove bits from this mask as more options are recognized
- * by all implementations that use this constant.
- * (currently bits 15..7: MBCS_OPT_INCOMPATIBLE_MASK without MBCS_OPT_NO_FROM_U)
- */
- MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK=0xff80
-};
-
-enum {
- MBCS_HEADER_V4_LENGTH=8, /* presumably counted in 32-bit units -- TODO confirm */
- MBCS_HEADER_V5_MIN_LENGTH=9 /* one more than V4; presumably accounts for the options member that _MBCSHeader requires in version 5 -- TODO confirm */
-};
-
-/**
- * MBCS data header. See data format description above.
- *
- * After version, every member is a uint32_t: seven members that are always
- * declared, then options (required in version 5) and fullStage2Length
- * (optional in version 5).
- */
-typedef struct {
- UVersionInfo version;
- uint32_t countStates,
- countToUFallbacks,
- offsetToUCodeUnits,
- offsetFromUTable,
- offsetFromUBytes,
- flags,
- fromUBytesLength;
-
- /* new and required in version 5 */
- uint32_t options; /* see the MBCS_OPT_ constants */
-
- /* new and optional in version 5; used if options&MBCS_OPT_NO_FROM_U */
- uint32_t fullStage2Length; /* number of 32-bit units */
-} _MBCSHeader;
-/* Zero initializer for _MBCSHeader: { 0 } for version, then one 0 for each of the nine uint32_t members. */
-#define UCNV_MBCS_HEADER_INITIALIZER { { 0 }, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
-
-/*
- * This is a simple version of _MBCSGetNextUChar() that is used
- * by other converter implementations.
- * It only returns an "assigned" result if it consumes the entire input.
- * It does not use state from the converter, nor error codes.
- * It does not handle the EBCDIC swaplfnl option (set in UConverter).
- * It handles conversion extensions but not GB 18030.
- *
- * Return value:
- * U+fffe unassigned
- * U+ffff illegal
- * otherwise the Unicode code point
- */
-U_CFUNC UChar32
-ucnv_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
- const char *source, int32_t length,
- UBool useFallback);
-
-/**
- * This version of _MBCSSimpleGetNextUChar() is optimized for single-byte, single-state codepages.
- * It does not handle the EBCDIC swaplfnl option (set in UConverter).
- * It does not handle conversion extensions (_extToU()).
- */
-U_CFUNC UChar32
-ucnv_MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
- uint8_t b, UBool useFallback);
-
-/**
- * This macro version of _MBCSSingleSimpleGetNextUChar() gets a code point from a byte.
- * It works for single-byte, single-state codepages that only map
- * to and from BMP code points, and it always
- * returns fallback values.
- *
- * It expands to the low 16 bits of the state-0 state table entry for byte b.
- * The macro itself does not check whether that entry is final or which
- * action it carries; that is left to the caller.
- */
-#define _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(sharedData, b) \
- (UChar)MBCS_ENTRY_FINAL_VALUE_16((sharedData)->mbcs.stateTable[0][(uint8_t)(b)])
-
-/**
- * This is an internal function that allows other converter implementations
- * to check whether a byte is a lead byte.
- */
-U_CFUNC UBool
-ucnv_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte);
-
-/**
- * This is a macro version of _MBCSIsLeadByte().
- * A byte counts as a lead byte when its entry in state 0 of the state table
- * is a transition entry (non-negative) rather than a final entry.
- */
-#define _MBCS_IS_LEAD_BYTE(sharedData, byte) \
- (UBool)MBCS_ENTRY_IS_TRANSITION((sharedData)->mbcs.stateTable[0][(uint8_t)(byte)])
-
-/*
- * This is another simple conversion function for internal use by other
- * conversion implementations.
- * It does not use the converter state nor call callbacks.
- * It does not handle the EBCDIC swaplfnl option (set in UConverter).
- * It handles conversion extensions but not GB 18030.
- *
- * It converts one single Unicode code point into codepage bytes, encoded
- * as one 32-bit value. The function returns the number of bytes in *pValue:
- * 1..4 the number of bytes in *pValue
- * 0 unassigned (*pValue undefined)
- * -1 illegal (currently not used, *pValue undefined)
- *
- * *pValue will contain the resulting bytes with the last byte in bits 7..0,
- * the second to last byte in bits 15..8, etc.
- * Currently, the function assumes but does not check that 0<=c<=0x10ffff.
- */
-U_CFUNC int32_t
-ucnv_MBCSFromUChar32(UConverterSharedData *sharedData,
- UChar32 c, uint32_t *pValue,
- UBool useFallback);
-
-/**
- * This version of _MBCSFromUChar32() is optimized for single-byte codepages.
- * It does not handle the EBCDIC swaplfnl option (set in UConverter).
- *
- * It returns the codepage byte for the code point, or -1 if it is unassigned.
- */
-U_CFUNC int32_t
-ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData,
- UChar32 c,
- UBool useFallback);
-
-/**
- * SBCS, DBCS, and EBCDIC_STATEFUL are replaced by MBCS, but
- * we cheat a little about the type, returning the old types if appropriate.
- */
-U_CFUNC UConverterType
-ucnv_MBCSGetType(const UConverter* converter);
-
-U_CFUNC void
-ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode);
-U_CFUNC void
-ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode);
-
-/*
- * Internal function returning a UnicodeSet for toUnicode() conversion.
- * Currently only used for ISO-2022-CN, and only handles roundtrip mappings.
- * In the future, if we add support for fallback sets, this function
- * needs to be updated.
- * Handles extensions.
- * Does not empty the set first.
- */
-U_CFUNC void
-ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode);
-
-/*
- * Same as ucnv_MBCSGetUnicodeSetForUnicode() but
- * the set can be filtered by encoding scheme.
- * Used by stateful converters which share regular conversion tables
- * but only use a subset of their mappings.
- */
-U_CFUNC void
-ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UConverterSetFilter filter,
- UErrorCode *pErrorCode);
-
-#endif
-
-#endif
diff --git a/contrib/libs/icu/common/ucnvscsu.cpp b/contrib/libs/icu/common/ucnvscsu.cpp
deleted file mode 100644
index 74b5722b975..00000000000
--- a/contrib/libs/icu/common/ucnvscsu.cpp
+++ /dev/null
@@ -1,2045 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2000-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: ucnvscsu.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2000nov18
-* created by: Markus W. Scherer
-*
-* This is an implementation of the Standard Compression Scheme for Unicode
-* as defined in http://www.unicode.org/unicode/reports/tr6/ .
-* Reserved commands and window settings are treated as illegal sequences and
-* will result in callback calls.
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "unicode/ucnv_cb.h"
-#include "unicode/utf16.h"
-#include "ucnv_bld.h"
-#include "ucnv_cnv.h"
-#include "cmemory.h"
-
-/* SCSU definitions --------------------------------------------------------- */
-
-/*
- * SCSU command byte values
- * The S* values (0x01..0x1F) are the ones the decoder tests for in
- * single-byte mode, the U* values (0xE0..0xF2) the ones it tests for in
- * Unicode mode. For the numbered commands only the first and the last
- * value of each range is named; the window number is the distance from the
- * first value.
- */
-enum {
- SQ0=0x01, /* Quote from window pair 0 */
- SQ7=0x08, /* Quote from window pair 7 */
- SDX=0x0B, /* Define a window as extended */
- Srs=0x0C, /* reserved */
- SQU=0x0E, /* Quote a single Unicode character */
- SCU=0x0F, /* Change to Unicode mode */
- SC0=0x10, /* Select window 0 */
- SC7=0x17, /* Select window 7 */
- SD0=0x18, /* Define and select window 0 */
- SD7=0x1F, /* Define and select window 7 */
-/* Unicode mode commands follow */
- UC0=0xE0, /* Select window 0 */
- UC7=0xE7, /* Select window 7 */
- UD0=0xE8, /* Define and select window 0 */
- UD7=0xEF, /* Define and select window 7 */
- UQU=0xF0, /* Quote a single Unicode character */
- UDX=0xF1, /* Define a Window as extended */
- Urs=0xF2 /* reserved */
-};
-
-enum {
- /*
- * Unicode code points from 3400 to E000 are not addressable by
- * dynamic window, since in these areas no short run alphabets are
- * found. Therefore add gapOffset to all values from gapThreshold.
- *
- * The decoder turns a window offset byte b below gapThreshold into
- * b<<7; 0x68<<7 is 0x3400, and (0x68<<7)+gapOffset is 0xE000.
- */
- gapThreshold=0x68,
- gapOffset=0xAC00,
-
- /* values between reservedStart and fixedThreshold are reserved */
- reservedStart=0xA8,
-
- /* use table of predefined fixed offsets for values from fixedThreshold */
- fixedThreshold=0xF9
-};
-
-/*
- * constant offsets for the 8 static windows
- * The decoder indexes this table with the quote window number (command
- * byte minus SQ0) when the quoted byte is below 0x80.
- */
-static const uint32_t staticOffsets[8]={
- 0x0000, /* ASCII for quoted tags */
- 0x0080, /* Latin - 1 Supplement (for access to punctuation) */
- 0x0100, /* Latin Extended-A */
- 0x0300, /* Combining Diacritical Marks */
- 0x2000, /* General Punctuation */
- 0x2080, /* Currency Symbols */
- 0x2100, /* Letterlike Symbols and Number Forms */
- 0x3000 /* CJK Symbols and punctuation */
-};
-
-/*
- * initial offsets for the 8 dynamic (sliding) windows
- * _SCSUReset copies this table into SCSUData.toUDynamicOffsets and
- * SCSUData.fromUDynamicOffsets.
- */
-static const uint32_t initialDynamicOffsets[8]={
- 0x0080, /* Latin-1 */
- 0x00C0, /* Latin Extended A */
- 0x0400, /* Cyrillic */
- 0x0600, /* Arabic */
- 0x0900, /* Devanagari */
- 0x3040, /* Hiragana */
- 0x30A0, /* Katakana */
- 0xFF00 /* Fullwidth ASCII */
-};
-
-/*
- * Table of fixed predefined Offsets
- * Indexed by (window offset byte - fixedThreshold); the leading comment on
- * each row gives the offset byte that selects it.
- */
-static const uint32_t fixedOffsets[]={
- /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */
- /* 0xFA */ 0x0250, /* IPA extensions */
- /* 0xFB */ 0x0370, /* Greek */
- /* 0xFC */ 0x0530, /* Armenian */
- /* 0xFD */ 0x3040, /* Hiragana */
- /* 0xFE */ 0x30A0, /* Katakana */
- /* 0xFF */ 0xFF60 /* Halfwidth Katakana */
-};
-
-/*
- * state values
- * States of the toUnicode decoder; each names what the next input byte is
- * taken to be. The value is stored in SCSUData.toUState between calls.
- */
-enum {
- readCommand, /* a command byte or directly encoded data */
- quotePairOne, /* first byte of a quoted 16-bit unit (after SQU or UQU) */
- quotePairTwo, /* second byte of a 16-bit unit; combined with byteOne */
- quoteOne, /* the single byte quoted from a window (after SQ0..SQ7) */
- definePairOne, /* first byte after SDX or UDX: window number in bits 7..5, 5 offset bits */
- definePairTwo, /* second byte after SDX or UDX: remaining offset bits */
- defineOne /* window offset byte after SD0..SD7 or UD0..UD7 */
-};
-
-typedef struct SCSUData {
- /* dynamic window offsets, initialize to default values from initialDynamicOffsets */
- uint32_t toUDynamicOffsets[8];
- uint32_t fromUDynamicOffsets[8];
-
- /* state machine state - toUnicode */
- UBool toUIsSingleByteMode;
- uint8_t toUState; /* one of the state values readCommand .. defineOne */
- int8_t toUQuoteWindow, toUDynamicWindow;
- uint8_t toUByteOne; /* first data byte of a two-byte sequence, saved so the sequence can continue in the next call */
- uint8_t toUPadding[3];
-
- /* state machine state - fromUnicode */
- UBool fromUIsSingleByteMode;
- int8_t fromUDynamicWindow;
-
- /*
- * windowUse[] keeps track of the use of the dynamic windows:
- * At nextWindowUseIndex there is the least recently used window,
- * and the following windows (in a wrapping manner) are more and more
- * recently used.
- * At nextWindowUseIndex-1 there is the most recently used window.
- */
- uint8_t locale; /* lGeneric or l_ja; set by _SCSUOpen and read by _SCSUReset */
- int8_t nextWindowUseIndex;
- int8_t windowUse[8];
-} SCSUData;
-/*
- * Initial contents of SCSUData.windowUse. _SCSUReset copies the _ja table
- * when SCSUData.locale is l_ja and the generic table otherwise.
- */
-static const int8_t initialWindowUse[8]={ 7, 0, 3, 2, 4, 5, 6, 1 };
-static const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 };
-/* Values for SCSUData.locale. */
-enum {
- lGeneric, l_ja
-};
-
-/* SCSU setup functions ----------------------------------------------------- */
-U_CDECL_BEGIN
-static void U_CALLCONV
-_SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
- SCSUData *scsu=(SCSUData *)cnv->extraInfo;
-/*
- * Restores the initial SCSU state kept in cnv->extraInfo.
- * The toUnicode half is reset when choice<=UCNV_RESET_TO_UNICODE and the
- * fromUnicode half when choice!=UCNV_RESET_TO_UNICODE, so a choice that
- * compares below UCNV_RESET_TO_UNICODE resets both halves (presumably that
- * is UCNV_RESET_BOTH -- confirm against the UConverterResetChoice enum).
- * scsu->locale is only read here. scsu is used without a NULL check.
- */
- if(choice<=UCNV_RESET_TO_UNICODE) {
- /* reset toUnicode */
- uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32); /* 32 bytes = 8 uint32_t offsets */
-
- scsu->toUIsSingleByteMode=TRUE;
- scsu->toUState=readCommand;
- scsu->toUQuoteWindow=scsu->toUDynamicWindow=0;
- scsu->toUByteOne=0;
-
- cnv->toULength=0;
- }
- if(choice!=UCNV_RESET_TO_UNICODE) {
- /* reset fromUnicode */
- uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32); /* 32 bytes = 8 uint32_t offsets */
-
- scsu->fromUIsSingleByteMode=TRUE;
- scsu->fromUDynamicWindow=0;
-
- scsu->nextWindowUseIndex=0;
- switch(scsu->locale) {
- case l_ja:
- uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8); /* 8 int8_t entries */
- break;
- default:
- uprv_memcpy(scsu->windowUse, initialWindowUse, 8); /* 8 int8_t entries */
- break;
- }
-
- cnv->fromUChar32=0;
- }
-}
-/*
- * Allocates the SCSUData for a converter that is being opened, records the
- * locale variant (a locale that is exactly ja or starts with ja_ selects
- * l_ja, anything else including NULL selects lGeneric) and then calls
- * _SCSUReset with UCNV_RESET_BOTH.
- * Returns early, before allocating anything, when pArgs->onlyTestIsLoadable
- * is set. If the allocation fails, *pErrorCode is set to
- * U_MEMORY_ALLOCATION_ERROR; the substitution character members of cnv are
- * still written in that case.
- */
-static void U_CALLCONV
-_SCSUOpen(UConverter *cnv,
- UConverterLoadArgs *pArgs,
- UErrorCode *pErrorCode) {
- const char *locale=pArgs->locale;
- if(pArgs->onlyTestIsLoadable) {
- return;
- }
- cnv->extraInfo=uprv_malloc(sizeof(SCSUData));
- if(cnv->extraInfo!=NULL) {
- if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) {
- ((SCSUData *)cnv->extraInfo)->locale=l_ja;
- } else {
- ((SCSUData *)cnv->extraInfo)->locale=lGeneric;
- }
- _SCSUReset(cnv, UCNV_RESET_BOTH); /* locale must be set first: the reset reads it */
- } else {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- }
-
- /* Set the substitution character U+fffd as a Unicode string. */
- cnv->subUChars[0]=0xfffd;
- cnv->subCharLen=-1; /* presumably a negative length marks subUChars as the active substitution -- TODO confirm */
-}
-/*
- * Releases the per-converter SCSU state: frees cnv->extraInfo unless
- * cnv->isExtraLocal is set, and clears the pointer in both cases.
- * Does nothing when extraInfo is already NULL.
- */
-static void U_CALLCONV
-_SCSUClose(UConverter *cnv) {
- if(cnv->extraInfo!=NULL) {
- if(!cnv->isExtraLocal) {
- uprv_free(cnv->extraInfo);
- }
- cnv->extraInfo=NULL;
- }
-}
-
-/* SCSU-to-Unicode conversion functions ------------------------------------- */
-/*
- * Decodes SCSU bytes from pArgs->source up to pArgs->sourceLimit into UTF-16
- * code units written at pArgs->target up to pArgs->targetLimit. When
- * pArgs->offsets is not NULL, one source index is written per output code
- * unit. The updated source, target and offsets pointers are written back to
- * pArgs on return.
- *
- * The decoder state (mode, state, quote window, dynamic window, byteOne) is
- * loaded from the SCSUData in cnv->extraInfo at the start and stored back at
- * the end, so a sequence may be split across calls.
- *
- * *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR when the target is full and
- * to U_ILLEGAL_CHAR_FOUND for the reserved command bytes Srs and Urs; in the
- * latter case the offending byte is left in cnv->toUBytes with
- * cnv->toULength set.
- */
-static void U_CALLCONV
-_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- SCSUData *scsu;
- const uint8_t *source, *sourceLimit;
- UChar *target;
- const UChar *targetLimit;
- int32_t *offsets;
- UBool isSingleByteMode;
- uint8_t state, byteOne;
- int8_t quoteWindow, dynamicWindow;
-
- int32_t sourceIndex, nextSourceIndex;
-
- uint8_t b;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- scsu=(SCSUData *)cnv->extraInfo;
-
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=pArgs->target;
- targetLimit=pArgs->targetLimit;
- offsets=pArgs->offsets;
-
- /* get the state machine state */
- isSingleByteMode=scsu->toUIsSingleByteMode;
- state=scsu->toUState;
- quoteWindow=scsu->toUQuoteWindow;
- dynamicWindow=scsu->toUDynamicWindow;
- byteOne=scsu->toUByteOne;
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex=state==readCommand ? 0 : -1;
- nextSourceIndex=0;
-
- /*
- * conversion "loop"
- *
- * For performance, this is not a normal C loop.
- * Instead, there are two code blocks for the two SCSU modes.
- * The function branches to either one, and a change of the mode is done with a goto to
- * the other branch.
- *
- * Each branch has two conventional loops:
- * - a fast-path loop for the most common codes in the mode
- * - a loop for all other codes in the mode
- * When the fast-path runs into a code that it cannot handle, its loop ends and it
- * runs into the following loop to handle the other codes.
- * The end of the input or output buffer is also handled by the slower loop.
- * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
- *
- * The callback handling is done by returning with an error code.
- * The conversion framework actually calls the callback function.
- */
- if(isSingleByteMode) {
- /* fast path for single-byte mode */
- if(state==readCommand) {
-fastSingle:
- while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
- ++source;
- ++nextSourceIndex;
- if(b<=0x7f) {
- /* write US-ASCII graphic character or DEL */
- *target++=(UChar)b;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- } else {
- /* write from dynamic window */
- uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
- if(c<=0xffff) {
- *target++=(UChar)c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- } else {
- /* output surrogate pair; 0xd7c0 equals 0xd800-(0x10000>>10) */
- *target++=(UChar)(0xd7c0+(c>>10));
- if(target<targetLimit) {
- *target++=(UChar)(0xdc00|(c&0x3ff));
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- }
- } else {
- /* target overflow: the trail surrogate is parked in UCharErrorBuffer */
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- goto endloop;
- }
- }
- }
- sourceIndex=nextSourceIndex;
- }
- }
-
- /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
-singleByteMode:
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- b=*source++;
- ++nextSourceIndex;
- switch(state) {
- case readCommand:
- /* redundant conditions are commented out */
- /* here: b<0x20 because otherwise we would be in fastSingle */
- if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
- /* CR/LF/TAB/NUL */
- *target++=(UChar)b;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- sourceIndex=nextSourceIndex;
- goto fastSingle;
- } else if(SC0<=b) {
- if(b<=SC7) {
- dynamicWindow=(int8_t)(b-SC0);
- sourceIndex=nextSourceIndex;
- goto fastSingle;
- } else /* if(SD0<=b && b<=SD7) */ {
- dynamicWindow=(int8_t)(b-SD0);
- state=defineOne;
- }
- } else if(/* SQ0<=b && */ b<=SQ7) {
- quoteWindow=(int8_t)(b-SQ0);
- state=quoteOne;
- } else if(b==SDX) {
- state=definePairOne;
- } else if(b==SQU) {
- state=quotePairOne;
- } else if(b==SCU) {
- sourceIndex=nextSourceIndex;
- isSingleByteMode=FALSE;
- goto fastUnicode;
- } else /* Srs */ {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- goto endloop;
- }
-
- /* store the first byte of a multibyte sequence in toUBytes[] */
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- break;
- case quotePairOne:
- byteOne=b;
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=quotePairTwo;
- break;
- case quotePairTwo:
- *target++=(UChar)((byteOne<<8)|b);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- sourceIndex=nextSourceIndex;
- state=readCommand;
- goto fastSingle;
- case quoteOne:
- if(b<0x80) {
- /* all static offsets are in the BMP */
- *target++=(UChar)(staticOffsets[quoteWindow]+b);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- } else {
- /* write from dynamic window */
- uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
- if(c<=0xffff) {
- *target++=(UChar)c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- } else {
- /* output surrogate pair */
- *target++=(UChar)(0xd7c0+(c>>10));
- if(target<targetLimit) {
- *target++=(UChar)(0xdc00|(c&0x3ff));
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- }
- } else {
- /* target overflow */
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- goto endloop;
- }
- }
- }
- sourceIndex=nextSourceIndex;
- state=readCommand;
- goto fastSingle;
- case definePairOne:
- dynamicWindow=(int8_t)((b>>5)&7);
- byteOne=(uint8_t)(b&0x1f);
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=definePairTwo;
- break;
- case definePairTwo:
- scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
- sourceIndex=nextSourceIndex;
- state=readCommand;
- goto fastSingle;
- case defineOne:
- if(b==0) {
- /*
- * callback(illegal): Reserved window offset value 0
- * NOTE(review): unlike the Srs branch above, this path and the
- * final else branch below jump to endloop without setting
- * *pErrorCode, so no error is reported from here and state is
- * left at defineOne. Confirm whether U_ILLEGAL_CHAR_FOUND is
- * intended.
- */
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- goto endloop;
- } else if(b<gapThreshold) {
- scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
- } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
- scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
- } else if(b>=fixedThreshold) {
- scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
- } else {
- /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- goto endloop;
- }
- sourceIndex=nextSourceIndex;
- state=readCommand;
- goto fastSingle;
- }
- }
- } else {
- /* fast path for Unicode mode: copies byte pairs whose first byte is outside the command range UC0..Urs */
- if(state==readCommand) {
-fastUnicode:
- while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
- *target++=(UChar)((b<<8)|source[1]);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- sourceIndex=nextSourceIndex;
- nextSourceIndex+=2;
- source+=2;
- }
- }
-
- /* normal state machine for Unicode mode */
-/* unicodeByteMode: */
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- b=*source++;
- ++nextSourceIndex;
- switch(state) {
- case readCommand:
- if((uint8_t)(b-UC0)>(Urs-UC0)) {
- byteOne=b;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=quotePairTwo;
- } else if(/* UC0<=b && */ b<=UC7) {
- dynamicWindow=(int8_t)(b-UC0);
- sourceIndex=nextSourceIndex;
- isSingleByteMode=TRUE;
- goto fastSingle;
- } else if(/* UD0<=b && */ b<=UD7) {
- dynamicWindow=(int8_t)(b-UD0);
- isSingleByteMode=TRUE;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=defineOne;
- goto singleByteMode;
- } else if(b==UDX) {
- isSingleByteMode=TRUE;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=definePairOne;
- goto singleByteMode;
- } else if(b==UQU) {
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=quotePairOne;
- } else /* Urs */ {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- goto endloop;
- }
- break;
- case quotePairOne:
- byteOne=b;
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=quotePairTwo;
- break;
- case quotePairTwo:
- *target++=(UChar)((byteOne<<8)|b);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- sourceIndex=nextSourceIndex;
- state=readCommand;
- goto fastUnicode;
- }
- }
- }
-endloop:
-
- /* set the converter state back into UConverter */
- if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
- /* reset to deal with the next character */
- state=readCommand;
- } else if(state==readCommand) {
- /* not in a multi-byte sequence, reset toULength */
- cnv->toULength=0;
- }
- scsu->toUIsSingleByteMode=isSingleByteMode;
- scsu->toUState=state;
- scsu->toUQuoteWindow=quoteWindow;
- scsu->toUDynamicWindow=dynamicWindow;
- scsu->toUByteOne=byteOne;
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- pArgs->offsets=offsets;
- return;
-}
-
-/*
- * Identical to _SCSUToUnicodeWithOffsets but without offset handling.
- * If a change is made in the original function, then either
- * change this function the same way or
- * re-copy the original function and remove the variables
- * offsets, sourceIndex, and nextSourceIndex.
- *
- * Decodes SCSU bytes from pArgs->source up to pArgs->sourceLimit into UTF-16
- * code units at pArgs->target up to pArgs->targetLimit and writes the
- * updated source and target pointers back to pArgs. The decoder state is
- * loaded from and stored back into the SCSUData in cnv->extraInfo.
- * *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR when the target is full and
- * to U_ILLEGAL_CHAR_FOUND for the reserved command bytes Srs and Urs.
- */
-static void U_CALLCONV
-_SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- SCSUData *scsu;
- const uint8_t *source, *sourceLimit;
- UChar *target;
- const UChar *targetLimit;
- UBool isSingleByteMode;
- uint8_t state, byteOne;
- int8_t quoteWindow, dynamicWindow;
-
- uint8_t b;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- scsu=(SCSUData *)cnv->extraInfo;
-
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=pArgs->target;
- targetLimit=pArgs->targetLimit;
-
- /* get the state machine state */
- isSingleByteMode=scsu->toUIsSingleByteMode;
- state=scsu->toUState;
- quoteWindow=scsu->toUQuoteWindow;
- dynamicWindow=scsu->toUDynamicWindow;
- byteOne=scsu->toUByteOne;
-
- /*
- * conversion "loop"
- *
- * For performance, this is not a normal C loop.
- * Instead, there are two code blocks for the two SCSU modes.
- * The function branches to either one, and a change of the mode is done with a goto to
- * the other branch.
- *
- * Each branch has two conventional loops:
- * - a fast-path loop for the most common codes in the mode
- * - a loop for all other codes in the mode
- * When the fast-path runs into a code that it cannot handle, its loop ends and it
- * runs into the following loop to handle the other codes.
- * The end of the input or output buffer is also handled by the slower loop.
- * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
- *
- * The callback handling is done by returning with an error code.
- * The conversion framework actually calls the callback function.
- */
- if(isSingleByteMode) {
- /* fast path for single-byte mode */
- if(state==readCommand) {
-fastSingle:
- while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
- ++source;
- if(b<=0x7f) {
- /* write US-ASCII graphic character or DEL */
- *target++=(UChar)b;
- } else {
- /* write from dynamic window */
- uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
- if(c<=0xffff) {
- *target++=(UChar)c;
- } else {
- /* output surrogate pair; 0xd7c0 equals 0xd800-(0x10000>>10) */
- *target++=(UChar)(0xd7c0+(c>>10));
- if(target<targetLimit) {
- *target++=(UChar)(0xdc00|(c&0x3ff));
- } else {
- /* target overflow: the trail surrogate is parked in UCharErrorBuffer */
- cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- goto endloop;
- }
- }
- }
- }
- }
-
- /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
-singleByteMode:
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- b=*source++;
- switch(state) {
- case readCommand:
- /* redundant conditions are commented out */
- /* here: b<0x20 because otherwise we would be in fastSingle */
- if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
- /* CR/LF/TAB/NUL */
- *target++=(UChar)b;
- goto fastSingle;
- } else if(SC0<=b) {
- if(b<=SC7) {
- dynamicWindow=(int8_t)(b-SC0);
- goto fastSingle;
- } else /* if(SD0<=b && b<=SD7) */ {
- dynamicWindow=(int8_t)(b-SD0);
- state=defineOne;
- }
- } else if(/* SQ0<=b && */ b<=SQ7) {
- quoteWindow=(int8_t)(b-SQ0);
- state=quoteOne;
- } else if(b==SDX) {
- state=definePairOne;
- } else if(b==SQU) {
- state=quotePairOne;
- } else if(b==SCU) {
- isSingleByteMode=FALSE;
- goto fastUnicode;
- } else /* Srs */ {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- goto endloop;
- }
-
- /* store the first byte of a multibyte sequence in toUBytes[] */
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- break;
- case quotePairOne:
- byteOne=b;
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=quotePairTwo;
- break;
- case quotePairTwo:
- *target++=(UChar)((byteOne<<8)|b);
- state=readCommand;
- goto fastSingle;
- case quoteOne:
- if(b<0x80) {
- /* all static offsets are in the BMP */
- *target++=(UChar)(staticOffsets[quoteWindow]+b);
- } else {
- /* write from dynamic window */
- uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
- if(c<=0xffff) {
- *target++=(UChar)c;
- } else {
- /* output surrogate pair */
- *target++=(UChar)(0xd7c0+(c>>10));
- if(target<targetLimit) {
- *target++=(UChar)(0xdc00|(c&0x3ff));
- } else {
- /* target overflow */
- cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- goto endloop;
- }
- }
- }
- state=readCommand;
- goto fastSingle;
- case definePairOne:
- dynamicWindow=(int8_t)((b>>5)&7);
- byteOne=(uint8_t)(b&0x1f);
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=definePairTwo;
- break;
- case definePairTwo:
- scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
- state=readCommand;
- goto fastSingle;
- case defineOne:
- if(b==0) {
- /*
- * callback(illegal): Reserved window offset value 0
- * NOTE(review): unlike the Srs branch above, this path and the
- * final else branch below jump to endloop without setting
- * *pErrorCode, so no error is reported from here and state is
- * left at defineOne. Confirm whether U_ILLEGAL_CHAR_FOUND is
- * intended.
- */
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- goto endloop;
- } else if(b<gapThreshold) {
- scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
- } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
- scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
- } else if(b>=fixedThreshold) {
- scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
- } else {
- /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- goto endloop;
- }
- state=readCommand;
- goto fastSingle;
- }
- }
- } else {
- /* fast path for Unicode mode: copies byte pairs whose first byte is outside the command range UC0..Urs */
- if(state==readCommand) {
-fastUnicode:
- while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
- *target++=(UChar)((b<<8)|source[1]);
- source+=2;
- }
- }
-
- /* normal state machine for Unicode mode */
-/* unicodeByteMode: */
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- b=*source++;
- switch(state) {
- case readCommand:
- if((uint8_t)(b-UC0)>(Urs-UC0)) {
- byteOne=b;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=quotePairTwo;
- } else if(/* UC0<=b && */ b<=UC7) {
- dynamicWindow=(int8_t)(b-UC0);
- isSingleByteMode=TRUE;
- goto fastSingle;
- } else if(/* UD0<=b && */ b<=UD7) {
- dynamicWindow=(int8_t)(b-UD0);
- isSingleByteMode=TRUE;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=defineOne;
- goto singleByteMode;
- } else if(b==UDX) {
- isSingleByteMode=TRUE;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=definePairOne;
- goto singleByteMode;
- } else if(b==UQU) {
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=quotePairOne;
- } else /* Urs */ {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- goto endloop;
- }
- break;
- case quotePairOne:
- byteOne=b;
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=quotePairTwo;
- break;
- case quotePairTwo:
- *target++=(UChar)((byteOne<<8)|b);
- state=readCommand;
- goto fastUnicode;
- }
- }
- }
-endloop:
-
- /* set the converter state back into UConverter */
- if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
- /* reset to deal with the next character */
- state=readCommand;
- } else if(state==readCommand) {
- /* not in a multi-byte sequence, reset toULength */
- cnv->toULength=0;
- }
- scsu->toUIsSingleByteMode=isSingleByteMode;
- scsu->toUState=state;
- scsu->toUQuoteWindow=quoteWindow;
- scsu->toUDynamicWindow=dynamicWindow;
- scsu->toUByteOne=byteOne;
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- return;
-}
-U_CDECL_END
-/* SCSU-from-Unicode conversion functions ----------------------------------- */
-
-/*
- * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
- * reasonable results. The lookahead is minimal.
- * Many cases are simple:
- * A character fits directly into the current mode, a dynamic or static window,
- * or is not compressible. These cases are tested first.
- * Real compression heuristics are applied to the rest, in code branches for
- * single/Unicode mode and BMP/supplementary code points.
- * The heuristics used here are extremely simple.
- */
-
-/* get the number of the window that this character is in, or -1 */
-static int8_t
-getWindow(const uint32_t offsets[8], uint32_t c) {
- int i;
- for(i=0; i<8; ++i) {
- if((uint32_t)(c-offsets[i])<=0x7f) {
- return (int8_t)(i);
- }
- }
- return -1;
-}
-
-/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */
-static UBool
-isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) {
- return (UBool)(c<=offset+0x7f &&
- (c>=offset || (c<=0x7f &&
- (c>=0x20 || (1UL<<c)&0x2601))));
- /* binary 0010 0110 0000 0001,
- check for b==0xd || b==0xa || b==9 || b==0 */
-}
-
-/*
- * getNextDynamicWindow returns the next dynamic window to be redefined
- */
-static int8_t
-getNextDynamicWindow(SCSUData *scsu) {
- int8_t window=scsu->windowUse[scsu->nextWindowUseIndex];
- if(++scsu->nextWindowUseIndex==8) {
- scsu->nextWindowUseIndex=0;
- }
- return window;
-}
-
-/*
- * useDynamicWindow() adjusts
- * windowUse[] and nextWindowUseIndex for the algorithm to choose
- * the next dynamic window to be defined;
- * a subclass may override it and provide its own algorithm.
- */
-static void
-useDynamicWindow(SCSUData *scsu, int8_t window) {
- /*
- * move the existing window, which just became the most recently used one,
- * up in windowUse[] to nextWindowUseIndex-1
- */
-
- /* first, find the index of the window - backwards to favor the more recently used windows */
- int i, j;
-
- i=scsu->nextWindowUseIndex;
- do {
- if(--i<0) {
- i=7;
- }
- } while(scsu->windowUse[i]!=window);
-
- /* now copy each windowUse[i+1] to [i] */
- j=i+1;
- if(j==8) {
- j=0;
- }
- while(j!=scsu->nextWindowUseIndex) {
- scsu->windowUse[i]=scsu->windowUse[j];
- i=j;
- if(++j==8) { j=0; }
- }
-
- /* finally, set the window into the most recently used index */
- scsu->windowUse[i]=window;
-}
-
-/*
- * calculate the offset and the code for a dynamic window that contains the character
- * takes fixed offsets into account
- * the offset of the window is stored in the offset variable,
- * the code is returned
- *
- * return offset code: -1 none <=0xff code for SDn/UDn else code for SDX/UDX, subtract 0x200 to get the true code
- */
-static int
-getDynamicOffset(uint32_t c, uint32_t *pOffset) {
- int i;
-
- for(i=0; i<7; ++i) {
- if((uint32_t)(c-fixedOffsets[i])<=0x7f) {
- *pOffset=fixedOffsets[i];
- return 0xf9+i;
- }
- }
-
- if(c<0x80) {
- /* No dynamic window for US-ASCII. */
- return -1;
- } else if(c<0x3400 ||
- (uint32_t)(c-0x10000)<(0x14000-0x10000) ||
- (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000)
- ) {
- /* This character is in a code range for a "small", i.e., reasonably windowable, script. */
- *pOffset=c&0x7fffff80;
- return (int)(c>>7);
- } else if(0xe000<=c && c!=0xfeff && c<0xfff0) {
- /* For these characters we need to take the gapOffset into account. */
- *pOffset=c&0x7fffff80;
- return (int)((c-gapOffset)>>7);
- } else {
- return -1;
- }
-}
-U_CDECL_BEGIN
-/*
- * Idea for compression:
- * - save SCSUData and other state before really starting work
- * - at endloop, see if compression could be better with just unicode mode
- * - don't do this if a callback has been called
- * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning
- * - different buffer handling!
- *
- * Drawback or need for corrective handling:
- * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and
- * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible
- * not only for compression but also for HTML/XML documents with following charset/encoding announcers.
- *
- * How to achieve both?
- * - Only replace the result after an SDX or SCU?
- */
-
-static void U_CALLCONV
-_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- SCSUData *scsu;
- const UChar *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
- int32_t *offsets;
-
- UBool isSingleByteMode;
- uint8_t dynamicWindow;
- uint32_t currentOffset;
-
- uint32_t c, delta;
-
- int32_t sourceIndex, nextSourceIndex;
-
- int32_t length;
-
- /* variables for compression heuristics */
- uint32_t offset;
- UChar lead, trail;
- int code;
- int8_t window;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- scsu=(SCSUData *)cnv->extraInfo;
-
- /* set up the local pointers */
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
- offsets=pArgs->offsets;
-
- /* get the state machine state */
- isSingleByteMode=scsu->fromUIsSingleByteMode;
- dynamicWindow=scsu->fromUDynamicWindow;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
-
- c=cnv->fromUChar32;
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex= c==0 ? 0 : -1;
- nextSourceIndex=0;
-
- /* similar conversion "loop" as in toUnicode */
-loop:
- if(isSingleByteMode) {
- if(c!=0 && targetCapacity>0) {
- goto getTrailSingle;
- }
-
- /* state machine for single-byte mode */
-/* singleByteMode: */
- while(source<sourceLimit) {
- if(targetCapacity<=0) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- c=*source++;
- ++nextSourceIndex;
-
- if((c-0x20)<=0x5f) {
- /* pass US-ASCII graphic character through */
- *target++=(uint8_t)c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- } else if(c<0x20) {
- if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
- /* CR/LF/TAB/NUL */
- *target++=(uint8_t)c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- } else {
- /* quote C0 control character */
- c|=SQ0<<8;
- length=2;
- goto outputBytes;
- }
- } else if((delta=c-currentOffset)<=0x7f) {
- /* use the current dynamic window */
- *target++=(uint8_t)(delta|0x80);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- } else if(U16_IS_SURROGATE(c)) {
- if(U16_IS_SURROGATE_LEAD(c)) {
-getTrailSingle:
- lead=(UChar)c;
- if(source<sourceLimit) {
- /* test the following code unit */
- trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- ++nextSourceIndex;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- /* convert this surrogate code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- } else {
- /* no more input */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
-
- /* compress supplementary character U+10000..U+10ffff */
- if((delta=c-currentOffset)<=0x7f) {
- /* use the current dynamic window */
- *target++=(uint8_t)(delta|0x80);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a dynamic window that contains this character, change to it */
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* might check if there are more characters in this window to come */
- /* define an extended window with this character */
- code-=0x200;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=4;
- goto outputBytes;
- } else {
- /* change to Unicode mode and output this (lead, trail) pair */
- isSingleByteMode=FALSE;
- *target++=(uint8_t)SCU;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- c=((uint32_t)lead<<16)|trail;
- length=4;
- goto outputBytes;
- }
- } else if(c<0xa0) {
- /* quote C1 control character */
- c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
- length=2;
- goto outputBytes;
- } else if(c==0xfeff || c>=0xfff0) {
- /* quote signature character=byte order mark and specials */
- c|=SQU<<16;
- length=3;
- goto outputBytes;
- } else {
- /* compress all other BMP characters */
- if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a window defined that contains this character - switch to it or quote from it? */
- if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
- /* change to dynamic window */
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else {
- /* quote from dynamic window */
- c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
- length=2;
- goto outputBytes;
- }
- } else if((window=getWindow(staticOffsets, c))>=0) {
- /* quote from static window */
- c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* define a dynamic window with this character */
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=3;
- goto outputBytes;
- } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
- (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
- ) {
- /*
- * this character is not compressible (a BMP ideograph or similar);
- * switch to Unicode mode if this is the last character in the block
- * or there is at least one more ideograph following immediately
- */
- isSingleByteMode=FALSE;
- c|=SCU<<16;
- length=3;
- goto outputBytes;
- } else {
- /* quote Unicode */
- c|=SQU<<16;
- length=3;
- goto outputBytes;
- }
- }
-
- /* normal end of conversion: prepare for a new character */
- c=0;
- sourceIndex=nextSourceIndex;
- }
- } else {
- if(c!=0 && targetCapacity>0) {
- goto getTrailUnicode;
- }
-
- /* state machine for Unicode mode */
-/* unicodeByteMode: */
- while(source<sourceLimit) {
- if(targetCapacity<=0) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- c=*source++;
- ++nextSourceIndex;
-
- if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
- /* not compressible, write character directly */
- if(targetCapacity>=2) {
- *target++=(uint8_t)(c>>8);
- *target++=(uint8_t)c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- }
- targetCapacity-=2;
- } else {
- length=2;
- goto outputBytes;
- }
- } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
- /* compress BMP character if the following one is not an uncompressible ideograph */
- if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
- if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
- /* ASCII digit or letter */
- isSingleByteMode=TRUE;
- c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
- length=2;
- goto outputBytes;
- } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a dynamic window that contains this character, change to it */
- isSingleByteMode=TRUE;
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* define a dynamic window with this character */
- isSingleByteMode=TRUE;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=3;
- goto outputBytes;
- }
- }
-
- /* don't know how to compress this character, just write it directly */
- length=2;
- goto outputBytes;
- } else if(c<0xe000) {
- /* c is a surrogate */
- if(U16_IS_SURROGATE_LEAD(c)) {
-getTrailUnicode:
- lead=(UChar)c;
- if(source<sourceLimit) {
- /* test the following code unit */
- trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- ++nextSourceIndex;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- /* convert this surrogate code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- } else {
- /* no more input */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
-
- /* compress supplementary character */
- if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
- !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
- ) {
- /*
- * there is a dynamic window that contains this character and
- * the following character is not uncompressible,
- * change to the window
- */
- isSingleByteMode=TRUE;
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
- (code=getDynamicOffset(c, &offset))>=0
- ) {
- /* two supplementary characters in (probably) the same window - define an extended one */
- isSingleByteMode=TRUE;
- code-=0x200;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=4;
- goto outputBytes;
- } else {
- /* don't know how to compress this character, just write it directly */
- c=((uint32_t)lead<<16)|trail;
- length=4;
- goto outputBytes;
- }
- } else /* 0xe000<=c<0xf300 */ {
- /* quote to avoid SCSU tags */
- c|=UQU<<16;
- length=3;
- goto outputBytes;
- }
-
- /* normal end of conversion: prepare for a new character */
- c=0;
- sourceIndex=nextSourceIndex;
- }
- }
-endloop:
-
- /* set the converter state back into UConverter */
- scsu->fromUIsSingleByteMode=isSingleByteMode;
- scsu->fromUDynamicWindow=dynamicWindow;
-
- cnv->fromUChar32=c;
-
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- pArgs->offsets=offsets;
- return;
-
-outputBytes:
- /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
- /* from the first if in the loop we know that targetCapacity>0 */
- if(length<=targetCapacity) {
- if(offsets==NULL) {
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *target++=(uint8_t)(c>>24);
- U_FALLTHROUGH;
- case 3:
- *target++=(uint8_t)(c>>16);
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(c>>8);
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)c;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- } else {
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *target++=(uint8_t)(c>>24);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 3:
- *target++=(uint8_t)(c>>16);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(c>>8);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)c;
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- }
- targetCapacity-=length;
-
- /* normal end of conversion: prepare for a new character */
- c=0;
- sourceIndex=nextSourceIndex;
- goto loop;
- } else {
- uint8_t *p;
-
- /*
- * We actually do this backwards here:
- * In order to save an intermediate variable, we output
- * first to the overflow buffer what does not fit into the
- * regular target.
- */
- /* we know that 0<=targetCapacity<length<=4 */
- /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
- length-=targetCapacity;
- p=(uint8_t *)cnv->charErrorBuffer;
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *p++=(uint8_t)(c>>24);
- U_FALLTHROUGH;
- case 3:
- *p++=(uint8_t)(c>>16);
- U_FALLTHROUGH;
- case 2:
- *p++=(uint8_t)(c>>8);
- U_FALLTHROUGH;
- case 1:
- *p=(uint8_t)c;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- cnv->charErrorBufferLength=(int8_t)length;
-
- /* now output what fits into the regular target */
- c>>=8*length; /* length was reduced by targetCapacity */
- switch(targetCapacity) {
- /* each branch falls through to the next one */
- case 3:
- *target++=(uint8_t)(c>>16);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(c>>8);
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)c;
- if(offsets!=NULL) {
- *offsets++=sourceIndex;
- }
- U_FALLTHROUGH;
- default:
- break;
- }
-
- /* target overflow */
- targetCapacity=0;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- c=0;
- goto endloop;
- }
-}
-
-/*
- * Identical to _SCSUFromUnicodeWithOffsets but without offset handling.
- * If a change is made in the original function, then either
- * change this function the same way or
- * re-copy the original function and remove the variables
- * offsets, sourceIndex, and nextSourceIndex.
- */
-static void U_CALLCONV
-_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- SCSUData *scsu;
- const UChar *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
-
- UBool isSingleByteMode;
- uint8_t dynamicWindow;
- uint32_t currentOffset;
-
- uint32_t c, delta;
-
- int32_t length;
-
- /* variables for compression heuristics */
- uint32_t offset;
- UChar lead, trail;
- int code;
- int8_t window;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- scsu=(SCSUData *)cnv->extraInfo;
-
- /* set up the local pointers */
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
-
- /* get the state machine state */
- isSingleByteMode=scsu->fromUIsSingleByteMode;
- dynamicWindow=scsu->fromUDynamicWindow;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
-
- c=cnv->fromUChar32;
-
- /* similar conversion "loop" as in toUnicode */
-loop:
- if(isSingleByteMode) {
- if(c!=0 && targetCapacity>0) {
- goto getTrailSingle;
- }
-
- /* state machine for single-byte mode */
-/* singleByteMode: */
- while(source<sourceLimit) {
- if(targetCapacity<=0) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- c=*source++;
-
- if((c-0x20)<=0x5f) {
- /* pass US-ASCII graphic character through */
- *target++=(uint8_t)c;
- --targetCapacity;
- } else if(c<0x20) {
- if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
- /* CR/LF/TAB/NUL */
- *target++=(uint8_t)c;
- --targetCapacity;
- } else {
- /* quote C0 control character */
- c|=SQ0<<8;
- length=2;
- goto outputBytes;
- }
- } else if((delta=c-currentOffset)<=0x7f) {
- /* use the current dynamic window */
- *target++=(uint8_t)(delta|0x80);
- --targetCapacity;
- } else if(U16_IS_SURROGATE(c)) {
- if(U16_IS_SURROGATE_LEAD(c)) {
-getTrailSingle:
- lead=(UChar)c;
- if(source<sourceLimit) {
- /* test the following code unit */
- trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- /* convert this surrogate code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- } else {
- /* no more input */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
-
- /* compress supplementary character U+10000..U+10ffff */
- if((delta=c-currentOffset)<=0x7f) {
- /* use the current dynamic window */
- *target++=(uint8_t)(delta|0x80);
- --targetCapacity;
- } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a dynamic window that contains this character, change to it */
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* might check if there are more characters in this window to come */
- /* define an extended window with this character */
- code-=0x200;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=4;
- goto outputBytes;
- } else {
- /* change to Unicode mode and output this (lead, trail) pair */
- isSingleByteMode=FALSE;
- *target++=(uint8_t)SCU;
- --targetCapacity;
- c=((uint32_t)lead<<16)|trail;
- length=4;
- goto outputBytes;
- }
- } else if(c<0xa0) {
- /* quote C1 control character */
- c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
- length=2;
- goto outputBytes;
- } else if(c==0xfeff || c>=0xfff0) {
- /* quote signature character=byte order mark and specials */
- c|=SQU<<16;
- length=3;
- goto outputBytes;
- } else {
- /* compress all other BMP characters */
- if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a window defined that contains this character - switch to it or quote from it? */
- if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
- /* change to dynamic window */
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else {
- /* quote from dynamic window */
- c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
- length=2;
- goto outputBytes;
- }
- } else if((window=getWindow(staticOffsets, c))>=0) {
- /* quote from static window */
- c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* define a dynamic window with this character */
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=3;
- goto outputBytes;
- } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
- (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
- ) {
- /*
- * this character is not compressible (a BMP ideograph or similar);
- * switch to Unicode mode if this is the last character in the block
- * or there is at least one more ideograph following immediately
- */
- isSingleByteMode=FALSE;
- c|=SCU<<16;
- length=3;
- goto outputBytes;
- } else {
- /* quote Unicode */
- c|=SQU<<16;
- length=3;
- goto outputBytes;
- }
- }
-
- /* normal end of conversion: prepare for a new character */
- c=0;
- }
- } else {
- if(c!=0 && targetCapacity>0) {
- goto getTrailUnicode;
- }
-
- /* state machine for Unicode mode */
-/* unicodeByteMode: */
- while(source<sourceLimit) {
- if(targetCapacity<=0) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- c=*source++;
-
- if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
- /* not compressible, write character directly */
- if(targetCapacity>=2) {
- *target++=(uint8_t)(c>>8);
- *target++=(uint8_t)c;
- targetCapacity-=2;
- } else {
- length=2;
- goto outputBytes;
- }
- } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
- /* compress BMP character if the following one is not an uncompressible ideograph */
- if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
- if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
- /* ASCII digit or letter */
- isSingleByteMode=TRUE;
- c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
- length=2;
- goto outputBytes;
- } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a dynamic window that contains this character, change to it */
- isSingleByteMode=TRUE;
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* define a dynamic window with this character */
- isSingleByteMode=TRUE;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=3;
- goto outputBytes;
- }
- }
-
- /* don't know how to compress this character, just write it directly */
- length=2;
- goto outputBytes;
- } else if(c<0xe000) {
- /* c is a surrogate */
- if(U16_IS_SURROGATE_LEAD(c)) {
-getTrailUnicode:
- lead=(UChar)c;
- if(source<sourceLimit) {
- /* test the following code unit */
- trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- /* convert this surrogate code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- } else {
- /* no more input */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
-
- /* compress supplementary character */
- if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
- !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
- ) {
- /*
- * there is a dynamic window that contains this character and
- * the following character is not uncompressible,
- * change to the window
- */
- isSingleByteMode=TRUE;
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
- (code=getDynamicOffset(c, &offset))>=0
- ) {
- /* two supplementary characters in (probably) the same window - define an extended one */
- isSingleByteMode=TRUE;
- code-=0x200;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=4;
- goto outputBytes;
- } else {
- /* don't know how to compress this character, just write it directly */
- c=((uint32_t)lead<<16)|trail;
- length=4;
- goto outputBytes;
- }
- } else /* 0xe000<=c<0xf300 */ {
- /* quote to avoid SCSU tags */
- c|=UQU<<16;
- length=3;
- goto outputBytes;
- }
-
- /* normal end of conversion: prepare for a new character */
- c=0;
- }
- }
-endloop:
-
- /* set the converter state back into UConverter */
- scsu->fromUIsSingleByteMode=isSingleByteMode;
- scsu->fromUDynamicWindow=dynamicWindow;
-
- cnv->fromUChar32=c;
-
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- return;
-
-outputBytes:
- /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
- /* from the first if in the loop we know that targetCapacity>0 */
- if(length<=targetCapacity) {
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *target++=(uint8_t)(c>>24);
- U_FALLTHROUGH;
- case 3:
- *target++=(uint8_t)(c>>16);
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(c>>8);
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)c;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- targetCapacity-=length;
-
- /* normal end of conversion: prepare for a new character */
- c=0;
- goto loop;
- } else {
- uint8_t *p;
-
- /*
- * We actually do this backwards here:
- * In order to save an intermediate variable, we output
- * first to the overflow buffer what does not fit into the
- * regular target.
- */
- /* we know that 0<=targetCapacity<length<=4 */
- /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
- length-=targetCapacity;
- p=(uint8_t *)cnv->charErrorBuffer;
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *p++=(uint8_t)(c>>24);
- U_FALLTHROUGH;
- case 3:
- *p++=(uint8_t)(c>>16);
- U_FALLTHROUGH;
- case 2:
- *p++=(uint8_t)(c>>8);
- U_FALLTHROUGH;
- case 1:
- *p=(uint8_t)c;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- cnv->charErrorBufferLength=(int8_t)length;
-
- /* now output what fits into the regular target */
- c>>=8*length; /* length was reduced by targetCapacity */
- switch(targetCapacity) {
- /* each branch falls through to the next one */
- case 3:
- *target++=(uint8_t)(c>>16);
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(c>>8);
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)c;
- U_FALLTHROUGH;
- default:
- break;
- }
-
- /* target overflow */
- targetCapacity=0;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- c=0;
- goto endloop;
- }
-}
-
-/* miscellaneous ------------------------------------------------------------ */
-
-static const char * U_CALLCONV
-_SCSUGetName(const UConverter *cnv) {
- SCSUData *scsu=(SCSUData *)cnv->extraInfo;
-
- switch(scsu->locale) {
- case l_ja:
- return "SCSU,locale=ja";
- default:
- return "SCSU";
- }
-}
-
-/* structure for SafeClone calculations */
-struct cloneSCSUStruct
-{
- UConverter cnv;
- SCSUData mydata;
-};
-
-static UConverter * U_CALLCONV
-_SCSUSafeClone(const UConverter *cnv,
- void *stackBuffer,
- int32_t *pBufferSize,
- UErrorCode *status)
-{
- struct cloneSCSUStruct * localClone;
- int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct);
-
- if (U_FAILURE(*status)){
- return 0;
- }
-
- if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
- *pBufferSize = bufferSizeNeeded;
- return 0;
- }
-
- localClone = (struct cloneSCSUStruct *)stackBuffer;
- /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
-
- uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData));
- localClone->cnv.extraInfo = &localClone->mydata;
- localClone->cnv.isExtraLocal = TRUE;
-
- return &localClone->cnv;
-}
-U_CDECL_END
-
-static const UConverterImpl _SCSUImpl={
- UCNV_SCSU,
-
- NULL,
- NULL,
-
- _SCSUOpen,
- _SCSUClose,
- _SCSUReset,
-
- _SCSUToUnicode,
- _SCSUToUnicodeWithOffsets,
- _SCSUFromUnicode,
- _SCSUFromUnicodeWithOffsets,
- NULL,
-
- NULL,
- _SCSUGetName,
- NULL,
- _SCSUSafeClone,
- ucnv_getCompleteUnicodeSet,
- NULL,
- NULL
-};
-
-static const UConverterStaticData _SCSUStaticData={
- sizeof(UConverterStaticData),
- "SCSU",
- 1212, /* CCSID for SCSU */
- UCNV_IBM, UCNV_SCSU,
- 1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */
- /*
- * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
- * substitution string.
- */
- { 0x0e, 0xff, 0xfd, 0 }, 3,
- FALSE, FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-
-const UConverterSharedData _SCSUData=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_SCSUStaticData, &_SCSUImpl);
-
-#endif
diff --git a/contrib/libs/icu/common/ucnvsel.cpp b/contrib/libs/icu/common/ucnvsel.cpp
deleted file mode 100644
index 2dff5ac1bc8..00000000000
--- a/contrib/libs/icu/common/ucnvsel.cpp
+++ /dev/null
@@ -1,823 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2008-2011, International Business Machines
-* Corporation, Google and others. All Rights Reserved.
-*
-*******************************************************************************
-*/
-// Author : [email protected] (Mohamed Eldawy)
-// ucnvsel.cpp
-//
-// Purpose: To generate a list of encodings capable of handling
-// a given Unicode text
-//
-// Started 09-April-2008
-
-/**
- * \file
- *
- * This is an implementation of an encoding selector.
- * The goal is, given a unicode string, find the encodings
- * this string can be mapped to. To make processing faster
- * a trie is built when you call ucnvsel_open() that
- * stores all encodings a codepoint can map to
- */
-
-#include "unicode/ucnvsel.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include <string.h>
-
-#include "unicode/uchar.h"
-#include "unicode/uniset.h"
-#include "unicode/ucnv.h"
-#include "unicode/ustring.h"
-#include "unicode/uchriter.h"
-#include "utrie2.h"
-#include "propsvec.h"
-#include "uassert.h"
-#include "ucmndata.h"
-#include "udataswp.h"
-#include "uenumimp.h"
-#include "cmemory.h"
-#include "cstring.h"
-
-U_NAMESPACE_USE
-
-struct UConverterSelector {
- UTrie2 *trie; // 16 bit trie containing offsets into pv
- uint32_t* pv; // table of bits!
- int32_t pvCount;
- char** encodings; // which encodings did user ask to use?
- int32_t encodingsCount;
- int32_t encodingStrLength;
- uint8_t* swapped;
- UBool ownPv, ownEncodingStrings;
-};
-
-static void generateSelectorData(UConverterSelector* result,
- UPropsVectors *upvec,
- const USet* excludedCodePoints,
- const UConverterUnicodeSet whichSet,
- UErrorCode* status) {
- if (U_FAILURE(*status)) {
- return;
- }
-
- int32_t columns = (result->encodingsCount+31)/32;
-
- // set errorValue to all-ones
- for (int32_t col = 0; col < columns; col++) {
- upvec_setValue(upvec, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP,
- col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), status);
- }
-
- for (int32_t i = 0; i < result->encodingsCount; ++i) {
- uint32_t mask;
- uint32_t column;
- int32_t item_count;
- int32_t j;
- UConverter* test_converter = ucnv_open(result->encodings[i], status);
- if (U_FAILURE(*status)) {
- return;
- }
- USet* unicode_point_set;
- unicode_point_set = uset_open(1, 0); // empty set
-
- ucnv_getUnicodeSet(test_converter, unicode_point_set,
- whichSet, status);
- if (U_FAILURE(*status)) {
- ucnv_close(test_converter);
- return;
- }
-
- column = i / 32;
- mask = 1 << (i%32);
- // now iterate over intervals on set i!
- item_count = uset_getItemCount(unicode_point_set);
-
- for (j = 0; j < item_count; ++j) {
- UChar32 start_char;
- UChar32 end_char;
- UErrorCode smallStatus = U_ZERO_ERROR;
- uset_getItem(unicode_point_set, j, &start_char, &end_char, NULL, 0,
- &smallStatus);
- if (U_FAILURE(smallStatus)) {
- // this will be reached for the converters that fill the set with
- // strings. Those should be ignored by our system
- } else {
- upvec_setValue(upvec, start_char, end_char, column, static_cast<uint32_t>(~0), mask,
- status);
- }
- }
- ucnv_close(test_converter);
- uset_close(unicode_point_set);
- if (U_FAILURE(*status)) {
- return;
- }
- }
-
- // handle excluded encodings! Simply set their values to all 1's in the upvec
- if (excludedCodePoints) {
- int32_t item_count = uset_getItemCount(excludedCodePoints);
- for (int32_t j = 0; j < item_count; ++j) {
- UChar32 start_char;
- UChar32 end_char;
-
- uset_getItem(excludedCodePoints, j, &start_char, &end_char, NULL, 0,
- status);
- for (int32_t col = 0; col < columns; col++) {
- upvec_setValue(upvec, start_char, end_char, col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0),
- status);
- }
- }
- }
-
- // alright. Now, let's put things in the same exact form you'd get when you
- // unserialize things.
- result->trie = upvec_compactToUTrie2WithRowIndexes(upvec, status);
- result->pv = upvec_cloneArray(upvec, &result->pvCount, NULL, status);
- result->pvCount *= columns; // number of uint32_t = rows * columns
- result->ownPv = TRUE;
-}
-
-/* open a selector. If converterListSize is 0, build for all converters.
- If excludedCodePoints is NULL, don't exclude any codepoints */
-U_CAPI UConverterSelector* U_EXPORT2
-ucnvsel_open(const char* const* converterList, int32_t converterListSize,
- const USet* excludedCodePoints,
- const UConverterUnicodeSet whichSet, UErrorCode* status) {
- // check if already failed
- if (U_FAILURE(*status)) {
- return NULL;
- }
- // ensure args make sense!
- if (converterListSize < 0 || (converterList == NULL && converterListSize != 0)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- // allocate a new converter
- LocalUConverterSelectorPointer newSelector(
- (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector)));
- if (newSelector.isNull()) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- uprv_memset(newSelector.getAlias(), 0, sizeof(UConverterSelector));
-
- if (converterListSize == 0) {
- converterList = NULL;
- converterListSize = ucnv_countAvailable();
- }
- newSelector->encodings =
- (char**)uprv_malloc(converterListSize * sizeof(char*));
- if (!newSelector->encodings) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- newSelector->encodings[0] = NULL; // now we can call ucnvsel_close()
-
- // make a backup copy of the list of converters
- int32_t totalSize = 0;
- int32_t i;
- for (i = 0; i < converterListSize; i++) {
- totalSize +=
- (int32_t)uprv_strlen(converterList != NULL ? converterList[i] : ucnv_getAvailableName(i)) + 1;
- }
- // 4-align the totalSize to 4-align the size of the serialized form
- int32_t encodingStrPadding = totalSize & 3;
- if (encodingStrPadding != 0) {
- encodingStrPadding = 4 - encodingStrPadding;
- }
- newSelector->encodingStrLength = totalSize += encodingStrPadding;
- char* allStrings = (char*) uprv_malloc(totalSize);
- if (!allStrings) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- for (i = 0; i < converterListSize; i++) {
- newSelector->encodings[i] = allStrings;
- uprv_strcpy(newSelector->encodings[i],
- converterList != NULL ? converterList[i] : ucnv_getAvailableName(i));
- allStrings += uprv_strlen(newSelector->encodings[i]) + 1;
- }
- while (encodingStrPadding > 0) {
- *allStrings++ = 0;
- --encodingStrPadding;
- }
-
- newSelector->ownEncodingStrings = TRUE;
- newSelector->encodingsCount = converterListSize;
- UPropsVectors *upvec = upvec_open((converterListSize+31)/32, status);
- generateSelectorData(newSelector.getAlias(), upvec, excludedCodePoints, whichSet, status);
- upvec_close(upvec);
-
- if (U_FAILURE(*status)) {
- return NULL;
- }
-
- return newSelector.orphan();
-}
-
-/* close opened selector */
-U_CAPI void U_EXPORT2
-ucnvsel_close(UConverterSelector *sel) {
- if (!sel) {
- return;
- }
- if (sel->ownEncodingStrings) {
- uprv_free(sel->encodings[0]);
- }
- uprv_free(sel->encodings);
- if (sel->ownPv) {
- uprv_free(sel->pv);
- }
- utrie2_close(sel->trie);
- uprv_free(sel->swapped);
- uprv_free(sel);
-}
-
-static const UDataInfo dataInfo = {
- sizeof(UDataInfo),
- 0,
-
- U_IS_BIG_ENDIAN,
- U_CHARSET_FAMILY,
- U_SIZEOF_UCHAR,
- 0,
-
- { 0x43, 0x53, 0x65, 0x6c }, /* dataFormat="CSel" */
- { 1, 0, 0, 0 }, /* formatVersion */
- { 0, 0, 0, 0 } /* dataVersion */
-};
-
-enum {
- UCNVSEL_INDEX_TRIE_SIZE, // trie size in bytes
- UCNVSEL_INDEX_PV_COUNT, // number of uint32_t in the bit vectors
- UCNVSEL_INDEX_NAMES_COUNT, // number of encoding names
- UCNVSEL_INDEX_NAMES_LENGTH, // number of encoding name bytes including padding
- UCNVSEL_INDEX_SIZE = 15, // bytes following the DataHeader
- UCNVSEL_INDEX_COUNT = 16
-};
-
-/*
- * Serialized form of a UConverterSelector, formatVersion 1:
- *
- * The serialized form begins with a standard ICU DataHeader with a UDataInfo
- * as the template above.
- * This is followed by:
- * int32_t indexes[UCNVSEL_INDEX_COUNT]; // see index entry constants above
- * serialized UTrie2; // indexes[UCNVSEL_INDEX_TRIE_SIZE] bytes
- * uint32_t pv[indexes[UCNVSEL_INDEX_PV_COUNT]]; // bit vectors
- * char* encodingNames[indexes[UCNVSEL_INDEX_NAMES_LENGTH]]; // NUL-terminated strings + padding
- */
-
-/* serialize a selector */
-U_CAPI int32_t U_EXPORT2
-ucnvsel_serialize(const UConverterSelector* sel,
- void* buffer, int32_t bufferCapacity, UErrorCode* status) {
- // check if already failed
- if (U_FAILURE(*status)) {
- return 0;
- }
- // ensure args make sense!
- uint8_t *p = (uint8_t *)buffer;
- if (bufferCapacity < 0 ||
- (bufferCapacity > 0 && (p == NULL || (U_POINTER_MASK_LSB(p, 3) != 0)))
- ) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- // add up the size of the serialized form
- int32_t serializedTrieSize = utrie2_serialize(sel->trie, NULL, 0, status);
- if (*status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(*status)) {
- return 0;
- }
- *status = U_ZERO_ERROR;
-
- DataHeader header;
- uprv_memset(&header, 0, sizeof(header));
- header.dataHeader.headerSize = (uint16_t)((sizeof(header) + 15) & ~15);
- header.dataHeader.magic1 = 0xda;
- header.dataHeader.magic2 = 0x27;
- uprv_memcpy(&header.info, &dataInfo, sizeof(dataInfo));
-
- int32_t indexes[UCNVSEL_INDEX_COUNT] = {
- serializedTrieSize,
- sel->pvCount,
- sel->encodingsCount,
- sel->encodingStrLength
- };
-
- int32_t totalSize =
- header.dataHeader.headerSize +
- (int32_t)sizeof(indexes) +
- serializedTrieSize +
- sel->pvCount * 4 +
- sel->encodingStrLength;
- indexes[UCNVSEL_INDEX_SIZE] = totalSize - header.dataHeader.headerSize;
- if (totalSize > bufferCapacity) {
- *status = U_BUFFER_OVERFLOW_ERROR;
- return totalSize;
- }
- // ok, save!
- int32_t length = header.dataHeader.headerSize;
- uprv_memcpy(p, &header, sizeof(header));
- uprv_memset(p + sizeof(header), 0, length - sizeof(header));
- p += length;
-
- length = (int32_t)sizeof(indexes);
- uprv_memcpy(p, indexes, length);
- p += length;
-
- utrie2_serialize(sel->trie, p, serializedTrieSize, status);
- p += serializedTrieSize;
-
- length = sel->pvCount * 4;
- uprv_memcpy(p, sel->pv, length);
- p += length;
-
- uprv_memcpy(p, sel->encodings[0], sel->encodingStrLength);
- p += sel->encodingStrLength;
-
- return totalSize;
-}
-
-/**
- * swap a selector into the desired Endianness and Asciiness of
- * the system. Just as FYI, selectors are always saved in the format
- * of the system that created them. They are only converted if used
- * on another system. In other words, selectors created on different
- * system can be different even if the params are identical (endianness
- * and Asciiness differences only)
- *
- * @param ds pointer to data swapper containing swapping info
- * @param inData pointer to incoming data
- * @param length length of inData in bytes
- * @param outData pointer to output data. Capacity should
- * be at least equal to capacity of inData
- * @param status an in/out ICU UErrorCode
- * @return 0 on failure, number of bytes swapped on success
- * number of bytes swapped can be smaller than length
- */
-static int32_t
-ucnvsel_swap(const UDataSwapper *ds,
- const void *inData, int32_t length,
- void *outData, UErrorCode *status) {
- /* udata_swapDataHeader checks the arguments */
- int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, status);
- if(U_FAILURE(*status)) {
- return 0;
- }
-
- /* check data format and format version */
- const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4);
- if(!(
- pInfo->dataFormat[0] == 0x43 && /* dataFormat="CSel" */
- pInfo->dataFormat[1] == 0x53 &&
- pInfo->dataFormat[2] == 0x65 &&
- pInfo->dataFormat[3] == 0x6c
- )) {
- udata_printError(ds, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n",
- pInfo->dataFormat[0], pInfo->dataFormat[1],
- pInfo->dataFormat[2], pInfo->dataFormat[3]);
- *status = U_INVALID_FORMAT_ERROR;
- return 0;
- }
- if(pInfo->formatVersion[0] != 1) {
- udata_printError(ds, "ucnvsel_swap(): format version %02x is not supported\n",
- pInfo->formatVersion[0]);
- *status = U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- if(length >= 0) {
- length -= headerSize;
- if(length < 16*4) {
- udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n",
- length);
- *status = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- }
-
- const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
- uint8_t *outBytes = (uint8_t *)outData + headerSize;
-
- /* read the indexes */
- const int32_t *inIndexes = (const int32_t *)inBytes;
- int32_t indexes[16];
- int32_t i;
- for(i = 0; i < 16; ++i) {
- indexes[i] = udata_readInt32(ds, inIndexes[i]);
- }
-
- /* get the total length of the data */
- int32_t size = indexes[UCNVSEL_INDEX_SIZE];
- if(length >= 0) {
- if(length < size) {
- udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n",
- length);
- *status = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- /* copy the data for inaccessible bytes */
- if(inBytes != outBytes) {
- uprv_memcpy(outBytes, inBytes, size);
- }
-
- int32_t offset = 0, count;
-
- /* swap the int32_t indexes[] */
- count = UCNVSEL_INDEX_COUNT*4;
- ds->swapArray32(ds, inBytes, count, outBytes, status);
- offset += count;
-
- /* swap the UTrie2 */
- count = indexes[UCNVSEL_INDEX_TRIE_SIZE];
- utrie2_swap(ds, inBytes + offset, count, outBytes + offset, status);
- offset += count;
-
- /* swap the uint32_t pv[] */
- count = indexes[UCNVSEL_INDEX_PV_COUNT]*4;
- ds->swapArray32(ds, inBytes + offset, count, outBytes + offset, status);
- offset += count;
-
- /* swap the encoding names */
- count = indexes[UCNVSEL_INDEX_NAMES_LENGTH];
- ds->swapInvChars(ds, inBytes + offset, count, outBytes + offset, status);
- offset += count;
-
- U_ASSERT(offset == size);
- }
-
- return headerSize + size;
-}
-
-/* unserialize a selector */
-U_CAPI UConverterSelector* U_EXPORT2
-ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status) {
- // check if already failed
- if (U_FAILURE(*status)) {
- return NULL;
- }
- // ensure args make sense!
- const uint8_t *p = (const uint8_t *)buffer;
- if (length <= 0 ||
- (length > 0 && (p == NULL || (U_POINTER_MASK_LSB(p, 3) != 0)))
- ) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- // header
- if (length < 32) {
- // not even enough space for a minimal header
- *status = U_INDEX_OUTOFBOUNDS_ERROR;
- return NULL;
- }
- const DataHeader *pHeader = (const DataHeader *)p;
- if (!(
- pHeader->dataHeader.magic1==0xda &&
- pHeader->dataHeader.magic2==0x27 &&
- pHeader->info.dataFormat[0] == 0x43 &&
- pHeader->info.dataFormat[1] == 0x53 &&
- pHeader->info.dataFormat[2] == 0x65 &&
- pHeader->info.dataFormat[3] == 0x6c
- )) {
- /* header not valid or dataFormat not recognized */
- *status = U_INVALID_FORMAT_ERROR;
- return NULL;
- }
- if (pHeader->info.formatVersion[0] != 1) {
- *status = U_UNSUPPORTED_ERROR;
- return NULL;
- }
- uint8_t* swapped = NULL;
- if (pHeader->info.isBigEndian != U_IS_BIG_ENDIAN ||
- pHeader->info.charsetFamily != U_CHARSET_FAMILY
- ) {
- // swap the data
- UDataSwapper *ds =
- udata_openSwapperForInputData(p, length, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, status);
- int32_t totalSize = ucnvsel_swap(ds, p, -1, NULL, status);
- if (U_FAILURE(*status)) {
- udata_closeSwapper(ds);
- return NULL;
- }
- if (length < totalSize) {
- udata_closeSwapper(ds);
- *status = U_INDEX_OUTOFBOUNDS_ERROR;
- return NULL;
- }
- swapped = (uint8_t*)uprv_malloc(totalSize);
- if (swapped == NULL) {
- udata_closeSwapper(ds);
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- ucnvsel_swap(ds, p, length, swapped, status);
- udata_closeSwapper(ds);
- if (U_FAILURE(*status)) {
- uprv_free(swapped);
- return NULL;
- }
- p = swapped;
- pHeader = (const DataHeader *)p;
- }
- if (length < (pHeader->dataHeader.headerSize + 16 * 4)) {
- // not even enough space for the header and the indexes
- uprv_free(swapped);
- *status = U_INDEX_OUTOFBOUNDS_ERROR;
- return NULL;
- }
- p += pHeader->dataHeader.headerSize;
- length -= pHeader->dataHeader.headerSize;
- // indexes
- const int32_t *indexes = (const int32_t *)p;
- if (length < indexes[UCNVSEL_INDEX_SIZE]) {
- uprv_free(swapped);
- *status = U_INDEX_OUTOFBOUNDS_ERROR;
- return NULL;
- }
- p += UCNVSEL_INDEX_COUNT * 4;
- // create and populate the selector object
- UConverterSelector* sel = (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector));
- char **encodings =
- (char **)uprv_malloc(
- indexes[UCNVSEL_INDEX_NAMES_COUNT] * sizeof(char *));
- if (sel == NULL || encodings == NULL) {
- uprv_free(swapped);
- uprv_free(sel);
- uprv_free(encodings);
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- uprv_memset(sel, 0, sizeof(UConverterSelector));
- sel->pvCount = indexes[UCNVSEL_INDEX_PV_COUNT];
- sel->encodings = encodings;
- sel->encodingsCount = indexes[UCNVSEL_INDEX_NAMES_COUNT];
- sel->encodingStrLength = indexes[UCNVSEL_INDEX_NAMES_LENGTH];
- sel->swapped = swapped;
- // trie
- sel->trie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
- p, indexes[UCNVSEL_INDEX_TRIE_SIZE], NULL,
- status);
- p += indexes[UCNVSEL_INDEX_TRIE_SIZE];
- if (U_FAILURE(*status)) {
- ucnvsel_close(sel);
- return NULL;
- }
- // bit vectors
- sel->pv = (uint32_t *)p;
- p += sel->pvCount * 4;
- // encoding names
- char* s = (char*)p;
- for (int32_t i = 0; i < sel->encodingsCount; ++i) {
- sel->encodings[i] = s;
- s += uprv_strlen(s) + 1;
- }
- p += sel->encodingStrLength;
-
- return sel;
-}
-
-// a bunch of functions for the enumeration thingie! Nothing fancy here. Just
-// iterate over the selected encodings
-struct Enumerator {
- int16_t* index;
- int16_t length;
- int16_t cur;
- const UConverterSelector* sel;
-};
-
-U_CDECL_BEGIN
-
-static void U_CALLCONV
-ucnvsel_close_selector_iterator(UEnumeration *enumerator) {
- uprv_free(((Enumerator*)(enumerator->context))->index);
- uprv_free(enumerator->context);
- uprv_free(enumerator);
-}
-
-
-static int32_t U_CALLCONV
-ucnvsel_count_encodings(UEnumeration *enumerator, UErrorCode *status) {
- // check if already failed
- if (U_FAILURE(*status)) {
- return 0;
- }
- return ((Enumerator*)(enumerator->context))->length;
-}
-
-
-static const char* U_CALLCONV ucnvsel_next_encoding(UEnumeration* enumerator,
- int32_t* resultLength,
- UErrorCode* status) {
- // check if already failed
- if (U_FAILURE(*status)) {
- return NULL;
- }
-
- int16_t cur = ((Enumerator*)(enumerator->context))->cur;
- const UConverterSelector* sel;
- const char* result;
- if (cur >= ((Enumerator*)(enumerator->context))->length) {
- return NULL;
- }
- sel = ((Enumerator*)(enumerator->context))->sel;
- result = sel->encodings[((Enumerator*)(enumerator->context))->index[cur] ];
- ((Enumerator*)(enumerator->context))->cur++;
- if (resultLength) {
- *resultLength = (int32_t)uprv_strlen(result);
- }
- return result;
-}
-
-static void U_CALLCONV ucnvsel_reset_iterator(UEnumeration* enumerator,
- UErrorCode* status) {
- // check if already failed
- if (U_FAILURE(*status)) {
- return ;
- }
- ((Enumerator*)(enumerator->context))->cur = 0;
-}
-
-U_CDECL_END
-
-
-static const UEnumeration defaultEncodings = {
- NULL,
- NULL,
- ucnvsel_close_selector_iterator,
- ucnvsel_count_encodings,
- uenum_unextDefault,
- ucnvsel_next_encoding,
- ucnvsel_reset_iterator
-};
-
-
-// internal fn to intersect two sets of masks
-// returns whether the mask has reduced to all zeros
-static UBool intersectMasks(uint32_t* dest, const uint32_t* source1, int32_t len) {
- int32_t i;
- uint32_t oredDest = 0;
- for (i = 0 ; i < len ; ++i) {
- oredDest |= (dest[i] &= source1[i]);
- }
- return oredDest == 0;
-}
-
-// internal fn to count how many 1's are there in a mask
-// algorithm taken from http://graphics.stanford.edu/~seander/bithacks.html
-static int16_t countOnes(uint32_t* mask, int32_t len) {
- int32_t i, totalOnes = 0;
- for (i = 0 ; i < len ; ++i) {
- uint32_t ent = mask[i];
- for (; ent; totalOnes++)
- {
- ent &= ent - 1; // clear the least significant bit set
- }
- }
- return static_cast<int16_t>(totalOnes);
-}
-
-
-/* internal function! */
-static UEnumeration *selectForMask(const UConverterSelector* sel,
- uint32_t *theMask, UErrorCode *status) {
- LocalMemory<uint32_t> mask(theMask);
- // this is the context we will use. Store a table of indices to which
- // encodings are legit.
- LocalMemory<Enumerator> result(static_cast<Enumerator *>(uprv_malloc(sizeof(Enumerator))));
- if (result.isNull()) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
- result->index = nullptr; // this will be allocated later!
- result->length = result->cur = 0;
- result->sel = sel;
-
- LocalMemory<UEnumeration> en(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
- if (en.isNull()) {
- // TODO(markus): Combine Enumerator and UEnumeration into one struct.
- *status = U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
- memcpy(en.getAlias(), &defaultEncodings, sizeof(UEnumeration));
-
- int32_t columns = (sel->encodingsCount+31)/32;
- int16_t numOnes = countOnes(mask.getAlias(), columns);
- // now, we know the exact space we need for index
- if (numOnes > 0) {
- result->index = static_cast<int16_t*>(uprv_malloc(numOnes * sizeof(int16_t)));
- if (result->index == nullptr) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
- int32_t i, j;
- int16_t k = 0;
- for (j = 0 ; j < columns; j++) {
- uint32_t v = mask[j];
- for (i = 0 ; i < 32 && k < sel->encodingsCount; i++, k++) {
- if ((v & 1) != 0) {
- result->index[result->length++] = k;
- }
- v >>= 1;
- }
- }
- } //otherwise, index will remain NULL (and will never be touched by
- //the enumerator code anyway)
- en->context = result.orphan();
- return en.orphan();
-}
-
-/* check a string against the selector - UTF16 version */
-U_CAPI UEnumeration * U_EXPORT2
-ucnvsel_selectForString(const UConverterSelector* sel,
- const UChar *s, int32_t length, UErrorCode *status) {
- // check if already failed
- if (U_FAILURE(*status)) {
- return NULL;
- }
- // ensure args make sense!
- if (sel == NULL || (s == NULL && length != 0)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- int32_t columns = (sel->encodingsCount+31)/32;
- uint32_t* mask = (uint32_t*) uprv_malloc(columns * 4);
- if (mask == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- uprv_memset(mask, ~0, columns *4);
-
- if(s!=NULL) {
- const UChar *limit;
- if (length >= 0) {
- limit = s + length;
- } else {
- limit = NULL;
- }
-
- while (limit == NULL ? *s != 0 : s != limit) {
- UChar32 c;
- uint16_t pvIndex;
- UTRIE2_U16_NEXT16(sel->trie, s, limit, c, pvIndex);
- if (intersectMasks(mask, sel->pv+pvIndex, columns)) {
- break;
- }
- }
- }
- return selectForMask(sel, mask, status);
-}
-
-/* check a string against the selector - UTF8 version */
-U_CAPI UEnumeration * U_EXPORT2
-ucnvsel_selectForUTF8(const UConverterSelector* sel,
- const char *s, int32_t length, UErrorCode *status) {
- // check if already failed
- if (U_FAILURE(*status)) {
- return NULL;
- }
- // ensure args make sense!
- if (sel == NULL || (s == NULL && length != 0)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- int32_t columns = (sel->encodingsCount+31)/32;
- uint32_t* mask = (uint32_t*) uprv_malloc(columns * 4);
- if (mask == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- uprv_memset(mask, ~0, columns *4);
-
- if (length < 0) {
- length = (int32_t)uprv_strlen(s);
- }
-
- if(s!=NULL) {
- const char *limit = s + length;
-
- while (s != limit) {
- uint16_t pvIndex;
- UTRIE2_U8_NEXT16(sel->trie, s, limit, pvIndex);
- if (intersectMasks(mask, sel->pv+pvIndex, columns)) {
- break;
- }
- }
- }
- return selectForMask(sel, mask, status);
-}
-
-#endif // !UCONFIG_NO_CONVERSION
diff --git a/contrib/libs/icu/common/ucol_data.h b/contrib/libs/icu/common/ucol_data.h
deleted file mode 100644
index 83f54abba13..00000000000
--- a/contrib/libs/icu/common/ucol_data.h
+++ /dev/null
@@ -1,89 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2000-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: ucol_data.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2011jul02
-* created by: Markus Scherer
-*
-* Private implementation header for C/C++ collation.
-* Some file data structure definitions were moved here from i18n/ucol_imp.h
-* so that the common library (via ucol_swp.cpp) need not depend on the i18n library at all.
-*
-* We do not want to move the collation swapper to the i18n library because
-* a) the resource bundle swapper depends on it and would have to move too, and
-* b) we might want to eventually implement runtime data swapping,
-* which might (or might not) be easier if all swappers are in the common library.
-*/
-
-#ifndef __UCOL_DATA_H__
-#define __UCOL_DATA_H__
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_COLLATION
-
-/* let us know whether reserved fields are reset to zero or junked */
-#define UCOL_HEADER_MAGIC 0x20030618
-
-typedef struct {
- int32_t size;
- /* all the offsets are in bytes */
- /* to get the address add to the header address and cast properly */
- uint32_t options; /* these are the default options for the collator */
- uint32_t UCAConsts; /* structure which holds values for indirect positioning and implicit ranges */
- uint32_t contractionUCACombos; /* this one is needed only for UCA, to copy the appropriate contractions */
- uint32_t magic; /* magic number - lets us know whether reserved data is reset or junked */
- uint32_t mappingPosition; /* const uint8_t *mappingPosition; */
- uint32_t expansion; /* uint32_t *expansion; */
- uint32_t contractionIndex; /* UChar *contractionIndex; */
- uint32_t contractionCEs; /* uint32_t *contractionCEs; */
- uint32_t contractionSize; /* needed for various closures */
- /*int32_t latinOneMapping;*/ /* this is now handled in the trie itself *//* fast track to latin1 chars */
-
- uint32_t endExpansionCE; /* array of last collation element in
- expansion */
- uint32_t expansionCESize; /* array of maximum expansion size
- corresponding to the expansion
- collation elements with last element
- in endExpansionCE*/
- int32_t endExpansionCECount; /* size of endExpansionCE */
- uint32_t unsafeCP; /* hash table of unsafe code points */
- uint32_t contrEndCP; /* hash table of final code points */
- /* in contractions. */
-
- int32_t contractionUCACombosSize; /* number of UCA contraction items. */
- /*Length is contractionUCACombosSize*contractionUCACombosWidth*sizeof(UChar) */
- UBool jamoSpecial; /* is jamoSpecial */
- UBool isBigEndian; /* is this data big endian? from the UDataInfo header*/
- uint8_t charSetFamily; /* what is the charset family of this data from the UDataInfo header*/
- uint8_t contractionUCACombosWidth; /* width of UCA combos field */
- UVersionInfo version;
- UVersionInfo UCAVersion; /* version of the UCA, read from file */
- UVersionInfo UCDVersion; /* UCD version, obtained by u_getUnicodeVersion */
- UVersionInfo formatVersion; /* format version from the UDataInfo header */
- uint32_t scriptToLeadByte; /* offset to script to lead collation byte mapping data */
- uint32_t leadByteToScript; /* offset to lead collation byte to script mapping data */
- uint8_t reserved[76]; /* for future use */
-} UCATableHeader;
-
-typedef struct {
- uint32_t byteSize;
- uint32_t tableSize;
- uint32_t contsSize;
- uint32_t table;
- uint32_t conts;
- UVersionInfo UCAVersion; /* version of the UCA, read from file */
- uint8_t padding[8];
-} InverseUCATableHeader;
-
-#endif /* !UCONFIG_NO_COLLATION */
-
-#endif /* __UCOL_DATA_H__ */
diff --git a/contrib/libs/icu/common/ucol_swp.cpp b/contrib/libs/icu/common/ucol_swp.cpp
deleted file mode 100644
index 1af19863fa8..00000000000
--- a/contrib/libs/icu/common/ucol_swp.cpp
+++ /dev/null
@@ -1,615 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2003-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: ucol_swp.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2003sep10
-* created by: Markus W. Scherer
-*
-* Swap collation binaries.
-*/
-
-#include "unicode/udata.h" /* UDataInfo */
-#include "utrie.h"
-#include "utrie2.h"
-#include "udataswp.h"
-#include "cmemory.h"
-#include "ucol_data.h"
-#include "ucol_swp.h"
-
-/* swapping ----------------------------------------------------------------- */
-
-#if !UCONFIG_NO_COLLATION
-
-U_CAPI UBool U_EXPORT2
-ucol_looksLikeCollationBinary(const UDataSwapper *ds,
- const void *inData, int32_t length) {
- if(ds==NULL || inData==NULL || length<-1) {
- return FALSE;
- }
-
- // First check for format version 4+ which has a standard data header.
- UErrorCode errorCode=U_ZERO_ERROR;
- (void)udata_swapDataHeader(ds, inData, -1, NULL, &errorCode);
- if(U_SUCCESS(errorCode)) {
- const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);
- if(info.dataFormat[0]==0x55 && // dataFormat="UCol"
- info.dataFormat[1]==0x43 &&
- info.dataFormat[2]==0x6f &&
- info.dataFormat[3]==0x6c) {
- return TRUE;
- }
- }
-
- // Else check for format version 3.
- const UCATableHeader *inHeader=(const UCATableHeader *)inData;
-
- /*
- * The collation binary must contain at least the UCATableHeader,
- * starting with its size field.
- * sizeof(UCATableHeader)==42*4 in ICU 2.8
- * check the length against the header size before reading the size field
- */
- UCATableHeader header;
- uprv_memset(&header, 0, sizeof(header));
- if(length<0) {
- header.size=udata_readInt32(ds, inHeader->size);
- } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
- return FALSE;
- }
-
- header.magic=ds->readUInt32(inHeader->magic);
- if(!(
- header.magic==UCOL_HEADER_MAGIC &&
- inHeader->formatVersion[0]==3 /*&&
- inHeader->formatVersion[1]>=0*/
- )) {
- return FALSE;
- }
-
- if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
- return FALSE;
- }
-
- return TRUE;
-}
-
-namespace {
-
-/* swap a header-less collation formatVersion=3 binary, inside a resource bundle or ucadata.icu */
-int32_t
-swapFormatVersion3(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const uint8_t *inBytes;
- uint8_t *outBytes;
-
- const UCATableHeader *inHeader;
- UCATableHeader *outHeader;
- UCATableHeader header;
-
- uint32_t count;
-
- /* argument checking in case we were not called from ucol_swap() */
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- inBytes=(const uint8_t *)inData;
- outBytes=(uint8_t *)outData;
-
- inHeader=(const UCATableHeader *)inData;
- outHeader=(UCATableHeader *)outData;
-
- /*
- * The collation binary must contain at least the UCATableHeader,
- * starting with its size field.
- * sizeof(UCATableHeader)==42*4 in ICU 2.8
- * check the length against the header size before reading the size field
- */
- uprv_memset(&header, 0, sizeof(header));
- if(length<0) {
- header.size=udata_readInt32(ds, inHeader->size);
- } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
- udata_printError(ds, "ucol_swap(formatVersion=3): too few bytes (%d after header) for collation data\n",
- length);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- header.magic=ds->readUInt32(inHeader->magic);
- if(!(
- header.magic==UCOL_HEADER_MAGIC &&
- inHeader->formatVersion[0]==3 /*&&
- inHeader->formatVersion[1]>=0*/
- )) {
- udata_printError(ds, "ucol_swap(formatVersion=3): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
- header.magic,
- inHeader->formatVersion[0], inHeader->formatVersion[1]);
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
- udata_printError(ds, "ucol_swap(formatVersion=3): endianness %d or charset %d does not match the swapper\n",
- inHeader->isBigEndian, inHeader->charSetFamily);
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
-
- if(length>=0) {
- /* copy everything, takes care of data that needs no swapping */
- if(inBytes!=outBytes) {
- uprv_memcpy(outBytes, inBytes, header.size);
- }
-
- /* swap the necessary pieces in the order of their occurrence in the data */
-
- /* read more of the UCATableHeader (the size field was read above) */
- header.options= ds->readUInt32(inHeader->options);
- header.UCAConsts= ds->readUInt32(inHeader->UCAConsts);
- header.contractionUCACombos= ds->readUInt32(inHeader->contractionUCACombos);
- header.mappingPosition= ds->readUInt32(inHeader->mappingPosition);
- header.expansion= ds->readUInt32(inHeader->expansion);
- header.contractionIndex= ds->readUInt32(inHeader->contractionIndex);
- header.contractionCEs= ds->readUInt32(inHeader->contractionCEs);
- header.contractionSize= ds->readUInt32(inHeader->contractionSize);
- header.endExpansionCE= ds->readUInt32(inHeader->endExpansionCE);
- header.expansionCESize= ds->readUInt32(inHeader->expansionCESize);
- header.endExpansionCECount= udata_readInt32(ds, inHeader->endExpansionCECount);
- header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize);
- header.scriptToLeadByte= ds->readUInt32(inHeader->scriptToLeadByte);
- header.leadByteToScript= ds->readUInt32(inHeader->leadByteToScript);
-
- /* swap the 32-bit integers in the header */
- ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader),
- outHeader, pErrorCode);
- ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript),
- &(outHeader->scriptToLeadByte), pErrorCode);
- /* set the output platform properties */
- outHeader->isBigEndian=ds->outIsBigEndian;
- outHeader->charSetFamily=ds->outCharset;
-
- /* swap the options */
- if(header.options!=0) {
- ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options,
- outBytes+header.options, pErrorCode);
- }
-
- /* swap the expansions */
- if(header.mappingPosition!=0 && header.expansion!=0) {
- if(header.contractionIndex!=0) {
- /* expansions bounded by contractions */
- count=header.contractionIndex-header.expansion;
- } else {
- /* no contractions: expansions bounded by the main trie */
- count=header.mappingPosition-header.expansion;
- }
- ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count,
- outBytes+header.expansion, pErrorCode);
- }
-
- /* swap the contractions */
- if(header.contractionSize!=0) {
- /* contractionIndex: UChar[] */
- ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2,
- outBytes+header.contractionIndex, pErrorCode);
-
- /* contractionCEs: CEs[] */
- ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4,
- outBytes+header.contractionCEs, pErrorCode);
- }
-
- /* swap the main trie */
- if(header.mappingPosition!=0) {
- count=header.endExpansionCE-header.mappingPosition;
- utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count,
- outBytes+header.mappingPosition, pErrorCode);
- }
-
- /* swap the max expansion table */
- if(header.endExpansionCECount!=0) {
- ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4,
- outBytes+header.endExpansionCE, pErrorCode);
- }
-
- /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */
-
- /* swap UCA constants */
- if(header.UCAConsts!=0) {
- /*
- * if UCAConsts!=0 then contractionUCACombos because we are swapping
- * the UCA data file, and we know that the UCA contains contractions
- */
- ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,
- outBytes+header.UCAConsts, pErrorCode);
- }
-
- /* swap UCA contractions */
- if(header.contractionUCACombosSize!=0) {
- count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR;
- ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count,
- outBytes+header.contractionUCACombos, pErrorCode);
- }
-
- /* swap the script to lead bytes */
- if(header.scriptToLeadByte!=0) {
- int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte))); // each entry = 2 * uint16
- int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte + 2))); // each entry = uint16
- ds->swapArray16(ds, inBytes+header.scriptToLeadByte,
- 4 + (4 * indexCount) + (2 * dataCount),
- outBytes+header.scriptToLeadByte, pErrorCode);
- }
-
- /* swap the lead byte to scripts */
- if(header.leadByteToScript!=0) {
- int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript))); // each entry = uint16
- int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript + 2))); // each entry = uint16
- ds->swapArray16(ds, inBytes+header.leadByteToScript,
- 4 + (2 * indexCount) + (2 * dataCount),
- outBytes+header.leadByteToScript, pErrorCode);
- }
- }
-
- return header.size;
-}
-
-// swap formatVersion 4 or 5 ----------------------------------------------- ***
-
-// The following are copied from CollationDataReader, trading an awkward copy of constants
-// for an awkward relocation of the i18n collationdatareader.h file into the common library.
-// Keep them in sync!
-
-enum {
- IX_INDEXES_LENGTH, // 0
- IX_OPTIONS,
- IX_RESERVED2,
- IX_RESERVED3,
-
- IX_JAMO_CE32S_START, // 4
- IX_REORDER_CODES_OFFSET,
- IX_REORDER_TABLE_OFFSET,
- IX_TRIE_OFFSET,
-
- IX_RESERVED8_OFFSET, // 8
- IX_CES_OFFSET,
- IX_RESERVED10_OFFSET,
- IX_CE32S_OFFSET,
-
- IX_ROOT_ELEMENTS_OFFSET, // 12
- IX_CONTEXTS_OFFSET,
- IX_UNSAFE_BWD_OFFSET,
- IX_FAST_LATIN_TABLE_OFFSET,
-
- IX_SCRIPTS_OFFSET, // 16
- IX_COMPRESSIBLE_BYTES_OFFSET,
- IX_RESERVED18_OFFSET,
- IX_TOTAL_SIZE
-};
-
-int32_t
-swapFormatVersion4(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) { return 0; }
-
- const uint8_t *inBytes=(const uint8_t *)inData;
- uint8_t *outBytes=(uint8_t *)outData;
-
- const int32_t *inIndexes=(const int32_t *)inBytes;
- int32_t indexes[IX_TOTAL_SIZE+1];
-
- // Need at least IX_INDEXES_LENGTH and IX_OPTIONS.
- if(0<=length && length<8) {
- udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
- "(%d after header) for collation data\n",
- length);
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- int32_t indexesLength=indexes[0]=udata_readInt32(ds, inIndexes[0]);
- if(0<=length && length<(indexesLength*4)) {
- udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
- "(%d after header) for collation data\n",
- length);
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- for(int32_t i=1; i<=IX_TOTAL_SIZE && i<indexesLength; ++i) {
- indexes[i]=udata_readInt32(ds, inIndexes[i]);
- }
- for(int32_t i=indexesLength; i<=IX_TOTAL_SIZE; ++i) {
- indexes[i]=-1;
- }
- inIndexes=NULL; // Make sure we do not accidentally use these instead of indexes[].
-
- // Get the total length of the data.
- int32_t size;
- if(indexesLength>IX_TOTAL_SIZE) {
- size=indexes[IX_TOTAL_SIZE];
- } else if(indexesLength>IX_REORDER_CODES_OFFSET) {
- size=indexes[indexesLength-1];
- } else {
- size=indexesLength*4;
- }
- if(length<0) { return size; }
-
- if(length<size) {
- udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
- "(%d after header) for collation data\n",
- length);
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- // Copy the data for inaccessible bytes and arrays of bytes.
- if(inBytes!=outBytes) {
- uprv_memcpy(outBytes, inBytes, size);
- }
-
- // Swap the int32_t indexes[].
- ds->swapArray32(ds, inBytes, indexesLength * 4, outBytes, &errorCode);
-
- // The following is a modified version of CollationDataReader::read().
- // Here we use indexes[] not inIndexes[] because
- // the inIndexes[] may not be in this machine's endianness.
- int32_t index; // one of the indexes[] slots
- int32_t offset; // byte offset for the index part
- // int32_t length; // number of bytes in the index part
-
- index = IX_REORDER_CODES_OFFSET;
- offset = indexes[index];
- length = indexes[index + 1] - offset;
- if(length > 0) {
- ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
- }
-
- // Skip the IX_REORDER_TABLE_OFFSET byte array.
-
- index = IX_TRIE_OFFSET;
- offset = indexes[index];
- length = indexes[index + 1] - offset;
- if(length > 0) {
- utrie2_swap(ds, inBytes + offset, length, outBytes + offset, &errorCode);
- }
-
- index = IX_RESERVED8_OFFSET;
- offset = indexes[index];
- length = indexes[index + 1] - offset;
- if(length > 0) {
- udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED8_OFFSET\n", length);
- errorCode = U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- index = IX_CES_OFFSET;
- offset = indexes[index];
- length = indexes[index + 1] - offset;
- if(length > 0) {
- ds->swapArray64(ds, inBytes + offset, length, outBytes + offset, &errorCode);
- }
-
- index = IX_RESERVED10_OFFSET;
- offset = indexes[index];
- length = indexes[index + 1] - offset;
- if(length > 0) {
- udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED10_OFFSET\n", length);
- errorCode = U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- index = IX_CE32S_OFFSET;
- offset = indexes[index];
- length = indexes[index + 1] - offset;
- if(length > 0) {
- ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
- }
-
- index = IX_ROOT_ELEMENTS_OFFSET;
- offset = indexes[index];
- length = indexes[index + 1] - offset;
- if(length > 0) {
- ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
- }
-
- index = IX_CONTEXTS_OFFSET;
- offset = indexes[index];
- length = indexes[index + 1] - offset;
- if(length > 0) {
- ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
- }
-
- index = IX_UNSAFE_BWD_OFFSET;
- offset = indexes[index];
- length = indexes[index + 1] - offset;
- if(length > 0) {
- ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
- }
-
- index = IX_FAST_LATIN_TABLE_OFFSET;
- offset = indexes[index];
- length = indexes[index + 1] - offset;
- if(length > 0) {
- ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
- }
-
- index = IX_SCRIPTS_OFFSET;
- offset = indexes[index];
- length = indexes[index + 1] - offset;
- if(length > 0) {
- ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
- }
-
- // Skip the IX_COMPRESSIBLE_BYTES_OFFSET byte array.
-
- index = IX_RESERVED18_OFFSET;
- offset = indexes[index];
- length = indexes[index + 1] - offset;
- if(length > 0) {
- udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED18_OFFSET\n", length);
- errorCode = U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- return size;
-}
-
-} // namespace
-
-/* swap ICU collation data like ucadata.icu */
-U_CAPI int32_t U_EXPORT2
-ucol_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) { return 0; }
-
- /* udata_swapDataHeader checks the arguments */
- int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- // Try to swap the old format version which did not have a standard data header.
- *pErrorCode=U_ZERO_ERROR;
- return swapFormatVersion3(ds, inData, length, outData, pErrorCode);
- }
-
- /* check data format and format version */
- const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);
- if(!(
- info.dataFormat[0]==0x55 && // dataFormat="UCol"
- info.dataFormat[1]==0x43 &&
- info.dataFormat[2]==0x6f &&
- info.dataFormat[3]==0x6c &&
- (3<=info.formatVersion[0] && info.formatVersion[0]<=5)
- )) {
- udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x "
- "(format version %02x.%02x) is not recognized as collation data\n",
- info.dataFormat[0], info.dataFormat[1],
- info.dataFormat[2], info.dataFormat[3],
- info.formatVersion[0], info.formatVersion[1]);
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- inData=(const char *)inData+headerSize;
- if(length>=0) { length-=headerSize; }
- outData=(char *)outData+headerSize;
- int32_t collationSize;
- if(info.formatVersion[0]>=4) {
- collationSize=swapFormatVersion4(ds, inData, length, outData, *pErrorCode);
- } else {
- collationSize=swapFormatVersion3(ds, inData, length, outData, pErrorCode);
- }
- if(U_SUCCESS(*pErrorCode)) {
- return headerSize+collationSize;
- } else {
- return 0;
- }
-}
-
-/* swap inverse UCA collation data (invuca.icu) */
-U_CAPI int32_t U_EXPORT2
-ucol_swapInverseUCA(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const UDataInfo *pInfo;
- int32_t headerSize;
-
- const uint8_t *inBytes;
- uint8_t *outBytes;
-
- const InverseUCATableHeader *inHeader;
- InverseUCATableHeader *outHeader;
- InverseUCATableHeader header={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} };
-
- /* udata_swapDataHeader checks the arguments */
- headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /* check data format and format version */
- pInfo=(const UDataInfo *)((const char *)inData+4);
- if(!(
- pInfo->dataFormat[0]==0x49 && /* dataFormat="InvC" */
- pInfo->dataFormat[1]==0x6e &&
- pInfo->dataFormat[2]==0x76 &&
- pInfo->dataFormat[3]==0x43 &&
- pInfo->formatVersion[0]==2 &&
- pInfo->formatVersion[1]>=1
- )) {
- udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
- pInfo->dataFormat[0], pInfo->dataFormat[1],
- pInfo->dataFormat[2], pInfo->dataFormat[3],
- pInfo->formatVersion[0], pInfo->formatVersion[1]);
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- inBytes=(const uint8_t *)inData+headerSize;
- outBytes=(uint8_t *)outData+headerSize;
-
- inHeader=(const InverseUCATableHeader *)inBytes;
- outHeader=(InverseUCATableHeader *)outBytes;
-
- /*
- * The inverse UCA collation binary must contain at least the InverseUCATableHeader,
- * starting with its size field.
- * sizeof(UCATableHeader)==8*4 in ICU 2.8
- * check the length against the header size before reading the size field
- */
- if(length<0) {
- header.byteSize=udata_readInt32(ds, inHeader->byteSize);
- } else if(
- ((length-headerSize)<(8*4) ||
- (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize)))
- ) {
- udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
- length);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- if(length>=0) {
- /* copy everything, takes care of data that needs no swapping */
- if(inBytes!=outBytes) {
- uprv_memcpy(outBytes, inBytes, header.byteSize);
- }
-
- /* swap the necessary pieces in the order of their occurrence in the data */
-
- /* read more of the InverseUCATableHeader (the byteSize field was read above) */
- header.tableSize= ds->readUInt32(inHeader->tableSize);
- header.contsSize= ds->readUInt32(inHeader->contsSize);
- header.table= ds->readUInt32(inHeader->table);
- header.conts= ds->readUInt32(inHeader->conts);
-
- /* swap the 32-bit integers in the header */
- ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode);
-
- /* swap the inverse table; tableSize counts uint32_t[3] rows */
- ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4,
- outBytes+header.table, pErrorCode);
-
- /* swap the continuation table; contsSize counts UChars */
- ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR,
- outBytes+header.conts, pErrorCode);
- }
-
- return headerSize+header.byteSize;
-}
-
-#endif /* #if !UCONFIG_NO_COLLATION */
diff --git a/contrib/libs/icu/common/ucol_swp.h b/contrib/libs/icu/common/ucol_swp.h
deleted file mode 100644
index fd8be9aa54f..00000000000
--- a/contrib/libs/icu/common/ucol_swp.h
+++ /dev/null
@@ -1,58 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2003-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: ucol_swp.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2003sep10
-* created by: Markus W. Scherer
-*
-* Swap collation binaries.
-*/
-
-#ifndef __UCOL_SWP_H__
-#define __UCOL_SWP_H__
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_COLLATION
-
-#include "udataswp.h"
-
-/*
- * Does the data look like a collation binary?
- * @internal
- */
-U_INTERNAL UBool U_EXPORT2
-ucol_looksLikeCollationBinary(const UDataSwapper *ds,
- const void *inData, int32_t length);
-
-/**
- * Swap ICU collation data like ucadata.icu. See udataswp.h.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-ucol_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-/**
- * Swap inverse UCA collation data (invuca.icu). See udataswp.h.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-ucol_swapInverseUCA(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-#endif /* #if !UCONFIG_NO_COLLATION */
-
-#endif
diff --git a/contrib/libs/icu/common/ucptrie.cpp b/contrib/libs/icu/common/ucptrie.cpp
deleted file mode 100644
index 0004160a238..00000000000
--- a/contrib/libs/icu/common/ucptrie.cpp
+++ /dev/null
@@ -1,601 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-// ucptrie.cpp (modified from utrie2.cpp)
-// created: 2017dec29 Markus W. Scherer
-
-// #define UCPTRIE_DEBUG
-#ifdef UCPTRIE_DEBUG
-# include <stdio.h>
-#endif
-
-#include "unicode/utypes.h"
-#include "unicode/ucptrie.h"
-#include "unicode/utf.h"
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-#include "cmemory.h"
-#include "uassert.h"
-#include "ucptrie_impl.h"
-
-U_CAPI UCPTrie * U_EXPORT2
-ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth,
- const void *data, int32_t length, int32_t *pActualLength,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
-
- if (length <= 0 || (U_POINTER_MASK_LSB(data, 3) != 0) ||
- type < UCPTRIE_TYPE_ANY || UCPTRIE_TYPE_SMALL < type ||
- valueWidth < UCPTRIE_VALUE_BITS_ANY || UCPTRIE_VALUE_BITS_8 < valueWidth) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
-
- // Enough data for a trie header?
- if (length < (int32_t)sizeof(UCPTrieHeader)) {
- *pErrorCode = U_INVALID_FORMAT_ERROR;
- return nullptr;
- }
-
- // Check the signature.
- const UCPTrieHeader *header = (const UCPTrieHeader *)data;
- if (header->signature != UCPTRIE_SIG) {
- *pErrorCode = U_INVALID_FORMAT_ERROR;
- return nullptr;
- }
-
- int32_t options = header->options;
- int32_t typeInt = (options >> 6) & 3;
- int32_t valueWidthInt = options & UCPTRIE_OPTIONS_VALUE_BITS_MASK;
- if (typeInt > UCPTRIE_TYPE_SMALL || valueWidthInt > UCPTRIE_VALUE_BITS_8 ||
- (options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0) {
- *pErrorCode = U_INVALID_FORMAT_ERROR;
- return nullptr;
- }
- UCPTrieType actualType = (UCPTrieType)typeInt;
- UCPTrieValueWidth actualValueWidth = (UCPTrieValueWidth)valueWidthInt;
- if (type < 0) {
- type = actualType;
- }
- if (valueWidth < 0) {
- valueWidth = actualValueWidth;
- }
- if (type != actualType || valueWidth != actualValueWidth) {
- *pErrorCode = U_INVALID_FORMAT_ERROR;
- return nullptr;
- }
-
- // Get the length values and offsets.
- UCPTrie tempTrie;
- uprv_memset(&tempTrie, 0, sizeof(tempTrie));
- tempTrie.indexLength = header->indexLength;
- tempTrie.dataLength =
- ((options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | header->dataLength;
- tempTrie.index3NullOffset = header->index3NullOffset;
- tempTrie.dataNullOffset =
- ((options & UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK) << 8) | header->dataNullOffset;
-
- tempTrie.highStart = header->shiftedHighStart << UCPTRIE_SHIFT_2;
- tempTrie.shifted12HighStart = (tempTrie.highStart + 0xfff) >> 12;
- tempTrie.type = type;
- tempTrie.valueWidth = valueWidth;
-
- // Calculate the actual length.
- int32_t actualLength = (int32_t)sizeof(UCPTrieHeader) + tempTrie.indexLength * 2;
- if (valueWidth == UCPTRIE_VALUE_BITS_16) {
- actualLength += tempTrie.dataLength * 2;
- } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
- actualLength += tempTrie.dataLength * 4;
- } else {
- actualLength += tempTrie.dataLength;
- }
- if (length < actualLength) {
- *pErrorCode = U_INVALID_FORMAT_ERROR; // Not enough bytes.
- return nullptr;
- }
-
- // Allocate the trie.
- UCPTrie *trie = (UCPTrie *)uprv_malloc(sizeof(UCPTrie));
- if (trie == nullptr) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
- uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
-#ifdef UCPTRIE_DEBUG
- trie->name = "fromSerialized";
-#endif
-
- // Set the pointers to its index and data arrays.
- const uint16_t *p16 = (const uint16_t *)(header + 1);
- trie->index = p16;
- p16 += trie->indexLength;
-
- // Get the data.
- int32_t nullValueOffset = trie->dataNullOffset;
- if (nullValueOffset >= trie->dataLength) {
- nullValueOffset = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
- }
- switch (valueWidth) {
- case UCPTRIE_VALUE_BITS_16:
- trie->data.ptr16 = p16;
- trie->nullValue = trie->data.ptr16[nullValueOffset];
- break;
- case UCPTRIE_VALUE_BITS_32:
- trie->data.ptr32 = (const uint32_t *)p16;
- trie->nullValue = trie->data.ptr32[nullValueOffset];
- break;
- case UCPTRIE_VALUE_BITS_8:
- trie->data.ptr8 = (const uint8_t *)p16;
- trie->nullValue = trie->data.ptr8[nullValueOffset];
- break;
- default:
- // Unreachable because valueWidth was checked above.
- *pErrorCode = U_INVALID_FORMAT_ERROR;
- return nullptr;
- }
-
- if (pActualLength != nullptr) {
- *pActualLength = actualLength;
- }
- return trie;
-}
-
-U_CAPI void U_EXPORT2
-ucptrie_close(UCPTrie *trie) {
- uprv_free(trie);
-}
-
-U_CAPI UCPTrieType U_EXPORT2
-ucptrie_getType(const UCPTrie *trie) {
- return (UCPTrieType)trie->type;
-}
-
-U_CAPI UCPTrieValueWidth U_EXPORT2
-ucptrie_getValueWidth(const UCPTrie *trie) {
- return (UCPTrieValueWidth)trie->valueWidth;
-}
-
-U_CAPI int32_t U_EXPORT2
-ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c) {
- int32_t i1 = c >> UCPTRIE_SHIFT_1;
- if (trie->type == UCPTRIE_TYPE_FAST) {
- U_ASSERT(0xffff < c && c < trie->highStart);
- i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH;
- } else {
- U_ASSERT((uint32_t)c < (uint32_t)trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT);
- i1 += UCPTRIE_SMALL_INDEX_LENGTH;
- }
- int32_t i3Block = trie->index[
- (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)];
- int32_t i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK;
- int32_t dataBlock;
- if ((i3Block & 0x8000) == 0) {
- // 16-bit indexes
- dataBlock = trie->index[i3Block + i3];
- } else {
- // 18-bit indexes stored in groups of 9 entries per 8 indexes.
- i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
- i3 &= 7;
- dataBlock = ((int32_t)trie->index[i3Block++] << (2 + (2 * i3))) & 0x30000;
- dataBlock |= trie->index[i3Block + i3];
- }
- return dataBlock + (c & UCPTRIE_SMALL_DATA_MASK);
-}
-
-U_CAPI int32_t U_EXPORT2
-ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3) {
- UChar32 c = (lt1 << 12) | (t2 << 6) | t3;
- if (c >= trie->highStart) {
- // Possible because the UTF-8 macro compares with shifted12HighStart which may be higher.
- return trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
- }
- return ucptrie_internalSmallIndex(trie, c);
-}
-
-U_CAPI int32_t U_EXPORT2
-ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c,
- const uint8_t *start, const uint8_t *src) {
- int32_t i, length;
- // Support 64-bit pointers by avoiding cast of arbitrary difference.
- if ((src - start) <= 7) {
- i = length = (int32_t)(src - start);
- } else {
- i = length = 7;
- start = src - 7;
- }
- c = utf8_prevCharSafeBody(start, 0, &i, c, -1);
- i = length - i; // Number of bytes read backward from src.
- int32_t idx = _UCPTRIE_CP_INDEX(trie, 0xffff, c);
- return (idx << 3) | i;
-}
-
-namespace {
-
-inline uint32_t getValue(UCPTrieData data, UCPTrieValueWidth valueWidth, int32_t dataIndex) {
- switch (valueWidth) {
- case UCPTRIE_VALUE_BITS_16:
- return data.ptr16[dataIndex];
- case UCPTRIE_VALUE_BITS_32:
- return data.ptr32[dataIndex];
- case UCPTRIE_VALUE_BITS_8:
- return data.ptr8[dataIndex];
- default:
- // Unreachable if the trie is properly initialized.
- return 0xffffffff;
- }
-}
-
-} // namespace
-
-U_CAPI uint32_t U_EXPORT2
-ucptrie_get(const UCPTrie *trie, UChar32 c) {
- int32_t dataIndex;
- if ((uint32_t)c <= 0x7f) {
- // linear ASCII
- dataIndex = c;
- } else {
- UChar32 fastMax = trie->type == UCPTRIE_TYPE_FAST ? 0xffff : UCPTRIE_SMALL_MAX;
- dataIndex = _UCPTRIE_CP_INDEX(trie, fastMax, c);
- }
- return getValue(trie->data, (UCPTrieValueWidth)trie->valueWidth, dataIndex);
-}
-
-namespace {
-
-constexpr int32_t MAX_UNICODE = 0x10ffff;
-
-inline uint32_t maybeFilterValue(uint32_t value, uint32_t trieNullValue, uint32_t nullValue,
- UCPMapValueFilter *filter, const void *context) {
- if (value == trieNullValue) {
- value = nullValue;
- } else if (filter != nullptr) {
- value = filter(context, value);
- }
- return value;
-}
-
-UChar32 getRange(const void *t, UChar32 start,
- UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
- if ((uint32_t)start > MAX_UNICODE) {
- return U_SENTINEL;
- }
- const UCPTrie *trie = reinterpret_cast<const UCPTrie *>(t);
- UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth;
- if (start >= trie->highStart) {
- if (pValue != nullptr) {
- int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
- uint32_t value = getValue(trie->data, valueWidth, di);
- if (filter != nullptr) { value = filter(context, value); }
- *pValue = value;
- }
- return MAX_UNICODE;
- }
-
- uint32_t nullValue = trie->nullValue;
- if (filter != nullptr) { nullValue = filter(context, nullValue); }
- const uint16_t *index = trie->index;
-
- int32_t prevI3Block = -1;
- int32_t prevBlock = -1;
- UChar32 c = start;
- uint32_t trieValue, value = nullValue;
- bool haveValue = false;
- do {
- int32_t i3Block;
- int32_t i3;
- int32_t i3BlockLength;
- int32_t dataBlockLength;
- if (c <= 0xffff && (trie->type == UCPTRIE_TYPE_FAST || c <= UCPTRIE_SMALL_MAX)) {
- i3Block = 0;
- i3 = c >> UCPTRIE_FAST_SHIFT;
- i3BlockLength = trie->type == UCPTRIE_TYPE_FAST ?
- UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
- dataBlockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
- } else {
- // Use the multi-stage index.
- int32_t i1 = c >> UCPTRIE_SHIFT_1;
- if (trie->type == UCPTRIE_TYPE_FAST) {
- U_ASSERT(0xffff < c && c < trie->highStart);
- i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH;
- } else {
- U_ASSERT(c < trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT);
- i1 += UCPTRIE_SMALL_INDEX_LENGTH;
- }
- i3Block = trie->index[
- (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)];
- if (i3Block == prevI3Block && (c - start) >= UCPTRIE_CP_PER_INDEX_2_ENTRY) {
- // The index-3 block is the same as the previous one, and filled with value.
- U_ASSERT((c & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);
- c += UCPTRIE_CP_PER_INDEX_2_ENTRY;
- continue;
- }
- prevI3Block = i3Block;
- if (i3Block == trie->index3NullOffset) {
- // This is the index-3 null block.
- if (haveValue) {
- if (nullValue != value) {
- return c - 1;
- }
- } else {
- trieValue = trie->nullValue;
- value = nullValue;
- if (pValue != nullptr) { *pValue = nullValue; }
- haveValue = true;
- }
- prevBlock = trie->dataNullOffset;
- c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
- continue;
- }
- i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK;
- i3BlockLength = UCPTRIE_INDEX_3_BLOCK_LENGTH;
- dataBlockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
- }
- // Enumerate data blocks for one index-3 block.
- do {
- int32_t block;
- if ((i3Block & 0x8000) == 0) {
- block = index[i3Block + i3];
- } else {
- // 18-bit indexes stored in groups of 9 entries per 8 indexes.
- int32_t group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
- int32_t gi = i3 & 7;
- block = ((int32_t)index[group++] << (2 + (2 * gi))) & 0x30000;
- block |= index[group + gi];
- }
- if (block == prevBlock && (c - start) >= dataBlockLength) {
- // The block is the same as the previous one, and filled with value.
- U_ASSERT((c & (dataBlockLength - 1)) == 0);
- c += dataBlockLength;
- } else {
- int32_t dataMask = dataBlockLength - 1;
- prevBlock = block;
- if (block == trie->dataNullOffset) {
- // This is the data null block.
- if (haveValue) {
- if (nullValue != value) {
- return c - 1;
- }
- } else {
- trieValue = trie->nullValue;
- value = nullValue;
- if (pValue != nullptr) { *pValue = nullValue; }
- haveValue = true;
- }
- c = (c + dataBlockLength) & ~dataMask;
- } else {
- int32_t di = block + (c & dataMask);
- uint32_t trieValue2 = getValue(trie->data, valueWidth, di);
- if (haveValue) {
- if (trieValue2 != trieValue) {
- if (filter == nullptr ||
- maybeFilterValue(trieValue2, trie->nullValue, nullValue,
- filter, context) != value) {
- return c - 1;
- }
- trieValue = trieValue2; // may or may not help
- }
- } else {
- trieValue = trieValue2;
- value = maybeFilterValue(trieValue2, trie->nullValue, nullValue,
- filter, context);
- if (pValue != nullptr) { *pValue = value; }
- haveValue = true;
- }
- while ((++c & dataMask) != 0) {
- trieValue2 = getValue(trie->data, valueWidth, ++di);
- if (trieValue2 != trieValue) {
- if (filter == nullptr ||
- maybeFilterValue(trieValue2, trie->nullValue, nullValue,
- filter, context) != value) {
- return c - 1;
- }
- trieValue = trieValue2; // may or may not help
- }
- }
- }
- }
- } while (++i3 < i3BlockLength);
- } while (c < trie->highStart);
- U_ASSERT(haveValue);
- int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
- uint32_t highValue = getValue(trie->data, valueWidth, di);
- if (maybeFilterValue(highValue, trie->nullValue, nullValue,
- filter, context) != value) {
- return c - 1;
- } else {
- return MAX_UNICODE;
- }
-}
-
-} // namespace
-
-U_CFUNC UChar32
-ucptrie_internalGetRange(UCPTrieGetRange *getRange,
- const void *trie, UChar32 start,
- UCPMapRangeOption option, uint32_t surrogateValue,
- UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
- if (option == UCPMAP_RANGE_NORMAL) {
- return getRange(trie, start, filter, context, pValue);
- }
- uint32_t value;
- if (pValue == nullptr) {
- // We need to examine the range value even if the caller does not want it.
- pValue = &value;
- }
- UChar32 surrEnd = option == UCPMAP_RANGE_FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff;
- UChar32 end = getRange(trie, start, filter, context, pValue);
- if (end < 0xd7ff || start > surrEnd) {
- return end;
- }
- // The range overlaps with surrogates, or ends just before the first one.
- if (*pValue == surrogateValue) {
- if (end >= surrEnd) {
- // Surrogates followed by a non-surrogateValue range,
- // or surrogates are part of a larger surrogateValue range.
- return end;
- }
- } else {
- if (start <= 0xd7ff) {
- return 0xd7ff; // Non-surrogateValue range ends before surrogateValue surrogates.
- }
- // Start is a surrogate with a non-surrogateValue code *unit* value.
- // Return a surrogateValue code *point* range.
- *pValue = surrogateValue;
- if (end > surrEnd) {
- return surrEnd; // Surrogate range ends before non-surrogateValue rest of range.
- }
- }
- // See if the surrogateValue surrogate range can be merged with
- // an immediately following range.
- uint32_t value2;
- UChar32 end2 = getRange(trie, surrEnd + 1, filter, context, &value2);
- if (value2 == surrogateValue) {
- return end2;
- }
- return surrEnd;
-}
-
-U_CAPI UChar32 U_EXPORT2
-ucptrie_getRange(const UCPTrie *trie, UChar32 start,
- UCPMapRangeOption option, uint32_t surrogateValue,
- UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
- return ucptrie_internalGetRange(getRange, trie, start,
- option, surrogateValue,
- filter, context, pValue);
-}
-
-U_CAPI int32_t U_EXPORT2
-ucptrie_toBinary(const UCPTrie *trie,
- void *data, int32_t capacity,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- UCPTrieType type = (UCPTrieType)trie->type;
- UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth;
- if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type ||
- valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth ||
- capacity < 0 ||
- (capacity > 0 && (data == nullptr || (U_POINTER_MASK_LSB(data, 3) != 0)))) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- int32_t length = (int32_t)sizeof(UCPTrieHeader) + trie->indexLength * 2;
- switch (valueWidth) {
- case UCPTRIE_VALUE_BITS_16:
- length += trie->dataLength * 2;
- break;
- case UCPTRIE_VALUE_BITS_32:
- length += trie->dataLength * 4;
- break;
- case UCPTRIE_VALUE_BITS_8:
- length += trie->dataLength;
- break;
- default:
- // unreachable
- break;
- }
- if (capacity < length) {
- *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
- return length;
- }
-
- char *bytes = (char *)data;
- UCPTrieHeader *header = (UCPTrieHeader *)bytes;
- header->signature = UCPTRIE_SIG; // "Tri3"
- header->options = (uint16_t)(
- ((trie->dataLength & 0xf0000) >> 4) |
- ((trie->dataNullOffset & 0xf0000) >> 8) |
- (trie->type << 6) |
- valueWidth);
- header->indexLength = (uint16_t)trie->indexLength;
- header->dataLength = (uint16_t)trie->dataLength;
- header->index3NullOffset = trie->index3NullOffset;
- header->dataNullOffset = (uint16_t)trie->dataNullOffset;
- header->shiftedHighStart = trie->highStart >> UCPTRIE_SHIFT_2;
- bytes += sizeof(UCPTrieHeader);
-
- uprv_memcpy(bytes, trie->index, trie->indexLength * 2);
- bytes += trie->indexLength * 2;
-
- switch (valueWidth) {
- case UCPTRIE_VALUE_BITS_16:
- uprv_memcpy(bytes, trie->data.ptr16, trie->dataLength * 2);
- break;
- case UCPTRIE_VALUE_BITS_32:
- uprv_memcpy(bytes, trie->data.ptr32, trie->dataLength * 4);
- break;
- case UCPTRIE_VALUE_BITS_8:
- uprv_memcpy(bytes, trie->data.ptr8, trie->dataLength);
- break;
- default:
- // unreachable
- break;
- }
- return length;
-}
-
-namespace {
-
-#ifdef UCPTRIE_DEBUG
-long countNull(const UCPTrie *trie) {
- uint32_t nullValue=trie->nullValue;
- int32_t length=trie->dataLength;
- long count=0;
- switch (trie->valueWidth) {
- case UCPTRIE_VALUE_BITS_16:
- for(int32_t i=0; i<length; ++i) {
- if(trie->data.ptr16[i]==nullValue) { ++count; }
- }
- break;
- case UCPTRIE_VALUE_BITS_32:
- for(int32_t i=0; i<length; ++i) {
- if(trie->data.ptr32[i]==nullValue) { ++count; }
- }
- break;
- case UCPTRIE_VALUE_BITS_8:
- for(int32_t i=0; i<length; ++i) {
- if(trie->data.ptr8[i]==nullValue) { ++count; }
- }
- break;
- default:
- // unreachable
- break;
- }
- return count;
-}
-
-U_CFUNC void
-ucptrie_printLengths(const UCPTrie *trie, const char *which) {
- long indexLength=trie->indexLength;
- long dataLength=(long)trie->dataLength;
- long totalLength=(long)sizeof(UCPTrieHeader)+indexLength*2+
- dataLength*(trie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 2 :
- trie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 4 : 1);
- printf("**UCPTrieLengths(%s %s)** index:%6ld data:%6ld countNull:%6ld serialized:%6ld\n",
- which, trie->name, indexLength, dataLength, countNull(trie), totalLength);
-}
-#endif
-
-} // namespace
-
-// UCPMap ----
-// Initially, this is the same as UCPTrie. This may well change.
-
-U_CAPI uint32_t U_EXPORT2
-ucpmap_get(const UCPMap *map, UChar32 c) {
- return ucptrie_get(reinterpret_cast<const UCPTrie *>(map), c);
-}
-
-U_CAPI UChar32 U_EXPORT2
-ucpmap_getRange(const UCPMap *map, UChar32 start,
- UCPMapRangeOption option, uint32_t surrogateValue,
- UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
- return ucptrie_getRange(reinterpret_cast<const UCPTrie *>(map), start,
- option, surrogateValue,
- filter, context, pValue);
-}
diff --git a/contrib/libs/icu/common/ucptrie_impl.h b/contrib/libs/icu/common/ucptrie_impl.h
deleted file mode 100644
index 1fe6a18ac53..00000000000
--- a/contrib/libs/icu/common/ucptrie_impl.h
+++ /dev/null
@@ -1,289 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-// ucptrie_impl.h (modified from utrie2_impl.h)
-// created: 2017dec29 Markus W. Scherer
-
-#ifndef __UCPTRIE_IMPL_H__
-#define __UCPTRIE_IMPL_H__
-
-#include "unicode/ucptrie.h"
-#ifdef UCPTRIE_DEBUG
-#include "unicode/umutablecptrie.h"
-#endif
-
-// UCPTrie signature values, in platform endianness and opposite endianness.
-// The UCPTrie signature ASCII byte values spell "Tri3".
-#define UCPTRIE_SIG 0x54726933
-#define UCPTRIE_OE_SIG 0x33697254
-
-/**
- * Header data for the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
- * @internal
- */
-struct UCPTrieHeader {
- /** "Tri3" in big-endian US-ASCII (0x54726933) */
- uint32_t signature;
-
- /**
- * Options bit field:
- * Bits 15..12: Data length bits 19..16.
- * Bits 11..8: Data null block offset bits 19..16.
- * Bits 7..6: UCPTrieType
- * Bits 5..3: Reserved (0).
- * Bits 2..0: UCPTrieValueWidth
- */
- uint16_t options;
-
- /** Total length of the index tables. */
- uint16_t indexLength;
-
- /** Data length bits 15..0. */
- uint16_t dataLength;
-
- /** Index-3 null block offset, 0x7fff or 0xffff if none. */
- uint16_t index3NullOffset;
-
- /** Data null block offset bits 15..0, 0xfffff if none. */
- uint16_t dataNullOffset;
-
- /**
- * First code point of the single-value range ending with U+10ffff,
- * rounded up and then shifted right by UCPTRIE_SHIFT_2.
- */
- uint16_t shiftedHighStart;
-};
-
-/**
- * Constants for use with UCPTrieHeader.options.
- * @internal
- */
-enum {
- UCPTRIE_OPTIONS_DATA_LENGTH_MASK = 0xf000,
- UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00,
- UCPTRIE_OPTIONS_RESERVED_MASK = 0x38,
- UCPTRIE_OPTIONS_VALUE_BITS_MASK = 7,
- /**
- * Value for index3NullOffset which indicates that there is no index-3 null block.
- * Bit 15 is unused for this value because this bit is used if the index-3 contains
- * 18-bit indexes.
- */
- UCPTRIE_NO_INDEX3_NULL_OFFSET = 0x7fff,
- UCPTRIE_NO_DATA_NULL_OFFSET = 0xfffff
-};
-
-// Internal constants.
-enum {
- /** The length of the BMP index table. 1024=0x400 */
- UCPTRIE_BMP_INDEX_LENGTH = 0x10000 >> UCPTRIE_FAST_SHIFT,
-
- UCPTRIE_SMALL_LIMIT = 0x1000,
- UCPTRIE_SMALL_INDEX_LENGTH = UCPTRIE_SMALL_LIMIT >> UCPTRIE_FAST_SHIFT,
-
- /** Shift size for getting the index-3 table offset. */
- UCPTRIE_SHIFT_3 = 4,
-
- /** Shift size for getting the index-2 table offset. */
- UCPTRIE_SHIFT_2 = 5 + UCPTRIE_SHIFT_3,
-
- /** Shift size for getting the index-1 table offset. */
- UCPTRIE_SHIFT_1 = 5 + UCPTRIE_SHIFT_2,
-
- /**
- * Difference between two shift sizes,
- * for getting an index-2 offset from an index-3 offset. 5=9-4
- */
- UCPTRIE_SHIFT_2_3 = UCPTRIE_SHIFT_2 - UCPTRIE_SHIFT_3,
-
- /**
- * Difference between two shift sizes,
- * for getting an index-1 offset from an index-2 offset. 5=14-9
- */
- UCPTRIE_SHIFT_1_2 = UCPTRIE_SHIFT_1 - UCPTRIE_SHIFT_2,
-
- /**
- * Number of index-1 entries for the BMP. (4)
- * This part of the index-1 table is omitted from the serialized form.
- */
- UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UCPTRIE_SHIFT_1,
-
- /** Number of entries in an index-2 block. 32=0x20 */
- UCPTRIE_INDEX_2_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_1_2,
-
- /** Mask for getting the lower bits for the in-index-2-block offset. */
- UCPTRIE_INDEX_2_MASK = UCPTRIE_INDEX_2_BLOCK_LENGTH - 1,
-
- /** Number of code points per index-2 table entry. 512=0x200 */
- UCPTRIE_CP_PER_INDEX_2_ENTRY = 1 << UCPTRIE_SHIFT_2,
-
- /** Number of entries in an index-3 block. 32=0x20 */
- UCPTRIE_INDEX_3_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_2_3,
-
- /** Mask for getting the lower bits for the in-index-3-block offset. */
- UCPTRIE_INDEX_3_MASK = UCPTRIE_INDEX_3_BLOCK_LENGTH - 1,
-
- /** Number of entries in a small data block. 16=0x10 */
- UCPTRIE_SMALL_DATA_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_3,
-
- /** Mask for getting the lower bits for the in-small-data-block offset. */
- UCPTRIE_SMALL_DATA_MASK = UCPTRIE_SMALL_DATA_BLOCK_LENGTH - 1
-};
-
-typedef UChar32
-UCPTrieGetRange(const void *trie, UChar32 start,
- UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
-
-U_CFUNC UChar32
-ucptrie_internalGetRange(UCPTrieGetRange *getRange,
- const void *trie, UChar32 start,
- UCPMapRangeOption option, uint32_t surrogateValue,
- UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
-
-#ifdef UCPTRIE_DEBUG
-U_CFUNC void
-ucptrie_printLengths(const UCPTrie *trie, const char *which);
-
-U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name);
-#endif
-
-/*
- * Format of the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
- * For overview information see http://site.icu-project.org/design/struct/utrie
- *
- * The binary trie data should be 32-bit-aligned.
- * The overall layout is:
- *
- * UCPTrieHeader header; -- 16 bytes, see struct definition above
- * uint16_t index[header.indexLength];
- * uintXY_t data[header.dataLength];
- *
- * The trie data array is an array of uint16_t, uint32_t, or uint8_t,
- * specified via the UCPTrieValueWidth when building the trie.
- * The data array is 32-bit-aligned for uint32_t, otherwise 16-bit-aligned.
- * The overall length of the trie data is a multiple of 4 bytes.
- * (Padding is added at the end of the index array and/or near the end of the data array as needed.)
- *
- * The length of the data array (dataLength) is stored as an integer split across two fields
- * of the header struct (high bits in header.options).
- *
- * The trie type can be "fast" or "small" which determines the index structure,
- * specified via the UCPTrieType when building the trie.
- *
- * The type and valueWidth are stored in the header.options.
- * There are reserved type and valueWidth values, and reserved header.options bits.
- * They could be used in future format extensions.
- * Code reading the trie structure must fail with an error when unknown values or options are set.
- *
- * Values for ASCII character (U+0000..U+007F) can always be found at the start of the data array.
- *
- * Values for code points below a type-specific fast-indexing limit are found via two-stage lookup.
- * For a "fast" trie, the limit is the BMP/supplementary boundary at U+10000.
- * For a "small" trie, the limit is UCPTRIE_SMALL_MAX+1=U+1000.
- *
- * All code points in the range highStart..U+10FFFF map to a single highValue
- * which is stored at the second-to-last position of the data array.
- * (See UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET.)
- * The highStart value is header.shiftedHighStart<<UCPTRIE_SHIFT_2.
- * (UCPTRIE_SHIFT_2=9)
- *
- * Values for code points fast_limit..highStart-1 are found via four-stage lookup.
- * The data block size is smaller for this range than for the fast range.
- * This together with more index stages with small blocks makes this range
- * more easily compactable.
- *
- * There is also a trie error value stored at the last position of the data array.
- * (See UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET.)
- * It is intended to be returned for inputs that are not Unicode code points
- * (outside U+0000..U+10FFFF), or in string processing for ill-formed input
- * (unpaired surrogate in UTF-16, ill-formed UTF-8 subsequence).
- *
- * For a "fast" trie:
- *
- * The index array starts with the BMP index table for BMP code point lookup.
- * Its length is 1024=0x400.
- *
- * The supplementary index-1 table follows the BMP index table.
- * Variable length, for code points up to highStart-1.
- * Maximum length 64=0x40=0x100000>>UCPTRIE_SHIFT_1.
- * (For 0x100000 supplementary code points U+10000..U+10ffff.)
- *
- * After this index-1 table follow the variable-length index-3 and index-2 tables.
- *
- * The supplementary index tables are omitted completely
- * if there is only BMP data (highStart<=U+10000).
- *
- * For a "small" trie:
- *
- * The index array starts with a fast-index table for lookup of code points U+0000..U+0FFF.
- *
- * The "supplementary" index tables are always stored.
- * The index-1 table starts from U+0000, its maximum length is 68=0x44=0x110000>>UCPTRIE_SHIFT_1.
- *
- * For both trie types:
- *
- * The last index-2 block may be a partial block, storing indexes only for code points
- * below highStart.
- *
- * Lookup for ASCII code point c:
- *
- * Linear access from the start of the data array.
- *
- * value = data[c];
- *
- * Lookup for fast-range code point c:
- *
- * Shift the code point right by UCPTRIE_FAST_SHIFT=6 bits,
- * fetch the index array value at that offset,
- * add the lower code point bits, index into the data array.
- *
- * value = data[index[c>>6] + (c&0x3f)];
- *
- * (This works for ASCII as well.)
- *
- * Lookup for small-range code point c below highStart:
- *
- * Split the code point into four bit fields using several sets of shifts & masks
- * to read consecutive values from the index-1, index-2, index-3 and data tables.
- *
- * If all of the data block offsets in an index-3 block fit within 16 bits (up to 0xffff),
- * then the data block offsets are stored directly as uint16_t.
- *
- * Otherwise (this is very unusual but possible), the index-2 entry for the index-3 block
- * has bit 15 (0x8000) set, and each set of 8 index-3 entries is preceded by
- * an additional uint16_t word. Data block offsets are 18 bits wide, with the top 2 bits stored
- * in the additional word.
- *
- * See ucptrie_internalSmallIndex() for details.
- *
- * (In a "small" trie, this works for ASCII and below-fast_limit code points as well.)
- *
- * Compaction:
- *
- * Multiple code point ranges ("blocks") that are aligned on certain boundaries
- * (determined by the shifting/bit fields of code points) and
- * map to the same data values normally share a single subsequence of the data array.
- * Data blocks can also overlap partially.
- * (Depending on the builder code finding duplicate and overlapping blocks.)
- *
- * Iteration over same-value ranges:
- *
- * Range iteration (ucptrie_getRange()) walks the structure from a start code point
- * until some code point is found that maps to a different value;
- * the end of the returned range is just before that.
- *
- * The header.dataNullOffset (split across two header fields, high bits in header.options)
- * is the offset of a widely shared data block filled with one single value.
- * It helps quickly skip over large ranges of data with that value.
- * The builder must ensure that if the start of any data block (fast or small)
- * matches the dataNullOffset, then the whole block must be filled with the null value.
- * Special care must be taken if there is no fast null data block
- * but a small one, which is shorter, and it matches the *start* of some fast data block.
- *
- * Similarly, the header.index3NullOffset is the index-array offset of an index-3 block
- * where all index entries point to the dataNullOffset.
- * If there is no such data or index-3 block, then these offsets are set to
- * values that cannot be reached (data offset out of range/reserved index offset),
- * normally UCPTRIE_NO_DATA_NULL_OFFSET or UCPTRIE_NO_INDEX3_NULL_OFFSET respectively.
- */
-
-#endif
diff --git a/contrib/libs/icu/common/ucurr.cpp b/contrib/libs/icu/common/ucurr.cpp
deleted file mode 100644
index 5eacc4a99bc..00000000000
--- a/contrib/libs/icu/common/ucurr.cpp
+++ /dev/null
@@ -1,2688 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2002-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/ucurr.h"
-#include "unicode/locid.h"
-#include "unicode/ures.h"
-#include "unicode/ustring.h"
-#include "unicode/parsepos.h"
-#include "unicode/uniset.h"
-#include "unicode/usetiter.h"
-#include "unicode/utf16.h"
-#include "ustr_imp.h"
-#include "charstr.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "static_unicode_sets.h"
-#include "uassert.h"
-#include "umutex.h"
-#include "ucln_cmn.h"
-#include "uenumimp.h"
-#include "uhash.h"
-#include "hash.h"
-#include "uinvchar.h"
-#include "uresimp.h"
-#include "ulist.h"
-#include "uresimp.h"
-#include "ureslocs.h"
-#include "ulocimp.h"
-
-using namespace icu;
-
-//#define UCURR_DEBUG_EQUIV 1
-#ifdef UCURR_DEBUG_EQUIV
-#include "stdio.h"
-#endif
-//#define UCURR_DEBUG 1
-#ifdef UCURR_DEBUG
-#include "stdio.h"
-#endif
-
-typedef struct IsoCodeEntry {
- const UChar *isoCode; /* const because it's a reference to a resource bundle string. */
- UDate from;
- UDate to;
-} IsoCodeEntry;
-
-//------------------------------------------------------------
-// Constants
-
-// Default currency meta data of last resort. We try to use the
-// defaults encoded in the meta data resource bundle. If there is a
-// configuration/build error and these are not available, we use these
-// hard-coded defaults (which should be identical).
-static const int32_t LAST_RESORT_DATA[] = { 2, 0, 2, 0 };
-
-// POW10[i] = 10^i, i=0..MAX_POW10
-static const int32_t POW10[] = { 1, 10, 100, 1000, 10000, 100000,
- 1000000, 10000000, 100000000, 1000000000 };
-
-static const int32_t MAX_POW10 = UPRV_LENGTHOF(POW10) - 1;
-
-#define ISO_CURRENCY_CODE_LENGTH 3
-
-//------------------------------------------------------------
-// Resource tags
-//
-
-static const char CURRENCY_DATA[] = "supplementalData";
-// Tag for meta-data, in root.
-static const char CURRENCY_META[] = "CurrencyMeta";
-
-// Tag for map from countries to currencies, in root.
-static const char CURRENCY_MAP[] = "CurrencyMap";
-
-// Tag for default meta-data, in CURRENCY_META
-static const char DEFAULT_META[] = "DEFAULT";
-
-// Variant delimiter
-static const char VAR_DELIM = '_';
-
-// Tag for localized display names (symbols) of currencies
-static const char CURRENCIES[] = "Currencies";
-static const char CURRENCIES_NARROW[] = "Currencies%narrow";
-static const char CURRENCYPLURALS[] = "CurrencyPlurals";
-
-// ISO codes mapping table
-static const UHashtable* gIsoCodes = NULL;
-static icu::UInitOnce gIsoCodesInitOnce = U_INITONCE_INITIALIZER;
-
-// Currency symbol equivalances
-static const icu::Hashtable* gCurrSymbolsEquiv = NULL;
-static icu::UInitOnce gCurrSymbolsEquivInitOnce = U_INITONCE_INITIALIZER;
-
-U_NAMESPACE_BEGIN
-
-// EquivIterator iterates over all strings that are equivalent to a given
-// string, s. Note that EquivIterator will never yield s itself.
-class EquivIterator : public icu::UMemory {
-public:
- // Constructor. hash stores the equivalence relationships; s is the string
- // for which we find equivalent strings.
- inline EquivIterator(const icu::Hashtable& hash, const icu::UnicodeString& s)
- : _hash(hash) {
- _start = _current = &s;
- }
- inline ~EquivIterator() { }
-
- // next returns the next equivalent string or NULL if there are no more.
- // If s has no equivalent strings, next returns NULL on the first call.
- const icu::UnicodeString *next();
-private:
- const icu::Hashtable& _hash;
- const icu::UnicodeString* _start;
- const icu::UnicodeString* _current;
-};
-
-const icu::UnicodeString *
-EquivIterator::next() {
- const icu::UnicodeString* _next = (const icu::UnicodeString*) _hash.get(*_current);
- if (_next == NULL) {
- U_ASSERT(_current == _start);
- return NULL;
- }
- if (*_next == *_start) {
- return NULL;
- }
- _current = _next;
- return _next;
-}
-
-U_NAMESPACE_END
-
-// makeEquivalent makes lhs and rhs equivalent by updating the equivalence
-// relations in hash accordingly.
-static void makeEquivalent(
- const icu::UnicodeString &lhs,
- const icu::UnicodeString &rhs,
- icu::Hashtable* hash, UErrorCode &status) {
- if (U_FAILURE(status)) {
- return;
- }
- if (lhs == rhs) {
- // already equivalent
- return;
- }
- icu::EquivIterator leftIter(*hash, lhs);
- icu::EquivIterator rightIter(*hash, rhs);
- const icu::UnicodeString *firstLeft = leftIter.next();
- const icu::UnicodeString *firstRight = rightIter.next();
- const icu::UnicodeString *nextLeft = firstLeft;
- const icu::UnicodeString *nextRight = firstRight;
- while (nextLeft != NULL && nextRight != NULL) {
- if (*nextLeft == rhs || *nextRight == lhs) {
- // Already equivalent
- return;
- }
- nextLeft = leftIter.next();
- nextRight = rightIter.next();
- }
- // Not equivalent. Must join.
- icu::UnicodeString *newFirstLeft;
- icu::UnicodeString *newFirstRight;
- if (firstRight == NULL && firstLeft == NULL) {
- // Neither lhs or rhs belong to an equivalence circle, so we form
- // a new equivalnce circle of just lhs and rhs.
- newFirstLeft = new icu::UnicodeString(rhs);
- newFirstRight = new icu::UnicodeString(lhs);
- } else if (firstRight == NULL) {
- // lhs belongs to an equivalence circle, but rhs does not, so we link
- // rhs into lhs' circle.
- newFirstLeft = new icu::UnicodeString(rhs);
- newFirstRight = new icu::UnicodeString(*firstLeft);
- } else if (firstLeft == NULL) {
- // rhs belongs to an equivlance circle, but lhs does not, so we link
- // lhs into rhs' circle.
- newFirstLeft = new icu::UnicodeString(*firstRight);
- newFirstRight = new icu::UnicodeString(lhs);
- } else {
- // Both lhs and rhs belong to different equivalnce circles. We link
- // them together to form one single, larger equivalnce circle.
- newFirstLeft = new icu::UnicodeString(*firstRight);
- newFirstRight = new icu::UnicodeString(*firstLeft);
- }
- if (newFirstLeft == NULL || newFirstRight == NULL) {
- delete newFirstLeft;
- delete newFirstRight;
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- hash->put(lhs, (void *) newFirstLeft, status);
- hash->put(rhs, (void *) newFirstRight, status);
-}
-
-// countEquivalent counts how many strings are equivalent to s.
-// hash stores all the equivalnce relations.
-// countEquivalent does not include s itself in the count.
-static int32_t countEquivalent(const icu::Hashtable &hash, const icu::UnicodeString &s) {
- int32_t result = 0;
- icu::EquivIterator iter(hash, s);
- while (iter.next() != NULL) {
- ++result;
- }
-#ifdef UCURR_DEBUG_EQUIV
- {
- char tmp[200];
- s.extract(0,s.length(),tmp, "UTF-8");
- printf("CountEquivalent('%s') = %d\n", tmp, result);
- }
-#endif
- return result;
-}
-
-static const icu::Hashtable* getCurrSymbolsEquiv();
-
-//------------------------------------------------------------
-// Code
-
-/**
- * Cleanup callback func
- */
-static UBool U_CALLCONV
-isoCodes_cleanup(void)
-{
- if (gIsoCodes != NULL) {
- uhash_close(const_cast<UHashtable *>(gIsoCodes));
- gIsoCodes = NULL;
- }
- gIsoCodesInitOnce.reset();
- return TRUE;
-}
-
-/**
- * Cleanup callback func
- */
-static UBool U_CALLCONV
-currSymbolsEquiv_cleanup(void)
-{
- delete const_cast<icu::Hashtable *>(gCurrSymbolsEquiv);
- gCurrSymbolsEquiv = NULL;
- gCurrSymbolsEquivInitOnce.reset();
- return TRUE;
-}
-
-/**
- * Deleter for OlsonToMetaMappingEntry
- */
-static void U_CALLCONV
-deleteIsoCodeEntry(void *obj) {
- IsoCodeEntry *entry = (IsoCodeEntry*)obj;
- uprv_free(entry);
-}
-
-/**
- * Deleter for gCurrSymbolsEquiv.
- */
-static void U_CALLCONV
-deleteUnicode(void *obj) {
- icu::UnicodeString *entry = (icu::UnicodeString*)obj;
- delete entry;
-}
-
-/**
- * Unfortunately, we have to convert the UChar* currency code to char*
- * to use it as a resource key.
- */
-static inline char*
-myUCharsToChars(char* resultOfLen4, const UChar* currency) {
- u_UCharsToChars(currency, resultOfLen4, ISO_CURRENCY_CODE_LENGTH);
- resultOfLen4[ISO_CURRENCY_CODE_LENGTH] = 0;
- return resultOfLen4;
-}
-
-/**
- * Internal function to look up currency data. Result is an array of
- * four integers. The first is the fraction digits. The second is the
- * rounding increment, or 0 if none. The rounding increment is in
- * units of 10^(-fraction_digits). The third and fourth are the same
- * except that they are those used in cash transations ( cashDigits
- * and cashRounding ).
- */
-static const int32_t*
-_findMetaData(const UChar* currency, UErrorCode& ec) {
-
- if (currency == 0 || *currency == 0) {
- if (U_SUCCESS(ec)) {
- ec = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return LAST_RESORT_DATA;
- }
-
- // Get CurrencyMeta resource out of root locale file. [This may
- // move out of the root locale file later; if it does, update this
- // code.]
- UResourceBundle* currencyData = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &ec);
- UResourceBundle* currencyMeta = ures_getByKey(currencyData, CURRENCY_META, currencyData, &ec);
-
- if (U_FAILURE(ec)) {
- ures_close(currencyMeta);
- // Config/build error; return hard-coded defaults
- return LAST_RESORT_DATA;
- }
-
- // Look up our currency, or if that's not available, then DEFAULT
- char buf[ISO_CURRENCY_CODE_LENGTH+1];
- UErrorCode ec2 = U_ZERO_ERROR; // local error code: soft failure
- UResourceBundle* rb = ures_getByKey(currencyMeta, myUCharsToChars(buf, currency), NULL, &ec2);
- if (U_FAILURE(ec2)) {
- ures_close(rb);
- rb = ures_getByKey(currencyMeta,DEFAULT_META, NULL, &ec);
- if (U_FAILURE(ec)) {
- ures_close(currencyMeta);
- ures_close(rb);
- // Config/build error; return hard-coded defaults
- return LAST_RESORT_DATA;
- }
- }
-
- int32_t len;
- const int32_t *data = ures_getIntVector(rb, &len, &ec);
- if (U_FAILURE(ec) || len != 4) {
- // Config/build error; return hard-coded defaults
- if (U_SUCCESS(ec)) {
- ec = U_INVALID_FORMAT_ERROR;
- }
- ures_close(currencyMeta);
- ures_close(rb);
- return LAST_RESORT_DATA;
- }
-
- ures_close(currencyMeta);
- ures_close(rb);
- return data;
-}
-
-// -------------------------------------
-
-static void
-idForLocale(const char* locale, char* countryAndVariant, int capacity, UErrorCode* ec)
-{
- ulocimp_getRegionForSupplementalData(locale, FALSE, countryAndVariant, capacity, ec);
-}
-
-// ------------------------------------------
-//
-// Registration
-//
-//-------------------------------------------
-
-// don't use ICUService since we don't need fallback
-
-U_CDECL_BEGIN
-static UBool U_CALLCONV currency_cleanup(void);
-U_CDECL_END
-
-#if !UCONFIG_NO_SERVICE
-struct CReg;
-
-static UMutex gCRegLock;
-static CReg* gCRegHead = 0;
-
-/**
- * Node of the singly linked list of registered currency overrides.
- * Each node maps a locale-derived id to an ISO currency code. The list head
- * is gCRegHead; reg(), unreg() and get() hold gCRegLock while touching it.
- */
-struct CReg : public icu::UMemory {
-    CReg *next;
-    UChar iso[ISO_CURRENCY_CODE_LENGTH+1];
-    char id[ULOC_FULLNAME_CAPACITY];
-
-    /**
-     * Copies _id (truncated to fit the id buffer) and the first
-     * ISO_CURRENCY_CODE_LENGTH code units of _iso. Both copies are
-     * NUL-terminated explicitly.
-     */
-    CReg(const UChar* _iso, const char* _id)
-        : next(0)
-    {
-        int32_t copyLen = (int32_t)uprv_strlen(_id);
-        if (copyLen > (int32_t)(sizeof(id)-1)) {
-            copyLen = (sizeof(id)-1);
-        }
-        uprv_strncpy(id, _id, copyLen);
-        id[copyLen] = 0;
-        u_memcpy(iso, _iso, ISO_CURRENCY_CODE_LENGTH);
-        iso[ISO_CURRENCY_CODE_LENGTH] = 0;
-    }
-
-    /**
-     * Creates a node for (_iso, _id) and pushes it on the front of the list.
-     * @return the new node as an opaque key, or 0 when status is missing or
-     *         already failing, an argument is null, or allocation fails
-     *         (the latter sets U_MEMORY_ALLOCATION_ERROR).
-     */
-    static UCurrRegistryKey reg(const UChar* _iso, const char* _id, UErrorCode* status)
-    {
-        if (!(status && U_SUCCESS(*status) && _iso && _id)) {
-            return 0;
-        }
-        CReg* node = new CReg(_iso, _id);
-        if (!node) {
-            *status = U_MEMORY_ALLOCATION_ERROR;
-            return 0;
-        }
-
-        umtx_lock(&gCRegLock);
-        if (!gCRegHead) {
-            /* register for the first time */
-            ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup);
-        }
-        node->next = gCRegHead;
-        gCRegHead = node;
-        umtx_unlock(&gCRegLock);
-        return node;
-    }
-
-    /**
-     * Unlinks and deletes the node identified by key.
-     * @return TRUE if key was found in the list, FALSE otherwise.
-     */
-    static UBool unreg(UCurrRegistryKey key) {
-        UBool removed = FALSE;
-        umtx_lock(&gCRegLock);
-
-        // Walk the chain of "next" links so the matching link can be
-        // rewritten in place, whether it is the head or an interior node.
-        for (CReg** link = &gCRegHead; *link; link = &((*link)->next)) {
-            if (*link == key) {
-                *link = ((CReg*)key)->next;
-                delete (CReg*)key;
-                removed = TRUE;
-                break;
-            }
-        }
-
-        umtx_unlock(&gCRegLock);
-        return removed;
-    }
-
-    /**
-     * Looks up the ISO code registered for id.
-     * @return pointer to the node's iso buffer, or NULL if id is not registered.
-     */
-    static const UChar* get(const char* id) {
-        const UChar* isoCode = NULL;
-        umtx_lock(&gCRegLock);
-        CReg* node = gCRegHead;
-
-        /* register cleanup of the mutex */
-        ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup);
-        for (; node; node = node->next) {
-            if (uprv_strcmp(id, node->id) == 0) {
-                isoCode = node->iso;
-                break;
-            }
-        }
-        umtx_unlock(&gCRegLock);
-        return isoCode;
-    }
-
-    /* This doesn't need to be thread safe. It's for u_cleanup only. */
-    static void cleanup(void) {
-        while (CReg* doomed = gCRegHead) {
-            gCRegHead = doomed->next;
-            delete doomed;
-        }
-    }
-};
-
-// -------------------------------------
-
-/**
- * Registers isoCode as the currency for the id derived from locale.
- * Returns the registry key from CReg::reg(), or NULL when status is null
- * or already indicates failure.
- */
-U_CAPI UCurrRegistryKey U_EXPORT2
-ucurr_register(const UChar* isoCode, const char* locale, UErrorCode *status)
-{
-    if (!status || U_FAILURE(*status)) {
-        return NULL;
-    }
-    char regionId[ULOC_FULLNAME_CAPACITY];
-    idForLocale(locale, regionId, sizeof(regionId), status);
-    // CReg::reg() re-checks *status, so a failed id lookup yields a null key.
-    return CReg::reg(isoCode, regionId, status);
-}
-
-// -------------------------------------
-
-/**
- * Removes a registration previously returned by ucurr_register().
- * Returns FALSE without touching the registry when status is null or
- * already indicates failure; otherwise returns the result of CReg::unreg().
- */
-U_CAPI UBool U_EXPORT2
-ucurr_unregister(UCurrRegistryKey key, UErrorCode* status)
-{
-    if (!status || U_FAILURE(*status)) {
-        return FALSE;
-    }
-    return CReg::unreg(key);
-}
-#endif /* UCONFIG_NO_SERVICE */
-
-// -------------------------------------
-
-/**
- * Release all static memory held by currency.
- */
-/*The declaration here is needed so currency_cleanup(void)
- * can call this function.
- */
-static UBool U_CALLCONV
-currency_cache_cleanup(void);
-
-U_CDECL_BEGIN
-// Cleanup callback registered with ucln_common_registerCleanup() under
-// UCLN_COMMON_CURRENCY. It empties the CReg registration list (when the
-// service code is compiled in) and then calls the three cache cleanup
-// routines below. Always returns TRUE.
-static UBool U_CALLCONV currency_cleanup(void) {
-#if !UCONFIG_NO_SERVICE
- CReg::cleanup();
-#endif
- /*
- * There might be some cached currency data or isoCodes data.
- */
- currency_cache_cleanup();
- isoCodes_cleanup();
- currSymbolsEquiv_cleanup();
-
- return TRUE;
-}
-U_CDECL_END
-
-// -------------------------------------
-
-/**
- * Writes the currency code for locale into buff and returns its length.
- *
- * Lookup order:
- *   1. a three-character invariant "currency" keyword on the locale;
- *   2. a registered override (CReg), unless UCONFIG_NO_SERVICE is set;
- *   3. the first CurrencyMap entry for the locale's region in the
- *      currency data bundle;
- *   4. if that fails and the id still contains '_', the parent locale
- *      (recursively), reporting U_USING_FALLBACK_WARNING.
- *
- * The result is terminated through u_terminateUChars().
- */
-U_CAPI int32_t U_EXPORT2
-ucurr_forLocale(const char* locale,
-                UChar* buff,
-                int32_t buffCapacity,
-                UErrorCode* ec) {
-    if (U_FAILURE(*ec)) {
-        return 0;
-    }
-    if (buffCapacity < 0 || (buffCapacity > 0 && buff == nullptr)) {
-        *ec = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    // 1. Explicit keyword. ISO currency codes are alpha3 codes, hence 3 + NUL.
-    char currency[4];
-    UErrorCode localStatus = U_ZERO_ERROR;
-    int32_t resLen = uloc_getKeywordValue(locale, "currency",
-                                          currency, UPRV_LENGTHOF(currency), &localStatus);
-    if (U_SUCCESS(localStatus) && resLen == 3 && uprv_isInvariantString(currency, resLen)) {
-        if (buffCapacity > resLen) {
-            T_CString_toUpperCase(currency);
-            u_charsToUChars(currency, buff, resLen);
-        }
-        return u_terminateUChars(buff, buffCapacity, resLen, ec);
-    }
-
-    // get country or country_variant in `id'
-    char id[ULOC_FULLNAME_CAPACITY];
-    idForLocale(locale, id, UPRV_LENGTHOF(id), ec);
-    if (U_FAILURE(*ec)) {
-        return 0;
-    }
-
-#if !UCONFIG_NO_SERVICE
-    // 2. Registered override for this id.
-    const UChar* registered = CReg::get(id);
-    if (registered) {
-        if (buffCapacity > u_strlen(registered)) {
-            u_strcpy(buff, registered);
-        }
-        resLen = u_strlen(registered);
-        return u_terminateUChars(buff, buffCapacity, resLen, ec);
-    }
-#endif
-    // Remove variants, which is only needed for registration.
-    char *variantStart = uprv_strchr(id, VAR_DELIM);
-    if (variantStart) {
-        *variantStart = 0;
-    }
-
-    // 3. Currency code from the data file.
-    const UChar* s = NULL;
-    if (id[0] != 0) {
-        // Look up the CurrencyMap element in the root bundle.
-        localStatus = U_ZERO_ERROR;
-        UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus);
-        UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
-        UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus);
-        UResourceBundle *currencyReq = ures_getByIndex(countryArray, 0, NULL, &localStatus);
-        s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus);
-        ures_close(currencyReq);
-        ures_close(countryArray);
-    } else {
-        // No point looking in the data for an empty string.
-        // This is what we would get.
-        localStatus = U_MISSING_RESOURCE_ERROR;
-    }
-
-    // 4. Unknown id that still has a '_': retry with the parent locale.
-    if (U_FAILURE(localStatus) && uprv_strchr(id, '_') != 0) {
-        uloc_getParent(locale, id, UPRV_LENGTHOF(id), ec);
-        *ec = U_USING_FALLBACK_WARNING;
-        // TODO: Loop over the shortened id rather than recursing and
-        // looking again for a currency keyword.
-        return ucurr_forLocale(id, buff, buffCapacity, ec);
-    }
-
-    // There is nothing to fall back to. Report the failure/warning if possible.
-    if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) {
-        *ec = localStatus;
-    }
-    if (U_SUCCESS(*ec) && buffCapacity > resLen) {
-        u_strcpy(buff, s);
-    }
-    return u_terminateUChars(buff, buffCapacity, resLen, ec);
-}
-
-// end registration
-
-/**
- * Shortens the given locale name in place to its parent locale.
- * NOTE: The string "root" is not recognized; do not use it.
- * @return TRUE if the fallback happened; FALSE if locale is already
- * root ("").
- */
-static UBool fallback(char *loc) {
-    if (*loc == 0) {
-        return FALSE;
-    }
-    if (uprv_strcmp(loc, "en_GB") != 0) {
-        // Regular case: overwrite loc with its parent. The parent is written
-        // into the same buffer, with the current length passed as capacity.
-        // The status of this call is not inspected.
-        UErrorCode status = U_ZERO_ERROR;
-        uloc_getParent(loc, loc, (int32_t)uprv_strlen(loc), &status);
-    } else {
-        // HACK: See #13368. We need "en_GB" to fall back to "en_001" instead of "en"
-        // in order to consume the correct data strings. This hack will be removed
-        // when proper data sink loading is implemented here.
-        // NOTE: "001" adds 1 char over "GB". However, both call sites allocate
-        // arrays with length ULOC_FULLNAME_CAPACITY (plenty of room for en_001).
-        uprv_strcpy(loc + 3, "001");
-    }
-    return TRUE;
-}
-
-
-U_CAPI const UChar* U_EXPORT2
-ucurr_getName(const UChar* currency,
-              const char* locale,
-              UCurrNameStyle nameStyle,
-              UBool* isChoiceFormat, // fillin
-              int32_t* len, // fillin
-              UErrorCode* ec) {
-
-    // The Currencies locale data consulted here looks like this:
-    //|en {
-    //|  Currencies {
-    //|    USD { "US$", "US Dollar" }
-    //|    CHF { "Sw F", "Swiss Franc" }
-    //|    //...
-    //|  }
-    //|}
-    // The index into each currency's array is the numeric value of nameStyle.
-
-    if (U_FAILURE(*ec)) {
-        return 0;
-    }
-
-    int32_t choice = (int32_t) nameStyle;
-    if (choice < 0 || choice > 2) {
-        *ec = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    // ec2 is a private status: only selected warnings are copied into *ec,
-    // failures never propagate out of this function directly.
-    UErrorCode ec2 = U_ZERO_ERROR;
-
-    char loc[ULOC_FULLNAME_CAPACITY];
-    uloc_getName(locale, loc, sizeof(loc), &ec2);
-    if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) {
-        *ec = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    // Invariant-char copy of the currency code, normalized to uppercase,
-    // used as the resource key.
-    char buf[ISO_CURRENCY_CODE_LENGTH+1];
-    myUCharsToChars(buf, currency);
-    T_CString_toUpperCase(buf);
-
-    const UChar* s = NULL;
-    ec2 = U_ZERO_ERROR;
-    LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_CURR, loc, &ec2));
-
-    if (nameStyle == UCURR_NARROW_SYMBOL_NAME) {
-        // Narrow symbols live under their own table: <CURRENCIES_NARROW>/<code>.
-        CharString key;
-        key.append(CURRENCIES_NARROW, ec2);
-        key.append("/", ec2);
-        key.append(buf, ec2);
-        s = ures_getStringByKeyWithFallback(rb.getAlias(), key.data(), len, &ec2);
-        if (ec2 == U_MISSING_RESOURCE_ERROR) {
-            // No narrow symbol: retry below with the regular symbol.
-            *ec = U_USING_FALLBACK_WARNING;
-            ec2 = U_ZERO_ERROR;
-            choice = UCURR_SYMBOL_NAME;
-        }
-    }
-    if (s == NULL) {
-        // Descend Currencies -> <code>, reusing rb as the fill-in bundle,
-        // then pick the string at the requested index.
-        ures_getByKey(rb.getAlias(), CURRENCIES, rb.getAlias(), &ec2);
-        ures_getByKeyWithFallback(rb.getAlias(), buf, rb.getAlias(), &ec2);
-        s = ures_getStringByIndex(rb.getAlias(), choice, len, &ec2);
-    }
-
-    const UBool found = U_SUCCESS(ec2);
-
-    // Surface "default"/"fallback" warnings from the lookup, but never let a
-    // fallback warning replace a default warning already stored in *ec.
-    if (found &&
-        (ec2 == U_USING_DEFAULT_WARNING ||
-         (ec2 == U_USING_FALLBACK_WARNING && *ec != U_USING_DEFAULT_WARNING))) {
-        *ec = ec2;
-    }
-
-    // We no longer support choice format data in names. Data should not contain
-    // choice patterns.
-    if (isChoiceFormat != NULL) {
-        *isChoiceFormat = FALSE;
-    }
-
-    if (!found) {
-        // If we fail to find a match, use the ISO 4217 code
-        *len = u_strlen(currency); // Should == ISO_CURRENCY_CODE_LENGTH, but maybe not...?
-        *ec = U_USING_DEFAULT_WARNING;
-        return currency;
-    }
-
-    U_ASSERT(s != NULL);
-    return s;
-}
-
-U_CAPI const UChar* U_EXPORT2
-ucurr_getPluralName(const UChar* currency,
-                    const char* locale,
-                    UBool* isChoiceFormat,
-                    const char* pluralCount,
-                    int32_t* len, // fillin
-                    UErrorCode* ec) {
-    // Look up the CurrencyPlurals resource for the given locale. The
-    // locale data looks like this:
-    //|en {
-    //|  CurrencyPlurals {
-    //|    USD{
-    //|      one{"US dollar"}
-    //|      other{"US dollars"}
-    //|    }
-    //|  }
-    //|}
-    //
-    // Lookup order: the requested pluralCount, then "other", then the long
-    // name from ucurr_getName().
-
-    if (U_FAILURE(*ec)) {
-        return 0;
-    }
-
-    // Use a separate UErrorCode here that does not propagate out of
-    // this function.
-    UErrorCode ec2 = U_ZERO_ERROR;
-
-    char loc[ULOC_FULLNAME_CAPACITY];
-    uloc_getName(locale, loc, sizeof(loc), &ec2);
-    if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) {
-        *ec = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    char buf[ISO_CURRENCY_CODE_LENGTH+1];
-    myUCharsToChars(buf, currency);
-
-    const UChar* s = NULL;
-    ec2 = U_ZERO_ERROR;
-    UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2);
-
-    rb = ures_getByKey(rb, CURRENCYPLURALS, rb, &ec2);
-
-    // Fetch resource with multi-level resource inheritance fallback
-    rb = ures_getByKeyWithFallback(rb, buf, rb, &ec2);
-
-    s = ures_getStringByKeyWithFallback(rb, pluralCount, len, &ec2);
-    if (U_FAILURE(ec2)) {
-        // fall back to "other"
-        ec2 = U_ZERO_ERROR;
-        s = ures_getStringByKeyWithFallback(rb, "other", len, &ec2);
-        if (U_FAILURE(ec2)) {
-            ures_close(rb);
-            // fall back to long name in Currencies
-            return ucurr_getName(currency, locale, UCURR_LONG_NAME,
-                                 isChoiceFormat, len, ec);
-        }
-    }
-    ures_close(rb);
-
-    // Every failure path has returned above, so ec2 is a success or warning
-    // code here. The former "use the ISO 4217 code" tail could never run and
-    // has been removed.
-    U_ASSERT(U_SUCCESS(ec2));
-    if (ec2 == U_USING_DEFAULT_WARNING
-        || (ec2 == U_USING_FALLBACK_WARNING && *ec != U_USING_DEFAULT_WARNING)) {
-        *ec = ec2;
-    }
-    U_ASSERT(s != NULL);
-    return s;
-}
-
-
-//========================================================================
-// Following are structure and function for parsing currency names
-
-#define NEED_TO_BE_DELETED 0x1
-
-// TODO: a better way to define this?
-#define MAX_CURRENCY_NAME_LEN 100
-
-typedef struct {
- const char* IsoCode; // key: ISO code this name or symbol belongs to
- UChar* currencyName; // value: name/symbol text; heap-owned only when flag has NEED_TO_BE_DELETED
- int32_t currencyNameLen; // value length in UChars; the search code relies on this, not on a terminator
- int32_t flag; // flags: NEED_TO_BE_DELETED => currencyName is freed by deleteCurrencyNames()
-} CurrencyNameStruct;
-
-
-#ifndef MIN
-#define MIN(a,b) (((a)<(b)) ? (a) : (b))
-#endif
-
-#ifndef MAX
-#define MAX(a,b) (((a)<(b)) ? (b) : (a))
-#endif
-
-
-// Comparison function used in quick sort.
-// Orders two CurrencyNameStruct entries by their currencyName code units;
-// when one name is a prefix of the other, the shorter name sorts first.
-static int U_CALLCONV currencyNameComparator(const void* a, const void* b) {
-    const CurrencyNameStruct* lhs = (const CurrencyNameStruct*)a;
-    const CurrencyNameStruct* rhs = (const CurrencyNameStruct*)b;
-    const int32_t sharedLen = MIN(lhs->currencyNameLen, rhs->currencyNameLen);
-    for (int32_t i = 0; i < sharedLen; ++i) {
-        const UChar lc = lhs->currencyName[i];
-        const UChar rc = rhs->currencyName[i];
-        if (lc != rc) {
-            return (lc < rc) ? -1 : 1;
-        }
-    }
-    // Equal over the shared prefix: the length decides.
-    if (lhs->currencyNameLen == rhs->currencyNameLen) {
-        return 0;
-    }
-    return (lhs->currencyNameLen < rhs->currencyNameLen) ? -1 : 1;
-}
-
-
-// Given a locale, return the maximum number of currency names and currency
-// symbols associated with this locale.
-// It gets currency names from resource bundles using fallback.
-// It is the maximum number because in the fallback chain, some of the
-// currency names are duplicated.
-// For example, given locale as "en_US", the currency names get from resource
-// bundle in "en_US" and "en" are duplicated. The fallback mechanism will count
-// all currency names in "en_US" and "en".
-//
-// Per Currencies entry this counts one long name, plus one symbol, one ISO
-// code and every equivalent symbol; per CurrencyPlurals entry it counts every
-// plural form as a name.
-static void
-getCurrencyNameCount(const char* loc, int32_t* total_currency_name_count, int32_t* total_currency_symbol_count) {
-    U_NAMESPACE_USE
-    *total_currency_name_count = 0;
-    *total_currency_symbol_count = 0;
-    const UChar* s = NULL;
-    char locale[ULOC_FULLNAME_CAPACITY] = "";
-    uprv_strcpy(locale, loc);
-    const icu::Hashtable *currencySymbolsEquiv = getCurrSymbolsEquiv();
-    for (;;) {
-        UErrorCode ec2 = U_ZERO_ERROR;
-        // TODO: ures_openDirect?
-        UResourceBundle* rb = ures_open(U_ICUDATA_CURR, locale, &ec2);
-        UResourceBundle* curr = ures_getByKey(rb, CURRENCIES, NULL, &ec2);
-        int32_t n = ures_getSize(curr);
-        for (int32_t i=0; i<n; ++i) {
-            UResourceBundle* names = ures_getByIndex(curr, i, NULL, &ec2);
-            // Initialized so that a failed lookup below (which leaves len
-            // untouched) never feeds an indeterminate value to UnicodeString.
-            int32_t len = 0;
-            s = ures_getStringByIndex(names, UCURR_SYMBOL_NAME, &len, &ec2);
-            ++(*total_currency_symbol_count);  // currency symbol
-            if (currencySymbolsEquiv != NULL) {
-                *total_currency_symbol_count += countEquivalent(*currencySymbolsEquiv, UnicodeString(TRUE, s, len));
-            }
-            ++(*total_currency_symbol_count);  // iso code
-            ++(*total_currency_name_count);  // long name
-            ures_close(names);
-        }
-
-        // currency plurals
-        UErrorCode ec3 = U_ZERO_ERROR;
-        UResourceBundle* curr_p = ures_getByKey(rb, CURRENCYPLURALS, NULL, &ec3);
-        n = ures_getSize(curr_p);
-        for (int32_t i=0; i<n; ++i) {
-            UResourceBundle* names = ures_getByIndex(curr_p, i, NULL, &ec3);
-            *total_currency_name_count += ures_getSize(names);
-            ures_close(names);
-        }
-        ures_close(curr_p);
-        ures_close(curr);
-        ures_close(rb);
-
-        // Move on to the parent locale; stop once the root ("") was processed.
-        if (!fallback(locale)) {
-            break;
-        }
-    }
-}
-
-/**
- * Returns a newly uprv_malloc-ed uppercase copy of source[0..len) for locale.
- *
- * The buffer holds MAX(uppercase length, len) UChars and is not
- * NUL-terminated. If the case mapping fails, the buffer receives a verbatim
- * copy of source instead. The caller owns the buffer and frees it with
- * uprv_free().
- *
- * @return the buffer, or NULL if the allocation failed.
- */
-static UChar*
-toUpperCase(const UChar* source, int32_t len, const char* locale) {
-    // Preflight with an empty destination to learn the uppercase length.
-    // The overflow status this produces is expected and discarded.
-    UErrorCode ec = U_ZERO_ERROR;
-    int32_t destLen = u_strToUpper(NULL, 0, source, len, locale, &ec);
-
-    UChar* dest = (UChar*)uprv_malloc(sizeof(UChar) * MAX(destLen, len));
-    if (dest == NULL) {
-        // Previously the NULL buffer was written through below.
-        return NULL;
-    }
-
-    ec = U_ZERO_ERROR;
-    u_strToUpper(dest, destLen, source, len, locale, &ec);
-    if (U_FAILURE(ec)) {
-        u_memcpy(dest, source, len);
-    }
-    return dest;
-}
-
-
-// Defined further down in this file; declared here so the failure paths of
-// collectCurrencyNames() can release what has been collected so far.
-static void
-deleteCurrencyNames(CurrencyNameStruct* currencyNames, int32_t count);
-
-// Collect all available currency names associated with the given locale
-// (enable fallback chain).
-// Read currency names defined in resource bundle "Currencies" and
-// "CurrencyPlurals", enable fallback chain.
-//
-// On success, *currencyNames / *currencySymbols receive malloc-ed arrays
-// sorted with currencyNameComparator, and the two counts receive the number
-// of entries actually stored. The caller owns both arrays and releases them
-// with deleteCurrencyNames().
-//
-// On failure (ec already failing on entry, invalid locale, allocation
-// failure, hashtable error) ec is set, nothing is left allocated, both array
-// pointers are NULL and both counts are 0.
-static void
-collectCurrencyNames(const char* locale,
-                     CurrencyNameStruct** currencyNames,
-                     int32_t* total_currency_name_count,
-                     CurrencyNameStruct** currencySymbols,
-                     int32_t* total_currency_symbol_count,
-                     UErrorCode& ec) {
-    U_NAMESPACE_USE
-    const icu::Hashtable *currencySymbolsEquiv = getCurrSymbolsEquiv();
-    // Look up the Currencies resource for the given locale.
-    UErrorCode ec2 = U_ZERO_ERROR;
-
-    char loc[ULOC_FULLNAME_CAPACITY] = "";
-    uloc_getName(locale, loc, sizeof(loc), &ec2);
-    if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) {
-        ec = U_ILLEGAL_ARGUMENT_ERROR;
-    }
-
-    // Bail out before allocating anything, so a failing call cannot leak.
-    if (U_FAILURE(ec)) {
-        *currencyNames = NULL;
-        *currencySymbols = NULL;
-        *total_currency_name_count = 0;
-        *total_currency_symbol_count = 0;
-        return;
-    }
-
-    // Get maximum currency name count first.
-    getCurrencyNameCount(loc, total_currency_name_count, total_currency_symbol_count);
-
-    *currencyNames = (CurrencyNameStruct*)uprv_malloc
-        (sizeof(CurrencyNameStruct) * (*total_currency_name_count));
-    *currencySymbols = (CurrencyNameStruct*)uprv_malloc
-        (sizeof(CurrencyNameStruct) * (*total_currency_symbol_count));
-
-    // From here on the counts track the number of entries actually stored.
-    *total_currency_name_count = 0;
-    *total_currency_symbol_count = 0;
-
-    // Check the arrays themselves, not the out-parameters holding them.
-    if (*currencyNames == NULL || *currencySymbols == NULL) {
-        uprv_free(*currencyNames);
-        uprv_free(*currencySymbols);
-        *currencyNames = NULL;
-        *currencySymbols = NULL;
-        ec = U_MEMORY_ALLOCATION_ERROR;
-        return;
-    }
-
-    const UChar* s = NULL;  // currency name
-    char* iso = NULL;  // currency ISO code
-
-    UErrorCode ec3 = U_ZERO_ERROR;
-    UErrorCode ec4 = U_ZERO_ERROR;
-    UBool allocFailed = FALSE;  // set when a per-entry text buffer could not be allocated
-
-    // Using hash to remove duplicates caused by locale fallback
-    UHashtable* currencyIsoCodes = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &ec3);
-    UHashtable* currencyPluralIsoCodes = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &ec4);
-    if (U_FAILURE(ec3) || U_FAILURE(ec4)) {
-        // Without both tables the duplicate filtering below cannot run.
-        uhash_close(currencyIsoCodes);
-        uhash_close(currencyPluralIsoCodes);
-        uprv_free(*currencyNames);
-        uprv_free(*currencySymbols);
-        *currencyNames = NULL;
-        *currencySymbols = NULL;
-        ec = U_FAILURE(ec3) ? ec3 : ec4;
-        return;
-    }
-
-    for (int32_t localeLevel = 0; ; ++localeLevel) {
-        ec2 = U_ZERO_ERROR;
-        // TODO: ures_openDirect
-        UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2);
-        UResourceBundle* curr = ures_getByKey(rb, CURRENCIES, NULL, &ec2);
-        int32_t n = ures_getSize(curr);
-        for (int32_t i=0; i<n; ++i) {
-            UResourceBundle* names = ures_getByIndex(curr, i, NULL, &ec2);
-            int32_t len = 0;
-            s = ures_getStringByIndex(names, UCURR_SYMBOL_NAME, &len, &ec2);
-            // TODO: uhash_put wont change key/value?
-            iso = (char*)ures_getKey(names);
-            // Skip currencies already collected from a more specific locale.
-            if (localeLevel != 0 && uhash_get(currencyIsoCodes, iso) != NULL) {
-                ures_close(names);
-                continue;
-            }
-            uhash_put(currencyIsoCodes, iso, iso, &ec3);
-
-            // Add currency symbol.
-            {
-                CurrencyNameStruct& entry = (*currencySymbols)[(*total_currency_symbol_count)++];
-                entry.IsoCode = iso;
-                entry.currencyName = (UChar*)s;
-                entry.flag = 0;
-                entry.currencyNameLen = len;
-            }
-            // Add equivalent symbols
-            if (currencySymbolsEquiv != NULL) {
-                UnicodeString str(TRUE, s, len);
-                icu::EquivIterator iter(*currencySymbolsEquiv, str);
-                const UnicodeString *symbol;
-                while ((symbol = iter.next()) != NULL) {
-                    CurrencyNameStruct& entry = (*currencySymbols)[(*total_currency_symbol_count)++];
-                    entry.IsoCode = iso;
-                    entry.currencyName = const_cast<UChar*>(symbol->getBuffer());
-                    entry.flag = 0;
-                    entry.currencyNameLen = symbol->length();
-                }
-            }
-
-            // Add currency long name.
-            s = ures_getStringByIndex(names, UCURR_LONG_NAME, &len, &ec2);
-            {
-                UChar* upperName = toUpperCase(s, len, locale);
-                if (upperName == NULL) {
-                    allocFailed = TRUE;
-                }
-                CurrencyNameStruct& entry = (*currencyNames)[(*total_currency_name_count)++];
-                entry.IsoCode = iso;
-                entry.currencyName = upperName;
-                entry.flag = NEED_TO_BE_DELETED;
-                // A zero length keeps a NULL name from ever being dereferenced.
-                entry.currencyNameLen = (upperName != NULL) ? len : 0;
-            }
-
-            // put (iso, 3, and iso) in to array
-            // Add currency ISO code.
-            {
-                UChar* isoText = (UChar*)uprv_malloc(sizeof(UChar)*3);
-                if (isoText != NULL) {
-                    // Must convert iso[] into Unicode
-                    u_charsToUChars(iso, isoText, 3);
-                } else {
-                    allocFailed = TRUE;
-                }
-                CurrencyNameStruct& entry = (*currencySymbols)[(*total_currency_symbol_count)++];
-                entry.IsoCode = iso;
-                entry.currencyName = isoText;
-                entry.flag = NEED_TO_BE_DELETED;
-                entry.currencyNameLen = (isoText != NULL) ? 3 : 0;
-            }
-
-            ures_close(names);
-        }
-
-        // currency plurals
-        UErrorCode ec5 = U_ZERO_ERROR;
-        UResourceBundle* curr_p = ures_getByKey(rb, CURRENCYPLURALS, NULL, &ec5);
-        n = ures_getSize(curr_p);
-        for (int32_t i=0; i<n; ++i) {
-            UResourceBundle* names = ures_getByIndex(curr_p, i, NULL, &ec5);
-            iso = (char*)ures_getKey(names);
-            // Using hash to remove duplicated ISO codes in fallback chain.
-            if (localeLevel != 0 && uhash_get(currencyPluralIsoCodes, iso) != NULL) {
-                ures_close(names);
-                continue;
-            }
-            uhash_put(currencyPluralIsoCodes, iso, iso, &ec4);
-
-            int32_t num = ures_getSize(names);
-            for (int32_t j = 0; j < num; ++j) {
-                // TODO: remove duplicates between singular name and
-                // currency long name?
-                int32_t len = 0;
-                s = ures_getStringByIndex(names, j, &len, &ec5);
-                UChar* upperName = toUpperCase(s, len, locale);
-                if (upperName == NULL) {
-                    allocFailed = TRUE;
-                }
-                CurrencyNameStruct& entry = (*currencyNames)[(*total_currency_name_count)++];
-                entry.IsoCode = iso;
-                entry.currencyName = upperName;
-                entry.flag = NEED_TO_BE_DELETED;
-                entry.currencyNameLen = (upperName != NULL) ? len : 0;
-            }
-            ures_close(names);
-        }
-        ures_close(curr_p);
-        ures_close(curr);
-        ures_close(rb);
-
-        if (!fallback(loc)) {
-            break;
-        }
-    }
-
-    uhash_close(currencyIsoCodes);
-    uhash_close(currencyPluralIsoCodes);
-
-    // Fail on allocation or hashtable errors, releasing everything collected
-    // so the caller is not handed arrays it will never free.
-    if (allocFailed || U_FAILURE(ec3) || U_FAILURE(ec4)) {
-        deleteCurrencyNames(*currencyNames, *total_currency_name_count);
-        deleteCurrencyNames(*currencySymbols, *total_currency_symbol_count);
-        *currencyNames = NULL;
-        *currencySymbols = NULL;
-        *total_currency_name_count = 0;
-        *total_currency_symbol_count = 0;
-        if (allocFailed) {
-            ec = U_MEMORY_ALLOCATION_ERROR;
-        } else {
-            ec = U_FAILURE(ec3) ? ec3 : ec4;
-        }
-        return;
-    }
-
-    // quick sort the struct
-    qsort(*currencyNames, *total_currency_name_count,
-          sizeof(CurrencyNameStruct), currencyNameComparator);
-    qsort(*currencySymbols, *total_currency_symbol_count,
-          sizeof(CurrencyNameStruct), currencyNameComparator);
-
-#ifdef UCURR_DEBUG
-    printf("currency name count: %d\n", *total_currency_name_count);
-    for (int32_t index = 0; index < *total_currency_name_count; ++index) {
-        printf("index: %d\n", index);
-        printf("iso: %s\n", (*currencyNames)[index].IsoCode);
-        char curNameBuf[1024];
-        memset(curNameBuf, 0, 1024);
-        u_austrncpy(curNameBuf, (*currencyNames)[index].currencyName, (*currencyNames)[index].currencyNameLen);
-        printf("currencyName: %s\n", curNameBuf);
-        printf("len: %d\n", (*currencyNames)[index].currencyNameLen);
-    }
-    printf("currency symbol count: %d\n", *total_currency_symbol_count);
-    for (int32_t index = 0; index < *total_currency_symbol_count; ++index) {
-        printf("index: %d\n", index);
-        printf("iso: %s\n", (*currencySymbols)[index].IsoCode);
-        char curNameBuf[1024];
-        memset(curNameBuf, 0, 1024);
-        u_austrncpy(curNameBuf, (*currencySymbols)[index].currencyName, (*currencySymbols)[index].currencyNameLen);
-        printf("currencySymbol: %s\n", curNameBuf);
-        printf("len: %d\n", (*currencySymbols)[index].currencyNameLen);
-    }
-#endif
-}
-
-// Narrows [*begin, *end] to the entries whose character at position
-// indexInCurrencyNames equals key.
-//
-// @param currencyNames: currency names array
-// @param indexInCurrencyNames: the index of the character in currency names
-// array against which the comparison is done
-// @param key: input text char to compare against
-// @param begin(IN/OUT): the begin index of matching range in currency names array
-// @param end(IN/OUT): the end index of matching range in currency names array.
-// @return *begin when the entry at the new *begin has exactly
-// indexInCurrencyNames + 1 characters (an exact match up to this position);
-// otherwise -1. When no entry matches, *begin and *end are both set to -1.
-// NOTE(review): the search presumably relies on the array being sorted with
-// currencyNameComparator and on all entries in the incoming range agreeing on
-// the characters before indexInCurrencyNames -- confirm against the caller.
-static int32_t
-binarySearch(const CurrencyNameStruct* currencyNames,
- int32_t indexInCurrencyNames,
- const UChar key,
- int32_t* begin, int32_t* end) {
-#ifdef UCURR_DEBUG
- printf("key = %x\n", key);
-#endif
- int32_t first = *begin;
- int32_t last = *end;
- while (first <= last) {
- int32_t mid = (first + last) / 2; // compute mid point.
- // Entries too short to have a character at this position are treated
- // as sorting before key.
- if (indexInCurrencyNames >= currencyNames[mid].currencyNameLen) {
- first = mid + 1;
- } else {
- if (key > currencyNames[mid].currencyName[indexInCurrencyNames]) {
- first = mid + 1;
- }
- else if (key < currencyNames[mid].currencyName[indexInCurrencyNames]) {
- last = mid - 1;
- }
- else {
- // Find a match, and looking for ranges
- // Now do two more binary searches. First, on the left side for
- // the greatest L such that CurrencyNameStruct[L] < key.
- int32_t L = *begin;
- int32_t R = mid;
-
-#ifdef UCURR_DEBUG
- printf("mid = %d\n", mid);
-#endif
- while (L < R) {
- int32_t M = (L + R) / 2;
-#ifdef UCURR_DEBUG
- printf("L = %d, R = %d, M = %d\n", L, R, M);
-#endif
- if (indexInCurrencyNames >= currencyNames[M].currencyNameLen) {
- L = M + 1;
- } else {
- if (currencyNames[M].currencyName[indexInCurrencyNames] < key) {
- L = M + 1;
- } else {
-#ifdef UCURR_DEBUG
- U_ASSERT(currencyNames[M].currencyName[indexInCurrencyNames] == key);
-#endif
- R = M;
- }
- }
- }
-#ifdef UCURR_DEBUG
- U_ASSERT(L == R);
-#endif
- *begin = L;
-#ifdef UCURR_DEBUG
- printf("begin = %d\n", *begin);
- U_ASSERT(currencyNames[*begin].currencyName[indexInCurrencyNames] == key);
-#endif
-
- // Now for the second search, finding the least R such that
- // key < CurrencyNameStruct[R].
- L = mid;
- R = *end;
- while (L < R) {
- int32_t M = (L + R) / 2;
-#ifdef UCURR_DEBUG
- printf("L = %d, R = %d, M = %d\n", L, R, M);
-#endif
- // NOTE(review): this length test uses '<' where the two searches
- // above use 'indexInCurrencyNames >= currencyNameLen'; an entry whose
- // length equals indexInCurrencyNames would be indexed one past its
- // end here. Presumably such entries cannot occur at or after mid in
- // a sorted range -- confirm.
- if (currencyNames[M].currencyNameLen < indexInCurrencyNames) {
- L = M + 1;
- } else {
- if (currencyNames[M].currencyName[indexInCurrencyNames] > key) {
- R = M;
- } else {
-#ifdef UCURR_DEBUG
- U_ASSERT(currencyNames[M].currencyName[indexInCurrencyNames] == key);
-#endif
- L = M + 1;
- }
- }
- }
-#ifdef UCURR_DEBUG
- U_ASSERT(L == R);
-#endif
- // R is either the first entry past the matching run or, when the run
- // extends to the old *end, the last matching entry itself.
- if (currencyNames[R].currencyName[indexInCurrencyNames] > key) {
- *end = R - 1;
- } else {
- *end = R;
- }
-#ifdef UCURR_DEBUG
- printf("end = %d\n", *end);
-#endif
-
- // now, found the range. check whether there is exact match
- if (currencyNames[*begin].currencyNameLen == indexInCurrencyNames + 1) {
- return *begin; // find range and exact match.
- }
- return -1; // find range, but no exact match.
- }
- }
- }
- *begin = -1;
- *end = -1;
- return -1; // failed to find range.
-}
-
-
-// Linear search "text" in "currencyNames".
-// @param begin, end: inclusive index range in currencyNames to scan.
-// @param textLen: the length of the text to be compared
-// @param partialMatchLen(IN/OUT): longest prefix of text seen so far that
-//                      matches the start of some currency name; only ever raised.
-// @param maxMatchLen(IN/OUT): passing in the computed max matching length
-//                      pass out the new max matching length
-// @param maxMatchIndex: the index in currencyName which has the longest
-//                      match with input text.
-static void
-linearSearch(const CurrencyNameStruct* currencyNames,
-             int32_t begin, int32_t end,
-             const UChar* text, int32_t textLen,
-             int32_t *partialMatchLen,
-             int32_t *maxMatchLen, int32_t* maxMatchIndex) {
-    // Partial-match scans start from the value on entry, even if an earlier
-    // candidate in this call has already raised *partialMatchLen.
-    const int32_t partialStart = *partialMatchLen;
-    for (int32_t idx = begin; idx <= end; ++idx) {
-        const CurrencyNameStruct& candidate = currencyNames[idx];
-        const int32_t candidateLen = candidate.currencyNameLen;
-
-        // A full match must be longer than the best so far, fit inside the
-        // text, and equal the text's leading candidateLen code units.
-        if (candidateLen > *maxMatchLen && candidateLen <= textLen &&
-            uprv_memcmp(candidate.currencyName, text, candidateLen * sizeof(UChar)) == 0) {
-            *partialMatchLen = MAX(*partialMatchLen, candidateLen);
-            *maxMatchIndex = idx;
-            *maxMatchLen = candidateLen;
-#ifdef UCURR_DEBUG
-            printf("maxMatchIndex = %d, maxMatchLen = %d\n",
-                   *maxMatchIndex, *maxMatchLen);
-#endif
-            continue;
-        }
-
-        // Check for partial matches.
-        const int32_t scanLimit = MIN(candidateLen, textLen);
-        for (int32_t i = partialStart; i < scanLimit; i++) {
-            if (candidate.currencyName[i] != text[i]) {
-                break;
-            }
-            *partialMatchLen = MAX(*partialMatchLen, i + 1);
-        }
-    }
-}
-
-#define LINEAR_SEARCH_THRESHOLD 10
-
-// Find longest match between "text" and currency names in "currencyNames".
-// @param total_currency_count: total number of currency names in CurrencyNames.
-// @param textLen: the length of the text to be compared
-// @param partialMatchLen(IN/OUT): longest matching prefix length seen; only raised.
-// @param maxMatchLen: receives the length of the longest full match (0 if none).
-// @param maxMatchIndex: receives the index in currencyNames of the longest
-//                      full match with input text (-1 if none).
-static void
-searchCurrencyName(const CurrencyNameStruct* currencyNames,
-                   int32_t total_currency_count,
-                   const UChar* text, int32_t textLen,
-                   int32_t *partialMatchLen,
-                   int32_t* maxMatchLen, int32_t* maxMatchIndex) {
-    *maxMatchIndex = -1;
-    *maxMatchLen = 0;
-
-    // Candidate range, narrowed one text character at a time.
-    int32_t rangeBegin = 0;
-    int32_t rangeEnd = total_currency_count - 1;
-
-    // It is a variant of binary search.
-    // For example, given the currency names in currencyNames array are:
-    // A AB ABC AD AZ B BB BBEX BBEXYZ BS C D E....
-    // and the input text is BBEXST
-    // Round 1 matches "B" against the first char of every name and narrows
-    // the range to "B BB BBEX BBEXYZ BS" (maximum match "B").
-    // Round 2 matches the second "B" against the second char and narrows
-    // the range to "BB BBEX BBEXYZ" (maximum match "BB").
-    // Round 3 returns the range "BBEX BBEXYZ" (maximum match unchanged).
-    // Round 4 returns the same range (maximum match "BBEX").
-    // Round 5 returns no matching range.
-    for (int32_t pos = 0; pos < textLen; ++pos) {
-        // exactIndex is the entry that matches text[0..pos] exactly, if any;
-        // [rangeBegin, rangeEnd] is the range still matching.
-        const int32_t exactIndex = binarySearch(currencyNames, pos,
-                                                text[pos],
-                                                &rangeBegin, &rangeEnd);
-        if (rangeBegin == -1) {
-            // did not find the range
-            break;
-        }
-        *partialMatchLen = MAX(*partialMatchLen, pos + 1);
-        if (exactIndex != -1) {
-            // find an exact match for text from text[0] to text[pos]
-            // in currencyNames array.
-            *maxMatchLen = pos + 1;
-            *maxMatchIndex = exactIndex;
-        }
-        if (rangeEnd - rangeBegin < LINEAR_SEARCH_THRESHOLD) {
-            // linear search if within threshold.
-            linearSearch(currencyNames, rangeBegin, rangeEnd,
-                         text, textLen,
-                         partialMatchLen,
-                         maxMatchLen, maxMatchIndex);
-            break;
-        }
-    }
-}
-
-//========================= currency name cache =====================
-typedef struct {
- char locale[ULOC_FULLNAME_CAPACITY]; // key: locale string the entry was built for
- // currency names (long and plural names), stored uppercased for matching
- CurrencyNameStruct* currencyNames; // value: sorted array, owned by the entry
- int32_t totalCurrencyNameCount; // number of entries in currencyNames
- // currency symbols and ISO codes, stored as-is (case sensitive)
- CurrencyNameStruct* currencySymbols; // value: sorted array, owned by the entry
- int32_t totalCurrencySymbolCount; // number of entries in currencySymbols
- // reference count.
- // getCacheEntry() sets it to 2 on insertion: one for the cache slot, one for the caller.
- // it increases by 1 on each cache hit, and releaseCacheEntry() decreases it by 1.
- // The entry is deleted when ref count is zero, which means
- // the entry is replaced out of cache and no caller is still using it.
- int32_t refCount;
-} CurrencyNameCacheEntry;
-
-
-#define CURRENCY_NAME_CACHE_NUM 10
-
-// Reserve 10 cache entries.
-static CurrencyNameCacheEntry* currCache[CURRENCY_NAME_CACHE_NUM] = {NULL};
-// Using an index to indicate which entry to be replaced when cache is full.
-// It is a simple round-robin replacement strategy.
-static int8_t currentCacheEntryIndex = 0;
-
-static UMutex gCurrencyCacheMutex;
-
-// Cache deletion.
-// Frees every currencyName flagged NEED_TO_BE_DELETED among the first
-// `count` entries, then frees the array itself.
-static void
-deleteCurrencyNames(CurrencyNameStruct* currencyNames, int32_t count) {
-    for (int32_t i = 0; i < count; ++i) {
-        CurrencyNameStruct& entry = currencyNames[i];
-        if ((entry.flag & NEED_TO_BE_DELETED) != 0) {
-            uprv_free(entry.currencyName);
-        }
-    }
-    uprv_free(currencyNames);
-}
-
-
-// Releases both name arrays owned by a cache entry, then the entry itself.
-static void
-deleteCacheEntry(CurrencyNameCacheEntry* entry) {
-    CurrencyNameStruct* const names = entry->currencyNames;
-    CurrencyNameStruct* const symbols = entry->currencySymbols;
-    deleteCurrencyNames(names, entry->totalCurrencyNameCount);
-    deleteCurrencyNames(symbols, entry->totalCurrencySymbolCount);
-    uprv_free(entry);
-}
-
-
-// Cache clean up.
-// Deletes every populated slot of currCache regardless of its reference
-// count and resets the slot to null. Always returns TRUE.
-static UBool U_CALLCONV
-currency_cache_cleanup(void) {
-    for (int32_t slot = 0; slot < CURRENCY_NAME_CACHE_NUM; ++slot) {
-        CurrencyNameCacheEntry* entry = currCache[slot];
-        if (!entry) {
-            continue;
-        }
-        deleteCacheEntry(entry);
-        currCache[slot] = 0;
-    }
-    return TRUE;
-}
-
-
-/**
- * Loads the currency name data from the cache, or from resource bundles if necessary.
- * The refCount is automatically incremented.  It is the caller's responsibility
- * to decrement it when done!
- *
- * @param locale key of the cache entry; must be shorter than
- *               ULOC_FULLNAME_CAPACITY because it is copied into the entry.
- * @param ec     set to U_ILLEGAL_ARGUMENT_ERROR for an over-long locale, to
- *               U_MEMORY_ALLOCATION_ERROR when the entry cannot be allocated,
- *               or to whatever collectCurrencyNames() reports.
- * @return the cache entry, or NULL on failure.
- */
-static CurrencyNameCacheEntry*
-getCacheEntry(const char* locale, UErrorCode& ec) {
-
-    int32_t total_currency_name_count = 0;
-    CurrencyNameStruct* currencyNames = NULL;
-    int32_t total_currency_symbol_count = 0;
-    CurrencyNameStruct* currencySymbols = NULL;
-    CurrencyNameCacheEntry* cacheEntry = NULL;
-
-    // The locale is stored with an unbounded copy into a fixed-size field
-    // below, so reject anything that would not fit.
-    if (uprv_strlen(locale) >= (size_t)ULOC_FULLNAME_CAPACITY) {
-        ec = U_ILLEGAL_ARGUMENT_ERROR;
-        return NULL;
-    }
-
-    umtx_lock(&gCurrencyCacheMutex);
-    // in order to handle racing correctly,
-    // not putting 'search' in a separate function.
-    int8_t found = -1;
-    for (int8_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) {
-        if (currCache[i]!= NULL &&
-            uprv_strcmp(locale, currCache[i]->locale) == 0) {
-            found = i;
-            break;
-        }
-    }
-    if (found != -1) {
-        cacheEntry = currCache[found];
-        ++(cacheEntry->refCount);
-    }
-    umtx_unlock(&gCurrencyCacheMutex);
-    if (found != -1) {
-        return cacheEntry;
-    }
-
-    // Cache miss: build the data without holding the mutex.
-    collectCurrencyNames(locale, &currencyNames, &total_currency_name_count, &currencySymbols, &total_currency_symbol_count, ec);
-    if (U_FAILURE(ec)) {
-        return NULL;
-    }
-
-    umtx_lock(&gCurrencyCacheMutex);
-    // check again: another thread may have inserted the same locale meanwhile.
-    for (int8_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) {
-        if (currCache[i]!= NULL &&
-            uprv_strcmp(locale, currCache[i]->locale) == 0) {
-            found = i;
-            break;
-        }
-    }
-    if (found != -1) {
-        // Lost the race: discard our copy and share the cached one.
-        deleteCurrencyNames(currencyNames, total_currency_name_count);
-        deleteCurrencyNames(currencySymbols, total_currency_symbol_count);
-        cacheEntry = currCache[found];
-        ++(cacheEntry->refCount);
-    } else {
-        // Allocate the replacement first so that a failed allocation leaves
-        // the cache untouched.
-        CurrencyNameCacheEntry* fresh =
-            (CurrencyNameCacheEntry*)uprv_malloc(sizeof(CurrencyNameCacheEntry));
-        if (fresh == NULL) {
-            deleteCurrencyNames(currencyNames, total_currency_name_count);
-            deleteCurrencyNames(currencySymbols, total_currency_symbol_count);
-            ec = U_MEMORY_ALLOCATION_ERROR;
-            cacheEntry = NULL;
-        } else {
-            // insert new entry to
-            // currentCacheEntryIndex % CURRENCY_NAME_CACHE_NUM
-            // and remove the existing entry
-            // currentCacheEntryIndex % CURRENCY_NAME_CACHE_NUM
-            // from cache.
-            CurrencyNameCacheEntry* evicted = currCache[currentCacheEntryIndex];
-            if (evicted) {
-                --(evicted->refCount);
-                // delete if the ref count is zero
-                if (evicted->refCount == 0) {
-                    deleteCacheEntry(evicted);
-                }
-            }
-            cacheEntry = fresh;
-            currCache[currentCacheEntryIndex] = cacheEntry;
-            uprv_strcpy(cacheEntry->locale, locale);
-            cacheEntry->currencyNames = currencyNames;
-            cacheEntry->totalCurrencyNameCount = total_currency_name_count;
-            cacheEntry->currencySymbols = currencySymbols;
-            cacheEntry->totalCurrencySymbolCount = total_currency_symbol_count;
-            cacheEntry->refCount = 2; // one for cache, one for reference
-            currentCacheEntryIndex = (currentCacheEntryIndex + 1) % CURRENCY_NAME_CACHE_NUM;
-            ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup);
-        }
-    }
-    umtx_unlock(&gCurrencyCacheMutex);
-
-    return cacheEntry;
-}
-
-// Drops one reference to a cache entry while holding the cache mutex, and
-// destroys the entry once its reference count reaches zero.
-static void releaseCacheEntry(CurrencyNameCacheEntry* cacheEntry) {
-    umtx_lock(&gCurrencyCacheMutex);
-    if (--(cacheEntry->refCount) == 0) {
-        // last reference gone
-        deleteCacheEntry(cacheEntry);
-    }
-    umtx_unlock(&gCurrencyCacheMutex);
-}
-
-/**
- * Tries to match a currency name, symbol or ISO code in text, starting at
- * pos.getIndex().
- *
- * @param locale           locale whose currency names/symbols are searched
- * @param text             text to parse
- * @param pos              in: start index; out: advanced past the match when
- *                         one is found, otherwise unchanged
- * @param type             when UCURR_LONG_NAME, only currency names are
- *                         searched; otherwise symbols/ISO codes as well
- * @param partialMatchLen  out: reset to 0, then updated by searchCurrencyName()
- * @param result           out: receives 4 UChars converted from the matched
- *                         entry's IsoCode; untouched when nothing matches
- * @param ec               in/out error code
- */
-U_CAPI void
-uprv_parseCurrency(const char* locale,
-                   const icu::UnicodeString& text,
-                   icu::ParsePosition& pos,
-                   int8_t type,
-                   int32_t* partialMatchLen,
-                   UChar* result,
-                   UErrorCode& ec) {
-    U_NAMESPACE_USE
-    if (U_FAILURE(ec)) {
-        return;
-    }
-    CurrencyNameCacheEntry* cacheEntry = getCacheEntry(locale, ec);
-    if (U_FAILURE(ec)) {
-        return;
-    }
-
-    int32_t total_currency_name_count = cacheEntry->totalCurrencyNameCount;
-    CurrencyNameStruct* currencyNames = cacheEntry->currencyNames;
-    int32_t total_currency_symbol_count = cacheEntry->totalCurrencySymbolCount;
-    CurrencyNameStruct* currencySymbols = cacheEntry->currencySymbols;
-
-    int32_t start = pos.getIndex();
-
-    UChar inputText[MAX_CURRENCY_NAME_LEN];
-    // Zero-filled so that any tail left unwritten after an overflow is deterministic.
-    UChar upperText[MAX_CURRENCY_NAME_LEN] = {0};
-    // Clamp at 0: a negative length (start past the end of text) must not
-    // reach u_strToUpper, where -1 means "NUL-terminated".
-    int32_t inputLen = MIN(MAX_CURRENCY_NAME_LEN, text.length() - start);
-    if (inputLen < 0) {
-        inputLen = 0;
-    }
-    text.extract(start, inputLen, inputText);
-    UErrorCode ec1 = U_ZERO_ERROR;
-    int32_t upperLen = u_strToUpper(upperText, MAX_CURRENCY_NAME_LEN, inputText, inputLen, locale, &ec1);
-    if (ec1 == U_BUFFER_OVERFLOW_ERROR) {
-        // Upper-casing can lengthen the text; the returned value is then the
-        // required length, which is larger than the buffer. Search only what
-        // actually fits.
-        upperLen = MAX_CURRENCY_NAME_LEN;
-    } else if (U_FAILURE(ec1)) {
-        upperLen = 0;
-    }
-
-    // Make sure partialMatchLen is initialized
-    *partialMatchLen = 0;
-
-    int32_t max = 0;
-    int32_t matchIndex = -1;
-    // case in-sensitive comparison against currency names
-    searchCurrencyName(currencyNames, total_currency_name_count,
-                       upperText, upperLen, partialMatchLen, &max, &matchIndex);
-
-#ifdef UCURR_DEBUG
-    printf("search in names, max = %d, matchIndex = %d\n", max, matchIndex);
-#endif
-
-    int32_t maxInSymbol = 0;
-    int32_t matchIndexInSymbol = -1;
-    if (type != UCURR_LONG_NAME) {  // not name only
-        // case sensitive comparison against currency symbols and ISO code.
-        // Uses the length of the original text, not of the upper-cased copy.
-        searchCurrencyName(currencySymbols, total_currency_symbol_count,
-                           inputText, inputLen,
-                           partialMatchLen,
-                           &maxInSymbol, &matchIndexInSymbol);
-    }
-
-#ifdef UCURR_DEBUG
-    printf("search in symbols, maxInSymbol = %d, matchIndexInSymbol = %d\n", maxInSymbol, matchIndexInSymbol);
-    if(matchIndexInSymbol != -1) {
-        printf("== ISO=%s\n", currencySymbols[matchIndexInSymbol].IsoCode);
-    }
-#endif
-
-    // Prefer the longer match; a name match wins ties.
-    if (max >= maxInSymbol && matchIndex != -1) {
-        u_charsToUChars(currencyNames[matchIndex].IsoCode, result, 4);
-        pos.setIndex(start + max);
-    } else if (maxInSymbol >= max && matchIndexInSymbol != -1) {
-        u_charsToUChars(currencySymbols[matchIndexInSymbol].IsoCode, result, 4);
-        pos.setIndex(start + maxInSymbol);
-    }
-
-    // decrease reference count
-    releaseCacheEntry(cacheEntry);
-}
-
-// Adds to result the code point found at index 0 of every currency symbol and
-// every currency name cached for locale. Does nothing if ec already signals
-// failure or the cache entry cannot be obtained.
-void uprv_currencyLeads(const char* locale, icu::UnicodeSet& result, UErrorCode& ec) {
-    U_NAMESPACE_USE
-    if (U_FAILURE(ec)) {
-        return;
-    }
-    CurrencyNameCacheEntry* cacheEntry = getCacheEntry(locale, ec);
-    if (U_FAILURE(ec)) {
-        return;
-    }
-
-    // Symbols first ...
-    int32_t symbolIdx = 0;
-    while (symbolIdx < cacheEntry->totalCurrencySymbolCount) {
-        const CurrencyNameStruct& symbol = cacheEntry->currencySymbols[symbolIdx];
-        UChar32 lead;
-        U16_GET(symbol.currencyName, 0, 0, symbol.currencyNameLen, lead);
-        result.add(lead);
-        ++symbolIdx;
-    }
-
-    // ... then the names.
-    int32_t nameIdx = 0;
-    while (nameIdx < cacheEntry->totalCurrencyNameCount) {
-        const CurrencyNameStruct& name = cacheEntry->currencyNames[nameIdx];
-        UChar32 lead;
-        U16_GET(name.currencyName, 0, 0, name.currencyNameLen, lead);
-        result.add(lead);
-        ++nameIdx;
-    }
-
-    // give back the reference taken by getCacheEntry()
-    releaseCacheEntry(cacheEntry);
-}
-
-
-/**
- * Internal method. Given a currency ISO code and a locale, stores the
- * UCURR_SYMBOL_NAME of that currency in result. The name is obtained from
- * ucurr_getName() without requesting choice-format information. On failure
- * result is left untouched.
- *
- * This is used for backward compatibility with old currency logic in
- * DecimalFormat and DecimalFormatSymbols.
- */
-U_CAPI void
-uprv_getStaticCurrencyName(const UChar* iso, const char* loc,
-                           icu::UnicodeString& result, UErrorCode& ec)
-{
-    U_NAMESPACE_USE
-
-    int32_t nameLength;
-    const UChar* symbolName = ucurr_getName(iso, loc, UCURR_SYMBOL_NAME,
-                                            nullptr /* isChoiceFormat */, &nameLength, &ec);
-    if (U_FAILURE(ec)) {
-        return;
-    }
-    result.setTo(symbolName, nameLength);
-}
-
-// Convenience wrapper: default fraction digits for UCURR_USAGE_STANDARD.
-U_CAPI int32_t U_EXPORT2
-ucurr_getDefaultFractionDigits(const UChar* currency, UErrorCode* ec) {
-    const UCurrencyUsage standardUsage = UCURR_USAGE_STANDARD;
-    return ucurr_getDefaultFractionDigitsForUsage(currency, standardUsage, ec);
-}
-
-// Returns the fraction-digit count from the currency's meta data: element 0
-// for UCURR_USAGE_STANDARD, element 2 for UCURR_USAGE_CASH. Any other usage
-// sets *ec to U_UNSUPPORTED_ERROR and yields 0; an already-failed *ec also
-// yields 0.
-U_DRAFT int32_t U_EXPORT2
-ucurr_getDefaultFractionDigitsForUsage(const UChar* currency, const UCurrencyUsage usage, UErrorCode* ec) {
-    if (U_FAILURE(*ec)) {
-        return 0;
-    }
-    if (usage == UCURR_USAGE_STANDARD) {
-        return (_findMetaData(currency, *ec))[0];
-    }
-    if (usage == UCURR_USAGE_CASH) {
-        return (_findMetaData(currency, *ec))[2];
-    }
-    *ec = U_UNSUPPORTED_ERROR;
-    return 0;
-}
-
-// Convenience wrapper: rounding increment for UCURR_USAGE_STANDARD.
-U_CAPI double U_EXPORT2
-ucurr_getRoundingIncrement(const UChar* currency, UErrorCode* ec) {
-    const UCurrencyUsage standardUsage = UCURR_USAGE_STANDARD;
-    return ucurr_getRoundingIncrementForUsage(currency, standardUsage, ec);
-}
-
-// Returns increment / 10^fracDigits for the currency, taken from meta data
-// elements {0,1} (standard usage) or {2,3} (cash usage). Returns 0.0 when no
-// rounding applies (increment < 2) or on error. An unknown usage sets
-// U_UNSUPPORTED_ERROR; a fraction-digit count outside [0, MAX_POW10] sets
-// U_INVALID_FORMAT_ERROR.
-U_DRAFT double U_EXPORT2
-ucurr_getRoundingIncrementForUsage(const UChar* currency, const UCurrencyUsage usage, UErrorCode* ec) {
-    const int32_t *metaData = _findMetaData(currency, *ec);
-    if (U_FAILURE(*ec)) {
-        return 0.0;
-    }
-
-    // Index of the fraction-digit element; the increment follows it.
-    int32_t base;
-    if (usage == UCURR_USAGE_STANDARD) {
-        base = 0;
-    } else if (usage == UCURR_USAGE_CASH) {
-        base = 2;
-    } else {
-        *ec = U_UNSUPPORTED_ERROR;
-        return 0.0;
-    }
-    const int32_t fracDigits = metaData[base];
-    const int32_t increment = metaData[base + 1];
-
-    // If the meta data is invalid, return 0.0
-    if (fracDigits < 0 || fracDigits > MAX_POW10) {
-        *ec = U_INVALID_FORMAT_ERROR;
-        return 0.0;
-    }
-
-    // A rounding value of 0 or 1 indicates no rounding.
-    if (increment < 2) {
-        return 0.0;
-    }
-
-    // Return (increment) / 10^(fracDigits). The only actual rounding data,
-    // as of this writing, is CHF { 2, 5 }.
-    return double(increment) / POW10[fracDigits];
-}
-
-U_CDECL_BEGIN
-
-/* Iteration state of an ISO-currency enumeration; stored in the enumeration's context pointer. */
-typedef struct UCurrencyContext {
- uint32_t currType; /* UCurrCurrencyType */
- uint32_t listIdx; /* index of the next gCurrencyList entry to examine */
-} UCurrencyContext;
-
-/*
-Table of known ISO 4217 currency codes, each tagged with a bit mask built from
-UCURR_COMMON / UCURR_UNCOMMON and UCURR_DEPRECATED / UCURR_NON_DEPRECATED.
-
-Please keep this list in alphabetical order.
-The final { NULL, 0 } entry is a terminator that the enumeration callbacks
-rely on; it must stay last.
-You can look at the CLDR supplemental data or ISO-4217 for the meaning of some
-of these items.
-ISO-4217: http://www.iso.org/iso/en/prods-services/popstds/currencycodeslist.html
-*/
-static const struct CurrencyList {
- const char *currency; /* 3-letter currency code; NULL only in the terminator */
- uint32_t currType; /* bit mask of UCURR_* type flags */
-} gCurrencyList[] = {
- {"ADP", UCURR_COMMON|UCURR_DEPRECATED},
- {"AED", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"AFA", UCURR_COMMON|UCURR_DEPRECATED},
- {"AFN", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"ALK", UCURR_COMMON|UCURR_DEPRECATED},
- {"ALL", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"AMD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"ANG", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"AOA", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"AOK", UCURR_COMMON|UCURR_DEPRECATED},
- {"AON", UCURR_COMMON|UCURR_DEPRECATED},
- {"AOR", UCURR_COMMON|UCURR_DEPRECATED},
- {"ARA", UCURR_COMMON|UCURR_DEPRECATED},
- {"ARL", UCURR_COMMON|UCURR_DEPRECATED},
- {"ARM", UCURR_COMMON|UCURR_DEPRECATED},
- {"ARP", UCURR_COMMON|UCURR_DEPRECATED},
- {"ARS", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"ATS", UCURR_COMMON|UCURR_DEPRECATED},
- {"AUD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"AWG", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"AZM", UCURR_COMMON|UCURR_DEPRECATED},
- {"AZN", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"BAD", UCURR_COMMON|UCURR_DEPRECATED},
- {"BAM", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"BAN", UCURR_COMMON|UCURR_DEPRECATED},
- {"BBD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"BDT", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"BEC", UCURR_UNCOMMON|UCURR_DEPRECATED},
- {"BEF", UCURR_COMMON|UCURR_DEPRECATED},
- {"BEL", UCURR_UNCOMMON|UCURR_DEPRECATED},
- {"BGL", UCURR_COMMON|UCURR_DEPRECATED},
- {"BGM", UCURR_COMMON|UCURR_DEPRECATED},
- {"BGN", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"BGO", UCURR_COMMON|UCURR_DEPRECATED},
- {"BHD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"BIF", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"BMD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"BND", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"BOB", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"BOL", UCURR_COMMON|UCURR_DEPRECATED},
- {"BOP", UCURR_COMMON|UCURR_DEPRECATED},
- {"BOV", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"BRB", UCURR_COMMON|UCURR_DEPRECATED},
- {"BRC", UCURR_COMMON|UCURR_DEPRECATED},
- {"BRE", UCURR_COMMON|UCURR_DEPRECATED},
- {"BRL", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"BRN", UCURR_COMMON|UCURR_DEPRECATED},
- {"BRR", UCURR_COMMON|UCURR_DEPRECATED},
- {"BRZ", UCURR_COMMON|UCURR_DEPRECATED},
- {"BSD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"BTN", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"BUK", UCURR_COMMON|UCURR_DEPRECATED},
- {"BWP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"BYB", UCURR_COMMON|UCURR_DEPRECATED},
- {"BYN", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"BYR", UCURR_COMMON|UCURR_DEPRECATED},
- {"BZD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"CAD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"CDF", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"CHE", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"CHF", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"CHW", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"CLE", UCURR_COMMON|UCURR_DEPRECATED},
- {"CLF", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"CLP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"CNH", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"CNX", UCURR_UNCOMMON|UCURR_DEPRECATED},
- {"CNY", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"COP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"COU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"CRC", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"CSD", UCURR_COMMON|UCURR_DEPRECATED},
- {"CSK", UCURR_COMMON|UCURR_DEPRECATED},
- {"CUC", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"CUP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"CVE", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"CYP", UCURR_COMMON|UCURR_DEPRECATED},
- {"CZK", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"DDM", UCURR_COMMON|UCURR_DEPRECATED},
- {"DEM", UCURR_COMMON|UCURR_DEPRECATED},
- {"DJF", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"DKK", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"DOP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"DZD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"ECS", UCURR_COMMON|UCURR_DEPRECATED},
- {"ECV", UCURR_UNCOMMON|UCURR_DEPRECATED},
- {"EEK", UCURR_COMMON|UCURR_DEPRECATED},
- {"EGP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"EQE", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove?
- {"ERN", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"ESA", UCURR_UNCOMMON|UCURR_DEPRECATED},
- {"ESB", UCURR_UNCOMMON|UCURR_DEPRECATED},
- {"ESP", UCURR_COMMON|UCURR_DEPRECATED},
- {"ETB", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"EUR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"FIM", UCURR_COMMON|UCURR_DEPRECATED},
- {"FJD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"FKP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"FRF", UCURR_COMMON|UCURR_DEPRECATED},
- {"GBP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"GEK", UCURR_COMMON|UCURR_DEPRECATED},
- {"GEL", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"GHC", UCURR_COMMON|UCURR_DEPRECATED},
- {"GHS", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"GIP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"GMD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"GNF", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"GNS", UCURR_COMMON|UCURR_DEPRECATED},
- {"GQE", UCURR_COMMON|UCURR_DEPRECATED},
- {"GRD", UCURR_COMMON|UCURR_DEPRECATED},
- {"GTQ", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"GWE", UCURR_COMMON|UCURR_DEPRECATED},
- {"GWP", UCURR_COMMON|UCURR_DEPRECATED},
- {"GYD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"HKD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"HNL", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"HRD", UCURR_COMMON|UCURR_DEPRECATED},
- {"HRK", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"HTG", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"HUF", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"IDR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"IEP", UCURR_COMMON|UCURR_DEPRECATED},
- {"ILP", UCURR_COMMON|UCURR_DEPRECATED},
- {"ILR", UCURR_COMMON|UCURR_DEPRECATED},
- {"ILS", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"INR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"IQD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"IRR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"ISJ", UCURR_COMMON|UCURR_DEPRECATED},
- {"ISK", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"ITL", UCURR_COMMON|UCURR_DEPRECATED},
- {"JMD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"JOD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"JPY", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"KES", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"KGS", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"KHR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"KMF", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"KPW", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"KRH", UCURR_COMMON|UCURR_DEPRECATED},
- {"KRO", UCURR_COMMON|UCURR_DEPRECATED},
- {"KRW", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"KWD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"KYD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"KZT", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"LAK", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"LBP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"LKR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"LRD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"LSL", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"LSM", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove?
- {"LTL", UCURR_COMMON|UCURR_DEPRECATED},
- {"LTT", UCURR_COMMON|UCURR_DEPRECATED},
- {"LUC", UCURR_UNCOMMON|UCURR_DEPRECATED},
- {"LUF", UCURR_COMMON|UCURR_DEPRECATED},
- {"LUL", UCURR_UNCOMMON|UCURR_DEPRECATED},
- {"LVL", UCURR_COMMON|UCURR_DEPRECATED},
- {"LVR", UCURR_COMMON|UCURR_DEPRECATED},
- {"LYD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"MAD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"MAF", UCURR_COMMON|UCURR_DEPRECATED},
- {"MCF", UCURR_COMMON|UCURR_DEPRECATED},
- {"MDC", UCURR_COMMON|UCURR_DEPRECATED},
- {"MDL", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"MGA", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"MGF", UCURR_COMMON|UCURR_DEPRECATED},
- {"MKD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"MKN", UCURR_COMMON|UCURR_DEPRECATED},
- {"MLF", UCURR_COMMON|UCURR_DEPRECATED},
- {"MMK", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"MNT", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"MOP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"MRO", UCURR_COMMON|UCURR_DEPRECATED},
- {"MRU", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"MTL", UCURR_COMMON|UCURR_DEPRECATED},
- {"MTP", UCURR_COMMON|UCURR_DEPRECATED},
- {"MUR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"MVP", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove?
- {"MVR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"MWK", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"MXN", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"MXP", UCURR_COMMON|UCURR_DEPRECATED},
- {"MXV", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"MYR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"MZE", UCURR_COMMON|UCURR_DEPRECATED},
- {"MZM", UCURR_COMMON|UCURR_DEPRECATED},
- {"MZN", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"NAD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"NGN", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"NIC", UCURR_COMMON|UCURR_DEPRECATED},
- {"NIO", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"NLG", UCURR_COMMON|UCURR_DEPRECATED},
- {"NOK", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"NPR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"NZD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"OMR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"PAB", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"PEI", UCURR_COMMON|UCURR_DEPRECATED},
- {"PEN", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"PES", UCURR_COMMON|UCURR_DEPRECATED},
- {"PGK", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"PHP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"PKR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"PLN", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"PLZ", UCURR_COMMON|UCURR_DEPRECATED},
- {"PTE", UCURR_COMMON|UCURR_DEPRECATED},
- {"PYG", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"QAR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"RHD", UCURR_COMMON|UCURR_DEPRECATED},
- {"ROL", UCURR_COMMON|UCURR_DEPRECATED},
- {"RON", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"RSD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"RUB", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"RUR", UCURR_COMMON|UCURR_DEPRECATED},
- {"RWF", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"SAR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"SBD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"SCR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"SDD", UCURR_COMMON|UCURR_DEPRECATED},
- {"SDG", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"SDP", UCURR_COMMON|UCURR_DEPRECATED},
- {"SEK", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"SGD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"SHP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"SIT", UCURR_COMMON|UCURR_DEPRECATED},
- {"SKK", UCURR_COMMON|UCURR_DEPRECATED},
- {"SLL", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"SOS", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"SRD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"SRG", UCURR_COMMON|UCURR_DEPRECATED},
- {"SSP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"STD", UCURR_COMMON|UCURR_DEPRECATED},
- {"STN", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"SUR", UCURR_COMMON|UCURR_DEPRECATED},
- {"SVC", UCURR_COMMON|UCURR_DEPRECATED},
- {"SYP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"SZL", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"THB", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"TJR", UCURR_COMMON|UCURR_DEPRECATED},
- {"TJS", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"TMM", UCURR_COMMON|UCURR_DEPRECATED},
- {"TMT", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"TND", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"TOP", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"TPE", UCURR_COMMON|UCURR_DEPRECATED},
- {"TRL", UCURR_COMMON|UCURR_DEPRECATED},
- {"TRY", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"TTD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"TWD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"TZS", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"UAH", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"UAK", UCURR_COMMON|UCURR_DEPRECATED},
- {"UGS", UCURR_COMMON|UCURR_DEPRECATED},
- {"UGX", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"USD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"USN", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"USS", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"UYI", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"UYP", UCURR_COMMON|UCURR_DEPRECATED},
- {"UYU", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"UZS", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"VEB", UCURR_COMMON|UCURR_DEPRECATED},
- {"VEF", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"VND", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"VNN", UCURR_COMMON|UCURR_DEPRECATED},
- {"VUV", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"WST", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"XAF", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"XAG", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"XAU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"XBA", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"XBB", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"XBC", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"XBD", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"XCD", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"XDR", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"XEU", UCURR_UNCOMMON|UCURR_DEPRECATED},
- {"XFO", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"XFU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"XOF", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"XPD", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"XPF", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"XPT", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"XRE", UCURR_UNCOMMON|UCURR_DEPRECATED},
- {"XSU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"XTS", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"XUA", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"XXX", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
- {"YDD", UCURR_COMMON|UCURR_DEPRECATED},
- {"YER", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"YUD", UCURR_COMMON|UCURR_DEPRECATED},
- {"YUM", UCURR_COMMON|UCURR_DEPRECATED},
- {"YUN", UCURR_COMMON|UCURR_DEPRECATED},
- {"YUR", UCURR_COMMON|UCURR_DEPRECATED},
- {"ZAL", UCURR_UNCOMMON|UCURR_DEPRECATED},
- {"ZAR", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"ZMK", UCURR_COMMON|UCURR_DEPRECATED},
- {"ZMW", UCURR_COMMON|UCURR_NON_DEPRECATED},
- {"ZRN", UCURR_COMMON|UCURR_DEPRECATED},
- {"ZRZ", UCURR_COMMON|UCURR_DEPRECATED},
- {"ZWD", UCURR_COMMON|UCURR_DEPRECATED},
- {"ZWL", UCURR_COMMON|UCURR_DEPRECATED},
- {"ZWR", UCURR_COMMON|UCURR_DEPRECATED},
- { NULL, 0 } // Leave here to denote the end of the list.
-};
-
-/*
- * True when the type bits in `variable` satisfy the filter `typeToMatch`:
- * either the filter is UCURR_ALL, or every bit set in the filter is also set
- * in `variable`. Note that `typeToMatch` is evaluated more than once.
- */
-#define UCURR_MATCHES_BITMASK(variable, typeToMatch) \
- ((typeToMatch) == UCURR_ALL || ((variable) & (typeToMatch)) == (typeToMatch))
-
-// Enumeration "count" callback: number of gCurrencyList entries whose type
-// bits match the filter stored in the enumeration's context.
-static int32_t U_CALLCONV
-ucurr_countCurrencyList(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
-    const UCurrencyContext *ctx = (const UCurrencyContext *)(enumerator->context);
-    const uint32_t wantedType = ctx->currType;
-
-    /* Walk the table up to its NULL terminator, tallying the matches. */
-    int32_t matches = 0;
-    for (const struct CurrencyList *item = gCurrencyList; item->currency != NULL; ++item) {
-        if (UCURR_MATCHES_BITMASK(item->currType, wantedType)) {
-            ++matches;
-        }
-    }
-    return matches;
-}
-
-// Enumeration "next" callback: returns the next currency code whose type bits
-// match the context's filter and advances the context's list index past it.
-// *resultLength (when non-NULL) is set to 3 for a hit. At the end of the table
-// the function returns NULL and reports length 0.
-static const char* U_CALLCONV
-ucurr_nextCurrencyList(UEnumeration *enumerator,
-                       int32_t* resultLength,
-                       UErrorCode * /*pErrorCode*/)
-{
-    UCurrencyContext *ctx = (UCurrencyContext *)(enumerator->context);
-
-    /* Scan forward; the final table slot is the terminator and is never visited. */
-    for (;;) {
-        if (!(ctx->listIdx < UPRV_LENGTHOF(gCurrencyList)-1)) {
-            break;
-        }
-        const struct CurrencyList *candidate = &gCurrencyList[ctx->listIdx];
-        ctx->listIdx++;
-        if (!UCURR_MATCHES_BITMASK(candidate->currType, ctx->currType)) {
-            continue;
-        }
-        if (resultLength) {
-            *resultLength = 3; /* Currency codes are only 3 chars long */
-        }
-        return candidate->currency;
-    }
-
-    /* We enumerated too far. */
-    if (resultLength) {
-        *resultLength = 0;
-    }
-    return NULL;
-}
-
-// Enumeration "reset" callback: rewinds the iteration to the first table entry.
-static void U_CALLCONV
-ucurr_resetCurrencyList(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
-    UCurrencyContext *ctx = (UCurrencyContext *)(enumerator->context);
-    ctx->listIdx = 0;
-}
-
-// Enumeration "close" callback: frees the context first, then the enumeration
-// object that pointed to it.
-static void U_CALLCONV
-ucurr_closeCurrencyList(UEnumeration *enumerator) {
-    void *ctx = enumerator->context;
-    uprv_free(ctx);
-    uprv_free(enumerator);
-}
-
-/**
- * Fills isoCodes with one IsoCodeEntry per currency found under CURRENCY_MAP
- * in the CURRENCY_DATA bundle. Each entry records the ISO code and the
- * [from, to] validity dates; a missing or unreadable date falls back to
- * U_DATE_MIN / U_DATE_MAX respectively.
- *
- * @param isoCodes  hash table receiving the entries, keyed by ISO code
- * @param status    set to the lookup error if the bundle data cannot be read,
- *                  or to U_MEMORY_ALLOCATION_ERROR if an entry cannot be
- *                  allocated
- */
-static void U_CALLCONV
-ucurr_createCurrencyList(UHashtable *isoCodes, UErrorCode* status){
-    UErrorCode localStatus = U_ZERO_ERROR;
-
-    // Look up the CurrencyMap element in the root bundle.
-    // rb is reused as the fill-in, so closing currencyMapArray releases both.
-    UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus);
-    UResourceBundle *currencyMapArray = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
-
-    if (U_SUCCESS(localStatus)) {
-        // process each entry in currency map
-        for (int32_t i=0; i<ures_getSize(currencyMapArray); i++) {
-            // get the currency resource
-            UResourceBundle *currencyArray = ures_getByIndex(currencyMapArray, i, NULL, &localStatus);
-            // process each currency
-            if (U_SUCCESS(localStatus)) {
-                for (int32_t j=0; j<ures_getSize(currencyArray); j++) {
-                    // get the currency resource
-                    UResourceBundle *currencyRes = ures_getByIndex(currencyArray, j, NULL, &localStatus);
-
-                    // get the ISO code
-                    int32_t isoLength = 0;
-                    UResourceBundle *idRes = ures_getByKey(currencyRes, "id", NULL, &localStatus);
-                    if (idRes == NULL) {
-                        // Nothing was allocated for this currency yet; just
-                        // release the bundle before moving on.
-                        ures_close(currencyRes);
-                        continue;
-                    }
-                    const UChar *isoCode = ures_getString(idRes, &isoLength, &localStatus);
-                    if (isoCode == NULL) {
-                        // No usable key: skip this currency. The error is
-                        // swallowed like the per-entry errors below.
-                        ures_close(idRes);
-                        ures_close(currencyRes);
-                        localStatus = U_ZERO_ERROR;
-                        continue;
-                    }
-
-                    // get from date
-                    UDate fromDate = U_DATE_MIN;
-                    UResourceBundle *fromRes = ures_getByKey(currencyRes, "from", NULL, &localStatus);
-
-                    if (U_SUCCESS(localStatus)) {
-                        int32_t fromLength = 0;
-                        const int32_t *fromArray = ures_getIntVector(fromRes, &fromLength, &localStatus);
-                        // The date is stored as two 32-bit halves, high word first.
-                        if (U_SUCCESS(localStatus) && fromArray != NULL && fromLength >= 2) {
-                            int64_t currDate64 = (int64_t)fromArray[0] << 32;
-                            currDate64 |= ((int64_t)fromArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF));
-                            fromDate = (UDate)currDate64;
-                        }
-                    }
-                    ures_close(fromRes);
-
-                    // get to date
-                    UDate toDate = U_DATE_MAX;
-                    localStatus = U_ZERO_ERROR;
-                    UResourceBundle *toRes = ures_getByKey(currencyRes, "to", NULL, &localStatus);
-
-                    if (U_SUCCESS(localStatus)) {
-                        int32_t toLength = 0;
-                        const int32_t *toArray = ures_getIntVector(toRes, &toLength, &localStatus);
-                        if (U_SUCCESS(localStatus) && toArray != NULL && toLength >= 2) {
-                            int64_t currDate64 = (int64_t)toArray[0] << 32;
-                            currDate64 |= ((int64_t)toArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF));
-                            toDate = (UDate)currDate64;
-                        }
-                    }
-                    ures_close(toRes);
-
-                    ures_close(idRes);
-                    ures_close(currencyRes);
-
-                    // Allocate only once everything needed is known, so no
-                    // early exit above can leak the entry.
-                    IsoCodeEntry *entry = (IsoCodeEntry*)uprv_malloc(sizeof(IsoCodeEntry));
-                    if (entry == NULL) {
-                        *status = U_MEMORY_ALLOCATION_ERROR;
-                        ures_close(currencyArray);
-                        ures_close(currencyMapArray);
-                        return;
-                    }
-                    entry->isoCode = isoCode;
-                    entry->from = fromDate;
-                    entry->to = toDate;
-
-                    localStatus = U_ZERO_ERROR;
-                    uhash_put(isoCodes, (UChar *)isoCode, entry, &localStatus);
-                }
-            } else {
-                *status = localStatus;
-            }
-            ures_close(currencyArray);
-        }
-    } else {
-        *status = localStatus;
-    }
-
-    ures_close(currencyMapArray);
-}
-
-/*
- * Template enumeration object: ucurr_openISOCurrencies() copies it into a
- * freshly allocated UEnumeration and then sets that copy's context pointer.
- * NOTE(review): the two leading NULLs are presumably the base-context and
- * context slots -- confirm against the UEnumeration declaration.
- */
-static const UEnumeration gEnumCurrencyList = {
- NULL,
- NULL,
- ucurr_closeCurrencyList,
- ucurr_countCurrencyList,
- uenum_unextDefault,
- ucurr_nextCurrencyList,
- ucurr_resetCurrencyList
-};
-U_CDECL_END
-
-
-// One-time initializer for gIsoCodes: builds the ISO-code hash table and
-// publishes it only if it was filled successfully. On failure the table is
-// closed and gIsoCodes stays NULL.
-static void U_CALLCONV initIsoCodes(UErrorCode &status) {
-    U_ASSERT(gIsoCodes == NULL);
-    ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup);
-
-    UHashtable *table = uhash_open(uhash_hashUChars, uhash_compareUChars, NULL, &status);
-    if (U_SUCCESS(status)) {
-        uhash_setValueDeleter(table, deleteIsoCodeEntry);
-        ucurr_createCurrencyList(table, &status);
-        if (U_SUCCESS(status)) {
-            // Note: gIsoCodes is const. Once set up here it is never altered,
-            // and read only access is safe without synchronization.
-            gIsoCodes = table;
-        } else {
-            uhash_close(table);
-        }
-    }
-}
-
-// For every entry of unisets::kCurrencyEntries, registers each member string
-// of the entry's UnicodeSet (other than the exemplar itself) as equivalent to
-// the exemplar. Stops at the first failure or at the first unavailable set.
-static void populateCurrSymbolsEquiv(icu::Hashtable *hash, UErrorCode &status) {
-    if (U_FAILURE(status)) { return; }
-    for (auto& currencyEntry : unisets::kCurrencyEntries) {
-        UnicodeString exemplar(currencyEntry.exemplar);
-        const UnicodeSet* members = unisets::get(currencyEntry.key);
-        if (members == nullptr) { return; }
-        for (UnicodeSetIterator cursor(*members); cursor.next(); ) {
-            UnicodeString candidate = cursor.getString();
-            // No need to mark the exemplar character as an equivalent
-            if (!(candidate == exemplar)) {
-                makeEquivalent(exemplar, candidate, hash, status);
-                if (U_FAILURE(status)) { return; }
-            }
-        }
-    }
-}
-
-// One-time initializer for gCurrSymbolsEquiv: builds the symbol-equivalence
-// table and publishes it only when construction and population both succeed.
-// Errors are not reported; gCurrSymbolsEquiv simply stays NULL.
-static void U_CALLCONV initCurrSymbolsEquiv() {
-    U_ASSERT(gCurrSymbolsEquiv == NULL);
-    UErrorCode status = U_ZERO_ERROR;
-    ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup);
-
-    icu::Hashtable *table = new icu::Hashtable(status);
-    if (table == NULL) {
-        return;
-    }
-    if (U_SUCCESS(status)) {
-        table->setValueDeleter(deleteUnicode);
-        populateCurrSymbolsEquiv(table, status);
-    }
-    if (U_FAILURE(status)) {
-        delete table;
-        return;
-    }
-    gCurrSymbolsEquiv = table;
-}
-
-// Reports whether isoCode is a known currency whose [from, to] validity range
-// overlaps the requested [from, to] range. Unknown codes yield FALSE; for a
-// known code, from > to sets U_ILLEGAL_ARGUMENT_ERROR and yields FALSE.
-U_CAPI UBool U_EXPORT2
-ucurr_isAvailable(const UChar* isoCode, UDate from, UDate to, UErrorCode* eErrorCode) {
-    umtx_initOnce(gIsoCodesInitOnce, &initIsoCodes, *eErrorCode);
-    if (U_FAILURE(*eErrorCode)) {
-        return FALSE;
-    }
-
-    const IsoCodeEntry* known = (const IsoCodeEntry *) uhash_get(gIsoCodes, isoCode);
-    if (known == NULL) {
-        return FALSE;
-    }
-    if (from > to) {
-        *eErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
-        return FALSE;
-    }
-    // Written with negated '>' and '<' on purpose, so that comparisons
-    // involving NaN behave exactly as in the two-sided rejection test.
-    if (!(from > known->to) && !(to < known->from)) {
-        return TRUE;
-    }
-    return FALSE;
-}
-
-// Returns the currency-symbol equivalence table, building it on first use via
-// initCurrSymbolsEquiv(). The result is NULL if that initialization failed.
-static const icu::Hashtable* getCurrSymbolsEquiv() {
- umtx_initOnce(gCurrSymbolsEquivInitOnce, &initCurrSymbolsEquiv);
- return gCurrSymbolsEquiv;
-}
-
-// Creates an enumeration over the ISO currency table restricted to the
-// currencies matching the currType bit mask. Returns NULL and sets
-// U_MEMORY_ALLOCATION_ERROR if either allocation fails.
-U_CAPI UEnumeration * U_EXPORT2
-ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode) {
-    UEnumeration *enumeration = (UEnumeration*)uprv_malloc(sizeof(UEnumeration));
-    if (enumeration == NULL) {
-        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
-        return NULL;
-    }
-    // Start from the shared template, then attach a private iteration state.
-    uprv_memcpy(enumeration, &gEnumCurrencyList, sizeof(UEnumeration));
-
-    UCurrencyContext *state = (UCurrencyContext*)uprv_malloc(sizeof(UCurrencyContext));
-    if (state == NULL) {
-        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
-        uprv_free(enumeration);
-        return NULL;
-    }
-    state->currType = currType;
-    state->listIdx = 0;
-    enumeration->context = state;
-    return enumeration;
-}
-
-// Helper for ucurr_countCurrencies(): reads the int vector stored under key in
-// res and combines its first two elements (high word first) into a UDate.
-// Returns FALSE, with status set, when the item is missing or has fewer than
-// two elements; date is left untouched in that case.
-static UBool
-countCurrencies_readDate(const UResourceBundle *res, const char *key,
-                         UDate &date, UErrorCode &status) {
-    int32_t length = 0;
-    UResourceBundle *dateRes = ures_getByKey(res, key, NULL, &status);
-    const int32_t *words = ures_getIntVector(dateRes, &length, &status);
-    if (U_SUCCESS(status) && (words == NULL || length < 2)) {
-        status = U_INVALID_FORMAT_ERROR;
-    }
-    if (U_SUCCESS(status)) {
-        int64_t currDate64 = (int64_t)words[0] << 32;
-        currDate64 |= ((int64_t)words[1] & (int64_t)INT64_C(0x00000000FFFFFFFF));
-        date = (UDate)currDate64;
-    }
-    ures_close(dateRes);
-    return U_SUCCESS(status);
-}
-
-/**
- * Counts the currencies of the locale's region that are valid on date, i.e.
- * whose "from" date is <= date and, for entries that carry a "to" date,
- * date < "to".
- *
- * @param locale  locale whose region selects the CURRENCY_MAP entry
- * @param date    date to test
- * @param ec      in/out error code; may be NULL, in which case 0 is returned
- * @return the number of matching currencies, or 0 on any error
- */
-U_CAPI int32_t U_EXPORT2
-ucurr_countCurrencies(const char* locale,
-                      UDate date,
-                      UErrorCode* ec)
-{
-    // Either the error code is invalid or an earlier call already failed.
-    if (ec == NULL || U_FAILURE(*ec))
-    {
-        return 0;
-    }
-
-    int32_t currCount = 0;
-
-    // local variables
-    UErrorCode localStatus = U_ZERO_ERROR;
-    char id[ULOC_FULLNAME_CAPACITY];
-    // NOTE(review): id is presumably overwritten by idForLocale() below; this
-    // call is kept because it can still affect localStatus.
-    uloc_getKeywordValue(locale, "currency", id, ULOC_FULLNAME_CAPACITY, &localStatus);
-
-    // get country or country_variant in `id'
-    idForLocale(locale, id, sizeof(id), ec);
-
-    if (U_FAILURE(*ec))
-    {
-        return 0;
-    }
-
-    // Remove variants, which is only needed for registration.
-    char *idDelim = strchr(id, VAR_DELIM);
-    if (idDelim)
-    {
-        idDelim[0] = 0;
-    }
-
-    // Look up the CurrencyMap element in the root bundle.
-    UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus);
-    UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
-
-    // Using the id derived from the local, get the currency data
-    UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus);
-
-    // process each currency to see which one is valid for the given date
-    if (U_SUCCESS(localStatus))
-    {
-        for (int32_t i=0; i<ures_getSize(countryArray); i++)
-        {
-            // get the currency resource
-            UResourceBundle *currencyRes = ures_getByIndex(countryArray, i, NULL, &localStatus);
-
-            UDate fromDate = 0.0;
-            UDate toDate = 0.0;
-            UBool readOk = countCurrencies_readDate(currencyRes, "from", fromDate, localStatus);
-
-            // Entries with more than two items also carry a "to" date.
-            const UBool hasToDate = readOk && (ures_getSize(currencyRes) > 2);
-            if (hasToDate)
-            {
-                readOk = countCurrencies_readDate(currencyRes, "to", toDate, localStatus);
-            }
-
-            // close open resources
-            ures_close(currencyRes);
-
-            if (!readOk)
-            {
-                // Malformed or missing data: stop here instead of
-                // dereferencing a NULL vector; localStatus carries the error.
-                break;
-            }
-
-            if ((fromDate <= date) && (!hasToDate || (date < toDate)))
-            {
-                currCount++;
-            }
-        } // end For loop
-    } // end if (U_SUCCESS(localStatus))
-
-    ures_close(countryArray);
-
-    // Check for errors
-    if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR)
-    {
-        // There is nothing to fallback to.
-        // Report the failure/warning if possible.
-        *ec = localStatus;
-    }
-
-    if (U_SUCCESS(*ec))
-    {
-        // no errors
-        return currCount;
-    }
-
-    // some argument passed is no good.
-    return 0;
-}
-
-U_CAPI int32_t U_EXPORT2
-ucurr_forLocaleAndDate(const char* locale,
- UDate date,
- int32_t index,
- UChar* buff,
- int32_t buffCapacity,
- UErrorCode* ec)
-{
- int32_t resLen = 0;
- int32_t currIndex = 0;
- const UChar* s = NULL;
-
- if (ec != NULL && U_SUCCESS(*ec))
- {
- // check the arguments passed
- if ((buff && buffCapacity) || !buffCapacity )
- {
- // local variables
- UErrorCode localStatus = U_ZERO_ERROR;
- char id[ULOC_FULLNAME_CAPACITY];
- resLen = uloc_getKeywordValue(locale, "currency", id, ULOC_FULLNAME_CAPACITY, &localStatus);
-
- // get country or country_variant in `id'
- idForLocale(locale, id, sizeof(id), ec);
- if (U_FAILURE(*ec))
- {
- return 0;
- }
-
- // Remove variants, which is only needed for registration.
- char *idDelim = strchr(id, VAR_DELIM);
- if (idDelim)
- {
- idDelim[0] = 0;
- }
-
- // Look up the CurrencyMap element in the root bundle.
- UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus);
- UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
-
- // Using the id derived from the local, get the currency data
- UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus);
-
- // process each currency to see which one is valid for the given date
- bool matchFound = false;
- if (U_SUCCESS(localStatus))
- {
- if ((index <= 0) || (index> ures_getSize(countryArray)))
- {
- // requested index is out of bounds
- ures_close(countryArray);
- return 0;
- }
-
- for (int32_t i=0; i<ures_getSize(countryArray); i++)
- {
- // get the currency resource
- UResourceBundle *currencyRes = ures_getByIndex(countryArray, i, NULL, &localStatus);
- s = ures_getStringByKey(currencyRes, "id", &resLen, &localStatus);
-
- // get the from date
- int32_t fromLength = 0;
- UResourceBundle *fromRes = ures_getByKey(currencyRes, "from", NULL, &localStatus);
- const int32_t *fromArray = ures_getIntVector(fromRes, &fromLength, &localStatus);
-
- int64_t currDate64 = (int64_t)fromArray[0] << 32;
- currDate64 |= ((int64_t)fromArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF));
- UDate fromDate = (UDate)currDate64;
-
- if (ures_getSize(currencyRes)> 2)
- {
- int32_t toLength = 0;
- UResourceBundle *toRes = ures_getByKey(currencyRes, "to", NULL, &localStatus);
- const int32_t *toArray = ures_getIntVector(toRes, &toLength, &localStatus);
-
- currDate64 = (int64_t)toArray[0] << 32;
- currDate64 |= ((int64_t)toArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF));
- UDate toDate = (UDate)currDate64;
-
- if ((fromDate <= date) && (date < toDate))
- {
- currIndex++;
- if (currIndex == index)
- {
- matchFound = true;
- }
- }
-
- ures_close(toRes);
- }
- else
- {
- if (fromDate <= date)
- {
- currIndex++;
- if (currIndex == index)
- {
- matchFound = true;
- }
- }
- }
-
- // close open resources
- ures_close(currencyRes);
- ures_close(fromRes);
-
- // check for loop exit
- if (matchFound)
- {
- break;
- }
-
- } // end For loop
- }
-
- ures_close(countryArray);
-
- // Check for errors
- if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR)
- {
- // There is nothing to fallback to.
- // Report the failure/warning if possible.
- *ec = localStatus;
- }
-
- if (U_SUCCESS(*ec))
- {
- // no errors
- if((buffCapacity> resLen) && matchFound)
- {
- // write out the currency value
- u_strcpy(buff, s);
- }
- else
- {
- return 0;
- }
- }
-
- // return null terminated currency string
- return u_terminateUChars(buff, buffCapacity, resLen, ec);
- }
- else
- {
- // illegal argument encountered
- *ec = U_ILLEGAL_ARGUMENT_ERROR;
- }
-
- }
-
- // If we got here, either error code is invalid or
- // some argument passed is no good.
- return resLen;
-}
-
-static const UEnumeration defaultKeywordValues = {
- NULL,
- NULL,
- ulist_close_keyword_values_iterator,
- ulist_count_keyword_values,
- uenum_unextDefault,
- ulist_next_keyword_value,
- ulist_reset_keyword_values_iterator
-};
-
-U_CAPI UEnumeration *U_EXPORT2 ucurr_getKeywordValuesForLocale(const char *key, const char *locale, UBool commonlyUsed, UErrorCode* status) {
- // Resolve region
- char prefRegion[ULOC_COUNTRY_CAPACITY];
- ulocimp_getRegionForSupplementalData(locale, TRUE, prefRegion, sizeof(prefRegion), status);
-
- // Read value from supplementalData
- UList *values = ulist_createEmptyList(status);
- UList *otherValues = ulist_createEmptyList(status);
- UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
- if (U_FAILURE(*status) || en == NULL) {
- if (en == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- } else {
- uprv_free(en);
- }
- ulist_deleteList(values);
- ulist_deleteList(otherValues);
- return NULL;
- }
- memcpy(en, &defaultKeywordValues, sizeof(UEnumeration));
- en->context = values;
-
- UResourceBundle *bundle = ures_openDirect(U_ICUDATA_CURR, "supplementalData", status);
- ures_getByKey(bundle, "CurrencyMap", bundle, status);
- UResourceBundle bundlekey, regbndl, curbndl, to;
- ures_initStackObject(&bundlekey);
- ures_initStackObject(&regbndl);
- ures_initStackObject(&curbndl);
- ures_initStackObject(&to);
-
- while (U_SUCCESS(*status) && ures_hasNext(bundle)) {
- ures_getNextResource(bundle, &bundlekey, status);
- if (U_FAILURE(*status)) {
- break;
- }
- const char *region = ures_getKey(&bundlekey);
- UBool isPrefRegion = uprv_strcmp(region, prefRegion) == 0 ? TRUE : FALSE;
- if (!isPrefRegion && commonlyUsed) {
- // With commonlyUsed=true, we do not put
- // currencies for other regions in the
- // result list.
- continue;
- }
- ures_getByKey(bundle, region, &regbndl, status);
- if (U_FAILURE(*status)) {
- break;
- }
- while (U_SUCCESS(*status) && ures_hasNext(&regbndl)) {
- ures_getNextResource(&regbndl, &curbndl, status);
- if (ures_getType(&curbndl) != URES_TABLE) {
- // Currently, an empty ARRAY is mixed in.
- continue;
- }
- char *curID = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY);
- int32_t curIDLength = ULOC_KEYWORDS_CAPACITY;
- if (curID == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- break;
- }
-
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
- ures_getUTF8StringByKey(&curbndl, "id", curID, &curIDLength, TRUE, status);
- /* optimize - use the utf-8 string */
-#else
- {
- const UChar* defString = ures_getStringByKey(&curbndl, "id", &curIDLength, status);
- if(U_SUCCESS(*status)) {
- if(curIDLength+1 > ULOC_KEYWORDS_CAPACITY) {
- *status = U_BUFFER_OVERFLOW_ERROR;
- } else {
- u_UCharsToChars(defString, curID, curIDLength+1);
- }
- }
- }
-#endif
-
- if (U_FAILURE(*status)) {
- break;
- }
- UBool hasTo = FALSE;
- ures_getByKey(&curbndl, "to", &to, status);
- if (U_FAILURE(*status)) {
- // Do nothing here...
- *status = U_ZERO_ERROR;
- } else {
- hasTo = TRUE;
- }
- if (isPrefRegion && !hasTo && !ulist_containsString(values, curID, (int32_t)uprv_strlen(curID))) {
- // Currently active currency for the target country
- ulist_addItemEndList(values, curID, TRUE, status);
- } else if (!ulist_containsString(otherValues, curID, (int32_t)uprv_strlen(curID)) && !commonlyUsed) {
- ulist_addItemEndList(otherValues, curID, TRUE, status);
- } else {
- uprv_free(curID);
- }
- }
-
- }
- if (U_SUCCESS(*status)) {
- if (commonlyUsed) {
- if (ulist_getListSize(values) == 0) {
- // This could happen if no valid region is supplied in the input
- // locale. In this case, we use the CLDR's default.
- uenum_close(en);
- en = ucurr_getKeywordValuesForLocale(key, "und", TRUE, status);
- }
- } else {
- // Consolidate the list
- char *value = NULL;
- ulist_resetList(otherValues);
- while ((value = (char *)ulist_getNext(otherValues)) != NULL) {
- if (!ulist_containsString(values, value, (int32_t)uprv_strlen(value))) {
- char *tmpValue = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY);
- uprv_memcpy(tmpValue, value, uprv_strlen(value) + 1);
- ulist_addItemEndList(values, tmpValue, TRUE, status);
- if (U_FAILURE(*status)) {
- break;
- }
- }
- }
- }
-
- ulist_resetList((UList *)(en->context));
- } else {
- ulist_deleteList(values);
- uprv_free(en);
- values = NULL;
- en = NULL;
- }
- ures_close(&to);
- ures_close(&curbndl);
- ures_close(&regbndl);
- ures_close(&bundlekey);
- ures_close(bundle);
-
- ulist_deleteList(otherValues);
-
- return en;
-}
-
-
-U_CAPI int32_t U_EXPORT2
-ucurr_getNumericCode(const UChar* currency) {
- int32_t code = 0;
- if (currency && u_strlen(currency) == ISO_CURRENCY_CODE_LENGTH) {
- UErrorCode status = U_ZERO_ERROR;
-
- UResourceBundle *bundle = ures_openDirect(0, "currencyNumericCodes", &status);
- ures_getByKey(bundle, "codeMap", bundle, &status);
- if (U_SUCCESS(status)) {
- char alphaCode[ISO_CURRENCY_CODE_LENGTH+1];
- myUCharsToChars(alphaCode, currency);
- T_CString_toUpperCase(alphaCode);
- ures_getByKey(bundle, alphaCode, bundle, &status);
- int tmpCode = ures_getInt(bundle, &status);
- if (U_SUCCESS(status)) {
- code = tmpCode;
- }
- }
- ures_close(bundle);
- }
- return code;
-}
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-//eof
diff --git a/contrib/libs/icu/common/ucurrimp.h b/contrib/libs/icu/common/ucurrimp.h
deleted file mode 100644
index 6d9588295df..00000000000
--- a/contrib/libs/icu/common/ucurrimp.h
+++ /dev/null
@@ -1,78 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2002-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#ifndef _UCURR_IMP_H_
-#define _UCURR_IMP_H_
-
-#include "unicode/utypes.h"
-#include "unicode/unistr.h"
-#include "unicode/parsepos.h"
-#include "unicode/uniset.h"
-
-/**
- * Internal method. Given a currency ISO code and a locale, return
- * the "static" currency name. This is usually the same as the
- * UCURR_SYMBOL_NAME, but if the latter is a choice format, then the
- * format is applied to the number 2.0 (to yield the more common
- * plural) to return a static name.
- *
- * This is used for backward compatibility with old currency logic in
- * DecimalFormat and DecimalFormatSymbols.
- */
-U_CAPI void
-uprv_getStaticCurrencyName(const UChar* iso, const char* loc,
- icu::UnicodeString& result, UErrorCode& ec);
-
-/**
- * Attempt to parse the given string as a currency, either as a
- * display name in the given locale, or as a 3-letter ISO 4217
- * code. If multiple display names match, then the longest one is
- * selected. If both a display name and a 3-letter ISO code
- * match, then the display name is preferred, unless it's length
- * is less than 3.
- *
- * The parameters must not be NULL.
- *
- * @param locale the locale of the display names to match
- * @param text the text to parse
- * @param pos input-output position; on input, the position within
- * text to match; must have 0 <= pos.getIndex() < text.length();
- * on output, the position after the last matched character. If
- * the parse fails, the position in unchanged upon output.
- * @param type currency type to parse against, LONG_NAME only or not
- * @param partialMatchLen The length of the longest matching prefix;
- * this may be nonzero even if no full currency was matched.
- * @return the ISO 4217 code, as a string, of the best match, or
- * null if there is no match
- *
- * @internal
- */
-U_CAPI void
-uprv_parseCurrency(const char* locale,
- const icu::UnicodeString& text,
- icu::ParsePosition& pos,
- int8_t type,
- int32_t* partialMatchLen,
- UChar* result,
- UErrorCode& ec);
-
-/**
- * Puts all possible first-characters of a currency into the
- * specified UnicodeSet.
- *
- * @param locale the locale of the display names of interest
- * @param result the UnicodeSet to which to add the starting characters
- */
-void uprv_currencyLeads(const char* locale, icu::UnicodeSet& result, UErrorCode& ec);
-
-
-
-#endif /* #ifndef _UCURR_IMP_H_ */
-
-//eof
diff --git a/contrib/libs/icu/common/udata.cpp b/contrib/libs/icu/common/udata.cpp
deleted file mode 100644
index ec9c999cea4..00000000000
--- a/contrib/libs/icu/common/udata.cpp
+++ /dev/null
@@ -1,1460 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1999-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: udata.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999oct25
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h" /* U_PLATFORM etc. */
-
-#ifdef __GNUC__
-/* if gcc
-#define ATTRIBUTE_WEAK __attribute__ ((weak))
-might have to #include some other header
-*/
-#endif
-
-#include "unicode/putil.h"
-#include "unicode/udata.h"
-#include "unicode/uversion.h"
-#include "charstr.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "mutex.h"
-#include "putilimp.h"
-#include "restrace.h"
-#include "uassert.h"
-#include "ucln_cmn.h"
-#include "ucmndata.h"
-#include "udatamem.h"
-#include "uhash.h"
-#include "umapfile.h"
-#include "umutex.h"
-
-/***********************************************************************
-*
-* Notes on the organization of the ICU data implementation
-*
-* All of the public API is defined in udata.h
-*
-* The implementation is split into several files...
-*
-* - udata.c (this file) contains higher level code that knows about
-* the search paths for locating data, caching opened data, etc.
-*
-* - umapfile.c contains the low level platform-specific code for actually loading
-* (memory mapping, file reading, whatever) data into memory.
-*
-* - ucmndata.c deals with the tables of contents of ICU data items within
-* an ICU common format data file. The implementation includes
-* an abstract interface and support for multiple TOC formats.
-* All knowledge of any specific TOC format is encapsulated here.
-*
-* - udatamem.c has code for managing UDataMemory structs. These are little
-* descriptor objects for blocks of memory holding ICU data of
-* various types.
-*/
-
-/* configuration ---------------------------------------------------------- */
-
-/* If you are excruciatingly bored turn this on .. */
-/* #define UDATA_DEBUG 1 */
-
-#if defined(UDATA_DEBUG)
-# include <stdio.h>
-#endif
-
-U_NAMESPACE_USE
-
-/*
- * Forward declarations
- */
-static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err);
-
-/***********************************************************************
-*
-* static (Global) data
-*
-************************************************************************/
-
-/*
- * Pointers to the common ICU data.
- *
- * We store multiple pointers to ICU data packages and iterate through them
- * when looking for a data item.
- *
- * It is possible to combine this with dependency inversion:
- * One or more data package libraries may export
- * functions that each return a pointer to their piece of the ICU data,
- * and this file would import them as weak functions, without a
- * strong linker dependency from the common library on the data library.
- *
- * Then we can have applications depend on only that part of ICU's data
- * that they really need, reducing the size of binaries that take advantage
- * of this.
- */
-static UDataMemory *gCommonICUDataArray[10] = { NULL }; // Access protected by icu global mutex.
-
-static u_atomic_int32_t gHaveTriedToLoadCommonData = ATOMIC_INT32_T_INITIALIZER(0); // See extendICUData().
-
-static UHashtable *gCommonDataCache = NULL; /* Global hash table of opened ICU data files. */
-static icu::UInitOnce gCommonDataCacheInitOnce = U_INITONCE_INITIALIZER;
-
-#if !defined(ICU_DATA_DIR_WINDOWS)
-static UDataFileAccess gDataFileAccess = UDATA_DEFAULT_ACCESS; // Access not synchronized.
- // Modifying is documented as thread-unsafe.
-#else
-// If we are using the Windows data directory, then look in one spot only.
-static UDataFileAccess gDataFileAccess = UDATA_NO_FILES;
-#endif
-
-static UBool U_CALLCONV
-udata_cleanup(void)
-{
- int32_t i;
-
- if (gCommonDataCache) { /* Delete the cache of user data mappings. */
- uhash_close(gCommonDataCache); /* Table owns the contents, and will delete them. */
- gCommonDataCache = NULL; /* Cleanup is not thread safe. */
- }
- gCommonDataCacheInitOnce.reset();
-
- for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray) && gCommonICUDataArray[i] != NULL; ++i) {
- udata_close(gCommonICUDataArray[i]);
- gCommonICUDataArray[i] = NULL;
- }
- gHaveTriedToLoadCommonData = 0;
-
- return TRUE; /* Everything was cleaned up */
-}
-
-static UBool U_CALLCONV
-findCommonICUDataByName(const char *inBasename, UErrorCode &err)
-{
- UBool found = FALSE;
- int32_t i;
-
- UDataMemory *pData = udata_findCachedData(inBasename, err);
- if (U_FAILURE(err) || pData == NULL)
- return FALSE;
-
- {
- Mutex lock;
- for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) {
- if ((gCommonICUDataArray[i] != NULL) && (gCommonICUDataArray[i]->pHeader == pData->pHeader)) {
- /* The data pointer is already in the array. */
- found = TRUE;
- break;
- }
- }
- }
- return found;
-}
-
-
-/*
- * setCommonICUData. Set a UDataMemory to be the global ICU Data
- */
-static UBool
-setCommonICUData(UDataMemory *pData, /* The new common data. Belongs to caller, we copy it. */
- UBool warn, /* If true, set USING_DEFAULT warning if ICUData was */
- /* changed by another thread before we got to it. */
- UErrorCode *pErr)
-{
- UDataMemory *newCommonData = UDataMemory_createNewInstance(pErr);
- int32_t i;
- UBool didUpdate = FALSE;
- if (U_FAILURE(*pErr)) {
- return FALSE;
- }
-
- /* For the assignment, other threads must cleanly see either the old */
- /* or the new, not some partially initialized new. The old can not be */
- /* deleted - someone may still have a pointer to it lying around in */
- /* their locals. */
- UDatamemory_assign(newCommonData, pData);
- umtx_lock(NULL);
- for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) {
- if (gCommonICUDataArray[i] == NULL) {
- gCommonICUDataArray[i] = newCommonData;
- didUpdate = TRUE;
- break;
- } else if (gCommonICUDataArray[i]->pHeader == pData->pHeader) {
- /* The same data pointer is already in the array. */
- break;
- }
- }
- umtx_unlock(NULL);
-
- if (i == UPRV_LENGTHOF(gCommonICUDataArray) && warn) {
- *pErr = U_USING_DEFAULT_WARNING;
- }
- if (didUpdate) {
- ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
- } else {
- uprv_free(newCommonData);
- }
- return didUpdate;
-}
-
-#if !defined(ICU_DATA_DIR_WINDOWS)
-
-static UBool
-setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) {
- UDataMemory tData;
- UDataMemory_init(&tData);
- UDataMemory_setData(&tData, pData);
- udata_checkCommonData(&tData, pErrorCode);
- return setCommonICUData(&tData, FALSE, pErrorCode);
-}
-
-#endif
-
-static const char *
-findBasename(const char *path) {
- const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR);
- if(basename==NULL) {
- return path;
- } else {
- return basename+1;
- }
-}
-
-#ifdef UDATA_DEBUG
-static const char *
-packageNameFromPath(const char *path)
-{
- if((path == NULL) || (*path == 0)) {
- return U_ICUDATA_NAME;
- }
-
- path = findBasename(path);
-
- if((path == NULL) || (*path == 0)) {
- return U_ICUDATA_NAME;
- }
-
- return path;
-}
-#endif
-
-/*----------------------------------------------------------------------*
- * *
- * Cache for common data *
- * Functions for looking up or adding entries to a cache of *
- * data that has been previously opened. Avoids a potentially *
- * expensive operation of re-opening the data for subsequent *
- * uses. *
- * *
- * Data remains cached for the duration of the process. *
- * *
- *----------------------------------------------------------------------*/
-
-typedef struct DataCacheElement {
- char *name;
- UDataMemory *item;
-} DataCacheElement;
-
-
-
-/*
- * Deleter function for DataCacheElements.
- * udata cleanup function closes the hash table; hash table in turn calls back to
- * here for each entry.
- */
-static void U_CALLCONV DataCacheElement_deleter(void *pDCEl) {
- DataCacheElement *p = (DataCacheElement *)pDCEl;
- udata_close(p->item); /* unmaps storage */
- uprv_free(p->name); /* delete the hash key string. */
- uprv_free(pDCEl); /* delete 'this' */
-}
-
-static void U_CALLCONV udata_initHashTable(UErrorCode &err) {
- U_ASSERT(gCommonDataCache == NULL);
- gCommonDataCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &err);
- if (U_FAILURE(err)) {
- return;
- }
- U_ASSERT(gCommonDataCache != NULL);
- uhash_setValueDeleter(gCommonDataCache, DataCacheElement_deleter);
- ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
-}
-
- /* udata_getCacheHashTable()
- * Get the hash table used to store the data cache entries.
- * Lazy create it if it doesn't yet exist.
- */
-static UHashtable *udata_getHashTable(UErrorCode &err) {
- umtx_initOnce(gCommonDataCacheInitOnce, &udata_initHashTable, err);
- return gCommonDataCache;
-}
-
-
-
-static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err)
-{
- UHashtable *htable;
- UDataMemory *retVal = NULL;
- DataCacheElement *el;
- const char *baseName;
-
- htable = udata_getHashTable(err);
- if (U_FAILURE(err)) {
- return NULL;
- }
-
- baseName = findBasename(path); /* Cache remembers only the base name, not the full path. */
- umtx_lock(NULL);
- el = (DataCacheElement *)uhash_get(htable, baseName);
- umtx_unlock(NULL);
- if (el != NULL) {
- retVal = el->item;
- }
-#ifdef UDATA_DEBUG
- fprintf(stderr, "Cache: [%s] -> %p\n", baseName, (void*) retVal);
-#endif
- return retVal;
-}
-
-
-static UDataMemory *udata_cacheDataItem(const char *path, UDataMemory *item, UErrorCode *pErr) {
- DataCacheElement *newElement;
- const char *baseName;
- int32_t nameLen;
- UHashtable *htable;
- DataCacheElement *oldValue = NULL;
- UErrorCode subErr = U_ZERO_ERROR;
-
- htable = udata_getHashTable(*pErr);
- if (U_FAILURE(*pErr)) {
- return NULL;
- }
-
- /* Create a new DataCacheElement - the thingy we store in the hash table -
- * and copy the supplied path and UDataMemoryItems into it.
- */
- newElement = (DataCacheElement *)uprv_malloc(sizeof(DataCacheElement));
- if (newElement == NULL) {
- *pErr = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- newElement->item = UDataMemory_createNewInstance(pErr);
- if (U_FAILURE(*pErr)) {
- uprv_free(newElement);
- return NULL;
- }
- UDatamemory_assign(newElement->item, item);
-
- baseName = findBasename(path);
- nameLen = (int32_t)uprv_strlen(baseName);
- newElement->name = (char *)uprv_malloc(nameLen+1);
- if (newElement->name == NULL) {
- *pErr = U_MEMORY_ALLOCATION_ERROR;
- uprv_free(newElement->item);
- uprv_free(newElement);
- return NULL;
- }
- uprv_strcpy(newElement->name, baseName);
-
- /* Stick the new DataCacheElement into the hash table.
- */
- umtx_lock(NULL);
- oldValue = (DataCacheElement *)uhash_get(htable, path);
- if (oldValue != NULL) {
- subErr = U_USING_DEFAULT_WARNING;
- }
- else {
- uhash_put(
- htable,
- newElement->name, /* Key */
- newElement, /* Value */
- &subErr);
- }
- umtx_unlock(NULL);
-
-#ifdef UDATA_DEBUG
- fprintf(stderr, "Cache: [%s] <<< %p : %s. vFunc=%p\n", newElement->name,
- (void*) newElement->item, u_errorName(subErr), (void*) newElement->item->vFuncs);
-#endif
-
- if (subErr == U_USING_DEFAULT_WARNING || U_FAILURE(subErr)) {
- *pErr = subErr; /* copy sub err unto fillin ONLY if something happens. */
- uprv_free(newElement->name);
- uprv_free(newElement->item);
- uprv_free(newElement);
- return oldValue ? oldValue->item : NULL;
- }
-
- return newElement->item;
-}
-
-/*----------------------------------------------------------------------*==============
- * *
- * Path management. Could be shared with other tools/etc if need be *
- * later on. *
- * *
- *----------------------------------------------------------------------*/
-
-U_NAMESPACE_BEGIN
-
-class UDataPathIterator
-{
-public:
- UDataPathIterator(const char *path, const char *pkg,
- const char *item, const char *suffix, UBool doCheckLastFour,
- UErrorCode *pErrorCode);
- const char *next(UErrorCode *pErrorCode);
-
-private:
- const char *path; /* working path (u_icudata_Dir) */
- const char *nextPath; /* path following this one */
- const char *basename; /* item's basename (icudt22e_mt.res)*/
-
- StringPiece suffix; /* item suffix (can be null) */
-
- uint32_t basenameLen; /* length of basename */
-
- CharString itemPath; /* path passed in with item name */
- CharString pathBuffer; /* output path for this it'ion */
- CharString packageStub; /* example: "/icudt28b". Will ignore that leaf in set paths. */
-
- UBool checkLastFour; /* if TRUE then allow paths such as '/foo/myapp.dat'
- * to match, checks last 4 chars of suffix with
- * last 4 of path, then previous chars. */
-};
-
-/**
- * @param iter The iterator to be initialized. Its current state does not matter.
- * @param inPath The full pathname to be iterated over. If NULL, defaults to U_ICUDATA_NAME
- * @param pkg Package which is being searched for, ex "icudt28l". Will ignore leaf directories such as /icudt28l
- * @param item Item to be searched for. Can include full path, such as /a/b/foo.dat
- * @param inSuffix Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly.
- * Ex: 'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2.
- * '/blarg/stuff.dat' would also be found.
- * Note: inSuffix may also be the 'item' being searched for as well, (ex: "ibm-5348_P100-1997.cnv"), in which case
- * the 'item' parameter is often the same as pkg. (Though sometimes might have a tree part as well, ex: "icudt62l-curr").
- */
-UDataPathIterator::UDataPathIterator(const char *inPath, const char *pkg,
- const char *item, const char *inSuffix, UBool doCheckLastFour,
- UErrorCode *pErrorCode)
-{
-#ifdef UDATA_DEBUG
- fprintf(stderr, "SUFFIX1=%s PATH=%s\n", inSuffix, inPath);
-#endif
- /** Path **/
- if(inPath == NULL) {
- path = u_getDataDirectory();
- } else {
- path = inPath;
- }
-
- /** Package **/
- if(pkg != NULL) {
- packageStub.append(U_FILE_SEP_CHAR, *pErrorCode).append(pkg, *pErrorCode);
-#ifdef UDATA_DEBUG
- fprintf(stderr, "STUB=%s [%d]\n", packageStub.data(), packageStub.length());
-#endif
- }
-
- /** Item **/
- basename = findBasename(item);
- basenameLen = (int32_t)uprv_strlen(basename);
-
- /** Item path **/
- if(basename == item) {
- nextPath = path;
- } else {
- itemPath.append(item, (int32_t)(basename-item), *pErrorCode);
- nextPath = itemPath.data();
- }
-#ifdef UDATA_DEBUG
- fprintf(stderr, "SUFFIX=%s [%p]\n", inSuffix, (void*) inSuffix);
-#endif
-
- /** Suffix **/
- if(inSuffix != NULL) {
- suffix = inSuffix;
- } else {
- suffix = "";
- }
-
- checkLastFour = doCheckLastFour;
-
- /* pathBuffer will hold the output path strings returned by this iterator */
-
-#ifdef UDATA_DEBUG
- fprintf(stderr, "0: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n",
- item,
- path,
- basename,
- suffix.data(),
- itemPath.data(),
- nextPath,
- checkLastFour?"TRUE":"false");
-#endif
-}
-
-/**
- * Get the next path on the list.
- *
- * @param iter The Iter to be used
- * @param len If set, pointer to the length of the returned path, for convenience.
- * @return Pointer to the next path segment, or NULL if there are no more.
- */
-const char *UDataPathIterator::next(UErrorCode *pErrorCode)
-{
- if(U_FAILURE(*pErrorCode)) {
- return NULL;
- }
-
- const char *currentPath = NULL;
- int32_t pathLen = 0;
- const char *pathBasename;
-
- do
- {
- if( nextPath == NULL ) {
- break;
- }
- currentPath = nextPath;
-
- if(nextPath == itemPath.data()) { /* we were processing item's path. */
- nextPath = path; /* start with regular path next tm. */
- pathLen = (int32_t)uprv_strlen(currentPath);
- } else {
- /* fix up next for next time */
- nextPath = uprv_strchr(currentPath, U_PATH_SEP_CHAR);
- if(nextPath == NULL) {
- /* segment: entire path */
- pathLen = (int32_t)uprv_strlen(currentPath);
- } else {
- /* segment: until next segment */
- pathLen = (int32_t)(nextPath - currentPath);
- /* skip divider */
- nextPath ++;
- }
- }
-
- if(pathLen == 0) {
- continue;
- }
-
-#ifdef UDATA_DEBUG
- fprintf(stderr, "rest of path (IDD) = %s\n", currentPath);
- fprintf(stderr, " ");
- {
- int32_t qqq;
- for(qqq=0;qqq<pathLen;qqq++)
- {
- fprintf(stderr, " ");
- }
-
- fprintf(stderr, "^\n");
- }
-#endif
- pathBuffer.clear().append(currentPath, pathLen, *pErrorCode);
-
- /* check for .dat files */
- pathBasename = findBasename(pathBuffer.data());
-
- if(checkLastFour == TRUE &&
- (pathLen>=4) &&
- uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix.data(), 4)==0 && /* suffix matches */
- uprv_strncmp(findBasename(pathBuffer.data()), basename, basenameLen)==0 && /* base matches */
- uprv_strlen(pathBasename)==(basenameLen+4)) { /* base+suffix = full len */
-
-#ifdef UDATA_DEBUG
- fprintf(stderr, "Have %s file on the path: %s\n", suffix.data(), pathBuffer.data());
-#endif
- /* do nothing */
- }
- else
- { /* regular dir path */
- if(pathBuffer[pathLen-1] != U_FILE_SEP_CHAR) {
- if((pathLen>=4) &&
- uprv_strncmp(pathBuffer.data()+(pathLen-4), ".dat", 4) == 0)
- {
-#ifdef UDATA_DEBUG
- fprintf(stderr, "skipping non-directory .dat file %s\n", pathBuffer.data());
-#endif
- continue;
- }
-
- /* Check if it is a directory with the same name as our package */
- if(!packageStub.isEmpty() &&
- (pathLen > packageStub.length()) &&
- !uprv_strcmp(pathBuffer.data() + pathLen - packageStub.length(), packageStub.data())) {
-#ifdef UDATA_DEBUG
- fprintf(stderr, "Found stub %s (will add package %s of len %d)\n", packageStub.data(), basename, basenameLen);
-#endif
- pathBuffer.truncate(pathLen - packageStub.length());
- }
- pathBuffer.append(U_FILE_SEP_CHAR, *pErrorCode);
- }
-
- /* + basename */
- pathBuffer.append(packageStub.data()+1, packageStub.length()-1, *pErrorCode);
-
- if (!suffix.empty()) /* tack on suffix */
- {
- if (suffix.length() > 4) {
- // If the suffix is actually an item ("ibm-5348_P100-1997.cnv") and not an extension (".res")
- // then we need to ensure that the path ends with a separator.
- pathBuffer.ensureEndsWithFileSeparator(*pErrorCode);
- }
- pathBuffer.append(suffix, *pErrorCode);
- }
- }
-
-#ifdef UDATA_DEBUG
- fprintf(stderr, " --> %s\n", pathBuffer.data());
-#endif
-
- return pathBuffer.data();
-
- } while(path);
-
- /* fell way off the end */
- return NULL;
-}
-
-U_NAMESPACE_END
-
-/* ==================================================================================*/
-
-
-/*----------------------------------------------------------------------*
- * *
- * Add a static reference to the common data library *
- * Unless overridden by an explicit udata_setCommonData, this will be *
- * our common data. *
- * *
- *----------------------------------------------------------------------*/
-#if !defined(ICU_DATA_DIR_WINDOWS)
-// When using the Windows system data, we expect only a single data file.
-extern "C" const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT;
-#endif
-
-/*
- * This would be a good place for weak-linkage declarations of
- * partial-data-library access functions where each returns a pointer
- * to its data package, if it is linked in.
- */
-/*
-extern const void *uprv_getICUData_collation(void) ATTRIBUTE_WEAK;
-extern const void *uprv_getICUData_conversion(void) ATTRIBUTE_WEAK;
-*/
-
-/*----------------------------------------------------------------------*
- * *
- * openCommonData Attempt to open a common format (.dat) file *
- * Map it into memory (if it's not there already) *
- * and return a UDataMemory object for it. *
- * *
- * If the requested data is already open and cached *
- * just return the cached UDataMem object. *
- * *
- *----------------------------------------------------------------------*/
-static UDataMemory *
-openCommonData(const char *path, /* Path from OpenChoice? */
- int32_t commonDataIndex, /* ICU Data (index >= 0) if path == NULL */
- UErrorCode *pErrorCode)
-{
- UDataMemory tData;
- const char *pathBuffer;
- const char *inBasename;
-
- if (U_FAILURE(*pErrorCode)) {
- return NULL;
- }
-
- UDataMemory_init(&tData);
-
- /* ??????? TODO revisit this */
- if (commonDataIndex >= 0) {
- /* "mini-cache" for common ICU data */
- if(commonDataIndex >= UPRV_LENGTHOF(gCommonICUDataArray)) {
- return NULL;
- }
- {
- Mutex lock;
- if(gCommonICUDataArray[commonDataIndex] != NULL) {
- return gCommonICUDataArray[commonDataIndex];
- }
-#if !defined(ICU_DATA_DIR_WINDOWS)
-// When using the Windows system data, we expect only a single data file.
- int32_t i;
- for(i = 0; i < commonDataIndex; ++i) {
- if(gCommonICUDataArray[i]->pHeader == &U_ICUDATA_ENTRY_POINT) {
- /* The linked-in data is already in the list. */
- return NULL;
- }
- }
-#endif
- }
-
- /* Add the linked-in data to the list. */
- /*
- * This is where we would check and call weakly linked partial-data-library
- * access functions.
- */
- /*
- if (uprv_getICUData_collation) {
- setCommonICUDataPointer(uprv_getICUData_collation(), FALSE, pErrorCode);
- }
- if (uprv_getICUData_conversion) {
- setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode);
- }
- */
-#if !defined(ICU_DATA_DIR_WINDOWS)
-// When using the Windows system data, we expect only a single data file.
- setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, FALSE, pErrorCode);
- {
- Mutex lock;
- return gCommonICUDataArray[commonDataIndex];
- }
-#endif
- }
-
-
- /* request is NOT for ICU Data. */
-
- /* Find the base name portion of the supplied path. */
- /* inBasename will be left pointing somewhere within the original path string. */
- inBasename = findBasename(path);
-#ifdef UDATA_DEBUG
- fprintf(stderr, "inBasename = %s\n", inBasename);
-#endif
-
- if(*inBasename==0) {
- /* no basename. This will happen if the original path was a directory name, */
- /* like "a/b/c/". (Fallback to separate files will still work.) */
-#ifdef UDATA_DEBUG
- fprintf(stderr, "ocd: no basename in %s, bailing.\n", path);
-#endif
- if (U_SUCCESS(*pErrorCode)) {
- *pErrorCode=U_FILE_ACCESS_ERROR;
- }
- return NULL;
- }
-
- /* Is the requested common data file already open and cached? */
- /* Note that the cache is keyed by the base name only. The rest of the path, */
- /* if any, is not considered. */
- UDataMemory *dataToReturn = udata_findCachedData(inBasename, *pErrorCode);
- if (dataToReturn != NULL || U_FAILURE(*pErrorCode)) {
- return dataToReturn;
- }
-
- /* Requested item is not in the cache.
- * Hunt it down, trying all the path locations
- */
-
- UDataPathIterator iter(u_getDataDirectory(), inBasename, path, ".dat", TRUE, pErrorCode);
-
- while ((UDataMemory_isLoaded(&tData)==FALSE) && (pathBuffer = iter.next(pErrorCode)) != NULL)
- {
-#ifdef UDATA_DEBUG
- fprintf(stderr, "ocd: trying path %s - ", pathBuffer);
-#endif
- uprv_mapFile(&tData, pathBuffer, pErrorCode);
-#ifdef UDATA_DEBUG
- fprintf(stderr, "%s\n", UDataMemory_isLoaded(&tData)?"LOADED":"not loaded");
-#endif
- }
- if (U_FAILURE(*pErrorCode)) {
- return NULL;
- }
-
-#if defined(OS390_STUBDATA) && defined(OS390BATCH)
- if (!UDataMemory_isLoaded(&tData)) {
- char ourPathBuffer[1024];
- /* One more chance, for extendCommonData() */
- uprv_strncpy(ourPathBuffer, path, 1019);
- ourPathBuffer[1019]=0;
- uprv_strcat(ourPathBuffer, ".dat");
- uprv_mapFile(&tData, ourPathBuffer, pErrorCode);
- }
-#endif
-
- if (U_FAILURE(*pErrorCode)) {
- return NULL;
- }
- if (!UDataMemory_isLoaded(&tData)) {
- /* no common data */
- *pErrorCode=U_FILE_ACCESS_ERROR;
- return NULL;
- }
-
- /* we have mapped a file, check its header */
- udata_checkCommonData(&tData, pErrorCode);
-
-
- /* Cache the UDataMemory struct for this .dat file,
- * so we won't need to hunt it down and map it again next time
- * something is needed from it. */
- return udata_cacheDataItem(inBasename, &tData, pErrorCode);
-}
-
-
-/*----------------------------------------------------------------------*
- * *
- * extendICUData If the full set of ICU data was not loaded at *
- * program startup, load it now. This function will *
- * be called when the lookup of an ICU data item in *
- * the common ICU data fails. *
- * *
- * return true if new data is loaded, false otherwise.*
- * *
- *----------------------------------------------------------------------*/
-static UBool extendICUData(UErrorCode *pErr)
-{
- UDataMemory *pData;
- UDataMemory copyPData;
- UBool didUpdate = FALSE;
-
- /*
- * There is a chance for a race condition here.
- * Normally, ICU data is loaded from a DLL or via mmap() and
- * setCommonICUData() will detect if the same address is set twice.
- * If ICU is built with data loading via fread() then the address will
- * be different each time the common data is loaded and we may add
- * multiple copies of the data.
- * In this case, use a mutex to prevent the race.
- * Use a specific mutex to avoid nested locks of the global mutex.
- */
-#if MAP_IMPLEMENTATION==MAP_STDIO
- static UMutex extendICUDataMutex;
- umtx_lock(&extendICUDataMutex);
-#endif
- if(!umtx_loadAcquire(gHaveTriedToLoadCommonData)) {
- /* See if we can explicitly open a .dat file for the ICUData. */
- pData = openCommonData(
- U_ICUDATA_NAME, /* "icudt20l" , for example. */
- -1, /* Pretend we're not opening ICUData */
- pErr);
-
- /* How about if there is no pData, eh... */
-
- UDataMemory_init(&copyPData);
- if(pData != NULL) {
- UDatamemory_assign(&copyPData, pData);
- copyPData.map = 0; /* The mapping for this data is owned by the hash table */
- copyPData.mapAddr = 0; /* which will unmap it when ICU is shut down. */
- /* CommonICUData is also unmapped when ICU is shut down.*/
- /* To avoid unmapping the data twice, zero out the map */
- /* fields in the UDataMemory that we're assigning */
- /* to CommonICUData. */
-
- didUpdate = /* no longer using this result */
- setCommonICUData(&copyPData,/* The new common data. */
- FALSE, /* No warnings if write didn't happen */
- pErr); /* setCommonICUData honors errors; NOP if error set */
- }
-
- umtx_storeRelease(gHaveTriedToLoadCommonData, 1);
- }
-
- didUpdate = findCommonICUDataByName(U_ICUDATA_NAME, *pErr); /* Return 'true' when a racing writes out the extended */
- /* data after another thread has failed to see it (in openCommonData), so */
- /* extended data can be examined. */
- /* Also handles a race through here before gHaveTriedToLoadCommonData is set. */
-
-#if MAP_IMPLEMENTATION==MAP_STDIO
- umtx_unlock(&extendICUDataMutex);
-#endif
- return didUpdate; /* Return true if ICUData pointer was updated. */
- /* (Could potentially have been done by another thread racing */
- /* us through here, but that's fine, we still return true */
- /* so that current thread will also examine extended data. */
-}
-
-/*----------------------------------------------------------------------*
- * *
- * udata_setCommonData *
- * *
- *----------------------------------------------------------------------*/
-U_CAPI void U_EXPORT2
-udata_setCommonData(const void *data, UErrorCode *pErrorCode) {
- UDataMemory dataMemory;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- if(data==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- /* set the data pointer and test for validity */
- UDataMemory_init(&dataMemory);
- UDataMemory_setData(&dataMemory, data);
- udata_checkCommonData(&dataMemory, pErrorCode);
- if (U_FAILURE(*pErrorCode)) {return;}
-
- /* we have good data */
- /* Set it up as the ICU Common Data. */
- setCommonICUData(&dataMemory, TRUE, pErrorCode);
-}
-
-/*---------------------------------------------------------------------------
- *
- * udata_setAppData
- *
- *---------------------------------------------------------------------------- */
-U_CAPI void U_EXPORT2
-udata_setAppData(const char *path, const void *data, UErrorCode *err)
-{
- UDataMemory udm;
-
- if(err==NULL || U_FAILURE(*err)) {
- return;
- }
- if(data==NULL) {
- *err=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- UDataMemory_init(&udm);
- UDataMemory_setData(&udm, data);
- udata_checkCommonData(&udm, err);
- udata_cacheDataItem(path, &udm, err);
-}
-
-/*----------------------------------------------------------------------------*
- * *
- * checkDataItem Given a freshly located/loaded data item, either *
- * an entry in a common file or a separately loaded file, *
- * sanity check its header, and see if the data is *
- * acceptable to the app. *
- * If the data is good, create and return a UDataMemory *
- * object that can be returned to the application. *
- * Return NULL on any sort of failure. *
- * *
- *----------------------------------------------------------------------------*/
-static UDataMemory *
-checkDataItem
-(
- const DataHeader *pHeader, /* The data item to be checked. */
- UDataMemoryIsAcceptable *isAcceptable, /* App's call-back function */
- void *context, /* pass-thru param for above. */
- const char *type, /* pass-thru param for above. */
- const char *name, /* pass-thru param for above. */
- UErrorCode *nonFatalErr, /* Error code if this data was not acceptable */
- /* but openChoice should continue with */
- /* trying to get data from fallback path. */
- UErrorCode *fatalErr /* Bad error, caller should return immediately */
- )
-{
- UDataMemory *rDataMem = NULL; /* the new UDataMemory, to be returned. */
-
- if (U_FAILURE(*fatalErr)) {
- return NULL;
- }
-
- if(pHeader->dataHeader.magic1==0xda &&
- pHeader->dataHeader.magic2==0x27 &&
- (isAcceptable==NULL || isAcceptable(context, type, name, &pHeader->info))
- ) {
- rDataMem=UDataMemory_createNewInstance(fatalErr);
- if (U_FAILURE(*fatalErr)) {
- return NULL;
- }
- rDataMem->pHeader = pHeader;
- } else {
- /* the data is not acceptable, look further */
- /* If we eventually find something good, this errorcode will be */
- /* cleared out. */
- *nonFatalErr=U_INVALID_FORMAT_ERROR;
- }
- return rDataMem;
-}
-
-/**
- * @return 0 if not loaded, 1 if loaded or err
- */
-static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
- const char *dataPath, const char *tocEntryPathSuffix,
- /* following arguments are the same as doOpenChoice itself */
- const char *path, const char *type, const char *name,
- UDataMemoryIsAcceptable *isAcceptable, void *context,
- UErrorCode *subErrorCode,
- UErrorCode *pErrorCode)
-{
- const char *pathBuffer;
- UDataMemory dataMemory;
- UDataMemory *pEntryData;
-
- /* look in ind. files: package\nam.typ ========================= */
- /* init path iterator for individual files */
- UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode);
-
- while ((pathBuffer = iter.next(pErrorCode)) != NULL)
- {
-#ifdef UDATA_DEBUG
- fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer);
-#endif
- if (uprv_mapFile(&dataMemory, pathBuffer, pErrorCode))
- {
- pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
- if (pEntryData != NULL) {
- /* Data is good.
- * Hand off ownership of the backing memory to the user's UDataMemory.
- * and return it. */
- pEntryData->mapAddr = dataMemory.mapAddr;
- pEntryData->map = dataMemory.map;
-
-#ifdef UDATA_DEBUG
- fprintf(stderr, "** Mapped file: %s\n", pathBuffer);
-#endif
- return pEntryData;
- }
-
- /* the data is not acceptable, or some error occurred. Either way, unmap the memory */
- udata_close(&dataMemory);
-
- /* If we had a nasty error, bail out completely. */
- if (U_FAILURE(*pErrorCode)) {
- return NULL;
- }
-
- /* Otherwise remember that we found data but didn't like it for some reason */
- *subErrorCode=U_INVALID_FORMAT_ERROR;
- }
-#ifdef UDATA_DEBUG
- fprintf(stderr, "%s\n", UDataMemory_isLoaded(&dataMemory)?"LOADED":"not loaded");
-#endif
- }
- return NULL;
-}
-
-/**
- * @return 0 if not loaded, 1 if loaded or err
- */
-static UDataMemory *doLoadFromCommonData(UBool isICUData, const char * /*pkgName*/,
- const char * /*dataPath*/, const char * /*tocEntryPathSuffix*/, const char *tocEntryName,
- /* following arguments are the same as doOpenChoice itself */
- const char *path, const char *type, const char *name,
- UDataMemoryIsAcceptable *isAcceptable, void *context,
- UErrorCode *subErrorCode,
- UErrorCode *pErrorCode)
-{
- UDataMemory *pEntryData;
- const DataHeader *pHeader;
- UDataMemory *pCommonData;
- int32_t commonDataIndex;
- UBool checkedExtendedICUData = FALSE;
- /* try to get common data. The loop is for platforms such as the 390 that do
- * not initially load the full set of ICU data. If the lookup of an ICU data item
- * fails, the full (but slower to load) set is loaded, the and the loop repeats,
- * trying the lookup again. Once the full set of ICU data is loaded, the loop wont
- * repeat because the full set will be checked the first time through.
- *
- * The loop also handles the fallback to a .dat file if the application linked
- * to the stub data library rather than a real library.
- */
- for (commonDataIndex = isICUData ? 0 : -1;;) {
- pCommonData=openCommonData(path, commonDataIndex, subErrorCode); /** search for pkg **/
-
- if(U_SUCCESS(*subErrorCode) && pCommonData!=NULL) {
- int32_t length;
-
- /* look up the data piece in the common data */
- pHeader=pCommonData->vFuncs->Lookup(pCommonData, tocEntryName, &length, subErrorCode);
-#ifdef UDATA_DEBUG
- fprintf(stderr, "%s: pHeader=%p - %s\n", tocEntryName, (void*) pHeader, u_errorName(*subErrorCode));
-#endif
-
- if(pHeader!=NULL) {
- pEntryData = checkDataItem(pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
-#ifdef UDATA_DEBUG
- fprintf(stderr, "pEntryData=%p\n", (void*) pEntryData);
-#endif
- if (U_FAILURE(*pErrorCode)) {
- return NULL;
- }
- if (pEntryData != NULL) {
- pEntryData->length = length;
- return pEntryData;
- }
- }
- }
- // If we failed due to being out-of-memory, then stop early and report the error.
- if (*subErrorCode == U_MEMORY_ALLOCATION_ERROR) {
- *pErrorCode = *subErrorCode;
- return NULL;
- }
- /* Data wasn't found. If we were looking for an ICUData item and there is
- * more data available, load it and try again,
- * otherwise break out of this loop. */
- if (!isICUData) {
- return NULL;
- } else if (pCommonData != NULL) {
- ++commonDataIndex; /* try the next data package */
- } else if ((!checkedExtendedICUData) && extendICUData(subErrorCode)) {
- checkedExtendedICUData = TRUE;
- /* try this data package slot again: it changed from NULL to non-NULL */
- } else {
- return NULL;
- }
- }
-}
-
-/*
- * Identify the Time Zone resources that are subject to special override data loading.
- */
-static UBool isTimeZoneFile(const char *name, const char *type) {
- return ((uprv_strcmp(type, "res") == 0) &&
- (uprv_strcmp(name, "zoneinfo64") == 0 ||
- uprv_strcmp(name, "timezoneTypes") == 0 ||
- uprv_strcmp(name, "windowsZones") == 0 ||
- uprv_strcmp(name, "metaZones") == 0));
-}
-
-/*
- * A note on the ownership of Mapped Memory
- *
- * For common format files, ownership resides with the UDataMemory object
- * that lives in the cache of opened common data. These UDataMemorys are private
- * to the udata implementation, and are never seen directly by users.
- *
- * The UDataMemory objects returned to users will have the address of some desired
- * data within the mapped region, but they wont have the mapping info itself, and thus
- * won't cause anything to be removed from memory when they are closed.
- *
- * For individual data files, the UDataMemory returned to the user holds the
- * information necessary to unmap the data on close. If the user independently
- * opens the same data file twice, two completely independent mappings will be made.
- * (There is no cache of opened data items from individual files, only a cache of
- * opened Common Data files, that is, files containing a collection of data items.)
- *
- * For common data passed in from the user via udata_setAppData() or
- * udata_setCommonData(), ownership remains with the user.
- *
- * UDataMemory objects themselves, as opposed to the memory they describe,
- * can be anywhere - heap, stack/local or global.
- * They have a flag to indicate when they're heap allocated and thus
- * must be deleted when closed.
- */
-
-
-/*----------------------------------------------------------------------------*
- * *
- * main data loading functions *
- * *
- *----------------------------------------------------------------------------*/
-static UDataMemory *
-doOpenChoice(const char *path, const char *type, const char *name,
- UDataMemoryIsAcceptable *isAcceptable, void *context,
- UErrorCode *pErrorCode)
-{
- UDataMemory *retVal = NULL;
-
- const char *dataPath;
-
- int32_t tocEntrySuffixIndex;
- const char *tocEntryPathSuffix;
- UErrorCode subErrorCode=U_ZERO_ERROR;
- const char *treeChar;
-
- UBool isICUData = FALSE;
-
-
- FileTracer::traceOpen(path, type, name);
-
-
- /* Is this path ICU data? */
- if(path == NULL ||
- !strcmp(path, U_ICUDATA_ALIAS) || /* "ICUDATA" */
- !uprv_strncmp(path, U_ICUDATA_NAME U_TREE_SEPARATOR_STRING, /* "icudt26e-" */
- uprv_strlen(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING)) ||
- !uprv_strncmp(path, U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING, /* "ICUDATA-" */
- uprv_strlen(U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING))) {
- isICUData = TRUE;
- }
-
-#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) /* Windows: try "foo\bar" and "foo/bar" */
- /* remap from alternate path char to the main one */
- CharString altSepPath;
- if(path) {
- if(uprv_strchr(path,U_FILE_ALT_SEP_CHAR) != NULL) {
- altSepPath.append(path, *pErrorCode);
- char *p;
- while ((p = uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR)) != NULL) {
- *p = U_FILE_SEP_CHAR;
- }
-#if defined (UDATA_DEBUG)
- fprintf(stderr, "Changed path from [%s] to [%s]\n", path, altSepPath.s);
-#endif
- path = altSepPath.data();
- }
- }
-#endif
-
- CharString tocEntryName; /* entry name in tree format. ex: 'icudt28b/coll/ar.res' */
- CharString tocEntryPath; /* entry name in path format. ex: 'icudt28b\\coll\\ar.res' */
-
- CharString pkgName;
- CharString treeName;
-
- /* ======= Set up strings */
- if(path==NULL) {
- pkgName.append(U_ICUDATA_NAME, *pErrorCode);
- } else {
- const char *pkg;
- const char *first;
- pkg = uprv_strrchr(path, U_FILE_SEP_CHAR);
- first = uprv_strchr(path, U_FILE_SEP_CHAR);
- if(uprv_pathIsAbsolute(path) || (pkg != first)) { /* more than one slash in the path- not a tree name */
- /* see if this is an /absolute/path/to/package path */
- if(pkg) {
- pkgName.append(pkg+1, *pErrorCode);
- } else {
- pkgName.append(path, *pErrorCode);
- }
- } else {
- treeChar = uprv_strchr(path, U_TREE_SEPARATOR);
- if(treeChar) {
- treeName.append(treeChar+1, *pErrorCode); /* following '-' */
- if(isICUData) {
- pkgName.append(U_ICUDATA_NAME, *pErrorCode);
- } else {
- pkgName.append(path, (int32_t)(treeChar-path), *pErrorCode);
- if (first == NULL) {
- /*
- This user data has no path, but there is a tree name.
- Look up the correct path from the data cache later.
- */
- path = pkgName.data();
- }
- }
- } else {
- if(isICUData) {
- pkgName.append(U_ICUDATA_NAME, *pErrorCode);
- } else {
- pkgName.append(path, *pErrorCode);
- }
- }
- }
- }
-
-#ifdef UDATA_DEBUG
- fprintf(stderr, " P=%s T=%s\n", pkgName.data(), treeName.data());
-#endif
-
- /* setting up the entry name and file name
- * Make up a full name by appending the type to the supplied
- * name, assuming that a type was supplied.
- */
-
- /* prepend the package */
- tocEntryName.append(pkgName, *pErrorCode);
- tocEntryPath.append(pkgName, *pErrorCode);
- tocEntrySuffixIndex = tocEntryName.length();
-
- if(!treeName.isEmpty()) {
- tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
- tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
- }
-
- tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
- tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
- if(type!=NULL && *type!=0) {
- tocEntryName.append(".", *pErrorCode).append(type, *pErrorCode);
- tocEntryPath.append(".", *pErrorCode).append(type, *pErrorCode);
- }
- // The +1 is for the U_FILE_SEP_CHAR that is always appended above.
- tocEntryPathSuffix = tocEntryPath.data() + tocEntrySuffixIndex + 1; /* suffix starts here */
-
-#ifdef UDATA_DEBUG
- fprintf(stderr, " tocEntryName = %s\n", tocEntryName.data());
- fprintf(stderr, " tocEntryPath = %s\n", tocEntryName.data());
-#endif
-
-#if !defined(ICU_DATA_DIR_WINDOWS)
- if(path == NULL) {
- path = COMMON_DATA_NAME; /* "icudt26e" */
- }
-#else
- // When using the Windows system data, we expects only a single data file.
- path = COMMON_DATA_NAME; /* "icudt26e" */
-#endif
-
- /************************ Begin loop looking for ind. files ***************/
-#ifdef UDATA_DEBUG
- fprintf(stderr, "IND: inBasename = %s, pkg=%s\n", "(n/a)", packageNameFromPath(path));
-#endif
-
- /* End of dealing with a null basename */
- dataPath = u_getDataDirectory();
-
- /**** Time zone individual files override */
- if (isICUData && isTimeZoneFile(name, type)) {
- const char *tzFilesDir = u_getTimeZoneFilesDirectory(pErrorCode);
- if (tzFilesDir[0] != 0) {
-#ifdef UDATA_DEBUG
- fprintf(stderr, "Trying Time Zone Files directory = %s\n", tzFilesDir);
-#endif
- retVal = doLoadFromIndividualFiles(/* pkgName.data() */ "", tzFilesDir, tocEntryPathSuffix,
- /* path */ "", type, name, isAcceptable, context, &subErrorCode, pErrorCode);
- if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
- return retVal;
- }
- }
- }
-
- /**** COMMON PACKAGE - only if packages are first. */
- if(gDataFileAccess == UDATA_PACKAGES_FIRST) {
-#ifdef UDATA_DEBUG
- fprintf(stderr, "Trying packages (UDATA_PACKAGES_FIRST)\n");
-#endif
- /* #2 */
- retVal = doLoadFromCommonData(isICUData,
- pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
- path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
- if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
- return retVal;
- }
- }
-
- /**** INDIVIDUAL FILES */
- if((gDataFileAccess==UDATA_PACKAGES_FIRST) ||
- (gDataFileAccess==UDATA_FILES_FIRST)) {
-#ifdef UDATA_DEBUG
- fprintf(stderr, "Trying individual files\n");
-#endif
- /* Check to make sure that there is a dataPath to iterate over */
- if ((dataPath && *dataPath) || !isICUData) {
- retVal = doLoadFromIndividualFiles(pkgName.data(), dataPath, tocEntryPathSuffix,
- path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
- if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
- return retVal;
- }
- }
- }
-
- /**** COMMON PACKAGE */
- if((gDataFileAccess==UDATA_ONLY_PACKAGES) ||
- (gDataFileAccess==UDATA_FILES_FIRST)) {
-#ifdef UDATA_DEBUG
- fprintf(stderr, "Trying packages (UDATA_ONLY_PACKAGES || UDATA_FILES_FIRST)\n");
-#endif
- retVal = doLoadFromCommonData(isICUData,
- pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
- path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
- if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
- return retVal;
- }
- }
-
- /* Load from DLL. If we haven't attempted package load, we also haven't had any chance to
- try a DLL (static or setCommonData/etc) load.
- If we ever have a "UDATA_ONLY_FILES", add it to the or list here. */
- if(gDataFileAccess==UDATA_NO_FILES) {
-#ifdef UDATA_DEBUG
- fprintf(stderr, "Trying common data (UDATA_NO_FILES)\n");
-#endif
- retVal = doLoadFromCommonData(isICUData,
- pkgName.data(), "", tocEntryPathSuffix, tocEntryName.data(),
- path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
- if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
- return retVal;
- }
- }
-
- /* data not found */
- if(U_SUCCESS(*pErrorCode)) {
- if(U_SUCCESS(subErrorCode)) {
- /* file not found */
- *pErrorCode=U_FILE_ACCESS_ERROR;
- } else {
- /* entry point not found or rejected */
- *pErrorCode=subErrorCode;
- }
- }
- return retVal;
-}
-
-
-
-/* API ---------------------------------------------------------------------- */
-
-U_CAPI UDataMemory * U_EXPORT2
-udata_open(const char *path, const char *type, const char *name,
- UErrorCode *pErrorCode) {
-#ifdef UDATA_DEBUG
- fprintf(stderr, "udata_open(): Opening: %s : %s . %s\n", (path?path:"NULL"), name, type);
- fflush(stderr);
-#endif
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return NULL;
- } else if(name==NULL || *name==0) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- } else {
- return doOpenChoice(path, type, name, NULL, NULL, pErrorCode);
- }
-}
-
-
-
-U_CAPI UDataMemory * U_EXPORT2
-udata_openChoice(const char *path, const char *type, const char *name,
- UDataMemoryIsAcceptable *isAcceptable, void *context,
- UErrorCode *pErrorCode) {
-#ifdef UDATA_DEBUG
- fprintf(stderr, "udata_openChoice(): Opening: %s : %s . %s\n", (path?path:"NULL"), name, type);
-#endif
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return NULL;
- } else if(name==NULL || *name==0 || isAcceptable==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- } else {
- return doOpenChoice(path, type, name, isAcceptable, context, pErrorCode);
- }
-}
-
-
-
-U_CAPI void U_EXPORT2
-udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) {
- if(pInfo!=NULL) {
- if(pData!=NULL && pData->pHeader!=NULL) {
- const UDataInfo *info=&pData->pHeader->info;
- uint16_t dataInfoSize=udata_getInfoSize(info);
- if(pInfo->size>dataInfoSize) {
- pInfo->size=dataInfoSize;
- }
- uprv_memcpy((uint16_t *)pInfo+1, (const uint16_t *)info+1, pInfo->size-2);
- if(info->isBigEndian!=U_IS_BIG_ENDIAN) {
- /* opposite endianness */
- uint16_t x=info->reservedWord;
- pInfo->reservedWord=(uint16_t)((x<<8)|(x>>8));
- }
- } else {
- pInfo->size=0;
- }
- }
-}
-
-
-U_CAPI void U_EXPORT2 udata_setFileAccess(UDataFileAccess access, UErrorCode * /*status*/)
-{
- // Note: this function is documented as not thread safe.
- gDataFileAccess = access;
-}
diff --git a/contrib/libs/icu/common/udatamem.cpp b/contrib/libs/icu/common/udatamem.cpp
deleted file mode 100644
index 6bf7c01235c..00000000000
--- a/contrib/libs/icu/common/udatamem.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1999-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************/
-
-
-/*----------------------------------------------------------------------------------
- *
- * UDataMemory A class-like struct that serves as a handle to a piece of memory
- * that contains some ICU data (resource, converters, whatever.)
- *
- * When an application opens ICU data (with udata_open, for example,
- * a UDataMemory * is returned.
- *
- *----------------------------------------------------------------------------------*/
-
-#include "unicode/utypes.h"
-#include "cmemory.h"
-#include "unicode/udata.h"
-
-#include "udatamem.h"
-
-U_CFUNC void UDataMemory_init(UDataMemory *This) {
- uprv_memset(This, 0, sizeof(UDataMemory));
- This->length=-1;
-}
-
-
-U_CFUNC void UDatamemory_assign(UDataMemory *dest, UDataMemory *source) {
- /* UDataMemory Assignment. Destination UDataMemory must be initialized first. */
- UBool mallocedFlag = dest->heapAllocated;
- uprv_memcpy(dest, source, sizeof(UDataMemory));
- dest->heapAllocated = mallocedFlag;
-}
-
-U_CFUNC UDataMemory *UDataMemory_createNewInstance(UErrorCode *pErr) {
- UDataMemory *This;
-
- if (U_FAILURE(*pErr)) {
- return NULL;
- }
- This = (UDataMemory *)uprv_malloc(sizeof(UDataMemory));
- if (This == NULL) {
- *pErr = U_MEMORY_ALLOCATION_ERROR; }
- else {
- UDataMemory_init(This);
- This->heapAllocated = TRUE;
- }
- return This;
-}
-
-
-U_CFUNC const DataHeader *
-UDataMemory_normalizeDataPointer(const void *p) {
- /* allow the data to be optionally prepended with an alignment-forcing double value */
- const DataHeader *pdh = (const DataHeader *)p;
- if(pdh==NULL || (pdh->dataHeader.magic1==0xda && pdh->dataHeader.magic2==0x27)) {
- return pdh;
- } else {
-#if U_PLATFORM == U_PF_OS400
- /*
- TODO: Fix this once the compiler implements this feature. Keep in sync with genccode.c
-
- This is here because this platform can't currently put
- const data into the read-only pages of an object or
- shared library (service program). Only strings are allowed in read-only
- pages, so we use char * strings to store the data.
-
- In order to prevent the beginning of the data from ever matching the
- magic numbers we must skip the initial double.
- [grhoten 4/24/2003]
- */
- return (const DataHeader *)*((const void **)p+1);
-#else
- return (const DataHeader *)((const double *)p+1);
-#endif
- }
-}
-
-
-U_CFUNC void UDataMemory_setData (UDataMemory *This, const void *dataAddr) {
- This->pHeader = UDataMemory_normalizeDataPointer(dataAddr);
-}
-
-
-U_CAPI void U_EXPORT2
-udata_close(UDataMemory *pData) {
- if(pData!=NULL) {
- uprv_unmapFile(pData);
- if(pData->heapAllocated ) {
- uprv_free(pData);
- } else {
- UDataMemory_init(pData);
- }
- }
-}
-
-U_CAPI const void * U_EXPORT2
-udata_getMemory(UDataMemory *pData) {
- if(pData!=NULL && pData->pHeader!=NULL) {
- return (char *)(pData->pHeader)+udata_getHeaderSize(pData->pHeader);
- } else {
- return NULL;
- }
-}
-
-/**
- * Get the length of the data item if possible.
- * The length may be up to 15 bytes larger than the actual data.
- *
- * TODO Consider making this function public.
- * It would have to return the actual length in more cases.
- * For example, the length of the last item in a .dat package could be
- * computed from the size of the whole .dat package minus the offset of the
- * last item.
- * The size of a file that was directly memory-mapped could be determined
- * using some system API.
- *
- * In order to get perfect values for all data items, we may have to add a
- * length field to UDataInfo, but that complicates data generation
- * and may be overkill.
- *
- * @param pData The data item.
- * @return the length of the data item, or -1 if not known
- * @internal Currently used only in cintltst/udatatst.c
- */
-U_CAPI int32_t U_EXPORT2
-udata_getLength(const UDataMemory *pData) {
- if(pData!=NULL && pData->pHeader!=NULL && pData->length>=0) {
- /*
- * subtract the header size,
- * return only the size of the actual data starting at udata_getMemory()
- */
- return pData->length-udata_getHeaderSize(pData->pHeader);
- } else {
- return -1;
- }
-}
-
-/**
- * Get the memory including the data header.
- * Used in cintltst/udatatst.c
- * @internal
- */
-U_CAPI const void * U_EXPORT2
-udata_getRawMemory(const UDataMemory *pData) {
- if(pData!=NULL && pData->pHeader!=NULL) {
- return pData->pHeader;
- } else {
- return NULL;
- }
-}
-
-U_CFUNC UBool UDataMemory_isLoaded(const UDataMemory *This) {
- return This->pHeader != NULL;
-}
diff --git a/contrib/libs/icu/common/udatamem.h b/contrib/libs/icu/common/udatamem.h
deleted file mode 100644
index a05dd697568..00000000000
--- a/contrib/libs/icu/common/udatamem.h
+++ /dev/null
@@ -1,61 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1999-2010, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************/
-
-
-/*----------------------------------------------------------------------------------
- *
- * UDataMemory A class-like struct that serves as a handle to a piece of memory
- * that contains some ICU data (resource, converters, whatever.)
- *
- * When an application opens ICU data (with udata_open, for example,
- * a UDataMemory * is returned.
- *
- *----------------------------------------------------------------------------------*/
-#ifndef __UDATAMEM_H__
-#define __UDATAMEM_H__
-
-#include "unicode/udata.h"
-#include "ucmndata.h"
-
-struct UDataMemory {
- const commonDataFuncs *vFuncs; /* Function Pointers for accessing TOC */
-
- const DataHeader *pHeader; /* Header of the memory being described by this */
- /* UDataMemory object. */
- const void *toc; /* For common memory, table of contents for */
- /* the pieces within. */
- UBool heapAllocated; /* True if this UDataMemory Object is on the */
- /* heap and thus needs to be deleted when closed. */
-
- void *mapAddr; /* For mapped or allocated memory, the start addr. */
- /* Only non-null if a close operation should unmap */
- /* the associated data. */
- void *map; /* Handle, or other data, OS dependent. */
- /* Only non-null if a close operation should unmap */
- /* the associated data, and additional info */
- /* beyond the mapAddr is needed to do that. */
- int32_t length; /* Length of the data in bytes; -1 if unknown. */
-};
-
-U_CFUNC UDataMemory *UDataMemory_createNewInstance(UErrorCode *pErr);
-U_CFUNC void UDatamemory_assign (UDataMemory *dest, UDataMemory *source);
-U_CFUNC void UDataMemory_init (UDataMemory *This);
-U_CFUNC UBool UDataMemory_isLoaded(const UDataMemory *This);
-U_CFUNC void UDataMemory_setData (UDataMemory *This, const void *dataAddr);
-
-U_CFUNC const DataHeader *UDataMemory_normalizeDataPointer(const void *p);
-
-U_CAPI int32_t U_EXPORT2
-udata_getLength(const UDataMemory *pData);
-
-U_CAPI const void * U_EXPORT2
-udata_getRawMemory(const UDataMemory *pData);
-
-#endif
diff --git a/contrib/libs/icu/common/udataswp.cpp b/contrib/libs/icu/common/udataswp.cpp
deleted file mode 100644
index 86f302bd9c3..00000000000
--- a/contrib/libs/icu/common/udataswp.cpp
+++ /dev/null
@@ -1,473 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2003-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: udataswp.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2003jun05
-* created by: Markus W. Scherer
-*
-* Definitions for ICU data transformations for different platforms,
-* changing between big- and little-endian data and/or between
-* charset families (ASCII<->EBCDIC).
-*/
-
-#include <stdarg.h>
-#include "unicode/utypes.h"
-#include "unicode/udata.h" /* UDataInfo */
-#include "ucmndata.h" /* DataHeader */
-#include "cmemory.h"
-#include "udataswp.h"
-
-/* swapping primitives ------------------------------------------------------ */
-
-static int32_t U_CALLCONV
-uprv_swapArray16(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const uint16_t *p;
- uint16_t *q;
- int32_t count;
- uint16_t x;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* setup and swapping */
- p=(const uint16_t *)inData;
- q=(uint16_t *)outData;
- count=length/2;
- while(count>0) {
- x=*p++;
- *q++=(uint16_t)((x<<8)|(x>>8));
- --count;
- }
-
- return length;
-}
-
-static int32_t U_CALLCONV
-uprv_copyArray16(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- if(length>0 && inData!=outData) {
- uprv_memcpy(outData, inData, length);
- }
- return length;
-}
-
-static int32_t U_CALLCONV
-uprv_swapArray32(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const uint32_t *p;
- uint32_t *q;
- int32_t count;
- uint32_t x;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* setup and swapping */
- p=(const uint32_t *)inData;
- q=(uint32_t *)outData;
- count=length/4;
- while(count>0) {
- x=*p++;
- *q++=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24));
- --count;
- }
-
- return length;
-}
-
-static int32_t U_CALLCONV
-uprv_copyArray32(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- if(length>0 && inData!=outData) {
- uprv_memcpy(outData, inData, length);
- }
- return length;
-}
-
-static int32_t U_CALLCONV
-uprv_swapArray64(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const uint64_t *p;
- uint64_t *q;
- int32_t count;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* setup and swapping */
- p=(const uint64_t *)inData;
- q=(uint64_t *)outData;
- count=length/8;
- while(count>0) {
- uint64_t x=*p++;
- x=(x<<56)|((x&0xff00)<<40)|((x&0xff0000)<<24)|((x&0xff000000)<<8)|
- ((x>>8)&0xff000000)|((x>>24)&0xff0000)|((x>>40)&0xff00)|(x>>56);
- *q++=x;
- --count;
- }
-
- return length;
-}
-
-static int32_t U_CALLCONV
-uprv_copyArray64(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- if(length>0 && inData!=outData) {
- uprv_memcpy(outData, inData, length);
- }
- return length;
-}
-
-static uint16_t U_CALLCONV
-uprv_readSwapUInt16(uint16_t x) {
- return (uint16_t)((x<<8)|(x>>8));
-}
-
-static uint16_t U_CALLCONV
-uprv_readDirectUInt16(uint16_t x) {
- return x;
-}
-
-static uint32_t U_CALLCONV
-uprv_readSwapUInt32(uint32_t x) {
- return (uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24));
-}
-
-static uint32_t U_CALLCONV
-uprv_readDirectUInt32(uint32_t x) {
- return x;
-}
-
-static void U_CALLCONV
-uprv_writeSwapUInt16(uint16_t *p, uint16_t x) {
- *p=(uint16_t)((x<<8)|(x>>8));
-}
-
-static void U_CALLCONV
-uprv_writeDirectUInt16(uint16_t *p, uint16_t x) {
- *p=x;
-}
-
-static void U_CALLCONV
-uprv_writeSwapUInt32(uint32_t *p, uint32_t x) {
- *p=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24));
-}
-
-static void U_CALLCONV
-uprv_writeDirectUInt32(uint32_t *p, uint32_t x) {
- *p=x;
-}
-
-U_CAPI int16_t U_EXPORT2
-udata_readInt16(const UDataSwapper *ds, int16_t x) {
- return (int16_t)ds->readUInt16((uint16_t)x);
-}
-
-U_CAPI int32_t U_EXPORT2
-udata_readInt32(const UDataSwapper *ds, int32_t x) {
- return (int32_t)ds->readUInt32((uint32_t)x);
-}
-
-/**
- * Swap a block of invariant, NUL-terminated strings, but not padding
- * bytes after the last string.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-udata_swapInvStringBlock(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const char *inChars;
- int32_t stringsLength;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* reduce the strings length to not include bytes after the last NUL */
- inChars=(const char *)inData;
- stringsLength=length;
- while(stringsLength>0 && inChars[stringsLength-1]!=0) {
- --stringsLength;
- }
-
- /* swap up to the last NUL */
- ds->swapInvChars(ds, inData, stringsLength, outData, pErrorCode);
-
- /* copy the bytes after the last NUL */
- if(inData!=outData && length>stringsLength) {
- uprv_memcpy((char *)outData+stringsLength, inChars+stringsLength, length-stringsLength);
- }
-
- /* return the length including padding bytes */
- if(U_SUCCESS(*pErrorCode)) {
- return length;
- } else {
- return 0;
- }
-}
-
-U_CAPI void U_EXPORT2
-udata_printError(const UDataSwapper *ds,
- const char *fmt,
- ...) {
- va_list args;
-
- if(ds->printError!=NULL) {
- va_start(args, fmt);
- ds->printError(ds->printErrorContext, fmt, args);
- va_end(args);
- }
-}
-
-/* swap a data header ------------------------------------------------------- */
-
-U_CAPI int32_t U_EXPORT2
-udata_swapDataHeader(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const DataHeader *pHeader;
- uint16_t headerSize, infoSize;
-
- /* argument checking */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* check minimum length and magic bytes */
- pHeader=(const DataHeader *)inData;
- if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
- pHeader->dataHeader.magic1!=0xda ||
- pHeader->dataHeader.magic2!=0x27 ||
- pHeader->info.sizeofUChar!=2
- ) {
- udata_printError(ds, "udata_swapDataHeader(): initial bytes do not look like ICU data\n");
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- headerSize=ds->readUInt16(pHeader->dataHeader.headerSize);
- infoSize=ds->readUInt16(pHeader->info.size);
-
- if( headerSize<sizeof(DataHeader) ||
- infoSize<sizeof(UDataInfo) ||
- headerSize<(sizeof(pHeader->dataHeader)+infoSize) ||
- (length>=0 && length<headerSize)
- ) {
- udata_printError(ds, "udata_swapDataHeader(): header size mismatch - headerSize %d infoSize %d length %d\n",
- headerSize, infoSize, length);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- if(length>0) {
- DataHeader *outHeader;
- const char *s;
- int32_t maxLength;
-
- /* Most of the fields are just bytes and need no swapping. */
- if(inData!=outData) {
- uprv_memcpy(outData, inData, headerSize);
- }
- outHeader=(DataHeader *)outData;
-
- outHeader->info.isBigEndian = ds->outIsBigEndian;
- outHeader->info.charsetFamily = ds->outCharset;
-
- /* swap headerSize */
- ds->swapArray16(ds, &pHeader->dataHeader.headerSize, 2, &outHeader->dataHeader.headerSize, pErrorCode);
-
- /* swap UDataInfo size and reservedWord */
- ds->swapArray16(ds, &pHeader->info.size, 4, &outHeader->info.size, pErrorCode);
-
- /* swap copyright statement after the UDataInfo */
- infoSize+=sizeof(pHeader->dataHeader);
- s=(const char *)inData+infoSize;
- maxLength=headerSize-infoSize;
- /* get the length of the string */
- for(length=0; length<maxLength && s[length]!=0; ++length) {}
- /* swap the string contents */
- ds->swapInvChars(ds, s, length, (char *)outData+infoSize, pErrorCode);
- }
-
- return headerSize;
-}
-
-/* API functions ------------------------------------------------------------ */
-
-U_CAPI UDataSwapper * U_EXPORT2
-udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset,
- UBool outIsBigEndian, uint8_t outCharset,
- UErrorCode *pErrorCode) {
- UDataSwapper *swapper;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return NULL;
- }
- if(inCharset>U_EBCDIC_FAMILY || outCharset>U_EBCDIC_FAMILY) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- /* allocate the swapper */
- swapper=(UDataSwapper *)uprv_malloc(sizeof(UDataSwapper));
- if(swapper==NULL) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- uprv_memset(swapper, 0, sizeof(UDataSwapper));
-
- /* set values and functions pointers according to in/out parameters */
- swapper->inIsBigEndian=inIsBigEndian;
- swapper->inCharset=inCharset;
- swapper->outIsBigEndian=outIsBigEndian;
- swapper->outCharset=outCharset;
-
- swapper->readUInt16= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt16 : uprv_readSwapUInt16;
- swapper->readUInt32= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt32 : uprv_readSwapUInt32;
-
- swapper->writeUInt16= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt16 : uprv_writeSwapUInt16;
- swapper->writeUInt32= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt32 : uprv_writeSwapUInt32;
-
- swapper->compareInvChars= outCharset==U_ASCII_FAMILY ? uprv_compareInvAscii : uprv_compareInvEbcdic;
-
- if(inIsBigEndian==outIsBigEndian) {
- swapper->swapArray16=uprv_copyArray16;
- swapper->swapArray32=uprv_copyArray32;
- swapper->swapArray64=uprv_copyArray64;
- } else {
- swapper->swapArray16=uprv_swapArray16;
- swapper->swapArray32=uprv_swapArray32;
- swapper->swapArray64=uprv_swapArray64;
- }
-
- if(inCharset==U_ASCII_FAMILY) {
- swapper->swapInvChars= outCharset==U_ASCII_FAMILY ? uprv_copyAscii : uprv_ebcdicFromAscii;
- } else /* U_EBCDIC_FAMILY */ {
- swapper->swapInvChars= outCharset==U_EBCDIC_FAMILY ? uprv_copyEbcdic : uprv_asciiFromEbcdic;
- }
-
- return swapper;
-}
-
-U_CAPI UDataSwapper * U_EXPORT2
-udata_openSwapperForInputData(const void *data, int32_t length,
- UBool outIsBigEndian, uint8_t outCharset,
- UErrorCode *pErrorCode) {
- const DataHeader *pHeader;
- uint16_t headerSize, infoSize;
- UBool inIsBigEndian;
- int8_t inCharset;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return NULL;
- }
- if( data==NULL ||
- (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
- outCharset>U_EBCDIC_FAMILY
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- pHeader=(const DataHeader *)data;
- if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
- pHeader->dataHeader.magic1!=0xda ||
- pHeader->dataHeader.magic2!=0x27 ||
- pHeader->info.sizeofUChar!=2
- ) {
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- inIsBigEndian=(UBool)pHeader->info.isBigEndian;
- inCharset=pHeader->info.charsetFamily;
-
- if(inIsBigEndian==U_IS_BIG_ENDIAN) {
- headerSize=pHeader->dataHeader.headerSize;
- infoSize=pHeader->info.size;
- } else {
- headerSize=uprv_readSwapUInt16(pHeader->dataHeader.headerSize);
- infoSize=uprv_readSwapUInt16(pHeader->info.size);
- }
-
- if( headerSize<sizeof(DataHeader) ||
- infoSize<sizeof(UDataInfo) ||
- headerSize<(sizeof(pHeader->dataHeader)+infoSize) ||
- (length>=0 && length<headerSize)
- ) {
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- return udata_openSwapper(inIsBigEndian, inCharset, outIsBigEndian, outCharset, pErrorCode);
-}
-
-U_CAPI void U_EXPORT2
-udata_closeSwapper(UDataSwapper *ds) {
- uprv_free(ds);
-}
diff --git a/contrib/libs/icu/common/udataswp.h b/contrib/libs/icu/common/udataswp.h
deleted file mode 100644
index 5e7b043c4c9..00000000000
--- a/contrib/libs/icu/common/udataswp.h
+++ /dev/null
@@ -1,404 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2003-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: udataswp.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2003jun05
-* created by: Markus W. Scherer
-*
-* Definitions for ICU data transformations for different platforms,
-* changing between big- and little-endian data and/or between
-* charset families (ASCII<->EBCDIC).
-*/
-
-#ifndef __UDATASWP_H__
-#define __UDATASWP_H__
-
-#include <stdarg.h>
-#include "unicode/utypes.h"
-
-/* forward declaration */
-
-U_CDECL_BEGIN
-
-struct UDataSwapper;
-typedef struct UDataSwapper UDataSwapper;
-
-/**
- * Function type for data transformation.
- * Transforms data, or just returns the length of the data if
- * the input length is -1.
- * Swap functions assume that their data pointers are aligned properly.
- *
- * Quick implementation outline:
- * (best to copy and adapt and existing swapper implementation)
- * check that the data looks like the expected format
- * if(length<0) {
- * preflight:
- * never dereference outData
- * read inData and determine the data size
- * assume that inData is long enough for this
- * } else {
- * outData can be NULL if length==0
- * inData==outData (in-place swapping) possible but not required!
- * verify that length>=(actual size)
- * if there is a chance that not every byte up to size is reached
- * due to padding etc.:
- * if(inData!=outData) {
- * memcpy(outData, inData, actual size);
- * }
- * swap contents
- * }
- * return actual size
- *
- * Further implementation notes:
- * - read integers from inData before swapping them
- * because in-place swapping can make them unreadable
- * - compareInvChars compares a local Unicode string with already-swapped
- * output charset strings
- *
- * @param ds Pointer to UDataSwapper containing global data about the
- * transformation and function pointers for handling primitive
- * types.
- * @param inData Pointer to the input data to be transformed or examined.
- * @param length Length of the data, counting bytes. May be -1 for preflighting.
- * If length>=0, then transform the data.
- * If length==-1, then only determine the length of the data.
- * The length cannot be determined from the data itself for all
- * types of data (e.g., not for simple arrays of integers).
- * @param outData Pointer to the output data buffer.
- * If length>=0 (transformation), then the output buffer must
- * have a capacity of at least length.
- * If length==-1, then outData will not be used and can be NULL.
- * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
- * fulfill U_SUCCESS on input.
- * @return The actual length of the data.
- *
- * @see UDataSwapper
- * @internal ICU 2.8
- */
-typedef int32_t U_CALLCONV
-UDataSwapFn(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-/**
- * Convert one uint16_t from input to platform endianness.
- * @internal ICU 2.8
- */
-typedef uint16_t U_CALLCONV
-UDataReadUInt16(uint16_t x);
-
-/**
- * Convert one uint32_t from input to platform endianness.
- * @internal ICU 2.8
- */
-typedef uint32_t U_CALLCONV
-UDataReadUInt32(uint32_t x);
-
-/**
- * Convert one uint16_t from platform to input endianness.
- * @internal ICU 2.8
- */
-typedef void U_CALLCONV
-UDataWriteUInt16(uint16_t *p, uint16_t x);
-
-/**
- * Convert one uint32_t from platform to input endianness.
- * @internal ICU 2.8
- */
-typedef void U_CALLCONV
-UDataWriteUInt32(uint32_t *p, uint32_t x);
-
-/**
- * Compare invariant-character strings, one in the output data and the
- * other one caller-provided in Unicode.
- * An output data string is compared because strings are usually swapped
- * before the rest of the data, to allow for sorting of string tables
- * according to the output charset.
- * You can use -1 for the length parameters of NUL-terminated strings as usual.
- * Returns Unicode code point order for invariant characters.
- * @internal ICU 2.8
- */
-typedef int32_t U_CALLCONV
-UDataCompareInvChars(const UDataSwapper *ds,
- const char *outString, int32_t outLength,
- const UChar *localString, int32_t localLength);
-
-/**
- * Function for message output when an error occurs during data swapping.
- * A format string and variable number of arguments are passed
- * like for vprintf().
- *
- * @param context A function-specific context pointer.
- * @param fmt The format string.
- * @param args The arguments for format string inserts.
- *
- * @internal ICU 2.8
- */
-typedef void U_CALLCONV
-UDataPrintError(void *context, const char *fmt, va_list args);
-
-struct UDataSwapper {
- /** Input endianness. @internal ICU 2.8 */
- UBool inIsBigEndian;
- /** Input charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
- uint8_t inCharset;
- /** Output endianness. @internal ICU 2.8 */
- UBool outIsBigEndian;
- /** Output charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
- uint8_t outCharset;
-
- /* basic functions for reading data values */
-
- /** Convert one uint16_t from input to platform endianness. @internal ICU 2.8 */
- UDataReadUInt16 *readUInt16;
- /** Convert one uint32_t from input to platform endianness. @internal ICU 2.8 */
- UDataReadUInt32 *readUInt32;
- /** Compare an invariant-character output string with a local one. @internal ICU 2.8 */
- UDataCompareInvChars *compareInvChars;
-
- /* basic functions for writing data values */
-
- /** Convert one uint16_t from platform to input endianness. @internal ICU 2.8 */
- UDataWriteUInt16 *writeUInt16;
- /** Convert one uint32_t from platform to input endianness. @internal ICU 2.8 */
- UDataWriteUInt32 *writeUInt32;
-
- /* basic functions for data transformations */
-
- /** Transform an array of 16-bit integers. @internal ICU 2.8 */
- UDataSwapFn *swapArray16;
- /** Transform an array of 32-bit integers. @internal ICU 2.8 */
- UDataSwapFn *swapArray32;
- /** Transform an array of 64-bit integers. @internal ICU 53 */
- UDataSwapFn *swapArray64;
- /** Transform an invariant-character string. @internal ICU 2.8 */
- UDataSwapFn *swapInvChars;
-
- /**
- * Function for message output when an error occurs during data swapping.
- * Can be NULL.
- * @internal ICU 2.8
- */
- UDataPrintError *printError;
- /** Context pointer for printError. @internal ICU 2.8 */
- void *printErrorContext;
-};
-
-U_CDECL_END
-
-U_CAPI UDataSwapper * U_EXPORT2
-udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset,
- UBool outIsBigEndian, uint8_t outCharset,
- UErrorCode *pErrorCode);
-
-/**
- * Open a UDataSwapper for the given input data and the specified output
- * characteristics.
- * Values of -1 for any of the characteristics mean the local platform's
- * characteristics.
- *
- * @see udata_swap
- * @internal ICU 2.8
- */
-U_CAPI UDataSwapper * U_EXPORT2
-udata_openSwapperForInputData(const void *data, int32_t length,
- UBool outIsBigEndian, uint8_t outCharset,
- UErrorCode *pErrorCode);
-
-U_CAPI void U_EXPORT2
-udata_closeSwapper(UDataSwapper *ds);
-
-/**
- * Read the beginning of an ICU data piece, recognize magic bytes,
- * swap the structure.
- * Set a U_UNSUPPORTED_ERROR if it does not look like an ICU data piece.
- *
- * @return The size of the data header, in bytes.
- *
- * @internal ICU 2.8
- */
-U_CAPI int32_t U_EXPORT2
-udata_swapDataHeader(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-/**
- * Convert one int16_t from input to platform endianness.
- * @internal ICU 2.8
- */
-U_CAPI int16_t U_EXPORT2
-udata_readInt16(const UDataSwapper *ds, int16_t x);
-
-/**
- * Convert one int32_t from input to platform endianness.
- * @internal ICU 2.8
- */
-U_CAPI int32_t U_EXPORT2
-udata_readInt32(const UDataSwapper *ds, int32_t x);
-
-/**
- * Swap a block of invariant, NUL-terminated strings, but not padding
- * bytes after the last string.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-udata_swapInvStringBlock(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-U_CAPI void U_EXPORT2
-udata_printError(const UDataSwapper *ds,
- const char *fmt,
- ...);
-
-/* internal exports from putil.c -------------------------------------------- */
-
-/* declared here to keep them out of the public putil.h */
-
-/**
- * Swap invariant char * strings ASCII->EBCDIC.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-uprv_ebcdicFromAscii(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-/**
- * Copy invariant ASCII char * strings and verify they are invariant.
- * @internal
- */
-U_CFUNC int32_t
-uprv_copyAscii(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-/**
- * Swap invariant char * strings EBCDIC->ASCII.
- * @internal
- */
-U_CFUNC int32_t
-uprv_asciiFromEbcdic(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-/**
- * Copy invariant EBCDIC char * strings and verify they are invariant.
- * @internal
- */
-U_CFUNC int32_t
-uprv_copyEbcdic(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-/**
- * Compare ASCII invariant char * with Unicode invariant UChar *
- * @internal
- */
-U_CFUNC int32_t
-uprv_compareInvAscii(const UDataSwapper *ds,
- const char *outString, int32_t outLength,
- const UChar *localString, int32_t localLength);
-
-/**
- * Compare EBCDIC invariant char * with Unicode invariant UChar *
- * @internal
- */
-U_CFUNC int32_t
-uprv_compareInvEbcdic(const UDataSwapper *ds,
- const char *outString, int32_t outLength,
- const UChar *localString, int32_t localLength);
-
-/**
- * \def uprv_compareInvWithUChar
- * Compare an invariant-character strings with a UChar string
- * @internal
- */
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
-# define uprv_compareInvWithUChar uprv_compareInvAscii
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
-# define uprv_compareInvWithUChar uprv_compareInvEbcdic
-#else
-# error Unknown charset family!
-#endif
-
-// utrie_swap.cpp -----------------------------------------------------------***
-
-/**
- * Swaps a serialized UTrie.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-utrie_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-/**
- * Swaps a serialized UTrie2.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-utrie2_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-/**
- * Swaps a serialized UCPTrie.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-ucptrie_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-/**
- * Swaps a serialized UTrie, UTrie2, or UCPTrie.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-utrie_swapAnyVersion(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-/* material... -------------------------------------------------------------- */
-
-#if 0
-
-/* udata.h */
-
-/**
- * Public API function in udata.c
- *
- * Same as udata_openChoice() but automatically swaps the data.
- * isAcceptable, if not NULL, may accept data with endianness and charset family
- * different from the current platform's properties.
- * If the data is acceptable and the platform properties do not match, then
- * the swap function is called to swap an allocated version of the data.
- * Preflighting may or may not be performed depending on whether the size of
- * the loaded data item is known.
- *
- * @param isAcceptable Same as for udata_openChoice(). May be NULL.
- *
- * @internal ICU 2.8
- */
-U_CAPI UDataMemory * U_EXPORT2
-udata_openSwap(const char *path, const char *type, const char *name,
- UDataMemoryIsAcceptable *isAcceptable, void *isAcceptableContext,
- UDataSwapFn *swap,
- UDataPrintError *printError, void *printErrorContext,
- UErrorCode *pErrorCode);
-
-#endif
-
-#endif
diff --git a/contrib/libs/icu/common/uelement.h b/contrib/libs/icu/common/uelement.h
deleted file mode 100644
index 05f36a09825..00000000000
--- a/contrib/libs/icu/common/uelement.h
+++ /dev/null
@@ -1,91 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 1997-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: uelement.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2011jul04
-* created by: Markus W. Scherer
-*
-* Common definitions for UHashTable and UVector.
-* UHashTok moved here from uhash.h and renamed UElement.
-* This allows users of UVector to avoid the confusing #include of uhash.h.
-* uhash.h aliases UElement to UHashTok,
-* so that we need not change all of its code and its users.
-*/
-
-#ifndef __UELEMENT_H__
-#define __UELEMENT_H__
-
-#include "unicode/utypes.h"
-
-U_CDECL_BEGIN
-
-/**
- * A UVector element, or a key or value within a UHashtable.
- * It may be either a 32-bit integral value or an opaque void* pointer.
- * The void* pointer may be smaller than 32 bits (e.g. 24 bits)
- * or may be larger (e.g. 64 bits).
- *
- * Because a UElement is the size of a native pointer or a 32-bit
- * integer, we pass it around by value.
- */
-union UElement {
- void* pointer;
- int32_t integer;
-};
-typedef union UElement UElement;
-
-/**
- * An element-equality (boolean) comparison function.
- * @param e1 An element (object or integer)
- * @param e2 An element (object or integer)
- * @return TRUE if the two elements are equal.
- */
-typedef UBool U_CALLCONV UElementsAreEqual(const UElement e1, const UElement e2);
-
-/**
- * An element sorting (three-way) comparison function.
- * @param e1 An element (object or integer)
- * @param e2 An element (object or integer)
- * @return 0 if the two elements are equal, -1 if e1 is < e2, or +1 if e1 is > e2.
- */
-typedef int8_t U_CALLCONV UElementComparator(UElement e1, UElement e2);
-
-/**
- * An element assignment function. It may copy an integer, copy
- * a pointer, or clone a pointer, as appropriate.
- * @param dst The element to be assigned to
- * @param src The element to assign from
- */
-typedef void U_CALLCONV UElementAssigner(UElement *dst, UElement *src);
-
-U_CDECL_END
-
-/**
- * Comparator function for UnicodeString* keys. Implements UElementsAreEqual.
- * @param key1 The string for comparison
- * @param key2 The string for comparison
- * @return true if key1 and key2 are equal, return false otherwise.
- */
-U_CAPI UBool U_EXPORT2
-uhash_compareUnicodeString(const UElement key1, const UElement key2);
-
-/**
- * Comparator function for UnicodeString* keys (case insensitive).
- * Make sure to use together with uhash_hashCaselessUnicodeString.
- * Implements UElementsAreEqual.
- * @param key1 The string for comparison
- * @param key2 The string for comparison
- * @return true if key1 and key2 are equal, return false otherwise.
- */
-U_CAPI UBool U_EXPORT2
-uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2);
-
-#endif /* __UELEMENT_H__ */
diff --git a/contrib/libs/icu/common/uenum.cpp b/contrib/libs/icu/common/uenum.cpp
deleted file mode 100644
index 11d895ebcd7..00000000000
--- a/contrib/libs/icu/common/uenum.cpp
+++ /dev/null
@@ -1,189 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2002-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: uenum.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:2
-*
-* created on: 2002jul08
-* created by: Vladimir Weinstein
-*/
-
-#include "unicode/putil.h"
-#include "uenumimp.h"
-#include "cmemory.h"
-
-/* Layout of the baseContext buffer. */
-typedef struct {
- int32_t len; /* number of bytes available starting at 'data' */
- char data; /* actual data starts here */
-} _UEnumBuffer;
-
-/* Extra bytes to allocate in the baseContext buffer. */
-static const int32_t PAD = 8;
-
-/* Return a pointer to the baseContext buffer, possibly allocating
- or reallocating it if at least 'capacity' bytes are not available. */
-static void* _getBuffer(UEnumeration* en, int32_t capacity) {
-
- if (en->baseContext != NULL) {
- if (((_UEnumBuffer*) en->baseContext)->len < capacity) {
- capacity += PAD;
- en->baseContext = uprv_realloc(en->baseContext,
- sizeof(int32_t) + capacity);
- if (en->baseContext == NULL) {
- return NULL;
- }
- ((_UEnumBuffer*) en->baseContext)->len = capacity;
- }
- } else {
- capacity += PAD;
- en->baseContext = uprv_malloc(sizeof(int32_t) + capacity);
- if (en->baseContext == NULL) {
- return NULL;
- }
- ((_UEnumBuffer*) en->baseContext)->len = capacity;
- }
-
- return (void*) & ((_UEnumBuffer*) en->baseContext)->data;
-}
-
-U_CAPI void U_EXPORT2
-uenum_close(UEnumeration* en)
-{
- if (en) {
- if (en->close != NULL) {
- if (en->baseContext) {
- uprv_free(en->baseContext);
- }
- en->close(en);
- } else { /* this seems dangerous, but we better kill the object */
- uprv_free(en);
- }
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-uenum_count(UEnumeration* en, UErrorCode* status)
-{
- if (!en || U_FAILURE(*status)) {
- return -1;
- }
- if (en->count != NULL) {
- return en->count(en, status);
- } else {
- *status = U_UNSUPPORTED_ERROR;
- return -1;
- }
-}
-
-/* Don't call this directly. Only uenum_unext should be calling this. */
-U_CAPI const UChar* U_EXPORT2
-uenum_unextDefault(UEnumeration* en,
- int32_t* resultLength,
- UErrorCode* status)
-{
- UChar *ustr = NULL;
- int32_t len = 0;
- if (en->next != NULL) {
- const char *cstr = en->next(en, &len, status);
- if (cstr != NULL) {
- ustr = (UChar*) _getBuffer(en, (len+1) * sizeof(UChar));
- if (ustr == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- } else {
- u_charsToUChars(cstr, ustr, len+1);
- }
- }
- } else {
- *status = U_UNSUPPORTED_ERROR;
- }
- if (resultLength) {
- *resultLength = len;
- }
- return ustr;
-}
-
-/* Don't call this directly. Only uenum_next should be calling this. */
-U_CAPI const char* U_EXPORT2
-uenum_nextDefault(UEnumeration* en,
- int32_t* resultLength,
- UErrorCode* status)
-{
- if (en->uNext != NULL) {
- char *tempCharVal;
- const UChar *tempUCharVal = en->uNext(en, resultLength, status);
- if (tempUCharVal == NULL) {
- return NULL;
- }
- tempCharVal = (char*)
- _getBuffer(en, (*resultLength+1) * sizeof(char));
- if (!tempCharVal) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- u_UCharsToChars(tempUCharVal, tempCharVal, *resultLength + 1);
- return tempCharVal;
- } else {
- *status = U_UNSUPPORTED_ERROR;
- return NULL;
- }
-}
-
-U_CAPI const UChar* U_EXPORT2
-uenum_unext(UEnumeration* en,
- int32_t* resultLength,
- UErrorCode* status)
-{
- if (!en || U_FAILURE(*status)) {
- return NULL;
- }
- if (en->uNext != NULL) {
- return en->uNext(en, resultLength, status);
- } else {
- *status = U_UNSUPPORTED_ERROR;
- return NULL;
- }
-}
-
-U_CAPI const char* U_EXPORT2
-uenum_next(UEnumeration* en,
- int32_t* resultLength,
- UErrorCode* status)
-{
- if (!en || U_FAILURE(*status)) {
- return NULL;
- }
- if (en->next != NULL) {
- if (resultLength != NULL) {
- return en->next(en, resultLength, status);
- }
- else {
- int32_t dummyLength=0;
- return en->next(en, &dummyLength, status);
- }
- } else {
- *status = U_UNSUPPORTED_ERROR;
- return NULL;
- }
-}
-
-U_CAPI void U_EXPORT2
-uenum_reset(UEnumeration* en, UErrorCode* status)
-{
- if (!en || U_FAILURE(*status)) {
- return;
- }
- if (en->reset != NULL) {
- en->reset(en, status);
- } else {
- *status = U_UNSUPPORTED_ERROR;
- }
-}
diff --git a/contrib/libs/icu/common/uenumimp.h b/contrib/libs/icu/common/uenumimp.h
deleted file mode 100644
index 9c9df75ae04..00000000000
--- a/contrib/libs/icu/common/uenumimp.h
+++ /dev/null
@@ -1,155 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2002-2006, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: uenumimp.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:2
-*
-* created on: 2002jul08
-* created by: Vladimir Weinstein
-*/
-
-#ifndef __UENUMIMP_H
-#define __UENUMIMP_H
-
-#include "unicode/uenum.h"
-
-U_CDECL_BEGIN
-
-/**
- * following are the type declarations for
- * implementations of APIs. If any of these
- * functions are NULL, U_UNSUPPORTED_ERROR
- * is returned. If close is NULL, the enumeration
- * object is going to be released.
- * Initial error checking is done in the body
- * of API function, so the implementations
- * need not to check the initial error condition.
- */
-
-/**
- * Function type declaration for uenum_close().
- *
- * This function should cleanup the enumerator object
- *
- * @param en enumeration to be closed
- */
-typedef void U_CALLCONV
-UEnumClose(UEnumeration *en);
-
-/**
- * Function type declaration for uenum_count().
- *
- * This function should count the number of elements
- * in this enumeration
- *
- * @param en enumeration to be counted
- * @param status pointer to UErrorCode variable
- * @return number of elements in enumeration
- */
-typedef int32_t U_CALLCONV
-UEnumCount(UEnumeration *en, UErrorCode *status);
-
-/**
- * Function type declaration for uenum_unext().
- *
- * This function returns the next element as a UChar *,
- * or NULL after all elements haven been enumerated.
- *
- * @param en enumeration
- * @param resultLength pointer to result length
- * @param status pointer to UErrorCode variable
- * @return next element as UChar *,
- * or NULL after all elements haven been enumerated
- */
-typedef const UChar* U_CALLCONV
-UEnumUNext(UEnumeration* en,
- int32_t* resultLength,
- UErrorCode* status);
-
-/**
- * Function type declaration for uenum_next().
- *
- * This function returns the next element as a char *,
- * or NULL after all elements haven been enumerated.
- *
- * @param en enumeration
- * @param resultLength pointer to result length
- * @param status pointer to UErrorCode variable
- * @return next element as char *,
- * or NULL after all elements haven been enumerated
- */
-typedef const char* U_CALLCONV
-UEnumNext(UEnumeration* en,
- int32_t* resultLength,
- UErrorCode* status);
-
-/**
- * Function type declaration for uenum_reset().
- *
- * This function should reset the enumeration
- * object
- *
- * @param en enumeration
- * @param status pointer to UErrorCode variable
- */
-typedef void U_CALLCONV
-UEnumReset(UEnumeration* en,
- UErrorCode* status);
-
-
-struct UEnumeration {
- /* baseContext. For the base class only. Don't touch! */
- void *baseContext;
-
- /* context. Use it for what you need */
- void *context;
-
- /**
- * these are functions that will
- * be used for APIs
- */
- /* called from uenum_close */
- UEnumClose *close;
- /* called from uenum_count */
- UEnumCount *count;
- /* called from uenum_unext */
- UEnumUNext *uNext;
- /* called from uenum_next */
- UEnumNext *next;
- /* called from uenum_reset */
- UEnumReset *reset;
-};
-
-U_CDECL_END
-
-/* This is the default implementation for uenum_unext().
- * It automatically converts the char * string to UChar *.
- * Don't call this directly. This is called internally by uenum_unext
- * when a UEnumeration is defined with 'uNext' pointing to this
- * function.
- */
-U_CAPI const UChar* U_EXPORT2
-uenum_unextDefault(UEnumeration* en,
- int32_t* resultLength,
- UErrorCode* status);
-
-/* This is the default implementation for uenum_next().
- * It automatically converts the UChar * string to char *.
- * Don't call this directly. This is called internally by uenum_next
- * when a UEnumeration is defined with 'next' pointing to this
- * function.
- */
-U_CAPI const char* U_EXPORT2
-uenum_nextDefault(UEnumeration* en,
- int32_t* resultLength,
- UErrorCode* status);
-
-#endif
diff --git a/contrib/libs/icu/common/uhash.cpp b/contrib/libs/icu/common/uhash.cpp
deleted file mode 100644
index 86311ceb0b2..00000000000
--- a/contrib/libs/icu/common/uhash.cpp
+++ /dev/null
@@ -1,991 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 1997-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-******************************************************************************
-* Date Name Description
-* 03/22/00 aliu Adapted from original C++ ICU Hashtable.
-* 07/06/01 aliu Modified to support int32_t keys on
-* platforms with sizeof(void*) < 32.
-******************************************************************************
-*/
-
-#include "uhash.h"
-#include "unicode/ustring.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "uassert.h"
-#include "ustr_imp.h"
-
-/* This hashtable is implemented as a double hash. All elements are
- * stored in a single array with no secondary storage for collision
- * resolution (no linked list, etc.). When there is a hash collision
- * (when two unequal keys have the same hashcode) we resolve this by
- * using a secondary hash. The secondary hash is an increment
- * computed as a hash function (a different one) of the primary
- * hashcode. This increment is added to the initial hash value to
- * obtain further slots assigned to the same hash code. For this to
- * work, the length of the array and the increment must be relatively
- * prime. The easiest way to achieve this is to have the length of
- * the array be prime, and the increment be any value from
- * 1..length-1.
- *
- * Hashcodes are 32-bit integers. We make sure all hashcodes are
- * non-negative by masking off the top bit. This has two effects: (1)
- * modulo arithmetic is simplified. If we allowed negative hashcodes,
- * then when we computed hashcode % length, we could get a negative
- * result, which we would then have to adjust back into range. It's
- * simpler to just make hashcodes non-negative. (2) It makes it easy
- * to check for empty vs. occupied slots in the table. We just mark
- * empty or deleted slots with a negative hashcode.
- *
- * The central function is _uhash_find(). This function looks for a
- * slot matching the given key and hashcode. If one is found, it
- * returns a pointer to that slot. If the table is full, and no match
- * is found, it returns NULL -- in theory. This would make the code
- * more complicated, since all callers of _uhash_find() would then
- * have to check for a NULL result. To keep this from happening, we
- * don't allow the table to fill. When there is only one
- * empty/deleted slot left, uhash_put() will refuse to increase the
- * count, and fail. This simplifies the code. In practice, one will
- * seldom encounter this using default UHashtables. However, if a
- * hashtable is set to a U_FIXED resize policy, or if memory is
- * exhausted, then the table may fill.
- *
- * High and low water ratios control rehashing. They establish levels
- * of fullness (from 0 to 1) outside of which the data array is
- * reallocated and repopulated. Setting the low water ratio to zero
- * means the table will never shrink. Setting the high water ratio to
- * one means the table will never grow. The ratios should be
- * coordinated with the ratio between successive elements of the
- * PRIMES table, so that when the primeIndex is incremented or
- * decremented during rehashing, it brings the ratio of count / length
- * back into the desired range (between low and high water ratios).
- */
-
-/********************************************************************
- * PRIVATE Constants, Macros
- ********************************************************************/
-
-/* This is a list of non-consecutive primes chosen such that
- * PRIMES[i+1] ~ 2*PRIMES[i]. (Currently, the ratio ranges from 1.81
- * to 2.18; the inverse ratio ranges from 0.459 to 0.552.) If this
- * ratio is changed, the low and high water ratios should also be
- * adjusted to suit.
- *
- * These prime numbers were also chosen so that they are the largest
- * prime number while being less than a power of two.
- */
-static const int32_t PRIMES[] = {
- 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
- 65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
- 16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
- 1073741789, 2147483647 /*, 4294967291 */
-};
-
-#define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES)
-#define DEFAULT_PRIME_INDEX 4
-
-/* These ratios are tuned to the PRIMES array such that a resize
- * places the table back into the zone of non-resizing. That is,
- * after a call to _uhash_rehash(), a subsequent call to
- * _uhash_rehash() should do nothing (should not churn). This is only
- * a potential problem with U_GROW_AND_SHRINK.
- */
-static const float RESIZE_POLICY_RATIO_TABLE[6] = {
- /* low, high water ratio */
- 0.0F, 0.5F, /* U_GROW: Grow on demand, do not shrink */
- 0.1F, 0.5F, /* U_GROW_AND_SHRINK: Grow and shrink on demand */
- 0.0F, 1.0F /* U_FIXED: Never change size */
-};
-
-/*
- Invariants for hashcode values:
-
- * DELETED < 0
- * EMPTY < 0
- * Real hashes >= 0
-
- Hashcodes may not start out this way, but internally they are
- adjusted so that they are always positive. We assume 32-bit
- hashcodes; adjust these constants for other hashcode sizes.
-*/
-#define HASH_DELETED ((int32_t) 0x80000000)
-#define HASH_EMPTY ((int32_t) HASH_DELETED + 1)
-
-#define IS_EMPTY_OR_DELETED(x) ((x) < 0)
-
-/* This macro expects a UHashTok.pointer as its keypointer and
- valuepointer parameters */
-#define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) UPRV_BLOCK_MACRO_BEGIN { \
- if (hash->keyDeleter != NULL && keypointer != NULL) { \
- (*hash->keyDeleter)(keypointer); \
- } \
- if (hash->valueDeleter != NULL && valuepointer != NULL) { \
- (*hash->valueDeleter)(valuepointer); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/*
- * Constants for hinting whether a key or value is an integer
- * or a pointer. If a hint bit is zero, then the associated
- * token is assumed to be an integer.
- */
-#define HINT_KEY_POINTER (1)
-#define HINT_VALUE_POINTER (2)
-
-/********************************************************************
- * PRIVATE Implementation
- ********************************************************************/
-
-static UHashTok
-_uhash_setElement(UHashtable *hash, UHashElement* e,
- int32_t hashcode,
- UHashTok key, UHashTok value, int8_t hint) {
-
- UHashTok oldValue = e->value;
- if (hash->keyDeleter != NULL && e->key.pointer != NULL &&
- e->key.pointer != key.pointer) { /* Avoid double deletion */
- (*hash->keyDeleter)(e->key.pointer);
- }
- if (hash->valueDeleter != NULL) {
- if (oldValue.pointer != NULL &&
- oldValue.pointer != value.pointer) { /* Avoid double deletion */
- (*hash->valueDeleter)(oldValue.pointer);
- }
- oldValue.pointer = NULL;
- }
- /* Compilers should copy the UHashTok union correctly, but even if
- * they do, memory heap tools (e.g. BoundsChecker) can get
- * confused when a pointer is cloaked in a union and then copied.
- * TO ALLEVIATE THIS, we use hints (based on what API the user is
- * calling) to copy pointers when we know the user thinks
- * something is a pointer. */
- if (hint & HINT_KEY_POINTER) {
- e->key.pointer = key.pointer;
- } else {
- e->key = key;
- }
- if (hint & HINT_VALUE_POINTER) {
- e->value.pointer = value.pointer;
- } else {
- e->value = value;
- }
- e->hashcode = hashcode;
- return oldValue;
-}
-
-/**
- * Assumes that the given element is not empty or deleted.
- */
-static UHashTok
-_uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) {
- UHashTok empty;
- U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode));
- --hash->count;
- empty.pointer = NULL; empty.integer = 0;
- return _uhash_setElement(hash, e, HASH_DELETED, empty, empty, 0);
-}
-
-static void
-_uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
- U_ASSERT(hash != NULL);
- U_ASSERT(((int32_t)policy) >= 0);
- U_ASSERT(((int32_t)policy) < 3);
- hash->lowWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2];
- hash->highWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2 + 1];
-}
-
-/**
- * Allocate internal data array of a size determined by the given
- * prime index. If the index is out of range it is pinned into range.
- * If the allocation fails the status is set to
- * U_MEMORY_ALLOCATION_ERROR and all array storage is freed. In
- * either case the previous array pointer is overwritten.
- *
- * Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1.
- */
-static void
-_uhash_allocate(UHashtable *hash,
- int32_t primeIndex,
- UErrorCode *status) {
-
- UHashElement *p, *limit;
- UHashTok emptytok;
-
- if (U_FAILURE(*status)) return;
-
- U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH);
-
- hash->primeIndex = static_cast<int8_t>(primeIndex);
- hash->length = PRIMES[primeIndex];
-
- p = hash->elements = (UHashElement*)
- uprv_malloc(sizeof(UHashElement) * hash->length);
-
- if (hash->elements == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- emptytok.pointer = NULL; /* Only one of these two is needed */
- emptytok.integer = 0; /* but we don't know which one. */
-
- limit = p + hash->length;
- while (p < limit) {
- p->key = emptytok;
- p->value = emptytok;
- p->hashcode = HASH_EMPTY;
- ++p;
- }
-
- hash->count = 0;
- hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
- hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
-}
-
-static UHashtable*
-_uhash_init(UHashtable *result,
- UHashFunction *keyHash,
- UKeyComparator *keyComp,
- UValueComparator *valueComp,
- int32_t primeIndex,
- UErrorCode *status)
-{
- if (U_FAILURE(*status)) return NULL;
- U_ASSERT(keyHash != NULL);
- U_ASSERT(keyComp != NULL);
-
- result->keyHasher = keyHash;
- result->keyComparator = keyComp;
- result->valueComparator = valueComp;
- result->keyDeleter = NULL;
- result->valueDeleter = NULL;
- result->allocated = FALSE;
- _uhash_internalSetResizePolicy(result, U_GROW);
-
- _uhash_allocate(result, primeIndex, status);
-
- if (U_FAILURE(*status)) {
- return NULL;
- }
-
- return result;
-}
-
-static UHashtable*
-_uhash_create(UHashFunction *keyHash,
- UKeyComparator *keyComp,
- UValueComparator *valueComp,
- int32_t primeIndex,
- UErrorCode *status) {
- UHashtable *result;
-
- if (U_FAILURE(*status)) return NULL;
-
- result = (UHashtable*) uprv_malloc(sizeof(UHashtable));
- if (result == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- _uhash_init(result, keyHash, keyComp, valueComp, primeIndex, status);
- result->allocated = TRUE;
-
- if (U_FAILURE(*status)) {
- uprv_free(result);
- return NULL;
- }
-
- return result;
-}
-
-/**
- * Look for a key in the table, or if no such key exists, the first
- * empty slot matching the given hashcode. Keys are compared using
- * the keyComparator function.
- *
- * First find the start position, which is the hashcode modulo
- * the length. Test it to see if it is:
- *
- * a. identical: First check the hash values for a quick check,
- * then compare keys for equality using keyComparator.
- * b. deleted
- * c. empty
- *
- * Stop if it is identical or empty, otherwise continue by adding a
- * "jump" value (moduloing by the length again to keep it within
- * range) and retesting. For efficiency, there need enough empty
- * values so that the searchs stop within a reasonable amount of time.
- * This can be changed by changing the high/low water marks.
- *
- * In theory, this function can return NULL, if it is full (no empty
- * or deleted slots) and if no matching key is found. In practice, we
- * prevent this elsewhere (in uhash_put) by making sure the last slot
- * in the table is never filled.
- *
- * The size of the table should be prime for this algorithm to work;
- * otherwise we are not guaranteed that the jump value (the secondary
- * hash) is relatively prime to the table length.
- */
-static UHashElement*
-_uhash_find(const UHashtable *hash, UHashTok key,
- int32_t hashcode) {
-
- int32_t firstDeleted = -1; /* assume invalid index */
- int32_t theIndex, startIndex;
- int32_t jump = 0; /* lazy evaluate */
- int32_t tableHash;
- UHashElement *elements = hash->elements;
-
- hashcode &= 0x7FFFFFFF; /* must be positive */
- startIndex = theIndex = (hashcode ^ 0x4000000) % hash->length;
-
- do {
- tableHash = elements[theIndex].hashcode;
- if (tableHash == hashcode) { /* quick check */
- if ((*hash->keyComparator)(key, elements[theIndex].key)) {
- return &(elements[theIndex]);
- }
- } else if (!IS_EMPTY_OR_DELETED(tableHash)) {
- /* We have hit a slot which contains a key-value pair,
- * but for which the hash code does not match. Keep
- * looking.
- */
- } else if (tableHash == HASH_EMPTY) { /* empty, end o' the line */
- break;
- } else if (firstDeleted < 0) { /* remember first deleted */
- firstDeleted = theIndex;
- }
- if (jump == 0) { /* lazy compute jump */
- /* The jump value must be relatively prime to the table
- * length. As long as the length is prime, then any value
- * 1..length-1 will be relatively prime to it.
- */
- jump = (hashcode % (hash->length - 1)) + 1;
- }
- theIndex = (theIndex + jump) % hash->length;
- } while (theIndex != startIndex);
-
- if (firstDeleted >= 0) {
- theIndex = firstDeleted; /* reset if had deleted slot */
- } else if (tableHash != HASH_EMPTY) {
- /* We get to this point if the hashtable is full (no empty or
- * deleted slots), and we've failed to find a match. THIS
- * WILL NEVER HAPPEN as long as uhash_put() makes sure that
- * count is always < length.
- */
- UPRV_UNREACHABLE;
- }
- return &(elements[theIndex]);
-}
-
-/**
- * Attempt to grow or shrink the data arrays in order to make the
- * count fit between the high and low water marks. hash_put() and
- * hash_remove() call this method when the count exceeds the high or
- * low water marks. This method may do nothing, if memory allocation
- * fails, or if the count is already in range, or if the length is
- * already at the low or high limit. In any case, upon return the
- * arrays will be valid.
- */
-static void
-_uhash_rehash(UHashtable *hash, UErrorCode *status) {
-
- UHashElement *old = hash->elements;
- int32_t oldLength = hash->length;
- int32_t newPrimeIndex = hash->primeIndex;
- int32_t i;
-
- if (hash->count > hash->highWaterMark) {
- if (++newPrimeIndex >= PRIMES_LENGTH) {
- return;
- }
- } else if (hash->count < hash->lowWaterMark) {
- if (--newPrimeIndex < 0) {
- return;
- }
- } else {
- return;
- }
-
- _uhash_allocate(hash, newPrimeIndex, status);
-
- if (U_FAILURE(*status)) {
- hash->elements = old;
- hash->length = oldLength;
- return;
- }
-
- for (i = oldLength - 1; i >= 0; --i) {
- if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) {
- UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode);
- U_ASSERT(e != NULL);
- U_ASSERT(e->hashcode == HASH_EMPTY);
- e->key = old[i].key;
- e->value = old[i].value;
- e->hashcode = old[i].hashcode;
- ++hash->count;
- }
- }
-
- uprv_free(old);
-}
-
-static UHashTok
-_uhash_remove(UHashtable *hash,
- UHashTok key) {
- /* First find the position of the key in the table. If the object
- * has not been removed already, remove it. If the user wanted
- * keys deleted, then delete it also. We have to put a special
- * hashcode in that position that means that something has been
- * deleted, since when we do a find, we have to continue PAST any
- * deleted values.
- */
- UHashTok result;
- UHashElement* e = _uhash_find(hash, key, hash->keyHasher(key));
- U_ASSERT(e != NULL);
- result.pointer = NULL;
- result.integer = 0;
- if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
- result = _uhash_internalRemoveElement(hash, e);
- if (hash->count < hash->lowWaterMark) {
- UErrorCode status = U_ZERO_ERROR;
- _uhash_rehash(hash, &status);
- }
- }
- return result;
-}
-
-static UHashTok
-_uhash_put(UHashtable *hash,
- UHashTok key,
- UHashTok value,
- int8_t hint,
- UErrorCode *status) {
-
- /* Put finds the position in the table for the new value. If the
- * key is already in the table, it is deleted, if there is a
- * non-NULL keyDeleter. Then the key, the hash and the value are
- * all put at the position in their respective arrays.
- */
- int32_t hashcode;
- UHashElement* e;
- UHashTok emptytok;
-
- if (U_FAILURE(*status)) {
- goto err;
- }
- U_ASSERT(hash != NULL);
- /* Cannot always check pointer here or iSeries sees NULL every time. */
- if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) {
- /* Disallow storage of NULL values, since NULL is returned by
- * get() to indicate an absent key. Storing NULL == removing.
- */
- return _uhash_remove(hash, key);
- }
- if (hash->count > hash->highWaterMark) {
- _uhash_rehash(hash, status);
- if (U_FAILURE(*status)) {
- goto err;
- }
- }
-
- hashcode = (*hash->keyHasher)(key);
- e = _uhash_find(hash, key, hashcode);
- U_ASSERT(e != NULL);
-
- if (IS_EMPTY_OR_DELETED(e->hashcode)) {
- /* Important: We must never actually fill the table up. If we
- * do so, then _uhash_find() will return NULL, and we'll have
- * to check for NULL after every call to _uhash_find(). To
- * avoid this we make sure there is always at least one empty
- * or deleted slot in the table. This only is a problem if we
- * are out of memory and rehash isn't working.
- */
- ++hash->count;
- if (hash->count == hash->length) {
- /* Don't allow count to reach length */
- --hash->count;
- *status = U_MEMORY_ALLOCATION_ERROR;
- goto err;
- }
- }
-
- /* We must in all cases handle storage properly. If there was an
- * old key, then it must be deleted (if the deleter != NULL).
- * Make hashcodes stored in table positive.
- */
- return _uhash_setElement(hash, e, hashcode & 0x7FFFFFFF, key, value, hint);
-
- err:
- /* If the deleters are non-NULL, this method adopts its key and/or
- * value arguments, and we must be sure to delete the key and/or
- * value in all cases, even upon failure.
- */
- HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer);
- emptytok.pointer = NULL; emptytok.integer = 0;
- return emptytok;
-}
-
-
-/********************************************************************
- * PUBLIC API
- ********************************************************************/
-
-U_CAPI UHashtable* U_EXPORT2
-uhash_open(UHashFunction *keyHash,
- UKeyComparator *keyComp,
- UValueComparator *valueComp,
- UErrorCode *status) {
-
- return _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
-}
-
-U_CAPI UHashtable* U_EXPORT2
-uhash_openSize(UHashFunction *keyHash,
- UKeyComparator *keyComp,
- UValueComparator *valueComp,
- int32_t size,
- UErrorCode *status) {
-
- /* Find the smallest index i for which PRIMES[i] >= size. */
- int32_t i = 0;
- while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
- ++i;
- }
-
- return _uhash_create(keyHash, keyComp, valueComp, i, status);
-}
-
-U_CAPI UHashtable* U_EXPORT2
-uhash_init(UHashtable *fillinResult,
- UHashFunction *keyHash,
- UKeyComparator *keyComp,
- UValueComparator *valueComp,
- UErrorCode *status) {
-
- return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
-}
-
-U_CAPI UHashtable* U_EXPORT2
-uhash_initSize(UHashtable *fillinResult,
- UHashFunction *keyHash,
- UKeyComparator *keyComp,
- UValueComparator *valueComp,
- int32_t size,
- UErrorCode *status) {
-
- // Find the smallest index i for which PRIMES[i] >= size.
- int32_t i = 0;
- while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
- ++i;
- }
- return _uhash_init(fillinResult, keyHash, keyComp, valueComp, i, status);
-}
-
-U_CAPI void U_EXPORT2
-uhash_close(UHashtable *hash) {
- if (hash == NULL) {
- return;
- }
- if (hash->elements != NULL) {
- if (hash->keyDeleter != NULL || hash->valueDeleter != NULL) {
- int32_t pos=UHASH_FIRST;
- UHashElement *e;
- while ((e = (UHashElement*) uhash_nextElement(hash, &pos)) != NULL) {
- HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer);
- }
- }
- uprv_free(hash->elements);
- hash->elements = NULL;
- }
- if (hash->allocated) {
- uprv_free(hash);
- }
-}
-
-U_CAPI UHashFunction *U_EXPORT2
-uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) {
- UHashFunction *result = hash->keyHasher;
- hash->keyHasher = fn;
- return result;
-}
-
-U_CAPI UKeyComparator *U_EXPORT2
-uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) {
- UKeyComparator *result = hash->keyComparator;
- hash->keyComparator = fn;
- return result;
-}
-U_CAPI UValueComparator *U_EXPORT2
-uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){
- UValueComparator *result = hash->valueComparator;
- hash->valueComparator = fn;
- return result;
-}
-
-U_CAPI UObjectDeleter *U_EXPORT2
-uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) {
- UObjectDeleter *result = hash->keyDeleter;
- hash->keyDeleter = fn;
- return result;
-}
-
-U_CAPI UObjectDeleter *U_EXPORT2
-uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) {
- UObjectDeleter *result = hash->valueDeleter;
- hash->valueDeleter = fn;
- return result;
-}
-
-U_CAPI void U_EXPORT2
-uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
- UErrorCode status = U_ZERO_ERROR;
- _uhash_internalSetResizePolicy(hash, policy);
- hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
- hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
- _uhash_rehash(hash, &status);
-}
-
-U_CAPI int32_t U_EXPORT2
-uhash_count(const UHashtable *hash) {
- return hash->count;
-}
-
-U_CAPI void* U_EXPORT2
-uhash_get(const UHashtable *hash,
- const void* key) {
- UHashTok keyholder;
- keyholder.pointer = (void*) key;
- return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
-}
-
-U_CAPI void* U_EXPORT2
-uhash_iget(const UHashtable *hash,
- int32_t key) {
- UHashTok keyholder;
- keyholder.integer = key;
- return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
-}
-
-U_CAPI int32_t U_EXPORT2
-uhash_geti(const UHashtable *hash,
- const void* key) {
- UHashTok keyholder;
- keyholder.pointer = (void*) key;
- return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
-}
-
-U_CAPI int32_t U_EXPORT2
-uhash_igeti(const UHashtable *hash,
- int32_t key) {
- UHashTok keyholder;
- keyholder.integer = key;
- return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
-}
-
-U_CAPI void* U_EXPORT2
-uhash_put(UHashtable *hash,
- void* key,
- void* value,
- UErrorCode *status) {
- UHashTok keyholder, valueholder;
- keyholder.pointer = key;
- valueholder.pointer = value;
- return _uhash_put(hash, keyholder, valueholder,
- HINT_KEY_POINTER | HINT_VALUE_POINTER,
- status).pointer;
-}
-
-U_CAPI void* U_EXPORT2
-uhash_iput(UHashtable *hash,
- int32_t key,
- void* value,
- UErrorCode *status) {
- UHashTok keyholder, valueholder;
- keyholder.integer = key;
- valueholder.pointer = value;
- return _uhash_put(hash, keyholder, valueholder,
- HINT_VALUE_POINTER,
- status).pointer;
-}
-
-U_CAPI int32_t U_EXPORT2
-uhash_puti(UHashtable *hash,
- void* key,
- int32_t value,
- UErrorCode *status) {
- UHashTok keyholder, valueholder;
- keyholder.pointer = key;
- valueholder.integer = value;
- return _uhash_put(hash, keyholder, valueholder,
- HINT_KEY_POINTER,
- status).integer;
-}
-
-
-U_CAPI int32_t U_EXPORT2
-uhash_iputi(UHashtable *hash,
- int32_t key,
- int32_t value,
- UErrorCode *status) {
- UHashTok keyholder, valueholder;
- keyholder.integer = key;
- valueholder.integer = value;
- return _uhash_put(hash, keyholder, valueholder,
- 0, /* neither is a ptr */
- status).integer;
-}
-
-U_CAPI void* U_EXPORT2
-uhash_remove(UHashtable *hash,
- const void* key) {
- UHashTok keyholder;
- keyholder.pointer = (void*) key;
- return _uhash_remove(hash, keyholder).pointer;
-}
-
-U_CAPI void* U_EXPORT2
-uhash_iremove(UHashtable *hash,
- int32_t key) {
- UHashTok keyholder;
- keyholder.integer = key;
- return _uhash_remove(hash, keyholder).pointer;
-}
-
-U_CAPI int32_t U_EXPORT2
-uhash_removei(UHashtable *hash,
- const void* key) {
- UHashTok keyholder;
- keyholder.pointer = (void*) key;
- return _uhash_remove(hash, keyholder).integer;
-}
-
-U_CAPI int32_t U_EXPORT2
-uhash_iremovei(UHashtable *hash,
- int32_t key) {
- UHashTok keyholder;
- keyholder.integer = key;
- return _uhash_remove(hash, keyholder).integer;
-}
-
-U_CAPI void U_EXPORT2
-uhash_removeAll(UHashtable *hash) {
- int32_t pos = UHASH_FIRST;
- const UHashElement *e;
- U_ASSERT(hash != NULL);
- if (hash->count != 0) {
- while ((e = uhash_nextElement(hash, &pos)) != NULL) {
- uhash_removeElement(hash, e);
- }
- }
- U_ASSERT(hash->count == 0);
-}
-
-U_CAPI const UHashElement* U_EXPORT2
-uhash_find(const UHashtable *hash, const void* key) {
- UHashTok keyholder;
- const UHashElement *e;
- keyholder.pointer = (void*) key;
- e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
- return IS_EMPTY_OR_DELETED(e->hashcode) ? NULL : e;
-}
-
-U_CAPI const UHashElement* U_EXPORT2
-uhash_nextElement(const UHashtable *hash, int32_t *pos) {
- /* Walk through the array until we find an element that is not
- * EMPTY and not DELETED.
- */
- int32_t i;
- U_ASSERT(hash != NULL);
- for (i = *pos + 1; i < hash->length; ++i) {
- if (!IS_EMPTY_OR_DELETED(hash->elements[i].hashcode)) {
- *pos = i;
- return &(hash->elements[i]);
- }
- }
-
- /* No more elements */
- return NULL;
-}
-
-U_CAPI void* U_EXPORT2
-uhash_removeElement(UHashtable *hash, const UHashElement* e) {
- U_ASSERT(hash != NULL);
- U_ASSERT(e != NULL);
- if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
- UHashElement *nce = (UHashElement *)e;
- return _uhash_internalRemoveElement(hash, nce).pointer;
- }
- return NULL;
-}
-
-/********************************************************************
- * UHashTok convenience
- ********************************************************************/
-
-/**
- * Return a UHashTok for an integer.
- */
-/*U_CAPI UHashTok U_EXPORT2
-uhash_toki(int32_t i) {
- UHashTok tok;
- tok.integer = i;
- return tok;
-}*/
-
-/**
- * Return a UHashTok for a pointer.
- */
-/*U_CAPI UHashTok U_EXPORT2
-uhash_tokp(void* p) {
- UHashTok tok;
- tok.pointer = p;
- return tok;
-}*/
-
-/********************************************************************
- * PUBLIC Key Hash Functions
- ********************************************************************/
-
-U_CAPI int32_t U_EXPORT2
-uhash_hashUChars(const UHashTok key) {
- const UChar *s = (const UChar *)key.pointer;
- return s == NULL ? 0 : ustr_hashUCharsN(s, u_strlen(s));
-}
-
-U_CAPI int32_t U_EXPORT2
-uhash_hashChars(const UHashTok key) {
- const char *s = (const char *)key.pointer;
- return s == NULL ? 0 : static_cast<int32_t>(ustr_hashCharsN(s, static_cast<int32_t>(uprv_strlen(s))));
-}
-
-U_CAPI int32_t U_EXPORT2
-uhash_hashIChars(const UHashTok key) {
- const char *s = (const char *)key.pointer;
- return s == NULL ? 0 : ustr_hashICharsN(s, static_cast<int32_t>(uprv_strlen(s)));
-}
-
-U_CAPI UBool U_EXPORT2
-uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
- int32_t count1, count2, pos, i;
-
- if(hash1==hash2){
- return TRUE;
- }
-
- /*
- * Make sure that we are comparing 2 valid hashes of the same type
- * with valid comparison functions.
- * Without valid comparison functions, a binary comparison
- * of the hash values will yield random results on machines
- * with 64-bit pointers and 32-bit integer hashes.
- * A valueComparator is normally optional.
- */
- if (hash1==NULL || hash2==NULL ||
- hash1->keyComparator != hash2->keyComparator ||
- hash1->valueComparator != hash2->valueComparator ||
- hash1->valueComparator == NULL)
- {
- /*
- Normally we would return an error here about incompatible hash tables,
- but we return FALSE instead.
- */
- return FALSE;
- }
-
- count1 = uhash_count(hash1);
- count2 = uhash_count(hash2);
- if(count1!=count2){
- return FALSE;
- }
-
- pos=UHASH_FIRST;
- for(i=0; i<count1; i++){
- const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
- const UHashTok key1 = elem1->key;
- const UHashTok val1 = elem1->value;
- /* here the keys are not compared, instead the key form hash1 is used to fetch
- * value from hash2. If the hashes are equal then then both hashes should
- * contain equal values for the same key!
- */
- const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1));
- const UHashTok val2 = elem2->value;
- if(hash1->valueComparator(val1, val2)==FALSE){
- return FALSE;
- }
- }
- return TRUE;
-}
-
-/********************************************************************
- * PUBLIC Comparator Functions
- ********************************************************************/
-
-U_CAPI UBool U_EXPORT2
-uhash_compareUChars(const UHashTok key1, const UHashTok key2) {
- const UChar *p1 = (const UChar*) key1.pointer;
- const UChar *p2 = (const UChar*) key2.pointer;
- if (p1 == p2) {
- return TRUE;
- }
- if (p1 == NULL || p2 == NULL) {
- return FALSE;
- }
- while (*p1 != 0 && *p1 == *p2) {
- ++p1;
- ++p2;
- }
- return (UBool)(*p1 == *p2);
-}
-
-U_CAPI UBool U_EXPORT2
-uhash_compareChars(const UHashTok key1, const UHashTok key2) {
- const char *p1 = (const char*) key1.pointer;
- const char *p2 = (const char*) key2.pointer;
- if (p1 == p2) {
- return TRUE;
- }
- if (p1 == NULL || p2 == NULL) {
- return FALSE;
- }
- while (*p1 != 0 && *p1 == *p2) {
- ++p1;
- ++p2;
- }
- return (UBool)(*p1 == *p2);
-}
-
-U_CAPI UBool U_EXPORT2
-uhash_compareIChars(const UHashTok key1, const UHashTok key2) {
- const char *p1 = (const char*) key1.pointer;
- const char *p2 = (const char*) key2.pointer;
- if (p1 == p2) {
- return TRUE;
- }
- if (p1 == NULL || p2 == NULL) {
- return FALSE;
- }
- while (*p1 != 0 && uprv_tolower(*p1) == uprv_tolower(*p2)) {
- ++p1;
- ++p2;
- }
- return (UBool)(*p1 == *p2);
-}
-
-/********************************************************************
- * PUBLIC int32_t Support Functions
- ********************************************************************/
-
-U_CAPI int32_t U_EXPORT2
-uhash_hashLong(const UHashTok key) {
- return key.integer;
-}
-
-U_CAPI UBool U_EXPORT2
-uhash_compareLong(const UHashTok key1, const UHashTok key2) {
- return (UBool)(key1.integer == key2.integer);
-}
diff --git a/contrib/libs/icu/common/uhash.h b/contrib/libs/icu/common/uhash.h
deleted file mode 100644
index b59d2711bb2..00000000000
--- a/contrib/libs/icu/common/uhash.h
+++ /dev/null
@@ -1,718 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 1997-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-******************************************************************************
-* Date Name Description
-* 03/22/00 aliu Adapted from original C++ ICU Hashtable.
-* 07/06/01 aliu Modified to support int32_t keys on
-* platforms with sizeof(void*) < 32.
-******************************************************************************
-*/
-
-#ifndef UHASH_H
-#define UHASH_H
-
-#include "unicode/utypes.h"
-#include "cmemory.h"
-#include "uelement.h"
-#include "unicode/localpointer.h"
-
-/**
- * UHashtable stores key-value pairs and does moderately fast lookup
- * based on keys. It provides a good tradeoff between access time and
- * storage space. As elements are added to it, it grows to accomodate
- * them. By default, the table never shrinks, even if all elements
- * are removed from it.
- *
- * Keys and values are stored as void* pointers. These void* pointers
- * may be actual pointers to strings, objects, or any other structure
- * in memory, or they may simply be integral values cast to void*.
- * UHashtable doesn't care and manipulates them via user-supplied
- * functions. These functions hash keys, compare keys, delete keys,
- * and delete values. Some function pointers are optional (may be
- * NULL); others must be supplied. Several prebuilt functions exist
- * to handle common key types.
- *
- * UHashtable ownership of keys and values is flexible, and controlled
- * by whether or not the key deleter and value deleter functions are
- * set. If a void* key is actually a pointer to a deletable object,
- * then UHashtable can be made to delete that object by setting the
- * key deleter function pointer to a non-NULL value. If this is done,
- * then keys passed to uhash_put() are owned by the hashtable and will
- * be deleted by it at some point, either as keys are replaced, or
- * when uhash_close() is finally called. The same is true of values
- * and the value deleter function pointer. Keys passed to methods
- * other than uhash_put() are never owned by the hashtable.
- *
- * NULL values are not allowed. uhash_get() returns NULL to indicate
- * a key that is not in the table, and having a NULL value in the
- * table would generate an ambiguous result. If a key and a NULL
- * value is passed to uhash_put(), this has the effect of doing a
- * uhash_remove() on that key. This keeps uhash_get(), uhash_count(),
- * and uhash_nextElement() consistent with one another.
- *
- * To see everything in a hashtable, use uhash_nextElement() to
- * iterate through its contents. Each call to this function returns a
- * UHashElement pointer. A hash element contains a key, value, and
- * hashcode. During iteration an element may be deleted by calling
- * uhash_removeElement(); iteration may safely continue thereafter.
- * The uhash_remove() function may also be safely called in
- * mid-iteration. If uhash_put() is called during iteration,
- * the iteration is still guaranteed to terminate reasonably, but
- * there is no guarantee that every element will be returned or that
- * some won't be returned more than once.
- *
- * Under no circumstances should the UHashElement returned by
- * uhash_nextElement be modified directly.
- *
- * By default, the hashtable grows when necessary, but never shrinks,
- * even if all items are removed. For most applications this is
- * optimal. However, in a highly dynamic usage where memory is at a
- * premium, the table can be set to both grow and shrink by calling
- * uhash_setResizePolicy() with the policy U_GROW_AND_SHRINK. In a
- * situation where memory is critical and the client wants a table
- * that does not grow at all, the constant U_FIXED can be used.
- */
-
-/********************************************************************
- * Data Structures
- ********************************************************************/
-
-U_CDECL_BEGIN
-
-/**
- * A key or value within a UHashtable.
- * The hashing and comparison functions take a pointer to a
- * UHashTok, but the deleter receives the void* pointer within it.
- */
-typedef UElement UHashTok;
-
-/**
- * This is a single hash element.
- */
-struct UHashElement {
- /* Reorder these elements to pack nicely if necessary */
- int32_t hashcode;
- UHashTok value;
- UHashTok key;
-};
-typedef struct UHashElement UHashElement;
-
-/**
- * A hashing function.
- * @param key A key stored in a hashtable
- * @return A NON-NEGATIVE hash code for parm.
- */
-typedef int32_t U_CALLCONV UHashFunction(const UHashTok key);
-
-/**
- * A key equality (boolean) comparison function.
- */
-typedef UElementsAreEqual UKeyComparator;
-
-/**
- * A value equality (boolean) comparison function.
- */
-typedef UElementsAreEqual UValueComparator;
-
-/* see cmemory.h for UObjectDeleter and uprv_deleteUObject() */
-
-/**
- * This specifies whether or not, and how, the hastable resizes itself.
- * See uhash_setResizePolicy().
- */
-enum UHashResizePolicy {
- U_GROW, /* Grow on demand, do not shrink */
- U_GROW_AND_SHRINK, /* Grow and shrink on demand */
- U_FIXED /* Never change size */
-};
-
-/**
- * The UHashtable struct. Clients should treat this as an opaque data
- * type and manipulate it only through the uhash_... API.
- */
-struct UHashtable {
-
- /* Main key-value pair storage array */
-
- UHashElement *elements;
-
- /* Function pointers */
-
- UHashFunction *keyHasher; /* Computes hash from key.
- * Never null. */
- UKeyComparator *keyComparator; /* Compares keys for equality.
- * Never null. */
- UValueComparator *valueComparator; /* Compares the values for equality */
-
- UObjectDeleter *keyDeleter; /* Deletes keys when required.
- * If NULL won't do anything */
- UObjectDeleter *valueDeleter; /* Deletes values when required.
- * If NULL won't do anything */
-
- /* Size parameters */
-
- int32_t count; /* The number of key-value pairs in this table.
- * 0 <= count <= length. In practice we
- * never let count == length (see code). */
- int32_t length; /* The physical size of the arrays hashes, keys
- * and values. Must be prime. */
-
- /* Rehashing thresholds */
-
- int32_t highWaterMark; /* If count > highWaterMark, rehash */
- int32_t lowWaterMark; /* If count < lowWaterMark, rehash */
- float highWaterRatio; /* 0..1; high water as a fraction of length */
- float lowWaterRatio; /* 0..1; low water as a fraction of length */
-
- int8_t primeIndex; /* Index into our prime table for length.
- * length == PRIMES[primeIndex] */
- UBool allocated; /* Was this UHashtable allocated? */
-};
-typedef struct UHashtable UHashtable;
-
-U_CDECL_END
-
-/********************************************************************
- * API
- ********************************************************************/
-
-/**
- * Initialize a new UHashtable.
- * @param keyHash A pointer to the key hashing function. Must not be
- * NULL.
- * @param keyComp A pointer to the function that compares keys. Must
- * not be NULL.
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return A pointer to a UHashtable, or 0 if an error occurred.
- * @see uhash_openSize
- */
-U_CAPI UHashtable* U_EXPORT2
-uhash_open(UHashFunction *keyHash,
- UKeyComparator *keyComp,
- UValueComparator *valueComp,
- UErrorCode *status);
-
-/**
- * Initialize a new UHashtable with a given initial size.
- * @param keyHash A pointer to the key hashing function. Must not be
- * NULL.
- * @param keyComp A pointer to the function that compares keys. Must
- * not be NULL.
- * @param size The initial capacity of this hash table.
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return A pointer to a UHashtable, or 0 if an error occurred.
- * @see uhash_open
- */
-U_CAPI UHashtable* U_EXPORT2
-uhash_openSize(UHashFunction *keyHash,
- UKeyComparator *keyComp,
- UValueComparator *valueComp,
- int32_t size,
- UErrorCode *status);
-
-/**
- * Initialize an existing UHashtable.
- * @param keyHash A pointer to the key hashing function. Must not be
- * NULL.
- * @param keyComp A pointer to the function that compares keys. Must
- * not be NULL.
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return A pointer to a UHashtable, or 0 if an error occurred.
- * @see uhash_openSize
- */
-U_CAPI UHashtable* U_EXPORT2
-uhash_init(UHashtable *hash,
- UHashFunction *keyHash,
- UKeyComparator *keyComp,
- UValueComparator *valueComp,
- UErrorCode *status);
-
-/**
- * Initialize an existing UHashtable.
- * @param keyHash A pointer to the key hashing function. Must not be
- * NULL.
- * @param keyComp A pointer to the function that compares keys. Must
- * not be NULL.
- * @param size The initial capacity of this hash table.
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return A pointer to a UHashtable, or 0 if an error occurred.
- * @see uhash_openSize
- */
-U_CAPI UHashtable* U_EXPORT2
-uhash_initSize(UHashtable *hash,
- UHashFunction *keyHash,
- UKeyComparator *keyComp,
- UValueComparator *valueComp,
- int32_t size,
- UErrorCode *status);
-
-/**
- * Close a UHashtable, releasing the memory used.
- * @param hash The UHashtable to close. If hash is NULL no operation is performed.
- */
-U_CAPI void U_EXPORT2
-uhash_close(UHashtable *hash);
-
-
-
-/**
- * Set the function used to hash keys.
- * @param hash The UHashtable to set
- * @param fn the function to be used hash keys; must not be NULL
- * @return the previous key hasher; non-NULL
- */
-U_CAPI UHashFunction *U_EXPORT2
-uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn);
-
-/**
- * Set the function used to compare keys. The default comparison is a
- * void* pointer comparison.
- * @param hash The UHashtable to set
- * @param fn the function to be used compare keys; must not be NULL
- * @return the previous key comparator; non-NULL
- */
-U_CAPI UKeyComparator *U_EXPORT2
-uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn);
-
-/**
- * Set the function used to compare values. The default comparison is a
- * void* pointer comparison.
- * @param hash The UHashtable to set
- * @param fn the function to be used compare keys; must not be NULL
- * @return the previous key comparator; non-NULL
- */
-U_CAPI UValueComparator *U_EXPORT2
-uhash_setValueComparator(UHashtable *hash, UValueComparator *fn);
-
-/**
- * Set the function used to delete keys. If this function pointer is
- * NULL, this hashtable does not delete keys. If it is non-NULL, this
- * hashtable does delete keys. This function should be set once
- * before any elements are added to the hashtable and should not be
- * changed thereafter.
- * @param hash The UHashtable to set
- * @param fn the function to be used delete keys, or NULL
- * @return the previous key deleter; may be NULL
- */
-U_CAPI UObjectDeleter *U_EXPORT2
-uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn);
-
-/**
- * Set the function used to delete values. If this function pointer
- * is NULL, this hashtable does not delete values. If it is non-NULL,
- * this hashtable does delete values. This function should be set
- * once before any elements are added to the hashtable and should not
- * be changed thereafter.
- * @param hash The UHashtable to set
- * @param fn the function to be used delete values, or NULL
- * @return the previous value deleter; may be NULL
- */
-U_CAPI UObjectDeleter *U_EXPORT2
-uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn);
-
-/**
- * Specify whether or not, and how, the hastable resizes itself.
- * By default, tables grow but do not shrink (policy U_GROW).
- * See enum UHashResizePolicy.
- * @param hash The UHashtable to set
- * @param policy The way the hashtable resizes itself, {U_GROW, U_GROW_AND_SHRINK, U_FIXED}
- */
-U_CAPI void U_EXPORT2
-uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy);
-
-/**
- * Get the number of key-value pairs stored in a UHashtable.
- * @param hash The UHashtable to query.
- * @return The number of key-value pairs stored in hash.
- */
-U_CAPI int32_t U_EXPORT2
-uhash_count(const UHashtable *hash);
-
-/**
- * Put a (key=pointer, value=pointer) item in a UHashtable. If the
- * keyDeleter is non-NULL, then the hashtable owns 'key' after this
- * call. If the valueDeleter is non-NULL, then the hashtable owns
- * 'value' after this call. Storing a NULL value is the same as
- * calling uhash_remove().
- * @param hash The target UHashtable.
- * @param key The key to store.
- * @param value The value to store, may be NULL (see above).
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return The previous value, or NULL if none.
- * @see uhash_get
- */
-U_CAPI void* U_EXPORT2
-uhash_put(UHashtable *hash,
- void *key,
- void *value,
- UErrorCode *status);
-
-/**
- * Put a (key=integer, value=pointer) item in a UHashtable.
- * keyDeleter must be NULL. If the valueDeleter is non-NULL, then the
- * hashtable owns 'value' after this call. Storing a NULL value is
- * the same as calling uhash_remove().
- * @param hash The target UHashtable.
- * @param key The integer key to store.
- * @param value The value to store, may be NULL (see above).
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return The previous value, or NULL if none.
- * @see uhash_get
- */
-U_CAPI void* U_EXPORT2
-uhash_iput(UHashtable *hash,
- int32_t key,
- void* value,
- UErrorCode *status);
-
-/**
- * Put a (key=pointer, value=integer) item in a UHashtable. If the
- * keyDeleter is non-NULL, then the hashtable owns 'key' after this
- * call. valueDeleter must be NULL. Storing a 0 value is the same as
- * calling uhash_remove().
- * @param hash The target UHashtable.
- * @param key The key to store.
- * @param value The integer value to store.
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return The previous value, or 0 if none.
- * @see uhash_get
- */
-U_CAPI int32_t U_EXPORT2
-uhash_puti(UHashtable *hash,
- void* key,
- int32_t value,
- UErrorCode *status);
-
-/**
- * Put a (key=integer, value=integer) item in a UHashtable. If the
- * keyDeleter is non-NULL, then the hashtable owns 'key' after this
- * call. valueDeleter must be NULL. Storing a 0 value is the same as
- * calling uhash_remove().
- * @param hash The target UHashtable.
- * @param key The key to store.
- * @param value The integer value to store.
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return The previous value, or 0 if none.
- * @see uhash_get
- */
-U_CAPI int32_t U_EXPORT2
-uhash_iputi(UHashtable *hash,
- int32_t key,
- int32_t value,
- UErrorCode *status);
-
-/**
- * Retrieve a pointer value from a UHashtable using a pointer key,
- * as previously stored by uhash_put().
- * @param hash The target UHashtable.
- * @param key A pointer key stored in a hashtable
- * @return The requested item, or NULL if not found.
- */
-U_CAPI void* U_EXPORT2
-uhash_get(const UHashtable *hash,
- const void *key);
-
-/**
- * Retrieve a pointer value from a UHashtable using a integer key,
- * as previously stored by uhash_iput().
- * @param hash The target UHashtable.
- * @param key An integer key stored in a hashtable
- * @return The requested item, or NULL if not found.
- */
-U_CAPI void* U_EXPORT2
-uhash_iget(const UHashtable *hash,
- int32_t key);
-
-/**
- * Retrieve an integer value from a UHashtable using a pointer key,
- * as previously stored by uhash_puti().
- * @param hash The target UHashtable.
- * @param key A pointer key stored in a hashtable
- * @return The requested item, or 0 if not found.
- */
-U_CAPI int32_t U_EXPORT2
-uhash_geti(const UHashtable *hash,
- const void* key);
-/**
- * Retrieve an integer value from a UHashtable using an integer key,
- * as previously stored by uhash_iputi().
- * @param hash The target UHashtable.
- * @param key An integer key stored in a hashtable
- * @return The requested item, or 0 if not found.
- */
-U_CAPI int32_t U_EXPORT2
-uhash_igeti(const UHashtable *hash,
- int32_t key);
-
-/**
- * Remove an item from a UHashtable stored by uhash_put().
- * @param hash The target UHashtable.
- * @param key A key stored in a hashtable
- * @return The item removed, or NULL if not found.
- */
-U_CAPI void* U_EXPORT2
-uhash_remove(UHashtable *hash,
- const void *key);
-
-/**
- * Remove an item from a UHashtable stored by uhash_iput().
- * @param hash The target UHashtable.
- * @param key An integer key stored in a hashtable
- * @return The item removed, or NULL if not found.
- */
-U_CAPI void* U_EXPORT2
-uhash_iremove(UHashtable *hash,
- int32_t key);
-
-/**
- * Remove an item from a UHashtable stored by uhash_puti().
- * @param hash The target UHashtable.
- * @param key An key stored in a hashtable
- * @return The item removed, or 0 if not found.
- */
-U_CAPI int32_t U_EXPORT2
-uhash_removei(UHashtable *hash,
- const void* key);
-
-/**
- * Remove an item from a UHashtable stored by uhash_iputi().
- * @param hash The target UHashtable.
- * @param key An integer key stored in a hashtable
- * @return The item removed, or 0 if not found.
- */
-U_CAPI int32_t U_EXPORT2
-uhash_iremovei(UHashtable *hash,
- int32_t key);
-
-/**
- * Remove all items from a UHashtable.
- * @param hash The target UHashtable.
- */
-U_CAPI void U_EXPORT2
-uhash_removeAll(UHashtable *hash);
-
-/**
- * Locate an element of a UHashtable. The caller must not modify the
- * returned object. The primary use of this function is to obtain the
- * stored key when it may not be identical to the search key. For
- * example, if the compare function is a case-insensitive string
- * compare, then the hash key may be desired in order to obtain the
- * canonical case corresponding to a search key.
- * @param hash The target UHashtable.
- * @param key A key stored in a hashtable
- * @return a hash element, or NULL if the key is not found.
- */
-U_CAPI const UHashElement* U_EXPORT2
-uhash_find(const UHashtable *hash, const void* key);
-
-/**
- * \def UHASH_FIRST
- * Constant for use with uhash_nextElement
- * @see uhash_nextElement
- */
-#define UHASH_FIRST (-1)
-
-/**
- * Iterate through the elements of a UHashtable. The caller must not
- * modify the returned object. However, uhash_removeElement() may be
- * called during iteration to remove an element from the table.
- * Iteration may safely be resumed afterwards. If uhash_put() is
- * called during iteration the iteration will then be out of sync and
- * should be restarted.
- * @param hash The target UHashtable.
- * @param pos This should be set to UHASH_FIRST initially, and left untouched
- * thereafter.
- * @return a hash element, or NULL if no further key-value pairs
- * exist in the table.
- */
-U_CAPI const UHashElement* U_EXPORT2
-uhash_nextElement(const UHashtable *hash,
- int32_t *pos);
-
-/**
- * Remove an element, returned by uhash_nextElement(), from the table.
- * Iteration may be safely continued afterwards.
- * @param hash The hashtable
- * @param e The element, returned by uhash_nextElement(), to remove.
- * Must not be NULL. Must not be an empty or deleted element (as long
- * as this was returned by uhash_nextElement() it will not be empty or
- * deleted). Note: Although this parameter is const, it will be
- * modified.
- * @return the value that was removed.
- */
-U_CAPI void* U_EXPORT2
-uhash_removeElement(UHashtable *hash, const UHashElement* e);
-
-/********************************************************************
- * UHashTok convenience
- ********************************************************************/
-
-/**
- * Return a UHashTok for an integer.
- * @param i The given integer
- * @return a UHashTok for an integer.
- */
-/*U_CAPI UHashTok U_EXPORT2
-uhash_toki(int32_t i);*/
-
-/**
- * Return a UHashTok for a pointer.
- * @param p The given pointer
- * @return a UHashTok for a pointer.
- */
-/*U_CAPI UHashTok U_EXPORT2
-uhash_tokp(void* p);*/
-
-/********************************************************************
- * UChar* and char* Support Functions
- ********************************************************************/
-
-/**
- * Generate a hash code for a null-terminated UChar* string. If the
- * string is not null-terminated do not use this function. Use
- * together with uhash_compareUChars.
- * @param key The string (const UChar*) to hash.
- * @return A hash code for the key.
- */
-U_CAPI int32_t U_EXPORT2
-uhash_hashUChars(const UHashTok key);
-
-/**
- * Generate a hash code for a null-terminated char* string. If the
- * string is not null-terminated do not use this function. Use
- * together with uhash_compareChars.
- * @param key The string (const char*) to hash.
- * @return A hash code for the key.
- */
-U_CAPI int32_t U_EXPORT2
-uhash_hashChars(const UHashTok key);
-
-/**
- * Generate a case-insensitive hash code for a null-terminated char*
- * string. If the string is not null-terminated do not use this
- * function. Use together with uhash_compareIChars.
- * @param key The string (const char*) to hash.
- * @return A hash code for the key.
- */
-U_CAPI int32_t U_EXPORT2
-uhash_hashIChars(const UHashTok key);
-
-/**
- * Comparator for null-terminated UChar* strings. Use together with
- * uhash_hashUChars.
- * @param key1 The string for comparison
- * @param key2 The string for comparison
- * @return true if key1 and key2 are equal, return false otherwise.
- */
-U_CAPI UBool U_EXPORT2
-uhash_compareUChars(const UHashTok key1, const UHashTok key2);
-
-/**
- * Comparator for null-terminated char* strings. Use together with
- * uhash_hashChars.
- * @param key1 The string for comparison
- * @param key2 The string for comparison
- * @return true if key1 and key2 are equal, return false otherwise.
- */
-U_CAPI UBool U_EXPORT2
-uhash_compareChars(const UHashTok key1, const UHashTok key2);
-
-/**
- * Case-insensitive comparator for null-terminated char* strings. Use
- * together with uhash_hashIChars.
- * @param key1 The string for comparison
- * @param key2 The string for comparison
- * @return true if key1 and key2 are equal, return false otherwise.
- */
-U_CAPI UBool U_EXPORT2
-uhash_compareIChars(const UHashTok key1, const UHashTok key2);
-
-/********************************************************************
- * UnicodeString Support Functions
- ********************************************************************/
-
-/**
- * Hash function for UnicodeString* keys.
- * @param key The string (const char*) to hash.
- * @return A hash code for the key.
- */
-U_CAPI int32_t U_EXPORT2
-uhash_hashUnicodeString(const UElement key);
-
-/**
- * Hash function for UnicodeString* keys (case insensitive).
- * Make sure to use together with uhash_compareCaselessUnicodeString.
- * @param key The string (const char*) to hash.
- * @return A hash code for the key.
- */
-U_CAPI int32_t U_EXPORT2
-uhash_hashCaselessUnicodeString(const UElement key);
-
-/********************************************************************
- * int32_t Support Functions
- ********************************************************************/
-
-/**
- * Hash function for 32-bit integer keys.
- * @param key The string (const char*) to hash.
- * @return A hash code for the key.
- */
-U_CAPI int32_t U_EXPORT2
-uhash_hashLong(const UHashTok key);
-
-/**
- * Comparator function for 32-bit integer keys.
- * @param key1 The integer for comparison
- * @param Key2 The integer for comparison
- * @return true if key1 and key2 are equal, return false otherwise
- */
-U_CAPI UBool U_EXPORT2
-uhash_compareLong(const UHashTok key1, const UHashTok key2);
-
-/********************************************************************
- * Other Support Functions
- ********************************************************************/
-
-/**
- * Deleter for Hashtable objects.
- * @param obj The object to be deleted
- */
-U_CAPI void U_EXPORT2
-uhash_deleteHashtable(void *obj);
-
-/* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */
-
-/**
- * Checks if the given hash tables are equal or not.
- * @param hash1
- * @param hash2
- * @return true if the hashtables are equal and false if not.
- */
-U_CAPI UBool U_EXPORT2
-uhash_equals(const UHashtable* hash1, const UHashtable* hash2);
-
-
-#if U_SHOW_CPLUSPLUS_API
-
-U_NAMESPACE_BEGIN
-
-/**
- * \class LocalUHashtablePointer
- * "Smart pointer" class, closes a UHashtable via uhash_close().
- * For most methods see the LocalPointerBase base class.
- *
- * @see LocalPointerBase
- * @see LocalPointer
- * @stable ICU 4.4
- */
-U_DEFINE_LOCAL_OPEN_POINTER(LocalUHashtablePointer, UHashtable, uhash_close);
-
-U_NAMESPACE_END
-
-#endif
-
-#endif
diff --git a/contrib/libs/icu/common/uhash_us.cpp b/contrib/libs/icu/common/uhash_us.cpp
deleted file mode 100644
index ef482c27463..00000000000
--- a/contrib/libs/icu/common/uhash_us.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 1997-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-******************************************************************************
-* Date Name Description
-* 03/22/00 aliu Creation.
-* 07/06/01 aliu Modified to support int32_t keys on
-* platforms with sizeof(void*) < 32.
-******************************************************************************
-*/
-
-#include "hash.h"
-
-/**
- * Deleter for Hashtable objects.
- */
-U_CAPI void U_EXPORT2
-uhash_deleteHashtable(void *obj) {
- U_NAMESPACE_USE
- delete (Hashtable*) obj;
-}
-
-//eof
diff --git a/contrib/libs/icu/common/uidna.cpp b/contrib/libs/icu/common/uidna.cpp
deleted file mode 100644
index ac2f9c3c8cd..00000000000
--- a/contrib/libs/icu/common/uidna.cpp
+++ /dev/null
@@ -1,921 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- *******************************************************************************
- *
- * Copyright (C) 2003-2014, International Business Machines
- * Corporation and others. All Rights Reserved.
- *
- *******************************************************************************
- * file name: uidna.cpp
- * encoding: UTF-8
- * tab size: 8 (not used)
- * indentation:4
- *
- * created on: 2003feb1
- * created by: Ram Viswanadha
- */
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_IDNA
-
-#include "unicode/uidna.h"
-#include "unicode/ustring.h"
-#include "unicode/usprep.h"
-#include "punycode.h"
-#include "ustr_imp.h"
-#include "cmemory.h"
-#include "uassert.h"
-#include "sprpimpl.h"
-
-/* it is official IDNA ACE Prefix is "xn--" */
-static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
-#define ACE_PREFIX_LENGTH 4
-
-#define MAX_LABEL_LENGTH 63
-/* The Max length of the labels should not be more than MAX_LABEL_LENGTH */
-#define MAX_LABEL_BUFFER_SIZE 100
-
-#define MAX_DOMAIN_NAME_LENGTH 255
-/* The Max length of the domain names should not be more than MAX_DOMAIN_NAME_LENGTH */
-#define MAX_IDN_BUFFER_SIZE MAX_DOMAIN_NAME_LENGTH+1
-
-#define LOWER_CASE_DELTA 0x0020
-#define HYPHEN 0x002D
-#define FULL_STOP 0x002E
-#define CAPITAL_A 0x0041
-#define CAPITAL_Z 0x005A
-
-inline static UChar
-toASCIILower(UChar ch){
- if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
- return ch + LOWER_CASE_DELTA;
- }
- return ch;
-}
-
-inline static UBool
-startsWithPrefix(const UChar* src , int32_t srcLength){
- if(srcLength < ACE_PREFIX_LENGTH){
- return FALSE;
- }
-
- for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
- if(toASCIILower(src[i]) != ACE_PREFIX[i]){
- return FALSE;
- }
- }
- return TRUE;
-}
-
-
-inline static int32_t
-compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
- const UChar* s2, int32_t s2Len){
-
- int32_t minLength;
- int32_t lengthResult;
-
- // are we comparing different lengths?
- if(s1Len != s2Len) {
- if(s1Len < s2Len) {
- minLength = s1Len;
- lengthResult = -1;
- } else {
- minLength = s2Len;
- lengthResult = 1;
- }
- } else {
- // ok the lengths are equal
- minLength = s1Len;
- lengthResult = 0;
- }
-
- UChar c1,c2;
- int32_t rc;
-
- for(int32_t i =0;/* no condition */;i++) {
-
- /* If we reach the ends of both strings then they match */
- if(i == minLength) {
- return lengthResult;
- }
-
- c1 = s1[i];
- c2 = s2[i];
-
- /* Case-insensitive comparison */
- if(c1!=c2) {
- rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
- if(rc!=0) {
- lengthResult=rc;
- break;
- }
- }
- }
- return lengthResult;
-}
-
-
-/**
- * Ascertain if the given code point is a label separator as
- * defined by the IDNA RFC
- *
- * @param ch The code point to be ascertained
- * @return true if the char is a label separator
- * @stable ICU 2.8
- */
-static inline UBool isLabelSeparator(UChar ch){
- switch(ch){
- case 0x002e:
- case 0x3002:
- case 0xFF0E:
- case 0xFF61:
- return TRUE;
- default:
- return FALSE;
- }
-}
-
-// returns the length of the label excluding the separator
-// if *limit == separator then the length returned does not include
-// the separtor.
-static inline int32_t
-getNextSeparator(UChar *src, int32_t srcLength,
- UChar **limit, UBool *done){
- if(srcLength == -1){
- int32_t i;
- for(i=0 ; ;i++){
- if(src[i] == 0){
- *limit = src + i; // point to null
- *done = TRUE;
- return i;
- }
- if(isLabelSeparator(src[i])){
- *limit = src + (i+1); // go past the delimiter
- return i;
-
- }
- }
- }else{
- int32_t i;
- for(i=0;i<srcLength;i++){
- if(isLabelSeparator(src[i])){
- *limit = src + (i+1); // go past the delimiter
- return i;
- }
- }
- // we have not found the delimiter
- // if(i==srcLength)
- *limit = src+srcLength;
- *done = TRUE;
-
- return i;
- }
-}
-static inline UBool isLDHChar(UChar ch){
- // high runner case
- if(ch>0x007A){
- return FALSE;
- }
- //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
- if( (ch==0x002D) ||
- (0x0030 <= ch && ch <= 0x0039) ||
- (0x0041 <= ch && ch <= 0x005A) ||
- (0x0061 <= ch && ch <= 0x007A)
- ){
- return TRUE;
- }
- return FALSE;
-}
-
-static int32_t
-_internal_toASCII(const UChar* src, int32_t srcLength,
- UChar* dest, int32_t destCapacity,
- int32_t options,
- UStringPrepProfile* nameprep,
- UParseError* parseError,
- UErrorCode* status)
-{
-/*
- * Converts one label (src, srcLength; -1 means NUL-terminated) to its ASCII
- * form in dest, following the numbered ToASCII steps marked below:
- * copy/prepare the label (usprep_prepare when it contains non-ASCII),
- * optionally enforce the STD3 LDH and hyphen rules, then either copy the
- * all-ASCII label or emit ACE_PREFIX followed by its Punycode encoding.
- *
- * Returns u_terminateUChars(dest, destCapacity, reqLength, status).
- * Failures are reported through *status; the codes set directly here are
- * U_MEMORY_ALLOCATION_ERROR, U_IDNA_ZERO_LENGTH_LABEL_ERROR,
- * U_IDNA_STD3_ASCII_RULES_ERROR, U_IDNA_ACE_PREFIX_ERROR,
- * U_BUFFER_OVERFLOW_ERROR and U_IDNA_LABEL_TOO_LONG_ERROR.
- * Heap buffers that replaced the stack buffers are released at CLEANUP.
- */
- // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
- UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
- // b1/b2 start on the stack buffers and are swapped for heap buffers only when more room is needed
- UChar *b1 = b1Stack, *b2 = b2Stack;
- int32_t b1Len=0, b2Len,
- b1Capacity = MAX_LABEL_BUFFER_SIZE,
- b2Capacity = MAX_LABEL_BUFFER_SIZE ,
- reqLength=0;
-
- int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
- UBool* caseFlags = NULL; // never allocated in this function; passed as NULL to u_strToPunycode
-
- // assume the source contains only ASCII code points until proven otherwise
- UBool srcIsASCII = TRUE;
- // assume the source contains only LDH (letter/digit/hyphen) code points until proven otherwise
- UBool srcIsLDH = TRUE;
-
- int32_t j=0;
-
- // whether the caller asked for the STD3 ASCII rules to be enforced
- UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
-
- int32_t failPos = -1; // index of the last non-LDH ASCII code unit found in b1, if any
-
- if(srcLength == -1){
- srcLength = u_strlen(src);
- }
-
- if(srcLength > b1Capacity){
- b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
- if(b1==NULL){
- *status = U_MEMORY_ALLOCATION_ERROR;
- goto CLEANUP;
- }
- b1Capacity = srcLength;
- }
-
- // step 1: copy src into b1 and note whether any code unit is above 0x7F
- for( j=0;j<srcLength;j++){
- if(src[j] > 0x7F){
- srcIsASCII = FALSE;
- }
- b1[b1Len++] = src[j];
- }
-
- // step 2 is performed only if the source contains non ASCII
- if(srcIsASCII == FALSE){
-
- // step 2: run the profile over src; the result overwrites the plain copy in b1
- b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
-
- if(*status == U_BUFFER_OVERFLOW_ERROR){
- // redo processing of string
- // we do not have enough room so grow the buffer to the length reported by the first call
- if(b1 != b1Stack){
- uprv_free(b1);
- }
- b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
- if(b1==NULL){
- *status = U_MEMORY_ALLOCATION_ERROR;
- goto CLEANUP;
- }
-
- *status = U_ZERO_ERROR; // reset error
-
- b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
- }
- }
- // error bail out
- if(U_FAILURE(*status)){
- goto CLEANUP;
- }
- if(b1Len == 0){
- *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
- goto CLEANUP;
- }
-
- // for step 3 & 4: re-scan b1, because preparation may have changed its contents
- srcIsASCII = TRUE;
- for( j=0;j<b1Len;j++){
- // check if output of usprep_prepare is all ASCII
- if(b1[j] > 0x7F){
- srcIsASCII = FALSE;
- }else if(isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character
- srcIsLDH = FALSE;
- failPos = j;
- }
- }
- if(useSTD3ASCIIRules == TRUE){
- // verify 3a and 3b
- // 3(a) Verify the absence of non-LDH ASCII code points; that is, the
- // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
- // 3(b) Verify the absence of leading and trailing hyphen-minus; that
- // is, the absence of U+002D at the beginning and end of the
- // sequence.
- if( srcIsLDH == FALSE /* the label contains at least one non-LDH ASCII character */
- || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){
- *status = U_IDNA_STD3_ASCII_RULES_ERROR;
-
- /* populate the parseError struct */
- if(srcIsLDH==FALSE){
- // failPos holds the index of the last non-LDH character seen in the scan above
- uprv_syntaxError(b1,failPos, b1Len,parseError);
- }else if(b1[0] == HYPHEN){
- // fail position is 0
- uprv_syntaxError(b1,0,b1Len,parseError);
- }else{
- // the last index in the source is always length-1
- uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError);
- }
-
- goto CLEANUP;
- }
- }
- // Step 4: if the source is ASCII then proceed to step 8
- if(srcIsASCII){
- if(b1Len <= destCapacity){
- u_memmove(dest, b1, b1Len);
- reqLength = b1Len;
- }else{
- reqLength = b1Len;
- goto CLEANUP; // skips the step 8 length check; only the required length is handed to u_terminateUChars
- }
- }else{
- // step 5 : verify the sequence does not begin with ACE prefix
- if(!startsWithPrefix(b1,b1Len)){
-
- //step 6: encode the sequence with punycode
-
- // do not preserve the case flags for now!
- // TODO: Preserve the case while implementing the RFE
- // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
- // uprv_memset(caseFlags,TRUE,b1Len);
-
- b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status);
-
- if(*status == U_BUFFER_OVERFLOW_ERROR){
- // redo processing of string
- /* we do not have enough room so grow the buffer to the length reported by the first call */
- b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
- if(b2 == NULL){
- *status = U_MEMORY_ALLOCATION_ERROR;
- goto CLEANUP;
- }
-
- *status = U_ZERO_ERROR; // reset error
-
- b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
- }
- //error bail out
- if(U_FAILURE(*status)){
- goto CLEANUP;
- }
- // TODO : Reconsider while implementing the case preserve RFE
- // convert all codepoints to lower case ASCII
- // toASCIILower(b2,b2Len);
- reqLength = b2Len+ACE_PREFIX_LENGTH;
-
- if(reqLength > destCapacity){
- *status = U_BUFFER_OVERFLOW_ERROR;
- goto CLEANUP;
- }
- //Step 7: prepend the ACE prefix
- u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH);
- //Step 6 (continued): copy the Punycode output in b2 after the prefix
- u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len);
-
- }else{
- *status = U_IDNA_ACE_PREFIX_ERROR;
- //position of failure is 0
- uprv_syntaxError(b1,0,b1Len,parseError);
- goto CLEANUP;
- }
- }
- // step 8: verify the length of label
- if(reqLength > MAX_LABEL_LENGTH){
- *status = U_IDNA_LABEL_TOO_LONG_ERROR;
- }
-
-CLEANUP:
- if(b1 != b1Stack){
- uprv_free(b1);
- }
- if(b2 != b2Stack){
- uprv_free(b2);
- }
- uprv_free(caseFlags);
-
- return u_terminateUChars(dest, destCapacity, reqLength, status);
-}
-
-/*
- * Applies the ToUnicode steps to one label (src, srcLength; -1 means
- * NUL-terminated) and writes the result to dest.
- *
- * A label that starts with the ACE prefix is Punycode-decoded, converted back
- * with uidna_toASCII() and compared against the prepared input; the decoded
- * form is returned only when that round trip matches. A label without the
- * prefix is copied unchanged. STD3 ASCII rule verification is not performed
- * here (the original kept that check only as commented-out code).
- *
- * Per the RFC quote below, ToUnicode never fails: when any step leaves a
- * failure in *status, the source is copied to dest (if it fits), reqLength
- * becomes srcLength and *status is reset to U_ZERO_ERROR before the final
- * u_terminateUChars() call, whose value is returned.
- *
- * Returns 0 immediately, without touching dest, when srcLength is 0 (or any
- * value other than -1 that is not positive).
- */
-static int32_t
-_internal_toUnicode(const UChar* src, int32_t srcLength,
-                    UChar* dest, int32_t destCapacity,
-                    int32_t options,
-                    UStringPrepProfile* nameprep,
-                    UParseError* parseError,
-                    UErrorCode* status)
-{
-    int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
-
-    // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
-    UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
-
-    // Working buffers start on the stack and move to the heap only on overflow.
-    // All locals are declared before the first goto so that no jump to CLEANUP
-    // crosses an initialization.
-    UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
-    int32_t b1Len = 0, b2Len, b1PrimeLen, b3Len,
-            b1Capacity = MAX_LABEL_BUFFER_SIZE,
-            b2Capacity = MAX_LABEL_BUFFER_SIZE,
-            b3Capacity = MAX_LABEL_BUFFER_SIZE,
-            reqLength=0;
-
-    UBool* caseFlags = NULL;
-
-    UBool srcIsASCII = TRUE;
-
-    // step 1: find out if all the codepoints in src are ASCII
-    if(srcLength==-1){
-        // NUL-terminated: the whole string must be walked to learn its length
-        srcLength = 0;
-        for(;src[srcLength]!=0;){
-            if(src[srcLength]> 0x7f){
-                srcIsASCII = FALSE;
-            }
-            srcLength++;
-        }
-    }else if(srcLength > 0){
-        for(int32_t j=0; j<srcLength; j++){
-            if(src[j]> 0x7f){
-                srcIsASCII = FALSE;
-                break;
-            }
-        }
-    }else{
-        return 0;
-    }
-
-    if(srcIsASCII == FALSE){
-        // step 2: process the string
-        b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
-        if(*status == U_BUFFER_OVERFLOW_ERROR){
-            // redo processing of string
-            /* we do not have enough room so grow the buffer*/
-            b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
-            if(b1==NULL){
-                *status = U_MEMORY_ALLOCATION_ERROR;
-                goto CLEANUP;
-            }
-
-            *status = U_ZERO_ERROR; // reset error
-
-            b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
-        }
-        //bail out on error
-        if(U_FAILURE(*status)){
-            goto CLEANUP;
-        }
-    }else{
-        // all ASCII: no preparation needed, so b1 simply aliases src
-        // (CLEANUP must therefore never free b1 when it equals src)
-        b1 = (UChar*) src;
-        b1Len = srcLength;
-    }
-
-    // The RFC states that
-    // <quote>
-    // ToUnicode never fails. If any step fails, then the original input
-    // is returned immediately in that step.
-    // </quote>
-
-    //step 3: verify ACE Prefix
-    if(startsWithPrefix(b1,b1Len)){
-
-        //step 4: Remove the ACE Prefix
-        b1Prime = b1 + ACE_PREFIX_LENGTH;
-        b1PrimeLen = b1Len - ACE_PREFIX_LENGTH;
-
-        //step 5: Decode using punycode
-        b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status);
-
-        if(*status == U_BUFFER_OVERFLOW_ERROR){
-            // redo processing of string
-            /* we do not have enough room so grow the buffer*/
-            b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
-            if(b2==NULL){
-                *status = U_MEMORY_ALLOCATION_ERROR;
-                goto CLEANUP;
-            }
-
-            *status = U_ZERO_ERROR; // reset error
-
-            b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status);
-        }
-
-        //step 6:Apply toASCII
-        // (uidna_toASCII returns at once if *status already holds a failure)
-        b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status);
-
-        if(*status == U_BUFFER_OVERFLOW_ERROR){
-            // redo processing of string
-            /* we do not have enough room so grow the buffer*/
-            b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
-            if(b3==NULL){
-                *status = U_MEMORY_ALLOCATION_ERROR;
-                goto CLEANUP;
-            }
-
-            *status = U_ZERO_ERROR; // reset error
-
-            b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status);
-        }
-        //bail out on error
-        if(U_FAILURE(*status)){
-            goto CLEANUP;
-        }
-
-        //step 7: verify
-        if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
-            // Cause the original to be returned.
-            *status = U_IDNA_VERIFICATION_ERROR;
-            goto CLEANUP;
-        }
-
-        //step 8: return output of step 5
-        reqLength = b2Len;
-        if(b2Len <= destCapacity) {
-            u_memmove(dest, b2, b2Len);
-        }
-    }
-    else{
-        // No ACE prefix: just return the source.
-        if(srcLength <= destCapacity){
-            u_memmove(dest, src, srcLength);
-        }
-        reqLength = srcLength;
-    }
-
-CLEANUP:
-
-    if(b1 != b1Stack && b1!=src){
-        uprv_free(b1);
-    }
-    if(b2 != b2Stack){
-        uprv_free(b2);
-    }
-    // b3 is heap-allocated when the step 6 round trip overflowed b3Stack;
-    // it was previously never released (uprv_free(NULL) is also reached
-    // safely here after a failed allocation).
-    if(b3 != b3Stack){
-        uprv_free(b3);
-    }
-    uprv_free(caseFlags);
-
-    // The RFC states that
-    // <quote>
-    // ToUnicode never fails. If any step fails, then the original input
-    // is returned immediately in that step.
-    // </quote>
-    // So if any step fails lets copy source to destination
-    if(U_FAILURE(*status)){
-        //copy the source to destination
-        if(dest && srcLength <= destCapacity){
-            // srcLength should have already been set earlier.
-            U_ASSERT(srcLength >= 0);
-            u_memmove(dest, src, srcLength);
-        }
-        reqLength = srcLength;
-        *status = U_ZERO_ERROR;
-    }
-
-    return u_terminateUChars(dest, destCapacity, reqLength, status);
-}
-
-/*
- * Public single-label ToASCII entry point: validates the arguments, opens the
- * USPREP_RFC3491_NAMEPREP profile, delegates to _internal_toASCII() and closes
- * the profile again.
- *
- * Returns 0 when status is NULL or already failed, 0 with
- * U_ILLEGAL_ARGUMENT_ERROR for bad arguments, -1 when the profile cannot be
- * opened, and otherwise whatever _internal_toASCII() returns.
- */
-U_CAPI int32_t U_EXPORT2
-uidna_toASCII(const UChar* src, int32_t srcLength,
-              UChar* dest, int32_t destCapacity,
-              int32_t options,
-              UParseError* parseError,
-              UErrorCode* status){
-
-    if(status == NULL || U_FAILURE(*status)){
-        return 0;
-    }
-
-    // A NULL dest is only acceptable for pure preflighting (capacity 0).
-    if(src == NULL || srcLength < -1 || destCapacity < 0 || (dest == NULL && destCapacity > 0)){
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    UStringPrepProfile* profile = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
-    if(U_FAILURE(*status)){
-        return -1;
-    }
-
-    const int32_t length = _internal_toASCII(src, srcLength, dest, destCapacity,
-                                             options, profile, parseError, status);
-    usprep_close(profile);
-    return length;
-}
-
-/*
- * Public single-label ToUnicode entry point: validates the arguments, opens
- * the USPREP_RFC3491_NAMEPREP profile, delegates to _internal_toUnicode() and
- * closes the profile again.
- *
- * Returns 0 when status is NULL or already failed, 0 with
- * U_ILLEGAL_ARGUMENT_ERROR for bad arguments, -1 when the profile cannot be
- * opened, and otherwise whatever _internal_toUnicode() returns.
- */
-U_CAPI int32_t U_EXPORT2
-uidna_toUnicode(const UChar* src, int32_t srcLength,
-                UChar* dest, int32_t destCapacity,
-                int32_t options,
-                UParseError* parseError,
-                UErrorCode* status){
-
-    if(status == NULL || U_FAILURE(*status)){
-        return 0;
-    }
-
-    // A NULL dest is only acceptable for pure preflighting (capacity 0).
-    if(src == NULL || srcLength < -1 || destCapacity < 0 || (dest == NULL && destCapacity > 0)){
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    UStringPrepProfile* profile = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
-    if(U_FAILURE(*status)){
-        return -1;
-    }
-
-    const int32_t length = _internal_toUnicode(src, srcLength, dest, destCapacity,
-                                               options, profile, parseError, status);
-    usprep_close(profile);
-    return length;
-}
-
-
-/*
- * Converts a whole domain name to ASCII one label at a time.
- *
- * Labels are located with getNextSeparator() and converted with
- * _internal_toASCII(); every separator is written to dest as FULL_STOP.
- * An empty final label (the root label) is not passed to _internal_toASCII().
- * Once dest runs out of room the loop keeps going with a capacity of 0 so
- * that the full required length is still counted.
- *
- * Returns u_terminateUChars(dest, destCapacity, <total length>, status);
- * sets U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR when the total exceeds
- * MAX_DOMAIN_NAME_LENGTH. Returns 0 for a NULL/failed status, for illegal
- * arguments (U_ILLEGAL_ARGUMENT_ERROR) or when the profile cannot be opened.
- */
-U_CAPI int32_t U_EXPORT2
-uidna_IDNToASCII(  const UChar *src, int32_t srcLength,
-                   UChar* dest, int32_t destCapacity,
-                   int32_t options,
-                   UParseError *parseError,
-                   UErrorCode *status){
-
-    if(status == NULL || U_FAILURE(*status)){
-        return 0;
-    }
-    if(src == NULL || srcLength < -1 || destCapacity < 0 || (dest == NULL && destCapacity > 0)){
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
-    if(U_FAILURE(*status)){
-        return 0;
-    }
-
-    UChar *nextLabel = (UChar*)src;    // updated by getNextSeparator()
-    UChar *label     = (UChar*)src;    // start of the label being converted
-    UChar *out       = (UChar*)dest;   // write cursor in dest
-    int32_t srcLeft  = srcLength;      // stays -1 for NUL-terminated input
-    int32_t room     = destCapacity;   // space left behind the write cursor
-    int32_t total    = 0;              // required output length so far
-    UBool done       = FALSE;
-
-    for(;;){
-        const int32_t labelLen = getNextSeparator(label, srcLeft, &nextLabel, &done);
-        int32_t written = 0;
-
-        // skip the conversion only for the empty root label at the very end
-        if(labelLen != 0 || !done){
-            written = _internal_toASCII(label, labelLen,
-                                        out, room,
-                                        options, nameprep,
-                                        parseError, status);
-
-            if(*status == U_BUFFER_OVERFLOW_ERROR){
-                // keep counting, but stop writing
-                *status = U_ZERO_ERROR;
-                room = 0;
-            }
-        }
-
-        if(U_FAILURE(*status)){
-            break;
-        }
-
-        total += written;
-        if(written >= room){
-            // should never occur
-            room = 0;
-        }else{
-            out  += written;
-            room -= written;
-        }
-
-        if(done == TRUE){
-            break;
-        }
-
-        // separators are always emitted as FULL_STOP
-        if(room > 0){
-            *out++ = FULL_STOP;
-            --room;
-        }
-        ++total;
-
-        label = nextLabel;
-        if(srcLeft > 0){
-            srcLeft = (int32_t)(srcLength - (nextLabel - src));
-        }
-    }
-
-    if(total > MAX_DOMAIN_NAME_LENGTH){
-        *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
-    }
-
-    usprep_close(nameprep);
-
-    return u_terminateUChars(dest, destCapacity, total, status);
-}
-
-/*
- * Converts a whole domain name to Unicode one label at a time.
- *
- * Labels are located with getNextSeparator() and converted with
- * _internal_toUnicode(), which also handles empty labels. Unlike the ToASCII
- * operation, the separator found in the source is copied through unchanged.
- * Once dest runs out of room the loop keeps going with a capacity of 0 so
- * that the full required length is still counted.
- *
- * Returns u_terminateUChars(dest, destCapacity, <total length>, status);
- * sets U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR when the total exceeds
- * MAX_DOMAIN_NAME_LENGTH. Returns 0 for a NULL/failed status, for illegal
- * arguments (U_ILLEGAL_ARGUMENT_ERROR) or when the profile cannot be opened.
- */
-U_CAPI int32_t U_EXPORT2
-uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
-                     UChar* dest, int32_t destCapacity,
-                     int32_t options,
-                     UParseError* parseError,
-                     UErrorCode* status){
-
-    if(status == NULL || U_FAILURE(*status)){
-        return 0;
-    }
-    if(src == NULL || srcLength < -1 || destCapacity < 0 || (dest == NULL && destCapacity > 0)){
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
-    if(U_FAILURE(*status)){
-        return 0;
-    }
-
-    UChar *nextLabel = (UChar*)src;    // updated by getNextSeparator()
-    UChar *label     = (UChar*)src;    // start of the label being converted
-    UChar *out       = (UChar*)dest;   // write cursor in dest
-    int32_t srcLeft  = srcLength;      // stays -1 for NUL-terminated input
-    int32_t room     = destCapacity;   // space left behind the write cursor
-    int32_t total    = 0;              // required output length so far
-    UBool done       = FALSE;
-
-    for(;;){
-        const int32_t labelLen = getNextSeparator(label, srcLeft, &nextLabel, &done);
-
-        // The RFC states that
-        // <quote>
-        // ToUnicode never fails. If any step fails, then the original input
-        // is returned immediately in that step.
-        // </quote>
-        // so zero-length labels are not rejected here;
-        // _internal_toUnicode will copy the label.
-        const int32_t written = _internal_toUnicode(label, labelLen,
-                                                    out, room,
-                                                    options, nameprep,
-                                                    parseError, status);
-
-        if(*status == U_BUFFER_OVERFLOW_ERROR){
-            // keep counting, but stop writing
-            *status = U_ZERO_ERROR;
-            room = 0;
-        }
-
-        if(U_FAILURE(*status)){
-            break;
-        }
-
-        total += written;
-        if(written >= room){
-            // should never occur
-            room = 0;
-        }else{
-            out  += written;
-            room -= written;
-        }
-
-        if(done == TRUE){
-            break;
-        }
-
-        // add the label separator
-        // Unlike the ToASCII operation we don't normalize the label separators
-        if(room > 0){
-            *out++ = label[labelLen];
-            --room;
-        }
-        ++total;
-
-        label = nextLabel;
-        if(srcLeft > 0){
-            srcLeft = (int32_t)(srcLength - (nextLabel - src));
-        }
-    }
-
-    if(total > MAX_DOMAIN_NAME_LENGTH){
-        *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
-    }
-
-    usprep_close(nameprep);
-
-    return u_terminateUChars(dest, destCapacity, total, status);
-}
-
-/*
- * Compares two domain names after converting each of them with
- * uidna_IDNToASCII(), using compareCaseInsensitiveASCII() on the results.
- *
- * Each conversion first targets a stack buffer of MAX_IDN_BUFFER_SIZE units
- * and is retried once with a heap buffer of the reported length on
- * U_BUFFER_OVERFLOW_ERROR.
- *
- * Returns the comparison result when both conversions succeeded. Returns -1
- * when status is NULL or already failed on entry, when an allocation fails,
- * or when either conversion leaves a failure in *status (the buffers are
- * then incomplete, so comparing them would be meaningless; previously the
- * comparison was still performed and could report 0 for invalid input).
- */
-U_CAPI int32_t U_EXPORT2
-uidna_compare(  const UChar *s1, int32_t length1,
-                const UChar *s2, int32_t length2,
-                int32_t options,
-                UErrorCode* status){
-
-    if(status == NULL || U_FAILURE(*status)){
-        return -1;
-    }
-
-    UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
-    UChar *b1 = b1Stack, *b2 = b2Stack;
-    int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
-    int32_t result=-1;
-
-    UParseError parseError;
-
-    b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
-    if(*status == U_BUFFER_OVERFLOW_ERROR){
-        // redo processing of string
-        b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
-        if(b1==NULL){
-            *status = U_MEMORY_ALLOCATION_ERROR;
-            goto CLEANUP;
-        }
-
-        *status = U_ZERO_ERROR; // reset error
-
-        b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
-
-    }
-
-    // if the first conversion failed, this call returns at once and leaves *status untouched
-    b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status);
-    if(*status == U_BUFFER_OVERFLOW_ERROR){
-        // redo processing of string
-        b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
-        if(b2==NULL){
-            *status = U_MEMORY_ALLOCATION_ERROR;
-            goto CLEANUP;
-        }
-
-        *status = U_ZERO_ERROR; // reset error
-
-        b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status);
-
-    }
-
-    // Only compare fully converted names; on failure result stays -1.
-    if(!U_FAILURE(*status)){
-        // when toASCII is applied all label separators are replaced with FULL_STOP
-        result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
-    }
-
-CLEANUP:
-    if(b1 != b1Stack){
-        uprv_free(b1);
-    }
-
-    if(b2 != b2Stack){
-        uprv_free(b2);
-    }
-
-    return result;
-}
-
-#endif /* #if !UCONFIG_NO_IDNA */
diff --git a/contrib/libs/icu/common/uinit.cpp b/contrib/libs/icu/common/uinit.cpp
deleted file mode 100644
index 624431be02c..00000000000
--- a/contrib/libs/icu/common/uinit.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 2001-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-******************************************************************************
-* file name: uinit.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2001July05
-* created by: George Rhoten
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/icuplug.h"
-#include "unicode/uclean.h"
-#include "cmemory.h"
-#include "icuplugimp.h"
-#include "ucln_cmn.h"
-#include "ucnv_io.h"
-#include "umutex.h"
-#include "utracimp.h"
-
-U_NAMESPACE_BEGIN
-
-static UInitOnce gICUInitOnce = U_INITONCE_INITIALIZER;
-
-static UBool U_CALLCONV uinit_cleanup() { /* cleanup callback that initData() registers via ucln_common_registerCleanup() */
- gICUInitOnce.reset(); /* presumably lets a later u_init() run initData() again -- confirm against UInitOnce */
- return TRUE; /* always reports success */
-}
-
-static void U_CALLCONV
-initData(UErrorCode &status) /* initialization body that u_init() hands to umtx_initOnce(); errors are reported through status */
-{
-#if UCONFIG_ENABLE_PLUGINS
- /* initialize plugins (only compiled in when UCONFIG_ENABLE_PLUGINS is set) */
- uplug_init(&status);
-#endif
-
-#if !UCONFIG_NO_CONVERSION
- /*
- * 2005-may-02
- *
- * ICU4C 3.4 (jitterbug 4497) hardcodes the data for Unicode character
- * properties for APIs that want to be fast.
- * Therefore, we need not load them here nor check for errors.
- * Instead, we load the converter alias table to see if any ICU data
- * is available.
- * Users should really open the service objects they need and check
- * for errors there, to make sure that the actual items they need are
- * available.
- */
- ucnv_io_countKnownConverters(&status); /* return value is discarded; only status is of interest here */
-#endif
- ucln_common_registerCleanup(UCLN_COMMON_UINIT, uinit_cleanup); /* registered regardless of status */
-}
-
-U_NAMESPACE_END
-
-U_NAMESPACE_USE
-
-/*
- * ICU Initialization Function. Need not be called.
- *
- * Runs initData() through umtx_initOnce() on gICUInitOnce, bracketed by the
- * UTRACE entry/exit macros. status is dereferenced unconditionally, so it
- * must not be NULL.
- */
-U_CAPI void U_EXPORT2
-u_init(UErrorCode *status) {
- UTRACE_ENTRY_OC(UTRACE_U_INIT);
- umtx_initOnce(gICUInitOnce, &initData, *status);
- UTRACE_EXIT_STATUS(*status);
-}
diff --git a/contrib/libs/icu/common/uinvchar.cpp b/contrib/libs/icu/common/uinvchar.cpp
deleted file mode 100644
index 05bcf10df48..00000000000
--- a/contrib/libs/icu/common/uinvchar.cpp
+++ /dev/null
@@ -1,627 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1999-2010, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: uinvchar.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:2
-*
-* created on: 2004sep14
-* created by: Markus W. Scherer
-*
-* Functions for handling invariant characters, moved here from putil.c
-* for better modularization.
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/ustring.h"
-#include "udataswp.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "uassert.h"
-#include "uinvchar.h"
-
-/* invariant-character handling --------------------------------------------- */
-
-/*
- * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
- * appropriately for most EBCDIC codepages.
- *
- * They currently also map most other ASCII graphic characters,
- * appropriately for codepages 37 and 1047.
- * Exceptions: The characters for []^ have different codes in 37 & 1047.
- * Both versions are mapped to ASCII.
- *
- * ASCII 37 1047
- * [ 5B BA AD
- * ] 5D BB BD
- * ^ 5E B0 5F
- *
- * There are no mappings for variant characters from Unicode to EBCDIC.
- *
- * Currently, C0 control codes are also included in these maps.
- * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
- * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
- * but there is no mapping for ASCII LF back to EBCDIC.
- *
- * ASCII EBCDIC S/390-OE
- * LF 0A 25 15
- * NEL 85 15 25
- *
- * The maps below explicitly exclude the variant
- * control and graphical characters that are in ASCII-based
- * codepages at 0x80 and above.
- * "No mapping" is expressed by mapping to a 00 byte.
- *
- * These tables do not establish a converter or a codepage.
- */
-
-static const uint8_t asciiFromEbcdic[256]={
- 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
- 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
- 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
-
- 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
- 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
- 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
-
- 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
- 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
-
- 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-};
-
-static const uint8_t ebcdicFromAscii[256]={
- 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
- 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
- 0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
- 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,
-
- 0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
- 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
- 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
- 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-/* Same as asciiFromEbcdic[] except that it maps all letters to lowercase and maps EBCDIC 0xE0 to 0x7c instead of 0x5c. */
-static const uint8_t lowercaseAsciiFromEbcdic[256]={
- 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
- 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
- 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
-
- 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
- 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
- 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
-
- 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
- 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
-
- 0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-};
-
-/*
- * Bit sets indicating which characters of the ASCII repertoire
- * (by ASCII/Unicode code) are "invariant".
- * See utypes.h for more details.
- *
- * As invariant are considered the characters of the ASCII repertoire except
- * for the following:
- * 21 '!' <exclamation mark>
- * 23 '#' <number sign>
- * 24 '$' <dollar sign>
- *
- * 40 '@' <commercial at>
- *
- * 5b '[' <left bracket>
- * 5c '\' <backslash>
- * 5d ']' <right bracket>
- * 5e '^' <circumflex>
- *
- * 60 '`' <grave accent>
- *
- * 7b '{' <left brace>
- * 7c '|' <vertical line>
- * 7d '}' <right brace>
- * 7e '~' <tilde>
- */
-static const uint32_t invariantChars[4]={
- 0xfffffbff, /* 00..1f but not 0a */
- 0xffffffe5, /* 20..3f but not 21 23 24 */
- 0x87fffffe, /* 40..5f but not 40 5b..5e */
- 0x87fffffe /* 60..7f but not 60 7b..7e */
-};
-
-/*
- * test unsigned types (or values known to be non-negative) for invariant characters,
- * tests ASCII-family character values
- */
-#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)
-
-/* test signed types for invariant characters, adds test for positive values */
-#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))
-
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
-#define CHAR_TO_UCHAR(c) c
-#define UCHAR_TO_CHAR(c) c
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
-#define CHAR_TO_UCHAR(u) asciiFromEbcdic[u]
-#define UCHAR_TO_CHAR(u) ebcdicFromAscii[u]
-#else
-# error U_CHARSET_FAMILY is not valid
-#endif
-
-
-/*
- * Converts `length` platform-charset bytes from cs into UChars in us by
- * applying CHAR_TO_UCHAR to each byte. Does nothing when length <= 0.
- *
- * Allows the entire ASCII repertoire to be mapped _to_ Unicode.
- * For EBCDIC systems, this works for characters with codes from
- * codepages 37 and 1047 or compatible.
- */
-U_CAPI void U_EXPORT2
-u_charsToUChars(const char *cs, UChar *us, int32_t length) {
-    for(; length>0; --length) {
-        const uint8_t byte=(uint8_t)(*cs++);
-        const UChar unit=(UChar)CHAR_TO_UCHAR(byte);
-        U_ASSERT((unit!=0 || byte==0)); /* only invariant chars converted? */
-        *us++=unit;
-    }
-}
-
-/*
- * Converts `length` UChars from us into platform-charset bytes in cs by
- * applying UCHAR_TO_CHAR to each unit. A unit that is not invariant trips
- * U_ASSERT and is written as the conversion of 0. Does nothing when
- * length <= 0.
- */
-U_CAPI void U_EXPORT2
-u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
-    for(int32_t i=0; i<length; ++i) {
-        UChar unit=us[i];
-        if(!UCHAR_IS_INVARIANT(unit)) {
-            U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */
-            unit=0;
-        }
-        cs[i]=(char)UCHAR_TO_CHAR(unit);
-    }
-}
-
-/*
- * Returns TRUE when every byte of the platform-charset string s is an
- * invariant character, FALSE at the first variant one.
- *
- * length < 0 means s is NUL-terminated; otherwise exactly `length` bytes are
- * examined and embedded NUL bytes count as invariant.
- */
-U_CAPI UBool U_EXPORT2
-uprv_isInvariantString(const char *s, int32_t length) {
-    /* a negative length never changes, so the loop then ends only at the NUL */
-    while(length!=0) {
-        uint8_t c=(uint8_t)*s++;
-        if(length>0) {
-            /* counted string */
-            --length;
-            if(c==0) {
-                continue; /* NUL is invariant */
-            }
-        } else if(c==0) {
-            /* NUL-terminated string: reached the end */
-            break;
-        }
-        /* c!=0 now, one branch below checks c==0 for variant characters */
-
-        /*
-         * no assertions here because these functions are legitimately called
-         * for strings with variant characters
-         */
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
-        if(!UCHAR_IS_INVARIANT(c)) {
-            return FALSE; /* found a variant char */
-        }
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
-        c=CHAR_TO_UCHAR(c);
-        if(c==0 || !UCHAR_IS_INVARIANT(c)) {
-            return FALSE; /* found a variant char */
-        }
-#else
-#   error U_CHARSET_FAMILY is not valid
-#endif
-    }
-    return TRUE;
-}
-
-/*
- * Returns TRUE when every UChar of s passes UCHAR_IS_INVARIANT, FALSE at the
- * first one that does not.
- *
- * length < 0 means s is NUL-terminated; otherwise exactly `length` units are
- * examined, including any NUL units.
- */
-U_CAPI UBool U_EXPORT2
-uprv_isInvariantUString(const UChar *s, int32_t length) {
-    /* a negative length never changes, so the loop then ends only at the NUL */
-    while(length!=0) {
-        const UChar c=*s++;
-        if(length>0) {
-            /* counted string */
-            --length;
-        } else if(c==0) {
-            /* NUL-terminated string: reached the end */
-            break;
-        }
-
-        /*
-         * no assertions here because these functions are legitimately called
-         * for strings with variant characters
-         */
-        if(!UCHAR_IS_INVARIANT(c)) {
-            return FALSE; /* found a variant char */
-        }
-    }
-    return TRUE;
-}
-
-/* UDataSwapFn implementations used in udataswp.c ------- */
-
-/*
- * Converts `length` ASCII bytes from inData to EBCDIC in outData through the
- * ebcdicFromAscii[] table, verifying that all characters are invariant.
- *
- * Returns length on success. Returns 0 when pErrorCode is NULL or already
- * failed, when the arguments are illegal (U_ILLEGAL_ARGUMENT_ERROR), or at
- * the first variant character (U_INVALID_CHAR_FOUND, after reporting its
- * position through udata_printError); bytes before that position have
- * already been written.
- */
-U_CAPI int32_t U_EXPORT2
-uprv_ebcdicFromAscii(const UDataSwapper *ds,
-                     const void *inData, int32_t length, void *outData,
-                     UErrorCode *pErrorCode) {
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    /* setup and swapping */
-    const uint8_t *in=(const uint8_t *)inData;
-    uint8_t *out=(uint8_t *)outData;
-    for(int32_t pos=0; pos<length; ++pos) {
-        const uint8_t c=in[pos];
-        if(!UCHAR_IS_INVARIANT(c)) {
-            udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
-                             length, pos);
-            *pErrorCode=U_INVALID_CHAR_FOUND;
-            return 0;
-        }
-        out[pos]=ebcdicFromAscii[c];
-    }
-
-    return length;
-}
-
-/*
- * Checks that `length` ASCII bytes in inData are all invariant characters and
- * then copies them to outData without conversion (no copy is made when
- * inData and outData are the same pointer).
- *
- * Returns length on success. Returns 0 when pErrorCode is NULL or already
- * failed, when the arguments are illegal (U_ILLEGAL_ARGUMENT_ERROR), or when
- * a variant character is found (U_INVALID_CHAR_FOUND); in the last case
- * nothing is copied and the position is reported through udata_printError.
- * The diagnostic now names this function (it used to say
- * "uprv_copyFromAscii()", which does not exist).
- */
-U_CFUNC int32_t
-uprv_copyAscii(const UDataSwapper *ds,
-               const void *inData, int32_t length, void *outData,
-               UErrorCode *pErrorCode) {
-    const uint8_t *s;
-    uint8_t c;
-
-    int32_t count;
-
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    /* setup and checking */
-    s=(const uint8_t *)inData;
-    count=length;
-    while(count>0) {
-        c=*s++;
-        if(!UCHAR_IS_INVARIANT(c)) {
-            /* length-count is the index of the offending byte */
-            udata_printError(ds, "uprv_copyAscii() string[%d] contains a variant character in position %d\n",
-                             length, length-count);
-            *pErrorCode=U_INVALID_CHAR_FOUND;
-            return 0;
-        }
-        --count;
-    }
-
-    if(length>0 && inData!=outData) {
-        uprv_memcpy(outData, inData, length);
-    }
-
-    return length;
-}
-
-/*
- * Converts `length` EBCDIC bytes from inData to ASCII in outData through the
- * asciiFromEbcdic[] table, verifying that all characters are invariant.
- * A 0 byte is passed through as 0 without consulting the table.
- *
- * Returns length on success. Returns 0 when pErrorCode is NULL or already
- * failed, when the arguments are illegal (U_ILLEGAL_ARGUMENT_ERROR), or at
- * the first byte that has no mapping or maps to a variant character
- * (U_INVALID_CHAR_FOUND, after reporting its position through
- * udata_printError); bytes before that position have already been written.
- */
-U_CFUNC int32_t
-uprv_asciiFromEbcdic(const UDataSwapper *ds,
-                     const void *inData, int32_t length, void *outData,
-                     UErrorCode *pErrorCode) {
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    /* setup and swapping */
-    const uint8_t *in=(const uint8_t *)inData;
-    uint8_t *out=(uint8_t *)outData;
-    for(int32_t pos=0; pos<length; ++pos) {
-        uint8_t c=in[pos];
-        if(c!=0) {
-            /* a table entry of 0 means "no mapping" */
-            c=asciiFromEbcdic[c];
-            if(c==0 || !UCHAR_IS_INVARIANT(c)) {
-                udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
-                                 length, pos);
-                *pErrorCode=U_INVALID_CHAR_FOUND;
-                return 0;
-            }
-        }
-        out[pos]=c;
-    }
-
-    return length;
-}
-
-/* this function only checks and copies EBCDIC strings without conversion */
-U_CFUNC int32_t
-uprv_copyEbcdic(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const uint8_t *s;
- uint8_t c;
-
- int32_t count;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* setup and checking */
- s=(const uint8_t *)inData;
- count=length;
- while(count>0) {
- c=*s++;
- if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
- udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n",
- length, length-count);
- *pErrorCode=U_INVALID_CHAR_FOUND;
- return 0;
- }
- --count;
- }
-
- if(length>0 && inData!=outData) {
- uprv_memcpy(outData, inData, length);
- }
-
- return length;
-}
-
-U_CFUNC UBool
-uprv_isEbcdicAtSign(char c) {
- static const uint8_t ebcdicAtSigns[] = {
- 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
- return c != 0 && uprv_strchr((const char *)ebcdicAtSigns, c) != nullptr;
-}
-
-/* compare invariant strings; variant characters compare less than others and unlike each other */
-U_CFUNC int32_t
-uprv_compareInvAscii(const UDataSwapper *ds,
- const char *outString, int32_t outLength,
- const UChar *localString, int32_t localLength) {
- (void)ds;
- int32_t minLength;
- UChar32 c1, c2;
- uint8_t c;
-
- if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
- return 0;
- }
-
- if(outLength<0) {
- outLength=(int32_t)uprv_strlen(outString);
- }
- if(localLength<0) {
- localLength=u_strlen(localString);
- }
-
- minLength= outLength<localLength ? outLength : localLength;
-
- while(minLength>0) {
- c=(uint8_t)*outString++;
- if(UCHAR_IS_INVARIANT(c)) {
- c1=c;
- } else {
- c1=-1;
- }
-
- c2=*localString++;
- if(!UCHAR_IS_INVARIANT(c2)) {
- c2=-2;
- }
-
- if((c1-=c2)!=0) {
- return c1;
- }
-
- --minLength;
- }
-
- /* strings start with same prefix, compare lengths */
- return outLength-localLength;
-}
-
-U_CFUNC int32_t
-uprv_compareInvEbcdic(const UDataSwapper *ds,
- const char *outString, int32_t outLength,
- const UChar *localString, int32_t localLength) {
- (void)ds;
- int32_t minLength;
- UChar32 c1, c2;
- uint8_t c;
-
- if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
- return 0;
- }
-
- if(outLength<0) {
- outLength=(int32_t)uprv_strlen(outString);
- }
- if(localLength<0) {
- localLength=u_strlen(localString);
- }
-
- minLength= outLength<localLength ? outLength : localLength;
-
- while(minLength>0) {
- c=(uint8_t)*outString++;
- if(c==0) {
- c1=0;
- } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) {
- /* c1 is set */
- } else {
- c1=-1;
- }
-
- c2=*localString++;
- if(!UCHAR_IS_INVARIANT(c2)) {
- c2=-2;
- }
-
- if((c1-=c2)!=0) {
- return c1;
- }
-
- --minLength;
- }
-
- /* strings start with same prefix, compare lengths */
- return outLength-localLength;
-}
-
-U_CAPI int32_t U_EXPORT2
-uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
- int32_t c1, c2;
-
- for(;; ++s1, ++s2) {
- c1=(uint8_t)*s1;
- c2=(uint8_t)*s2;
- if(c1!=c2) {
- if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) {
- c1=-(int32_t)(uint8_t)*s1;
- }
- if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) {
- c2=-(int32_t)(uint8_t)*s2;
- }
- return c1-c2;
- } else if(c1==0) {
- return 0;
- }
- }
-}
-
-U_CAPI char U_EXPORT2
-uprv_ebcdicToAscii(char c) {
- return (char)asciiFromEbcdic[(uint8_t)c];
-}
-
-U_CAPI char U_EXPORT2
-uprv_ebcdicToLowercaseAscii(char c) {
- return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
-}
-
-U_INTERNAL uint8_t* U_EXPORT2
-uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
-{
- uint8_t *orig_dst = dst;
-
- if(n==-1) {
- n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
- }
- /* copy non-null */
- while(*src && n>0) {
- *(dst++) = asciiFromEbcdic[*(src++)];
- n--;
- }
- /* pad */
- while(n>0) {
- *(dst++) = 0;
- n--;
- }
- return orig_dst;
-}
-
-U_INTERNAL uint8_t* U_EXPORT2
-uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
-{
- uint8_t *orig_dst = dst;
-
- if(n==-1) {
- n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
- }
- /* copy non-null */
- while(*src && n>0) {
- char ch = ebcdicFromAscii[*(src++)];
- if(ch == 0) {
- ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */
- }
- *(dst++) = ch;
- n--;
- }
- /* pad */
- while(n>0) {
- *(dst++) = 0;
- n--;
- }
- return orig_dst;
-}
-
diff --git a/contrib/libs/icu/common/uinvchar.h b/contrib/libs/icu/common/uinvchar.h
deleted file mode 100644
index a43cfcd9828..00000000000
--- a/contrib/libs/icu/common/uinvchar.h
+++ /dev/null
@@ -1,219 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1999-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: uinvchar.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:2
-*
-* created on: 2004sep14
-* created by: Markus W. Scherer
-*
-* Definitions for handling invariant characters, moved here from putil.c
-* for better modularization.
-*/
-
-#ifndef __UINVCHAR_H__
-#define __UINVCHAR_H__
-
-#include "unicode/utypes.h"
-#ifdef __cplusplus
-#include "unicode/unistr.h"
-#endif
-
-/**
- * Check if a char string only contains invariant characters.
- * See utypes.h for details.
- *
- * @param s Input string pointer.
- * @param length Length of the string, can be -1 if NUL-terminated.
- * @return TRUE if s contains only invariant characters.
- *
- * @internal (ICU 2.8)
- */
-U_INTERNAL UBool U_EXPORT2
-uprv_isInvariantString(const char *s, int32_t length);
-
-/**
- * Check if a Unicode string only contains invariant characters.
- * See utypes.h for details.
- *
- * @param s Input string pointer.
- * @param length Length of the string, can be -1 if NUL-terminated.
- * @return TRUE if s contains only invariant characters.
- *
- * @internal (ICU 2.8)
- */
-U_INTERNAL UBool U_EXPORT2
-uprv_isInvariantUString(const UChar *s, int32_t length);
-
-/**
- * \def U_UPPER_ORDINAL
- * Get the ordinal number of an uppercase invariant character
- * @internal
- */
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
-# define U_UPPER_ORDINAL(x) ((x)-'A')
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
-# define U_UPPER_ORDINAL(x) (((x) < 'J') ? ((x)-'A') : \
- (((x) < 'S') ? ((x)-'J'+9) : \
- ((x)-'S'+18)))
-#else
-# error Unknown charset family!
-#endif
-
-#ifdef __cplusplus
-
-U_NAMESPACE_BEGIN
-
-/**
- * Like U_UPPER_ORDINAL(x) but with validation.
- * Returns 0..25 for A..Z else a value outside 0..25.
- */
-inline int32_t uprv_upperOrdinal(int32_t c) {
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
- return c - 'A';
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
- // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8).
- // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
- if (c <= 'I') { return c - 'A'; } // A-I --> 0-8
- if (c < 'J') { return -1; }
- if (c <= 'R') { return c - 'J' + 9; } // J-R --> 9..17
- if (c < 'S') { return -1; }
- return c - 'S' + 18; // S-Z --> 18..25
-#else
-# error Unknown charset family!
-#endif
-}
-
-// Like U_UPPER_ORDINAL(x) but for lowercase and with validation.
-// Returns 0..25 for a..z else a value outside 0..25.
-inline int32_t uprv_lowerOrdinal(int32_t c) {
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
- return c - 'a';
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
- // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8).
- // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
- if (c <= 'i') { return c - 'a'; } // a-i --> 0-8
- if (c < 'j') { return -1; }
- if (c <= 'r') { return c - 'j' + 9; } // j-r --> 9..17
- if (c < 's') { return -1; }
- return c - 's' + 18; // s-z --> 18..25
-#else
-# error Unknown charset family!
-#endif
-}
-
-U_NAMESPACE_END
-
-#endif
-
-/**
- * Returns true if c == '@' is possible.
- * The @ sign is variant, and the @ sign used on one
- * EBCDIC machine won't be compiled the same way on other EBCDIC based machines.
- * @internal
- */
-U_CFUNC UBool
-uprv_isEbcdicAtSign(char c);
-
-/**
- * \def uprv_isAtSign
- * Returns true if c == '@' is possible.
- * For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign().
- * @internal
- */
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
-# define uprv_isAtSign(c) ((c)=='@')
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
-# define uprv_isAtSign(c) uprv_isEbcdicAtSign(c)
-#else
-# error Unknown charset family!
-#endif
-
-/**
- * Compare two EBCDIC invariant-character strings in ASCII order.
- * @internal
- */
-U_INTERNAL int32_t U_EXPORT2
-uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2);
-
-/**
- * \def uprv_compareInvCharsAsAscii
- * Compare two invariant-character strings in ASCII order.
- * @internal
- */
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
-# define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2)
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
-# define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2)
-#else
-# error Unknown charset family!
-#endif
-
-/**
- * Converts an EBCDIC invariant character to ASCII.
- * @internal
- */
-U_INTERNAL char U_EXPORT2
-uprv_ebcdicToAscii(char c);
-
-/**
- * \def uprv_invCharToAscii
- * Converts an invariant character to ASCII.
- * @internal
- */
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
-# define uprv_invCharToAscii(c) (c)
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
-# define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c)
-#else
-# error Unknown charset family!
-#endif
-
-/**
- * Converts an EBCDIC invariant character to lowercase ASCII.
- * @internal
- */
-U_INTERNAL char U_EXPORT2
-uprv_ebcdicToLowercaseAscii(char c);
-
-/**
- * \def uprv_invCharToLowercaseAscii
- * Converts an invariant character to lowercase ASCII.
- * @internal
- */
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
-# define uprv_invCharToLowercaseAscii uprv_asciitolower
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
-# define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii
-#else
-# error Unknown charset family!
-#endif
-
-/**
- * Copy EBCDIC to ASCII
- * @internal
- * @see uprv_strncpy
- */
-U_INTERNAL uint8_t* U_EXPORT2
-uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
-
-
-/**
- * Copy ASCII to EBCDIC
- * @internal
- * @see uprv_strncpy
- */
-U_INTERNAL uint8_t* U_EXPORT2
-uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
-
-
-
-#endif
diff --git a/contrib/libs/icu/common/uiter.cpp b/contrib/libs/icu/common/uiter.cpp
deleted file mode 100644
index b9252d81c2d..00000000000
--- a/contrib/libs/icu/common/uiter.cpp
+++ /dev/null
@@ -1,1108 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2002-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: uiter.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002jan18
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/ustring.h"
-#include "unicode/chariter.h"
-#include "unicode/rep.h"
-#include "unicode/uiter.h"
-#include "unicode/utf.h"
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-#include "cstring.h"
-
-U_NAMESPACE_USE
-
-#define IS_EVEN(n) (((n)&1)==0)
-#define IS_POINTER_EVEN(p) IS_EVEN((size_t)p)
-
-U_CDECL_BEGIN
-
-/* No-Op UCharIterator implementation for illegal input --------------------- */
-
-static int32_t U_CALLCONV
-noopGetIndex(UCharIterator * /*iter*/, UCharIteratorOrigin /*origin*/) {
- return 0;
-}
-
-static int32_t U_CALLCONV
-noopMove(UCharIterator * /*iter*/, int32_t /*delta*/, UCharIteratorOrigin /*origin*/) {
- return 0;
-}
-
-static UBool U_CALLCONV
-noopHasNext(UCharIterator * /*iter*/) {
- return FALSE;
-}
-
-static UChar32 U_CALLCONV
-noopCurrent(UCharIterator * /*iter*/) {
- return U_SENTINEL;
-}
-
-static uint32_t U_CALLCONV
-noopGetState(const UCharIterator * /*iter*/) {
- return UITER_NO_STATE;
-}
-
-static void U_CALLCONV
-noopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode *pErrorCode) {
- *pErrorCode=U_UNSUPPORTED_ERROR;
-}
-
-static const UCharIterator noopIterator={
- 0, 0, 0, 0, 0, 0,
- noopGetIndex,
- noopMove,
- noopHasNext,
- noopHasNext,
- noopCurrent,
- noopCurrent,
- noopCurrent,
- NULL,
- noopGetState,
- noopSetState
-};
-
-/* UCharIterator implementation for simple strings -------------------------- */
-
-/*
- * This is an implementation of a code unit (UChar) iterator
- * for UChar * strings.
- *
- * The UCharIterator.context field holds a pointer to the string.
- */
-
-static int32_t U_CALLCONV
-stringIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
- switch(origin) {
- case UITER_ZERO:
- return 0;
- case UITER_START:
- return iter->start;
- case UITER_CURRENT:
- return iter->index;
- case UITER_LIMIT:
- return iter->limit;
- case UITER_LENGTH:
- return iter->length;
- default:
- /* not a valid origin */
- /* Should never get here! */
- return -1;
- }
-}
-
-static int32_t U_CALLCONV
-stringIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
- int32_t pos;
-
- switch(origin) {
- case UITER_ZERO:
- pos=delta;
- break;
- case UITER_START:
- pos=iter->start+delta;
- break;
- case UITER_CURRENT:
- pos=iter->index+delta;
- break;
- case UITER_LIMIT:
- pos=iter->limit+delta;
- break;
- case UITER_LENGTH:
- pos=iter->length+delta;
- break;
- default:
- return -1; /* Error */
- }
-
- if(pos<iter->start) {
- pos=iter->start;
- } else if(pos>iter->limit) {
- pos=iter->limit;
- }
-
- return iter->index=pos;
-}
-
-static UBool U_CALLCONV
-stringIteratorHasNext(UCharIterator *iter) {
- return iter->index<iter->limit;
-}
-
-static UBool U_CALLCONV
-stringIteratorHasPrevious(UCharIterator *iter) {
- return iter->index>iter->start;
-}
-
-static UChar32 U_CALLCONV
-stringIteratorCurrent(UCharIterator *iter) {
- if(iter->index<iter->limit) {
- return ((const UChar *)(iter->context))[iter->index];
- } else {
- return U_SENTINEL;
- }
-}
-
-static UChar32 U_CALLCONV
-stringIteratorNext(UCharIterator *iter) {
- if(iter->index<iter->limit) {
- return ((const UChar *)(iter->context))[iter->index++];
- } else {
- return U_SENTINEL;
- }
-}
-
-static UChar32 U_CALLCONV
-stringIteratorPrevious(UCharIterator *iter) {
- if(iter->index>iter->start) {
- return ((const UChar *)(iter->context))[--iter->index];
- } else {
- return U_SENTINEL;
- }
-}
-
-static uint32_t U_CALLCONV
-stringIteratorGetState(const UCharIterator *iter) {
- return (uint32_t)iter->index;
-}
-
-static void U_CALLCONV
-stringIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) {
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- /* do nothing */
- } else if(iter==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- } else if((int32_t)state<iter->start || iter->limit<(int32_t)state) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- } else {
- iter->index=(int32_t)state;
- }
-}
-
-static const UCharIterator stringIterator={
- 0, 0, 0, 0, 0, 0,
- stringIteratorGetIndex,
- stringIteratorMove,
- stringIteratorHasNext,
- stringIteratorHasPrevious,
- stringIteratorCurrent,
- stringIteratorNext,
- stringIteratorPrevious,
- NULL,
- stringIteratorGetState,
- stringIteratorSetState
-};
-
-U_CAPI void U_EXPORT2
-uiter_setString(UCharIterator *iter, const UChar *s, int32_t length) {
- if(iter!=0) {
- if(s!=0 && length>=-1) {
- *iter=stringIterator;
- iter->context=s;
- if(length>=0) {
- iter->length=length;
- } else {
- iter->length=u_strlen(s);
- }
- iter->limit=iter->length;
- } else {
- *iter=noopIterator;
- }
- }
-}
-
-/* UCharIterator implementation for UTF-16BE strings ------------------------ */
-
-/*
- * This is an implementation of a code unit (UChar) iterator
- * for UTF-16BE strings, i.e., strings in byte-vectors where
- * each UChar is stored as a big-endian pair of bytes.
- *
- * The UCharIterator.context field holds a pointer to the string.
- * Everything works just like with a normal UChar iterator (uiter_setString),
- * except that UChars are assembled from byte pairs.
- */
-
-/* internal helper function */
-static inline UChar32
-utf16BEIteratorGet(UCharIterator *iter, int32_t index) {
- const uint8_t *p=(const uint8_t *)iter->context;
- return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1];
-}
-
-static UChar32 U_CALLCONV
-utf16BEIteratorCurrent(UCharIterator *iter) {
- int32_t index;
-
- if((index=iter->index)<iter->limit) {
- return utf16BEIteratorGet(iter, index);
- } else {
- return U_SENTINEL;
- }
-}
-
-static UChar32 U_CALLCONV
-utf16BEIteratorNext(UCharIterator *iter) {
- int32_t index;
-
- if((index=iter->index)<iter->limit) {
- iter->index=index+1;
- return utf16BEIteratorGet(iter, index);
- } else {
- return U_SENTINEL;
- }
-}
-
-static UChar32 U_CALLCONV
-utf16BEIteratorPrevious(UCharIterator *iter) {
- int32_t index;
-
- if((index=iter->index)>iter->start) {
- iter->index=--index;
- return utf16BEIteratorGet(iter, index);
- } else {
- return U_SENTINEL;
- }
-}
-
-static const UCharIterator utf16BEIterator={
- 0, 0, 0, 0, 0, 0,
- stringIteratorGetIndex,
- stringIteratorMove,
- stringIteratorHasNext,
- stringIteratorHasPrevious,
- utf16BEIteratorCurrent,
- utf16BEIteratorNext,
- utf16BEIteratorPrevious,
- NULL,
- stringIteratorGetState,
- stringIteratorSetState
-};
-
-/*
- * Count the number of UChars in a UTF-16BE string before a terminating UChar NUL,
- * i.e., before a pair of 0 bytes where the first 0 byte is at an even
- * offset from s.
- */
-static int32_t
-utf16BE_strlen(const char *s) {
- if(IS_POINTER_EVEN(s)) {
- /*
- * even-aligned, call u_strlen(s)
- * we are probably on a little-endian machine, but searching for UChar NUL
- * does not care about endianness
- */
- return u_strlen((const UChar *)s);
- } else {
- /* odd-aligned, search for pair of 0 bytes */
- const char *p=s;
-
- while(!(*p==0 && p[1]==0)) {
- p+=2;
- }
- return (int32_t)((p-s)/2);
- }
-}
-
-U_CAPI void U_EXPORT2
-uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length) {
- if(iter!=NULL) {
- /* allow only even-length strings (the input length counts bytes) */
- if(s!=NULL && (length==-1 || (length>=0 && IS_EVEN(length)))) {
- /* length/=2, except that >>=1 also works for -1 (-1/2==0, -1>>1==-1) */
- length>>=1;
-
- if(U_IS_BIG_ENDIAN && IS_POINTER_EVEN(s)) {
- /* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */
- uiter_setString(iter, (const UChar *)s, length);
- return;
- }
-
- *iter=utf16BEIterator;
- iter->context=s;
- if(length>=0) {
- iter->length=length;
- } else {
- iter->length=utf16BE_strlen(s);
- }
- iter->limit=iter->length;
- } else {
- *iter=noopIterator;
- }
- }
-}
-
-/* UCharIterator wrapper around CharacterIterator --------------------------- */
-
-/*
- * This is wrapper code around a C++ CharacterIterator to
- * look like a C UCharIterator.
- *
- * The UCharIterator.context field holds a pointer to the CharacterIterator.
- */
-
-static int32_t U_CALLCONV
-characterIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
- switch(origin) {
- case UITER_ZERO:
- return 0;
- case UITER_START:
- return ((CharacterIterator *)(iter->context))->startIndex();
- case UITER_CURRENT:
- return ((CharacterIterator *)(iter->context))->getIndex();
- case UITER_LIMIT:
- return ((CharacterIterator *)(iter->context))->endIndex();
- case UITER_LENGTH:
- return ((CharacterIterator *)(iter->context))->getLength();
- default:
- /* not a valid origin */
- /* Should never get here! */
- return -1;
- }
-}
-
-static int32_t U_CALLCONV
-characterIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
- switch(origin) {
- case UITER_ZERO:
- ((CharacterIterator *)(iter->context))->setIndex(delta);
- return ((CharacterIterator *)(iter->context))->getIndex();
- case UITER_START:
- case UITER_CURRENT:
- case UITER_LIMIT:
- return ((CharacterIterator *)(iter->context))->move(delta, (CharacterIterator::EOrigin)origin);
- case UITER_LENGTH:
- ((CharacterIterator *)(iter->context))->setIndex(((CharacterIterator *)(iter->context))->getLength()+delta);
- return ((CharacterIterator *)(iter->context))->getIndex();
- default:
- /* not a valid origin */
- /* Should never get here! */
- return -1;
- }
-}
-
-static UBool U_CALLCONV
-characterIteratorHasNext(UCharIterator *iter) {
- return ((CharacterIterator *)(iter->context))->hasNext();
-}
-
-static UBool U_CALLCONV
-characterIteratorHasPrevious(UCharIterator *iter) {
- return ((CharacterIterator *)(iter->context))->hasPrevious();
-}
-
-static UChar32 U_CALLCONV
-characterIteratorCurrent(UCharIterator *iter) {
- UChar32 c;
-
- c=((CharacterIterator *)(iter->context))->current();
- if(c!=0xffff || ((CharacterIterator *)(iter->context))->hasNext()) {
- return c;
- } else {
- return U_SENTINEL;
- }
-}
-
-static UChar32 U_CALLCONV
-characterIteratorNext(UCharIterator *iter) {
- if(((CharacterIterator *)(iter->context))->hasNext()) {
- return ((CharacterIterator *)(iter->context))->nextPostInc();
- } else {
- return U_SENTINEL;
- }
-}
-
-static UChar32 U_CALLCONV
-characterIteratorPrevious(UCharIterator *iter) {
- if(((CharacterIterator *)(iter->context))->hasPrevious()) {
- return ((CharacterIterator *)(iter->context))->previous();
- } else {
- return U_SENTINEL;
- }
-}
-
-static uint32_t U_CALLCONV
-characterIteratorGetState(const UCharIterator *iter) {
- return ((CharacterIterator *)(iter->context))->getIndex();
-}
-
-static void U_CALLCONV
-characterIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) {
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- /* do nothing */
- } else if(iter==NULL || iter->context==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- } else if((int32_t)state<((CharacterIterator *)(iter->context))->startIndex() || ((CharacterIterator *)(iter->context))->endIndex()<(int32_t)state) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- } else {
- ((CharacterIterator *)(iter->context))->setIndex((int32_t)state);
- }
-}
-
-static const UCharIterator characterIteratorWrapper={
- 0, 0, 0, 0, 0, 0,
- characterIteratorGetIndex,
- characterIteratorMove,
- characterIteratorHasNext,
- characterIteratorHasPrevious,
- characterIteratorCurrent,
- characterIteratorNext,
- characterIteratorPrevious,
- NULL,
- characterIteratorGetState,
- characterIteratorSetState
-};
-
-U_CAPI void U_EXPORT2
-uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter) {
- if(iter!=0) {
- if(charIter!=0) {
- *iter=characterIteratorWrapper;
- iter->context=charIter;
- } else {
- *iter=noopIterator;
- }
- }
-}
-
-/* UCharIterator wrapper around Replaceable --------------------------------- */
-
-/*
- * This is an implementation of a code unit (UChar) iterator
- * based on a Replaceable object.
- *
- * The UCharIterator.context field holds a pointer to the Replaceable.
- * UCharIterator.length and UCharIterator.index hold Replaceable.length()
- * and the iteration index.
- */
-
-static UChar32 U_CALLCONV
-replaceableIteratorCurrent(UCharIterator *iter) {
- if(iter->index<iter->limit) {
- return ((Replaceable *)(iter->context))->charAt(iter->index);
- } else {
- return U_SENTINEL;
- }
-}
-
-static UChar32 U_CALLCONV
-replaceableIteratorNext(UCharIterator *iter) {
- if(iter->index<iter->limit) {
- return ((Replaceable *)(iter->context))->charAt(iter->index++);
- } else {
- return U_SENTINEL;
- }
-}
-
-static UChar32 U_CALLCONV
-replaceableIteratorPrevious(UCharIterator *iter) {
- if(iter->index>iter->start) {
- return ((Replaceable *)(iter->context))->charAt(--iter->index);
- } else {
- return U_SENTINEL;
- }
-}
-
-static const UCharIterator replaceableIterator={
- 0, 0, 0, 0, 0, 0,
- stringIteratorGetIndex,
- stringIteratorMove,
- stringIteratorHasNext,
- stringIteratorHasPrevious,
- replaceableIteratorCurrent,
- replaceableIteratorNext,
- replaceableIteratorPrevious,
- NULL,
- stringIteratorGetState,
- stringIteratorSetState
-};
-
-U_CAPI void U_EXPORT2
-uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep) {
- if(iter!=0) {
- if(rep!=0) {
- *iter=replaceableIterator;
- iter->context=rep;
- iter->limit=iter->length=rep->length();
- } else {
- *iter=noopIterator;
- }
- }
-}
-
-/* UCharIterator implementation for UTF-8 strings --------------------------- */
-
-/*
- * Possible, probably necessary only for an implementation for arbitrary
- * converters:
- * Maintain a buffer (ring buffer?) for a piece of converted 16-bit text.
- * This would require to turn reservedFn into a close function and
- * to introduce a uiter_close(iter).
- */
-
-#define UITER_CNV_CAPACITY 16
-
-/*
- * Minimal implementation:
- * Maintain a single-UChar buffer for an additional surrogate.
- * The caller must not modify start and limit because they are used internally.
- *
- * Use UCharIterator fields as follows:
- * context pointer to UTF-8 string
- * length UTF-16 length of the string; -1 until lazy evaluation
- * start current UTF-8 index
- * index current UTF-16 index; may be -1="unknown" after setState()
- * limit UTF-8 length of the string
- * reservedField supplementary code point
- *
- * Since UCharIterator delivers 16-bit code units, the iteration can be
- * currently in the middle of the byte sequence for a supplementary code point.
- * In this case, reservedField will contain that code point and start will
- * point to after the corresponding byte sequence. The UTF-16 index will be
- * one less than what it would otherwise be corresponding to the UTF-8 index.
- * Otherwise, reservedField will be 0.
- */
-
-/*
- * Possible optimization for NUL-terminated UTF-8 and UTF-16 strings:
- * Add implementations that do not call strlen() for iteration but check for NUL.
- */
-
-static int32_t U_CALLCONV
-utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
- switch(origin) {
- case UITER_ZERO:
- case UITER_START:
- return 0;
- case UITER_CURRENT:
- if(iter->index<0) {
- /* the current UTF-16 index is unknown after setState(), count from the beginning */
- const uint8_t *s;
- UChar32 c;
- int32_t i, limit, index;
-
- s=(const uint8_t *)iter->context;
- i=index=0;
- limit=iter->start; /* count up to the UTF-8 index */
- while(i<limit) {
- U8_NEXT_OR_FFFD(s, i, limit, c);
- index+=U16_LENGTH(c);
- }
-
- iter->start=i; /* just in case setState() did not get us to a code point boundary */
- if(i==iter->limit) {
- iter->length=index; /* in case it was <0 or wrong */
- }
- if(iter->reservedField!=0) {
- --index; /* we are in the middle of a supplementary code point */
- }
- iter->index=index;
- }
- return iter->index;
- case UITER_LIMIT:
- case UITER_LENGTH:
- if(iter->length<0) {
- const uint8_t *s;
- UChar32 c;
- int32_t i, limit, length;
-
- s=(const uint8_t *)iter->context;
- if(iter->index<0) {
- /*
- * the current UTF-16 index is unknown after setState(),
- * we must first count from the beginning to here
- */
- i=length=0;
- limit=iter->start;
-
- /* count from the beginning to the current index */
- while(i<limit) {
- U8_NEXT_OR_FFFD(s, i, limit, c);
- length+=U16_LENGTH(c);
- }
-
- /* assume i==limit==iter->start, set the UTF-16 index */
- iter->start=i; /* just in case setState() did not get us to a code point boundary */
- iter->index= iter->reservedField!=0 ? length-1 : length;
- } else {
- i=iter->start;
- length=iter->index;
- if(iter->reservedField!=0) {
- ++length;
- }
- }
-
- /* count from the current index to the end */
- limit=iter->limit;
- while(i<limit) {
- U8_NEXT_OR_FFFD(s, i, limit, c);
- length+=U16_LENGTH(c);
- }
- iter->length=length;
- }
- return iter->length;
- default:
- /* not a valid origin */
- /* Should never get here! */
- return -1;
- }
-}
-
-static int32_t U_CALLCONV
-utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
- const uint8_t *s;
- UChar32 c;
- int32_t pos; /* requested UTF-16 index */
- int32_t i; /* UTF-8 index */
- UBool havePos;
-
- /* calculate the requested UTF-16 index */
- switch(origin) {
- case UITER_ZERO:
- case UITER_START:
- pos=delta;
- havePos=TRUE;
- /* iter->index<0 (unknown) is possible */
- break;
- case UITER_CURRENT:
- if(iter->index>=0) {
- pos=iter->index+delta;
- havePos=TRUE;
- } else {
- /* the current UTF-16 index is unknown after setState(), use only delta */
- pos=0;
- havePos=FALSE;
- }
- break;
- case UITER_LIMIT:
- case UITER_LENGTH:
- if(iter->length>=0) {
- pos=iter->length+delta;
- havePos=TRUE;
- } else {
- /* pin to the end, avoid counting the length */
- iter->index=-1;
- iter->start=iter->limit;
- iter->reservedField=0;
- if(delta>=0) {
- return UITER_UNKNOWN_INDEX;
- } else {
- /* the current UTF-16 index is unknown, use only delta */
- pos=0;
- havePos=FALSE;
- }
- }
- break;
- default:
- return -1; /* Error */
- }
-
- if(havePos) {
- /* shortcuts: pinning to the edges of the string */
- if(pos<=0) {
- iter->index=iter->start=iter->reservedField=0;
- return 0;
- } else if(iter->length>=0 && pos>=iter->length) {
- iter->index=iter->length;
- iter->start=iter->limit;
- iter->reservedField=0;
- return iter->index;
- }
-
- /* minimize the number of U8_NEXT/PREV operations */
- if(iter->index<0 || pos<iter->index/2) {
- /* go forward from the start instead of backward from the current index */
- iter->index=iter->start=iter->reservedField=0;
- } else if(iter->length>=0 && (iter->length-pos)<(pos-iter->index)) {
- /*
- * if we have the UTF-16 index and length and the new position is
- * closer to the end than the current index,
- * then go backward from the end instead of forward from the current index
- */
- iter->index=iter->length;
- iter->start=iter->limit;
- iter->reservedField=0;
- }
-
- delta=pos-iter->index;
- if(delta==0) {
- return iter->index; /* nothing to do */
- }
- } else {
- /* move relative to unknown UTF-16 index */
- if(delta==0) {
- return UITER_UNKNOWN_INDEX; /* nothing to do */
- } else if(-delta>=iter->start) {
- /* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */
- iter->index=iter->start=iter->reservedField=0;
- return 0;
- } else if(delta>=(iter->limit-iter->start)) {
- /* moving forward by more UChars than the remaining UTF-8 bytes, pin to the end */
- iter->index=iter->length; /* may or may not be <0 (unknown) */
- iter->start=iter->limit;
- iter->reservedField=0;
- return iter->index>=0 ? iter->index : (int32_t)UITER_UNKNOWN_INDEX;
- }
- }
-
- /* delta!=0 */
-
- /* move towards the requested position, pin to the edges of the string */
- s=(const uint8_t *)iter->context;
- pos=iter->index; /* could be <0 (unknown) */
- i=iter->start;
- if(delta>0) {
- /* go forward */
- int32_t limit=iter->limit;
- if(iter->reservedField!=0) {
- iter->reservedField=0;
- ++pos;
- --delta;
- }
- while(delta>0 && i<limit) {
- U8_NEXT_OR_FFFD(s, i, limit, c);
- if(c<=0xffff) {
- ++pos;
- --delta;
- } else if(delta>=2) {
- pos+=2;
- delta-=2;
- } else /* delta==1 */ {
- /* stop in the middle of a supplementary code point */
- iter->reservedField=c;
- ++pos;
- break; /* delta=0; */
- }
- }
- if(i==limit) {
- if(iter->length<0 && iter->index>=0) {
- iter->length= iter->reservedField==0 ? pos : pos+1;
- } else if(iter->index<0 && iter->length>=0) {
- iter->index= iter->reservedField==0 ? iter->length : iter->length-1;
- }
- }
- } else /* delta<0 */ {
- /* go backward */
- if(iter->reservedField!=0) {
- iter->reservedField=0;
- i-=4; /* we stayed behind the supplementary code point; go before it now */
- --pos;
- ++delta;
- }
- while(delta<0 && i>0) {
- U8_PREV_OR_FFFD(s, 0, i, c);
- if(c<=0xffff) {
- --pos;
- ++delta;
- } else if(delta<=-2) {
- pos-=2;
- delta+=2;
- } else /* delta==-1 */ {
- /* stop in the middle of a supplementary code point */
- i+=4; /* back to behind this supplementary code point for consistent state */
- iter->reservedField=c;
- --pos;
- break; /* delta=0; */
- }
- }
- }
-
- iter->start=i;
- if(iter->index>=0) {
- return iter->index=pos;
- } else {
- /* we started with index<0 (unknown) so pos is bogus */
- if(i<=1) {
- return iter->index=i; /* reached the beginning */
- } else {
- /* we still don't know the UTF-16 index */
- return UITER_UNKNOWN_INDEX;
- }
- }
-}
-
-static UBool U_CALLCONV
-utf8IteratorHasNext(UCharIterator *iter) {
- return iter->start<iter->limit || iter->reservedField!=0;
-}
-
-static UBool U_CALLCONV
-utf8IteratorHasPrevious(UCharIterator *iter) {
- return iter->start>0;
-}
-
-static UChar32 U_CALLCONV
-utf8IteratorCurrent(UCharIterator *iter) {
- if(iter->reservedField!=0) {
- return U16_TRAIL(iter->reservedField);
- } else if(iter->start<iter->limit) {
- const uint8_t *s=(const uint8_t *)iter->context;
- UChar32 c;
- int32_t i=iter->start;
-
- U8_NEXT_OR_FFFD(s, i, iter->limit, c);
- if(c<=0xffff) {
- return c;
- } else {
- return U16_LEAD(c);
- }
- } else {
- return U_SENTINEL;
- }
-}
-
-static UChar32 U_CALLCONV
-utf8IteratorNext(UCharIterator *iter) {
- int32_t index;
-
- if(iter->reservedField!=0) {
- UChar trail=U16_TRAIL(iter->reservedField);
- iter->reservedField=0;
- if((index=iter->index)>=0) {
- iter->index=index+1;
- }
- return trail;
- } else if(iter->start<iter->limit) {
- const uint8_t *s=(const uint8_t *)iter->context;
- UChar32 c;
-
- U8_NEXT_OR_FFFD(s, iter->start, iter->limit, c);
- if((index=iter->index)>=0) {
- iter->index=++index;
- if(iter->length<0 && iter->start==iter->limit) {
- iter->length= c<=0xffff ? index : index+1;
- }
- } else if(iter->start==iter->limit && iter->length>=0) {
- iter->index= c<=0xffff ? iter->length : iter->length-1;
- }
- if(c<=0xffff) {
- return c;
- } else {
- iter->reservedField=c;
- return U16_LEAD(c);
- }
- } else {
- return U_SENTINEL;
- }
-}
-
-static UChar32 U_CALLCONV
-utf8IteratorPrevious(UCharIterator *iter) {
- int32_t index;
-
- if(iter->reservedField!=0) {
- UChar lead=U16_LEAD(iter->reservedField);
- iter->reservedField=0;
- iter->start-=4; /* we stayed behind the supplementary code point; go before it now */
- if((index=iter->index)>0) {
- iter->index=index-1;
- }
- return lead;
- } else if(iter->start>0) {
- const uint8_t *s=(const uint8_t *)iter->context;
- UChar32 c;
-
- U8_PREV_OR_FFFD(s, 0, iter->start, c);
- if((index=iter->index)>0) {
- iter->index=index-1;
- } else if(iter->start<=1) {
- iter->index= c<=0xffff ? iter->start : iter->start+1;
- }
- if(c<=0xffff) {
- return c;
- } else {
- iter->start+=4; /* back to behind this supplementary code point for consistent state */
- iter->reservedField=c;
- return U16_TRAIL(c);
- }
- } else {
- return U_SENTINEL;
- }
-}
-
-static uint32_t U_CALLCONV
-utf8IteratorGetState(const UCharIterator *iter) {
- uint32_t state=(uint32_t)(iter->start<<1);
- if(iter->reservedField!=0) {
- state|=1;
- }
- return state;
-}
-
-static void U_CALLCONV
-utf8IteratorSetState(UCharIterator *iter,
- uint32_t state,
- UErrorCode *pErrorCode)
-{
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- /* do nothing */
- } else if(iter==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- } else if(state==utf8IteratorGetState(iter)) {
- /* setting to the current state: no-op */
- } else {
- int32_t index=(int32_t)(state>>1); /* UTF-8 index */
- state&=1; /* 1 if in surrogate pair, must be index>=4 */
-
- if((state==0 ? index<0 : index<4) || iter->limit<index) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- } else {
- iter->start=index; /* restore UTF-8 byte index */
- if(index<=1) {
- iter->index=index;
- } else {
- iter->index=-1; /* unknown UTF-16 index */
- }
- if(state==0) {
- iter->reservedField=0;
- } else {
- /* verified index>=4 above */
- UChar32 c;
- U8_PREV_OR_FFFD((const uint8_t *)iter->context, 0, index, c);
- if(c<=0xffff) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- } else {
- iter->reservedField=c;
- }
- }
- }
- }
-}
-
-static const UCharIterator utf8Iterator={
- 0, 0, 0, 0, 0, 0,
- utf8IteratorGetIndex,
- utf8IteratorMove,
- utf8IteratorHasNext,
- utf8IteratorHasPrevious,
- utf8IteratorCurrent,
- utf8IteratorNext,
- utf8IteratorPrevious,
- NULL,
- utf8IteratorGetState,
- utf8IteratorSetState
-};
-
-U_CAPI void U_EXPORT2
-uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length) {
- if(iter!=0) {
- if(s!=0 && length>=-1) {
- *iter=utf8Iterator;
- iter->context=s;
- if(length>=0) {
- iter->limit=length;
- } else {
- iter->limit=(int32_t)uprv_strlen(s);
- }
- iter->length= iter->limit<=1 ? iter->limit : -1;
- } else {
- *iter=noopIterator;
- }
- }
-}
-
-/* Helper functions --------------------------------------------------------- */
-
-U_CAPI UChar32 U_EXPORT2
-uiter_current32(UCharIterator *iter) {
- UChar32 c, c2;
-
- c=iter->current(iter);
- if(U16_IS_SURROGATE(c)) {
- if(U16_IS_SURROGATE_LEAD(c)) {
- /*
- * go to the next code unit
- * we know that we are not at the limit because c!=U_SENTINEL
- */
- iter->move(iter, 1, UITER_CURRENT);
- if(U16_IS_TRAIL(c2=iter->current(iter))) {
- c=U16_GET_SUPPLEMENTARY(c, c2);
- }
-
- /* undo index movement */
- iter->move(iter, -1, UITER_CURRENT);
- } else {
- if(U16_IS_LEAD(c2=iter->previous(iter))) {
- c=U16_GET_SUPPLEMENTARY(c2, c);
- }
- if(c2>=0) {
- /* undo index movement */
- iter->move(iter, 1, UITER_CURRENT);
- }
- }
- }
- return c;
-}
-
-U_CAPI UChar32 U_EXPORT2
-uiter_next32(UCharIterator *iter) {
- UChar32 c, c2;
-
- c=iter->next(iter);
- if(U16_IS_LEAD(c)) {
- if(U16_IS_TRAIL(c2=iter->next(iter))) {
- c=U16_GET_SUPPLEMENTARY(c, c2);
- } else if(c2>=0) {
- /* unmatched first surrogate, undo index movement */
- iter->move(iter, -1, UITER_CURRENT);
- }
- }
- return c;
-}
-
-U_CAPI UChar32 U_EXPORT2
-uiter_previous32(UCharIterator *iter) {
- UChar32 c, c2;
-
- c=iter->previous(iter);
- if(U16_IS_TRAIL(c)) {
- if(U16_IS_LEAD(c2=iter->previous(iter))) {
- c=U16_GET_SUPPLEMENTARY(c2, c);
- } else if(c2>=0) {
- /* unmatched second surrogate, undo index movement */
- iter->move(iter, 1, UITER_CURRENT);
- }
- }
- return c;
-}
-
-U_CAPI uint32_t U_EXPORT2
-uiter_getState(const UCharIterator *iter) {
- if(iter==NULL || iter->getState==NULL) {
- return UITER_NO_STATE;
- } else {
- return iter->getState(iter);
- }
-}
-
-U_CAPI void U_EXPORT2
-uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) {
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- /* do nothing */
- } else if(iter==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- } else if(iter->setState==NULL) {
- *pErrorCode=U_UNSUPPORTED_ERROR;
- } else {
- iter->setState(iter, state, pErrorCode);
- }
-}
-
-U_CDECL_END
diff --git a/contrib/libs/icu/common/ulayout_props.h b/contrib/libs/icu/common/ulayout_props.h
deleted file mode 100644
index c0f028c7132..00000000000
--- a/contrib/libs/icu/common/ulayout_props.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// © 2019 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-// ulayout_props.h
-// created: 2019feb12 Markus W. Scherer
-
-#ifndef __ULAYOUT_PROPS_H__
-#define __ULAYOUT_PROPS_H__
-
-#include "unicode/utypes.h"
-
-// file definitions ------------------------------------------------------------
-
-#define ULAYOUT_DATA_NAME "ulayout"
-#define ULAYOUT_DATA_TYPE "icu"
-
-// data format "Layo"
-#define ULAYOUT_FMT_0 0x4c
-#define ULAYOUT_FMT_1 0x61
-#define ULAYOUT_FMT_2 0x79
-#define ULAYOUT_FMT_3 0x6f
-
-// indexes into indexes[]
-enum {
- // Element 0 stores the length of the indexes[] array.
- ULAYOUT_IX_INDEXES_LENGTH,
- // Elements 1..7 store the tops of consecutive code point tries.
- // No trie is stored if the difference between two of these is less than 16.
- ULAYOUT_IX_INPC_TRIE_TOP,
- ULAYOUT_IX_INSC_TRIE_TOP,
- ULAYOUT_IX_VO_TRIE_TOP,
- ULAYOUT_IX_RESERVED_TOP,
-
- ULAYOUT_IX_TRIES_TOP = 7,
-
- ULAYOUT_IX_MAX_VALUES = 9,
-
- // Length of indexes[]. Multiple of 4 to 16-align the tries.
- ULAYOUT_IX_COUNT = 12
-};
-
-constexpr int32_t ULAYOUT_MAX_INPC_SHIFT = 24;
-constexpr int32_t ULAYOUT_MAX_INSC_SHIFT = 16;
-constexpr int32_t ULAYOUT_MAX_VO_SHIFT = 8;
-
-#endif // __ULAYOUT_PROPS_H__
diff --git a/contrib/libs/icu/common/ulist.cpp b/contrib/libs/icu/common/ulist.cpp
deleted file mode 100644
index c5180431c31..00000000000
--- a/contrib/libs/icu/common/ulist.cpp
+++ /dev/null
@@ -1,270 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 2009-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-******************************************************************************
-*/
-
-#include "ulist.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "uenumimp.h"
-
-typedef struct UListNode UListNode;
-struct UListNode {
- void *data;
-
- UListNode *next;
- UListNode *previous;
-
- /* When data is created with uprv_malloc, needs to be freed during deleteList function. */
- UBool forceDelete;
-};
-
-struct UList {
- UListNode *curr;
- UListNode *head;
- UListNode *tail;
-
- int32_t size;
-};
-
-static void ulist_addFirstItem(UList *list, UListNode *newItem);
-
-U_CAPI UList *U_EXPORT2 ulist_createEmptyList(UErrorCode *status) {
- UList *newList = NULL;
-
- if (U_FAILURE(*status)) {
- return NULL;
- }
-
- newList = (UList *)uprv_malloc(sizeof(UList));
- if (newList == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- newList->curr = NULL;
- newList->head = NULL;
- newList->tail = NULL;
- newList->size = 0;
-
- return newList;
-}
-
-/*
- * Function called by addItemEndList or addItemBeginList when the first item is added to the list.
- * This function properly sets the pointers for the first item added.
- */
-static void ulist_addFirstItem(UList *list, UListNode *newItem) {
- newItem->next = NULL;
- newItem->previous = NULL;
- list->head = newItem;
- list->tail = newItem;
-}
-
-static void ulist_removeItem(UList *list, UListNode *p) {
- if (p->previous == NULL) {
- // p is the list head.
- list->head = p->next;
- } else {
- p->previous->next = p->next;
- }
- if (p->next == NULL) {
- // p is the list tail.
- list->tail = p->previous;
- } else {
- p->next->previous = p->previous;
- }
- if (p == list->curr) {
- list->curr = p->next;
- }
- --list->size;
- if (p->forceDelete) {
- uprv_free(p->data);
- }
- uprv_free(p);
-}
-
-U_CAPI void U_EXPORT2 ulist_addItemEndList(UList *list, const void *data, UBool forceDelete, UErrorCode *status) {
- UListNode *newItem = NULL;
-
- if (U_FAILURE(*status) || list == NULL || data == NULL) {
- if (forceDelete) {
- uprv_free((void *)data);
- }
- return;
- }
-
- newItem = (UListNode *)uprv_malloc(sizeof(UListNode));
- if (newItem == NULL) {
- if (forceDelete) {
- uprv_free((void *)data);
- }
- *status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- newItem->data = (void *)(data);
- newItem->forceDelete = forceDelete;
-
- if (list->size == 0) {
- ulist_addFirstItem(list, newItem);
- } else {
- newItem->next = NULL;
- newItem->previous = list->tail;
- list->tail->next = newItem;
- list->tail = newItem;
- }
-
- list->size++;
-}
-
-U_CAPI void U_EXPORT2 ulist_addItemBeginList(UList *list, const void *data, UBool forceDelete, UErrorCode *status) {
- UListNode *newItem = NULL;
-
- if (U_FAILURE(*status) || list == NULL || data == NULL) {
- if (forceDelete) {
- uprv_free((void *)data);
- }
- return;
- }
-
- newItem = (UListNode *)uprv_malloc(sizeof(UListNode));
- if (newItem == NULL) {
- if (forceDelete) {
- uprv_free((void *)data);
- }
- *status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- newItem->data = (void *)(data);
- newItem->forceDelete = forceDelete;
-
- if (list->size == 0) {
- ulist_addFirstItem(list, newItem);
- } else {
- newItem->previous = NULL;
- newItem->next = list->head;
- list->head->previous = newItem;
- list->head = newItem;
- }
-
- list->size++;
-}
-
-U_CAPI UBool U_EXPORT2 ulist_containsString(const UList *list, const char *data, int32_t length) {
- if (list != NULL) {
- const UListNode *pointer;
- for (pointer = list->head; pointer != NULL; pointer = pointer->next) {
- if (length == (int32_t)uprv_strlen((const char *)pointer->data)) {
- if (uprv_memcmp(data, pointer->data, length) == 0) {
- return TRUE;
- }
- }
- }
- }
- return FALSE;
-}
-
-U_CAPI UBool U_EXPORT2 ulist_removeString(UList *list, const char *data) {
- if (list != NULL) {
- UListNode *pointer;
- for (pointer = list->head; pointer != NULL; pointer = pointer->next) {
- if (uprv_strcmp(data, (const char *)pointer->data) == 0) {
- ulist_removeItem(list, pointer);
- // Remove only the first occurrence, like Java LinkedList.remove(Object).
- return TRUE;
- }
- }
- }
- return FALSE;
-}
-
-U_CAPI void *U_EXPORT2 ulist_getNext(UList *list) {
- UListNode *curr = NULL;
-
- if (list == NULL || list->curr == NULL) {
- return NULL;
- }
-
- curr = list->curr;
- list->curr = curr->next;
-
- return curr->data;
-}
-
-U_CAPI int32_t U_EXPORT2 ulist_getListSize(const UList *list) {
- if (list != NULL) {
- return list->size;
- }
-
- return -1;
-}
-
-U_CAPI void U_EXPORT2 ulist_resetList(UList *list) {
- if (list != NULL) {
- list->curr = list->head;
- }
-}
-
-U_CAPI void U_EXPORT2 ulist_deleteList(UList *list) {
- UListNode *listHead = NULL;
-
- if (list != NULL) {
- listHead = list->head;
- while (listHead != NULL) {
- UListNode *listPointer = listHead->next;
-
- if (listHead->forceDelete) {
- uprv_free(listHead->data);
- }
-
- uprv_free(listHead);
- listHead = listPointer;
- }
- uprv_free(list);
- list = NULL;
- }
-}
-
-U_CAPI void U_EXPORT2 ulist_close_keyword_values_iterator(UEnumeration *en) {
- if (en != NULL) {
- ulist_deleteList((UList *)(en->context));
- uprv_free(en);
- }
-}
-
-U_CAPI int32_t U_EXPORT2 ulist_count_keyword_values(UEnumeration *en, UErrorCode *status) {
- if (U_FAILURE(*status)) {
- return -1;
- }
-
- return ulist_getListSize((UList *)(en->context));
-}
-
-U_CAPI const char * U_EXPORT2 ulist_next_keyword_value(UEnumeration *en, int32_t *resultLength, UErrorCode *status) {
- const char *s;
- if (U_FAILURE(*status)) {
- return NULL;
- }
-
- s = (const char *)ulist_getNext((UList *)(en->context));
- if (s != NULL && resultLength != NULL) {
- *resultLength = static_cast<int32_t>(uprv_strlen(s));
- }
- return s;
-}
-
-U_CAPI void U_EXPORT2 ulist_reset_keyword_values_iterator(UEnumeration *en, UErrorCode *status) {
- if (U_FAILURE(*status)) {
- return ;
- }
-
- ulist_resetList((UList *)(en->context));
-}
-
-U_CAPI UList * U_EXPORT2 ulist_getListFromEnum(UEnumeration *en) {
- return (UList *)(en->context);
-}
diff --git a/contrib/libs/icu/common/ulist.h b/contrib/libs/icu/common/ulist.h
deleted file mode 100644
index de58a4ad02c..00000000000
--- a/contrib/libs/icu/common/ulist.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 2009-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-******************************************************************************
-*/
-
-#ifndef ULIST_H
-#define ULIST_H
-
-#include "unicode/utypes.h"
-#include "unicode/uenum.h"
-
-struct UList;
-typedef struct UList UList;
-
-U_CAPI UList * U_EXPORT2 ulist_createEmptyList(UErrorCode *status);
-
-U_CAPI void U_EXPORT2 ulist_addItemEndList(UList *list, const void *data, UBool forceDelete, UErrorCode *status);
-
-U_CAPI void U_EXPORT2 ulist_addItemBeginList(UList *list, const void *data, UBool forceDelete, UErrorCode *status);
-
-U_CAPI UBool U_EXPORT2 ulist_containsString(const UList *list, const char *data, int32_t length);
-
-U_CAPI UBool U_EXPORT2 ulist_removeString(UList *list, const char *data);
-
-U_CAPI void *U_EXPORT2 ulist_getNext(UList *list);
-
-U_CAPI int32_t U_EXPORT2 ulist_getListSize(const UList *list);
-
-U_CAPI void U_EXPORT2 ulist_resetList(UList *list);
-
-U_CAPI void U_EXPORT2 ulist_deleteList(UList *list);
-
-/*
- * The following are for use when creating UEnumeration object backed by UList.
- */
-U_CAPI void U_EXPORT2 ulist_close_keyword_values_iterator(UEnumeration *en);
-
-U_CAPI int32_t U_EXPORT2 ulist_count_keyword_values(UEnumeration *en, UErrorCode *status);
-
-U_CAPI const char * U_EXPORT2 ulist_next_keyword_value(UEnumeration* en, int32_t *resultLength, UErrorCode* status);
-
-U_CAPI void U_EXPORT2 ulist_reset_keyword_values_iterator(UEnumeration* en, UErrorCode* status);
-
-U_CAPI UList * U_EXPORT2 ulist_getListFromEnum(UEnumeration *en);
-
-#endif
diff --git a/contrib/libs/icu/common/uloc.cpp b/contrib/libs/icu/common/uloc.cpp
deleted file mode 100644
index 0e235d7958c..00000000000
--- a/contrib/libs/icu/common/uloc.cpp
+++ /dev/null
@@ -1,2239 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1997-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*
-* File ULOC.CPP
-*
-* Modification History:
-*
-* Date Name Description
-* 04/01/97 aliu Creation.
-* 08/21/98 stephen JDK 1.2 sync
-* 12/08/98 rtg New Locale implementation and C API
-* 03/15/99 damiba overhaul.
-* 04/06/99 stephen changed setDefault() to realloc and copy
-* 06/14/99 stephen Changed calls to ures_open for new params
-* 07/21/99 stephen Modified setDefault() to propagate to C++
-* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
-* brought canonicalization code into line with spec
-*****************************************************************************/
-
-/*
- POSIX's locale format, from putil.c: [no spaces]
-
- ll [ _CC ] [ . MM ] [ @ VV]
-
- l = lang, C = ctry, M = charmap, V = variant
-*/
-
-#include "unicode/bytestream.h"
-#include "unicode/errorcode.h"
-#include "unicode/stringpiece.h"
-#include "unicode/utypes.h"
-#include "unicode/ustring.h"
-#include "unicode/uloc.h"
-
-#include "bytesinkutil.h"
-#include "putilimp.h"
-#include "ustr_imp.h"
-#include "ulocimp.h"
-#include "umutex.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "locmap.h"
-#include "uarrsort.h"
-#include "uenumimp.h"
-#include "uassert.h"
-#include "charstr.h"
-
-#include <algorithm>
-#include <stdio.h> /* for sprintf */
-
-U_NAMESPACE_USE
-
-/* ### Declarations **************************************************/
-
-/* Locale stuff from locid.cpp */
-U_CFUNC void locale_set_default(const char *id);
-U_CFUNC const char *locale_get_default(void);
-U_CFUNC int32_t
-locale_getKeywords(const char *localeID,
- char prev,
- char *keywords, int32_t keywordCapacity,
- UBool valuesToo,
- UErrorCode *status);
-
-/* ### Data tables **************************************************/
-
-/**
- * Table of language codes, both 2- and 3-letter, with preference
- * given to 2-letter codes where possible. Includes 3-letter codes
- * that lack a 2-letter equivalent.
- *
- * This list must be in sorted order. This list is returned directly
- * to the user by some API.
- *
- * This list must be kept in sync with LANGUAGES_3, with corresponding
- * entries matched.
- *
- * This table should be terminated with a NULL entry, followed by a
- * second list, and another NULL entry. The first list is visible to
- * user code when this array is returned by API. The second list
- * contains codes we support, but do not expose through user API.
- *
- * Notes
- *
- * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
- * include the revisions up to 2001/7/27 *CWB*
- *
- * The 3 character codes are the terminology codes like RFC 3066. This
- * is compatible with prior ICU codes
- *
- * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
- * table but now at the end of the table because 3 character codes are
- * duplicates. This avoids bad searches going from 3 to 2 character
- * codes.
- *
- * The range qaa-qtz is reserved for local use
- */
-/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
-/* ISO639 table version is 20150505 */
-/* Subsequent hand addition of selected languages */
-static const char * const LANGUAGES[] = {
- "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
- "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
- "aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
- "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
- "asa", "ase", "ast", "av", "avk", "awa", "ay", "az",
- "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
- "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
- "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
- "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
- "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
- "ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg",
- "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
- "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
- "cs", "csb", "cu", "cv", "cy",
- "da", "dak", "dar", "dav", "de", "del", "den", "dgr",
- "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
- "dyo", "dyu", "dz", "dzg",
- "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx",
- "en", "enm", "eo", "es", "esu", "et", "eu", "ewo",
- "ext",
- "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj",
- "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr",
- "frs", "fur", "fy",
- "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
- "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom",
- "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc",
- "gur", "guz", "gv", "gwi",
- "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil",
- "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu",
- "hup", "hy", "hz",
- "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik",
- "ilo", "inh", "io", "is", "it", "iu", "izh",
- "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
- "jv",
- "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
- "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp",
- "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk",
- "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
- "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
- "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
- "kv", "kw", "ky",
- "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
- "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
- "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
- "lun", "luo", "lus", "luy", "lv", "lzh", "lzz",
- "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
- "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
- "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
- "ml", "mn", "mnc", "mni", "mo",
- "moh", "mos", "mr", "mrj",
- "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
- "my", "mye", "myv", "mzn",
- "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne",
- "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn",
- "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso",
- "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi",
- "oc", "oj", "om", "or", "os", "osa", "ota",
- "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
- "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt",
- "pon", "prg", "pro", "ps", "pt",
- "qu", "quc", "qug",
- "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro",
- "rof", "rom", "rtm", "ru", "rue", "rug", "rup",
- "rw", "rwk",
- "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
- "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh",
- "se", "see", "seh", "sei", "sel", "ses", "sg", "sga",
- "sgs", "shi", "shn", "shu", "si", "sid", "sk",
- "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
- "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
- "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
- "sv", "sw", "swb", "swc", "syc", "syr", "szl",
- "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
- "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
- "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
- "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
- "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
- "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
- "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
- "vot", "vro", "vun",
- "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
- "xal", "xh", "xmf", "xog",
- "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
- "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
- "zun", "zxx", "zza",
-NULL,
- "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
-NULL
-};
-
-static const char* const DEPRECATED_LANGUAGES[]={
- "in", "iw", "ji", "jw", NULL, NULL
-};
-static const char* const REPLACEMENT_LANGUAGES[]={
- "id", "he", "yi", "jv", NULL, NULL
-};
-
-/**
- * Table of 3-letter language codes.
- *
- * This is a lookup table used to convert 3-letter language codes to
- * their 2-letter equivalent, where possible. It must be kept in sync
- * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
- * same language as LANGUAGES_3[i]. The commented-out lines are
- * copied from LANGUAGES to make eyeballing this baby easier.
- *
- * Where a 3-letter language code has no 2-letter equivalent, the
- * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
- *
- * This table should be terminated with a NULL entry, followed by a
- * second list, and another NULL entry. The two lists correspond to
- * the two lists in LANGUAGES.
- */
-/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
-/* ISO639 table version is 20150505 */
-/* Subsequent hand addition of selected languages */
-static const char * const LANGUAGES_3[] = {
- "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
- "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
- "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
- "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
- "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
- "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
- "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
- "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
- "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
- "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
- "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
- "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
- "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
- "ces", "csb", "chu", "chv", "cym",
- "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
- "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
- "dyo", "dyu", "dzo", "dzg",
- "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
- "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
- "ext",
- "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
- "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
- "frs", "fur", "fry",
- "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
- "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
- "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
- "gur", "guz", "glv", "gwi",
- "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
- "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
- "hup", "hye", "her",
- "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
- "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
- "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
- "jav",
- "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
- "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
- "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
- "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
- "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
- "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
- "kom", "cor", "kir",
- "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
- "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
- "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
- "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
- "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
- "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
- "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
- "mal", "mon", "mnc", "mni", "mol",
- "moh", "mos", "mar", "mrj",
- "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
- "mya", "mye", "myv", "mzn",
- "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
- "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
- "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
- "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
- "oci", "oji", "orm", "ori", "oss", "osa", "ota",
- "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
- "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
- "pon", "prg", "pro", "pus", "por",
- "que", "quc", "qug",
- "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
- "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
- "kin", "rwk",
- "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
- "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
- "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
- "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
- "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
- "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
- "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
- "swe", "swa", "swb", "swc", "syc", "syr", "szl",
- "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
- "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
- "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
- "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
- "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
- "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
- "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
- "vot", "vro", "vun",
- "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
- "xal", "xho", "xmf", "xog",
- "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
- "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
- "zun", "zxx", "zza",
-NULL,
-/* "in", "iw", "ji", "jw", "sh", */
- "ind", "heb", "yid", "jaw", "srp",
-NULL
-};
-
-/**
- * Table of 2-letter country codes.
- *
- * This list must be in sorted order. This list is returned directly
- * to the user by some API.
- *
- * This list must be kept in sync with COUNTRIES_3, with corresponding
- * entries matched: COUNTRIES[i] and COUNTRIES_3[i] name the same country.
- *
- * The table is terminated with a NULL entry, followed by a second
- * list, and another NULL entry. The first list is visible to user
- * code when this array is returned by API. The second list contains
- * obsolete codes we still support, but do not expose through user API.
- * "RO" appears in both lists; its second-list entry lines up with the
- * obsolete 3-letter code "ROM" in COUNTRIES_3.
- *
- * Notes:
- *
- * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
- * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
- * new codes keeping the old ones for compatibility updated to include
- * 1999/12/03 revisions *CWB*
- *
- * RO(ROM) is now RO(ROU) according to
- * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
- */
-static const char * const COUNTRIES[] = {
- "AD", "AE", "AF", "AG", "AI", "AL", "AM",
- "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
- "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
- "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
- "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
- "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
- "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
- "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
- "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
- "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
- "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
- "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
- "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
- "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
- "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
- "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
- "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
- "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
- "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
- "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
- "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
- "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
- "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
- "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
- "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
- "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
- "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
- "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
- "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
- "WS", "YE", "YT", "ZA", "ZM", "ZW",
-NULL,
- "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
-NULL
-};
-
-static const char* const DEPRECATED_COUNTRIES[] = { /* deprecated 2-letter codes; entry i is replaced by REPLACEMENT_COUNTRIES[i] (see uloc_getCurrentCountryID) */
- "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list; the two NULLs form the list-NULL-list-NULL layout that _findIndex() walks, with an empty second list */
-};
-static const char* const REPLACEMENT_COUNTRIES[] = { /* current codes, index-aligned with DEPRECATED_COUNTRIES */
-/* Deprecated code at the same index, copied from DEPRECATED_COUNTRIES for easy comparison:
- * "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
- "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes; trailing NULLs mirror DEPRECATED_COUNTRIES */
-};
-
-/**
- * Table of 3-letter country codes.
- *
- * This is a lookup table used to convert 3-letter country codes to
- * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
- * For all valid i, COUNTRIES[i] must refer to the same country as
- * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
- * so that each row can be compared visually with its 2-letter row.
- *
- * This table should be terminated with a NULL entry, followed by a
- * second list, and another NULL entry. The two lists correspond to
- * the two lists in COUNTRIES.
- */
-static const char * const COUNTRIES_3[] = {
-/* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
- "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
-/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
- "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
-/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
- "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
-/* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
- "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
-/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
- "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
-/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
- "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
-/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
- "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
-/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
- "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
-/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
- "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
-/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
- "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
-/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
- "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
-/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
- "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
-/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", */
- "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
-/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
- "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
-/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
- "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
-/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
- "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
-/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
- "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
-/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
- "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
-/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
- "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
-/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
- "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
-/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
- "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
-/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
- "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
-/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
- "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
-/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
- "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
-/* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
- "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
-/* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
- "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
-/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
- "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
-/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
- "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
-/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
- "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
-/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
- "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
-NULL,
-/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
- "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
-NULL
-};
-
-typedef struct CanonicalizationMap { /* one row of CANONICALIZE_MAP */
- const char *id; /* input ID to be recognized */
- const char *canonicalID; /* canonicalized output ID that replaces it */
-} CanonicalizationMap;
-
-/**
- * A map to canonicalize locale IDs. This handles a variety of
- * different semantic kinds of transformations. Each row pairs an
- * input ID with the canonical ID that replaces it.
- *
- * NOTE(review): the code that consults this table is outside this
- * section; confirm how rows are matched before reordering or adding entries.
- */
-static const CanonicalizationMap CANONICALIZE_MAP[] = {
- { "art__LOJBAN", "jbo" }, /* registered name */
- { "hy__AREVELA", "hy" }, /* Registered IANA variant */
- { "hy__AREVMDA", "hyw" }, /* Registered IANA variant */
- { "zh__GUOYU", "zh" }, /* registered name */
- { "zh__HAKKA", "hak" }, /* registered name */
- { "zh__XIANG", "hsn" }, /* registered name */
- // subtags with 3 chars won't be treated as variants.
- { "zh_GAN", "gan" }, /* registered name */
- { "zh_MIN_NAN", "nan" }, /* registered name */
- { "zh_WUU", "wuu" }, /* registered name */
- { "zh_YUE", "yue" }, /* registered name */
-};
-
-/* ### BCP47 Conversion *******************************************/
-/*
- * Test if the locale id looks like a BCP47 tag carrying an extension: it is
- * non-NULL, contains no '@', and getShortestSubtagLength() reports a
- * one-character subtag. The check uses the macro argument itself (it no longer
- * relies on the caller having a variable named localeID). The argument is
- * evaluated more than once, so pass a side-effect-free expression.
- */
-#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
-/*
- * Converts the BCP47 id to Unicode id by calling uloc_forLanguageTag() into buffer.
- * On success finalID is set to buffer. Does nothing to id if conversion fails:
- * when the call returns a length <= 0, *err is a failure, or *err is
- * U_STRING_NOT_TERMINATED_WARNING, finalID is set to id; in the last case *err is
- * also changed to U_BUFFER_OVERFLOW_ERROR. Arguments are evaluated more than once.
- */
-#define _ConvertBCP47(finalID, id, buffer, length, err) UPRV_BLOCK_MACRO_BEGIN { \
-    if (uloc_forLanguageTag((id), (buffer), (length), NULL, (err)) <= 0 || \
-        U_FAILURE(*(err)) || *(err) == U_STRING_NOT_TERMINATED_WARNING) { \
-        (finalID) = (id); \
-        if (*(err) == U_STRING_NOT_TERMINATED_WARNING) { *(err) = U_BUFFER_OVERFLOW_ERROR; } \
-    } else { \
-        (finalID) = (buffer); \
-    } \
-} UPRV_BLOCK_MACRO_END
-/*
- * Gets the size of the shortest subtag in the given localeID.
- *
- * Subtags are runs of characters delimited by '_' or '-'. A run is only
- * measured when a separator ends it, so the final subtag (the one ended by
- * the end of the string) is not taken into account. When no run is ever
- * measured the full string length is returned.
- */
-static int32_t getShortestSubtagLength(const char *localeID) {
-    const int32_t totalLength = static_cast<int32_t>(uprv_strlen(localeID));
-    int32_t shortest = totalLength;
-    int32_t runLength = 0;
-
-    for (int32_t idx = 0; idx < totalLength; ++idx) {
-        const char ch = localeID[idx];
-        if (ch == '_' || ch == '-') {
-            /* a separator closes the current run; empty runs are ignored */
-            if (runLength != 0 && runLength < shortest) {
-                shortest = runLength;
-            }
-            runLength = 0;
-        } else {
-            ++runLength;
-        }
-    }
-
-    return shortest;
-}
-
-/* ### Keywords **************************************************/
-#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9')) /* c lies between '0' and '9' inclusive; evaluates c twice */
-#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) ) /* letter (per uprv_isASCIILetter) or digit; may evaluate c more than once */
-/* Punctuation/symbols allowed in legacy key values: '_', '-', '+' and '/' */
-#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')
-
-#define ULOC_KEYWORD_BUFFER_LEN 25 /* size of the keyword-name buffers, including the terminating NUL */
-#define ULOC_MAX_NO_KEYWORDS 25 /* capacity of the keyword list built by _getKeywords() */
-
-/*
- * Returns a pointer to the '@' that starts the keyword section of localeID,
- * or NULL when there is none. On EBCDIC builds a set of alternative byte
- * values for the '@' sign is tried as well, in table order.
- */
-U_CAPI const char * U_EXPORT2
-locale_getKeywordsStart(const char *localeID) {
-    const char *atSign = uprv_strchr(localeID, '@');
-    if (atSign != NULL) {
-        return atSign;
-    }
-#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
-    /* We do this because the @ sign is variant, and the @ sign used on one
-       EBCDIC machine won't be compiled the same way on other EBCDIC based
-       machines. */
-    static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
-    for (const uint8_t *candidate = ebcdicSigns; *candidate != 0; ++candidate) {
-        const char *hit = uprv_strchr(localeID, *candidate);
-        if (hit != NULL) {
-            return hit;
-        }
-    }
-#endif
-    return NULL;
-}
-
-/**
- * Copies keywordName into buf in lowercase and NUL-terminates it.
- *
- * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]; may hold a partial,
- *            unterminated copy when an error is reported
- * @param keywordName incoming name to be canonicalized
- * @param status set to U_ILLEGAL_ARGUMENT_ERROR for an empty name or a
- *            non-alphanumeric character, and to U_INTERNAL_PROGRAM_ERROR
- *            when the name does not fit in the buffer
- * @return length of the keyword name, or 0 on error
- */
-static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
-{
-    int32_t written = 0;
-
-    while (*keywordName != 0) {
-        const char ch = *keywordName++;
-        if (!UPRV_ISALPHANUM(ch)) {
-            *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
-            return 0;
-        }
-        if (written >= ULOC_KEYWORD_BUFFER_LEN - 1) {
-            /* keyword name too long for internal buffer */
-            *status = U_INTERNAL_PROGRAM_ERROR;
-            return 0;
-        }
-        buf[written++] = uprv_tolower(ch);
-    }
-
-    if (written == 0) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
-        return 0;
-    }
-
-    buf[written] = 0; /* terminate */
-    return written;
-}
-
-typedef struct { /* one keyword=value pair collected by _getKeywords() */
- char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* lowercased keyword name with spaces removed, NUL-terminated */
- int32_t keywordLen; /* number of characters in keyword, excluding the NUL */
- const char *valueStart; /* points at the first non-space character of the value inside the parsed string */
- int32_t valueLen; /* length of the value with trailing spaces trimmed */
-} KeywordStruct;
-
-/* Sort comparator: orders two KeywordStruct entries by their keyword string. */
-static int32_t U_CALLCONV
-compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
-    const KeywordStruct *lhs = (const KeywordStruct *)left;
-    const KeywordStruct *rhs = (const KeywordStruct *)right;
-    return uprv_strcmp(lhs->keyword, rhs->keyword);
-}
-
-/*
- * Parses the "key=value;key=value" text at localeID (the text following '@'),
- * and appends the keywords to sink sorted by keyword name.
- *
- * Nothing is done unless prev is '@'. Keyword names are lowercased with spaces
- * removed; values are trimmed of leading/trailing spaces. A keyword that
- * repeats an earlier one is dropped. With valuesToo the output is
- * "key=value;key=value"; without it each keyword is followed by a NUL byte.
- *
- * status is set to U_INVALID_FORMAT_ERROR for a missing '=', a ';' before '=',
- * an empty keyword or an empty value, and to U_INTERNAL_PROGRAM_ERROR when a
- * keyword is too long or there are more than ULOC_MAX_NO_KEYWORDS entries.
- */
-static void
-_getKeywords(const char *localeID,
-             char prev,
-             ByteSink& sink,
-             UBool valuesToo,
-             UErrorCode *status)
-{
-    if (prev != '@') {
-        return; /* not at the start of a keyword definition */
-    }
-
-    KeywordStruct entries[ULOC_MAX_NO_KEYWORDS];
-    int32_t count = 0;
-    const char *cursor = localeID;
-
-    /* grab pairs, trim spaces, lowercase keywords; sorting and output follow */
-    do {
-        /* skip leading spaces */
-        while (*cursor == ' ') {
-            ++cursor;
-        }
-        if (*cursor == 0) { /* handle trailing "; " */
-            break;
-        }
-        if (count == ULOC_MAX_NO_KEYWORDS) {
-            *status = U_INTERNAL_PROGRAM_ERROR;
-            return;
-        }
-
-        const char *equals = uprv_strchr(cursor, '=');
-        const char *semicolon = uprv_strchr(cursor, ';');
-        /* lack of '=' [foo@currency] is illegal */
-        /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
-        if (equals == NULL || (semicolon != NULL && semicolon < equals)) {
-            *status = U_INVALID_FORMAT_ERROR;
-            return;
-        }
-        if (equals - cursor >= ULOC_KEYWORD_BUFFER_LEN) {
-            /* keyword name too long for internal buffer */
-            *status = U_INTERNAL_PROGRAM_ERROR;
-            return;
-        }
-
-        /* normalize the keyword name: drop spaces, lowercase the rest */
-        KeywordStruct &entry = entries[count];
-        int32_t nameLen = 0;
-        for (const char *p = cursor; p < equals; ++p) {
-            if (*p != ' ') {
-                entry.keyword[nameLen++] = uprv_tolower(*p);
-            }
-        }
-        /* zero-length keyword is an error. */
-        if (nameLen == 0) {
-            *status = U_INVALID_FORMAT_ERROR;
-            return;
-        }
-        entry.keyword[nameLen] = 0;
-        entry.keywordLen = nameLen;
-
-        /* the value starts after the '=' and any leading spaces */
-        const char *value = equals + 1;
-        while (*value == ' ') {
-            ++value;
-        }
-        /* Premature end or zero-length value */
-        if (*value == 0 || value == semicolon) {
-            *status = U_INVALID_FORMAT_ERROR;
-            return;
-        }
-        entry.valueStart = value;
-
-        if (semicolon != NULL) {
-            /* value ends at the ';'; back up over trailing spaces */
-            const char *valueEnd = semicolon;
-            while (*(valueEnd - 1) == ' ') {
-                --valueEnd;
-            }
-            entry.valueLen = (int32_t)(valueEnd - value);
-            cursor = semicolon + 1;
-        } else {
-            /* last pair: value runs to the end of the string, minus trailing spaces */
-            int32_t len = (int32_t)uprv_strlen(value);
-            while (len != 0 && value[len - 1] == ' ') {
-                --len;
-            }
-            entry.valueLen = len;
-            cursor = NULL;
-        }
-
-        /* If this is a duplicate keyword, then ignore it */
-        UBool duplicate = FALSE;
-        for (int32_t j = 0; j < count; ++j) {
-            if (uprv_strcmp(entries[j].keyword, entry.keyword) == 0) {
-                duplicate = TRUE;
-                break;
-            }
-        }
-        if (!duplicate) {
-            ++count;
-        }
-    } while (cursor != NULL);
-
-    /* now we have a list of keywords; sort it by keyword name */
-    uprv_sortArray(entries, count, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
-
-    /* Now construct the keyword part */
-    for (int32_t k = 0; k < count; ++k) {
-        sink.Append(entries[k].keyword, entries[k].keywordLen);
-        if (valuesToo) {
-            sink.Append("=", 1);
-            sink.Append(entries[k].valueStart, entries[k].valueLen);
-            if (k < count - 1) {
-                sink.Append(";", 1);
-            }
-        } else {
-            sink.Append("\0", 1);
-        }
-    }
-}
-
-/*
- * Buffer-based front end for _getKeywords(): writes the sorted keyword list
- * into keywords (capacity keywordCapacity) and returns the number of bytes
- * the sink reports as appended. Returns 0 right away when *status already
- * holds a failure. If the sink overflowed, *status becomes
- * U_BUFFER_OVERFLOW_ERROR; otherwise the result is handed to u_terminateChars().
- */
-U_CFUNC int32_t
-locale_getKeywords(const char *localeID,
-                   char prev,
-                   char *keywords, int32_t keywordCapacity,
-                   UBool valuesToo,
-                   UErrorCode *status) {
-    if (U_FAILURE(*status)) {
-        return 0;
-    }
-
-    CheckedArrayByteSink sink(keywords, keywordCapacity);
-    _getKeywords(localeID, prev, sink, valuesToo, status);
-
-    const int32_t appended = sink.NumberOfBytesAppended();
-
-    if (U_SUCCESS(*status)) {
-        if (sink.Overflowed()) {
-            *status = U_BUFFER_OVERFLOW_ERROR;
-        } else {
-            u_terminateChars(keywords, keywordCapacity, appended, status);
-        }
-    }
-
-    return appended;
-}
-
-/*
- * Looks up keywordName in the keyword section of localeID and copies its
- * value, as written, into buffer.
- *
- * The keyword name is compared after lowercasing; spaces around names and
- * values are tolerated. A localeID without '@' that contains a one-character
- * subtag is first converted with uloc_forLanguageTag().
- *
- * Returns the result of u_terminateChars() for the value length (the full
- * length is counted even when it does not fit in bufferCapacity), or 0 when
- * status/localeID is NULL, *status is already a failure, the locale has no
- * keywords, the keyword is absent, or an error was reported.
- *
- * *status is set to U_ILLEGAL_ARGUMENT_ERROR for an empty keywordName or a
- * malformed keyword/value in the locale, and to U_INTERNAL_PROGRAM_ERROR when
- * a keyword name exceeds the internal buffer.
- */
-U_CAPI int32_t U_EXPORT2
-uloc_getKeywordValue(const char* localeID,
-                     const char* keywordName,
-                     char* buffer, int32_t bufferCapacity,
-                     UErrorCode* status)
-{
-    /* Start from an empty result, but only when there is room for the
-     * terminator: a non-NULL buffer with capacity 0 must not be written to. */
-    if (buffer != nullptr && bufferCapacity > 0) {
-        buffer[0] = '\0';
-    }
-    const char* startSearchHere = NULL;
-    const char* nextSeparator = NULL;
-    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
-    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
-    int32_t result = 0;
-
-    if(status && U_SUCCESS(*status) && localeID) {
-        char tempBuffer[ULOC_FULLNAME_CAPACITY];
-        const char* tmpLocaleID;
-
-        if (keywordName == NULL || keywordName[0] == 0) {
-            *status = U_ILLEGAL_ARGUMENT_ERROR;
-            return 0;
-        }
-
-        locale_canonKeywordName(keywordNameBuffer, keywordName, status);
-        if(U_FAILURE(*status)) {
-            return 0;
-        }
-
-        if (_hasBCP47Extension(localeID)) {
-            _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
-        } else {
-            tmpLocaleID=localeID;
-        }
-
-        startSearchHere = locale_getKeywordsStart(tmpLocaleID);
-        if(startSearchHere == NULL) {
-            /* no keywords, return at once */
-            return 0;
-        }
-
-        /* walk the key=value pairs until the requested keyword is found */
-        while(startSearchHere) {
-            const char* keyValueTail;
-            int32_t keyValueLen;
-
-            startSearchHere++; /* skip @ or ; */
-            nextSeparator = uprv_strchr(startSearchHere, '=');
-            if(!nextSeparator) {
-                *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
-                return 0;
-            }
-            /* strip leading & trailing spaces (TC decided to tolerate these) */
-            while(*startSearchHere == ' ') {
-                startSearchHere++;
-            }
-            keyValueTail = nextSeparator;
-            while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
-                keyValueTail--;
-            }
-            /* now keyValueTail points to first char after the keyName */
-            /* copy & normalize keyName from locale */
-            if (startSearchHere == keyValueTail) {
-                *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
-                return 0;
-            }
-            keyValueLen = 0;
-            while (startSearchHere < keyValueTail) {
-                if (!UPRV_ISALPHANUM(*startSearchHere)) {
-                    *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
-                    return 0;
-                }
-                if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
-                    localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
-                } else {
-                    /* keyword name too long for internal buffer */
-                    *status = U_INTERNAL_PROGRAM_ERROR;
-                    return 0;
-                }
-            }
-            localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
-
-            /* remember where the next pair starts (NULL when this is the last one) */
-            startSearchHere = uprv_strchr(nextSeparator, ';');
-
-            if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
-                /* current entry matches the keyword. */
-                nextSeparator++; /* skip '=' */
-                /* First strip leading & trailing spaces (TC decided to tolerate these) */
-                while(*nextSeparator == ' ') {
-                    nextSeparator++;
-                }
-                keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
-                while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
-                    keyValueTail--;
-                }
-                /* Now copy the value, but check well-formedness */
-                if (nextSeparator == keyValueTail) {
-                    *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
-                    return 0;
-                }
-                keyValueLen = 0;
-                while (nextSeparator < keyValueTail) {
-                    if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
-                        *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
-                        return 0;
-                    }
-                    if (keyValueLen < bufferCapacity) {
-                        /* Should we lowercase value to return here? Tests expect as-is. */
-                        buffer[keyValueLen++] = *nextSeparator++;
-                    } else { /* keep advancing so we return correct length in case of overflow */
-                        keyValueLen++;
-                        nextSeparator++;
-                    }
-                }
-                result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status);
-                return result;
-            }
-        }
-    }
-    return 0;
-}
-
-/*
- * Sets, replaces or removes one keyword in the NUL-terminated locale ID held
- * in buffer, editing buffer in place.
- *
- * A NULL or empty keywordValue removes the keyword. The keyword section is
- * rebuilt with lowercased names; a new keyword is inserted before the first
- * existing keyword that sorts after it, or appended at the end.
- *
- * Returns -1 when *status is already a failure; 0 on argument errors or a
- * malformed keyword section (with *status set); the required length when the
- * result does not fit (U_BUFFER_OVERFLOW_ERROR, buffer unchanged); the
- * original length when nothing needs to change or an internal append failed;
- * otherwise the new length of the locale ID.
- *
- * A NULL buffer is rejected with U_ILLEGAL_ARGUMENT_ERROR instead of being
- * passed to uprv_strlen().
- */
-U_CAPI int32_t U_EXPORT2
-uloc_setKeywordValue(const char* keywordName,
-                     const char* keywordValue,
-                     char* buffer, int32_t bufferCapacity,
-                     UErrorCode* status)
-{
-    /* TODO: sorting. removal. */
-    int32_t keywordNameLen;
-    int32_t keywordValueLen;
-    int32_t bufLen;
-    int32_t needLen = 0;
-    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
-    char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
-    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
-    int32_t rc;
-    char* nextSeparator = NULL;
-    char* nextEqualsign = NULL;
-    char* startSearchHere = NULL;
-    char* keywordStart = NULL;
-    CharString updatedKeysAndValues;
-    int32_t updatedKeysAndValuesLen;
-    UBool handledInputKeyAndValue = FALSE;
-    char keyValuePrefix = '@';
-
-    if(U_FAILURE(*status)) {
-        return -1;
-    }
-    if (keywordName == NULL || keywordName[0] == 0 || buffer == NULL || bufferCapacity <= 1) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-    bufLen = (int32_t)uprv_strlen(buffer);
-    if(bufferCapacity<bufLen) {
-        /* The capacity is less than the length?! Is this NULL terminated? */
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-    keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
-    if(U_FAILURE(*status)) {
-        return 0;
-    }
-
-    /* validate and copy the new value; it is stored as given (no case change) */
-    keywordValueLen = 0;
-    if(keywordValue) {
-        while (*keywordValue != 0) {
-            if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
-                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
-                return 0;
-            }
-            if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
-                /* Should we force lowercase in value to set? */
-                keywordValueBuffer[keywordValueLen++] = *keywordValue++;
-            } else {
-                /* keywordValue too long for internal buffer */
-                *status = U_INTERNAL_PROGRAM_ERROR;
-                return 0;
-            }
-        }
-    }
-    keywordValueBuffer[keywordValueLen] = 0; /* terminate */
-
-    startSearchHere = (char*)locale_getKeywordsStart(buffer);
-    if(startSearchHere == NULL || (startSearchHere[1]==0)) {
-        /* shortcut: the locale has no keywords yet (no '@', or a bare trailing '@') */
-        if(keywordValueLen == 0) { /* no keywords = nothing to remove */
-            return bufLen;
-        }
-
-        needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
-        if(startSearchHere) { /* had a single @ */
-            needLen--; /* already had the @ */
-            /* startSearchHere points at the @ */
-        } else {
-            startSearchHere=buffer+bufLen;
-        }
-        if(needLen >= bufferCapacity) {
-            *status = U_BUFFER_OVERFLOW_ERROR;
-            return needLen; /* no change */
-        }
-        *startSearchHere++ = '@';
-        uprv_strcpy(startSearchHere, keywordNameBuffer);
-        startSearchHere += keywordNameLen;
-        *startSearchHere++ = '=';
-        uprv_strcpy(startSearchHere, keywordValueBuffer);
-        return needLen;
-    } /* end shortcut - no @ */
-
-    keywordStart = startSearchHere;
-    /* search for keyword, rebuilding the whole keyword section as we go */
-    while(keywordStart) {
-        const char* keyValueTail;
-        int32_t keyValueLen;
-
-        keywordStart++; /* skip @ or ; */
-        nextEqualsign = uprv_strchr(keywordStart, '=');
-        if (!nextEqualsign) {
-            *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
-            return 0;
-        }
-        /* strip leading & trailing spaces (TC decided to tolerate these) */
-        while(*keywordStart == ' ') {
-            keywordStart++;
-        }
-        keyValueTail = nextEqualsign;
-        while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
-            keyValueTail--;
-        }
-        /* now keyValueTail points to first char after the keyName */
-        /* copy & normalize keyName from locale */
-        if (keywordStart == keyValueTail) {
-            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
-            return 0;
-        }
-        keyValueLen = 0;
-        while (keywordStart < keyValueTail) {
-            if (!UPRV_ISALPHANUM(*keywordStart)) {
-                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
-                return 0;
-            }
-            if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
-                localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
-            } else {
-                /* keyword name too long for internal buffer */
-                *status = U_INTERNAL_PROGRAM_ERROR;
-                return 0;
-            }
-        }
-        localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
-
-        nextSeparator = uprv_strchr(nextEqualsign, ';');
-
-        /* start processing the value part */
-        nextEqualsign++; /* skip '=' */
-        /* First strip leading & trailing spaces (TC decided to tolerate these) */
-        while(*nextEqualsign == ' ') {
-            nextEqualsign++;
-        }
-        keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
-        while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
-            keyValueTail--;
-        }
-        if (nextEqualsign == keyValueTail) {
-            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
-            return 0;
-        }
-
-        rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
-        if(rc == 0) {
-            /* Current entry matches the input keyword. Update the entry */
-            if(keywordValueLen > 0) { /* updating a value */
-                updatedKeysAndValues.append(keyValuePrefix, *status);
-                keyValuePrefix = ';'; /* for any subsequent key-value pair */
-                updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
-                updatedKeysAndValues.append('=', *status);
-                updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
-            } /* else removing this entry, don't emit anything */
-            handledInputKeyAndValue = TRUE;
-        } else {
-            /* input keyword sorts earlier than current entry, add before current entry */
-            if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
-                /* insert new entry at this location */
-                updatedKeysAndValues.append(keyValuePrefix, *status);
-                keyValuePrefix = ';'; /* for any subsequent key-value pair */
-                updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
-                updatedKeysAndValues.append('=', *status);
-                updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
-                handledInputKeyAndValue = TRUE;
-            }
-            /* copy the current entry */
-            updatedKeysAndValues.append(keyValuePrefix, *status);
-            keyValuePrefix = ';'; /* for any subsequent key-value pair */
-            updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
-            updatedKeysAndValues.append('=', *status);
-            updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
-        }
-        if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
-            /* append new entry at the end, it sorts later than existing entries */
-            updatedKeysAndValues.append(keyValuePrefix, *status);
-            /* skip keyValuePrefix update, no subsequent key-value pair */
-            updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
-            updatedKeysAndValues.append('=', *status);
-            updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
-            handledInputKeyAndValue = TRUE;
-        }
-        keywordStart = nextSeparator;
-    } /* end loop searching */
-
-    /* Any error from updatedKeysAndValues.append above would be internal and not due to
-     * problems with the passed-in locale. So if we did encounter problems with the
-     * passed-in locale above, those errors took precedence and overrode any error
-     * status from updatedKeysAndValues.append, and also caused a return of 0. If there
-     * are errors here they are from updatedKeysAndValues.append; they do cause an
-     * error return but the passed-in locale is unmodified and the original bufLen is
-     * returned.
-     */
-    if (!handledInputKeyAndValue || U_FAILURE(*status)) {
-        /* if input key/value specified removal of a keyword not present in locale, or
-         * there was an error in CharString.append, leave original locale alone. */
-        return bufLen;
-    }
-
-    updatedKeysAndValuesLen = updatedKeysAndValues.length();
-    /* needLen = length of the part before '@' + length of updated key-value part including '@' */
-    needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
-    if(needLen >= bufferCapacity) {
-        *status = U_BUFFER_OVERFLOW_ERROR;
-        return needLen; /* no change */
-    }
-    if (updatedKeysAndValuesLen > 0) {
-        uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
-    }
-    /* terminate explicitly; when every keyword was removed this also drops the '@' */
-    buffer[needLen]=0;
-    return needLen;
-}
-
-/* ### ID parsing implementation **************************************************/
-
-/* TRUE if a is one of the letters that can start a special prefix: 'x', 'X', 'i' or 'I'.
- * The argument is evaluated more than once. */
-#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))
-
-/*returns TRUE if one of the special prefixes is here (s=string)
-  'x-' or 'i-' */
-#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))
-
-/* Dot terminates it because of POSIX form where dot precedes the codepage
- * except for variant
- */
-#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
-
-/**
- * Lookup 'key' in the array 'list'. The array 'list' should contain
- * a NULL entry, followed by more entries, and a second NULL entry.
- *
- * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
- * COUNTRIES_3.
- *
- * Returns the index of the first exact match counted from the start of
- * 'list' (the separating NULL occupies an index too), or -1 if 'key'
- * is in neither list.
- */
-static int16_t _findIndex(const char* const* list, const char* key)
-{
-    const char* const* cursor = list;
-
-    /* Make two passes through two NULL-terminated arrays at 'list' */
-    for (int32_t pass = 0; pass < 2; ++pass) {
-        for (; *cursor != NULL; ++cursor) {
-            if (uprv_strcmp(key, *cursor) == 0) {
-                return (int16_t)(cursor - list);
-            }
-        }
-        ++cursor; /* step over the NULL that ended this pass */
-    }
-    return -1;
-}
-
-/* Maps a deprecated country code to its replacement from REPLACEMENT_COUNTRIES;
- * any code not listed in DEPRECATED_COUNTRIES is returned unchanged. */
-U_CFUNC const char*
-uloc_getCurrentCountryID(const char* oldID){
-    const int32_t index = _findIndex(DEPRECATED_COUNTRIES, oldID);
-    return (index >= 0) ? REPLACEMENT_COUNTRIES[index] : oldID;
-}
-/* Maps a deprecated language code to its replacement from REPLACEMENT_LANGUAGES;
- * any code not listed in DEPRECATED_LANGUAGES is returned unchanged. */
-U_CFUNC const char*
-uloc_getCurrentLanguageID(const char* oldID){
-    const int32_t index = _findIndex(DEPRECATED_LANGUAGES, oldID);
-    return (index >= 0) ? REPLACEMENT_LANGUAGES[index] : oldID;
-}
-/*
- * the internal functions _getLanguage(), _getCountry(), _getVariant()
- * avoid duplicating code to handle the earlier locale ID pieces
- * in the functions for the later ones by
- * setting the *pEnd pointer to where they stopped parsing
- *
- * TODO try to use this in Locale
- */
-/*
- * Extracts the lowercased language subtag at the start of localeID.
- *
- * "root", and "und" followed by end of string, '-', '_' or '@', are skipped
- * and yield an empty language. An "x-"/"i-" style prefix is kept (with '-').
- * A 3-letter result found in LANGUAGES_3 is replaced by the entry at the same
- * index in LANGUAGES. If pEnd is non-NULL it receives the position where
- * parsing stopped.
- */
-static CharString
-ulocimp_getLanguage(const char *localeID,
-                    const char **pEnd,
-                    UErrorCode &status) {
-    CharString language;
-    const char *cursor = localeID;
-
-    if (uprv_stricmp(cursor, "root") == 0) {
-        cursor += 4;
-    } else if (uprv_strnicmp(cursor, "und", 3) == 0) {
-        const char after = cursor[3];
-        if (after == '\0' || after == '-' || after == '_' || after == '@') {
-            cursor += 3;
-        }
-    }
-
-    /* if it starts with i- or x- then copy that prefix */
-    if (_isIDPrefix(cursor)) {
-        language.append((char)uprv_tolower(*cursor), status);
-        language.append('-', status);
-        cursor += 2;
-    }
-
-    /* copy the language as far as possible */
-    for (; !_isTerminator(*cursor) && !_isIDSeparator(*cursor); ++cursor) {
-        language.append((char)uprv_tolower(*cursor), status);
-    }
-
-    if (language.length() == 3) {
-        /* convert 3 character code to 2 character code if possible *CWB*/
-        const int32_t index = _findIndex(LANGUAGES_3, language.data());
-        if (index >= 0) {
-            language.clear();
-            language.append(LANGUAGES[index], status);
-        }
-    }
-
-    if (pEnd != NULL) {
-        *pEnd = cursor;
-    }
-
-    return language;
-}
-
-/*
- * Buffer-based variant: copies at most languageCapacity bytes of the language
- * subtag into language (no NUL terminator is added here) and returns the full
- * length of the subtag, or 0 if extraction failed. Nothing is copied when the
- * number of bytes to copy is zero or languageCapacity is negative.
- */
-U_CFUNC int32_t
-ulocimp_getLanguage(const char *localeID,
-                    char *language, int32_t languageCapacity,
-                    const char **pEnd) {
-    ErrorCode status;
-    CharString result = ulocimp_getLanguage(localeID, pEnd, status);
-    if (status.isFailure()) {
-        return 0;
-    }
-    int32_t reslen = result.length();
-    /* guard the copy: a negative capacity must not turn into a huge size_t,
-     * and a zero-length copy must not touch a possibly-NULL destination */
-    int32_t copyLen = std::min(reslen, languageCapacity);
-    if (copyLen > 0) {
-        uprv_memcpy(language, result.data(), copyLen);
-    }
-    return reslen;
-}
-
-/*
- * Extracts a script subtag from the start of localeID.
- *
- * The leading run of ASCII letters is counted; only a run of exactly four
- * letters is a script, returned title-cased (first letter upper, rest lower).
- * Otherwise the result is empty. If pEnd is non-NULL it receives the position
- * after the script, or localeID itself when no script was found.
- */
-static CharString
-ulocimp_getScript(const char *localeID,
-                  const char **pEnd,
-                  UErrorCode &status) {
-    CharString script;
-    int32_t letters = 0;
-
-    /* measure the second item as far as possible */
-    while (!_isTerminator(localeID[letters]) && !_isIDSeparator(localeID[letters])
-            && uprv_isASCIILetter(localeID[letters])) {
-        ++letters;
-    }
-
-    /* If it's exactly 4 characters long, then it's a script and not a country. */
-    const bool isScript = (letters == 4);
-
-    if (pEnd != NULL) {
-        *pEnd = isScript ? localeID + letters : localeID;
-    }
-
-    if (isScript) {
-        script.append((char)uprv_toupper(localeID[0]), status);
-        for (int32_t k = 1; k < letters; ++k) {
-            script.append((char)uprv_tolower(localeID[k]), status);
-        }
-    }
-
-    return script;
-}
-
-/*
- * Buffer-based variant: copies at most scriptCapacity bytes of the script
- * subtag into script (no NUL terminator is added here) and returns the full
- * length of the subtag, or 0 if extraction failed. Nothing is copied when the
- * number of bytes to copy is zero or scriptCapacity is negative.
- */
-U_CFUNC int32_t
-ulocimp_getScript(const char *localeID,
-                  char *script, int32_t scriptCapacity,
-                  const char **pEnd) {
-    ErrorCode status;
-    CharString result = ulocimp_getScript(localeID, pEnd, status);
-    if (status.isFailure()) {
-        return 0;
-    }
-    int32_t reslen = result.length();
-    /* guard the copy: a negative capacity must not turn into a huge size_t,
-     * and a zero-length copy must not touch a possibly-NULL destination */
-    int32_t copyLen = std::min(reslen, scriptCapacity);
-    if (copyLen > 0) {
-        uprv_memcpy(script, result.data(), copyLen);
-    }
-    return reslen;
-}
-
-/*
- * Extracts the uppercased country subtag at the start of localeID.
- *
- * Characters are taken up to the next terminator or ID separator. Only a
- * subtag of length 2 or 3 is accepted; any other length yields an empty
- * result and leaves the end position at localeID. A 3-letter code found in
- * COUNTRIES_3 is replaced by the entry at the same index in COUNTRIES.
- * If pEnd is non-NULL it receives the position where parsing stopped.
- */
-static CharString
-ulocimp_getCountry(const char *localeID,
-                   const char **pEnd,
-                   UErrorCode &status) {
-    CharString country;
-    int32_t subtagLen = 0;
-
-    /* copy the country as far as possible and count its length */
-    for (; !_isTerminator(localeID[subtagLen]) && !_isIDSeparator(localeID[subtagLen]); ++subtagLen) {
-        country.append((char)uprv_toupper(localeID[subtagLen]), status);
-    }
-
-    const char *stop = localeID;
-    if (subtagLen != 2 && subtagLen != 3) {
-        /* the country should be either length 2 or 3 */
-        country.clear();
-    } else {
-        if (subtagLen == 3) {
-            /* convert 3 character code to 2 character code if possible *CWB*/
-            const int32_t index = _findIndex(COUNTRIES_3, country.data());
-            if (index >= 0) {
-                country.clear();
-                country.append(COUNTRIES[index], status);
-            }
-        }
-        stop = localeID + subtagLen;
-    }
-
-    if (pEnd != NULL) {
-        *pEnd = stop;
-    }
-
-    return country;
-}
-
-/*
- * Buffer-based variant: copies at most countryCapacity bytes of the country
- * subtag into country (no NUL terminator is added here) and returns the full
- * length of the subtag, or 0 if extraction failed. Nothing is copied when the
- * number of bytes to copy is zero or countryCapacity is negative.
- */
-U_CFUNC int32_t
-ulocimp_getCountry(const char *localeID,
-                   char *country, int32_t countryCapacity,
-                   const char **pEnd) {
-    ErrorCode status;
-    CharString result = ulocimp_getCountry(localeID, pEnd, status);
-    if (status.isFailure()) {
-        return 0;
-    }
-    int32_t reslen = result.length();
-    /* guard the copy: a negative capacity must not turn into a huge size_t,
-     * and a zero-length copy must not touch a possibly-NULL destination */
-    int32_t copyLen = std::min(reslen, countryCapacity);
-    if (copyLen > 0) {
-        uprv_memcpy(country, result.data(), copyLen);
-    }
-    return reslen;
-}
-
-/**
- * Appends the uppercased variant part of a locale ID to sink.
- *
- * If prev is an ID separator, everything up to the next terminator is taken
- * as the variant, with '-' turned into '_'. If that produced nothing, the
- * text after '@' is used instead (localeID itself when prev is '@'), with
- * '-' and ',' turned into '_'.
- *
- * @param needSeparator if true, then add leading '_' if any variants
- * are added to 'variant'
- */
-static void
-_getVariantEx(const char *localeID,
-              char prev,
-              ByteSink& sink,
-              UBool needSeparator) {
-    UBool sawVariant = FALSE;
-
-    /* get one or more variant tags and separate them with '_' */
-    if (_isIDSeparator(prev)) {
-        /* get a variant string after a '-' or '_' */
-        for (; !_isTerminator(*localeID); ++localeID) {
-            if (needSeparator) {
-                sink.Append("_", 1);
-                needSeparator = FALSE;
-            }
-            char ch = (char)uprv_toupper(*localeID);
-            if (ch == '-') {
-                ch = '_';
-            }
-            sink.Append(&ch, 1);
-            sawVariant = TRUE;
-        }
-    }
-    if (sawVariant) {
-        return;
-    }
-
-    /* if there is no variant tag after a '-' or '_' then look for '@' */
-    if (prev != '@') {
-        localeID = locale_getKeywordsStart(localeID);
-        if (localeID == NULL) {
-            return;
-        }
-        ++localeID; /* point after the '@' */
-    }
-    for (; !_isTerminator(*localeID); ++localeID) {
-        if (needSeparator) {
-            sink.Append("_", 1);
-            needSeparator = FALSE;
-        }
-        char ch = (char)uprv_toupper(*localeID);
-        if (ch == '-' || ch == ',') {
-            ch = '_';
-        }
-        sink.Append(&ch, 1);
-    }
-}
-
-static int32_t
-_getVariantEx(const char *localeID,
- char prev,
- char *variant, int32_t variantCapacity,
- UBool needSeparator) {
- CheckedArrayByteSink sink(variant, variantCapacity);
- _getVariantEx(localeID, prev, sink, needSeparator);
- return sink.NumberOfBytesAppended();
-}
-
-static int32_t
-_getVariant(const char *localeID,
- char prev,
- char *variant, int32_t variantCapacity) {
- return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
-}
-
-/* Keyword enumeration */
-
-typedef struct UKeywordsContext {
- char* keywords;
- char* current;
-} UKeywordsContext;
-
-U_CDECL_BEGIN
-
-static void U_CALLCONV
-uloc_kw_closeKeywords(UEnumeration *enumerator) {
- uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
- uprv_free(enumerator->context);
- uprv_free(enumerator);
-}
-
-static int32_t U_CALLCONV
-uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
- char *kw = ((UKeywordsContext *)en->context)->keywords;
- int32_t result = 0;
- while(*kw) {
- result++;
- kw += uprv_strlen(kw)+1;
- }
- return result;
-}
-
-static const char * U_CALLCONV
-uloc_kw_nextKeyword(UEnumeration* en,
- int32_t* resultLength,
- UErrorCode* /*status*/) {
- const char* result = ((UKeywordsContext *)en->context)->current;
- int32_t len = 0;
- if(*result) {
- len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
- ((UKeywordsContext *)en->context)->current += len+1;
- } else {
- result = NULL;
- }
- if (resultLength) {
- *resultLength = len;
- }
- return result;
-}
-
-static void U_CALLCONV
-uloc_kw_resetKeywords(UEnumeration* en,
- UErrorCode* /*status*/) {
- ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
-}
-
-U_CDECL_END
-
-
-static const UEnumeration gKeywordsEnum = {
- NULL,
- NULL,
- uloc_kw_closeKeywords,
- uloc_kw_countKeywords,
- uenum_unextDefault,
- uloc_kw_nextKeyword,
- uloc_kw_resetKeywords
-};
-
-U_CAPI UEnumeration* U_EXPORT2
-uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
-{
- LocalMemory<UKeywordsContext> myContext;
- LocalMemory<UEnumeration> result;
-
- if (U_FAILURE(*status)) {
- return nullptr;
- }
- myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
- result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
- if (myContext.isNull() || result.isNull()) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
- uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));
- myContext->keywords = static_cast<char *>(uprv_malloc(keywordListSize+1));
- if (myContext->keywords == nullptr) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
- uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
- myContext->keywords[keywordListSize] = 0;
- myContext->current = myContext->keywords;
- result->context = myContext.orphan();
- return result.orphan();
-}
-
-U_CAPI UEnumeration* U_EXPORT2
-uloc_openKeywords(const char* localeID,
- UErrorCode* status)
-{
- int32_t i=0;
- char keywords[256];
- int32_t keywordsCapacity = 256;
- char tempBuffer[ULOC_FULLNAME_CAPACITY];
- const char* tmpLocaleID;
-
- if(status==NULL || U_FAILURE(*status)) {
- return 0;
- }
-
- if (_hasBCP47Extension(localeID)) {
- _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
- } else {
- if (localeID==NULL) {
- localeID=uloc_getDefault();
- }
- tmpLocaleID=localeID;
- }
-
- /* Skip the language */
- ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
- if(_isIDSeparator(*tmpLocaleID)) {
- const char *scriptID;
- /* Skip the script if available */
- ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
- if(scriptID != tmpLocaleID+1) {
- /* Found optional script */
- tmpLocaleID = scriptID;
- }
- /* Skip the Country */
- if (_isIDSeparator(*tmpLocaleID)) {
- ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
- if(_isIDSeparator(*tmpLocaleID)) {
- _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
- }
- }
- }
-
- /* keywords are located after '@' */
- if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
- i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, FALSE, status);
- }
-
- if(i) {
- return uloc_openKeywordList(keywords, i, status);
- } else {
- return NULL;
- }
-}
-
-
-/* bit-flags for 'options' parameter of _canonicalize */
-#define _ULOC_STRIP_KEYWORDS 0x2
-#define _ULOC_CANONICALIZE 0x1
-
-#define OPTION_SET(options, mask) ((options & mask) != 0)
-
-static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
-#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
-
-/**
- * Canonicalize the given localeID, to level 1 or to level 2,
- * depending on the options. To specify level 1, pass in options=0.
- * To specify level 2, pass in options=_ULOC_CANONICALIZE.
- *
- * This is the code underlying uloc_getName and uloc_canonicalize.
- */
-static void
-_canonicalize(const char* localeID,
- ByteSink& sink,
- uint32_t options,
- UErrorCode* err) {
- int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
- char tempBuffer[ULOC_FULLNAME_CAPACITY];
- const char* origLocaleID;
- const char* tmpLocaleID;
- const char* keywordAssign = NULL;
- const char* separatorIndicator = NULL;
-
- if (U_FAILURE(*err)) {
- return;
- }
-
- if (_hasBCP47Extension(localeID)) {
- _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
- } else {
- if (localeID==NULL) {
- localeID=uloc_getDefault();
- }
- tmpLocaleID=localeID;
- }
-
- origLocaleID=tmpLocaleID;
-
- /* get all pieces, one after another, and separate with '_' */
- CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
-
- if (tag.length() == I_DEFAULT_LENGTH &&
- uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
- tag.clear();
- tag.append(uloc_getDefault(), *err);
- } else if(_isIDSeparator(*tmpLocaleID)) {
- const char *scriptID;
-
- ++fieldCount;
- tag.append('_', *err);
-
- CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
- tag.append(script, *err);
- scriptSize = script.length();
- if(scriptSize > 0) {
- /* Found optional script */
- tmpLocaleID = scriptID;
- ++fieldCount;
- if (_isIDSeparator(*tmpLocaleID)) {
- /* If there is something else, then we add the _ */
- tag.append('_', *err);
- }
- }
-
- if (_isIDSeparator(*tmpLocaleID)) {
- const char *cntryID;
-
- CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
- tag.append(country, *err);
- if (!country.isEmpty()) {
- /* Found optional country */
- tmpLocaleID = cntryID;
- }
- if(_isIDSeparator(*tmpLocaleID)) {
- /* If there is something else, then we add the _ if we found country before. */
- if (!_isIDSeparator(*(tmpLocaleID+1))) {
- ++fieldCount;
- tag.append('_', *err);
- }
-
- variantSize = -tag.length();
- {
- CharStringByteSink s(&tag);
- _getVariantEx(tmpLocaleID+1, *tmpLocaleID, s, FALSE);
- }
- variantSize += tag.length();
- if (variantSize > 0) {
- tmpLocaleID += variantSize + 1; /* skip '_' and variant */
- }
- }
- }
- }
-
- /* Copy POSIX-style charset specifier, if any [mr.utf8] */
- if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
- UBool done = FALSE;
- do {
- char c = *tmpLocaleID;
- switch (c) {
- case 0:
- case '@':
- done = TRUE;
- break;
- default:
- tag.append(c, *err);
- ++tmpLocaleID;
- break;
- }
- } while (!done);
- }
-
- /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
- After this, tmpLocaleID either points to '@' or is NULL */
- if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
- keywordAssign = uprv_strchr(tmpLocaleID, '=');
- separatorIndicator = uprv_strchr(tmpLocaleID, ';');
- }
-
- /* Copy POSIX-style variant, if any [mr@FOO] */
- if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
- tmpLocaleID != NULL && keywordAssign == NULL) {
- for (;;) {
- char c = *tmpLocaleID;
- if (c == 0) {
- break;
- }
- tag.append(c, *err);
- ++tmpLocaleID;
- }
- }
-
- if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
- /* Handle @FOO variant if @ is present and not followed by = */
- if (tmpLocaleID!=NULL && keywordAssign==NULL) {
- /* Add missing '_' if needed */
- if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
- do {
- tag.append('_', *err);
- ++fieldCount;
- } while(fieldCount<2);
- }
-
- int32_t posixVariantSize = -tag.length();
- {
- CharStringByteSink s(&tag);
- _getVariantEx(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0));
- }
- posixVariantSize += tag.length();
- if (posixVariantSize > 0) {
- variantSize += posixVariantSize;
- }
- }
-
- /* Look up the ID in the canonicalization map */
- for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
- StringPiece id(CANONICALIZE_MAP[j].id);
- if (tag == id) {
- if (id.empty() && tmpLocaleID != NULL) {
- break; /* Don't remap "" if keywords present */
- }
- tag.clear();
- tag.append(CANONICALIZE_MAP[j].canonicalID, *err);
- break;
- }
- }
- }
-
- sink.Append(tag.data(), tag.length());
-
- if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
- if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
- (!separatorIndicator || separatorIndicator > keywordAssign)) {
- sink.Append("@", 1);
- ++fieldCount;
- _getKeywords(tmpLocaleID+1, '@', sink, TRUE, err);
- }
- }
-}
-
-/* ### ID parsing API **************************************************/
-
-U_CAPI int32_t U_EXPORT2
-uloc_getParent(const char* localeID,
- char* parent,
- int32_t parentCapacity,
- UErrorCode* err)
-{
- const char *lastUnderscore;
- int32_t i;
-
- if (U_FAILURE(*err))
- return 0;
-
- if (localeID == NULL)
- localeID = uloc_getDefault();
-
- lastUnderscore=uprv_strrchr(localeID, '_');
- if(lastUnderscore!=NULL) {
- i=(int32_t)(lastUnderscore-localeID);
- } else {
- i=0;
- }
-
- if (i > 0) {
- if (uprv_strnicmp(localeID, "und_", 4) == 0) {
- localeID += 3;
- i -= 3;
- uprv_memmove(parent, localeID, uprv_min(i, parentCapacity));
- } else if (parent != localeID) {
- uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
- }
- }
-
- return u_terminateChars(parent, parentCapacity, i, err);
-}
-
-U_CAPI int32_t U_EXPORT2
-uloc_getLanguage(const char* localeID,
- char* language,
- int32_t languageCapacity,
- UErrorCode* err)
-{
- /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
- int32_t i=0;
-
- if (err==NULL || U_FAILURE(*err)) {
- return 0;
- }
-
- if(localeID==NULL) {
- localeID=uloc_getDefault();
- }
-
- i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
- return u_terminateChars(language, languageCapacity, i, err);
-}
-
-U_CAPI int32_t U_EXPORT2
-uloc_getScript(const char* localeID,
- char* script,
- int32_t scriptCapacity,
- UErrorCode* err)
-{
- int32_t i=0;
-
- if(err==NULL || U_FAILURE(*err)) {
- return 0;
- }
-
- if(localeID==NULL) {
- localeID=uloc_getDefault();
- }
-
- /* skip the language */
- ulocimp_getLanguage(localeID, NULL, 0, &localeID);
- if(_isIDSeparator(*localeID)) {
- i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
- }
- return u_terminateChars(script, scriptCapacity, i, err);
-}
-
-U_CAPI int32_t U_EXPORT2
-uloc_getCountry(const char* localeID,
- char* country,
- int32_t countryCapacity,
- UErrorCode* err)
-{
- int32_t i=0;
-
- if(err==NULL || U_FAILURE(*err)) {
- return 0;
- }
-
- if(localeID==NULL) {
- localeID=uloc_getDefault();
- }
-
- /* Skip the language */
- ulocimp_getLanguage(localeID, NULL, 0, &localeID);
- if(_isIDSeparator(*localeID)) {
- const char *scriptID;
- /* Skip the script if available */
- ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
- if(scriptID != localeID+1) {
- /* Found optional script */
- localeID = scriptID;
- }
- if(_isIDSeparator(*localeID)) {
- i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
- }
- }
- return u_terminateChars(country, countryCapacity, i, err);
-}
-
-U_CAPI int32_t U_EXPORT2
-uloc_getVariant(const char* localeID,
- char* variant,
- int32_t variantCapacity,
- UErrorCode* err)
-{
- char tempBuffer[ULOC_FULLNAME_CAPACITY];
- const char* tmpLocaleID;
- int32_t i=0;
-
- if(err==NULL || U_FAILURE(*err)) {
- return 0;
- }
-
- if (_hasBCP47Extension(localeID)) {
- _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
- } else {
- if (localeID==NULL) {
- localeID=uloc_getDefault();
- }
- tmpLocaleID=localeID;
- }
-
- /* Skip the language */
- ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
- if(_isIDSeparator(*tmpLocaleID)) {
- const char *scriptID;
- /* Skip the script if available */
- ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
- if(scriptID != tmpLocaleID+1) {
- /* Found optional script */
- tmpLocaleID = scriptID;
- }
- /* Skip the Country */
- if (_isIDSeparator(*tmpLocaleID)) {
- const char *cntryID;
- ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
- if (cntryID != tmpLocaleID+1) {
- /* Found optional country */
- tmpLocaleID = cntryID;
- }
- if(_isIDSeparator(*tmpLocaleID)) {
- /* If there was no country ID, skip a possible extra IDSeparator */
- if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
- tmpLocaleID++;
- }
- i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
- }
- }
- }
-
- /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
- /* if we do not have a variant tag yet then try a POSIX variant after '@' */
-/*
- if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
- i=_getVariant(localeID+1, '@', variant, variantCapacity);
- }
-*/
- return u_terminateChars(variant, variantCapacity, i, err);
-}
-
-U_CAPI int32_t U_EXPORT2
-uloc_getName(const char* localeID,
- char* name,
- int32_t nameCapacity,
- UErrorCode* err)
-{
- if (U_FAILURE(*err)) {
- return 0;
- }
-
- CheckedArrayByteSink sink(name, nameCapacity);
- ulocimp_getName(localeID, sink, err);
-
- int32_t reslen = sink.NumberOfBytesAppended();
-
- if (U_FAILURE(*err)) {
- return reslen;
- }
-
- if (sink.Overflowed()) {
- *err = U_BUFFER_OVERFLOW_ERROR;
- } else {
- u_terminateChars(name, nameCapacity, reslen, err);
- }
-
- return reslen;
-}
-
-U_STABLE void U_EXPORT2
-ulocimp_getName(const char* localeID,
- ByteSink& sink,
- UErrorCode* err)
-{
- _canonicalize(localeID, sink, 0, err);
-}
-
-U_CAPI int32_t U_EXPORT2
-uloc_getBaseName(const char* localeID,
- char* name,
- int32_t nameCapacity,
- UErrorCode* err)
-{
- if (U_FAILURE(*err)) {
- return 0;
- }
-
- CheckedArrayByteSink sink(name, nameCapacity);
- ulocimp_getBaseName(localeID, sink, err);
-
- int32_t reslen = sink.NumberOfBytesAppended();
-
- if (U_FAILURE(*err)) {
- return reslen;
- }
-
- if (sink.Overflowed()) {
- *err = U_BUFFER_OVERFLOW_ERROR;
- } else {
- u_terminateChars(name, nameCapacity, reslen, err);
- }
-
- return reslen;
-}
-
-U_STABLE void U_EXPORT2
-ulocimp_getBaseName(const char* localeID,
- ByteSink& sink,
- UErrorCode* err)
-{
- _canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err);
-}
-
-U_CAPI int32_t U_EXPORT2
-uloc_canonicalize(const char* localeID,
- char* name,
- int32_t nameCapacity,
- UErrorCode* err)
-{
- if (U_FAILURE(*err)) {
- return 0;
- }
-
- CheckedArrayByteSink sink(name, nameCapacity);
- ulocimp_canonicalize(localeID, sink, err);
-
- int32_t reslen = sink.NumberOfBytesAppended();
-
- if (U_FAILURE(*err)) {
- return reslen;
- }
-
- if (sink.Overflowed()) {
- *err = U_BUFFER_OVERFLOW_ERROR;
- } else {
- u_terminateChars(name, nameCapacity, reslen, err);
- }
-
- return reslen;
-}
-
-U_STABLE void U_EXPORT2
-ulocimp_canonicalize(const char* localeID,
- ByteSink& sink,
- UErrorCode* err)
-{
- _canonicalize(localeID, sink, _ULOC_CANONICALIZE, err);
-}
-
-U_CAPI const char* U_EXPORT2
-uloc_getISO3Language(const char* localeID)
-{
- int16_t offset;
- char lang[ULOC_LANG_CAPACITY];
- UErrorCode err = U_ZERO_ERROR;
-
- if (localeID == NULL)
- {
- localeID = uloc_getDefault();
- }
- uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
- if (U_FAILURE(err))
- return "";
- offset = _findIndex(LANGUAGES, lang);
- if (offset < 0)
- return "";
- return LANGUAGES_3[offset];
-}
-
-U_CAPI const char* U_EXPORT2
-uloc_getISO3Country(const char* localeID)
-{
- int16_t offset;
- char cntry[ULOC_LANG_CAPACITY];
- UErrorCode err = U_ZERO_ERROR;
-
- if (localeID == NULL)
- {
- localeID = uloc_getDefault();
- }
- uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
- if (U_FAILURE(err))
- return "";
- offset = _findIndex(COUNTRIES, cntry);
- if (offset < 0)
- return "";
-
- return COUNTRIES_3[offset];
-}
-
-U_CAPI uint32_t U_EXPORT2
-uloc_getLCID(const char* localeID)
-{
- UErrorCode status = U_ZERO_ERROR;
- char langID[ULOC_FULLNAME_CAPACITY];
- uint32_t lcid = 0;
-
- /* Check for incomplete id. */
- if (!localeID || uprv_strlen(localeID) < 2) {
- return 0;
- }
-
- // First, attempt Windows platform lookup if available, but fall
- // through to catch any special cases (ICU vs Windows name differences).
- lcid = uprv_convertToLCIDPlatform(localeID, &status);
- if (U_FAILURE(status)) {
- return 0;
- }
- if (lcid > 0) {
- // Windows found an LCID, return that
- return lcid;
- }
-
- uloc_getLanguage(localeID, langID, sizeof(langID), &status);
- if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
- return 0;
- }
-
- if (uprv_strchr(localeID, '@')) {
- // uprv_convertToLCID does not support keywords other than collation.
- // Remove all keywords except collation.
- int32_t len;
- char collVal[ULOC_KEYWORDS_CAPACITY];
- char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
-
- len = uloc_getKeywordValue(localeID, "collation", collVal,
- UPRV_LENGTHOF(collVal) - 1, &status);
-
- if (U_SUCCESS(status) && len > 0) {
- collVal[len] = 0;
-
- len = uloc_getBaseName(localeID, tmpLocaleID,
- UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
-
- if (U_SUCCESS(status) && len > 0) {
- tmpLocaleID[len] = 0;
-
- len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
- UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
-
- if (U_SUCCESS(status) && len > 0) {
- tmpLocaleID[len] = 0;
- return uprv_convertToLCID(langID, tmpLocaleID, &status);
- }
- }
- }
-
- // fall through - all keywords are simply ignored
- status = U_ZERO_ERROR;
- }
-
- return uprv_convertToLCID(langID, localeID, &status);
-}
-
-U_CAPI int32_t U_EXPORT2
-uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
- UErrorCode *status)
-{
- return uprv_convertToPosix(hostid, locale, localeCapacity, status);
-}
-
-/* ### Default locale **************************************************/
-
-U_CAPI const char* U_EXPORT2
-uloc_getDefault()
-{
- return locale_get_default();
-}
-
-U_CAPI void U_EXPORT2
-uloc_setDefault(const char* newDefaultLocale,
- UErrorCode* err)
-{
- if (U_FAILURE(*err))
- return;
- /* the error code isn't currently used for anything by this function*/
-
- /* propagate change to C++ */
- locale_set_default(newDefaultLocale);
-}
-
-/**
- * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
- * to an array of pointers to arrays of char. All of these pointers are owned
- * by ICU-- do not delete them, and do not write through them. The array is
- * terminated with a null pointer.
- */
-U_CAPI const char* const* U_EXPORT2
-uloc_getISOLanguages()
-{
- return LANGUAGES;
-}
-
-/**
- * Returns a list of all 2-letter country codes defined in ISO 639. This is a
- * pointer to an array of pointers to arrays of char. All of these pointers are
- * owned by ICU-- do not delete them, and do not write through them. The array is
- * terminated with a null pointer.
- */
-U_CAPI const char* const* U_EXPORT2
-uloc_getISOCountries()
-{
- return COUNTRIES;
-}
-
-U_CAPI const char* U_EXPORT2
-uloc_toUnicodeLocaleKey(const char* keyword)
-{
- const char* bcpKey = ulocimp_toBcpKey(keyword);
- if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
- // unknown keyword, but syntax is fine..
- return keyword;
- }
- return bcpKey;
-}
-
-U_CAPI const char* U_EXPORT2
-uloc_toUnicodeLocaleType(const char* keyword, const char* value)
-{
- const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
- if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
- // unknown keyword, but syntax is fine..
- return value;
- }
- return bcpType;
-}
-
-static UBool
-isWellFormedLegacyKey(const char* legacyKey)
-{
- const char* p = legacyKey;
- while (*p) {
- if (!UPRV_ISALPHANUM(*p)) {
- return FALSE;
- }
- p++;
- }
- return TRUE;
-}
-
-static UBool
-isWellFormedLegacyType(const char* legacyType)
-{
- const char* p = legacyType;
- int32_t alphaNumLen = 0;
- while (*p) {
- if (*p == '_' || *p == '/' || *p == '-') {
- if (alphaNumLen == 0) {
- return FALSE;
- }
- alphaNumLen = 0;
- } else if (UPRV_ISALPHANUM(*p)) {
- alphaNumLen++;
- } else {
- return FALSE;
- }
- p++;
- }
- return (alphaNumLen != 0);
-}
-
-U_CAPI const char* U_EXPORT2
-uloc_toLegacyKey(const char* keyword)
-{
- const char* legacyKey = ulocimp_toLegacyKey(keyword);
- if (legacyKey == NULL) {
- // Checks if the specified locale key is well-formed with the legacy locale syntax.
- //
- // Note:
- // LDML/CLDR provides some definition of keyword syntax in
- // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
- // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
- // Keys can only consist of [0-9a-zA-Z].
- if (isWellFormedLegacyKey(keyword)) {
- return keyword;
- }
- }
- return legacyKey;
-}
-
-U_CAPI const char* U_EXPORT2
-uloc_toLegacyType(const char* keyword, const char* value)
-{
- const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
- if (legacyType == NULL) {
- // Checks if the specified locale type is well-formed with the legacy locale syntax.
- //
- // Note:
- // LDML/CLDR provides some definition of keyword syntax in
- // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
- // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
- // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
- // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
- if (isWellFormedLegacyType(value)) {
- return value;
- }
- }
- return legacyType;
-}
-
-/*eof*/
diff --git a/contrib/libs/icu/common/uloc_keytype.cpp b/contrib/libs/icu/common/uloc_keytype.cpp
deleted file mode 100644
index 019da058cf4..00000000000
--- a/contrib/libs/icu/common/uloc_keytype.cpp
+++ /dev/null
@@ -1,534 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2014-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-#include <algorithm>
-
-#include "unicode/utypes.h"
-#include "unicode/unistr.h"
-#include "unicode/uobject.h"
-
-#include "charstr.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "uassert.h"
-#include "ucln_cmn.h"
-#include "uhash.h"
-#include "umutex.h"
-#include "uresimp.h"
-#include "uvector.h"
-#include "udataswp.h" /* for InvChar functions */
-
-static UHashtable* gLocExtKeyMap = NULL;
-static icu::UInitOnce gLocExtKeyMapInitOnce = U_INITONCE_INITIALIZER;
-
-// bit flags for special types
-typedef enum {
- SPECIALTYPE_NONE = 0,
- SPECIALTYPE_CODEPOINTS = 1,
- SPECIALTYPE_REORDER_CODE = 2,
- SPECIALTYPE_RG_KEY_VALUE = 4
-} SpecialType;
-
-struct LocExtKeyData : public icu::UMemory {
- const char* legacyId;
- const char* bcpId;
- icu::LocalUHashtablePointer typeMap;
- uint32_t specialTypes;
-};
-
-struct LocExtType : public icu::UMemory {
- const char* legacyId;
- const char* bcpId;
-};
-
-static icu::MemoryPool<icu::CharString>* gKeyTypeStringPool = NULL;
-static icu::MemoryPool<LocExtKeyData>* gLocExtKeyDataEntries = NULL;
-static icu::MemoryPool<LocExtType>* gLocExtTypeEntries = NULL;
-
-U_CDECL_BEGIN
-
-static UBool U_CALLCONV
-uloc_key_type_cleanup(void) {
- if (gLocExtKeyMap != NULL) {
- uhash_close(gLocExtKeyMap);
- gLocExtKeyMap = NULL;
- }
-
- delete gLocExtKeyDataEntries;
- gLocExtKeyDataEntries = NULL;
-
- delete gLocExtTypeEntries;
- gLocExtTypeEntries = NULL;
-
- delete gKeyTypeStringPool;
- gKeyTypeStringPool = NULL;
-
- gLocExtKeyMapInitOnce.reset();
- return TRUE;
-}
-
-U_CDECL_END
-
-
-static void U_CALLCONV
-initFromResourceBundle(UErrorCode& sts) {
- U_NAMESPACE_USE
- ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);
-
- gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
-
- LocalUResourceBundlePointer keyTypeDataRes(ures_openDirect(NULL, "keyTypeData", &sts));
- LocalUResourceBundlePointer keyMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "keyMap", NULL, &sts));
- LocalUResourceBundlePointer typeMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeMap", NULL, &sts));
-
- if (U_FAILURE(sts)) {
- return;
- }
-
- UErrorCode tmpSts = U_ZERO_ERROR;
- LocalUResourceBundlePointer typeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeAlias", NULL, &tmpSts));
- tmpSts = U_ZERO_ERROR;
- LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias", NULL, &tmpSts));
-
- // initialize pools storing dynamically allocated objects
- gKeyTypeStringPool = new icu::MemoryPool<icu::CharString>;
- if (gKeyTypeStringPool == NULL) {
- sts = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- gLocExtKeyDataEntries = new icu::MemoryPool<LocExtKeyData>;
- if (gLocExtKeyDataEntries == NULL) {
- sts = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- gLocExtTypeEntries = new icu::MemoryPool<LocExtType>;
- if (gLocExtTypeEntries == NULL) {
- sts = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- // iterate through keyMap resource
- LocalUResourceBundlePointer keyMapEntry;
-
- while (ures_hasNext(keyMapRes.getAlias())) {
- keyMapEntry.adoptInstead(ures_getNextResource(keyMapRes.getAlias(), keyMapEntry.orphan(), &sts));
- if (U_FAILURE(sts)) {
- break;
- }
- const char* legacyKeyId = ures_getKey(keyMapEntry.getAlias());
- UnicodeString uBcpKeyId = ures_getUnicodeString(keyMapEntry.getAlias(), &sts);
- if (U_FAILURE(sts)) {
- break;
- }
-
- // empty value indicates that BCP key is same with the legacy key.
- const char* bcpKeyId = legacyKeyId;
- if (!uBcpKeyId.isEmpty()) {
- icu::CharString* bcpKeyIdBuf = gKeyTypeStringPool->create();
- if (bcpKeyIdBuf == NULL) {
- sts = U_MEMORY_ALLOCATION_ERROR;
- break;
- }
- bcpKeyIdBuf->appendInvariantChars(uBcpKeyId, sts);
- if (U_FAILURE(sts)) {
- break;
- }
- bcpKeyId = bcpKeyIdBuf->data();
- }
-
- UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;
-
- UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
- if (U_FAILURE(sts)) {
- break;
- }
- uint32_t specialTypes = SPECIALTYPE_NONE;
-
- LocalUResourceBundlePointer typeAliasResByKey;
- LocalUResourceBundlePointer bcpTypeAliasResByKey;
-
- if (typeAliasRes.isValid()) {
- tmpSts = U_ZERO_ERROR;
- typeAliasResByKey.adoptInstead(ures_getByKey(typeAliasRes.getAlias(), legacyKeyId, NULL, &tmpSts));
- if (U_FAILURE(tmpSts)) {
- typeAliasResByKey.orphan();
- }
- }
- if (bcpTypeAliasRes.isValid()) {
- tmpSts = U_ZERO_ERROR;
- bcpTypeAliasResByKey.adoptInstead(ures_getByKey(bcpTypeAliasRes.getAlias(), bcpKeyId, NULL, &tmpSts));
- if (U_FAILURE(tmpSts)) {
- bcpTypeAliasResByKey.orphan();
- }
- }
-
- // look up type map for the key, and walk through the mapping data
- tmpSts = U_ZERO_ERROR;
- LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, NULL, &tmpSts));
- if (U_FAILURE(tmpSts)) {
- // type map for each key must exist
- UPRV_UNREACHABLE;
- } else {
- LocalUResourceBundlePointer typeMapEntry;
-
- while (ures_hasNext(typeMapResByKey.getAlias())) {
- typeMapEntry.adoptInstead(ures_getNextResource(typeMapResByKey.getAlias(), typeMapEntry.orphan(), &sts));
- if (U_FAILURE(sts)) {
- break;
- }
- const char* legacyTypeId = ures_getKey(typeMapEntry.getAlias());
-
- // special types
- if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) {
- specialTypes |= SPECIALTYPE_CODEPOINTS;
- continue;
- }
- if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) {
- specialTypes |= SPECIALTYPE_REORDER_CODE;
- continue;
- }
- if (uprv_strcmp(legacyTypeId, "RG_KEY_VALUE") == 0) {
- specialTypes |= SPECIALTYPE_RG_KEY_VALUE;
- continue;
- }
-
- if (isTZ) {
- // a timezone key uses a colon instead of a slash in the resource.
- // e.g. America:Los_Angeles
- if (uprv_strchr(legacyTypeId, ':') != NULL) {
- icu::CharString* legacyTypeIdBuf =
- gKeyTypeStringPool->create(legacyTypeId, sts);
- if (legacyTypeIdBuf == NULL) {
- sts = U_MEMORY_ALLOCATION_ERROR;
- break;
- }
- if (U_FAILURE(sts)) {
- break;
- }
- std::replace(
- legacyTypeIdBuf->data(),
- legacyTypeIdBuf->data() + legacyTypeIdBuf->length(),
- ':', '/');
- legacyTypeId = legacyTypeIdBuf->data();
- }
- }
-
- UnicodeString uBcpTypeId = ures_getUnicodeString(typeMapEntry.getAlias(), &sts);
- if (U_FAILURE(sts)) {
- break;
- }
-
- // empty value indicates that BCP type is same with the legacy type.
- const char* bcpTypeId = legacyTypeId;
- if (!uBcpTypeId.isEmpty()) {
- icu::CharString* bcpTypeIdBuf = gKeyTypeStringPool->create();
- if (bcpTypeIdBuf == NULL) {
- sts = U_MEMORY_ALLOCATION_ERROR;
- break;
- }
- bcpTypeIdBuf->appendInvariantChars(uBcpTypeId, sts);
- if (U_FAILURE(sts)) {
- break;
- }
- bcpTypeId = bcpTypeIdBuf->data();
- }
-
- // Note: legacy type value should never be
- // equivalent to bcp type value of a different
- // type under the same key. So we use a single
- // map for lookup.
- LocExtType* t = gLocExtTypeEntries->create();
- if (t == NULL) {
- sts = U_MEMORY_ALLOCATION_ERROR;
- break;
- }
- t->bcpId = bcpTypeId;
- t->legacyId = legacyTypeId;
-
- uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts);
- if (bcpTypeId != legacyTypeId) {
- // different type value
- uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts);
- }
- if (U_FAILURE(sts)) {
- break;
- }
-
- // also put aliases in the map
- if (typeAliasResByKey.isValid()) {
- LocalUResourceBundlePointer typeAliasDataEntry;
-
- ures_resetIterator(typeAliasResByKey.getAlias());
- while (ures_hasNext(typeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
- int32_t toLen;
- typeAliasDataEntry.adoptInstead(ures_getNextResource(typeAliasResByKey.getAlias(), typeAliasDataEntry.orphan(), &sts));
- const UChar* to = ures_getString(typeAliasDataEntry.getAlias(), &toLen, &sts);
- if (U_FAILURE(sts)) {
- break;
- }
- // check if this is an alias of canoncal legacy type
- if (uprv_compareInvWithUChar(NULL, legacyTypeId, -1, to, toLen) == 0) {
- const char* from = ures_getKey(typeAliasDataEntry.getAlias());
- if (isTZ) {
- // replace colon with slash if necessary
- if (uprv_strchr(from, ':') != NULL) {
- icu::CharString* fromBuf =
- gKeyTypeStringPool->create(from, sts);
- if (fromBuf == NULL) {
- sts = U_MEMORY_ALLOCATION_ERROR;
- break;
- }
- if (U_FAILURE(sts)) {
- break;
- }
- std::replace(
- fromBuf->data(),
- fromBuf->data() + fromBuf->length(),
- ':', '/');
- from = fromBuf->data();
- }
- }
- uhash_put(typeDataMap, (void*)from, t, &sts);
- }
- }
- if (U_FAILURE(sts)) {
- break;
- }
- }
-
- if (bcpTypeAliasResByKey.isValid()) {
- LocalUResourceBundlePointer bcpTypeAliasDataEntry;
-
- ures_resetIterator(bcpTypeAliasResByKey.getAlias());
- while (ures_hasNext(bcpTypeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
- int32_t toLen;
- bcpTypeAliasDataEntry.adoptInstead(ures_getNextResource(bcpTypeAliasResByKey.getAlias(), bcpTypeAliasDataEntry.orphan(), &sts));
- const UChar* to = ures_getString(bcpTypeAliasDataEntry.getAlias(), &toLen, &sts);
- if (U_FAILURE(sts)) {
- break;
- }
- // check if this is an alias of bcp type
- if (uprv_compareInvWithUChar(NULL, bcpTypeId, -1, to, toLen) == 0) {
- const char* from = ures_getKey(bcpTypeAliasDataEntry.getAlias());
- uhash_put(typeDataMap, (void*)from, t, &sts);
- }
- }
- if (U_FAILURE(sts)) {
- break;
- }
- }
- }
- }
- if (U_FAILURE(sts)) {
- break;
- }
-
- LocExtKeyData* keyData = gLocExtKeyDataEntries->create();
- if (keyData == NULL) {
- sts = U_MEMORY_ALLOCATION_ERROR;
- break;
- }
- keyData->bcpId = bcpKeyId;
- keyData->legacyId = legacyKeyId;
- keyData->specialTypes = specialTypes;
- keyData->typeMap.adoptInstead(typeDataMap);
-
- uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts);
- if (legacyKeyId != bcpKeyId) {
- // different key value
- uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts);
- }
- if (U_FAILURE(sts)) {
- break;
- }
- }
-}
-
-static UBool
-init() {
- UErrorCode sts = U_ZERO_ERROR;
- umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, sts);
- if (U_FAILURE(sts)) {
- return FALSE;
- }
- return TRUE;
-}
-
-static UBool
-isSpecialTypeCodepoints(const char* val) {
- int32_t subtagLen = 0;
- const char* p = val;
- while (*p) {
- if (*p == '-') {
- if (subtagLen < 4 || subtagLen > 6) {
- return FALSE;
- }
- subtagLen = 0;
- } else if ((*p >= '0' && *p <= '9') ||
- (*p >= 'A' && *p <= 'F') || // A-F/a-f are contiguous
- (*p >= 'a' && *p <= 'f')) { // also in EBCDIC
- subtagLen++;
- } else {
- return FALSE;
- }
- p++;
- }
- return (subtagLen >= 4 && subtagLen <= 6);
-}
-
-static UBool
-isSpecialTypeReorderCode(const char* val) {
- int32_t subtagLen = 0;
- const char* p = val;
- while (*p) {
- if (*p == '-') {
- if (subtagLen < 3 || subtagLen > 8) {
- return FALSE;
- }
- subtagLen = 0;
- } else if (uprv_isASCIILetter(*p)) {
- subtagLen++;
- } else {
- return FALSE;
- }
- p++;
- }
- return (subtagLen >=3 && subtagLen <=8);
-}
-
-static UBool
-isSpecialTypeRgKeyValue(const char* val) {
- int32_t subtagLen = 0;
- const char* p = val;
- while (*p) {
- if ( (subtagLen < 2 && uprv_isASCIILetter(*p)) ||
- (subtagLen >= 2 && (*p == 'Z' || *p == 'z')) ) {
- subtagLen++;
- } else {
- return FALSE;
- }
- p++;
- }
- return (subtagLen == 6);
-}
-
-U_CFUNC const char*
-ulocimp_toBcpKey(const char* key) {
- if (!init()) {
- return NULL;
- }
-
- LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
- if (keyData != NULL) {
- return keyData->bcpId;
- }
- return NULL;
-}
-
-U_CFUNC const char*
-ulocimp_toLegacyKey(const char* key) {
- if (!init()) {
- return NULL;
- }
-
- LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
- if (keyData != NULL) {
- return keyData->legacyId;
- }
- return NULL;
-}
-
-U_CFUNC const char*
-ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
- if (isKnownKey != NULL) {
- *isKnownKey = FALSE;
- }
- if (isSpecialType != NULL) {
- *isSpecialType = FALSE;
- }
-
- if (!init()) {
- return NULL;
- }
-
- LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
- if (keyData != NULL) {
- if (isKnownKey != NULL) {
- *isKnownKey = TRUE;
- }
- LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type);
- if (t != NULL) {
- return t->bcpId;
- }
- if (keyData->specialTypes != SPECIALTYPE_NONE) {
- UBool matched = FALSE;
- if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
- matched = isSpecialTypeCodepoints(type);
- }
- if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
- matched = isSpecialTypeReorderCode(type);
- }
- if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
- matched = isSpecialTypeRgKeyValue(type);
- }
- if (matched) {
- if (isSpecialType != NULL) {
- *isSpecialType = TRUE;
- }
- return type;
- }
- }
- }
- return NULL;
-}
-
-
-U_CFUNC const char*
-ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
- if (isKnownKey != NULL) {
- *isKnownKey = FALSE;
- }
- if (isSpecialType != NULL) {
- *isSpecialType = FALSE;
- }
-
- if (!init()) {
- return NULL;
- }
-
- LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
- if (keyData != NULL) {
- if (isKnownKey != NULL) {
- *isKnownKey = TRUE;
- }
- LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type);
- if (t != NULL) {
- return t->legacyId;
- }
- if (keyData->specialTypes != SPECIALTYPE_NONE) {
- UBool matched = FALSE;
- if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
- matched = isSpecialTypeCodepoints(type);
- }
- if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
- matched = isSpecialTypeReorderCode(type);
- }
- if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
- matched = isSpecialTypeRgKeyValue(type);
- }
- if (matched) {
- if (isSpecialType != NULL) {
- *isSpecialType = TRUE;
- }
- return type;
- }
- }
- }
- return NULL;
-}
-
diff --git a/contrib/libs/icu/common/uloc_tag.cpp b/contrib/libs/icu/common/uloc_tag.cpp
deleted file mode 100644
index ad5dd6430c9..00000000000
--- a/contrib/libs/icu/common/uloc_tag.cpp
+++ /dev/null
@@ -1,2877 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2009-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#include "unicode/bytestream.h"
-#include "unicode/utypes.h"
-#include "unicode/ures.h"
-#include "unicode/localpointer.h"
-#include "unicode/putil.h"
-#include "unicode/uenum.h"
-#include "unicode/uloc.h"
-#include "ustr_imp.h"
-#include "charstr.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "putilimp.h"
-#include "uinvchar.h"
-#include "ulocimp.h"
-#include "uassert.h"
-
-
-/* struct holding a single variant */
-typedef struct VariantListEntry {
- const char *variant;
- struct VariantListEntry *next;
-} VariantListEntry;
-
-/* struct holding a single attribute value */
-struct AttributeListEntry : public icu::UMemory {
- const char *attribute;
- struct AttributeListEntry *next;
-};
-
-/* struct holding a single extension */
-struct ExtensionListEntry : public icu::UMemory {
- const char *key;
- const char *value;
- struct ExtensionListEntry *next;
-};
-
-#define MAXEXTLANG 3
-typedef struct ULanguageTag {
- char *buf; /* holding parsed subtags */
- const char *language;
- const char *extlang[MAXEXTLANG];
- const char *script;
- const char *region;
- VariantListEntry *variants;
- ExtensionListEntry *extensions;
- const char *privateuse;
- const char *grandfathered;
-} ULanguageTag;
-
-#define MINLEN 2
-#define SEP '-'
-#define PRIVATEUSE 'x'
-#define LDMLEXT 'u'
-
-#define LOCALE_SEP '_'
-#define LOCALE_EXT_SEP '@'
-#define LOCALE_KEYWORD_SEP ';'
-#define LOCALE_KEY_TYPE_SEP '='
-
-#define ISALPHA(c) uprv_isASCIILetter(c)
-#define ISNUMERIC(c) ((c)>='0' && (c)<='9')
-
-static const char EMPTY[] = "";
-static const char LANG_UND[] = "und";
-static const char PRIVATEUSE_KEY[] = "x";
-static const char _POSIX[] = "_POSIX";
-static const char POSIX_KEY[] = "va";
-static const char POSIX_VALUE[] = "posix";
-static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
-static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
-static const char LOCALE_TYPE_YES[] = "yes";
-
-#define LANG_UND_LEN 3
-
-/*
- Updated on 2018-09-12 from
- https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
-
- This table has 2 parts. The parts for Grandfathered tags is generated by the
- following scripts from the IANA language tag registry.
-
- curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
- egrep -A 7 'Type: grandfathered' | \
- egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
- awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
- tr 'A-Z' 'a-z'
-
-
- The 2nd part is made of five ICU-specific entries. They're kept for
- the backward compatibility for now, even though there are no preferred
- values. They may have to be removed for the strict BCP 47 compliance.
-
-*/
-static const char* const GRANDFATHERED[] = {
-/* grandfathered preferred */
- "art-lojban", "jbo",
- "en-gb-oed", "en-gb-oxendict",
- "i-ami", "ami",
- "i-bnn", "bnn",
- "i-hak", "hak",
- "i-klingon", "tlh",
- "i-lux", "lb",
- "i-navajo", "nv",
- "i-pwn", "pwn",
- "i-tao", "tao",
- "i-tay", "tay",
- "i-tsu", "tsu",
- "no-bok", "nb",
- "no-nyn", "nn",
- "sgn-be-fr", "sfb",
- "sgn-be-nl", "vgt",
- "sgn-ch-de", "sgg",
- "zh-guoyu", "cmn",
- "zh-hakka", "hak",
- "zh-min-nan", "nan",
- "zh-xiang", "hsn",
-
- // Grandfathered tags with no preferred value in the IANA
- // registry. Kept for now for the backward compatibility
- // because ICU has mapped them this way.
- "cel-gaulish", "xtg-x-cel-gaulish",
- "i-default", "en-x-i-default",
- "i-enochian", "und-x-i-enochian",
- "i-mingo", "see-x-i-mingo",
- "zh-min", "nan-x-zh-min",
-};
-
-/*
- Updated on 2018-09-12 from
- https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
-
- The table lists redundant tags with preferred value in the IANA languate tag registry.
- It's generated with the following command:
-
- curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
- grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
- awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
- tr 'A-Z' 'a-z'
-
- In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
- a variant tag 'hepburn-heploc' has the preferred subtag, 'alaic97'.
-*/
-
-static const char* const REDUNDANT[] = {
-// redundant preferred
- "sgn-br", "bzs",
- "sgn-co", "csn",
- "sgn-de", "gsg",
- "sgn-dk", "dsl",
- "sgn-es", "ssp",
- "sgn-fr", "fsl",
- "sgn-gb", "bfi",
- "sgn-gr", "gss",
- "sgn-ie", "isg",
- "sgn-it", "ise",
- "sgn-jp", "jsl",
- "sgn-mx", "mfs",
- "sgn-ni", "ncs",
- "sgn-nl", "dse",
- "sgn-no", "nsl",
- "sgn-pt", "psr",
- "sgn-se", "swl",
- "sgn-us", "ase",
- "sgn-za", "sfs",
- "zh-cmn", "cmn",
- "zh-cmn-hans", "cmn-hans",
- "zh-cmn-hant", "cmn-hant",
- "zh-gan", "gan",
- "zh-wuu", "wuu",
- "zh-yue", "yue",
-
- // variant tag with preferred value
- "ja-latn-hepburn-heploc", "ja-latn-alalc97",
-};
-
-/*
- Updated on 2018-09-12 from
- https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
-
- grep 'Type: language' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \
- grep -B1 'Preferred' | grep -v '^--' | \
- awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
-
- Make sure that 2-letter language subtags come before 3-letter subtags.
-*/
-static const char DEPRECATEDLANGS[][4] = {
-/* deprecated new */
- "in", "id",
- "iw", "he",
- "ji", "yi",
- "jw", "jv",
- "mo", "ro",
- "aam", "aas",
- "adp", "dz",
- "aue", "ktz",
- "ayx", "nun",
- "bgm", "bcg",
- "bjd", "drl",
- "ccq", "rki",
- "cjr", "mom",
- "cka", "cmr",
- "cmk", "xch",
- "coy", "pij",
- "cqu", "quh",
- "drh", "khk",
- "drw", "prs",
- "gav", "dev",
- "gfx", "vaj",
- "ggn", "gvr",
- "gti", "nyc",
- "guv", "duz",
- "hrr", "jal",
- "ibi", "opa",
- "ilw", "gal",
- "jeg", "oyb",
- "kgc", "tdf",
- "kgh", "kml",
- "koj", "kwv",
- "krm", "bmf",
- "ktr", "dtp",
- "kvs", "gdj",
- "kwq", "yam",
- "kxe", "tvd",
- "kzj", "dtp",
- "kzt", "dtp",
- "lii", "raq",
- "lmm", "rmx",
- "meg", "cir",
- "mst", "mry",
- "mwj", "vaj",
- "myt", "mry",
- "nad", "xny",
- "ncp", "kdz",
- "nnx", "ngv",
- "nts", "pij",
- "oun", "vaj",
- "pcr", "adx",
- "pmc", "huw",
- "pmu", "phr",
- "ppa", "bfy",
- "ppr", "lcq",
- "pry", "prt",
- "puz", "pub",
- "sca", "hle",
- "skk", "oyb",
- "tdu", "dtp",
- "thc", "tpo",
- "thx", "oyb",
- "tie", "ras",
- "tkk", "twm",
- "tlw", "weo",
- "tmp", "tyj",
- "tne", "kak",
- "tnf", "prs",
- "tsf", "taj",
- "uok", "ema",
- "xba", "cax",
- "xia", "acn",
- "xkh", "waw",
- "xsj", "suj",
- "ybd", "rki",
- "yma", "lrr",
- "ymt", "mtm",
- "yos", "zom",
- "yuu", "yug",
-};
-
-/*
- Updated on 2018-04-24 from
-
- curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
- grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
- grep -B1 'Preferred' | \
- awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
-*/
-static const char DEPRECATEDREGIONS[][3] = {
-/* deprecated new */
- "BU", "MM",
- "DD", "DE",
- "FX", "FR",
- "TP", "TL",
- "YD", "YE",
- "ZR", "CD",
-};
-
-/*
-* -------------------------------------------------
-*
-* These ultag_ functions may be exposed as APIs later
-*
-* -------------------------------------------------
-*/
-
-static ULanguageTag*
-ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
-
-static void
-ultag_close(ULanguageTag* langtag);
-
-static const char*
-ultag_getLanguage(const ULanguageTag* langtag);
-
-#if 0
-static const char*
-ultag_getJDKLanguage(const ULanguageTag* langtag);
-#endif
-
-static const char*
-ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
-
-static int32_t
-ultag_getExtlangSize(const ULanguageTag* langtag);
-
-static const char*
-ultag_getScript(const ULanguageTag* langtag);
-
-static const char*
-ultag_getRegion(const ULanguageTag* langtag);
-
-static const char*
-ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
-
-static int32_t
-ultag_getVariantsSize(const ULanguageTag* langtag);
-
-static const char*
-ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
-
-static const char*
-ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
-
-static int32_t
-ultag_getExtensionsSize(const ULanguageTag* langtag);
-
-static const char*
-ultag_getPrivateUse(const ULanguageTag* langtag);
-
-#if 0
-static const char*
-ultag_getGrandfathered(const ULanguageTag* langtag);
-#endif
-
-U_NAMESPACE_BEGIN
-
-/**
- * \class LocalULanguageTagPointer
- * "Smart pointer" class, closes a ULanguageTag via ultag_close().
- * For most methods see the LocalPointerBase base class.
- *
- * @see LocalPointerBase
- * @see LocalPointer
- * @internal
- */
-U_DEFINE_LOCAL_OPEN_POINTER(LocalULanguageTagPointer, ULanguageTag, ultag_close);
-
-U_NAMESPACE_END
-
-/*
-* -------------------------------------------------
-*
-* Language subtag syntax validation functions
-*
-* -------------------------------------------------
-*/
-
-static UBool
-_isAlphaString(const char* s, int32_t len) {
- int32_t i;
- for (i = 0; i < len; i++) {
- if (!ISALPHA(*(s + i))) {
- return FALSE;
- }
- }
- return TRUE;
-}
-
-static UBool
-_isNumericString(const char* s, int32_t len) {
- int32_t i;
- for (i = 0; i < len; i++) {
- if (!ISNUMERIC(*(s + i))) {
- return FALSE;
- }
- }
- return TRUE;
-}
-
-static UBool
-_isAlphaNumericString(const char* s, int32_t len) {
- int32_t i;
- for (i = 0; i < len; i++) {
- if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
- return FALSE;
- }
- }
- return TRUE;
-}
-
-static UBool
-_isAlphaNumericStringLimitedLength(const char* s, int32_t len, int32_t min, int32_t max) {
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
- if (len >= min && len <= max && _isAlphaNumericString(s, len)) {
- return TRUE;
- }
- return FALSE;
-}
-
-U_CFUNC UBool
-ultag_isLanguageSubtag(const char* s, int32_t len) {
- /*
- * unicode_language_subtag = alpha{2,3} | alpha{5,8};
- * NOTE: Per ICUTC 2019/01/23- accepting alpha 4
- * See ICU-20372
- */
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
- if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
- return TRUE;
- }
- return FALSE;
-}
-
-static UBool
-_isExtlangSubtag(const char* s, int32_t len) {
- /*
- * extlang = 3ALPHA ; selected ISO 639 codes
- * *2("-" 3ALPHA) ; permanently reserved
- */
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
- if (len == 3 && _isAlphaString(s, len)) {
- return TRUE;
- }
- return FALSE;
-}
-
-U_CFUNC UBool
-ultag_isScriptSubtag(const char* s, int32_t len) {
- /*
- * script = 4ALPHA ; ISO 15924 code
- */
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
- if (len == 4 && _isAlphaString(s, len)) {
- return TRUE;
- }
- return FALSE;
-}
-
-U_CFUNC UBool
-ultag_isRegionSubtag(const char* s, int32_t len) {
- /*
- * region = 2ALPHA ; ISO 3166-1 code
- * / 3DIGIT ; UN M.49 code
- */
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
- if (len == 2 && _isAlphaString(s, len)) {
- return TRUE;
- }
- if (len == 3 && _isNumericString(s, len)) {
- return TRUE;
- }
- return FALSE;
-}
-
-static UBool
-_isVariantSubtag(const char* s, int32_t len) {
- /*
- * variant = 5*8alphanum ; registered variants
- * / (DIGIT 3alphanum)
- */
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
- if (_isAlphaNumericStringLimitedLength(s, len, 5, 8)) {
- return TRUE;
- }
- if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
- return TRUE;
- }
- return FALSE;
-}
-
-static UBool
-_isSepListOf(UBool (*test)(const char*, int32_t), const char* s, int32_t len) {
- const char *p = s;
- const char *pSubtag = NULL;
-
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
-
- while ((p - s) < len) {
- if (*p == SEP) {
- if (pSubtag == NULL) {
- return FALSE;
- }
- if (!test(pSubtag, (int32_t)(p - pSubtag))) {
- return FALSE;
- }
- pSubtag = NULL;
- } else if (pSubtag == NULL) {
- pSubtag = p;
- }
- p++;
- }
- if (pSubtag == NULL) {
- return FALSE;
- }
- return test(pSubtag, (int32_t)(p - pSubtag));
-}
-
-U_CFUNC UBool
-ultag_isVariantSubtags(const char* s, int32_t len) {
- return _isSepListOf(&_isVariantSubtag, s, len);
-}
-
-// This is for the ICU-specific "lvariant" handling.
-static UBool
-_isPrivateuseVariantSubtag(const char* s, int32_t len) {
- /*
- * variant = 1*8alphanum ; registered variants
- * / (DIGIT 3alphanum)
- */
- return _isAlphaNumericStringLimitedLength(s, len , 1, 8);
-}
-
-static UBool
-_isExtensionSingleton(const char* s, int32_t len) {
- /*
- * extension = singleton 1*("-" (2*8alphanum))
- *
- * singleton = DIGIT ; 0 - 9
- * / %x41-57 ; A - W
- * / %x59-5A ; Y - Z
- * / %x61-77 ; a - w
- * / %x79-7A ; y - z
- */
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
- if (len == 1 && (ISALPHA(*s) || ISNUMERIC(*s)) && (uprv_tolower(*s) != PRIVATEUSE)) {
- return TRUE;
- }
- return FALSE;
-}
-
-static UBool
-_isExtensionSubtag(const char* s, int32_t len) {
- /*
- * extension = singleton 1*("-" (2*8alphanum))
- */
- return _isAlphaNumericStringLimitedLength(s, len, 2, 8);
-}
-
-U_CFUNC UBool
-ultag_isExtensionSubtags(const char* s, int32_t len) {
- return _isSepListOf(&_isExtensionSubtag, s, len);
-}
-
-static UBool
-_isPrivateuseValueSubtag(const char* s, int32_t len) {
- /*
- * privateuse = "x" 1*("-" (1*8alphanum))
- */
- return _isAlphaNumericStringLimitedLength(s, len, 1, 8);
-}
-
-U_CFUNC UBool
-ultag_isPrivateuseValueSubtags(const char* s, int32_t len) {
- return _isSepListOf(&_isPrivateuseValueSubtag, s, len);
-}
-
-U_CFUNC UBool
-ultag_isUnicodeLocaleAttribute(const char* s, int32_t len) {
- /*
- * attribute = alphanum{3,8} ;
- */
- return _isAlphaNumericStringLimitedLength(s, len , 3, 8);
-}
-
-U_CFUNC UBool
-ultag_isUnicodeLocaleAttributes(const char* s, int32_t len) {
- return _isSepListOf(&ultag_isUnicodeLocaleAttribute, s, len);
-}
-
-U_CFUNC UBool
-ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
- /*
- * key = alphanum alpha ;
- */
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
- if (len == 2 && (ISALPHA(*s) || ISNUMERIC(*s)) && ISALPHA(s[1])) {
- return TRUE;
- }
- return FALSE;
-}
-
-U_CFUNC UBool
-_isUnicodeLocaleTypeSubtag(const char*s, int32_t len) {
- /*
- * alphanum{3,8}
- */
- return _isAlphaNumericStringLimitedLength(s, len , 3, 8);
-}
-
-U_CFUNC UBool
-ultag_isUnicodeLocaleType(const char*s, int32_t len) {
- /*
- * type = alphanum{3,8} (sep alphanum{3,8})* ;
- */
- return _isSepListOf(&_isUnicodeLocaleTypeSubtag, s, len);
-}
-
-static UBool
-_isTKey(const char* s, int32_t len)
-{
- /*
- * tkey = alpha digit ;
- */
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
- if (len == 2 && ISALPHA(*s) && ISNUMERIC(*(s + 1))) {
- return TRUE;
- }
- return FALSE;
-}
-
-static UBool
-_isTValue(const char* s, int32_t len)
-{
- /*
- * tvalue = (sep alphanum{3,8})+ ;
- */
- return _isAlphaNumericStringLimitedLength(s, len , 3, 8);
-}
-
-static UBool
-_isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len)
-{
- const int32_t kStart = 0; // Start, wait for unicode_language_subtag, tkey or end
- const int32_t kGotLanguage = 1; // Got unicode_language_subtag, wait for unicode_script_subtag,
- // unicode_region_subtag, unicode_variant_subtag, tkey or end
- const int32_t kGotScript = 2; // Got unicode_script_subtag, wait for unicode_region_subtag,
- // unicode_variant_subtag, tkey, or end
- const int32_t kGotRegion = 3; // Got unicode_region_subtag, wait for unicode_variant_subtag,
- // tkey, or end.
- const int32_t kGotVariant = 4; // Got unicode_variant_subtag, wait for unicode_variant_subtag
- // tkey or end.
- const int32_t kGotTKey = -1; // Got tkey, wait for tvalue. ERROR if stop here.
- const int32_t kGotTValue = 6; // Got tvalue, wait for tkey, tvalue or end
-
- switch (state) {
- case kStart:
- if (ultag_isLanguageSubtag(s, len)) {
- state = kGotLanguage;
- return TRUE;
- }
- if (_isTKey(s, len)) {
- state = kGotTKey;
- return TRUE;
- }
- return FALSE;
- case kGotLanguage:
- if (ultag_isScriptSubtag(s, len)) {
- state = kGotScript;
- return TRUE;
- }
- U_FALLTHROUGH;
- case kGotScript:
- if (ultag_isRegionSubtag(s, len)) {
- state = kGotRegion;
- return TRUE;
- }
- U_FALLTHROUGH;
- case kGotRegion:
- U_FALLTHROUGH;
- case kGotVariant:
- if (_isVariantSubtag(s, len)) {
- state = kGotVariant;
- return TRUE;
- }
- if (_isTKey(s, len)) {
- state = kGotTKey;
- return TRUE;
- }
- return FALSE;
- case kGotTKey:
- if (_isTValue(s, len)) {
- state = kGotTValue;
- return TRUE;
- }
- return FALSE;
- case kGotTValue:
- if (_isTKey(s, len)) {
- state = kGotTKey;
- return TRUE;
- }
- if (_isTValue(s, len)) {
- return TRUE;
- }
- return FALSE;
- }
- return FALSE;
-}
-
-static UBool
-_isUnicodeExtensionSubtag(int32_t& state, const char* s, int32_t len)
-{
- const int32_t kStart = 0; // Start, wait for a key or attribute or end
- const int32_t kGotKey = 1; // Got a key, wait for type or key or end
- const int32_t kGotType = 2; // Got a type, wait for key or end
-
- switch (state) {
- case kStart:
- if (ultag_isUnicodeLocaleKey(s, len)) {
- state = kGotKey;
- return TRUE;
- }
- if (ultag_isUnicodeLocaleAttribute(s, len)) {
- return TRUE;
- }
- return FALSE;
- case kGotKey:
- if (ultag_isUnicodeLocaleKey(s, len)) {
- return TRUE;
- }
- if (_isUnicodeLocaleTypeSubtag(s, len)) {
- state = kGotType;
- return TRUE;
- }
- return FALSE;
- case kGotType:
- if (ultag_isUnicodeLocaleKey(s, len)) {
- state = kGotKey;
- return TRUE;
- }
- if (_isUnicodeLocaleTypeSubtag(s, len)) {
- return TRUE;
- }
- return FALSE;
- }
- return FALSE;
-}
-
-static UBool
-_isStatefulSepListOf(UBool (*test)(int32_t&, const char*, int32_t), const char* s, int32_t len)
-{
- int32_t state = 0;
- const char* p;
- const char* start = s;
- int32_t subtagLen = 0;
-
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
-
- for (p = s; len > 0; p++, len--) {
- if (*p == SEP) {
- if (!test(state, start, subtagLen)) {
- return FALSE;
- }
- subtagLen = 0;
- start = p + 1;
- } else {
- subtagLen++;
- }
- }
-
- if (test(state, start, subtagLen) && state >= 0) {
- return TRUE;
- }
- return FALSE;
-}
-
-U_CFUNC UBool
-ultag_isTransformedExtensionSubtags(const char* s, int32_t len)
-{
- return _isStatefulSepListOf(&_isTransformedExtensionSubtag, s, len);
-}
-
-U_CFUNC UBool
-ultag_isUnicodeExtensionSubtags(const char* s, int32_t len) {
- return _isStatefulSepListOf(&_isUnicodeExtensionSubtag, s, len);
-}
-
-
-/*
-* -------------------------------------------------
-*
-* Helper functions
-*
-* -------------------------------------------------
-*/
-
-static UBool
-_addVariantToList(VariantListEntry **first, VariantListEntry *var) {
- UBool bAdded = TRUE;
-
- if (*first == NULL) {
- var->next = NULL;
- *first = var;
- } else {
- VariantListEntry *prev, *cur;
- int32_t cmp;
-
- /* variants order should be preserved */
- prev = NULL;
- cur = *first;
- while (TRUE) {
- if (cur == NULL) {
- prev->next = var;
- var->next = NULL;
- break;
- }
-
- /* Checking for duplicate variant */
- cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
- if (cmp == 0) {
- /* duplicated variant */
- bAdded = FALSE;
- break;
- }
- prev = cur;
- cur = cur->next;
- }
- }
-
- return bAdded;
-}
-
-static UBool
-_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
- UBool bAdded = TRUE;
-
- if (*first == NULL) {
- attr->next = NULL;
- *first = attr;
- } else {
- AttributeListEntry *prev, *cur;
- int32_t cmp;
-
- /* reorder variants in alphabetical order */
- prev = NULL;
- cur = *first;
- while (TRUE) {
- if (cur == NULL) {
- prev->next = attr;
- attr->next = NULL;
- break;
- }
- cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
- if (cmp < 0) {
- if (prev == NULL) {
- *first = attr;
- } else {
- prev->next = attr;
- }
- attr->next = cur;
- break;
- }
- if (cmp == 0) {
- /* duplicated variant */
- bAdded = FALSE;
- break;
- }
- prev = cur;
- cur = cur->next;
- }
- }
-
- return bAdded;
-}
-
-
-static UBool
-_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
- UBool bAdded = TRUE;
-
- if (*first == NULL) {
- ext->next = NULL;
- *first = ext;
- } else {
- ExtensionListEntry *prev, *cur;
- int32_t cmp;
-
- /* reorder variants in alphabetical order */
- prev = NULL;
- cur = *first;
- while (TRUE) {
- if (cur == NULL) {
- prev->next = ext;
- ext->next = NULL;
- break;
- }
- if (localeToBCP) {
- /* special handling for locale to bcp conversion */
- int32_t len, curlen;
-
- len = (int32_t)uprv_strlen(ext->key);
- curlen = (int32_t)uprv_strlen(cur->key);
-
- if (len == 1 && curlen == 1) {
- if (*(ext->key) == *(cur->key)) {
- cmp = 0;
- } else if (*(ext->key) == PRIVATEUSE) {
- cmp = 1;
- } else if (*(cur->key) == PRIVATEUSE) {
- cmp = -1;
- } else {
- cmp = *(ext->key) - *(cur->key);
- }
- } else if (len == 1) {
- cmp = *(ext->key) - LDMLEXT;
- } else if (curlen == 1) {
- cmp = LDMLEXT - *(cur->key);
- } else {
- cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
- /* Both are u extension keys - we need special handling for 'attribute' */
- if (cmp != 0) {
- if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) {
- cmp = 1;
- } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
- cmp = -1;
- }
- }
- }
- } else {
- cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
- }
- if (cmp < 0) {
- if (prev == NULL) {
- *first = ext;
- } else {
- prev->next = ext;
- }
- ext->next = cur;
- break;
- }
- if (cmp == 0) {
- /* duplicated extension key */
- bAdded = FALSE;
- break;
- }
- prev = cur;
- cur = cur->next;
- }
- }
-
- return bAdded;
-}
-
-static void
-_initializeULanguageTag(ULanguageTag* langtag) {
- int32_t i;
-
- langtag->buf = NULL;
-
- langtag->language = EMPTY;
- for (i = 0; i < MAXEXTLANG; i++) {
- langtag->extlang[i] = NULL;
- }
-
- langtag->script = EMPTY;
- langtag->region = EMPTY;
-
- langtag->variants = NULL;
- langtag->extensions = NULL;
-
- langtag->grandfathered = EMPTY;
- langtag->privateuse = EMPTY;
-}
-
-static void
-_appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {
- char buf[ULOC_LANG_CAPACITY];
- UErrorCode tmpStatus = U_ZERO_ERROR;
- int32_t len, i;
-
- if (U_FAILURE(*status)) {
- return;
- }
-
- len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
- if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
- if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- len = 0;
- }
-
- /* Note: returned language code is in lower case letters */
-
- if (len == 0) {
- sink.Append(LANG_UND, LANG_UND_LEN);
- } else if (!ultag_isLanguageSubtag(buf, len)) {
- /* invalid language code */
- if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- sink.Append(LANG_UND, LANG_UND_LEN);
- } else {
- /* resolve deprecated */
- for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
- // 2-letter deprecated subtags are listede before 3-letter
- // ones in DEPRECATEDLANGS[]. Get out of loop on coming
- // across the 1st 3-letter subtag, if the input is a 2-letter code.
- // to avoid continuing to try when there's no match.
- if (uprv_strlen(buf) < uprv_strlen(DEPRECATEDLANGS[i])) break;
- if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
- uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
- len = (int32_t)uprv_strlen(buf);
- break;
- }
- }
- sink.Append(buf, len);
- }
-}
-
-static void
-_appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {
- char buf[ULOC_SCRIPT_CAPACITY];
- UErrorCode tmpStatus = U_ZERO_ERROR;
- int32_t len;
-
- if (U_FAILURE(*status)) {
- return;
- }
-
- len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
- if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
- if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return;
- }
-
- if (len > 0) {
- if (!ultag_isScriptSubtag(buf, len)) {
- /* invalid script code */
- if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return;
- } else {
- sink.Append("-", 1);
- sink.Append(buf, len);
- }
- }
-}
-
-static void
-_appendRegionToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {
- char buf[ULOC_COUNTRY_CAPACITY];
- UErrorCode tmpStatus = U_ZERO_ERROR;
- int32_t len;
-
- if (U_FAILURE(*status)) {
- return;
- }
-
- len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
- if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
- if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return;
- }
-
- if (len > 0) {
- if (!ultag_isRegionSubtag(buf, len)) {
- /* invalid region code */
- if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return;
- } else {
- sink.Append("-", 1);
- /* resolve deprecated */
- for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) {
- if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) {
- uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]);
- len = (int32_t)uprv_strlen(buf);
- break;
- }
- }
- sink.Append(buf, len);
- }
- }
-}
-
-static void _sortVariants(VariantListEntry* first) {
- for (VariantListEntry* var1 = first; var1 != NULL; var1 = var1->next) {
- for (VariantListEntry* var2 = var1->next; var2 != NULL; var2 = var2->next) {
- // Swap var1->variant and var2->variant.
- if (uprv_compareInvCharsAsAscii(var1->variant, var2->variant) > 0) {
- const char* temp = var1->variant;
- var1->variant = var2->variant;
- var2->variant = temp;
- }
- }
- }
-}
-
-/* Appends the variant subtags of localeID to sink as "-variant" segments.
- * The variant string obtained from uloc_getVariant() is split at SEP /
- * LOCALE_SEP, each piece is lower-cased and checked with _isVariantSubtag();
- * accepted pieces are collected in a list, sorted with _sortVariants() and
- * then written out. *hadPosix is set to TRUE when the whole variant equals
- * POSIX_VALUE (after lower-casing); that piece is not written here.
- * In strict mode an empty, duplicated or invalid piece sets *status to
- * U_ILLEGAL_ARGUMENT_ERROR. In non-strict mode such pieces are dropped, and
- * scanning stops at the first invalid piece that passes
- * _isPrivateuseValueSubtag(). Nothing is written when *status ends up failed.
- */ static void
-_appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool *hadPosix, UErrorCode* status) {
- char buf[ULOC_FULLNAME_CAPACITY];
- UErrorCode tmpStatus = U_ZERO_ERROR;
- int32_t len, i;
-
- if (U_FAILURE(*status)) {
- return;
- }
-
- len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
- if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { /* variant could not be read into buf as a terminated string */
- if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return;
- }
-
- if (len > 0) {
- char *p, *pVar;
- UBool bNext = TRUE;
- VariantListEntry *var;
- VariantListEntry *varFirst = NULL;
-
- pVar = NULL; /* start of the piece currently being scanned, NULL between pieces */
- p = buf;
- while (bNext) {
- if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
- if (*p == 0) {
- bNext = FALSE; /* end of the variant string: finish after this piece */
- } else {
- *p = 0; /* terminate the piece in place so it can be used as a C string */
- }
- if (pVar == NULL) {
- if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- break;
- }
- /* ignore empty variant */
- } else {
- /* ICU uses upper case letters for variants, but
- the canonical format is lowercase in BCP47 */
- for (i = 0; *(pVar + i) != 0; i++) {
- *(pVar + i) = uprv_tolower(*(pVar + i));
- }
-
- /* validate */
- if (_isVariantSubtag(pVar, -1)) {
- if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) {
- /* emit the variant to the list */
- var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
- if (var == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- break;
- }
- var->variant = pVar; /* points into buf; the text is not copied */
- if (!_addVariantToList(&varFirst, var)) {
- /* duplicated variant: the entry was not linked in, so release it here */
- uprv_free(var);
- if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- break;
- }
- }
- } else {
- /* Special handling for POSIX variant, need to remember that we had it and then */
- /* treat it like an extension later. */
- *hadPosix = TRUE;
- }
- } else if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- break;
- } else if (_isPrivateuseValueSubtag(pVar, -1)) {
- /* Handle private use subtags separately: stop collecting variants here */
- break;
- }
- }
- /* reset variant starting position */
- pVar = NULL;
- } else if (pVar == NULL) {
- pVar = p; /* first character of a new piece */
- }
- p++;
- }
-
- if (U_SUCCESS(*status)) {
- if (varFirst != NULL) {
- int32_t varLen;
-
- /* per UTS35, we should sort the variants */
- _sortVariants(varFirst);
-
- /* write out validated/normalized variants to the target */
- var = varFirst;
- while (var != NULL) {
- sink.Append("-", 1);
- varLen = (int32_t)uprv_strlen(var->variant);
- sink.Append(var->variant, varLen);
- var = var->next;
- }
- }
- }
-
- /* clean up: the list nodes are freed on both the success and the error path */
- var = varFirst;
- while (var != NULL) {
- VariantListEntry *tmpVar = var->next;
- uprv_free(var);
- var = tmpVar;
- }
-
- if (U_FAILURE(*status)) {
- return;
- }
- }
-}
-
-/**
- * Appends the keywords of localeID to sink as BCP 47 extension subtags.
- *
- * Every keyword returned by uloc_openKeywords() is read with
- * uloc_getKeywordValue() and handled according to its key:
- *  - LOCALE_ATTRIBUTE_KEY: the value is split at '-' into attributes which
- *    are collected in their own list and emitted in place of a placeholder
- *    extension entry;
- *  - keys longer than one character: mapped through
- *    uloc_toUnicodeLocaleKey() / uloc_toUnicodeLocaleType(); a type that is
- *    returned unchanged is copied and lower-cased;
- *  - one-character keys: validated as private use (PRIVATEUSE) or as an
- *    extension singleton and copied as is.
- * When hadPosix is TRUE an extra POSIX_KEY/POSIX_VALUE entry is added.
- * The collected entries are written as "-key[-value]"; "-u" is written once
- * before the first entry whose key is longer than one character, and the
- * values "true" and "yes" are omitted.
- *
- * @param localeID  locale identifier whose keywords are converted
- * @param sink      receives the generated subtags
- * @param strict    when TRUE, any keyword that cannot be converted sets
- *                  *status to U_ILLEGAL_ARGUMENT_ERROR; otherwise such
- *                  keywords are skipped
- * @param hadPosix  TRUE when the caller saw the POSIX variant
- * @param status    in/out error code; U_MEMORY_ALLOCATION_ERROR on allocation
- *                  failure, U_ILLEGAL_ARGUMENT_ERROR for an attribute piece
- *                  that does not fit the fixed attribute buffer
- */
-static void
-_appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) {
-    char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
-    int32_t attrBufLength = 0;
-
-    icu::MemoryPool<AttributeListEntry> attrPool;
-    icu::MemoryPool<ExtensionListEntry> extPool;
-    icu::MemoryPool<icu::CharString> strPool;
-
-    icu::LocalUEnumerationPointer keywordEnum(uloc_openKeywords(localeID, status));
-    if (U_FAILURE(*status) && !hadPosix) {
-        return;
-    }
-    if (keywordEnum.isValid() || hadPosix) {
-        /* reorder extensions */
-        int32_t len;
-        const char *key;
-        ExtensionListEntry *firstExt = NULL;
-        ExtensionListEntry *ext;
-        AttributeListEntry *firstAttr = NULL;
-        AttributeListEntry *attr;
-        icu::MemoryPool<icu::CharString> extBufPool;
-        const char *bcpKey=nullptr, *bcpValue=nullptr;
-        UErrorCode tmpStatus = U_ZERO_ERROR;
-        int32_t keylen;
-        UBool isBcpUExt;
-
-        while (TRUE) {
-            icu::CharString buf;
-            key = uenum_next(keywordEnum.getAlias(), NULL, status);
-            if (key == NULL) {
-                break;
-            }
-            char* buffer;
-            int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY;
-
-            /* Fetch the keyword value, growing the buffer until it fits. */
-            for (;;) {
-                buffer = buf.getAppendBuffer(
-                        /*minCapacity=*/resultCapacity,
-                        /*desiredCapacityHint=*/resultCapacity,
-                        resultCapacity,
-                        tmpStatus);
-
-                if (U_FAILURE(tmpStatus)) {
-                    break;
-                }
-
-                len = uloc_getKeywordValue(
-                        localeID, key, buffer, resultCapacity, &tmpStatus);
-
-                if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
-                    break;
-                }
-
-                resultCapacity = len;
-                tmpStatus = U_ZERO_ERROR;
-            }
-
-            if (U_FAILURE(tmpStatus)) {
-                if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
-                    *status = U_MEMORY_ALLOCATION_ERROR;
-                    break;
-                }
-                if (strict) {
-                    *status = U_ILLEGAL_ARGUMENT_ERROR;
-                    break;
-                }
-                /* ignore this keyword */
-                tmpStatus = U_ZERO_ERROR;
-                continue;
-            }
-
-            buf.append(buffer, len, tmpStatus);
-            if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
-                tmpStatus = U_ZERO_ERROR;  // Terminators provided by CharString.
-            }
-
-            keylen = (int32_t)uprv_strlen(key);
-            isBcpUExt = (keylen > 1);
-
-            /* special keyword used for representing Unicode locale attributes */
-            if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
-                if (len > 0) {
-                    int32_t i = 0;
-                    while (TRUE) {
-                        attrBufLength = 0;
-                        for (; i < len; i++) {
-                            if (buf[i] != '-') {
-                                /*
-                                 * The keyword value can be longer than attrBuf
-                                 * (buf grows on demand above), so every write
-                                 * is bounded; one byte stays reserved for the
-                                 * terminator written below.
-                                 */
-                                if (attrBufLength >= (int32_t)sizeof(attrBuf) - 1) {
-                                    *status = U_ILLEGAL_ARGUMENT_ERROR;
-                                    return;
-                                }
-                                attrBuf[attrBufLength++] = buf[i];
-                            } else {
-                                i++;
-                                break;
-                            }
-                        }
-                        if (attrBufLength > 0) {
-                            attrBuf[attrBufLength] = 0;
-
-                        } else if (i >= len){
-                            break;
-                        }
-
-                        /* create AttributeListEntry */
-                        attr = attrPool.create();
-                        if (attr == NULL) {
-                            *status = U_MEMORY_ALLOCATION_ERROR;
-                            break;
-                        }
-                        icu::CharString* attrValue =
-                                strPool.create(attrBuf, attrBufLength, *status);
-                        if (attrValue == NULL) {
-                            *status = U_MEMORY_ALLOCATION_ERROR;
-                            break;
-                        }
-                        if (U_FAILURE(*status)) {
-                            break;
-                        }
-                        attr->attribute = attrValue->data();
-
-                        if (!_addAttributeToList(&firstAttr, attr)) {
-                            if (strict) {
-                                *status = U_ILLEGAL_ARGUMENT_ERROR;
-                                break;
-                            }
-                        }
-                    }
-                    /* for a place holder ExtensionListEntry */
-                    bcpKey = LOCALE_ATTRIBUTE_KEY;
-                    bcpValue = NULL;
-                }
-            } else if (isBcpUExt) {
-                bcpKey = uloc_toUnicodeLocaleKey(key);
-                if (bcpKey == NULL) {
-                    if (strict) {
-                        *status = U_ILLEGAL_ARGUMENT_ERROR;
-                        break;
-                    }
-                    continue;
-                }
-
-                /* we've checked buf is null-terminated above */
-                bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
-                if (bcpValue == NULL) {
-                    if (strict) {
-                        *status = U_ILLEGAL_ARGUMENT_ERROR;
-                        break;
-                    }
-                    continue;
-                }
-                if (bcpValue == buf.data()) {
-                    /*
-                    When uloc_toUnicodeLocaleType(key, buf) returns the
-                    input value as is, the value is well-formed, but has
-                    no known mapping. This implementation normalizes the
-                    value to lower case
-                    */
-                    icu::CharString* extBuf = extBufPool.create();
-                    if (extBuf == nullptr) {
-                        *status = U_MEMORY_ALLOCATION_ERROR;
-                        break;
-                    }
-                    int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
-                    int32_t resultCapacity;
-                    char* pExtBuf = extBuf->getAppendBuffer(
-                            /*minCapacity=*/bcpValueLen,
-                            /*desiredCapacityHint=*/bcpValueLen,
-                            resultCapacity,
-                            tmpStatus);
-                    if (U_FAILURE(tmpStatus)) {
-                        *status = tmpStatus;
-                        break;
-                    }
-
-                    uprv_strcpy(pExtBuf, bcpValue);
-                    T_CString_toLowerCase(pExtBuf);
-
-                    extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
-                    if (U_FAILURE(tmpStatus)) {
-                        *status = tmpStatus;
-                        break;
-                    }
-
-                    bcpValue = extBuf->data();
-                }
-            } else {
-                if (*key == PRIVATEUSE) {
-                    if (!ultag_isPrivateuseValueSubtags(buf.data(), len)) {
-                        if (strict) {
-                            *status = U_ILLEGAL_ARGUMENT_ERROR;
-                            break;
-                        }
-                        continue;
-                    }
-                } else {
-                    if (!_isExtensionSingleton(key, keylen) || !ultag_isExtensionSubtags(buf.data(), len)) {
-                        if (strict) {
-                            *status = U_ILLEGAL_ARGUMENT_ERROR;
-                            break;
-                        }
-                        continue;
-                    }
-                }
-                bcpKey = key;
-                /* buf dies at the end of this iteration, so keep a pooled copy */
-                icu::CharString* extBuf =
-                        extBufPool.create(buf.data(), len, tmpStatus);
-                if (extBuf == nullptr) {
-                    *status = U_MEMORY_ALLOCATION_ERROR;
-                    break;
-                }
-                if (U_FAILURE(tmpStatus)) {
-                    *status = tmpStatus;
-                    break;
-                }
-                bcpValue = extBuf->data();
-            }
-
-            /* create ExtensionListEntry */
-            ext = extPool.create();
-            if (ext == NULL) {
-                *status = U_MEMORY_ALLOCATION_ERROR;
-                break;
-            }
-            ext->key = bcpKey;
-            ext->value = bcpValue;
-
-            if (!_addExtensionToList(&firstExt, ext, TRUE)) {
-                if (strict) {
-                    *status = U_ILLEGAL_ARGUMENT_ERROR;
-                    break;
-                }
-            }
-        }
-
-        /* Special handling for POSIX variant - add the keywords for POSIX */
-        if (hadPosix) {
-            /* create ExtensionListEntry for POSIX */
-            ext = extPool.create();
-            if (ext == NULL) {
-                *status = U_MEMORY_ALLOCATION_ERROR;
-                return;
-            }
-            ext->key = POSIX_KEY;
-            ext->value = POSIX_VALUE;
-
-            if (!_addExtensionToList(&firstExt, ext, TRUE)) {
-                // Silently ignore errors.
-            }
-        }
-
-        if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
-            UBool startLDMLExtension = FALSE;
-            for (ext = firstExt; ext; ext = ext->next) {
-                if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
-                    /* first LDML u singleton extension */
-                    sink.Append("-u", 2);
-                    startLDMLExtension = TRUE;
-                }
-
-                /* write out the sorted BCP47 attributes, extensions and private use */
-                if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
-                    /* write the value for the attributes */
-                    for (attr = firstAttr; attr; attr = attr->next) {
-                        sink.Append("-", 1);
-                        sink.Append(
-                                attr->attribute, static_cast<int32_t>(uprv_strlen(attr->attribute)));
-                    }
-                } else {
-                    sink.Append("-", 1);
-                    sink.Append(ext->key, static_cast<int32_t>(uprv_strlen(ext->key)));
-                    if (uprv_strcmp(ext->value, "true") != 0 &&
-                        uprv_strcmp(ext->value, "yes") != 0) {
-                        sink.Append("-", 1);
-                        sink.Append(ext->value, static_cast<int32_t>(uprv_strlen(ext->value)));
-                    }
-                }
-            }
-        }
-    }
-}
-
-/**
- * Append keywords parsed from LDML extension value
- * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
- *
- * The value is processed in two passes. Leading subtags that are not Unicode
- * locale keys (per ultag_isUnicodeLocaleKey()) are collected as attributes
- * and emitted as one LOCALE_ATTRIBUTE_KEY keyword whose value joins them
- * with '-'. The remaining subtags are grouped into key/type pairs, converted
- * with uloc_toLegacyKey() / uloc_toLegacyType(), and merged into *appendTo
- * through _addExtensionToList().
- *
- * ldmlext      the u extension value to parse
- * appendTo     list that receives the resulting keyword entries
- * extPool      pool that owns the ExtensionListEntry objects created here
- * kwdBuf       pool that owns the strings created here for keys and values
- * posixVariant in: TRUE when a variant already exists, which disables the
- *              u-va-posix special case; out: TRUE only when u-va-posix was
- *              found and is to be treated as a variant
- * status       set to U_MEMORY_ALLOCATION_ERROR or U_ILLEGAL_ARGUMENT_ERROR
- *              on failure
- */
-static void
-_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, icu::MemoryPool<ExtensionListEntry>& extPool, icu::MemoryPool<icu::CharString>& kwdBuf, UBool *posixVariant, UErrorCode *status) {
- const char *pTag; /* beginning of current subtag */
- const char *pKwds; /* beginning of key-type pairs */
- UBool variantExists = *posixVariant;
-
- ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */
- ExtensionListEntry *kwd, *nextKwd;
-
- int32_t len;
-
- /* Reset the posixVariant value */
- *posixVariant = FALSE;
-
- pTag = ldmlext;
- pKwds = NULL;
-
- {
- AttributeListEntry *attrFirst = NULL; /* first attribute */
- AttributeListEntry *attr, *nextAttr;
-
- char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; /* holds the NUL-terminated attribute strings back to back */
- int32_t attrBufIdx = 0; /* next free offset in attrBuf */
-
- icu::MemoryPool<AttributeListEntry> attrPool;
-
- /* Iterate through u extension attributes */
- while (*pTag) {
- /* locate next separator char */
- for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
-
- if (ultag_isUnicodeLocaleKey(pTag, len)) {
- pKwds = pTag; /* the first key ends the attribute section */
- break;
- }
-
- /* add this attribute to the list */
- attr = attrPool.create();
- if (attr == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) { /* room for the subtag plus its terminator */
- uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
- attrBuf[attrBufIdx + len] = 0;
- attr->attribute = &attrBuf[attrBufIdx];
- attrBufIdx += (len + 1);
- } else {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- // duplicate attribute is ignored, causes no error.
- _addAttributeToList(&attrFirst, attr);
-
- /* next tag */
- pTag += len;
- if (*pTag) {
- /* next to the separator */
- pTag++;
- }
- }
-
- if (attrFirst) {
- /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
-
- kwd = extPool.create();
- if (kwd == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- icu::CharString* value = kwdBuf.create();
- if (value == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- /* attribute subtags sorted in alphabetical order as type */
- attr = attrFirst;
- while (attr != NULL) {
- nextAttr = attr->next;
- if (attr != attrFirst) {
- value->append('-', *status);
- }
- value->append(attr->attribute, *status); /* copies the text, so attrBuf is not needed after this block */
- attr = nextAttr;
- }
- if (U_FAILURE(*status)) {
- return;
- }
-
- kwd->key = LOCALE_ATTRIBUTE_KEY;
- kwd->value = value->data();
-
- if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- }
- }
-
- if (pKwds) {
- const char *pBcpKey = NULL; /* u extension key subtag */
- const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */
- int32_t bcpKeyLen = 0;
- int32_t bcpTypeLen = 0;
- UBool isDone = FALSE;
-
- pTag = pKwds;
- /* BCP47 representation of LDML key/type pairs */
- while (!isDone) {
- const char *pNextBcpKey = NULL;
- int32_t nextBcpKeyLen = 0;
- UBool emitKeyword = FALSE;
-
- if (*pTag) {
- /* locate next separator char */
- for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
-
- if (ultag_isUnicodeLocaleKey(pTag, len)) {
- if (pBcpKey) {
- emitKeyword = TRUE; /* a new key closes the pair collected so far */
- pNextBcpKey = pTag;
- nextBcpKeyLen = len;
- } else {
- pBcpKey = pTag;
- bcpKeyLen = len;
- }
- } else {
- U_ASSERT(pBcpKey != NULL);
- /* within LDML type subtags */
- if (pBcpType) {
- bcpTypeLen += (len + 1); /* +1 covers the separator between type subtags */
- } else {
- pBcpType = pTag;
- bcpTypeLen = len;
- }
- }
-
- /* next tag */
- pTag += len;
- if (*pTag) {
- /* next to the separator */
- pTag++;
- }
- } else {
- /* processing last one */
- emitKeyword = TRUE;
- isDone = TRUE;
- }
-
- if (emitKeyword) {
- const char *pKey = NULL; /* LDML key */
- const char *pType = NULL; /* LDML type */
-
- char bcpKeyBuf[3]; /* BCP key length is always 2 for now */
-
- U_ASSERT(pBcpKey != NULL);
-
- if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
- /* the BCP key is invalid */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- U_ASSERT(bcpKeyLen <= 2);
-
- uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
- bcpKeyBuf[bcpKeyLen] = 0; /* uprv_strncpy above copies exactly bcpKeyLen chars, so terminate explicitly */
-
- /* u extension key to LDML key */
- pKey = uloc_toLegacyKey(bcpKeyBuf);
- if (pKey == NULL) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- if (pKey == bcpKeyBuf) {
- /*
- The key returned by toLegacyKey points to the input buffer.
- We normalize the result key to lower case.
- The copy is stored in kwdBuf because bcpKeyBuf is a local of this block.
- */
- T_CString_toLowerCase(bcpKeyBuf);
- icu::CharString* key = kwdBuf.create(bcpKeyBuf, bcpKeyLen, *status);
- if (key == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- if (U_FAILURE(*status)) {
- return;
- }
- pKey = key->data();
- }
-
- if (pBcpType) {
- char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */
- if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) {
- /* the BCP type is too long */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
- bcpTypeBuf[bcpTypeLen] = 0;
-
- /* BCP type to locale type */
- pType = uloc_toLegacyType(pKey, bcpTypeBuf);
- if (pType == NULL) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- if (pType == bcpTypeBuf) {
- /*
- The type returned by toLegacyType points to the input buffer.
- We normalize the result type to lower case.
- */
- /* normalize to lower case, then keep a copy in kwdBuf */
- T_CString_toLowerCase(bcpTypeBuf);
- icu::CharString* type = kwdBuf.create(bcpTypeBuf, bcpTypeLen, *status);
- if (type == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- if (U_FAILURE(*status)) {
- return;
- }
- pType = type->data();
- }
- } else {
- /* typeless - default type value is "yes" */
- pType = LOCALE_TYPE_YES;
- }
-
- /* Special handling for u-va-posix, since we want to treat this as a variant,
- not as a keyword */
- if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
- *posixVariant = TRUE;
- } else {
- /* create an ExtensionListEntry for this keyword */
- kwd = extPool.create();
- if (kwd == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- kwd->key = pKey;
- kwd->value = pType;
-
- if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
- // duplicate keyword is allowed, Only the first
- // is honored.
- }
- }
-
- /* carry the key that triggered this emit over as the start of the next pair */
- pBcpKey = pNextBcpKey;
- bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
- pBcpType = NULL;
- bcpTypeLen = 0;
- }
- }
- }
-
- /* move every locally collected keyword into the caller's list */
- kwd = kwdFirst;
- while (kwd != NULL) {
- nextKwd = kwd->next; /* saved before the call below, which receives kwd for insertion into *appendTo */
- _addExtensionToList(appendTo, kwd, FALSE);
- kwd = nextKwd;
- }
-}
-
-
-/**
- * Writes the extensions and private use part of a parsed language tag to
- * sink in ICU keyword syntax ("@key=value;key=value").
- *
- * Extensions whose key starts with LDMLEXT are expanded through
- * _appendLDMLExtensionAsKeywords(); every other extension, and a non-empty
- * private use value (under PRIVATEUSE_KEY), becomes one keyword. If the LDML
- * expansion reports a POSIX variant, _POSIX is written before the keywords.
- * Nothing is written once *status holds a failure.
- */
-static void
-_appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status) {
-    icu::MemoryPool<ExtensionListEntry> extPool;
-    icu::MemoryPool<icu::CharString> kwdBuf;
-    ExtensionListEntry *sortedHead = NULL;
-    ExtensionListEntry *entry;
-
-    if (U_FAILURE(*status)) {
-        return;
-    }
-
-    /* Determine if variants already exists */
-    UBool posixVariant = ultag_getVariantsSize(langtag) ? TRUE : FALSE;
-
-    /* resolve locale keywords and reordering keys */
-    const int32_t extCount = ultag_getExtensionsSize(langtag);
-    for (int32_t idx = 0; idx < extCount; idx++) {
-        const char *extKey = ultag_getExtensionKey(langtag, idx);
-        const char *extValue = ultag_getExtensionValue(langtag, idx);
-
-        if (*extKey == LDMLEXT) {
-            _appendLDMLExtensionAsKeywords(extValue, &sortedHead, extPool, kwdBuf, &posixVariant, status);
-            if (U_FAILURE(*status)) {
-                break;
-            }
-            continue;
-        }
-
-        entry = extPool.create();
-        if (entry == NULL) {
-            *status = U_MEMORY_ALLOCATION_ERROR;
-            break;
-        }
-        entry->key = extKey;
-        entry->value = extValue;
-        if (!_addExtensionToList(&sortedHead, entry, FALSE)) {
-            *status = U_ILLEGAL_ARGUMENT_ERROR;
-            break;
-        }
-    }
-
-    if (U_SUCCESS(*status)) {
-        const char *privUse = ultag_getPrivateUse(langtag);
-        if ((int32_t)uprv_strlen(privUse) > 0) {
-            /* add private use as a keyword */
-            entry = extPool.create();
-            if (entry != NULL) {
-                entry->key = PRIVATEUSE_KEY;
-                entry->value = privUse;
-                if (!_addExtensionToList(&sortedHead, entry, FALSE)) {
-                    *status = U_ILLEGAL_ARGUMENT_ERROR;
-                }
-            } else {
-                *status = U_MEMORY_ALLOCATION_ERROR;
-            }
-        }
-    }
-
-    /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
-    if (U_SUCCESS(*status) && posixVariant) {
-        sink.Append(_POSIX, (int32_t) uprv_strlen(_POSIX));
-    }
-
-    if (U_SUCCESS(*status) && sortedHead != NULL) {
-        /* write out the sorted keywords: '@' opens the list, ';' separates entries */
-        const char *lead = "@";
-        for (entry = sortedHead; entry != NULL; entry = entry->next) {
-            sink.Append(lead, 1);
-            lead = ";";
-
-            /* key */
-            sink.Append(entry->key, (int32_t)uprv_strlen(entry->key));
-            sink.Append("=", 1);
-
-            /* type */
-            sink.Append(entry->value, (int32_t)uprv_strlen(entry->value));
-        }
-    }
-}
-
-/**
- * Appends the part of localeID's variant that can only be expressed as BCP 47
- * private use, in the form "-x-" PRIVUSE_VARIANT_PREFIX "-value[-value...]".
- *
- * The variant is split at SEP / LOCALE_SEP and each piece is lower-cased.
- * The first piece that is written is the first one that passes
- * _isPrivateuseValueSubtag() but not _isVariantSubtag(); it is preceded by
- * the private use prefix. Every later private use piece is written as well.
- * Scanning stops at the first piece that is not a private use value subtag;
- * in strict mode that also sets *status to U_ILLEGAL_ARGUMENT_ERROR.
- *
- * The text is assembled in a fixed-size local buffer. If it does not fit,
- * the result is truncated to the buffer size.
- *
- * @param localeID  locale identifier whose variant is examined
- * @param sink      receives the generated text
- * @param strict    selects error reporting for invalid pieces (see above)
- * @param hadPosix  unused
- * @param status    in/out error code
- */
-static void
-_appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) {
-    (void)hadPosix;
-    char buf[ULOC_FULLNAME_CAPACITY];
-    char tmpAppend[ULOC_FULLNAME_CAPACITY];
-    UErrorCode tmpStatus = U_ZERO_ERROR;
-    int32_t len, i;
-    int32_t reslen = 0;
-    int32_t capacity = sizeof tmpAppend;
-
-    if (U_FAILURE(*status)) {
-        return;
-    }
-
-    len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
-    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
-        if (strict) {
-            *status = U_ILLEGAL_ARGUMENT_ERROR;
-        }
-        return;
-    }
-
-    if (len > 0) {
-        char *p, *pPriv;
-        UBool bNext = TRUE;
-        UBool firstValue = TRUE;
-        UBool writeValue;
-
-        pPriv = NULL;
-        p = buf;
-        while (bNext) {
-            writeValue = FALSE;
-            if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
-                if (*p == 0) {
-                    bNext = FALSE;
-                } else {
-                    *p = 0; /* terminate */
-                }
-                if (pPriv != NULL) {
-                    /* Private use in the canonical format is lowercase in BCP47 */
-                    for (i = 0; *(pPriv + i) != 0; i++) {
-                        *(pPriv + i) = uprv_tolower(*(pPriv + i));
-                    }
-
-                    /* validate */
-                    if (_isPrivateuseValueSubtag(pPriv, -1)) {
-                        if (firstValue) {
-                            /* pieces that are valid variants were already written as variants */
-                            if (!_isVariantSubtag(pPriv, -1)) {
-                                writeValue = TRUE;
-                            }
-                        } else {
-                            writeValue = TRUE;
-                        }
-                    } else if (strict) {
-                        *status = U_ILLEGAL_ARGUMENT_ERROR;
-                        break;
-                    } else {
-                        break;
-                    }
-
-                    if (writeValue) {
-                        if (reslen < capacity) {
-                            tmpAppend[reslen++] = SEP;
-                        }
-
-                        if (firstValue) {
-                            if (reslen < capacity) {
-                                tmpAppend[reslen++] = *PRIVATEUSE_KEY;
-                            }
-
-                            if (reslen < capacity) {
-                                tmpAppend[reslen++] = SEP;
-                            }
-
-                            len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
-                            if (reslen < capacity) {
-                                uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
-                            }
-                            reslen += len;
-
-                            if (reslen < capacity) {
-                                tmpAppend[reslen++] = SEP;
-                            }
-
-                            firstValue = FALSE;
-                        }
-
-                        len = (int32_t)uprv_strlen(pPriv);
-                        if (reslen < capacity) {
-                            uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
-                        }
-                        reslen += len;
-                    }
-                }
-                /* reset private use starting position */
-                pPriv = NULL;
-            } else if (pPriv == NULL) {
-                pPriv = p;
-            }
-            p++;
-        }
-
-        if (U_FAILURE(*status)) {
-            return;
-        }
-    }
-
-    if (U_SUCCESS(*status)) {
-        /*
-         * reslen counts the bytes that were requested, which can exceed what
-         * was actually stored in tmpAppend. Never hand the sink more bytes
-         * than the buffer holds.
-         */
-        len = uprv_min(reslen, capacity);
-        sink.Append(tmpAppend, len);
-    }
-}
-
-/*
-* -------------------------------------------------
-*
-* ultag_ functions
-*
-* -------------------------------------------------
-*/
-
-/* Bit flags used by the parser: ultag_parse() keeps the kinds of subtag it is willing to accept next as a mask of these */
-#define LANG 0x0001 /* language subtag */
-#define EXTL 0x0002 /* extlang subtag */
-#define SCRT 0x0004 /* script subtag */
-#define REGN 0x0008 /* region subtag */
-#define VART 0x0010 /* variant subtag */
-#define EXTS 0x0020 /* extension singleton */
-#define EXTV 0x0040 /* extension value subtag */
-#define PRIV 0x0080 /* private use singleton (PRIVATEUSE) */
-
-/**
- * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing
- * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ )
- * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above.
- */
-#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
-#pragma optimize( "", off )
-#endif
-
-/**
- * Parses a BCP 47 language tag into a newly allocated ULanguageTag.
- *
- * The input is copied into a private buffer which is then cut into subtags
- * in place; the fields of the returned object point into that buffer.
- * A leading GRANDFATHERED or REDUNDANT tag is replaced by its preferred
- * form before parsing. Parsing stops silently at the first subtag that is
- * not well-formed; whatever was recognised up to that point is returned.
- *
- * @param tag        the tag text; only tagLen bytes are read when tagLen is
- *                   non-negative, so it need not be NUL-terminated then
- * @param tagLen     length of tag, or a negative value to use uprv_strlen()
- * @param parsedLen  optional output: number of input characters that were
- *                   accepted, measured against the original input
- * @param status     in/out error code; U_MEMORY_ALLOCATION_ERROR on
- *                   allocation failure
- * @return the parsed tag (owned by the caller), an empty tag when the input
- *         is shorter than MINLEN, or NULL on failure
- */
-static ULanguageTag*
-ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
-    char *tagBuf;
-    int16_t next;
-    char *pSubtag, *pNext, *pLastGoodPosition;
-    int32_t subtagLen;
-    int32_t extlangIdx;
-    ExtensionListEntry *pExtension;
-    char *pExtValueSubtag, *pExtValueSubtagEnd;
-    int32_t i;
-    UBool privateuseVar = FALSE;
-    int32_t grandfatheredLen = 0;
-
-    if (parsedLen != NULL) {
-        *parsedLen = 0;
-    }
-
-    if (U_FAILURE(*status)) {
-        return NULL;
-    }
-
-    if (tagLen < 0) {
-        tagLen = (int32_t)uprv_strlen(tag);
-    }
-
-    /* copy the entire string */
-    tagBuf = (char*)uprv_malloc(tagLen + 1);
-    if (tagBuf == NULL) {
-        *status = U_MEMORY_ALLOCATION_ERROR;
-        return NULL;
-    }
-    uprv_memcpy(tagBuf, tag, tagLen);
-    *(tagBuf + tagLen) = 0;
-
-    /* create a ULanguageTag */
-    icu::LocalULanguageTagPointer t(
-            (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)));
-    if (t.isNull()) {
-        uprv_free(tagBuf);
-        *status = U_MEMORY_ALLOCATION_ERROR;
-        return NULL;
-    }
-    _initializeULanguageTag(t.getAlias());
-    t->buf = tagBuf;
-
-    if (tagLen < MINLEN) {
-        /* the input tag is too short - return empty ULanguageTag */
-        return t.orphan();
-    }
-
-    size_t parsedLenDelta = 0;
-    // Grandfathered tag will be consider together. Grandfathered tag with intervening
-    // script and region such as art-DE-lojban or art-Latn-lojban won't be
-    // matched.
-    /* check if the tag is grandfathered */
-    for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
-        int32_t checkGrandfatheredLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i]));
-        if (tagLen < checkGrandfatheredLen) {
-            continue;
-        }
-        if (tagLen > checkGrandfatheredLen && tagBuf[checkGrandfatheredLen] != '-') {
-            // make sure next char is '-'.
-            continue;
-        }
-        if (uprv_strnicmp(GRANDFATHERED[i], tagBuf, checkGrandfatheredLen) == 0) {
-            int32_t newTagLength;
-
-            grandfatheredLen = checkGrandfatheredLen;  /* back up for output parsedLen */
-            int32_t replacementLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
-            newTagLength = replacementLen + tagLen - checkGrandfatheredLen;
-            /* length of the caller's input; tagLen may be enlarged below */
-            const int32_t oldTagLength = tagLen;
-            if (tagLen < newTagLength) {
-                uprv_free(tagBuf);
-                /*
-                 * Update t->buf together with tagBuf, before the NULL check:
-                 * t releases t->buf when it goes out of scope, so it must not
-                 * be left pointing at the block that was just freed.
-                 */
-                t->buf = tagBuf = (char*)uprv_malloc(newTagLength + 1);
-                if (tagBuf == NULL) {
-                    *status = U_MEMORY_ALLOCATION_ERROR;
-                    return NULL;
-                }
-                tagLen = newTagLength;
-            }
-            parsedLenDelta = checkGrandfatheredLen - replacementLen;
-            uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
-            if (oldTagLength > checkGrandfatheredLen) {
-                /*
-                 * Copy the rest of the input by length: tag is only known to
-                 * hold oldTagLength bytes and may lack a terminating NUL.
-                 */
-                const int32_t restLen = oldTagLength - checkGrandfatheredLen;
-                uprv_memcpy(t->buf + replacementLen, tag + checkGrandfatheredLen, restLen);
-                t->buf[replacementLen + restLen] = 0;
-            }
-            break;
-        }
-    }
-
-    if (grandfatheredLen == 0) {
-        for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
-            const char* redundantTag = REDUNDANT[i];
-            size_t redundantTagLen = uprv_strlen(redundantTag);
-            // The preferred tag for a redundant tag is always shorter than redundant
-            // tag. A redundant tag may or may not be followed by other subtags.
-            // (i.e. "zh-yue" or "zh-yue-u-co-pinyin").
-            if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) {
-                const char* redundantTagEnd = tagBuf + redundantTagLen;
-                if (*redundantTagEnd  == '\0' || *redundantTagEnd == SEP) {
-                    const char* preferredTag = REDUNDANT[i + 1];
-                    size_t preferredTagLen = uprv_strlen(preferredTag);
-                    uprv_strncpy(t->buf, preferredTag, preferredTagLen);
-                    if (*redundantTagEnd == SEP) {
-                        /* regions overlap, hence memmove; +1 moves the terminator too */
-                        uprv_memmove(tagBuf + preferredTagLen,
-                                     redundantTagEnd,
-                                     tagLen - redundantTagLen + 1);
-                    } else {
-                        tagBuf[preferredTagLen] = '\0';
-                    }
-                    // parsedLen should be the length of the input
-                    // before redundantTag is replaced by preferredTag.
-                    // Save the delta to add it back later.
-                    parsedLenDelta = redundantTagLen - preferredTagLen;
-                    break;
-                }
-            }
-        }
-    }
-
-    /*
-     * langtag      =   language
-     *                  ["-" script]
-     *                  ["-" region]
-     *                  *("-" variant)
-     *                  *("-" extension)
-     *                  ["-" privateuse]
-     */
-
-    next = LANG | PRIV;
-    pNext = pLastGoodPosition = tagBuf;
-    extlangIdx = 0;
-    pExtension = NULL;
-    pExtValueSubtag = NULL;
-    pExtValueSubtagEnd = NULL;
-
-    while (pNext) {
-        char *pSep;
-
-        pSubtag = pNext;
-
-        /* locate next separator char */
-        pSep = pSubtag;
-        while (*pSep) {
-            if (*pSep == SEP) {
-                break;
-            }
-            pSep++;
-        }
-        if (*pSep == 0) {
-            /* last subtag */
-            pNext = NULL;
-        } else {
-            pNext = pSep + 1;
-        }
-        subtagLen = (int32_t)(pSep - pSubtag);
-
-        if (next & LANG) {
-            if (ultag_isLanguageSubtag(pSubtag, subtagLen)) {
-                *pSep = 0;  /* terminate */
-                // TODO: move deprecated language code handling here.
-                t->language = T_CString_toLowerCase(pSubtag);
-
-                pLastGoodPosition = pSep;
-                next = SCRT | REGN | VART | EXTS | PRIV;
-                if (subtagLen <= 3) {
-                    next |= EXTL;
-                }
-                continue;
-            }
-        }
-        if (next & EXTL) {
-            if (_isExtlangSubtag(pSubtag, subtagLen)) {
-                *pSep = 0;
-                t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
-
-                pLastGoodPosition = pSep;
-                if (extlangIdx < 3) {
-                    next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
-                } else {
-                    next = SCRT | REGN | VART | EXTS | PRIV;
-                }
-                continue;
-            }
-        }
-        if (next & SCRT) {
-            if (ultag_isScriptSubtag(pSubtag, subtagLen)) {
-                char *p = pSubtag;
-
-                *pSep = 0;
-
-                /* to title case */
-                *p = uprv_toupper(*p);
-                p++;
-                for (; *p; p++) {
-                    *p = uprv_tolower(*p);
-                }
-
-                t->script = pSubtag;
-
-                pLastGoodPosition = pSep;
-                next = REGN | VART | EXTS | PRIV;
-                continue;
-            }
-        }
-        if (next & REGN) {
-            if (ultag_isRegionSubtag(pSubtag, subtagLen)) {
-                *pSep = 0;
-                // TODO: move deprecated region code handling here.
-                t->region = T_CString_toUpperCase(pSubtag);
-
-                pLastGoodPosition = pSep;
-                next = VART | EXTS | PRIV;
-                continue;
-            }
-        }
-        if (next & VART) {
-            if (_isVariantSubtag(pSubtag, subtagLen) ||
-               (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
-                VariantListEntry *var;
-                UBool isAdded;
-
-                var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
-                if (var == NULL) {
-                    *status = U_MEMORY_ALLOCATION_ERROR;
-                    return NULL;
-                }
-                *pSep = 0;
-                var->variant = T_CString_toUpperCase(pSubtag);
-                isAdded = _addVariantToList(&(t->variants), var);
-                if (!isAdded) {
-                    /* duplicated variant entry */
-                    uprv_free(var);
-                    break;
-                }
-                pLastGoodPosition = pSep;
-                next = VART | EXTS | PRIV;
-                continue;
-            }
-        }
-        if (next & EXTS) {
-            if (_isExtensionSingleton(pSubtag, subtagLen)) {
-                if (pExtension != NULL) {
-                    if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
-                        /* the previous extension is incomplete */
-                        uprv_free(pExtension);
-                        pExtension = NULL;
-                        break;
-                    }
-
-                    /* terminate the previous extension value */
-                    *pExtValueSubtagEnd = 0;
-                    pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
-
-                    /* insert the extension to the list */
-                    if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
-                        pLastGoodPosition = pExtValueSubtagEnd;
-                    } else {
-                        /* stop parsing here */
-                        uprv_free(pExtension);
-                        pExtension = NULL;
-                        break;
-                    }
-                }
-
-                /* create a new extension */
-                pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
-                if (pExtension == NULL) {
-                    *status = U_MEMORY_ALLOCATION_ERROR;
-                    return NULL;
-                }
-                *pSep = 0;
-                pExtension->key = T_CString_toLowerCase(pSubtag);
-                pExtension->value = NULL;   /* will be set later */
-
-                /*
-                 * reset the start and the end location of extension value
-                 * subtags for this extension
-                 */
-                pExtValueSubtag = NULL;
-                pExtValueSubtagEnd = NULL;
-
-                next = EXTV;
-                continue;
-            }
-        }
-        if (next & EXTV) {
-            if (_isExtensionSubtag(pSubtag, subtagLen)) {
-                if (pExtValueSubtag == NULL) {
-                    /* if the start position of this extension's value is not yet set,
-                        this one is the first value subtag */
-                    pExtValueSubtag = pSubtag;
-                }
-
-                /* Mark the end of this subtag */
-                pExtValueSubtagEnd = pSep;
-                next = EXTS | EXTV | PRIV;
-
-                continue;
-            }
-        }
-        if (next & PRIV) {
-            if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
-                char *pPrivuseVal;
-
-                if (pExtension != NULL) {
-                    /* Process the last extension */
-                    if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
-                        /* the previous extension is incomplete */
-                        uprv_free(pExtension);
-                        pExtension = NULL;
-                        break;
-                    } else {
-                        /* terminate the previous extension value */
-                        *pExtValueSubtagEnd = 0;
-                        pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
-
-                        /* insert the extension to the list */
-                        if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
-                            pLastGoodPosition = pExtValueSubtagEnd;
-                            pExtension = NULL;
-                        } else {
-                            /* stop parsing here */
-                            uprv_free(pExtension);
-                            pExtension = NULL;
-                            break;
-                        }
-                    }
-                }
-
-                /* The rest of part will be private use value subtags */
-                if (pNext == NULL) {
-                    /* empty private use subtag */
-                    break;
-                }
-                /* back up the private use value start position */
-                pPrivuseVal = pNext;
-
-                /* validate private use value subtags */
-                while (pNext) {
-                    pSubtag = pNext;
-                    pSep = pSubtag;
-                    while (*pSep) {
-                        if (*pSep == SEP) {
-                            break;
-                        }
-                        pSep++;
-                    }
-                    if (*pSep == 0) {
-                        /* last subtag */
-                        pNext = NULL;
-                    } else {
-                        pNext = pSep + 1;
-                    }
-                    subtagLen = (int32_t)(pSep - pSubtag);
-
-                    if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
-                        /* what follows the prefix is parsed as variants by the outer loop */
-                        *pSep = 0;
-                        next = VART;
-                        privateuseVar = TRUE;
-                        break;
-                    } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
-                        pLastGoodPosition = pSep;
-                    } else {
-                        break;
-                    }
-                }
-
-                if (next == VART) {
-                    continue;
-                }
-
-                if (pLastGoodPosition - pPrivuseVal > 0) {
-                    *pLastGoodPosition = 0;
-                    t->privateuse = T_CString_toLowerCase(pPrivuseVal);
-                }
-                /* No more subtags, exiting the parse loop */
-                break;
-            }
-            break;
-        }
-
-        /* If we fell through here, it means this subtag is illegal - quit parsing */
-        break;
-    }
-
-    if (pExtension != NULL) {
-        /* Process the last extension */
-        if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
-            /* the previous extension is incomplete */
-            uprv_free(pExtension);
-        } else {
-            /* terminate the previous extension value */
-            *pExtValueSubtagEnd = 0;
-            pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
-            /* insert the extension to the list */
-            if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
-                pLastGoodPosition = pExtValueSubtagEnd;
-            } else {
-                uprv_free(pExtension);
-            }
-        }
-    }
-
-    if (parsedLen != NULL) {
-        /* the delta converts a position in the rewritten buffer back to one in the input */
-        *parsedLen = (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
-    }
-
-    return t.orphan();
-}
-
-/**
-* Ticket #12705 - Turn optimization back on.
-*/
-#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
-#pragma optimize( "", on )
-#endif
-
-/**
- * Releases a ULanguageTag: its text buffer, every node of its variant and
- * extension lists, and finally the object itself. A NULL argument is
- * accepted and ignored.
- */
-static void
-ultag_close(ULanguageTag* langtag) {
-    if (langtag == NULL) {
-        return;
-    }
-
-    uprv_free(langtag->buf);
-
-    /* Each node's successor is read before the node is freed. */
-    for (VariantListEntry *varNode = langtag->variants; varNode != NULL; ) {
-        VariantListEntry *following = varNode->next;
-        uprv_free(varNode);
-        varNode = following;
-    }
-
-    for (ExtensionListEntry *extNode = langtag->extensions; extNode != NULL; ) {
-        ExtensionListEntry *following = extNode->next;
-        uprv_free(extNode);
-        extNode = following;
-    }
-
-    uprv_free(langtag);
-}
-
-/** Returns the `language` field of langtag. */
-static const char*
-ultag_getLanguage(const ULanguageTag* langtag) {
-    const char *language = langtag->language;
-    return language;
-}
-
-#if 0 /* compiled out; returns the DEPRECATEDLANGS replacement for langtag->language, or the language itself when no entry matches */
-static const char*
-ultag_getJDKLanguage(const ULanguageTag* langtag) {
- int32_t i;
- for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { /* entries are read in pairs: [i] is compared, [i + 1] is returned */
- if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
- return DEPRECATEDLANGS[i + 1];
- }
- }
- return langtag->language;
-}
-#endif
-
-/**
- * Returns the extlang entry stored at position idx, or NULL when idx lies
- * outside 0 .. MAXEXTLANG-1.
- */
-static const char*
-ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
-    if (idx < 0 || idx >= MAXEXTLANG) {
-        return NULL;
-    }
-    return langtag->extlang[idx];
-}
-
-/** Counts the non-NULL entries among the MAXEXTLANG extlang slots of langtag. */
-static int32_t
-ultag_getExtlangSize(const ULanguageTag* langtag) {
-    int32_t filled = 0;
-    int32_t slot = 0;
-    while (slot < MAXEXTLANG) {
-        filled += langtag->extlang[slot] ? 1 : 0;
-        slot++;
-    }
-    return filled;
-}
-
-/** Returns the `script` field of langtag. */
-static const char*
-ultag_getScript(const ULanguageTag* langtag) {
-    const char *script = langtag->script;
-    return script;
-}
-
-/** Returns the `region` field of langtag. */
-static const char*
-ultag_getRegion(const ULanguageTag* langtag) {
-    const char *region = langtag->region;
-    return region;
-}
-
-/**
- * Returns the variant string of the idx-th node (counting from 0) of
- * langtag's variant list, or NULL when the list has no such node.
- */
-static const char*
-ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
-    int32_t position = 0;
-    for (const VariantListEntry *node = langtag->variants; node != NULL; node = node->next) {
-        if (position == idx) {
-            return node->variant;
-        }
-        position++;
-    }
-    return NULL;
-}
-
-static int32_t
-ultag_getVariantsSize(const ULanguageTag* langtag) {
- int32_t size = 0;
- VariantListEntry *cur = langtag->variants;
- while (TRUE) {
- if (cur == NULL) {
- break;
- }
- size++;
- cur = cur->next;
- }
- return size;
-}
-
-static const char*
-ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
- const char *key = NULL;
- ExtensionListEntry *cur = langtag->extensions;
- int32_t i = 0;
- while (cur) {
- if (i == idx) {
- key = cur->key;
- break;
- }
- cur = cur->next;
- i++;
- }
- return key;
-}
-
-static const char*
-ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
- const char *val = NULL;
- ExtensionListEntry *cur = langtag->extensions;
- int32_t i = 0;
- while (cur) {
- if (i == idx) {
- val = cur->value;
- break;
- }
- cur = cur->next;
- i++;
- }
- return val;
-}
-
-static int32_t
-ultag_getExtensionsSize(const ULanguageTag* langtag) {
- int32_t size = 0;
- ExtensionListEntry *cur = langtag->extensions;
- while (TRUE) {
- if (cur == NULL) {
- break;
- }
- size++;
- cur = cur->next;
- }
- return size;
-}
-
-static const char*
-ultag_getPrivateUse(const ULanguageTag* langtag) {
- return langtag->privateuse;
-}
-
-#if 0
-static const char*
-ultag_getGrandfathered(const ULanguageTag* langtag) {
- return langtag->grandfathered;
-}
-#endif
-
-
-/*
-* -------------------------------------------------
-*
-* Locale/BCP47 conversion APIs, exposed as uloc_*
-*
-* -------------------------------------------------
-*/
-U_CAPI int32_t U_EXPORT2
-uloc_toLanguageTag(const char* localeID,
- char* langtag,
- int32_t langtagCapacity,
- UBool strict,
- UErrorCode* status) {
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- icu::CheckedArrayByteSink sink(langtag, langtagCapacity);
- ulocimp_toLanguageTag(localeID, sink, strict, status);
-
- int32_t reslen = sink.NumberOfBytesAppended();
-
- if (U_FAILURE(*status)) {
- return reslen;
- }
-
- if (sink.Overflowed()) {
- *status = U_BUFFER_OVERFLOW_ERROR;
- } else {
- u_terminateChars(langtag, langtagCapacity, reslen, status);
- }
-
- return reslen;
-}
-
-
-U_CAPI void U_EXPORT2
-ulocimp_toLanguageTag(const char* localeID,
- icu::ByteSink& sink,
- UBool strict,
- UErrorCode* status) {
- icu::CharString canonical;
- int32_t reslen;
- UErrorCode tmpStatus = U_ZERO_ERROR;
- UBool hadPosix = FALSE;
- const char* pKeywordStart;
-
- /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
- int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID));
- if (resultCapacity > 0) {
- char* buffer;
-
- for (;;) {
- buffer = canonical.getAppendBuffer(
- /*minCapacity=*/resultCapacity,
- /*desiredCapacityHint=*/resultCapacity,
- resultCapacity,
- tmpStatus);
-
- if (U_FAILURE(tmpStatus)) {
- *status = tmpStatus;
- return;
- }
-
- reslen =
- uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
-
- if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
- break;
- }
-
- resultCapacity = reslen;
- tmpStatus = U_ZERO_ERROR;
- }
-
- if (U_FAILURE(tmpStatus)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- canonical.append(buffer, reslen, tmpStatus);
- if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
- tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
- }
-
- if (U_FAILURE(tmpStatus)) {
- *status = tmpStatus;
- return;
- }
- }
-
- /* For handling special case - private use only tag */
- pKeywordStart = locale_getKeywordsStart(canonical.data());
- if (pKeywordStart == canonical.data()) {
- int kwdCnt = 0;
- UBool done = FALSE;
-
- icu::LocalUEnumerationPointer kwdEnum(uloc_openKeywords(canonical.data(), &tmpStatus));
- if (U_SUCCESS(tmpStatus)) {
- kwdCnt = uenum_count(kwdEnum.getAlias(), &tmpStatus);
- if (kwdCnt == 1) {
- const char *key;
- int32_t len = 0;
-
- key = uenum_next(kwdEnum.getAlias(), &len, &tmpStatus);
- if (len == 1 && *key == PRIVATEUSE) {
- char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
- buf[0] = PRIVATEUSE;
- buf[1] = SEP;
- len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
- if (U_SUCCESS(tmpStatus)) {
- if (ultag_isPrivateuseValueSubtags(&buf[2], len)) {
- /* return private use only tag */
- sink.Append(buf, len + 2);
- done = TRUE;
- } else if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- done = TRUE;
- }
- /* if not strict mode, then "und" will be returned */
- } else {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- done = TRUE;
- }
- }
- }
- if (done) {
- return;
- }
- }
- }
-
- _appendLanguageToLanguageTag(canonical.data(), sink, strict, status);
- _appendScriptToLanguageTag(canonical.data(), sink, strict, status);
- _appendRegionToLanguageTag(canonical.data(), sink, strict, status);
- _appendVariantsToLanguageTag(canonical.data(), sink, strict, &hadPosix, status);
- _appendKeywordsToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
- _appendPrivateuseToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
-}
-
-
-U_CAPI int32_t U_EXPORT2
-uloc_forLanguageTag(const char* langtag,
- char* localeID,
- int32_t localeIDCapacity,
- int32_t* parsedLength,
- UErrorCode* status) {
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- icu::CheckedArrayByteSink sink(localeID, localeIDCapacity);
- ulocimp_forLanguageTag(langtag, -1, sink, parsedLength, status);
-
- int32_t reslen = sink.NumberOfBytesAppended();
-
- if (U_FAILURE(*status)) {
- return reslen;
- }
-
- if (sink.Overflowed()) {
- *status = U_BUFFER_OVERFLOW_ERROR;
- } else {
- u_terminateChars(localeID, localeIDCapacity, reslen, status);
- }
-
- return reslen;
-}
-
-
-U_CAPI void U_EXPORT2
-ulocimp_forLanguageTag(const char* langtag,
- int32_t tagLen,
- icu::ByteSink& sink,
- int32_t* parsedLength,
- UErrorCode* status) {
- UBool isEmpty = TRUE;
- const char *subtag, *p;
- int32_t len;
- int32_t i, n;
- UBool noRegion = TRUE;
-
- icu::LocalULanguageTagPointer lt(ultag_parse(langtag, tagLen, parsedLength, status));
- if (U_FAILURE(*status)) {
- return;
- }
-
- /* language */
- subtag = ultag_getExtlangSize(lt.getAlias()) > 0 ? ultag_getExtlang(lt.getAlias(), 0) : ultag_getLanguage(lt.getAlias());
- if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
- len = (int32_t)uprv_strlen(subtag);
- if (len > 0) {
- sink.Append(subtag, len);
- isEmpty = FALSE;
- }
- }
-
- /* script */
- subtag = ultag_getScript(lt.getAlias());
- len = (int32_t)uprv_strlen(subtag);
- if (len > 0) {
- sink.Append("_", 1);
- isEmpty = FALSE;
-
- /* write out the script in title case */
- char c = uprv_toupper(*subtag);
- sink.Append(&c, 1);
- sink.Append(subtag + 1, len - 1);
- }
-
- /* region */
- subtag = ultag_getRegion(lt.getAlias());
- len = (int32_t)uprv_strlen(subtag);
- if (len > 0) {
- sink.Append("_", 1);
- isEmpty = FALSE;
-
- /* write out the region in upper case */
- p = subtag;
- while (*p) {
- char c = uprv_toupper(*p);
- sink.Append(&c, 1);
- p++;
- }
- noRegion = FALSE;
- }
-
- /* variants */
- _sortVariants(lt.getAlias()->variants);
- n = ultag_getVariantsSize(lt.getAlias());
- if (n > 0) {
- if (noRegion) {
- sink.Append("_", 1);
- isEmpty = FALSE;
- }
-
- for (i = 0; i < n; i++) {
- subtag = ultag_getVariant(lt.getAlias(), i);
- sink.Append("_", 1);
-
- /* write out the variant in upper case */
- p = subtag;
- while (*p) {
- char c = uprv_toupper(*p);
- sink.Append(&c, 1);
- p++;
- }
- }
- }
-
- /* keywords */
- n = ultag_getExtensionsSize(lt.getAlias());
- subtag = ultag_getPrivateUse(lt.getAlias());
- if (n > 0 || uprv_strlen(subtag) > 0) {
- if (isEmpty && n > 0) {
- /* need a language */
- sink.Append(LANG_UND, LANG_UND_LEN);
- }
- _appendKeywords(lt.getAlias(), sink, status);
- }
-}
diff --git a/contrib/libs/icu/common/ulocimp.h b/contrib/libs/icu/common/ulocimp.h
deleted file mode 100644
index b9e2eb4bfeb..00000000000
--- a/contrib/libs/icu/common/ulocimp.h
+++ /dev/null
@@ -1,282 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2004-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#ifndef ULOCIMP_H
-#define ULOCIMP_H
-
-#include "unicode/bytestream.h"
-#include "unicode/uloc.h"
-
-/**
- * Create an iterator over the specified keywords list
- * @param keywordList double-null terminated list. Will be copied.
- * @param keywordListSize size in bytes of keywordList
- * @param status err code
- * @return enumeration (owned by caller) of the keyword list.
- * @internal ICU 3.0
- */
-U_CAPI UEnumeration* U_EXPORT2
-uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);
-
-/**
- * Look up a resource bundle table item with fallback on the table level.
- * This is accessible so it can be called by C++ code.
- */
-U_CAPI const UChar * U_EXPORT2
-uloc_getTableStringWithFallback(
- const char *path,
- const char *locale,
- const char *tableKey,
- const char *subTableKey,
- const char *itemKey,
- int32_t *pLength,
- UErrorCode *pErrorCode);
-
-/*returns TRUE if a is an ID separator FALSE otherwise*/
-#define _isIDSeparator(a) (a == '_' || a == '-')
-
-U_CFUNC const char*
-uloc_getCurrentCountryID(const char* oldID);
-
-U_CFUNC const char*
-uloc_getCurrentLanguageID(const char* oldID);
-
-U_CFUNC int32_t
-ulocimp_getLanguage(const char *localeID,
- char *language, int32_t languageCapacity,
- const char **pEnd);
-
-U_CFUNC int32_t
-ulocimp_getScript(const char *localeID,
- char *script, int32_t scriptCapacity,
- const char **pEnd);
-
-U_CFUNC int32_t
-ulocimp_getCountry(const char *localeID,
- char *country, int32_t countryCapacity,
- const char **pEnd);
-
-U_STABLE void U_EXPORT2
-ulocimp_getName(const char* localeID,
- icu::ByteSink& sink,
- UErrorCode* err);
-
-U_STABLE void U_EXPORT2
-ulocimp_getBaseName(const char* localeID,
- icu::ByteSink& sink,
- UErrorCode* err);
-
-U_STABLE void U_EXPORT2
-ulocimp_canonicalize(const char* localeID,
- icu::ByteSink& sink,
- UErrorCode* err);
-
-/**
- * Writes a well-formed language tag for this locale ID.
- *
- * **Note**: When `strict` is FALSE, any locale fields which do not satisfy the
- * BCP47 syntax requirement will be omitted from the result. When `strict` is
- * TRUE, this function sets U_ILLEGAL_ARGUMENT_ERROR to the `err` if any locale
- * fields do not satisfy the BCP47 syntax requirement.
- *
- * @param localeID the input locale ID
- * @param sink the output sink receiving the BCP47 language
- * tag for this Locale.
- * @param strict boolean value indicating if the function returns
- * an error for an ill-formed input locale ID.
- * @param err error information if receiving the language
- * tag failed.
- * (No value is returned; the BCP47 language tag is written to `sink`.)
- *
- * @internal ICU 64
- */
-U_STABLE void U_EXPORT2
-ulocimp_toLanguageTag(const char* localeID,
- icu::ByteSink& sink,
- UBool strict,
- UErrorCode* err);
-
-/**
- * Returns a locale ID for the specified BCP47 language tag string.
- * If the specified language tag contains any ill-formed subtags,
- * the first such subtag and all following subtags are ignored.
- * <p>
- * This implements the 'Language-Tag' production of BCP47, and so
- * supports grandfathered (regular and irregular) as well as private
- * use language tags. Private use tags are represented as 'x-whatever',
- * and grandfathered tags are converted to their canonical replacements
- * where they exist. Note that a few grandfathered tags have no modern
- * replacement, these will be converted using the fallback described in
- * the first paragraph, so some information might be lost.
- * @param langtag the input BCP47 language tag.
- * @param tagLen the length of langtag, or -1 to call uprv_strlen().
- * @param sink the output sink receiving a locale ID for the
- * specified BCP47 language tag.
- * @param parsedLength if not NULL, successfully parsed length
- * for the input language tag is set.
- * @param err error information if receiving the locale ID
- * failed.
- * @internal ICU 63
- */
-U_CAPI void U_EXPORT2
-ulocimp_forLanguageTag(const char* langtag,
- int32_t tagLen,
- icu::ByteSink& sink,
- int32_t* parsedLength,
- UErrorCode* err);
-
-/**
- * Get the region to use for supplemental data lookup. Uses
- * (1) any region specified by locale tag "rg"; if none then
- * (2) any unicode_region_tag in the locale ID; if none then
- * (3) if inferRegion is TRUE, the region suggested by
- * getLikelySubtags on the localeID.
- * If no region is found, returns length 0.
- *
- * @param localeID
- * The complete locale ID (with keywords) from which
- * to get the region to use for supplemental data.
- * @param inferRegion
- * If TRUE, will try to infer region from localeID if
- * no other region is found.
- * @param region
- * Buffer in which to put the region ID found; should
- * have a capacity at least ULOC_COUNTRY_CAPACITY.
- * @param regionCapacity
- * The actual capacity of the region buffer.
- * @param status
- * Pointer to in/out UErrorCode value for latest status.
- * @return
- * The length of any region code found, or 0 if none.
- * @internal ICU 57
- */
-U_CAPI int32_t U_EXPORT2
-ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
- char *region, int32_t regionCapacity, UErrorCode* status);
-
-/**
- * Add the likely subtags for a provided locale ID, per the algorithm described
- * in the following CLDR technical report:
- *
- * http://www.unicode.org/reports/tr35/#Likely_Subtags
- *
- * If localeID is already in the maximal form, or there is no data available
- * for maximization, it will be copied to the output buffer. For example,
- * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
- *
- * Examples:
- *
- * "en" maximizes to "en_Latn_US"
- *
- * "de" maximizes to "de_Latn_DE"
- *
- * "sr" maximizes to "sr_Cyrl_RS"
- *
- * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
- *
- * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
- *
- * @param localeID The locale to maximize
- * @param sink The output sink receiving the maximized locale
- * @param err Error information if maximizing the locale failed. If the length
- * of the localeID and the null-terminator is greater than the maximum allowed size,
- * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
- * @internal ICU 64
- */
-U_STABLE void U_EXPORT2
-ulocimp_addLikelySubtags(const char* localeID,
- icu::ByteSink& sink,
- UErrorCode* err);
-
-/**
- * Minimize the subtags for a provided locale ID, per the algorithm described
- * in the following CLDR technical report:
- *
- * http://www.unicode.org/reports/tr35/#Likely_Subtags
- *
- * If localeID is already in the minimal form, or there is no data available
- * for minimization, it will be copied to the output buffer. Since the
- * minimization algorithm relies on proper maximization, see the comments
- * for ulocimp_addLikelySubtags for reasons why there might not be any data.
- *
- * Examples:
- *
- * "en_Latn_US" minimizes to "en"
- *
- * "de_Latn_DE" minimizes to "de"
- *
- * "sr_Cyrl_RS" minimizes to "sr"
- *
- * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
- * script, and minimizing to "zh" would imply "zh_Hans_CN".)
- *
- * @param localeID The locale to minimize
- * @param sink The output sink receiving the minimized locale
- * @param err Error information if minimizing the locale failed. If the length
- * of the localeID and the null-terminator is greater than the maximum allowed size,
- * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
- * @internal ICU 64
- */
-U_STABLE void U_EXPORT2
-ulocimp_minimizeSubtags(const char* localeID,
- icu::ByteSink& sink,
- UErrorCode* err);
-
-U_CAPI const char * U_EXPORT2
-locale_getKeywordsStart(const char *localeID);
-
-U_CFUNC UBool
-ultag_isExtensionSubtags(const char* s, int32_t len);
-
-U_CFUNC UBool
-ultag_isLanguageSubtag(const char* s, int32_t len);
-
-U_CFUNC UBool
-ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
-
-U_CFUNC UBool
-ultag_isRegionSubtag(const char* s, int32_t len);
-
-U_CFUNC UBool
-ultag_isScriptSubtag(const char* s, int32_t len);
-
-U_CFUNC UBool
-ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
-
-U_CFUNC UBool
-ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
-
-U_CFUNC UBool
-ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
-
-U_CFUNC UBool
-ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
-
-U_CFUNC UBool
-ultag_isUnicodeLocaleKey(const char* s, int32_t len);
-
-U_CFUNC UBool
-ultag_isUnicodeLocaleType(const char* s, int32_t len);
-
-U_CFUNC UBool
-ultag_isVariantSubtags(const char* s, int32_t len);
-
-U_CFUNC const char*
-ulocimp_toBcpKey(const char* key);
-
-U_CFUNC const char*
-ulocimp_toLegacyKey(const char* key);
-
-U_CFUNC const char*
-ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
-
-U_CFUNC const char*
-ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
-
-#endif
diff --git a/contrib/libs/icu/common/umapfile.cpp b/contrib/libs/icu/common/umapfile.cpp
deleted file mode 100644
index 3e714876a4d..00000000000
--- a/contrib/libs/icu/common/umapfile.cpp
+++ /dev/null
@@ -1,530 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1999-2013, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************/
-
-
-/*----------------------------------------------------------------------------
- *
- * Memory mapped file wrappers for use by the ICU Data Implementation
- * All of the platform-specific implementation for mapping data files
- * is here. The rest of the ICU Data implementation uses only the
- * wrapper functions.
- *
- *----------------------------------------------------------------------------*/
-/* Defines _XOPEN_SOURCE for access to POSIX functions.
- * Must be before any other #includes. */
-#include "uposixdefs.h"
-
-#include "unicode/putil.h"
-#include "unicode/ustring.h"
-#include "udatamem.h"
-#include "umapfile.h"
-
-/* memory-mapping base definitions ------------------------------------------ */
-
-#if MAP_IMPLEMENTATION==MAP_WIN32
-#ifndef WIN32_LEAN_AND_MEAN
-# define WIN32_LEAN_AND_MEAN
-#endif
-# define VC_EXTRALEAN
-# define NOUSER
-# define NOSERVICE
-# define NOIME
-# define NOMCX
-
-# if U_PLATFORM_HAS_WINUWP_API == 1
- // Some previous versions of the Windows 10 SDK don't expose various APIs for UWP applications
- // to use, even though UWP apps are allowed to call and use them. Temporarily change the
- // WINAPI family partition below to Desktop, so that function declarations are visible for UWP.
-# include <winapifamily.h>
-# if !(WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_SYSTEM))
-# pragma push_macro("WINAPI_PARTITION_DESKTOP")
-# undef WINAPI_PARTITION_DESKTOP
-# define WINAPI_PARTITION_DESKTOP 1
-# define CHANGED_WINAPI_PARTITION_DESKTOP_VALUE
-# endif
-# endif
-
-# include <windows.h>
-
-# if U_PLATFORM_HAS_WINUWP_API == 1 && defined(CHANGED_WINAPI_PARTITION_DESKTOP_VALUE)
-# pragma pop_macro("WINAPI_PARTITION_DESKTOP")
-# endif
-
-# include "cmemory.h"
-
-typedef HANDLE MemoryMap;
-
-# define IS_MAP(map) ((map)!=nullptr)
-
-#elif MAP_IMPLEMENTATION==MAP_POSIX || MAP_IMPLEMENTATION==MAP_390DLL
- typedef size_t MemoryMap;
-
-# define IS_MAP(map) ((map)!=0)
-
-# include <unistd.h>
-# include <sys/mman.h>
-# include <sys/stat.h>
-# include <fcntl.h>
-
-# ifndef MAP_FAILED
-# define MAP_FAILED ((void*)-1)
-# endif
-
-# if MAP_IMPLEMENTATION==MAP_390DLL
- /* No memory mapping for 390 batch mode. Fake it using dll loading. */
-# include <dll.h>
-# include "cstring.h"
-# include "cmemory.h"
-# include "unicode/udata.h"
-# define LIB_PREFIX "lib"
-# define LIB_SUFFIX ".dll"
- /* This is inconvenient until we figure out what to do with U_ICUDATA_NAME in utypes.h */
-# define U_ICUDATA_ENTRY_NAME "icudt" U_ICU_VERSION_SHORT U_LIB_SUFFIX_C_NAME_STRING "_dat"
-# endif
-#elif MAP_IMPLEMENTATION==MAP_STDIO
-# include <stdio.h>
-# include "cmemory.h"
-
- typedef void *MemoryMap;
-
-# define IS_MAP(map) ((map)!=nullptr)
-#endif
-
-/*----------------------------------------------------------------------------*
- * *
- * Memory Mapped File support. Platform dependent implementation of *
- * functions used by the rest of the implementation.*
- * *
- *----------------------------------------------------------------------------*/
-#if MAP_IMPLEMENTATION==MAP_NONE
- U_CFUNC UBool
- uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
- if (U_FAILURE(*status)) {
- return FALSE;
- }
- UDataMemory_init(pData); /* Clear the output struct. */
- return FALSE; /* no file access */
- }
-
- U_CFUNC void uprv_unmapFile(UDataMemory *pData) {
- /* nothing to do */
- }
-#elif MAP_IMPLEMENTATION==MAP_WIN32
- U_CFUNC UBool
- uprv_mapFile(
- UDataMemory *pData, /* Fill in with info on the result doing the mapping. */
- /* Output only; any original contents are cleared. */
- const char *path, /* File path to be opened/mapped. */
- UErrorCode *status /* Error status, used to report out-of-memory errors. */
- )
- {
- if (U_FAILURE(*status)) {
- return FALSE;
- }
-
- HANDLE map = nullptr;
- HANDLE file = INVALID_HANDLE_VALUE;
-
- UDataMemory_init(pData); /* Clear the output struct. */
-
- /* open the input file */
-#if U_PLATFORM_HAS_WINUWP_API == 0
- // Note: In the non-UWP code-path (ie: Win32), the value of the path variable might have come from
- // the CRT 'getenv' function, and would therefore be encoded in the default ANSI code page.
- // This means that we can't call the *W version of API below, whereas in the UWP code-path
- // there is no 'getenv' call, and thus the string will be only UTF-8/Invariant characters.
- file=CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, nullptr,
- OPEN_EXISTING,
- FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, nullptr);
-#else
- // Convert from UTF-8 string to UTF-16 string.
- wchar_t utf16Path[MAX_PATH];
- int32_t pathUtf16Len = 0;
- u_strFromUTF8(reinterpret_cast<UChar*>(utf16Path), static_cast<int32_t>(UPRV_LENGTHOF(utf16Path)), &pathUtf16Len, path, -1, status);
-
- if (U_FAILURE(*status)) {
- return FALSE;
- }
- if (*status == U_STRING_NOT_TERMINATED_WARNING) {
- // Report back an error instead of a warning.
- *status = U_BUFFER_OVERFLOW_ERROR;
- return FALSE;
- }
-
- file = CreateFileW(utf16Path, GENERIC_READ, FILE_SHARE_READ, nullptr,
- OPEN_EXISTING,
- FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS, nullptr);
-#endif
- if (file == INVALID_HANDLE_VALUE) {
- // If we failed to open the file due to an out-of-memory error, then we want
- // to report that error back to the caller.
- if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- }
- return FALSE;
- }
-
- // Note: We use NULL/nullptr for lpAttributes parameter below.
- // This means our handle cannot be inherited and we will get the default security descriptor.
- /* create an unnamed Windows file-mapping object for the specified file */
- map = CreateFileMappingW(file, nullptr, PAGE_READONLY, 0, 0, nullptr);
-
- CloseHandle(file);
- if (map == nullptr) {
- // If we failed to create the mapping due to an out-of-memory error, then
- // we want to report that error back to the caller.
- if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- }
- return FALSE;
- }
-
- /* map a view of the file into our address space */
- pData->pHeader = reinterpret_cast<const DataHeader *>(MapViewOfFile(map, FILE_MAP_READ, 0, 0, 0));
- if (pData->pHeader == nullptr) {
- CloseHandle(map);
- return FALSE;
- }
- pData->map = map;
- return TRUE;
- }
-
- U_CFUNC void
- uprv_unmapFile(UDataMemory *pData) {
- if (pData != nullptr && pData->map != nullptr) {
- UnmapViewOfFile(pData->pHeader);
- CloseHandle(pData->map);
- pData->pHeader = nullptr;
- pData->map = nullptr;
- }
- }
-
-
-
-#elif MAP_IMPLEMENTATION==MAP_POSIX
- U_CFUNC UBool
- uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
- int fd;
- int length;
- struct stat mystat;
- void *data;
-
- if (U_FAILURE(*status)) {
- return FALSE;
- }
-
- UDataMemory_init(pData); /* Clear the output struct. */
-
- /* determine the length of the file */
- if(stat(path, &mystat)!=0 || mystat.st_size<=0) {
- return FALSE;
- }
- length=mystat.st_size;
-
- /* open the file */
- fd=open(path, O_RDONLY);
- if(fd==-1) {
- return FALSE;
- }
-
- /* get a view of the mapping */
-#if U_PLATFORM != U_PF_HPUX
- data=mmap(0, length, PROT_READ, MAP_SHARED, fd, 0);
-#else
- data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0);
-#endif
- close(fd); /* no longer needed */
- if(data==MAP_FAILED) {
- // Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR?
- return FALSE;
- }
-
- pData->map = (char *)data + length;
- pData->pHeader=(const DataHeader *)data;
- pData->mapAddr = data;
-#if U_PLATFORM == U_PF_IPHONE
- posix_madvise(data, length, POSIX_MADV_RANDOM);
-#endif
- return TRUE;
- }
-
- U_CFUNC void
- uprv_unmapFile(UDataMemory *pData) {
- if(pData!=nullptr && pData->map!=nullptr) {
- size_t dataLen = (char *)pData->map - (char *)pData->mapAddr;
- if(munmap(pData->mapAddr, dataLen)==-1) {
- }
- pData->pHeader=nullptr;
- pData->map=0;
- pData->mapAddr=nullptr;
- }
- }
-
-
-
-#elif MAP_IMPLEMENTATION==MAP_STDIO
- /* copy of the filestrm.c/T_FileStream_size() implementation */
- static int32_t
- umap_fsize(FILE *f) {
- int32_t savedPos = ftell(f);
- int32_t size = 0;
-
- /*Changes by Bertrand A. D. doesn't affect the current position
- goes to the end of the file before ftell*/
- fseek(f, 0, SEEK_END);
- size = (int32_t)ftell(f);
- fseek(f, savedPos, SEEK_SET);
- return size;
- }
-
- U_CFUNC UBool
- uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
- FILE *file;
- int32_t fileLength;
- void *p;
-
- if (U_FAILURE(*status)) {
- return FALSE;
- }
-
- UDataMemory_init(pData); /* Clear the output struct. */
- /* open the input file */
- file=fopen(path, "rb");
- if(file==nullptr) {
- return FALSE;
- }
-
- /* get the file length */
- fileLength=umap_fsize(file);
- if(ferror(file) || fileLength<=20) {
- fclose(file);
- return FALSE;
- }
-
- /* allocate the memory to hold the file data */
- p=uprv_malloc(fileLength);
- if(p==nullptr) {
- fclose(file);
- *status = U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
- }
-
- /* read the file */
- if(fileLength!=fread(p, 1, fileLength, file)) {
- uprv_free(p);
- fclose(file);
- return FALSE;
- }
-
- fclose(file);
- pData->map=p;
- pData->pHeader=(const DataHeader *)p;
- pData->mapAddr=p;
- return TRUE;
- }
-
- U_CFUNC void
- uprv_unmapFile(UDataMemory *pData) {
- if(pData!=nullptr && pData->map!=nullptr) {
- uprv_free(pData->map);
- pData->map = nullptr;
- pData->mapAddr = nullptr;
- pData->pHeader = nullptr;
- }
- }
-
-
-#elif MAP_IMPLEMENTATION==MAP_390DLL
- /* 390 specific Library Loading.
- * This is the only platform left that dynamically loads an ICU Data Library.
- * All other platforms use .data files when dynamic loading is required, but
- * this turns out to be awkward to support in 390 batch mode.
- *
- * The idea here is to hide the fact that 390 is using dll loading from the
- * rest of ICU, and make it look like there is file loading happening.
- *
- */
-
- static char *strcpy_returnEnd(char *dest, const char *src)
- {
- while((*dest=*src)!=0) {
- ++dest;
- ++src;
- }
- return dest;
- }
-
- /*------------------------------------------------------------------------------
- *
- * computeDirPath given a user-supplied path of an item to be opened,
- * compute and return
- * - the full directory path to be used
- * when opening the file.
- * - Pointer to null at end of above returned path
- *
- * Parameters:
- * path: input path. Buffer is not altered.
- * pathBuffer: Output buffer. Any contents are overwritten.
- *
- * Returns:
- * Pointer to null termination in returned pathBuffer.
- *
- * TODO: This works the way ICU historically has, but the
- * whole data fallback search path is so complicated that
- * probably almost no one will ever really understand it,
- * the potential for confusion is large. (It's not just
- * this one function, but the whole scheme.)
- *
- *------------------------------------------------------------------------------*/
- static char *uprv_computeDirPath(const char *path, char *pathBuffer)
- {
- char *finalSlash; /* Ptr to last dir separator in input path, or null if none. */
- int32_t pathLen; /* Length of the returned directory path */
-
- finalSlash = 0;
- if (path != 0) {
- finalSlash = uprv_strrchr(path, U_FILE_SEP_CHAR);
- }
-
- *pathBuffer = 0;
- if (finalSlash == 0) {
- /* No user-supplied path.
- * Copy the ICU_DATA path to the path buffer and return that*/
- const char *icuDataDir;
- icuDataDir=u_getDataDirectory();
- if(icuDataDir!=nullptr && *icuDataDir!=0) {
- return strcpy_returnEnd(pathBuffer, icuDataDir);
- } else {
- /* there is no icuDataDir either. Just return the empty pathBuffer. */
- return pathBuffer;
- }
- }
-
- /* User supplied path did contain a directory portion.
- * Copy it to the output path buffer */
- pathLen = (int32_t)(finalSlash - path + 1);
- uprv_memcpy(pathBuffer, path, pathLen);
- *(pathBuffer+pathLen) = 0;
- return pathBuffer+pathLen;
- }
-
-
-# define DATA_TYPE "dat"
-
- U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
- const char *inBasename;
- char *basename;
- char pathBuffer[1024];
- const DataHeader *pHeader;
- dllhandle *handle;
- void *val=0;
-
- if (U_FAILURE(*status)) {
- return FALSE;
- }
-
- inBasename=uprv_strrchr(path, U_FILE_SEP_CHAR);
- if(inBasename==nullptr) {
- inBasename = path;
- } else {
- inBasename++;
- }
- basename=uprv_computeDirPath(path, pathBuffer);
- if(uprv_strcmp(inBasename, U_ICUDATA_NAME".dat") != 0) {
- /* must mmap file... for build */
- int fd;
- int length;
- struct stat mystat;
- void *data;
- UDataMemory_init(pData); /* Clear the output struct. */
-
- /* determine the length of the file */
- if(stat(path, &mystat)!=0 || mystat.st_size<=0) {
- return FALSE;
- }
- length=mystat.st_size;
-
- /* open the file */
- fd=open(path, O_RDONLY);
- if(fd==-1) {
- return FALSE;
- }
-
- /* get a view of the mapping */
- data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0);
- close(fd); /* no longer needed */
- if(data==MAP_FAILED) {
- // Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR?
- return FALSE;
- }
- pData->map = (char *)data + length;
- pData->pHeader=(const DataHeader *)data;
- pData->mapAddr = data;
- return TRUE;
- }
-
-# ifdef OS390BATCH
- /* ### hack: we still need to get u_getDataDirectory() fixed
- for OS/390 (batch mode - always return "//"? )
- and this here straightened out with LIB_PREFIX and LIB_SUFFIX (both empty?!)
- This is probably due to the strange file system on OS/390. It's more like
- a database with short entry names than a typical file system. */
- /* U_ICUDATA_NAME should always have the correct name */
- /* BUT FOR BATCH MODE IT IS AN EXCEPTION BECAUSE */
- /* THE FIRST THREE LETTERS ARE PREASSIGNED TO THE */
- /* PROJECT!!!!! */
- uprv_strcpy(pathBuffer, "IXMI" U_ICU_VERSION_SHORT "DA");
-# else
- /* set up the library name */
- uprv_strcpy(basename, LIB_PREFIX U_LIBICUDATA_NAME U_ICU_VERSION_SHORT LIB_SUFFIX);
-# endif
-
-# ifdef UDATA_DEBUG
- fprintf(stderr, "dllload: %s ", pathBuffer);
-# endif
-
- handle=dllload(pathBuffer);
-
-# ifdef UDATA_DEBUG
- fprintf(stderr, " -> %08X\n", handle );
-# endif
-
- if(handle != nullptr) {
- /* we have a data DLL - what kind of lookup do we need here? */
- /* try to find the Table of Contents */
- UDataMemory_init(pData); /* Clear the output struct. */
- val=dllqueryvar((dllhandle*)handle, U_ICUDATA_ENTRY_NAME);
- if(val == 0) {
- /* failed... so keep looking */
- return FALSE;
- }
-# ifdef UDATA_DEBUG
- fprintf(stderr, "dllqueryvar(%08X, %s) -> %08X\n", handle, U_ICUDATA_ENTRY_NAME, val);
-# endif
-
- pData->pHeader=(const DataHeader *)val;
- return TRUE;
- } else {
- return FALSE; /* no handle */
- }
- }
-
- U_CFUNC void uprv_unmapFile(UDataMemory *pData) {
- if(pData!=nullptr && pData->map!=nullptr) {
- uprv_free(pData->map);
- pData->map = nullptr;
- pData->mapAddr = nullptr;
- pData->pHeader = nullptr;
- }
- }
-
-#else
-# error MAP_IMPLEMENTATION is set incorrectly
-#endif
diff --git a/contrib/libs/icu/common/umapfile.h b/contrib/libs/icu/common/umapfile.h
deleted file mode 100644
index 92bd567a2a9..00000000000
--- a/contrib/libs/icu/common/umapfile.h
+++ /dev/null
@@ -1,57 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1999-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************/
-
-/*----------------------------------------------------------------------------------
- *
- * Memory mapped file wrappers for use by the ICU Data Implementation
- *
- * Porting note: The implementation of these functions is very platform specific.
- * Not all platforms can do real memory mapping. Those that can't
- * still must implement these functions, getting the data into memory using
- * whatever means are available.
- *
- * These functions are part of the ICU internal implementation, and
- * are not inteded to be used directly by applications.
- *
- *----------------------------------------------------------------------------------*/
-
-#ifndef __UMAPFILE_H__
-#define __UMAPFILE_H__
-
-#include "unicode/putil.h"
-#include "unicode/udata.h"
-#include "putilimp.h"
-
-U_CFUNC UBool uprv_mapFile(UDataMemory *pdm, const char *path, UErrorCode *status);
-U_CFUNC void uprv_unmapFile(UDataMemory *pData);
-
-/* MAP_NONE: no memory mapping, no file access at all */
-#define MAP_NONE 0
-#define MAP_WIN32 1
-#define MAP_POSIX 2
-#define MAP_STDIO 3
-#define MAP_390DLL 4
-
-#if UCONFIG_NO_FILE_IO
-# define MAP_IMPLEMENTATION MAP_NONE
-#elif U_PLATFORM_USES_ONLY_WIN32_API
-# define MAP_IMPLEMENTATION MAP_WIN32
-#elif U_HAVE_MMAP || U_PLATFORM == U_PF_OS390
-# if U_PLATFORM == U_PF_OS390 && defined (OS390_STUBDATA)
- /* No memory mapping for 390 batch mode. Fake it using dll loading. */
-# define MAP_IMPLEMENTATION MAP_390DLL
-# else
-# define MAP_IMPLEMENTATION MAP_POSIX
-# endif
-#else /* unknown platform, no memory map implementation: use stdio.h and uprv_malloc() instead */
-# define MAP_IMPLEMENTATION MAP_STDIO
-#endif
-
-#endif
diff --git a/contrib/libs/icu/common/umath.cpp b/contrib/libs/icu/common/umath.cpp
deleted file mode 100644
index 7cf4b317494..00000000000
--- a/contrib/libs/icu/common/umath.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1997-2006, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* This file contains platform independent math.
-*/
-
-#include "putilimp.h"
-
-U_CAPI int32_t U_EXPORT2
-uprv_max(int32_t x, int32_t y)
-{
- return (x > y ? x : y);
-}
-
-U_CAPI int32_t U_EXPORT2
-uprv_min(int32_t x, int32_t y)
-{
- return (x > y ? y : x);
-}
-
diff --git a/contrib/libs/icu/common/umutablecptrie.cpp b/contrib/libs/icu/common/umutablecptrie.cpp
deleted file mode 100644
index cdbe27080b4..00000000000
--- a/contrib/libs/icu/common/umutablecptrie.cpp
+++ /dev/null
@@ -1,1852 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-// umutablecptrie.cpp (inspired by utrie2_builder.cpp)
-// created: 2017dec29 Markus W. Scherer
-
-// #define UCPTRIE_DEBUG
-#ifdef UCPTRIE_DEBUG
-# include <stdio.h>
-#endif
-
-#include "unicode/utypes.h"
-#include "unicode/ucptrie.h"
-#include "unicode/umutablecptrie.h"
-#include "unicode/uobject.h"
-#include "unicode/utf16.h"
-#include "cmemory.h"
-#include "uassert.h"
-#include "ucptrie_impl.h"
-
-// ICU-20235 In case Microsoft math.h has defined this, undefine it.
-#ifdef OVERFLOW
-#undef OVERFLOW
-#endif
-
-U_NAMESPACE_BEGIN
-
-namespace {
-
-constexpr int32_t MAX_UNICODE = 0x10ffff;
-
-constexpr int32_t UNICODE_LIMIT = 0x110000;
-constexpr int32_t BMP_LIMIT = 0x10000;
-constexpr int32_t ASCII_LIMIT = 0x80;
-
-constexpr int32_t I_LIMIT = UNICODE_LIMIT >> UCPTRIE_SHIFT_3;
-constexpr int32_t BMP_I_LIMIT = BMP_LIMIT >> UCPTRIE_SHIFT_3;
-constexpr int32_t ASCII_I_LIMIT = ASCII_LIMIT >> UCPTRIE_SHIFT_3;
-
-constexpr int32_t SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3));
-
-// Flag values for data blocks.
-constexpr uint8_t ALL_SAME = 0;
-constexpr uint8_t MIXED = 1;
-constexpr uint8_t SAME_AS = 2;
-
-/** Start with allocation of 16k data entries. */
-constexpr int32_t INITIAL_DATA_LENGTH = ((int32_t)1 << 14);
-
-/** Grow about 8x each time. */
-constexpr int32_t MEDIUM_DATA_LENGTH = ((int32_t)1 << 17);
-
-/**
- * Maximum length of the build-time data array.
- * One entry per 0x110000 code points.
- */
-constexpr int32_t MAX_DATA_LENGTH = UNICODE_LIMIT;
-
-// Flag values for index-3 blocks while compacting/building.
-constexpr uint8_t I3_NULL = 0;
-constexpr uint8_t I3_BMP = 1;
-constexpr uint8_t I3_16 = 2;
-constexpr uint8_t I3_18 = 3;
-
-constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8;
-
-class AllSameBlocks;
-class MixedBlocks;
-
-class MutableCodePointTrie : public UMemory {
-public:
- MutableCodePointTrie(uint32_t initialValue, uint32_t errorValue, UErrorCode &errorCode);
- MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode);
- MutableCodePointTrie(const MutableCodePointTrie &other) = delete;
- ~MutableCodePointTrie();
-
- MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete;
-
- static MutableCodePointTrie *fromUCPMap(const UCPMap *map, UErrorCode &errorCode);
- static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode);
-
- uint32_t get(UChar32 c) const;
- int32_t getRange(UChar32 start, UCPMapValueFilter *filter, const void *context,
- uint32_t *pValue) const;
-
- void set(UChar32 c, uint32_t value, UErrorCode &errorCode);
- void setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode);
-
- UCPTrie *build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode);
-
-private:
- void clear();
-
- bool ensureHighStart(UChar32 c);
- int32_t allocDataBlock(int32_t blockLength);
- int32_t getDataBlock(int32_t i);
-
- void maskValues(uint32_t mask);
- UChar32 findHighStart() const;
- int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks);
- int32_t compactData(
- int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
- int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
- int32_t compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
- int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode);
-
- uint32_t *index = nullptr;
- int32_t indexCapacity = 0;
- int32_t index3NullOffset = -1;
- uint32_t *data = nullptr;
- int32_t dataCapacity = 0;
- int32_t dataLength = 0;
- int32_t dataNullOffset = -1;
-
- uint32_t origInitialValue;
- uint32_t initialValue;
- uint32_t errorValue;
- UChar32 highStart;
- uint32_t highValue;
-#ifdef UCPTRIE_DEBUG
-public:
- const char *name;
-#endif
-private:
- /** Temporary array while building the final data. */
- uint16_t *index16 = nullptr;
- uint8_t flags[UNICODE_LIMIT >> UCPTRIE_SHIFT_3];
-};
-
-MutableCodePointTrie::MutableCodePointTrie(uint32_t iniValue, uint32_t errValue, UErrorCode &errorCode) :
- origInitialValue(iniValue), initialValue(iniValue), errorValue(errValue),
- highStart(0), highValue(initialValue)
-#ifdef UCPTRIE_DEBUG
- , name("open")
-#endif
- {
- if (U_FAILURE(errorCode)) { return; }
- index = (uint32_t *)uprv_malloc(BMP_I_LIMIT * 4);
- data = (uint32_t *)uprv_malloc(INITIAL_DATA_LENGTH * 4);
- if (index == nullptr || data == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- indexCapacity = BMP_I_LIMIT;
- dataCapacity = INITIAL_DATA_LENGTH;
-}
-
-MutableCodePointTrie::MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode) :
- index3NullOffset(other.index3NullOffset),
- dataNullOffset(other.dataNullOffset),
- origInitialValue(other.origInitialValue), initialValue(other.initialValue),
- errorValue(other.errorValue),
- highStart(other.highStart), highValue(other.highValue)
-#ifdef UCPTRIE_DEBUG
- , name("mutable clone")
-#endif
- {
- if (U_FAILURE(errorCode)) { return; }
- int32_t iCapacity = highStart <= BMP_LIMIT ? BMP_I_LIMIT : I_LIMIT;
- index = (uint32_t *)uprv_malloc(iCapacity * 4);
- data = (uint32_t *)uprv_malloc(other.dataCapacity * 4);
- if (index == nullptr || data == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- indexCapacity = iCapacity;
- dataCapacity = other.dataCapacity;
-
- int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
- uprv_memcpy(flags, other.flags, iLimit);
- uprv_memcpy(index, other.index, iLimit * 4);
- uprv_memcpy(data, other.data, (size_t)other.dataLength * 4);
- dataLength = other.dataLength;
- U_ASSERT(other.index16 == nullptr);
-}
-
-MutableCodePointTrie::~MutableCodePointTrie() {
- uprv_free(index);
- uprv_free(data);
- uprv_free(index16);
-}
-
-MutableCodePointTrie *MutableCodePointTrie::fromUCPMap(const UCPMap *map, UErrorCode &errorCode) {
- // Use the highValue as the initialValue to reduce the highStart.
- uint32_t errorValue = ucpmap_get(map, -1);
- uint32_t initialValue = ucpmap_get(map, 0x10ffff);
- LocalPointer<MutableCodePointTrie> mutableTrie(
- new MutableCodePointTrie(initialValue, errorValue, errorCode),
- errorCode);
- if (U_FAILURE(errorCode)) {
- return nullptr;
- }
- UChar32 start = 0, end;
- uint32_t value;
- while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
- nullptr, nullptr, &value)) >= 0) {
- if (value != initialValue) {
- if (start == end) {
- mutableTrie->set(start, value, errorCode);
- } else {
- mutableTrie->setRange(start, end, value, errorCode);
- }
- }
- start = end + 1;
- }
- if (U_SUCCESS(errorCode)) {
- return mutableTrie.orphan();
- } else {
- return nullptr;
- }
-}
-
-MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) {
- // Use the highValue as the initialValue to reduce the highStart.
- uint32_t errorValue;
- uint32_t initialValue;
- switch (trie->valueWidth) {
- case UCPTRIE_VALUE_BITS_16:
- errorValue = trie->data.ptr16[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
- initialValue = trie->data.ptr16[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
- break;
- case UCPTRIE_VALUE_BITS_32:
- errorValue = trie->data.ptr32[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
- initialValue = trie->data.ptr32[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
- break;
- case UCPTRIE_VALUE_BITS_8:
- errorValue = trie->data.ptr8[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
- initialValue = trie->data.ptr8[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
- break;
- default:
- // Unreachable if the trie is properly initialized.
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- LocalPointer<MutableCodePointTrie> mutableTrie(
- new MutableCodePointTrie(initialValue, errorValue, errorCode),
- errorCode);
- if (U_FAILURE(errorCode)) {
- return nullptr;
- }
- UChar32 start = 0, end;
- uint32_t value;
- while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
- nullptr, nullptr, &value)) >= 0) {
- if (value != initialValue) {
- if (start == end) {
- mutableTrie->set(start, value, errorCode);
- } else {
- mutableTrie->setRange(start, end, value, errorCode);
- }
- }
- start = end + 1;
- }
- if (U_SUCCESS(errorCode)) {
- return mutableTrie.orphan();
- } else {
- return nullptr;
- }
-}
-
-void MutableCodePointTrie::clear() {
- index3NullOffset = dataNullOffset = -1;
- dataLength = 0;
- highValue = initialValue = origInitialValue;
- highStart = 0;
- uprv_free(index16);
- index16 = nullptr;
-}
-
-uint32_t MutableCodePointTrie::get(UChar32 c) const {
- if ((uint32_t)c > MAX_UNICODE) {
- return errorValue;
- }
- if (c >= highStart) {
- return highValue;
- }
- int32_t i = c >> UCPTRIE_SHIFT_3;
- if (flags[i] == ALL_SAME) {
- return index[i];
- } else {
- return data[index[i] + (c & UCPTRIE_SMALL_DATA_MASK)];
- }
-}
-
-inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue,
- UCPMapValueFilter *filter, const void *context) {
- if (value == initialValue) {
- value = nullValue;
- } else if (filter != nullptr) {
- value = filter(context, value);
- }
- return value;
-}
-
-UChar32 MutableCodePointTrie::getRange(
- UChar32 start, UCPMapValueFilter *filter, const void *context,
- uint32_t *pValue) const {
- if ((uint32_t)start > MAX_UNICODE) {
- return U_SENTINEL;
- }
- if (start >= highStart) {
- if (pValue != nullptr) {
- uint32_t value = highValue;
- if (filter != nullptr) { value = filter(context, value); }
- *pValue = value;
- }
- return MAX_UNICODE;
- }
- uint32_t nullValue = initialValue;
- if (filter != nullptr) { nullValue = filter(context, nullValue); }
- UChar32 c = start;
- uint32_t trieValue, value;
- bool haveValue = false;
- int32_t i = c >> UCPTRIE_SHIFT_3;
- do {
- if (flags[i] == ALL_SAME) {
- uint32_t trieValue2 = index[i];
- if (haveValue) {
- if (trieValue2 != trieValue) {
- if (filter == nullptr ||
- maybeFilterValue(trieValue2, initialValue, nullValue,
- filter, context) != value) {
- return c - 1;
- }
- trieValue = trieValue2; // may or may not help
- }
- } else {
- trieValue = trieValue2;
- value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
- if (pValue != nullptr) { *pValue = value; }
- haveValue = true;
- }
- c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK;
- } else /* MIXED */ {
- int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK);
- uint32_t trieValue2 = data[di];
- if (haveValue) {
- if (trieValue2 != trieValue) {
- if (filter == nullptr ||
- maybeFilterValue(trieValue2, initialValue, nullValue,
- filter, context) != value) {
- return c - 1;
- }
- trieValue = trieValue2; // may or may not help
- }
- } else {
- trieValue = trieValue2;
- value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
- if (pValue != nullptr) { *pValue = value; }
- haveValue = true;
- }
- while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) {
- trieValue2 = data[++di];
- if (trieValue2 != trieValue) {
- if (filter == nullptr ||
- maybeFilterValue(trieValue2, initialValue, nullValue,
- filter, context) != value) {
- return c - 1;
- }
- }
- trieValue = trieValue2; // may or may not help
- }
- }
- ++i;
- } while (c < highStart);
- U_ASSERT(haveValue);
- if (maybeFilterValue(highValue, initialValue, nullValue,
- filter, context) != value) {
- return c - 1;
- } else {
- return MAX_UNICODE;
- }
-}
-
-void
-writeBlock(uint32_t *block, uint32_t value) {
- uint32_t *limit = block + UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
- while (block < limit) {
- *block++ = value;
- }
-}
-
-bool MutableCodePointTrie::ensureHighStart(UChar32 c) {
- if (c >= highStart) {
- // Round up to a UCPTRIE_CP_PER_INDEX_2_ENTRY boundary to simplify compaction.
- c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
- int32_t i = highStart >> UCPTRIE_SHIFT_3;
- int32_t iLimit = c >> UCPTRIE_SHIFT_3;
- if (iLimit > indexCapacity) {
- uint32_t *newIndex = (uint32_t *)uprv_malloc(I_LIMIT * 4);
- if (newIndex == nullptr) { return false; }
- uprv_memcpy(newIndex, index, i * 4);
- uprv_free(index);
- index = newIndex;
- indexCapacity = I_LIMIT;
- }
- do {
- flags[i] = ALL_SAME;
- index[i] = initialValue;
- } while(++i < iLimit);
- highStart = c;
- }
- return true;
-}
-
-int32_t MutableCodePointTrie::allocDataBlock(int32_t blockLength) {
- int32_t newBlock = dataLength;
- int32_t newTop = newBlock + blockLength;
- if (newTop > dataCapacity) {
- int32_t capacity;
- if (dataCapacity < MEDIUM_DATA_LENGTH) {
- capacity = MEDIUM_DATA_LENGTH;
- } else if (dataCapacity < MAX_DATA_LENGTH) {
- capacity = MAX_DATA_LENGTH;
- } else {
- // Should never occur.
- // Either MAX_DATA_LENGTH is incorrect,
- // or the code writes more values than should be possible.
- return -1;
- }
- uint32_t *newData = (uint32_t *)uprv_malloc(capacity * 4);
- if (newData == nullptr) {
- return -1;
- }
- uprv_memcpy(newData, data, (size_t)dataLength * 4);
- uprv_free(data);
- data = newData;
- dataCapacity = capacity;
- }
- dataLength = newTop;
- return newBlock;
-}
-
-/**
- * No error checking for illegal arguments.
- *
- * @return -1 if no new data block available (out of memory in data array)
- * @internal
- */
-int32_t MutableCodePointTrie::getDataBlock(int32_t i) {
- if (flags[i] == MIXED) {
- return index[i];
- }
- if (i < BMP_I_LIMIT) {
- int32_t newBlock = allocDataBlock(UCPTRIE_FAST_DATA_BLOCK_LENGTH);
- if (newBlock < 0) { return newBlock; }
- int32_t iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1);
- int32_t iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
- do {
- U_ASSERT(flags[iStart] == ALL_SAME);
- writeBlock(data + newBlock, index[iStart]);
- flags[iStart] = MIXED;
- index[iStart++] = newBlock;
- newBlock += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
- } while (iStart < iLimit);
- return index[i];
- } else {
- int32_t newBlock = allocDataBlock(UCPTRIE_SMALL_DATA_BLOCK_LENGTH);
- if (newBlock < 0) { return newBlock; }
- writeBlock(data + newBlock, index[i]);
- flags[i] = MIXED;
- index[i] = newBlock;
- return newBlock;
- }
-}
-
-void MutableCodePointTrie::set(UChar32 c, uint32_t value, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) {
- return;
- }
- if ((uint32_t)c > MAX_UNICODE) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- int32_t block;
- if (!ensureHighStart(c) || (block = getDataBlock(c >> UCPTRIE_SHIFT_3)) < 0) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- data[block + (c & UCPTRIE_SMALL_DATA_MASK)] = value;
-}
-
-void
-fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value) {
- uint32_t *pLimit = block + limit;
- block += start;
- while (block < pLimit) {
- *block++ = value;
- }
-}
-
-void MutableCodePointTrie::setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) {
- return;
- }
- if ((uint32_t)start > MAX_UNICODE || (uint32_t)end > MAX_UNICODE || start > end) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- if (!ensureHighStart(end)) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- UChar32 limit = end + 1;
- if (start & UCPTRIE_SMALL_DATA_MASK) {
- // Set partial block at [start..following block boundary[.
- int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
- if (block < 0) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- UChar32 nextStart = (start + UCPTRIE_SMALL_DATA_MASK) & ~UCPTRIE_SMALL_DATA_MASK;
- if (nextStart <= limit) {
- fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, UCPTRIE_SMALL_DATA_BLOCK_LENGTH,
- value);
- start = nextStart;
- } else {
- fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, limit & UCPTRIE_SMALL_DATA_MASK,
- value);
- return;
- }
- }
-
- // Number of positions in the last, partial block.
- int32_t rest = limit & UCPTRIE_SMALL_DATA_MASK;
-
- // Round down limit to a block boundary.
- limit &= ~UCPTRIE_SMALL_DATA_MASK;
-
- // Iterate over all-value blocks.
- while (start < limit) {
- int32_t i = start >> UCPTRIE_SHIFT_3;
- if (flags[i] == ALL_SAME) {
- index[i] = value;
- } else /* MIXED */ {
- fillBlock(data + index[i], 0, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, value);
- }
- start += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
- }
-
- if (rest > 0) {
- // Set partial block at [last block boundary..limit[.
- int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
- if (block < 0) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- fillBlock(data + block, 0, rest, value);
- }
-}
-
-/* compaction --------------------------------------------------------------- */
-
-void MutableCodePointTrie::maskValues(uint32_t mask) {
- initialValue &= mask;
- errorValue &= mask;
- highValue &= mask;
- int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
- for (int32_t i = 0; i < iLimit; ++i) {
- if (flags[i] == ALL_SAME) {
- index[i] &= mask;
- }
- }
- for (int32_t i = 0; i < dataLength; ++i) {
- data[i] &= mask;
- }
-}
-
-template<typename UIntA, typename UIntB>
-bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) {
- while (length > 0 && *s == *t) {
- ++s;
- ++t;
- --length;
- }
- return length == 0;
-}
-
-bool allValuesSameAs(const uint32_t *p, int32_t length, uint32_t value) {
- const uint32_t *pLimit = p + length;
- while (p < pLimit && *p == value) { ++p; }
- return p == pLimit;
-}
-
-/** Search for an identical block. */
-int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
- const uint16_t *q, int32_t qStart, int32_t blockLength) {
- // Ensure that we do not even partially get past length.
- length -= blockLength;
-
- q += qStart;
- while (pStart <= length) {
- if (equalBlocks(p + pStart, q, blockLength)) {
- return pStart;
- }
- ++pStart;
- }
- return -1;
-}
-
-int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit,
- uint32_t value, int32_t blockLength) {
- // Ensure that we do not even partially get past limit.
- limit -= blockLength;
-
- for (int32_t block = start; block <= limit; ++block) {
- if (p[block] == value) {
- for (int32_t i = 1;; ++i) {
- if (i == blockLength) {
- return block;
- }
- if (p[block + i] != value) {
- block += i;
- break;
- }
- }
- }
- }
- return -1;
-}
-
-/**
- * Look for maximum overlap of the beginning of the other block
- * with the previous, adjacent block.
- */
-template<typename UIntA, typename UIntB>
-int32_t getOverlap(const UIntA *p, int32_t length,
- const UIntB *q, int32_t qStart, int32_t blockLength) {
- int32_t overlap = blockLength - 1;
- U_ASSERT(overlap <= length);
- q += qStart;
- while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
- --overlap;
- }
- return overlap;
-}
-
-int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value,
- int32_t blockLength) {
- int32_t min = length - (blockLength - 1);
- int32_t i = length;
- while (min < i && p[i - 1] == value) { --i; }
- return length - i;
-}
-
-bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) {
- for (int32_t i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
- if (index[i] == dataOffset) {
- return true;
- }
- }
- return false;
-}
-
-/**
- * Finds the start of the last range in the trie by enumerating backward.
- * Indexes for code points higher than this will be omitted.
- */
-UChar32 MutableCodePointTrie::findHighStart() const {
- int32_t i = highStart >> UCPTRIE_SHIFT_3;
- while (i > 0) {
- bool match;
- if (flags[--i] == ALL_SAME) {
- match = index[i] == highValue;
- } else /* MIXED */ {
- const uint32_t *p = data + index[i];
- for (int32_t j = 0;; ++j) {
- if (j == UCPTRIE_SMALL_DATA_BLOCK_LENGTH) {
- match = true;
- break;
- }
- if (p[j] != highValue) {
- match = false;
- break;
- }
- }
- }
- if (!match) {
- return (i + 1) << UCPTRIE_SHIFT_3;
- }
- }
- return 0;
-}
-
-class AllSameBlocks {
-public:
- static constexpr int32_t NEW_UNIQUE = -1;
- static constexpr int32_t OVERFLOW = -2;
-
- AllSameBlocks() : length(0), mostRecent(-1) {}
-
- int32_t findOrAdd(int32_t index, int32_t count, uint32_t value) {
- if (mostRecent >= 0 && values[mostRecent] == value) {
- refCounts[mostRecent] += count;
- return indexes[mostRecent];
- }
- for (int32_t i = 0; i < length; ++i) {
- if (values[i] == value) {
- mostRecent = i;
- refCounts[i] += count;
- return indexes[i];
- }
- }
- if (length == CAPACITY) {
- return OVERFLOW;
- }
- mostRecent = length;
- indexes[length] = index;
- values[length] = value;
- refCounts[length++] = count;
- return NEW_UNIQUE;
- }
-
- /** Replaces the block which has the lowest reference count. */
- void add(int32_t index, int32_t count, uint32_t value) {
- U_ASSERT(length == CAPACITY);
- int32_t least = -1;
- int32_t leastCount = I_LIMIT;
- for (int32_t i = 0; i < length; ++i) {
- U_ASSERT(values[i] != value);
- if (refCounts[i] < leastCount) {
- least = i;
- leastCount = refCounts[i];
- }
- }
- U_ASSERT(least >= 0);
- mostRecent = least;
- indexes[least] = index;
- values[least] = value;
- refCounts[least] = count;
- }
-
- int32_t findMostUsed() const {
- if (length == 0) { return -1; }
- int32_t max = -1;
- int32_t maxCount = 0;
- for (int32_t i = 0; i < length; ++i) {
- if (refCounts[i] > maxCount) {
- max = i;
- maxCount = refCounts[i];
- }
- }
- return indexes[max];
- }
-
-private:
- static constexpr int32_t CAPACITY = 32;
-
- int32_t length;
- int32_t mostRecent;
-
- int32_t indexes[CAPACITY];
- uint32_t values[CAPACITY];
- int32_t refCounts[CAPACITY];
-};
-
-// Custom hash table for mixed-value blocks to be found anywhere in the
-// compacted data or index so far.
-class MixedBlocks {
-public:
- MixedBlocks() {}
- ~MixedBlocks() {
- uprv_free(table);
- }
-
- bool init(int32_t maxLength, int32_t newBlockLength) {
- // We store actual data indexes + 1 to reserve 0 for empty entries.
- int32_t maxDataIndex = maxLength - newBlockLength + 1;
- int32_t newLength;
- if (maxDataIndex <= 0xfff) { // 4k
- newLength = 6007;
- shift = 12;
- mask = 0xfff;
- } else if (maxDataIndex <= 0x7fff) { // 32k
- newLength = 50021;
- shift = 15;
- mask = 0x7fff;
- } else if (maxDataIndex <= 0x1ffff) { // 128k
- newLength = 200003;
- shift = 17;
- mask = 0x1ffff;
- } else {
- // maxDataIndex up to around MAX_DATA_LENGTH, ca. 1.1M
- newLength = 1500007;
- shift = 21;
- mask = 0x1fffff;
- }
- if (newLength > capacity) {
- uprv_free(table);
- table = (uint32_t *)uprv_malloc(newLength * 4);
- if (table == nullptr) {
- return false;
- }
- capacity = newLength;
- }
- length = newLength;
- uprv_memset(table, 0, length * 4);
-
- blockLength = newBlockLength;
- return true;
- }
-
- template<typename UInt>
- void extend(const UInt *data, int32_t minStart, int32_t prevDataLength, int32_t newDataLength) {
- int32_t start = prevDataLength - blockLength;
- if (start >= minStart) {
- ++start; // Skip the last block that we added last time.
- } else {
- start = minStart; // Begin with the first full block.
- }
- for (int32_t end = newDataLength - blockLength; start <= end; ++start) {
- uint32_t hashCode = makeHashCode(data, start);
- addEntry(data, start, hashCode, start);
- }
- }
-
- template<typename UIntA, typename UIntB>
- int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const {
- uint32_t hashCode = makeHashCode(blockData, blockStart);
- int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode);
- if (entryIndex >= 0) {
- return (table[entryIndex] & mask) - 1;
- } else {
- return -1;
- }
- }
-
- int32_t findAllSameBlock(const uint32_t *data, uint32_t blockValue) const {
- uint32_t hashCode = makeHashCode(blockValue);
- int32_t entryIndex = findEntry(data, blockValue, hashCode);
- if (entryIndex >= 0) {
- return (table[entryIndex] & mask) - 1;
- } else {
- return -1;
- }
- }
-
-private:
- template<typename UInt>
- uint32_t makeHashCode(const UInt *blockData, int32_t blockStart) const {
- int32_t blockLimit = blockStart + blockLength;
- uint32_t hashCode = blockData[blockStart++];
- do {
- hashCode = 37 * hashCode + blockData[blockStart++];
- } while (blockStart < blockLimit);
- return hashCode;
- }
-
- uint32_t makeHashCode(uint32_t blockValue) const {
- uint32_t hashCode = blockValue;
- for (int32_t i = 1; i < blockLength; ++i) {
- hashCode = 37 * hashCode + blockValue;
- }
- return hashCode;
- }
-
- template<typename UInt>
- void addEntry(const UInt *data, int32_t blockStart, uint32_t hashCode, int32_t dataIndex) {
- U_ASSERT(0 <= dataIndex && dataIndex < (int32_t)mask);
- int32_t entryIndex = findEntry(data, data, blockStart, hashCode);
- if (entryIndex < 0) {
- table[~entryIndex] = (hashCode << shift) | (dataIndex + 1);
- }
- }
-
- template<typename UIntA, typename UIntB>
- int32_t findEntry(const UIntA *data, const UIntB *blockData, int32_t blockStart,
- uint32_t hashCode) const {
- uint32_t shiftedHashCode = hashCode << shift;
- int32_t initialEntryIndex = (hashCode % (length - 1)) + 1; // 1..length-1
- for (int32_t entryIndex = initialEntryIndex;;) {
- uint32_t entry = table[entryIndex];
- if (entry == 0) {
- return ~entryIndex;
- }
- if ((entry & ~mask) == shiftedHashCode) {
- int32_t dataIndex = (entry & mask) - 1;
- if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) {
- return entryIndex;
- }
- }
- entryIndex = nextIndex(initialEntryIndex, entryIndex);
- }
- }
-
- int32_t findEntry(const uint32_t *data, uint32_t blockValue, uint32_t hashCode) const {
- uint32_t shiftedHashCode = hashCode << shift;
- int32_t initialEntryIndex = (hashCode % (length - 1)) + 1; // 1..length-1
- for (int32_t entryIndex = initialEntryIndex;;) {
- uint32_t entry = table[entryIndex];
- if (entry == 0) {
- return ~entryIndex;
- }
- if ((entry & ~mask) == shiftedHashCode) {
- int32_t dataIndex = (entry & mask) - 1;
- if (allValuesSameAs(data + dataIndex, blockLength, blockValue)) {
- return entryIndex;
- }
- }
- entryIndex = nextIndex(initialEntryIndex, entryIndex);
- }
- }
-
- inline int32_t nextIndex(int32_t initialEntryIndex, int32_t entryIndex) const {
- // U_ASSERT(0 < initialEntryIndex && initialEntryIndex < length);
- return (entryIndex + initialEntryIndex) % length;
- }
-
- // Hash table.
- // The length is a prime number, larger than the maximum data length.
- // The "shift" lower bits store a data index + 1.
- // The remaining upper bits store a partial hashCode of the block data values.
- uint32_t *table = nullptr;
- int32_t capacity = 0;
- int32_t length = 0;
- int32_t shift = 0;
- uint32_t mask = 0;
-
- int32_t blockLength = 0;
-};
-
-int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) {
-#ifdef UCPTRIE_DEBUG
- bool overflow = false;
-#endif
-
- // ASCII data will be stored as a linear table, even if the following code
- // does not yet count it that way.
- int32_t newDataCapacity = ASCII_LIMIT;
- // Add room for a small data null block in case it would match the start of
- // a fast data block where dataNullOffset must not be set in that case.
- newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
- // Add room for special values (errorValue, highValue) and padding.
- newDataCapacity += 4;
- int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
- int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
- int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
- for (int32_t i = 0; i < iLimit; i += inc) {
- if (i == fastILimit) {
- blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
- inc = 1;
- }
- uint32_t value = index[i];
- if (flags[i] == MIXED) {
- // Really mixed?
- const uint32_t *p = data + value;
- value = *p;
- if (allValuesSameAs(p + 1, blockLength - 1, value)) {
- flags[i] = ALL_SAME;
- index[i] = value;
- // Fall through to ALL_SAME handling.
- } else {
- newDataCapacity += blockLength;
- continue;
- }
- } else {
- U_ASSERT(flags[i] == ALL_SAME);
- if (inc > 1) {
- // Do all of the fast-range data block's ALL_SAME parts have the same value?
- bool allSame = true;
- int32_t next_i = i + inc;
- for (int32_t j = i + 1; j < next_i; ++j) {
- U_ASSERT(flags[j] == ALL_SAME);
- if (index[j] != value) {
- allSame = false;
- break;
- }
- }
- if (!allSame) {
- // Turn it into a MIXED block.
- if (getDataBlock(i) < 0) {
- return -1;
- }
- newDataCapacity += blockLength;
- continue;
- }
- }
- }
- // Is there another ALL_SAME block with the same value?
- int32_t other = allSameBlocks.findOrAdd(i, inc, value);
- if (other == AllSameBlocks::OVERFLOW) {
- // The fixed-size array overflowed. Slow check for a duplicate block.
-#ifdef UCPTRIE_DEBUG
- if (!overflow) {
- puts("UCPTrie AllSameBlocks overflow");
- overflow = true;
- }
-#endif
- int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
- for (int32_t j = 0;; j += jInc) {
- if (j == i) {
- allSameBlocks.add(i, inc, value);
- break;
- }
- if (j == fastILimit) {
- jInc = 1;
- }
- if (flags[j] == ALL_SAME && index[j] == value) {
- allSameBlocks.add(j, jInc + inc, value);
- other = j;
- break;
- // We could keep counting blocks with the same value
- // before we add the first one, which may improve compaction in rare cases,
- // but it would make it slower.
- }
- }
- }
- if (other >= 0) {
- flags[i] = SAME_AS;
- index[i] = other;
- } else {
- // New unique same-value block.
- newDataCapacity += blockLength;
- }
- }
- return newDataCapacity;
-}
-
-#ifdef UCPTRIE_DEBUG
-# define DEBUG_DO(expr) expr
-#else
-# define DEBUG_DO(expr)
-#endif
-
-#ifdef UCPTRIE_DEBUG
-// Braille symbols: U+28xx = UTF-8 E2 A0 80..E2 A3 BF
-int32_t appendValue(char s[], int32_t length, uint32_t value) {
- value ^= value >> 16;
- value ^= value >> 8;
- s[length] = 0xE2;
- s[length + 1] = (char)(0xA0 + ((value >> 6) & 3));
- s[length + 2] = (char)(0x80 + (value & 0x3F));
- return length + 3;
-}
-
-void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value,
- UChar32 start, int32_t overlap, uint32_t initialValue) {
- char s[UCPTRIE_FAST_DATA_BLOCK_LENGTH * 3 + 3];
- int32_t length = 0;
- int32_t i;
- for (i = 0; i < overlap; ++i) {
- length = appendValue(s, length, 0); // Braille blank
- }
- s[length++] = '|';
- for (; i < blockLength; ++i) {
- if (block != nullptr) {
- value = block[i];
- }
- if (value == initialValue) {
- value = 0x40; // Braille lower left dot
- }
- length = appendValue(s, length, value);
- }
- s[length] = 0;
- start += overlap;
- if (start <= 0xffff) {
- printf(" %04lX %s|\n", (long)start, s);
- } else if (start <= 0xfffff) {
- printf(" %5lX %s|\n", (long)start, s);
- } else {
- printf(" %6lX %s|\n", (long)start, s);
- }
-}
-#endif
-
-/**
- * Compacts a build-time trie.
- *
- * The compaction
- * - removes blocks that are identical with earlier ones
- * - overlaps each new non-duplicate block as much as possible with the previously-written one
- * - works with fast-range data blocks whose length is a multiple of that of
- * higher-code-point data blocks
- *
- * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.
- */
-int32_t MutableCodePointTrie::compactData(
- int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
- int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode) {
-#ifdef UCPTRIE_DEBUG
- int32_t countSame=0, sumOverlaps=0;
- bool printData = dataLength == 29088 /* line.brk */ ||
- // dataLength == 30048 /* CanonIterData */ ||
- dataLength == 50400 /* zh.txt~stroke */;
-#endif
-
- // The linear ASCII data has been copied into newData already.
- int32_t newDataLength = 0;
- for (int32_t i = 0; newDataLength < ASCII_LIMIT;
- newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
- index[i] = newDataLength;
-#ifdef UCPTRIE_DEBUG
- if (printData) {
- printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue);
- }
-#endif
- }
-
- int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
- if (!mixedBlocks.init(newDataCapacity, blockLength)) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- mixedBlocks.extend(newData, 0, 0, newDataLength);
-
- int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
- int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
- int32_t fastLength = 0;
- for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) {
- if (i == fastILimit) {
- blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
- inc = 1;
- fastLength = newDataLength;
- if (!mixedBlocks.init(newDataCapacity, blockLength)) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- mixedBlocks.extend(newData, 0, 0, newDataLength);
- }
- if (flags[i] == ALL_SAME) {
- uint32_t value = index[i];
- // Find an earlier part of the data array of length blockLength
- // that is filled with this value.
- int32_t n = mixedBlocks.findAllSameBlock(newData, value);
- // If we find a match, and the current block is the data null block,
- // and it is not a fast block but matches the start of a fast block,
- // then we need to continue looking.
- // This is because this small block is shorter than the fast block,
- // and not all of the rest of the fast block is filled with this value.
- // Otherwise trie.getRange() would detect that the fast block starts at
- // dataNullOffset and assume incorrectly that it is filled with the null value.
- while (n >= 0 && i == dataNullIndex && i >= fastILimit && n < fastLength &&
- isStartOfSomeFastBlock(n, index, fastILimit)) {
- n = findAllSameBlock(newData, n + 1, newDataLength, value, blockLength);
- }
- if (n >= 0) {
- DEBUG_DO(++countSame);
- index[i] = n;
- } else {
- n = getAllSameOverlap(newData, newDataLength, value, blockLength);
- DEBUG_DO(sumOverlaps += n);
-#ifdef UCPTRIE_DEBUG
- if (printData) {
- printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue);
- }
-#endif
- index[i] = newDataLength - n;
- int32_t prevDataLength = newDataLength;
- while (n < blockLength) {
- newData[newDataLength++] = value;
- ++n;
- }
- mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
- }
- } else if (flags[i] == MIXED) {
- const uint32_t *block = data + index[i];
- int32_t n = mixedBlocks.findBlock(newData, block, 0);
- if (n >= 0) {
- DEBUG_DO(++countSame);
- index[i] = n;
- } else {
- n = getOverlap(newData, newDataLength, block, 0, blockLength);
- DEBUG_DO(sumOverlaps += n);
-#ifdef UCPTRIE_DEBUG
- if (printData) {
- printBlock(block, blockLength, 0, i << UCPTRIE_SHIFT_3, n, initialValue);
- }
-#endif
- index[i] = newDataLength - n;
- int32_t prevDataLength = newDataLength;
- while (n < blockLength) {
- newData[newDataLength++] = block[n++];
- }
- mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
- }
- } else /* SAME_AS */ {
- uint32_t j = index[i];
- index[i] = index[j];
- }
- }
-
-#ifdef UCPTRIE_DEBUG
- /* we saved some space */
- printf("compacting UCPTrie: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n",
- (long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps);
-#endif
- return newDataLength;
-}
-
-int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks,
- UErrorCode &errorCode) {
- int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3);
- if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) {
- // Only the linear fast index, no multi-stage index tables.
- index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
- return fastIndexLength;
- }
-
- // Condense the fast index table.
- // Also, does it contain an index-3 block with all dataNullOffset?
- uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH]; // fastIndexLength
- int32_t i3FirstNull = -1;
- for (int32_t i = 0, j = 0; i < fastILimit; ++j) {
- uint32_t i3 = index[i];
- fastIndex[j] = (uint16_t)i3;
- if (i3 == (uint32_t)dataNullOffset) {
- if (i3FirstNull < 0) {
- i3FirstNull = j;
- } else if (index3NullOffset < 0 &&
- (j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) {
- index3NullOffset = i3FirstNull;
- }
- } else {
- i3FirstNull = -1;
- }
- // Set the index entries that compactData() skipped.
- // Needed when the multi-stage index covers the fast index range as well.
- int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
- while (++i < iNext) {
- i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
- index[i] = i3;
- }
- }
-
- if (!mixedBlocks.init(fastIndexLength, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- mixedBlocks.extend(fastIndex, 0, 0, fastIndexLength);
-
- // Examine index-3 blocks. For each determine one of:
- // - same as the index-3 null block
- // - same as a fast-index block
- // - 16-bit indexes
- // - 18-bit indexes
- // We store this in the first flags entry for the index-3 block.
- //
- // Also determine an upper limit for the index-3 table length.
- int32_t index3Capacity = 0;
- i3FirstNull = index3NullOffset;
- bool hasLongI3Blocks = false;
- // If the fast index covers the whole BMP, then
- // the multi-stage index is only for supplementary code points.
- // Otherwise, the multi-stage index covers all of Unicode.
- int32_t iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT;
- int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
- for (int32_t i = iStart; i < iLimit;) {
- int32_t j = i;
- int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
- uint32_t oredI3 = 0;
- bool isNull = true;
- do {
- uint32_t i3 = index[j];
- oredI3 |= i3;
- if (i3 != (uint32_t)dataNullOffset) {
- isNull = false;
- }
- } while (++j < jLimit);
- if (isNull) {
- flags[i] = I3_NULL;
- if (i3FirstNull < 0) {
- if (oredI3 <= 0xffff) {
- index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
- } else {
- index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
- hasLongI3Blocks = true;
- }
- i3FirstNull = 0;
- }
- } else {
- if (oredI3 <= 0xffff) {
- int32_t n = mixedBlocks.findBlock(fastIndex, index, i);
- if (n >= 0) {
- flags[i] = I3_BMP;
- index[i] = n;
- } else {
- flags[i] = I3_16;
- index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
- }
- } else {
- flags[i] = I3_18;
- index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
- hasLongI3Blocks = true;
- }
- }
- i = j;
- }
-
- int32_t index2Capacity = (iLimit - iStart) >> UCPTRIE_SHIFT_2_3;
-
- // Length of the index-1 table, rounded up.
- int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2;
-
- // Index table: Fast index, index-1, index-3, index-2.
- // +1 for possible index table padding.
- int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1;
- index16 = (uint16_t *)uprv_malloc(index16Capacity * 2);
- if (index16 == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- uprv_memcpy(index16, fastIndex, fastIndexLength * 2);
-
- if (!mixedBlocks.init(index16Capacity, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- MixedBlocks longI3Blocks;
- if (hasLongI3Blocks) {
- if (!longI3Blocks.init(index16Capacity, INDEX_3_18BIT_BLOCK_LENGTH)) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- }
-
- // Compact the index-3 table and write an uncompacted version of the index-2 table.
- uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2]; // index2Capacity
- int32_t i2Length = 0;
- i3FirstNull = index3NullOffset;
- int32_t index3Start = fastIndexLength + index1Length;
- int32_t indexLength = index3Start;
- for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) {
- int32_t i3;
- uint8_t f = flags[i];
- if (f == I3_NULL && i3FirstNull < 0) {
- // First index-3 null block. Write & overlap it like a normal block, then remember it.
- f = dataNullOffset <= 0xffff ? I3_16 : I3_18;
- i3FirstNull = 0;
- }
- if (f == I3_NULL) {
- i3 = index3NullOffset;
- } else if (f == I3_BMP) {
- i3 = index[i];
- } else if (f == I3_16) {
- int32_t n = mixedBlocks.findBlock(index16, index, i);
- if (n >= 0) {
- i3 = n;
- } else {
- if (indexLength == index3Start) {
- // No overlap at the boundary between the index-1 and index-3 tables.
- n = 0;
- } else {
- n = getOverlap(index16, indexLength,
- index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
- }
- i3 = indexLength - n;
- int32_t prevIndexLength = indexLength;
- while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) {
- index16[indexLength++] = index[i + n++];
- }
- mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
- if (hasLongI3Blocks) {
- longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
- }
- }
- } else {
- U_ASSERT(f == I3_18);
- U_ASSERT(hasLongI3Blocks);
- // Encode an index-3 block that contains one or more data indexes exceeding 16 bits.
- int32_t j = i;
- int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
- int32_t k = indexLength;
- do {
- ++k;
- uint32_t v = index[j++];
- uint32_t upperBits = (v & 0x30000) >> 2;
- index16[k++] = v;
- v = index[j++];
- upperBits |= (v & 0x30000) >> 4;
- index16[k++] = v;
- v = index[j++];
- upperBits |= (v & 0x30000) >> 6;
- index16[k++] = v;
- v = index[j++];
- upperBits |= (v & 0x30000) >> 8;
- index16[k++] = v;
- v = index[j++];
- upperBits |= (v & 0x30000) >> 10;
- index16[k++] = v;
- v = index[j++];
- upperBits |= (v & 0x30000) >> 12;
- index16[k++] = v;
- v = index[j++];
- upperBits |= (v & 0x30000) >> 14;
- index16[k++] = v;
- v = index[j++];
- upperBits |= (v & 0x30000) >> 16;
- index16[k++] = v;
- index16[k - 9] = upperBits;
- } while (j < jLimit);
- int32_t n = longI3Blocks.findBlock(index16, index16, indexLength);
- if (n >= 0) {
- i3 = n | 0x8000;
- } else {
- if (indexLength == index3Start) {
- // No overlap at the boundary between the index-1 and index-3 tables.
- n = 0;
- } else {
- n = getOverlap(index16, indexLength,
- index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
- }
- i3 = (indexLength - n) | 0x8000;
- int32_t prevIndexLength = indexLength;
- if (n > 0) {
- int32_t start = indexLength;
- while (n < INDEX_3_18BIT_BLOCK_LENGTH) {
- index16[indexLength++] = index16[start + n++];
- }
- } else {
- indexLength += INDEX_3_18BIT_BLOCK_LENGTH;
- }
- mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
- if (hasLongI3Blocks) {
- longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
- }
- }
- }
- if (index3NullOffset < 0 && i3FirstNull >= 0) {
- index3NullOffset = i3;
- }
- // Set the index-2 table entry.
- index2[i2Length++] = i3;
- }
- U_ASSERT(i2Length == index2Capacity);
- U_ASSERT(indexLength <= index3Start + index3Capacity);
-
- if (index3NullOffset < 0) {
- index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
- }
- if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
- // The index-3 offsets exceed 15 bits, or
- // the last one cannot be distinguished from the no-null-block value.
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- // Compact the index-2 table and write the index-1 table.
- static_assert(UCPTRIE_INDEX_2_BLOCK_LENGTH == UCPTRIE_INDEX_3_BLOCK_LENGTH,
- "must re-init mixedBlocks");
- int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH;
- int32_t i1 = fastIndexLength;
- for (int32_t i = 0; i < i2Length; i += blockLength) {
- int32_t n;
- if ((i2Length - i) >= blockLength) {
- // normal block
- U_ASSERT(blockLength == UCPTRIE_INDEX_2_BLOCK_LENGTH);
- n = mixedBlocks.findBlock(index16, index2, i);
- } else {
- // highStart is inside the last index-2 block. Shorten it.
- blockLength = i2Length - i;
- n = findSameBlock(index16, index3Start, indexLength,
- index2, i, blockLength);
- }
- int32_t i2;
- if (n >= 0) {
- i2 = n;
- } else {
- if (indexLength == index3Start) {
- // No overlap at the boundary between the index-1 and index-3/2 tables.
- n = 0;
- } else {
- n = getOverlap(index16, indexLength, index2, i, blockLength);
- }
- i2 = indexLength - n;
- int32_t prevIndexLength = indexLength;
- while (n < blockLength) {
- index16[indexLength++] = index2[i + n++];
- }
- mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
- }
- // Set the index-1 table entry.
- index16[i1++] = i2;
- }
- U_ASSERT(i1 == index3Start);
- U_ASSERT(indexLength <= index16Capacity);
-
-#ifdef UCPTRIE_DEBUG
- /* we saved some space */
- printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n",
- (long)iLimit, (long)indexLength);
-#endif
-
- return indexLength;
-}
-
-int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) {
- // Find the real highStart and round it up.
- U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);
- highValue = get(MAX_UNICODE);
- int32_t realHighStart = findHighStart();
- realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) &
- ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
- if (realHighStart == UNICODE_LIMIT) {
- highValue = initialValue;
- }
-
-#ifdef UCPTRIE_DEBUG
- printf("UCPTrie: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n",
- (long)realHighStart, (long)highValue, (long)initialValue);
-#endif
-
- // We always store indexes and data values for the fast range.
- // Pin highStart to the top of that range while building.
- UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3;
- if (realHighStart < fastLimit) {
- for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) {
- flags[i] = ALL_SAME;
- index[i] = highValue;
- }
- highStart = fastLimit;
- } else {
- highStart = realHighStart;
- }
-
- uint32_t asciiData[ASCII_LIMIT];
- for (int32_t i = 0; i < ASCII_LIMIT; ++i) {
- asciiData[i] = get(i);
- }
-
- // First we look for which data blocks have the same value repeated over the whole block,
- // deduplicate such blocks, find a good null data block (for faster enumeration),
- // and get an upper bound for the necessary data array length.
- AllSameBlocks allSameBlocks;
- int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks);
- if (newDataCapacity < 0) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- uint32_t *newData = (uint32_t *)uprv_malloc(newDataCapacity * 4);
- if (newData == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- uprv_memcpy(newData, asciiData, sizeof(asciiData));
-
- int32_t dataNullIndex = allSameBlocks.findMostUsed();
-
- MixedBlocks mixedBlocks;
- int32_t newDataLength = compactData(fastILimit, newData, newDataCapacity,
- dataNullIndex, mixedBlocks, errorCode);
- if (U_FAILURE(errorCode)) { return 0; }
- U_ASSERT(newDataLength <= newDataCapacity);
- uprv_free(data);
- data = newData;
- dataCapacity = newDataCapacity;
- dataLength = newDataLength;
- if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) {
- // The offset of the last data block is too high to be stored in the index table.
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- if (dataNullIndex >= 0) {
- dataNullOffset = index[dataNullIndex];
-#ifdef UCPTRIE_DEBUG
- if (data[dataNullOffset] != initialValue) {
- printf("UCPTrie initialValue %lx -> more common nullValue %lx\n",
- (long)initialValue, (long)data[dataNullOffset]);
- }
-#endif
- initialValue = data[dataNullOffset];
- } else {
- dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET;
- }
-
- int32_t indexLength = compactIndex(fastILimit, mixedBlocks, errorCode);
- highStart = realHighStart;
- return indexLength;
-}
-
-UCPTrie *MutableCodePointTrie::build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) {
- return nullptr;
- }
- if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type ||
- valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
-
- // The mutable trie always stores 32-bit values.
- // When we build a UCPTrie for a smaller value width, we first mask off unused bits
- // before compacting the data.
- switch (valueWidth) {
- case UCPTRIE_VALUE_BITS_32:
- break;
- case UCPTRIE_VALUE_BITS_16:
- maskValues(0xffff);
- break;
- case UCPTRIE_VALUE_BITS_8:
- maskValues(0xff);
- break;
- default:
- break;
- }
-
- UChar32 fastLimit = type == UCPTRIE_TYPE_FAST ? BMP_LIMIT : UCPTRIE_SMALL_LIMIT;
- int32_t indexLength = compactTrie(fastLimit >> UCPTRIE_SHIFT_3, errorCode);
- if (U_FAILURE(errorCode)) {
- clear();
- return nullptr;
- }
-
- // Ensure data table alignment: The index length must be even for uint32_t data.
- if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) {
- index16[indexLength++] = 0xffee; // arbitrary value
- }
-
- // Make the total trie structure length a multiple of 4 bytes by padding the data table,
- // and store special values as the last two data values.
- int32_t length = indexLength * 2;
- if (valueWidth == UCPTRIE_VALUE_BITS_16) {
- if (((indexLength ^ dataLength) & 1) != 0) {
- // padding
- data[dataLength++] = errorValue;
- }
- if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
- data[dataLength++] = highValue;
- data[dataLength++] = errorValue;
- }
- length += dataLength * 2;
- } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
- // 32-bit data words never need padding to a multiple of 4 bytes.
- if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
- if (data[dataLength - 1] != highValue) {
- data[dataLength++] = highValue;
- }
- data[dataLength++] = errorValue;
- }
- length += dataLength * 4;
- } else {
- int32_t and3 = (length + dataLength) & 3;
- if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) {
- // all set
- } else if(and3 == 3 && data[dataLength - 1] == highValue) {
- data[dataLength++] = errorValue;
- } else {
- while (and3 != 2) {
- data[dataLength++] = highValue;
- and3 = (and3 + 1) & 3;
- }
- data[dataLength++] = highValue;
- data[dataLength++] = errorValue;
- }
- length += dataLength;
- }
-
- // Calculate the total length of the UCPTrie as a single memory block.
- length += sizeof(UCPTrie);
- U_ASSERT((length & 3) == 0);
-
- uint8_t *bytes = (uint8_t *)uprv_malloc(length);
- if (bytes == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- clear();
- return nullptr;
- }
- UCPTrie *trie = reinterpret_cast<UCPTrie *>(bytes);
- uprv_memset(trie, 0, sizeof(UCPTrie));
- trie->indexLength = indexLength;
- trie->dataLength = dataLength;
-
- trie->highStart = highStart;
- // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes.
- // Runtime code needs to then test for the real highStart as well.
- trie->shifted12HighStart = (highStart + 0xfff) >> 12;
- trie->type = type;
- trie->valueWidth = valueWidth;
-
- trie->index3NullOffset = index3NullOffset;
- trie->dataNullOffset = dataNullOffset;
- trie->nullValue = initialValue;
-
- bytes += sizeof(UCPTrie);
-
- // Fill the index and data arrays.
- uint16_t *dest16 = (uint16_t *)bytes;
- trie->index = dest16;
-
- if (highStart <= fastLimit) {
- // Condense only the fast index from the mutable-trie index.
- for (int32_t i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) {
- *dest16++ = (uint16_t)index[i]; // dest16[j]
- }
- } else {
- uprv_memcpy(dest16, index16, indexLength * 2);
- dest16 += indexLength;
- }
- bytes += indexLength * 2;
-
- // Write the data array.
- const uint32_t *p = data;
- switch (valueWidth) {
- case UCPTRIE_VALUE_BITS_16:
- // Write 16-bit data values.
- trie->data.ptr16 = dest16;
- for (int32_t i = dataLength; i > 0; --i) {
- *dest16++ = (uint16_t)*p++;
- }
- break;
- case UCPTRIE_VALUE_BITS_32:
- // Write 32-bit data values.
- trie->data.ptr32 = (uint32_t *)bytes;
- uprv_memcpy(bytes, p, (size_t)dataLength * 4);
- break;
- case UCPTRIE_VALUE_BITS_8:
- // Write 8-bit data values.
- trie->data.ptr8 = bytes;
- for (int32_t i = dataLength; i > 0; --i) {
- *bytes++ = (uint8_t)*p++;
- }
- break;
- default:
- // Will not occur, valueWidth checked at the beginning.
- break;
- }
-
-#ifdef UCPTRIE_DEBUG
- trie->name = name;
-
- ucptrie_printLengths(trie, "");
-#endif
-
- clear();
- return trie;
-}
-
-} // namespace
-
-U_NAMESPACE_END
-
-U_NAMESPACE_USE
-
-U_CAPI UMutableCPTrie * U_EXPORT2
-umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- LocalPointer<MutableCodePointTrie> trie(
- new MutableCodePointTrie(initialValue, errorValue, *pErrorCode), *pErrorCode);
- if (U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- return reinterpret_cast<UMutableCPTrie *>(trie.orphan());
-}
-
-U_CAPI UMutableCPTrie * U_EXPORT2
-umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- if (other == nullptr) {
- return nullptr;
- }
- LocalPointer<MutableCodePointTrie> clone(
- new MutableCodePointTrie(*reinterpret_cast<const MutableCodePointTrie *>(other), *pErrorCode), *pErrorCode);
- if (U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- return reinterpret_cast<UMutableCPTrie *>(clone.orphan());
-}
-
-U_CAPI void U_EXPORT2
-umutablecptrie_close(UMutableCPTrie *trie) {
- delete reinterpret_cast<MutableCodePointTrie *>(trie);
-}
-
-U_CAPI UMutableCPTrie * U_EXPORT2
-umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- if (map == nullptr) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPMap(map, *pErrorCode));
-}
-
-U_CAPI UMutableCPTrie * U_EXPORT2
-umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- if (trie == nullptr) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPTrie(trie, *pErrorCode));
-}
-
-U_CAPI uint32_t U_EXPORT2
-umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) {
- return reinterpret_cast<const MutableCodePointTrie *>(trie)->get(c);
-}
-
-namespace {
-
-UChar32 getRange(const void *trie, UChar32 start,
- UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
- return reinterpret_cast<const MutableCodePointTrie *>(trie)->
- getRange(start, filter, context, pValue);
-}
-
-} // namespace
-
-U_CAPI UChar32 U_EXPORT2
-umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
- UCPMapRangeOption option, uint32_t surrogateValue,
- UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
- return ucptrie_internalGetRange(getRange, trie, start,
- option, surrogateValue,
- filter, context, pValue);
-}
-
-U_CAPI void U_EXPORT2
-umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return;
- }
- reinterpret_cast<MutableCodePointTrie *>(trie)->set(c, value, *pErrorCode);
-}
-
-U_CAPI void U_EXPORT2
-umutablecptrie_setRange(UMutableCPTrie *trie, UChar32 start, UChar32 end,
- uint32_t value, UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return;
- }
- reinterpret_cast<MutableCodePointTrie *>(trie)->setRange(start, end, value, *pErrorCode);
-}
-
-/* Compact and internally serialize the trie. */
-U_CAPI UCPTrie * U_EXPORT2
-umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- return reinterpret_cast<MutableCodePointTrie *>(trie)->build(type, valueWidth, *pErrorCode);
-}
-
-#ifdef UCPTRIE_DEBUG
-U_CFUNC void umutablecptrie_setName(UMutableCPTrie *trie, const char *name) {
- reinterpret_cast<MutableCodePointTrie *>(trie)->name = name;
-}
-#endif
diff --git a/contrib/libs/icu/common/umutex.cpp b/contrib/libs/icu/common/umutex.cpp
deleted file mode 100644
index ccbee9960a3..00000000000
--- a/contrib/libs/icu/common/umutex.cpp
+++ /dev/null
@@ -1,204 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1997-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* File umutex.cpp
-*
-* Modification History:
-*
-* Date Name Description
-* 04/02/97 aliu Creation.
-* 04/07/99 srl updated
-* 05/13/99 stephen Changed to umutex (from cmutex).
-* 11/22/99 aliu Make non-global mutex autoinitialize [j151]
-******************************************************************************
-*/
-
-#include "umutex.h"
-
-#include "unicode/utypes.h"
-#include "uassert.h"
-#include "ucln_cmn.h"
-#include "cmemory.h"
-
-U_NAMESPACE_BEGIN
-
-
-#if defined(U_USER_MUTEX_CPP)
-// Support for including an alternate implementation of mutexes has been withdrawn.
-// See issue ICU-20185.
-#error U_USER_MUTEX_CPP not supported
-#endif
-
-
-/*************************************************************************************************
- *
- * ICU Mutex wrappers.
- *
- *************************************************************************************************/
-
-namespace {
-std::mutex *initMutex;
-std::condition_variable *initCondition;
-
-// The ICU global mutex.
-// Used when ICU implementation code passes nullptr for the mutex pointer.
-UMutex globalMutex;
-
-std::once_flag initFlag;
-std::once_flag *pInitFlag = &initFlag;
-
-} // Anonymous namespace
-
-U_CDECL_BEGIN
-static UBool U_CALLCONV umtx_cleanup() {
- initMutex->~mutex();
- initCondition->~condition_variable();
- UMutex::cleanup();
-
- // Reset the once_flag, by destructing it and creating a fresh one in its place.
- // Do not use this trick anywhere else in ICU; use umtx_initOnce, not std::call_once().
- pInitFlag->~once_flag();
- pInitFlag = new(&initFlag) std::once_flag();
- return true;
-}
-
-static void U_CALLCONV umtx_init() {
- initMutex = STATIC_NEW(std::mutex);
- initCondition = STATIC_NEW(std::condition_variable);
- ucln_common_registerCleanup(UCLN_COMMON_MUTEX, umtx_cleanup);
-}
-U_CDECL_END
-
-
-std::mutex *UMutex::getMutex() {
- std::mutex *retPtr = fMutex.load(std::memory_order_acquire);
- if (retPtr == nullptr) {
- std::call_once(*pInitFlag, umtx_init);
- std::lock_guard<std::mutex> guard(*initMutex);
- retPtr = fMutex.load(std::memory_order_acquire);
- if (retPtr == nullptr) {
- fMutex = new(fStorage) std::mutex();
- retPtr = fMutex;
- fListLink = gListHead;
- gListHead = this;
- }
- }
- U_ASSERT(retPtr != nullptr);
- return retPtr;
-}
-
-UMutex *UMutex::gListHead = nullptr;
-
-void UMutex::cleanup() {
- UMutex *next = nullptr;
- for (UMutex *m = gListHead; m != nullptr; m = next) {
- (*m->fMutex).~mutex();
- m->fMutex = nullptr;
- next = m->fListLink;
- m->fListLink = nullptr;
- }
- gListHead = nullptr;
-}
-
-
-U_CAPI void U_EXPORT2
-umtx_lock(UMutex *mutex) {
- if (mutex == nullptr) {
- mutex = &globalMutex;
- }
- mutex->lock();
-}
-
-
-U_CAPI void U_EXPORT2
-umtx_unlock(UMutex* mutex)
-{
- if (mutex == nullptr) {
- mutex = &globalMutex;
- }
- mutex->unlock();
-}
-
-
-/*************************************************************************************************
- *
- * UInitOnce Implementation
- *
- *************************************************************************************************/
-
-// This function is called when a test of a UInitOnce::fState reveals that
-// initialization has not completed, that we either need to call the init
-// function on this thread, or wait for some other thread to complete.
-//
-// The actual call to the init function is made inline by template code
-// that knows the C++ types involved. This function returns true if
-// the caller needs to call the Init function.
-//
-U_COMMON_API UBool U_EXPORT2
-umtx_initImplPreInit(UInitOnce &uio) {
- std::call_once(*pInitFlag, umtx_init);
- std::unique_lock<std::mutex> lock(*initMutex);
- if (umtx_loadAcquire(uio.fState) == 0) {
- umtx_storeRelease(uio.fState, 1);
- return true; // Caller will next call the init function.
- } else {
- while (umtx_loadAcquire(uio.fState) == 1) {
- // Another thread is currently running the initialization.
- // Wait until it completes.
- initCondition->wait(lock);
- }
- U_ASSERT(uio.fState == 2);
- return false;
- }
-}
-
-
-// This function is called by the thread that ran an initialization function,
-// just after completing the function.
-// Some threads may be waiting on the condition, requiring the broadcast wakeup.
-// Some threads may be racing to test the fState variable outside of the mutex,
-// requiring the use of store/release when changing its value.
-
-U_COMMON_API void U_EXPORT2
-umtx_initImplPostInit(UInitOnce &uio) {
- {
- std::unique_lock<std::mutex> lock(*initMutex);
- umtx_storeRelease(uio.fState, 2);
- }
- initCondition->notify_all();
-}
-
-U_NAMESPACE_END
-
-/*************************************************************************************************
- *
- * Deprecated functions for setting user mutexes.
- *
- *************************************************************************************************/
-
-U_DEPRECATED void U_EXPORT2
-u_setMutexFunctions(const void * /*context */, UMtxInitFn *, UMtxFn *,
- UMtxFn *, UMtxFn *, UErrorCode *status) {
- if (U_SUCCESS(*status)) {
- *status = U_UNSUPPORTED_ERROR;
- }
- return;
-}
-
-
-
-U_DEPRECATED void U_EXPORT2
-u_setAtomicIncDecFunctions(const void * /*context */, UMtxAtomicFn *, UMtxAtomicFn *,
- UErrorCode *status) {
- if (U_SUCCESS(*status)) {
- *status = U_UNSUPPORTED_ERROR;
- }
- return;
-}
diff --git a/contrib/libs/icu/common/umutex.h b/contrib/libs/icu/common/umutex.h
deleted file mode 100644
index 2503aa4a291..00000000000
--- a/contrib/libs/icu/common/umutex.h
+++ /dev/null
@@ -1,277 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1997-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*
-* File UMUTEX.H
-*
-* Modification History:
-*
-* Date Name Description
-* 04/02/97 aliu Creation.
-* 04/07/99 srl rewrite - C interface, multiple mutices
-* 05/13/99 stephen Changed to umutex (from cmutex)
-******************************************************************************
-*/
-
-#ifndef UMUTEX_H
-#define UMUTEX_H
-
-#include <atomic>
-#include <condition_variable>
-#include <mutex>
-#include <type_traits>
-
-#include "unicode/utypes.h"
-#include "unicode/uclean.h"
-#include "unicode/uobject.h"
-
-#include "putilimp.h"
-
-#if defined(U_USER_ATOMICS_H) || defined(U_USER_MUTEX_H)
-// Support for including an alternate implementation of atomic & mutex operations has been withdrawn.
-// See issue ICU-20185.
-#error U_USER_ATOMICS and U_USER_MUTEX_H are not supported
-#endif
-
-// Export an explicit template instantiation of std::atomic<int32_t>.
-// When building DLLs for Windows this is required as it is used as a data member of the exported SharedObject class.
-// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.
-//
-// Similar story for std::atomic<std::mutex *>, and the exported UMutex class.
-#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN)
-#if defined(__clang__) || defined(_MSC_VER)
- #if defined(__clang__)
- // Suppress the warning that the explicit instantiation after explicit specialization has no effect.
- #pragma clang diagnostic push
- #pragma clang diagnostic ignored "-Winstantiation-after-specialization"
- #endif
-template struct U_COMMON_API std::atomic<int32_t>;
-template struct U_COMMON_API std::atomic<std::mutex *>;
- #if defined(__clang__)
- #pragma clang diagnostic pop
- #endif
-#elif defined(__GNUC__)
-// For GCC this class is already exported/visible, so no need for U_COMMON_API.
-template struct std::atomic<int32_t>;
-template struct std::atomic<std::mutex *>;
-#endif
-#endif
-
-
-U_NAMESPACE_BEGIN
-
-/****************************************************************************
- *
- * Low Level Atomic Operations, ICU wrappers for.
- *
- ****************************************************************************/
-
-typedef std::atomic<int32_t> u_atomic_int32_t;
-#define ATOMIC_INT32_T_INITIALIZER(val) ATOMIC_VAR_INIT(val)
-
-inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) {
- return var.load(std::memory_order_acquire);
-}
-
-inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) {
- var.store(val, std::memory_order_release);
-}
-
-inline int32_t umtx_atomic_inc(u_atomic_int32_t *var) {
- return var->fetch_add(1) + 1;
-}
-
-inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) {
- return var->fetch_sub(1) - 1;
-}
-
-
-/*************************************************************************************************
- *
- * UInitOnce Definitions.
- *
- *************************************************************************************************/
-
-struct UInitOnce {
- u_atomic_int32_t fState;
- UErrorCode fErrCode;
- void reset() {fState = 0;}
- UBool isReset() {return umtx_loadAcquire(fState) == 0;}
-// Note: isReset() is used by service registration code.
-// Thread safety of this usage needs review.
-};
-
-#define U_INITONCE_INITIALIZER {ATOMIC_INT32_T_INITIALIZER(0), U_ZERO_ERROR}
-
-
-U_COMMON_API UBool U_EXPORT2 umtx_initImplPreInit(UInitOnce &);
-U_COMMON_API void U_EXPORT2 umtx_initImplPostInit(UInitOnce &);
-
-template<class T> void umtx_initOnce(UInitOnce &uio, T *obj, void (U_CALLCONV T::*fp)()) {
- if (umtx_loadAcquire(uio.fState) == 2) {
- return;
- }
- if (umtx_initImplPreInit(uio)) {
- (obj->*fp)();
- umtx_initImplPostInit(uio);
- }
-}
-
-
-// umtx_initOnce variant for plain functions, or static class functions.
-// No context parameter.
-inline void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)()) {
- if (umtx_loadAcquire(uio.fState) == 2) {
- return;
- }
- if (umtx_initImplPreInit(uio)) {
- (*fp)();
- umtx_initImplPostInit(uio);
- }
-}
-
-// umtx_initOnce variant for plain functions, or static class functions.
-// With ErrorCode, No context parameter.
-inline void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(UErrorCode &), UErrorCode &errCode) {
- if (U_FAILURE(errCode)) {
- return;
- }
- if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) {
- // We run the initialization.
- (*fp)(errCode);
- uio.fErrCode = errCode;
- umtx_initImplPostInit(uio);
- } else {
- // Someone else already ran the initialization.
- if (U_FAILURE(uio.fErrCode)) {
- errCode = uio.fErrCode;
- }
- }
-}
-
-// umtx_initOnce variant for plain functions, or static class functions,
-// with a context parameter.
-template<class T> void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T), T context) {
- if (umtx_loadAcquire(uio.fState) == 2) {
- return;
- }
- if (umtx_initImplPreInit(uio)) {
- (*fp)(context);
- umtx_initImplPostInit(uio);
- }
-}
-
-// umtx_initOnce variant for plain functions, or static class functions,
-// with a context parameter and an error code.
-template<class T> void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T, UErrorCode &), T context, UErrorCode &errCode) {
- if (U_FAILURE(errCode)) {
- return;
- }
- if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) {
- // We run the initialization.
- (*fp)(context, errCode);
- uio.fErrCode = errCode;
- umtx_initImplPostInit(uio);
- } else {
- // Someone else already ran the initialization.
- if (U_FAILURE(uio.fErrCode)) {
- errCode = uio.fErrCode;
- }
- }
-}
-
-// UMutex should be constexpr-constructible, so that no initialization code
-// is run during startup.
-// This works on all C++ libraries except MS VS before VS2019.
-#if (defined(_CPPLIB_VER) && !defined(_MSVC_STL_VERSION)) || \
- (defined(_MSVC_STL_VERSION) && _MSVC_STL_VERSION < 142)
- // (VS std lib older than VS2017) || (VS std lib version < VS2019)
-# define UMUTEX_CONSTEXPR
-#else
-# define UMUTEX_CONSTEXPR constexpr
-#endif
-
-/**
- * UMutex - ICU Mutex class.
- *
- * This is the preferred Mutex class for use within ICU implementation code.
- * It is a thin wrapper over C++ std::mutex, with these additions:
- * - Static instances are safe, not triggering static construction or destruction,
- * and the associated order of construction or destruction issues.
- * - Plumbed into u_cleanup() for destructing the underlying std::mutex,
- * which frees any OS level resources they may be holding.
- *
- * Limitations:
- * - Static or global instances only. Cannot be heap allocated. Cannot appear as a
- * member of another class.
- * - No condition variables or other advanced features. If needed, you will need to use
- * std::mutex and std::condition_variable directly. For an example, see unifiedcache.cpp
- *
- * Typical Usage:
- * static UMutex myMutex;
- *
- * {
- * Mutex lock(myMutex);
- * ... // Do stuff that is protected by myMutex;
- * } // myMutex is released when lock goes out of scope.
- */
-
-class U_COMMON_API UMutex {
-public:
- UMUTEX_CONSTEXPR UMutex() {}
- ~UMutex() = default;
-
- UMutex(const UMutex &other) = delete;
- UMutex &operator =(const UMutex &other) = delete;
- void *operator new(size_t) = delete;
-
- // requirements for C++ BasicLockable, allows UMutex to work with std::lock_guard
- void lock() {
- std::mutex *m = fMutex.load(std::memory_order_acquire);
- if (m == nullptr) { m = getMutex(); }
- m->lock();
- }
- void unlock() { fMutex.load(std::memory_order_relaxed)->unlock(); }
-
- static void cleanup();
-
-private:
- alignas(std::mutex) char fStorage[sizeof(std::mutex)] {};
- std::atomic<std::mutex *> fMutex { nullptr };
-
- /** All initialized UMutexes are kept in a linked list, so that they can be found,
- * and the underlying std::mutex destructed, by u_cleanup().
- */
- UMutex *fListLink { nullptr };
- static UMutex *gListHead;
-
- /** Out-of-line function to lazily initialize a UMutex on first use.
- * Initial fast check is inline, in lock(). The returned value may never
- * be nullptr.
- */
- std::mutex *getMutex();
-};
-
-
-/* Lock a mutex.
- * @param mutex The given mutex to be locked. Pass NULL to specify
- * the global ICU mutex. Recursive locks are an error
- * and may cause a deadlock on some platforms.
- */
-U_INTERNAL void U_EXPORT2 umtx_lock(UMutex* mutex);
-
-/* Unlock a mutex.
- * @param mutex The given mutex to be unlocked. Pass NULL to specify
- * the global ICU mutex.
- */
-U_INTERNAL void U_EXPORT2 umtx_unlock (UMutex* mutex);
-
-
-U_NAMESPACE_END
-
-#endif /* UMUTEX_H */
-/*eof*/
diff --git a/contrib/libs/icu/common/unames.cpp b/contrib/libs/icu/common/unames.cpp
deleted file mode 100644
index 5776058f957..00000000000
--- a/contrib/libs/icu/common/unames.cpp
+++ /dev/null
@@ -1,2108 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1999-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: unames.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999oct04
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/putil.h"
-#include "unicode/uchar.h"
-#include "unicode/udata.h"
-#include "unicode/utf.h"
-#include "unicode/utf16.h"
-#include "uassert.h"
-#include "ustr_imp.h"
-#include "umutex.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "ucln_cmn.h"
-#include "udataswp.h"
-#include "uprops.h"
-
-U_NAMESPACE_BEGIN
-
-/* prototypes ------------------------------------------------------------- */
-
-static const char DATA_NAME[] = "unames";
-static const char DATA_TYPE[] = "icu";
-
-#define GROUP_SHIFT 5
-#define LINES_PER_GROUP (1L<<GROUP_SHIFT)
-#define GROUP_MASK (LINES_PER_GROUP-1)
-
-/*
- * This struct was replaced by explicitly accessing equivalent
- * fields from triples of uint16_t.
- * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,
- * which broke the assumption that sizeof(Group)==6 and that the ++ operator
- * would advance by 6 bytes (3 uint16_t).
- *
- * We can't just change the data structure because it's loaded from a data file,
- * and we don't want to make it less compact, so we changed the access code.
- *
- * For details see ICU tickets 6331 and 6008.
-typedef struct {
- uint16_t groupMSB,
- offsetHigh, offsetLow; / * avoid padding * /
-} Group;
- */
-enum {
- GROUP_MSB,
- GROUP_OFFSET_HIGH,
- GROUP_OFFSET_LOW,
- GROUP_LENGTH
-};
-
-/*
- * Get the 32-bit group offset.
- * @param group (const uint16_t *) pointer to a Group triple of uint16_t
- * @return group offset (int32_t)
- */
-#define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW])
-
-#define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
-#define PREV_GROUP(group) ((group)-GROUP_LENGTH)
-
-typedef struct {
- uint32_t start, end;
- uint8_t type, variant;
- uint16_t size;
-} AlgorithmicRange;
-
-typedef struct {
- uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
-} UCharNames;
-
-/*
- * Get the groups table from a UCharNames struct.
- * The groups table consists of one uint16_t groupCount followed by
- * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH
- * and the comment for the old struct Group above.
- *
- * @param names (const UCharNames *) pointer to the UCharNames indexes
- * @return (const uint16_t *) pointer to the groups table
- */
-#define GET_GROUPS(names) (const uint16_t *)((const char *)names+names->groupsOffset)
-
-typedef struct {
- const char *otherName;
- UChar32 code;
-} FindName;
-
-#define DO_FIND_NAME NULL
-
-static UDataMemory *uCharNamesData=NULL;
-static UCharNames *uCharNames=NULL;
-static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER;
-
-/*
- * Maximum length of character names (regular & 1.0).
- */
-static int32_t gMaxNameLength=0;
-
-/*
- * Set of chars used in character names (regular & 1.0).
- * Chars are platform-dependent (can be EBCDIC).
- */
-static uint32_t gNameSet[8]={ 0 };
-
-#define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
-#define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
-#define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
-
-#define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
-
-static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
- "unassigned",
- "uppercase letter",
- "lowercase letter",
- "titlecase letter",
- "modifier letter",
- "other letter",
- "non spacing mark",
- "enclosing mark",
- "combining spacing mark",
- "decimal digit number",
- "letter number",
- "other number",
- "space separator",
- "line separator",
- "paragraph separator",
- "control",
- "format",
- "private use area",
- "surrogate",
- "dash punctuation",
- "start punctuation",
- "end punctuation",
- "connector punctuation",
- "other punctuation",
- "math symbol",
- "currency symbol",
- "modifier symbol",
- "other symbol",
- "initial punctuation",
- "final punctuation",
- "noncharacter",
- "lead surrogate",
- "trail surrogate"
-};
-
-/* implementation ----------------------------------------------------------- */
-
-static UBool U_CALLCONV unames_cleanup(void)
-{
- if(uCharNamesData) {
- udata_close(uCharNamesData);
- uCharNamesData = NULL;
- }
- if(uCharNames) {
- uCharNames = NULL;
- }
- gCharNamesInitOnce.reset();
- gMaxNameLength=0;
- return TRUE;
-}
-
-static UBool U_CALLCONV
-isAcceptable(void * /*context*/,
- const char * /*type*/, const char * /*name*/,
- const UDataInfo *pInfo) {
- return (UBool)(
- pInfo->size>=20 &&
- pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
- pInfo->charsetFamily==U_CHARSET_FAMILY &&
- pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */
- pInfo->dataFormat[1]==0x6e &&
- pInfo->dataFormat[2]==0x61 &&
- pInfo->dataFormat[3]==0x6d &&
- pInfo->formatVersion[0]==1);
-}
-
-static void U_CALLCONV
-loadCharNames(UErrorCode &status) {
- U_ASSERT(uCharNamesData == NULL);
- U_ASSERT(uCharNames == NULL);
-
- uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status);
- if(U_FAILURE(status)) {
- uCharNamesData = NULL;
- } else {
- uCharNames = (UCharNames *)udata_getMemory(uCharNamesData);
- }
- ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
-}
-
-
-static UBool
-isDataLoaded(UErrorCode *pErrorCode) {
- umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode);
- return U_SUCCESS(*pErrorCode);
-}
-
-#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) UPRV_BLOCK_MACRO_BEGIN { \
- if((bufferLength)>0) { \
- *(buffer)++=c; \
- --(bufferLength); \
- } \
- ++(bufferPos); \
-} UPRV_BLOCK_MACRO_END
-
-#define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
-
-/*
- * Important: expandName() and compareName() are almost the same -
- * apply fixes to both.
- *
- * UnicodeData.txt uses ';' as a field separator, so no
- * field can contain ';' as part of its contents.
- * In unames.dat, it is marked as token[';']==-1 only if the
- * semicolon is used in the data file - which is iff we
- * have Unicode 1.0 names or ISO comments or aliases.
- * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
- * although we know that it will never be part of a name.
- */
-static uint16_t
-expandName(UCharNames *names,
- const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
- char *buffer, uint16_t bufferLength) {
- uint16_t *tokens=(uint16_t *)names+8;
- uint16_t token, tokenCount=*tokens++, bufferPos=0;
- uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
- uint8_t c;
-
- if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
- /*
- * skip the modern name if it is not requested _and_
- * if the semicolon byte value is a character, not a token number
- */
- if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
- int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
- do {
- while(nameLength>0) {
- --nameLength;
- if(*name++==';') {
- break;
- }
- }
- } while(--fieldIndex>0);
- } else {
- /*
- * the semicolon byte value is a token number, therefore
- * only modern names are stored in unames.dat and there is no
- * such requested alternate name here
- */
- nameLength=0;
- }
- }
-
- /* write each letter directly, and write a token word per token */
- while(nameLength>0) {
- --nameLength;
- c=*name++;
-
- if(c>=tokenCount) {
- if(c!=';') {
- /* implicit letter */
- WRITE_CHAR(buffer, bufferLength, bufferPos, c);
- } else {
- /* finished */
- break;
- }
- } else {
- token=tokens[c];
- if(token==(uint16_t)(-2)) {
- /* this is a lead byte for a double-byte token */
- token=tokens[c<<8|*name++];
- --nameLength;
- }
- if(token==(uint16_t)(-1)) {
- if(c!=';') {
- /* explicit letter */
- WRITE_CHAR(buffer, bufferLength, bufferPos, c);
- } else {
- /* stop, but skip the semicolon if we are seeking
- extended names and there was no 2.0 name but there
- is a 1.0 name. */
- if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
- if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
- continue;
- }
- }
- /* finished */
- break;
- }
- } else {
- /* write token word */
- uint8_t *tokenString=tokenStrings+token;
- while((c=*tokenString++)!=0) {
- WRITE_CHAR(buffer, bufferLength, bufferPos, c);
- }
- }
- }
- }
-
- /* zero-terminate */
- if(bufferLength>0) {
- *buffer=0;
- }
-
- return bufferPos;
-}
-
-/*
- * compareName() is almost the same as expandName() except that it compares
- * the currently expanded name to an input name.
- * It returns the match/no match result as soon as possible.
- */
-static UBool
-compareName(UCharNames *names,
- const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
- const char *otherName) {
- uint16_t *tokens=(uint16_t *)names+8;
- uint16_t token, tokenCount=*tokens++;
- uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
- uint8_t c;
- const char *origOtherName = otherName;
-
- if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
- /*
- * skip the modern name if it is not requested _and_
- * if the semicolon byte value is a character, not a token number
- */
- if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
- int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
- do {
- while(nameLength>0) {
- --nameLength;
- if(*name++==';') {
- break;
- }
- }
- } while(--fieldIndex>0);
- } else {
- /*
- * the semicolon byte value is a token number, therefore
- * only modern names are stored in unames.dat and there is no
- * such requested alternate name here
- */
- nameLength=0;
- }
- }
-
- /* compare each letter directly, and compare a token word per token */
- while(nameLength>0) {
- --nameLength;
- c=*name++;
-
- if(c>=tokenCount) {
- if(c!=';') {
- /* implicit letter */
- if((char)c!=*otherName++) {
- return FALSE;
- }
- } else {
- /* finished */
- break;
- }
- } else {
- token=tokens[c];
- if(token==(uint16_t)(-2)) {
- /* this is a lead byte for a double-byte token */
- token=tokens[c<<8|*name++];
- --nameLength;
- }
- if(token==(uint16_t)(-1)) {
- if(c!=';') {
- /* explicit letter */
- if((char)c!=*otherName++) {
- return FALSE;
- }
- } else {
- /* stop, but skip the semicolon if we are seeking
- extended names and there was no 2.0 name but there
- is a 1.0 name. */
- if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
- if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
- continue;
- }
- }
- /* finished */
- break;
- }
- } else {
- /* write token word */
- uint8_t *tokenString=tokenStrings+token;
- while((c=*tokenString++)!=0) {
- if((char)c!=*otherName++) {
- return FALSE;
- }
- }
- }
- }
- }
-
- /* complete match? */
- return (UBool)(*otherName==0);
-}
-
-static uint8_t getCharCat(UChar32 cp) {
- uint8_t cat;
-
- if (U_IS_UNICODE_NONCHAR(cp)) {
- return U_NONCHARACTER_CODE_POINT;
- }
-
- if ((cat = u_charType(cp)) == U_SURROGATE) {
- cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
- }
-
- return cat;
-}
-
-static const char *getCharCatName(UChar32 cp) {
- uint8_t cat = getCharCat(cp);
-
- /* Return unknown if the table of names above is not up to
- date. */
-
- if (cat >= UPRV_LENGTHOF(charCatNames)) {
- return "unknown";
- } else {
- return charCatNames[cat];
- }
-}
-
-static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
- const char *catname = getCharCatName(code);
- uint16_t length = 0;
-
- UChar32 cp;
- int ndigits, i;
-
- WRITE_CHAR(buffer, bufferLength, length, '<');
- while (catname[length - 1]) {
- WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
- }
- WRITE_CHAR(buffer, bufferLength, length, '-');
- for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
- ;
- if (ndigits < 4)
- ndigits = 4;
- for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {
- uint8_t v = (uint8_t)(cp & 0xf);
- buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
- }
- buffer += ndigits;
- length += static_cast<uint16_t>(ndigits);
- WRITE_CHAR(buffer, bufferLength, length, '>');
-
- return length;
-}
-
-/*
- * getGroup() does a binary search for the group that contains the
- * Unicode code point "code".
- * The return value is always a valid Group* that may contain "code"
- * or else is the highest group before "code".
- * If the lowest group is after "code", then that one is returned.
- */
-static const uint16_t *
-getGroup(UCharNames *names, uint32_t code) {
- const uint16_t *groups=GET_GROUPS(names);
- uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
- start=0,
- limit=*groups++,
- number;
-
- /* binary search for the group of names that contains the one for code */
- while(start<limit-1) {
- number=(uint16_t)((start+limit)/2);
- if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {
- limit=number;
- } else {
- start=number;
- }
- }
-
- /* return this regardless of whether it is an exact match */
- return groups+start*GROUP_LENGTH;
-}
-
-/*
- * expandGroupLengths() reads a block of compressed lengths of 32 strings and
- * expands them into offsets and lengths for each string.
- * Lengths are stored with a variable-width encoding in consecutive nibbles:
- * If a nibble<0xc, then it is the length itself (0=empty string).
- * If a nibble>=0xc, then it forms a length value with the following nibble.
- * Calculation see below.
- * The offsets and lengths arrays must be at least 33 (one more) long because
- * there is no check here at the end if the last nibble is still used.
- */
-static const uint8_t *
-expandGroupLengths(const uint8_t *s,
- uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
- /* read the lengths of the 32 strings in this group and get each string's offset */
- uint16_t i=0, offset=0, length=0;
- uint8_t lengthByte;
-
- /* all 32 lengths must be read to get the offset of the first group string */
- while(i<LINES_PER_GROUP) {
- lengthByte=*s++;
-
- /* read even nibble - MSBs of lengthByte */
- if(length>=12) {
- /* double-nibble length spread across two bytes */
- length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
- lengthByte&=0xf;
- } else if((lengthByte /* &0xf0 */)>=0xc0) {
- /* double-nibble length spread across this one byte */
- length=(uint16_t)((lengthByte&0x3f)+12);
- } else {
- /* single-nibble length in MSBs */
- length=(uint16_t)(lengthByte>>4);
- lengthByte&=0xf;
- }
-
- *offsets++=offset;
- *lengths++=length;
-
- offset+=length;
- ++i;
-
- /* read odd nibble - LSBs of lengthByte */
- if((lengthByte&0xf0)==0) {
- /* this nibble was not consumed for a double-nibble length above */
- length=lengthByte;
- if(length<12) {
- /* single-nibble length in LSBs */
- *offsets++=offset;
- *lengths++=length;
-
- offset+=length;
- ++i;
- }
- } else {
- length=0; /* prevent double-nibble detection in the next iteration */
- }
- }
-
- /* now, s is at the first group string */
- return s;
-}
-
-static uint16_t
-expandGroupName(UCharNames *names, const uint16_t *group,
- uint16_t lineNumber, UCharNameChoice nameChoice,
- char *buffer, uint16_t bufferLength) {
- uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
- const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
- s=expandGroupLengths(s, offsets, lengths);
- return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
- buffer, bufferLength);
-}
-
-static uint16_t
-getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
- char *buffer, uint16_t bufferLength) {
- const uint16_t *group=getGroup(names, code);
- if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) {
- return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
- buffer, bufferLength);
- } else {
- /* group not found */
- /* zero-terminate */
- if(bufferLength>0) {
- *buffer=0;
- }
- return 0;
- }
-}
-
-/*
- * enumGroupNames() enumerates all the names in a 32-group
- * and either calls the enumerator function or finds a given input name.
- */
-static UBool
-enumGroupNames(UCharNames *names, const uint16_t *group,
- UChar32 start, UChar32 end,
- UEnumCharNamesFn *fn, void *context,
- UCharNameChoice nameChoice) {
- uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
- const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
-
- s=expandGroupLengths(s, offsets, lengths);
- if(fn!=DO_FIND_NAME) {
- char buffer[200];
- uint16_t length;
-
- while(start<=end) {
- length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
- if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
- buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
- }
- /* here, we assume that the buffer is large enough */
- if(length>0) {
- if(!fn(context, start, nameChoice, buffer, length)) {
- return FALSE;
- }
- }
- ++start;
- }
- } else {
- const char *otherName=((FindName *)context)->otherName;
- while(start<=end) {
- if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
- ((FindName *)context)->code=start;
- return FALSE;
- }
- ++start;
- }
- }
- return TRUE;
-}
-
-/*
- * enumExtNames enumerate extended names.
- * It only needs to do it if it is called with a real function and not
- * with the dummy DO_FIND_NAME, because u_charFromName() does a check
- * for extended names by itself.
- */
-static UBool
-enumExtNames(UChar32 start, UChar32 end,
- UEnumCharNamesFn *fn, void *context)
-{
- if(fn!=DO_FIND_NAME) {
- char buffer[200];
- uint16_t length;
-
- while(start<=end) {
- buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
- /* here, we assume that the buffer is large enough */
- if(length>0) {
- if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
- return FALSE;
- }
- }
- ++start;
- }
- }
-
- return TRUE;
-}
-
-static UBool
-enumNames(UCharNames *names,
- UChar32 start, UChar32 limit,
- UEnumCharNamesFn *fn, void *context,
- UCharNameChoice nameChoice) {
- uint16_t startGroupMSB, endGroupMSB, groupCount;
- const uint16_t *group, *groupLimit;
-
- startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
- endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);
-
- /* find the group that contains start, or the highest before it */
- group=getGroup(names, start);
-
- if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) {
- /* enumerate synthetic names between start and the group start */
- UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);
- if(extLimit>limit) {
- extLimit=limit;
- }
- if(!enumExtNames(start, extLimit-1, fn, context)) {
- return FALSE;
- }
- start=extLimit;
- }
-
- if(startGroupMSB==endGroupMSB) {
- if(startGroupMSB==group[GROUP_MSB]) {
- /* if start and limit-1 are in the same group, then enumerate only in that one */
- return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
- }
- } else {
- const uint16_t *groups=GET_GROUPS(names);
- groupCount=*groups++;
- groupLimit=groups+groupCount*GROUP_LENGTH;
-
- if(startGroupMSB==group[GROUP_MSB]) {
- /* enumerate characters in the partial start group */
- if((start&GROUP_MASK)!=0) {
- if(!enumGroupNames(names, group,
- start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
- fn, context, nameChoice)) {
- return FALSE;
- }
- group=NEXT_GROUP(group); /* continue with the next group */
- }
- } else if(startGroupMSB>group[GROUP_MSB]) {
- /* make sure that we start enumerating with the first group after start */
- const uint16_t *nextGroup=NEXT_GROUP(group);
- if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
- UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
- if (end > limit) {
- end = limit;
- }
- if (!enumExtNames(start, end - 1, fn, context)) {
- return FALSE;
- }
- }
- group=nextGroup;
- }
-
- /* enumerate entire groups between the start- and end-groups */
- while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {
- const uint16_t *nextGroup;
- start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;
- if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
- return FALSE;
- }
- nextGroup=NEXT_GROUP(group);
- if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
- UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
- if (end > limit) {
- end = limit;
- }
- if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) {
- return FALSE;
- }
- }
- group=nextGroup;
- }
-
- /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */
- if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {
- return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
- } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
- UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;
- if (next > start) {
- start = next;
- }
- } else {
- return TRUE;
- }
- }
-
- /* we have not found a group, which means everything is made of
- extended names. */
- if (nameChoice == U_EXTENDED_CHAR_NAME) {
- if (limit > UCHAR_MAX_VALUE + 1) {
- limit = UCHAR_MAX_VALUE + 1;
- }
- return enumExtNames(start, limit - 1, fn, context);
- }
-
- return TRUE;
-}
-
-static uint16_t
-writeFactorSuffix(const uint16_t *factors, uint16_t count,
- const char *s, /* suffix elements */
- uint32_t code,
- uint16_t indexes[8], /* output fields from here */
- const char *elementBases[8], const char *elements[8],
- char *buffer, uint16_t bufferLength) {
- uint16_t i, factor, bufferPos=0;
- char c;
-
- /* write elements according to the factors */
-
- /*
- * the factorized elements are determined by modulo arithmetic
- * with the factors of this algorithm
- *
- * note that for fewer operations, count is decremented here
- */
- --count;
- for(i=count; i>0; --i) {
- factor=factors[i];
- indexes[i]=(uint16_t)(code%factor);
- code/=factor;
- }
- /*
- * we don't need to calculate the last modulus because start<=code<=end
- * guarantees here that code<=factors[0]
- */
- indexes[0]=(uint16_t)code;
-
- /* write each element */
- for(;;) {
- if(elementBases!=NULL) {
- *elementBases++=s;
- }
-
- /* skip indexes[i] strings */
- factor=indexes[i];
- while(factor>0) {
- while(*s++!=0) {}
- --factor;
- }
- if(elements!=NULL) {
- *elements++=s;
- }
-
- /* write element */
- while((c=*s++)!=0) {
- WRITE_CHAR(buffer, bufferLength, bufferPos, c);
- }
-
- /* we do not need to perform the rest of this loop for i==count - break here */
- if(i>=count) {
- break;
- }
-
- /* skip the rest of the strings for this factors[i] */
- factor=(uint16_t)(factors[i]-indexes[i]-1);
- while(factor>0) {
- while(*s++!=0) {}
- --factor;
- }
-
- ++i;
- }
-
- /* zero-terminate */
- if(bufferLength>0) {
- *buffer=0;
- }
-
- return bufferPos;
-}
-
-/*
- * Important:
- * Parts of findAlgName() are almost the same as some of getAlgName().
- * Fixes must be applied to both.
- */
-static uint16_t
-getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
- char *buffer, uint16_t bufferLength) {
- uint16_t bufferPos=0;
-
- /* Only the normative character name can be algorithmic. */
- if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
- /* zero-terminate */
- if(bufferLength>0) {
- *buffer=0;
- }
- return 0;
- }
-
- switch(range->type) {
- case 0: {
- /* name = prefix hex-digits */
- const char *s=(const char *)(range+1);
- char c;
-
- uint16_t i, count;
-
- /* copy prefix */
- while((c=*s++)!=0) {
- WRITE_CHAR(buffer, bufferLength, bufferPos, c);
- }
-
- /* write hexadecimal code point value */
- count=range->variant;
-
- /* zero-terminate */
- if(count<bufferLength) {
- buffer[count]=0;
- }
-
- for(i=count; i>0;) {
- if(--i<bufferLength) {
- c=(char)(code&0xf);
- if(c<10) {
- c+='0';
- } else {
- c+='A'-10;
- }
- buffer[i]=c;
- }
- code>>=4;
- }
-
- bufferPos+=count;
- break;
- }
- case 1: {
- /* name = prefix factorized-elements */
- uint16_t indexes[8];
- const uint16_t *factors=(const uint16_t *)(range+1);
- uint16_t count=range->variant;
- const char *s=(const char *)(factors+count);
- char c;
-
- /* copy prefix */
- while((c=*s++)!=0) {
- WRITE_CHAR(buffer, bufferLength, bufferPos, c);
- }
-
- bufferPos+=writeFactorSuffix(factors, count,
- s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
- break;
- }
- default:
- /* undefined type */
- /* zero-terminate */
- if(bufferLength>0) {
- *buffer=0;
- }
- break;
- }
-
- return bufferPos;
-}
-
-/*
- * Important: enumAlgNames() and findAlgName() are almost the same.
- * Any fix must be applied to both.
- */
-static UBool
-enumAlgNames(AlgorithmicRange *range,
- UChar32 start, UChar32 limit,
- UEnumCharNamesFn *fn, void *context,
- UCharNameChoice nameChoice) {
- char buffer[200];
- uint16_t length;
-
- if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
- return TRUE;
- }
-
- switch(range->type) {
- case 0: {
- char *s, *end;
- char c;
-
- /* get the full name of the start character */
- length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
- if(length<=0) {
- return TRUE;
- }
-
- /* call the enumerator function with this first character */
- if(!fn(context, start, nameChoice, buffer, length)) {
- return FALSE;
- }
-
- /* go to the end of the name; all these names have the same length */
- end=buffer;
- while(*end!=0) {
- ++end;
- }
-
- /* enumerate the rest of the names */
- while(++start<limit) {
- /* increment the hexadecimal number on a character-basis */
- s=end;
- for (;;) {
- c=*--s;
- if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
- *s=(char)(c+1);
- break;
- } else if(c=='9') {
- *s='A';
- break;
- } else if(c=='F') {
- *s='0';
- }
- }
-
- if(!fn(context, start, nameChoice, buffer, length)) {
- return FALSE;
- }
- }
- break;
- }
- case 1: {
- uint16_t indexes[8];
- const char *elementBases[8], *elements[8];
- const uint16_t *factors=(const uint16_t *)(range+1);
- uint16_t count=range->variant;
- const char *s=(const char *)(factors+count);
- char *suffix, *t;
- uint16_t prefixLength, i, idx;
-
- char c;
-
- /* name = prefix factorized-elements */
-
- /* copy prefix */
- suffix=buffer;
- prefixLength=0;
- while((c=*s++)!=0) {
- *suffix++=c;
- ++prefixLength;
- }
-
- /* append the suffix of the start character */
- length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
- s, (uint32_t)start-range->start,
- indexes, elementBases, elements,
- suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
-
- /* call the enumerator function with this first character */
- if(!fn(context, start, nameChoice, buffer, length)) {
- return FALSE;
- }
-
- /* enumerate the rest of the names */
- while(++start<limit) {
- /* increment the indexes in lexical order bound by the factors */
- i=count;
- for (;;) {
- idx=(uint16_t)(indexes[--i]+1);
- if(idx<factors[i]) {
- /* skip one index and its element string */
- indexes[i]=idx;
- s=elements[i];
- while(*s++!=0) {
- }
- elements[i]=s;
- break;
- } else {
- /* reset this index to 0 and its element string to the first one */
- indexes[i]=0;
- elements[i]=elementBases[i];
- }
- }
-
- /* to make matters a little easier, just append all elements to the suffix */
- t=suffix;
- length=prefixLength;
- for(i=0; i<count; ++i) {
- s=elements[i];
- while((c=*s++)!=0) {
- *t++=c;
- ++length;
- }
- }
- /* zero-terminate */
- *t=0;
-
- if(!fn(context, start, nameChoice, buffer, length)) {
- return FALSE;
- }
- }
- break;
- }
- default:
- /* undefined type */
- break;
- }
-
- return TRUE;
-}
-
-/*
- * findAlgName() is almost the same as enumAlgNames() except that it
- * returns the code point for a name if it fits into the range.
- * It returns 0xffff otherwise.
- */
-static UChar32
-findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
- UChar32 code;
-
- if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
- return 0xffff;
- }
-
- switch(range->type) {
- case 0: {
- /* name = prefix hex-digits */
- const char *s=(const char *)(range+1);
- char c;
-
- uint16_t i, count;
-
- /* compare prefix */
- while((c=*s++)!=0) {
- if((char)c!=*otherName++) {
- return 0xffff;
- }
- }
-
- /* read hexadecimal code point value */
- count=range->variant;
- code=0;
- for(i=0; i<count; ++i) {
- c=*otherName++;
- if('0'<=c && c<='9') {
- code=(code<<4)|(c-'0');
- } else if('A'<=c && c<='F') {
- code=(code<<4)|(c-'A'+10);
- } else {
- return 0xffff;
- }
- }
-
- /* does it fit into the range? */
- if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
- return code;
- }
- break;
- }
- case 1: {
- char buffer[64];
- uint16_t indexes[8];
- const char *elementBases[8], *elements[8];
- const uint16_t *factors=(const uint16_t *)(range+1);
- uint16_t count=range->variant;
- const char *s=(const char *)(factors+count), *t;
- UChar32 start, limit;
- uint16_t i, idx;
-
- char c;
-
- /* name = prefix factorized-elements */
-
- /* compare prefix */
- while((c=*s++)!=0) {
- if((char)c!=*otherName++) {
- return 0xffff;
- }
- }
-
- start=(UChar32)range->start;
- limit=(UChar32)(range->end+1);
-
- /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
- writeFactorSuffix(factors, count, s, 0,
- indexes, elementBases, elements, buffer, sizeof(buffer));
-
- /* compare the first suffix */
- if(0==uprv_strcmp(otherName, buffer)) {
- return start;
- }
-
- /* enumerate and compare the rest of the suffixes */
- while(++start<limit) {
- /* increment the indexes in lexical order bound by the factors */
- i=count;
- for (;;) {
- idx=(uint16_t)(indexes[--i]+1);
- if(idx<factors[i]) {
- /* skip one index and its element string */
- indexes[i]=idx;
- s=elements[i];
- while(*s++!=0) {}
- elements[i]=s;
- break;
- } else {
- /* reset this index to 0 and its element string to the first one */
- indexes[i]=0;
- elements[i]=elementBases[i];
- }
- }
-
- /* to make matters a little easier, just compare all elements of the suffix */
- t=otherName;
- for(i=0; i<count; ++i) {
- s=elements[i];
- while((c=*s++)!=0) {
- if(c!=*t++) {
- s=""; /* does not match */
- i=99;
- }
- }
- }
- if(i<99 && *t==0) {
- return start;
- }
- }
- break;
- }
- default:
- /* undefined type */
- break;
- }
-
- return 0xffff;
-}
-
-/* sets of name characters, maximum name lengths ---------------------------- */
-
-#define SET_ADD(set, c) ((set)[(uint8_t)c>>5]|=((uint32_t)1<<((uint8_t)c&0x1f)))
-#define SET_CONTAINS(set, c) (((set)[(uint8_t)c>>5]&((uint32_t)1<<((uint8_t)c&0x1f)))!=0)
-
-static int32_t
-calcStringSetLength(uint32_t set[8], const char *s) {
- int32_t length=0;
- char c;
-
- while((c=*s++)!=0) {
- SET_ADD(set, c);
- ++length;
- }
- return length;
-}
-
-static int32_t
-calcAlgNameSetsLengths(int32_t maxNameLength) {
- AlgorithmicRange *range;
- uint32_t *p;
- uint32_t rangeCount;
- int32_t length;
-
- /* enumerate algorithmic ranges */
- p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
- rangeCount=*p;
- range=(AlgorithmicRange *)(p+1);
- while(rangeCount>0) {
- switch(range->type) {
- case 0:
- /* name = prefix + (range->variant times) hex-digits */
- /* prefix */
- length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
- if(length>maxNameLength) {
- maxNameLength=length;
- }
- break;
- case 1: {
- /* name = prefix factorized-elements */
- const uint16_t *factors=(const uint16_t *)(range+1);
- const char *s;
- int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
-
- /* prefix length */
- s=(const char *)(factors+count);
- length=calcStringSetLength(gNameSet, s);
- s+=length+1; /* start of factor suffixes */
-
- /* get the set and maximum factor suffix length for each factor */
- for(i=0; i<count; ++i) {
- maxFactorLength=0;
- for(factor=factors[i]; factor>0; --factor) {
- factorLength=calcStringSetLength(gNameSet, s);
- s+=factorLength+1;
- if(factorLength>maxFactorLength) {
- maxFactorLength=factorLength;
- }
- }
- length+=maxFactorLength;
- }
-
- if(length>maxNameLength) {
- maxNameLength=length;
- }
- break;
- }
- default:
- /* unknown type */
- break;
- }
-
- range=(AlgorithmicRange *)((uint8_t *)range+range->size);
- --rangeCount;
- }
- return maxNameLength;
-}
-
-static int32_t
-calcExtNameSetsLengths(int32_t maxNameLength) {
- int32_t i, length;
-
- for(i=0; i<UPRV_LENGTHOF(charCatNames); ++i) {
- /*
- * for each category, count the length of the category name
- * plus 9=
- * 2 for <>
- * 1 for -
- * 6 for most hex digits per code point
- */
- length=9+calcStringSetLength(gNameSet, charCatNames[i]);
- if(length>maxNameLength) {
- maxNameLength=length;
- }
- }
- return maxNameLength;
-}
-
-static int32_t
-calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
- uint32_t set[8],
- const uint8_t **pLine, const uint8_t *lineLimit) {
- const uint8_t *line=*pLine;
- int32_t length=0, tokenLength;
- uint16_t c, token;
-
- while(line!=lineLimit && (c=*line++)!=(uint8_t)';') {
- if(c>=tokenCount) {
- /* implicit letter */
- SET_ADD(set, c);
- ++length;
- } else {
- token=tokens[c];
- if(token==(uint16_t)(-2)) {
- /* this is a lead byte for a double-byte token */
- c=c<<8|*line++;
- token=tokens[c];
- }
- if(token==(uint16_t)(-1)) {
- /* explicit letter */
- SET_ADD(set, c);
- ++length;
- } else {
- /* count token word */
- if(tokenLengths!=NULL) {
- /* use cached token length */
- tokenLength=tokenLengths[c];
- if(tokenLength==0) {
- tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
- tokenLengths[c]=(int8_t)tokenLength;
- }
- } else {
- tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
- }
- length+=tokenLength;
- }
- }
- }
-
- *pLine=line;
- return length;
-}
-
-static void
-calcGroupNameSetsLengths(int32_t maxNameLength) {
- uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
-
- uint16_t *tokens=(uint16_t *)uCharNames+8;
- uint16_t tokenCount=*tokens++;
- uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;
-
- int8_t *tokenLengths;
-
- const uint16_t *group;
- const uint8_t *s, *line, *lineLimit;
-
- int32_t groupCount, lineNumber, length;
-
- tokenLengths=(int8_t *)uprv_malloc(tokenCount);
- if(tokenLengths!=NULL) {
- uprv_memset(tokenLengths, 0, tokenCount);
- }
-
- group=GET_GROUPS(uCharNames);
- groupCount=*group++;
-
- /* enumerate all groups */
- while(groupCount>0) {
- s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);
- s=expandGroupLengths(s, offsets, lengths);
-
- /* enumerate all lines in each group */
- for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
- line=s+offsets[lineNumber];
- length=lengths[lineNumber];
- if(length==0) {
- continue;
- }
-
- lineLimit=line+length;
-
- /* read regular name */
- length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
- if(length>maxNameLength) {
- maxNameLength=length;
- }
- if(line==lineLimit) {
- continue;
- }
-
- /* read Unicode 1.0 name */
- length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
- if(length>maxNameLength) {
- maxNameLength=length;
- }
- if(line==lineLimit) {
- continue;
- }
-
- /* read ISO comment */
- /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/
- }
-
- group=NEXT_GROUP(group);
- --groupCount;
- }
-
- if(tokenLengths!=NULL) {
- uprv_free(tokenLengths);
- }
-
- /* set gMax... - name length last for threading */
- gMaxNameLength=maxNameLength;
-}
-
-static UBool
-calcNameSetsLengths(UErrorCode *pErrorCode) {
- static const char extChars[]="0123456789ABCDEF<>-";
- int32_t i, maxNameLength;
-
- if(gMaxNameLength!=0) {
- return TRUE;
- }
-
- if(!isDataLoaded(pErrorCode)) {
- return FALSE;
- }
-
- /* set hex digits, used in various names, and <>-, used in extended names */
- for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) {
- SET_ADD(gNameSet, extChars[i]);
- }
-
- /* set sets and lengths from algorithmic names */
- maxNameLength=calcAlgNameSetsLengths(0);
-
- /* set sets and lengths from extended names */
- maxNameLength=calcExtNameSetsLengths(maxNameLength);
-
- /* set sets and lengths from group names, set global maximum values */
- calcGroupNameSetsLengths(maxNameLength);
-
- return TRUE;
-}
-
-U_NAMESPACE_END
-
-/* public API --------------------------------------------------------------- */
-
-U_NAMESPACE_USE
-
-U_CAPI int32_t U_EXPORT2
-u_charName(UChar32 code, UCharNameChoice nameChoice,
- char *buffer, int32_t bufferLength,
- UErrorCode *pErrorCode) {
- AlgorithmicRange *algRange;
- uint32_t *p;
- uint32_t i;
- int32_t length;
-
- /* check the argument values */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
- bufferLength<0 || (bufferLength>0 && buffer==NULL)
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
- return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
- }
-
- length=0;
-
- /* try algorithmic names first */
- p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
- i=*p;
- algRange=(AlgorithmicRange *)(p+1);
- while(i>0) {
- if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
- length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
- break;
- }
- algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
- --i;
- }
-
- if(i==0) {
- if (nameChoice == U_EXTENDED_CHAR_NAME) {
- length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
- if (!length) {
- /* extended character name */
- length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
- }
- } else {
- /* normal character name */
- length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
- }
- }
-
- return u_terminateChars(buffer, bufferLength, length, pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-u_getISOComment(UChar32 /*c*/,
- char *dest, int32_t destCapacity,
- UErrorCode *pErrorCode) {
- /* check the argument values */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- return u_terminateChars(dest, destCapacity, 0, pErrorCode);
-}
-
-U_CAPI UChar32 U_EXPORT2
-u_charFromName(UCharNameChoice nameChoice,
- const char *name,
- UErrorCode *pErrorCode) {
- char upper[120] = {0};
- char lower[120] = {0};
- FindName findName;
- AlgorithmicRange *algRange;
- uint32_t *p;
- uint32_t i;
- UChar32 cp = 0;
- char c0;
- static constexpr UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return error;
- }
-
- if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return error;
- }
-
- if(!isDataLoaded(pErrorCode)) {
- return error;
- }
-
- /* construct the uppercase and lowercase of the name first */
- for(i=0; i<sizeof(upper); ++i) {
- if((c0=*name++)!=0) {
- upper[i]=uprv_toupper(c0);
- lower[i]=uprv_tolower(c0);
- } else {
- upper[i]=lower[i]=0;
- break;
- }
- }
- if(i==sizeof(upper)) {
- /* name too long, there is no such character */
- *pErrorCode = U_ILLEGAL_CHAR_FOUND;
- return error;
- }
- // i==strlen(name)==strlen(lower)==strlen(upper)
-
- /* try extended names first */
- if (lower[0] == '<') {
- if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == '>') {
- // Parse a string like "<category-HHHH>" where HHHH is a hex code point.
- uint32_t limit = i;
- while (i >= 3 && lower[--i] != '-') {}
-
- // There should be 1 to 8 hex digits.
- int32_t hexLength = limit - (i + 1);
- if (i >= 2 && lower[i] == '-' && 1 <= hexLength && hexLength <= 8) {
- uint32_t cIdx;
-
- lower[i] = 0;
-
- for (++i; i < limit; ++i) {
- if (lower[i] >= '0' && lower[i] <= '9') {
- cp = (cp << 4) + lower[i] - '0';
- } else if (lower[i] >= 'a' && lower[i] <= 'f') {
- cp = (cp << 4) + lower[i] - 'a' + 10;
- } else {
- *pErrorCode = U_ILLEGAL_CHAR_FOUND;
- return error;
- }
- // Prevent signed-integer overflow and out-of-range code points.
- if (cp > UCHAR_MAX_VALUE) {
- *pErrorCode = U_ILLEGAL_CHAR_FOUND;
- return error;
- }
- }
-
- /* Now validate the category name.
- We could use a binary search, or a trie, if
- we really wanted to. */
- uint8_t cat = getCharCat(cp);
- for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
-
- if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
- if (cat == cIdx) {
- return cp;
- }
- break;
- }
- }
- }
- }
-
- *pErrorCode = U_ILLEGAL_CHAR_FOUND;
- return error;
- }
-
- /* try algorithmic names now */
- p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
- i=*p;
- algRange=(AlgorithmicRange *)(p+1);
- while(i>0) {
- if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
- return cp;
- }
- algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
- --i;
- }
-
- /* normal character name */
- findName.otherName=upper;
- findName.code=error;
- enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
- if (findName.code == error) {
- *pErrorCode = U_ILLEGAL_CHAR_FOUND;
- }
- return findName.code;
-}
-
-U_CAPI void U_EXPORT2
-u_enumCharNames(UChar32 start, UChar32 limit,
- UEnumCharNamesFn *fn,
- void *context,
- UCharNameChoice nameChoice,
- UErrorCode *pErrorCode) {
- AlgorithmicRange *algRange;
- uint32_t *p;
- uint32_t i;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {
- limit = UCHAR_MAX_VALUE + 1;
- }
- if((uint32_t)start>=(uint32_t)limit) {
- return;
- }
-
- if(!isDataLoaded(pErrorCode)) {
- return;
- }
-
- /* interleave the data-driven ones with the algorithmic ones */
- /* iterate over all algorithmic ranges; assume that they are in ascending order */
- p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
- i=*p;
- algRange=(AlgorithmicRange *)(p+1);
- while(i>0) {
- /* enumerate the character names before the current algorithmic range */
- /* here: start<limit */
- if((uint32_t)start<algRange->start) {
- if((uint32_t)limit<=algRange->start) {
- enumNames(uCharNames, start, limit, fn, context, nameChoice);
- return;
- }
- if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
- return;
- }
- start=(UChar32)algRange->start;
- }
- /* enumerate the character names in the current algorithmic range */
- /* here: algRange->start<=start<limit */
- if((uint32_t)start<=algRange->end) {
- if((uint32_t)limit<=(algRange->end+1)) {
- enumAlgNames(algRange, start, limit, fn, context, nameChoice);
- return;
- }
- if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
- return;
- }
- start=(UChar32)algRange->end+1;
- }
- /* continue to the next algorithmic range (here: start<limit) */
- algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
- --i;
- }
- /* enumerate the character names after the last algorithmic range */
- enumNames(uCharNames, start, limit, fn, context, nameChoice);
-}
-
-U_CAPI int32_t U_EXPORT2
-uprv_getMaxCharNameLength() {
- UErrorCode errorCode=U_ZERO_ERROR;
- if(calcNameSetsLengths(&errorCode)) {
- return gMaxNameLength;
- } else {
- return 0;
- }
-}
-
-/**
- * Converts the char set cset into a Unicode set uset.
- * @param cset Set of 256 bit flags corresponding to a set of chars.
- * @param uset USet to receive characters. Existing contents are deleted.
- */
-static void
-charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
- UChar us[256];
- char cs[256];
-
- int32_t i, length;
- UErrorCode errorCode;
-
- errorCode=U_ZERO_ERROR;
-
- if(!calcNameSetsLengths(&errorCode)) {
- return;
- }
-
- /* build a char string with all chars that are used in character names */
- length=0;
- for(i=0; i<256; ++i) {
- if(SET_CONTAINS(cset, i)) {
- cs[length++]=(char)i;
- }
- }
-
- /* convert the char string to a UChar string */
- u_charsToUChars(cs, us, length);
-
- /* add each UChar to the USet */
- for(i=0; i<length; ++i) {
- if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
- sa->add(sa->set, us[i]);
- }
- }
-}
-
-/**
- * Fills set with characters that are used in Unicode character names.
- * @param set USet to receive characters.
- */
-U_CAPI void U_EXPORT2
-uprv_getCharNameCharacters(const USetAdder *sa) {
- charSetToUSet(gNameSet, sa);
-}
-
-/* data swapping ------------------------------------------------------------ */
-
-/*
- * The token table contains non-negative entries for token bytes,
- * and -1 for bytes that represent themselves in the data file's charset.
- * -2 entries are used for lead bytes.
- *
- * Direct bytes (-1 entries) must be translated from the input charset family
- * to the output charset family.
- * makeTokenMap() writes a permutation mapping for this.
- * Use it once for single-/lead-byte tokens and once more for all trail byte
- * tokens. (';' is an unused trail byte marked with -1.)
- */
-static void
-makeTokenMap(const UDataSwapper *ds,
- int16_t tokens[], uint16_t tokenCount,
- uint8_t map[256],
- UErrorCode *pErrorCode) {
- UBool usedOutChar[256];
- uint16_t i, j;
- uint8_t c1, c2;
-
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
-
- if(ds->inCharset==ds->outCharset) {
- /* Same charset family: identity permutation */
- for(i=0; i<256; ++i) {
- map[i]=(uint8_t)i;
- }
- } else {
- uprv_memset(map, 0, 256);
- uprv_memset(usedOutChar, 0, 256);
-
- if(tokenCount>256) {
- tokenCount=256;
- }
-
- /* set the direct bytes (byte 0 always maps to itself) */
- for(i=1; i<tokenCount; ++i) {
- if(tokens[i]==-1) {
- /* convert the direct byte character */
- c1=(uint8_t)i;
- ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
- i, ds->inCharset);
- return;
- }
-
- /* enter the converted character into the map and mark it used */
- map[c1]=c2;
- usedOutChar[c2]=TRUE;
- }
- }
-
- /* set the mappings for the rest of the permutation */
- for(i=j=1; i<tokenCount; ++i) {
- /* set mappings that were not set for direct bytes */
- if(map[i]==0) {
- /* set an output byte value that was not used as an output byte above */
- while(usedOutChar[j]) {
- ++j;
- }
- map[i]=(uint8_t)j++;
- }
- }
-
- /*
- * leave mappings at tokenCount and above unset if tokenCount<256
- * because they won't be used
- */
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-uchar_swapNames(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const UDataInfo *pInfo;
- int32_t headerSize;
-
- const uint8_t *inBytes;
- uint8_t *outBytes;
-
- uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
- offset, i, count, stringsCount;
-
- const AlgorithmicRange *inRange;
- AlgorithmicRange *outRange;
-
- /* udata_swapDataHeader checks the arguments */
- headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /* check data format and format version */
- pInfo=(const UDataInfo *)((const char *)inData+4);
- if(!(
- pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */
- pInfo->dataFormat[1]==0x6e &&
- pInfo->dataFormat[2]==0x61 &&
- pInfo->dataFormat[3]==0x6d &&
- pInfo->formatVersion[0]==1
- )) {
- udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
- pInfo->dataFormat[0], pInfo->dataFormat[1],
- pInfo->dataFormat[2], pInfo->dataFormat[3],
- pInfo->formatVersion[0]);
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- inBytes=(const uint8_t *)inData+headerSize;
- outBytes=(uint8_t *)outData+headerSize;
- if(length<0) {
- algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
- } else {
- length-=headerSize;
- if( length<20 ||
- (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
- ) {
- udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
- length);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- }
-
- if(length<0) {
- /* preflighting: iterate through algorithmic ranges */
- offset=algNamesOffset;
- count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
- offset+=4;
-
- for(i=0; i<count; ++i) {
- inRange=(const AlgorithmicRange *)(inBytes+offset);
- offset+=ds->readUInt16(inRange->size);
- }
- } else {
- /* swap data */
- const uint16_t *p;
- uint16_t *q, *temp;
-
- int16_t tokens[512];
- uint16_t tokenCount;
-
- uint8_t map[256], trailMap[256];
-
- /* copy the data for inaccessible bytes */
- if(inBytes!=outBytes) {
- uprv_memcpy(outBytes, inBytes, length);
- }
-
- /* the initial 4 offsets first */
- tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
- groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
- groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
- ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);
-
- /*
- * now the tokens table
- * it needs to be permutated along with the compressed name strings
- */
- p=(const uint16_t *)(inBytes+16);
- q=(uint16_t *)(outBytes+16);
-
- /* read and swap the tokenCount */
- tokenCount=ds->readUInt16(*p);
- ds->swapArray16(ds, p, 2, q, pErrorCode);
- ++p;
- ++q;
-
- /* read the first 512 tokens and make the token maps */
- if(tokenCount<=512) {
- count=tokenCount;
- } else {
- count=512;
- }
- for(i=0; i<count; ++i) {
- tokens[i]=udata_readInt16(ds, p[i]);
- }
- for(; i<512; ++i) {
- tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
- }
- makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
- makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /*
- * swap and permutate the tokens
- * go through a temporary array to support in-place swapping
- */
- temp=(uint16_t *)uprv_malloc(tokenCount*2);
- if(temp==NULL) {
- udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
- tokenCount);
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
-
- /* swap and permutate single-/lead-byte tokens */
- for(i=0; i<tokenCount && i<256; ++i) {
- ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
- }
-
- /* swap and permutate trail-byte tokens */
- for(; i<tokenCount; ++i) {
- ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
- }
-
- /* copy the result into the output and free the temporary array */
- uprv_memcpy(q, temp, tokenCount*2);
- uprv_free(temp);
-
- /*
- * swap the token strings but not a possible padding byte after
- * the terminating NUL of the last string
- */
- udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
- outBytes+tokenStringOffset, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "uchar_swapNames(token strings) failed\n");
- return 0;
- }
-
- /* swap the group table */
- count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
- ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),
- outBytes+groupsOffset, pErrorCode);
-
- /*
- * swap the group strings
- * swap the string bytes but not the nibble-encoded string lengths
- */
- if(ds->inCharset!=ds->outCharset) {
- uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];
-
- const uint8_t *inStrings, *nextInStrings;
- uint8_t *outStrings;
-
- uint8_t c;
-
- inStrings=inBytes+groupStringOffset;
- outStrings=outBytes+groupStringOffset;
-
- stringsCount=algNamesOffset-groupStringOffset;
-
- /* iterate through string groups until only a few padding bytes are left */
- while(stringsCount>32) {
- nextInStrings=expandGroupLengths(inStrings, offsets, lengths);
-
- /* move past the length bytes */
- stringsCount-=(uint32_t)(nextInStrings-inStrings);
- outStrings+=nextInStrings-inStrings;
- inStrings=nextInStrings;
-
- count=offsets[31]+lengths[31]; /* total number of string bytes in this group */
- stringsCount-=count;
-
- /* swap the string bytes using map[] and trailMap[] */
- while(count>0) {
- c=*inStrings++;
- *outStrings++=map[c];
- if(tokens[c]!=-2) {
- --count;
- } else {
- /* token lead byte: swap the trail byte, too */
- *outStrings++=trailMap[*inStrings++];
- count-=2;
- }
- }
- }
- }
-
- /* swap the algorithmic ranges */
- offset=algNamesOffset;
- count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
- ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
- offset+=4;
-
- for(i=0; i<count; ++i) {
- if(offset>(uint32_t)length) {
- udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
- length, i);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- inRange=(const AlgorithmicRange *)(inBytes+offset);
- outRange=(AlgorithmicRange *)(outBytes+offset);
- offset+=ds->readUInt16(inRange->size);
-
- ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);
- ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
- switch(inRange->type) {
- case 0:
- /* swap prefix string */
- ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
- outRange+1, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
- i);
- return 0;
- }
- break;
- case 1:
- {
- /* swap factors and the prefix and factor strings */
- uint32_t factorsCount;
-
- factorsCount=inRange->variant;
- p=(const uint16_t *)(inRange+1);
- q=(uint16_t *)(outRange+1);
- ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);
-
- /* swap the strings, up to the last terminating NUL */
- p+=factorsCount;
- q+=factorsCount;
- stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
- while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
- --stringsCount;
- }
- ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
- }
- break;
- default:
- udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
- inRange->type, i);
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
- }
- }
-
- return headerSize+(int32_t)offset;
-}
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/contrib/libs/icu/common/unifiedcache.cpp b/contrib/libs/icu/common/unifiedcache.cpp
deleted file mode 100644
index f2dd9165595..00000000000
--- a/contrib/libs/icu/common/unifiedcache.cpp
+++ /dev/null
@@ -1,522 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 2015, International Business Machines Corporation and
-* others. All Rights Reserved.
-******************************************************************************
-*
-* File unifiedcache.cpp
-******************************************************************************
-*/
-
-#include "unifiedcache.h"
-
-#include <algorithm> // For std::max()
-#include <mutex>
-
-#include "uassert.h"
-#include "uhash.h"
-#include "ucln_cmn.h"
-
-static icu::UnifiedCache *gCache = NULL;
-static std::mutex *gCacheMutex = nullptr;
-static std::condition_variable *gInProgressValueAddedCond;
-static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER;
-
-static const int32_t MAX_EVICT_ITERATIONS = 10;
-static const int32_t DEFAULT_MAX_UNUSED = 1000;
-static const int32_t DEFAULT_PERCENTAGE_OF_IN_USE = 100;
-
-
-U_CDECL_BEGIN
-static UBool U_CALLCONV unifiedcache_cleanup() {
- gCacheInitOnce.reset();
- delete gCache;
- gCache = nullptr;
- gCacheMutex->~mutex();
- gCacheMutex = nullptr;
- gInProgressValueAddedCond->~condition_variable();
- gInProgressValueAddedCond = nullptr;
- return TRUE;
-}
-U_CDECL_END
-
-
-U_NAMESPACE_BEGIN
-
-U_CAPI int32_t U_EXPORT2
-ucache_hashKeys(const UHashTok key) {
- const CacheKeyBase *ckey = (const CacheKeyBase *) key.pointer;
- return ckey->hashCode();
-}
-
-U_CAPI UBool U_EXPORT2
-ucache_compareKeys(const UHashTok key1, const UHashTok key2) {
- const CacheKeyBase *p1 = (const CacheKeyBase *) key1.pointer;
- const CacheKeyBase *p2 = (const CacheKeyBase *) key2.pointer;
- return *p1 == *p2;
-}
-
-U_CAPI void U_EXPORT2
-ucache_deleteKey(void *obj) {
- CacheKeyBase *p = (CacheKeyBase *) obj;
- delete p;
-}
-
-CacheKeyBase::~CacheKeyBase() {
-}
-
-static void U_CALLCONV cacheInit(UErrorCode &status) {
- U_ASSERT(gCache == NULL);
- ucln_common_registerCleanup(
- UCLN_COMMON_UNIFIED_CACHE, unifiedcache_cleanup);
-
- gCacheMutex = STATIC_NEW(std::mutex);
- gInProgressValueAddedCond = STATIC_NEW(std::condition_variable);
- gCache = new UnifiedCache(status);
- if (gCache == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
- if (U_FAILURE(status)) {
- delete gCache;
- gCache = NULL;
- return;
- }
-}
-
-UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) {
- umtx_initOnce(gCacheInitOnce, &cacheInit, status);
- if (U_FAILURE(status)) {
- return NULL;
- }
- U_ASSERT(gCache != NULL);
- return gCache;
-}
-
-UnifiedCache::UnifiedCache(UErrorCode &status) :
- fHashtable(NULL),
- fEvictPos(UHASH_FIRST),
- fNumValuesTotal(0),
- fNumValuesInUse(0),
- fMaxUnused(DEFAULT_MAX_UNUSED),
- fMaxPercentageOfInUse(DEFAULT_PERCENTAGE_OF_IN_USE),
- fAutoEvictedCount(0),
- fNoValue(nullptr) {
- if (U_FAILURE(status)) {
- return;
- }
- fNoValue = new SharedObject();
- if (fNoValue == nullptr) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- fNoValue->softRefCount = 1; // Add fake references to prevent fNoValue from being deleted
- fNoValue->hardRefCount = 1; // when other references to it are removed.
- fNoValue->cachePtr = this;
-
- fHashtable = uhash_open(
- &ucache_hashKeys,
- &ucache_compareKeys,
- NULL,
- &status);
- if (U_FAILURE(status)) {
- return;
- }
- uhash_setKeyDeleter(fHashtable, &ucache_deleteKey);
-}
-
-void UnifiedCache::setEvictionPolicy(
- int32_t count, int32_t percentageOfInUseItems, UErrorCode &status) {
- if (U_FAILURE(status)) {
- return;
- }
- if (count < 0 || percentageOfInUseItems < 0) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- std::lock_guard<std::mutex> lock(*gCacheMutex);
- fMaxUnused = count;
- fMaxPercentageOfInUse = percentageOfInUseItems;
-}
-
-int32_t UnifiedCache::unusedCount() const {
- std::lock_guard<std::mutex> lock(*gCacheMutex);
- return uhash_count(fHashtable) - fNumValuesInUse;
-}
-
-int64_t UnifiedCache::autoEvictedCount() const {
- std::lock_guard<std::mutex> lock(*gCacheMutex);
- return fAutoEvictedCount;
-}
-
-int32_t UnifiedCache::keyCount() const {
- std::lock_guard<std::mutex> lock(*gCacheMutex);
- return uhash_count(fHashtable);
-}
-
-void UnifiedCache::flush() const {
- std::lock_guard<std::mutex> lock(*gCacheMutex);
-
- // Use a loop in case cache items that are flushed held hard references to
- // other cache items making those additional cache items eligible for
- // flushing.
- while (_flush(FALSE));
-}
-
-void UnifiedCache::handleUnreferencedObject() const {
- std::lock_guard<std::mutex> lock(*gCacheMutex);
- --fNumValuesInUse;
- _runEvictionSlice();
-}
-
-#ifdef UNIFIED_CACHE_DEBUG
-#include <stdio.h>
-
-void UnifiedCache::dump() {
- UErrorCode status = U_ZERO_ERROR;
- const UnifiedCache *cache = getInstance(status);
- if (U_FAILURE(status)) {
- fprintf(stderr, "Unified Cache: Error fetching cache.\n");
- return;
- }
- cache->dumpContents();
-}
-
-void UnifiedCache::dumpContents() const {
- std::lock_guard<std::mutex> lock(*gCacheMutex);
- _dumpContents();
-}
-
-// Dumps content of cache.
-// On entry, gCacheMutex must be held.
-// On exit, cache contents dumped to stderr.
-void UnifiedCache::_dumpContents() const {
- int32_t pos = UHASH_FIRST;
- const UHashElement *element = uhash_nextElement(fHashtable, &pos);
- char buffer[256];
- int32_t cnt = 0;
- for (; element != NULL; element = uhash_nextElement(fHashtable, &pos)) {
- const SharedObject *sharedObject =
- (const SharedObject *) element->value.pointer;
- const CacheKeyBase *key =
- (const CacheKeyBase *) element->key.pointer;
- if (sharedObject->hasHardReferences()) {
- ++cnt;
- fprintf(
- stderr,
- "Unified Cache: Key '%s', error %d, value %p, total refcount %d, soft refcount %d\n",
- key->writeDescription(buffer, 256),
- key->creationStatus,
- sharedObject == fNoValue ? NULL :sharedObject,
- sharedObject->getRefCount(),
- sharedObject->getSoftRefCount());
- }
- }
- fprintf(stderr, "Unified Cache: %d out of a total of %d still have hard references\n", cnt, uhash_count(fHashtable));
-}
-#endif
-
-UnifiedCache::~UnifiedCache() {
- // Try our best to clean up first.
- flush();
- {
- // Now all that should be left in the cache are entries that refer to
- // each other and entries with hard references from outside the cache.
- // Nothing we can do about these so proceed to wipe out the cache.
- std::lock_guard<std::mutex> lock(*gCacheMutex);
- _flush(TRUE);
- }
- uhash_close(fHashtable);
- fHashtable = nullptr;
- delete fNoValue;
- fNoValue = nullptr;
-}
-
-const UHashElement *
-UnifiedCache::_nextElement() const {
- const UHashElement *element = uhash_nextElement(fHashtable, &fEvictPos);
- if (element == NULL) {
- fEvictPos = UHASH_FIRST;
- return uhash_nextElement(fHashtable, &fEvictPos);
- }
- return element;
-}
-
-UBool UnifiedCache::_flush(UBool all) const {
- UBool result = FALSE;
- int32_t origSize = uhash_count(fHashtable);
- for (int32_t i = 0; i < origSize; ++i) {
- const UHashElement *element = _nextElement();
- if (element == nullptr) {
- break;
- }
- if (all || _isEvictable(element)) {
- const SharedObject *sharedObject =
- (const SharedObject *) element->value.pointer;
- U_ASSERT(sharedObject->cachePtr == this);
- uhash_removeElement(fHashtable, element);
- removeSoftRef(sharedObject); // Deletes the sharedObject when softRefCount goes to zero.
- result = TRUE;
- }
- }
- return result;
-}
-
-int32_t UnifiedCache::_computeCountOfItemsToEvict() const {
- int32_t totalItems = uhash_count(fHashtable);
- int32_t evictableItems = totalItems - fNumValuesInUse;
-
- int32_t unusedLimitByPercentage = fNumValuesInUse * fMaxPercentageOfInUse / 100;
- int32_t unusedLimit = std::max(unusedLimitByPercentage, fMaxUnused);
- int32_t countOfItemsToEvict = std::max(0, evictableItems - unusedLimit);
- return countOfItemsToEvict;
-}
-
-void UnifiedCache::_runEvictionSlice() const {
- int32_t maxItemsToEvict = _computeCountOfItemsToEvict();
- if (maxItemsToEvict <= 0) {
- return;
- }
- for (int32_t i = 0; i < MAX_EVICT_ITERATIONS; ++i) {
- const UHashElement *element = _nextElement();
- if (element == nullptr) {
- break;
- }
- if (_isEvictable(element)) {
- const SharedObject *sharedObject =
- (const SharedObject *) element->value.pointer;
- uhash_removeElement(fHashtable, element);
- removeSoftRef(sharedObject); // Deletes sharedObject when SoftRefCount goes to zero.
- ++fAutoEvictedCount;
- if (--maxItemsToEvict == 0) {
- break;
- }
- }
- }
-}
-
-void UnifiedCache::_putNew(
- const CacheKeyBase &key,
- const SharedObject *value,
- const UErrorCode creationStatus,
- UErrorCode &status) const {
- if (U_FAILURE(status)) {
- return;
- }
- CacheKeyBase *keyToAdopt = key.clone();
- if (keyToAdopt == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- keyToAdopt->fCreationStatus = creationStatus;
- if (value->softRefCount == 0) {
- _registerMaster(keyToAdopt, value);
- }
- void *oldValue = uhash_put(fHashtable, keyToAdopt, (void *) value, &status);
- U_ASSERT(oldValue == nullptr);
- (void)oldValue;
- if (U_SUCCESS(status)) {
- value->softRefCount++;
- }
-}
-
-void UnifiedCache::_putIfAbsentAndGet(
- const CacheKeyBase &key,
- const SharedObject *&value,
- UErrorCode &status) const {
- std::lock_guard<std::mutex> lock(*gCacheMutex);
- const UHashElement *element = uhash_find(fHashtable, &key);
- if (element != NULL && !_inProgress(element)) {
- _fetch(element, value, status);
- return;
- }
- if (element == NULL) {
- UErrorCode putError = U_ZERO_ERROR;
- // best-effort basis only.
- _putNew(key, value, status, putError);
- } else {
- _put(element, value, status);
- }
- // Run an eviction slice. This will run even if we added a master entry
- // which doesn't increase the unused count, but that is still o.k
- _runEvictionSlice();
-}
-
-
-UBool UnifiedCache::_poll(
- const CacheKeyBase &key,
- const SharedObject *&value,
- UErrorCode &status) const {
- U_ASSERT(value == NULL);
- U_ASSERT(status == U_ZERO_ERROR);
- std::unique_lock<std::mutex> lock(*gCacheMutex);
- const UHashElement *element = uhash_find(fHashtable, &key);
-
- // If the hash table contains an inProgress placeholder entry for this key,
- // this means that another thread is currently constructing the value object.
- // Loop, waiting for that construction to complete.
- while (element != NULL && _inProgress(element)) {
- gInProgressValueAddedCond->wait(lock);
- element = uhash_find(fHashtable, &key);
- }
-
- // If the hash table contains an entry for the key,
- // fetch out the contents and return them.
- if (element != NULL) {
- _fetch(element, value, status);
- return TRUE;
- }
-
- // The hash table contained nothing for this key.
- // Insert an inProgress place holder value.
- // Our caller will create the final value and update the hash table.
- _putNew(key, fNoValue, U_ZERO_ERROR, status);
- return FALSE;
-}
-
-void UnifiedCache::_get(
- const CacheKeyBase &key,
- const SharedObject *&value,
- const void *creationContext,
- UErrorCode &status) const {
- U_ASSERT(value == NULL);
- U_ASSERT(status == U_ZERO_ERROR);
- if (_poll(key, value, status)) {
- if (value == fNoValue) {
- SharedObject::clearPtr(value);
- }
- return;
- }
- if (U_FAILURE(status)) {
- return;
- }
- value = key.createObject(creationContext, status);
- U_ASSERT(value == NULL || value->hasHardReferences());
- U_ASSERT(value != NULL || status != U_ZERO_ERROR);
- if (value == NULL) {
- SharedObject::copyPtr(fNoValue, value);
- }
- _putIfAbsentAndGet(key, value, status);
- if (value == fNoValue) {
- SharedObject::clearPtr(value);
- }
-}
-
-void UnifiedCache::_registerMaster(
- const CacheKeyBase *theKey, const SharedObject *value) const {
- theKey->fIsMaster = true;
- value->cachePtr = this;
- ++fNumValuesTotal;
- ++fNumValuesInUse;
-}
-
-void UnifiedCache::_put(
- const UHashElement *element,
- const SharedObject *value,
- const UErrorCode status) const {
- U_ASSERT(_inProgress(element));
- const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer;
- const SharedObject *oldValue = (const SharedObject *) element->value.pointer;
- theKey->fCreationStatus = status;
- if (value->softRefCount == 0) {
- _registerMaster(theKey, value);
- }
- value->softRefCount++;
- UHashElement *ptr = const_cast<UHashElement *>(element);
- ptr->value.pointer = (void *) value;
- U_ASSERT(oldValue == fNoValue);
- removeSoftRef(oldValue);
-
- // Tell waiting threads that we replace in-progress status with
- // an error.
- gInProgressValueAddedCond->notify_all();
-}
-
-void UnifiedCache::_fetch(
- const UHashElement *element,
- const SharedObject *&value,
- UErrorCode &status) const {
- const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer;
- status = theKey->fCreationStatus;
-
- // Since we have the cache lock, calling regular SharedObject add/removeRef
- // could cause us to deadlock on ourselves since they may need to lock
- // the cache mutex.
- removeHardRef(value);
- value = static_cast<const SharedObject *>(element->value.pointer);
- addHardRef(value);
-}
-
-
-UBool UnifiedCache::_inProgress(const UHashElement* element) const {
- UErrorCode status = U_ZERO_ERROR;
- const SharedObject * value = NULL;
- _fetch(element, value, status);
- UBool result = _inProgress(value, status);
- removeHardRef(value);
- return result;
-}
-
-UBool UnifiedCache::_inProgress(
- const SharedObject* theValue, UErrorCode creationStatus) const {
- return (theValue == fNoValue && creationStatus == U_ZERO_ERROR);
-}
-
-UBool UnifiedCache::_isEvictable(const UHashElement *element) const
-{
- const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer;
- const SharedObject *theValue =
- (const SharedObject *) element->value.pointer;
-
- // Entries that are under construction are never evictable
- if (_inProgress(theValue, theKey->fCreationStatus)) {
- return FALSE;
- }
-
- // We can evict entries that are either not a master or have just
- // one reference (The one reference being from the cache itself).
- return (!theKey->fIsMaster || (theValue->softRefCount == 1 && theValue->noHardReferences()));
-}
-
-void UnifiedCache::removeSoftRef(const SharedObject *value) const {
- U_ASSERT(value->cachePtr == this);
- U_ASSERT(value->softRefCount > 0);
- if (--value->softRefCount == 0) {
- --fNumValuesTotal;
- if (value->noHardReferences()) {
- delete value;
- } else {
- // This path only happens from flush(all). Which only happens from the
- // UnifiedCache destructor. Nulling out value.cacheptr changes the behavior
- // of value.removeRef(), causing the deletion to be done there.
- value->cachePtr = nullptr;
- }
- }
-}
-
-int32_t UnifiedCache::removeHardRef(const SharedObject *value) const {
- int refCount = 0;
- if (value) {
- refCount = umtx_atomic_dec(&value->hardRefCount);
- U_ASSERT(refCount >= 0);
- if (refCount == 0) {
- --fNumValuesInUse;
- }
- }
- return refCount;
-}
-
-int32_t UnifiedCache::addHardRef(const SharedObject *value) const {
- int refCount = 0;
- if (value) {
- refCount = umtx_atomic_inc(&value->hardRefCount);
- U_ASSERT(refCount >= 1);
- if (refCount == 1) {
- fNumValuesInUse++;
- }
- }
- return refCount;
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/unifiedcache.h b/contrib/libs/icu/common/unifiedcache.h
deleted file mode 100644
index d6c9945126c..00000000000
--- a/contrib/libs/icu/common/unifiedcache.h
+++ /dev/null
@@ -1,556 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 2015, International Business Machines Corporation and
-* others. All Rights Reserved.
-******************************************************************************
-*
-* File UNIFIEDCACHE.H - The ICU Unified cache.
-******************************************************************************
-*/
-
-#ifndef __UNIFIED_CACHE_H__
-#define __UNIFIED_CACHE_H__
-
-#include "utypeinfo.h" // for 'typeid' to work
-
-#include "unicode/uobject.h"
-#include "unicode/locid.h"
-#include "sharedobject.h"
-#include "unicode/unistr.h"
-#include "cstring.h"
-#include "ustr_imp.h"
-
-struct UHashtable;
-struct UHashElement;
-
-U_NAMESPACE_BEGIN
-
-class UnifiedCache;
-
-/**
- * A base class for all cache keys.
- */
-class U_COMMON_API CacheKeyBase : public UObject {
- public:
- CacheKeyBase() : fCreationStatus(U_ZERO_ERROR), fIsMaster(FALSE) {}
-
- /**
- * Copy constructor. Needed to support cloning.
- */
- CacheKeyBase(const CacheKeyBase &other)
- : UObject(other), fCreationStatus(other.fCreationStatus), fIsMaster(FALSE) { }
- virtual ~CacheKeyBase();
-
- /**
- * Returns the hash code for this object.
- */
- virtual int32_t hashCode() const = 0;
-
- /**
- * Clones this object polymorphically. Caller owns returned value.
- */
- virtual CacheKeyBase *clone() const = 0;
-
- /**
- * Equality operator.
- */
- virtual UBool operator == (const CacheKeyBase &other) const = 0;
-
- /**
- * Create a new object for this key. Called by cache on cache miss.
- * createObject must add a reference to the object it returns. Note
- * that getting an object from the cache and returning it without calling
- * removeRef on it satisfies this requirement. It can also return NULL
- * and set status to an error.
- *
- * @param creationContext the context in which the object is being
- * created. May be NULL.
- * @param status Implementations can return a failure here.
- * In addition, implementations may return a
- * non NULL object and set a warning status.
- */
- virtual const SharedObject *createObject(
- const void *creationContext, UErrorCode &status) const = 0;
-
- /**
- * Writes a description of this key to buffer and returns buffer. Written
- * description is NULL terminated.
- */
- virtual char *writeDescription(char *buffer, int32_t bufSize) const = 0;
-
- /**
- * Inequality operator.
- */
- UBool operator != (const CacheKeyBase &other) const {
- return !(*this == other);
- }
- private:
- mutable UErrorCode fCreationStatus;
- mutable UBool fIsMaster;
- friend class UnifiedCache;
-};
-
-
-
-/**
- * Templated version of CacheKeyBase.
- * A key of type LocaleCacheKey<T> maps to a value of type T.
- */
-template<typename T>
-class CacheKey : public CacheKeyBase {
- public:
- virtual ~CacheKey() { }
- /**
- * The template parameter, T, determines the hash code returned.
- */
- virtual int32_t hashCode() const {
- const char *s = typeid(T).name();
- return ustr_hashCharsN(s, static_cast<int32_t>(uprv_strlen(s)));
- }
-
- /**
- * Use the value type, T, as the description.
- */
- virtual char *writeDescription(char *buffer, int32_t bufLen) const {
- const char *s = typeid(T).name();
- uprv_strncpy(buffer, s, bufLen);
- buffer[bufLen - 1] = 0;
- return buffer;
- }
-
- /**
- * Two objects are equal if they are of the same type.
- */
- virtual UBool operator == (const CacheKeyBase &other) const {
- return typeid(*this) == typeid(other);
- }
-};
-
-/**
- * Cache key based on locale.
- * A key of type LocaleCacheKey<T> maps to a value of type T.
- */
-template<typename T>
-class LocaleCacheKey : public CacheKey<T> {
- protected:
- Locale fLoc;
- public:
- LocaleCacheKey(const Locale &loc) : fLoc(loc) {}
- LocaleCacheKey(const LocaleCacheKey<T> &other)
- : CacheKey<T>(other), fLoc(other.fLoc) { }
- virtual ~LocaleCacheKey() { }
- virtual int32_t hashCode() const {
- return (int32_t)(37u * (uint32_t)CacheKey<T>::hashCode() + (uint32_t)fLoc.hashCode());
- }
- virtual UBool operator == (const CacheKeyBase &other) const {
- // reflexive
- if (this == &other) {
- return TRUE;
- }
- if (!CacheKey<T>::operator == (other)) {
- return FALSE;
- }
- // We know this and other are of same class because operator== on
- // CacheKey returned true.
- const LocaleCacheKey<T> *fOther =
- static_cast<const LocaleCacheKey<T> *>(&other);
- return fLoc == fOther->fLoc;
- }
- virtual CacheKeyBase *clone() const {
- return new LocaleCacheKey<T>(*this);
- }
- virtual const T *createObject(
- const void *creationContext, UErrorCode &status) const;
- /**
- * Use the locale id as the description.
- */
- virtual char *writeDescription(char *buffer, int32_t bufLen) const {
- const char *s = fLoc.getName();
- uprv_strncpy(buffer, s, bufLen);
- buffer[bufLen - 1] = 0;
- return buffer;
- }
-
-};
-
-/**
- * The unified cache. A singleton type.
- * Design doc here:
- * https://docs.google.com/document/d/1RwGQJs4N4tawNbf809iYDRCvXoMKqDJihxzYt1ysmd8/edit?usp=sharing
- */
-class U_COMMON_API UnifiedCache : public UnifiedCacheBase {
- public:
- /**
- * @internal
- * Do not call directly. Instead use UnifiedCache::getInstance() as
- * there should be only one UnifiedCache in an application.
- */
- UnifiedCache(UErrorCode &status);
-
- /**
- * Return a pointer to the global cache instance.
- */
- static UnifiedCache *getInstance(UErrorCode &status);
-
- /**
- * Fetches a value from the cache by key. Equivalent to
- * get(key, NULL, ptr, status);
- */
- template<typename T>
- void get(
- const CacheKey<T>& key,
- const T *&ptr,
- UErrorCode &status) const {
- get(key, NULL, ptr, status);
- }
-
- /**
- * Fetches value from the cache by key.
- *
- * @param key the cache key.
- * @param creationContext passed verbatim to createObject method of key
- * @param ptr On entry, ptr must be NULL or be included if
- * the reference count of the object it points
- * to. On exit, ptr points to the fetched object
- * from the cache or is left unchanged on
- * failure. Caller must call removeRef on ptr
- * if set to a non NULL value.
- * @param status Any error returned here. May be set to a
- * warning value even if ptr is set.
- */
- template<typename T>
- void get(
- const CacheKey<T>& key,
- const void *creationContext,
- const T *&ptr,
- UErrorCode &status) const {
- if (U_FAILURE(status)) {
- return;
- }
- UErrorCode creationStatus = U_ZERO_ERROR;
- const SharedObject *value = NULL;
- _get(key, value, creationContext, creationStatus);
- const T *tvalue = (const T *) value;
- if (U_SUCCESS(creationStatus)) {
- SharedObject::copyPtr(tvalue, ptr);
- }
- SharedObject::clearPtr(tvalue);
- // Take care not to overwrite a warning status passed in with
- // another warning or U_ZERO_ERROR.
- if (status == U_ZERO_ERROR || U_FAILURE(creationStatus)) {
- status = creationStatus;
- }
- }
-
-#ifdef UNIFIED_CACHE_DEBUG
- /**
- * Dumps the contents of this cache to standard error. Used for testing of
- * cache only.
- */
- void dumpContents() const;
-#endif
-
- /**
- * Convenience method to get a value of type T from cache for a
- * particular locale with creationContext == NULL.
- * @param loc the locale
- * @param ptr On entry, must be NULL or included in the ref count
- * of the object to which it points.
- * On exit, fetched value stored here or is left
- * unchanged on failure. Caller must call removeRef on
- * ptr if set to a non NULL value.
- * @param status Any error returned here. May be set to a
- * warning value even if ptr is set.
- */
- template<typename T>
- static void getByLocale(
- const Locale &loc, const T *&ptr, UErrorCode &status) {
- const UnifiedCache *cache = getInstance(status);
- if (U_FAILURE(status)) {
- return;
- }
- cache->get(LocaleCacheKey<T>(loc), ptr, status);
- }
-
-#ifdef UNIFIED_CACHE_DEBUG
- /**
- * Dumps the cache contents to stderr. For testing only.
- */
- static void dump();
-#endif
-
- /**
- * Returns the number of keys in this cache. For testing only.
- */
- int32_t keyCount() const;
-
- /**
- * Removes any values from cache that are not referenced outside
- * the cache.
- */
- void flush() const;
-
- /**
- * Configures at what point evcition of unused entries will begin.
- * Eviction is triggered whenever the number of evictable keys exeeds
- * BOTH count AND (number of in-use items) * (percentageOfInUseItems / 100).
- * Once the number of unused entries drops below one of these,
- * eviction ceases. Because eviction happens incrementally,
- * the actual unused entry count may exceed both these numbers
- * from time to time.
- *
- * A cache entry is defined as unused if it is not essential to guarantee
- * that for a given key X, the cache returns the same reference to the
- * same value as long as the client already holds a reference to that
- * value.
- *
- * If this method is never called, the default settings are 1000 and 100%.
- *
- * Although this method is thread-safe, it is designed to be called at
- * application startup. If it is called in the middle of execution, it
- * will have no immediate effect on the cache. However over time, the
- * cache will perform eviction slices in an attempt to honor the new
- * settings.
- *
- * If a client already holds references to many different unique values
- * in the cache such that the number of those unique values far exeeds
- * "count" then the cache may not be able to maintain this maximum.
- * However, if this happens, the cache still guarantees that the number of
- * unused entries will remain only a small percentage of the total cache
- * size.
- *
- * If the parameters passed are negative, setEvctionPolicy sets status to
- * U_ILLEGAL_ARGUMENT_ERROR.
- */
- void setEvictionPolicy(
- int32_t count, int32_t percentageOfInUseItems, UErrorCode &status);
-
-
- /**
- * Returns how many entries have been auto evicted during the lifetime
- * of this cache. This only includes auto evicted entries, not
- * entries evicted because of a call to flush().
- */
- int64_t autoEvictedCount() const;
-
- /**
- * Returns the unused entry count in this cache. For testing only,
- * Regular clients will not need this.
- */
- int32_t unusedCount() const;
-
- virtual void handleUnreferencedObject() const;
- virtual ~UnifiedCache();
-
- private:
- UHashtable *fHashtable;
- mutable int32_t fEvictPos;
- mutable int32_t fNumValuesTotal;
- mutable int32_t fNumValuesInUse;
- int32_t fMaxUnused;
- int32_t fMaxPercentageOfInUse;
- mutable int64_t fAutoEvictedCount;
- SharedObject *fNoValue;
-
- UnifiedCache(const UnifiedCache &other);
- UnifiedCache &operator=(const UnifiedCache &other);
-
- /**
- * Flushes the contents of the cache. If cache values hold references to other
- * cache values then _flush should be called in a loop until it returns FALSE.
- *
- * On entry, gCacheMutex must be held.
- * On exit, those values with are evictable are flushed.
- *
- * @param all if false flush evictable items only, which are those with no external
- * references, plus those that can be safely recreated.<br>
- * if true, flush all elements. Any values (sharedObjects) with remaining
- * hard (external) references are not deleted, but are detached from
- * the cache, so that a subsequent removeRefs can delete them.
- * _flush is not thread safe when all is true.
- * @return TRUE if any value in cache was flushed or FALSE otherwise.
- */
- UBool _flush(UBool all) const;
-
- /**
- * Gets value out of cache.
- * On entry. gCacheMutex must not be held. value must be NULL. status
- * must be U_ZERO_ERROR.
- * On exit. value and status set to what is in cache at key or on cache
- * miss the key's createObject() is called and value and status are set to
- * the result of that. In this latter case, best effort is made to add the
- * value and status to the cache. If createObject() fails to create a value,
- * fNoValue is stored in cache, and value is set to NULL. Caller must call
- * removeRef on value if non NULL.
- */
- void _get(
- const CacheKeyBase &key,
- const SharedObject *&value,
- const void *creationContext,
- UErrorCode &status) const;
-
- /**
- * Attempts to fetch value and status for key from cache.
- * On entry, gCacheMutex must not be held value must be NULL and status must
- * be U_ZERO_ERROR.
- * On exit, either returns FALSE (In this
- * case caller should try to create the object) or returns TRUE with value
- * pointing to the fetched value and status set to fetched status. When
- * FALSE is returned status may be set to failure if an in progress hash
- * entry could not be made but value will remain unchanged. When TRUE is
- * returned, caller must call removeRef() on value.
- */
- UBool _poll(
- const CacheKeyBase &key,
- const SharedObject *&value,
- UErrorCode &status) const;
-
- /**
- * Places a new value and creationStatus in the cache for the given key.
- * On entry, gCacheMutex must be held. key must not exist in the cache.
- * On exit, value and creation status placed under key. Soft reference added
- * to value on successful add. On error sets status.
- */
- void _putNew(
- const CacheKeyBase &key,
- const SharedObject *value,
- const UErrorCode creationStatus,
- UErrorCode &status) const;
-
- /**
- * Places value and status at key if there is no value at key or if cache
- * entry for key is in progress. Otherwise, it leaves the current value and
- * status there.
- *
- * On entry. gCacheMutex must not be held. Value must be
- * included in the reference count of the object to which it points.
- *
- * On exit, value and status are changed to what was already in the cache if
- * something was there and not in progress. Otherwise, value and status are left
- * unchanged in which case they are placed in the cache on a best-effort basis.
- * Caller must call removeRef() on value.
- */
- void _putIfAbsentAndGet(
- const CacheKeyBase &key,
- const SharedObject *&value,
- UErrorCode &status) const;
-
- /**
- * Returns the next element in the cache round robin style.
- * Returns nullptr if the cache is empty.
- * On entry, gCacheMutex must be held.
- */
- const UHashElement *_nextElement() const;
-
- /**
- * Return the number of cache items that would need to be evicted
- * to bring usage into conformance with eviction policy.
- *
- * An item corresponds to an entry in the hash table, a hash table element.
- *
- * On entry, gCacheMutex must be held.
- */
- int32_t _computeCountOfItemsToEvict() const;
-
- /**
- * Run an eviction slice.
- * On entry, gCacheMutex must be held.
- * _runEvictionSlice runs a slice of the evict pipeline by examining the next
- * 10 entries in the cache round robin style evicting them if they are eligible.
- */
- void _runEvictionSlice() const;
-
- /**
- * Register a master cache entry. A master key is the first key to create
- * a given SharedObject value. Subsequent keys whose create function
- * produce referneces to an already existing SharedObject are not masters -
- * they can be evicted and subsequently recreated.
- *
- * On entry, gCacheMutex must be held.
- * On exit, items in use count incremented, entry is marked as a master
- * entry, and value registered with cache so that subsequent calls to
- * addRef() and removeRef() on it correctly interact with the cache.
- */
- void _registerMaster(const CacheKeyBase *theKey, const SharedObject *value) const;
-
- /**
- * Store a value and creation error status in given hash entry.
- * On entry, gCacheMutex must be held. Hash entry element must be in progress.
- * value must be non NULL.
- * On Exit, soft reference added to value. value and status stored in hash
- * entry. Soft reference removed from previous stored value. Waiting
- * threads notified.
- */
- void _put(
- const UHashElement *element,
- const SharedObject *value,
- const UErrorCode status) const;
- /**
- * Remove a soft reference, and delete the SharedObject if no references remain.
- * To be used from within the UnifiedCache implementation only.
- * gCacheMutex must be held by caller.
- * @param value the SharedObject to be acted on.
- */
- void removeSoftRef(const SharedObject *value) const;
-
- /**
- * Increment the hard reference count of the given SharedObject.
- * gCacheMutex must be held by the caller.
- * Update numValuesEvictable on transitions between zero and one reference.
- *
- * @param value The SharedObject to be referenced.
- * @return the hard reference count after the addition.
- */
- int32_t addHardRef(const SharedObject *value) const;
-
- /**
- * Decrement the hard reference count of the given SharedObject.
- * gCacheMutex must be held by the caller.
- * Update numValuesEvictable on transitions between one and zero reference.
- *
- * @param value The SharedObject to be referenced.
- * @return the hard reference count after the removal.
- */
- int32_t removeHardRef(const SharedObject *value) const;
-
-
-#ifdef UNIFIED_CACHE_DEBUG
- void _dumpContents() const;
-#endif
-
- /**
- * Fetch value and error code from a particular hash entry.
- * On entry, gCacheMutex must be held. value must be either NULL or must be
- * included in the ref count of the object to which it points.
- * On exit, value and status set to what is in the hash entry. Caller must
- * eventually call removeRef on value.
- * If hash entry is in progress, value will be set to gNoValue and status will
- * be set to U_ZERO_ERROR.
- */
- void _fetch(const UHashElement *element, const SharedObject *&value,
- UErrorCode &status) const;
-
- /**
- * Determine if given hash entry is in progress.
- * On entry, gCacheMutex must be held.
- */
- UBool _inProgress(const UHashElement *element) const;
-
- /**
- * Determine if given hash entry is in progress.
- * On entry, gCacheMutex must be held.
- */
- UBool _inProgress(const SharedObject *theValue, UErrorCode creationStatus) const;
-
- /**
- * Determine if given hash entry is eligible for eviction.
- * On entry, gCacheMutex must be held.
- */
- UBool _isEvictable(const UHashElement *element) const;
-};
-
-U_NAMESPACE_END
-
-#endif
diff --git a/contrib/libs/icu/common/unifilt.cpp b/contrib/libs/icu/common/unifilt.cpp
deleted file mode 100644
index 4ab0d9b5f93..00000000000
--- a/contrib/libs/icu/common/unifilt.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2001-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Date Name Description
-* 07/18/01 aliu Creation.
-**********************************************************************
-*/
-
-#include "unicode/unifilt.h"
-#include "unicode/rep.h"
-#include "unicode/utf16.h"
-
-U_NAMESPACE_BEGIN
-UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFilter)
-
-
-// UnicodeMatcher has no implementation file of its own and its destructor
-// can't be defined in the header, so the definition lives here.
-UnicodeMatcher::~UnicodeMatcher() {
-}
-
-// Empty out-of-line destructor for UnicodeFilter.
-UnicodeFilter::~UnicodeFilter() {
-}
-
-/**
- * UnicodeFunctor API: expose this filter as a UnicodeMatcher.
- * UnicodeMatcher is a base class of UnicodeFilter, so the object itself
- * is returned, with constness cast away.
- */
-UnicodeMatcher* UnicodeFilter::toMatcher() const {
-    UnicodeFilter *self = const_cast<UnicodeFilter *>(this);
-    return self;
-}
-
-// UnicodeFunctor API: the rule data argument is ignored.
-void UnicodeFilter::setData(const TransliterationRuleData* /*data*/) {
-}
-
-/**
- * Default implementation of UnicodeMatcher::matches() for Unicode
- * filters. Tests the single code point at offset (one or two 16-bit
- * code units) against contains(). With offset < limit the match runs
- * forward and offset is advanced past the code point; with
- * offset > limit the match runs backward and offset is backed up to
- * the preceding character.
- *
- * @return U_MATCH if the code point is contained; U_PARTIAL_MATCH if
- *         offset == limit and incremental is true; U_MISMATCH otherwise.
- */
-UMatchDegree UnicodeFilter::matches(const Replaceable& text,
-                                    int32_t& offset,
-                                    int32_t limit,
-                                    UBool incremental) {
-    if (offset == limit) {
-        // Nothing left to examine.
-        return incremental ? U_PARTIAL_MATCH : U_MISMATCH;
-    }
-    const UChar32 c = text.char32At(offset);
-    if (!contains(c)) {
-        return U_MISMATCH;
-    }
-    if (offset < limit) {
-        // Forward: step over one or two code units.
-        offset += U16_LENGTH(c);
-    } else {
-        // Backward: back up by 1, unless the preceding character is a
-        // surrogate pair -- then back up by 2 (keep offset pointing at
-        // the lead surrogate).
-        --offset;
-        if (offset >= 0) {
-            offset -= U16_LENGTH(text.char32At(offset)) - 1;
-        }
-    }
-    return U_MATCH;
-}
-
-U_NAMESPACE_END
-
-//eof
diff --git a/contrib/libs/icu/common/unifunct.cpp b/contrib/libs/icu/common/unifunct.cpp
deleted file mode 100644
index f3995b298d2..00000000000
--- a/contrib/libs/icu/common/unifunct.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2002-2004, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#include "unicode/unifunct.h"
-
-U_NAMESPACE_BEGIN
-
-UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFunctor)
-
-// Empty out-of-line destructor for UnicodeFunctor.
-UnicodeFunctor::~UnicodeFunctor() {
-}
-
-// Base implementation: returns a null pointer.
-UnicodeMatcher* UnicodeFunctor::toMatcher() const {
-    return nullptr;
-}
-
-// Base implementation: returns a null pointer.
-UnicodeReplacer* UnicodeFunctor::toReplacer() const {
-    return nullptr;
-}
-
-U_NAMESPACE_END
-
-//eof
diff --git a/contrib/libs/icu/common/uniset.cpp b/contrib/libs/icu/common/uniset.cpp
deleted file mode 100644
index b73d612f246..00000000000
--- a/contrib/libs/icu/common/uniset.cpp
+++ /dev/null
@@ -1,2356 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1999-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Date Name Description
-* 10/20/99 alan Creation.
-**********************************************************************
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/parsepos.h"
-#include "unicode/symtable.h"
-#include "unicode/uniset.h"
-#include "unicode/ustring.h"
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-#include "ruleiter.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "patternprops.h"
-#include "uelement.h"
-#include "util.h"
-#include "uvector.h"
-#include "charstr.h"
-#include "ustrfmt.h"
-#include "uassert.h"
-#include "bmpset.h"
-#include "unisetspan.h"
-
-// Define UChar constants using hex for EBCDIC compatibility
-// Used #define to reduce private static exports and memory access time.
-// Note: OPEN_BRACE, CLOSE_BRACE and UPPER_N below are spelled in decimal
-// (123, 125, 78) rather than hex; the trailing comment on each line shows
-// the character the value stands for.
-#define SET_OPEN ((UChar)0x005B) /*[*/
-#define SET_CLOSE ((UChar)0x005D) /*]*/
-#define HYPHEN ((UChar)0x002D) /*-*/
-#define COMPLEMENT ((UChar)0x005E) /*^*/
-#define COLON ((UChar)0x003A) /*:*/
-#define BACKSLASH ((UChar)0x005C) /*\*/
-#define INTERSECTION ((UChar)0x0026) /*&*/
-#define UPPER_U ((UChar)0x0055) /*U*/
-#define LOWER_U ((UChar)0x0075) /*u*/
-#define OPEN_BRACE ((UChar)123) /*{*/
-#define CLOSE_BRACE ((UChar)125) /*}*/
-#define UPPER_P ((UChar)0x0050) /*P*/
-#define LOWER_P ((UChar)0x0070) /*p*/
-#define UPPER_N ((UChar)78) /*N*/
-#define EQUALS ((UChar)0x003D) /*=*/
-
-// UNICODESET_HIGH is greater than every valid value: 0x110000 for code points.
-#define UNICODESET_HIGH 0x0110000
-
-// UNICODESET_LOW is less than or equal to every valid value: zero for code points.
-#define UNICODESET_LOW 0x000000
-
-/** Max list [0, 1, 2, ..., max code point, HIGH] */
-constexpr int32_t MAX_LENGTH = UNICODESET_HIGH + 1;
-
-U_NAMESPACE_BEGIN
-
-// Empty out-of-line destructor for SymbolTable.
-SymbolTable::~SymbolTable() {
-}
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSet)
-
-/**
- * Clamp the given UChar32 into the valid code point range
- * [UNICODESET_LOW, UNICODESET_HIGH-1].
- * The argument is modified in place and its new value is also returned.
- */
-static inline UChar32 pinCodePoint(UChar32& c) {
-    const UChar32 maxCodePoint = UNICODESET_HIGH - 1;
-    if (c > maxCodePoint) {
-        c = maxCodePoint;
-    } else if (c < UNICODESET_LOW) {
-        c = UNICODESET_LOW;
-    }
-    return c;
-}
-
-//----------------------------------------------------------------
-// Debugging
-//----------------------------------------------------------------
-
-// DO NOT DELETE THIS CODE. This code is used to debug memory leaks.
-// To enable the debugging, define the symbol DEBUG_MEM in the line
-// below. This will result in text being sent to stdout that looks
-// like this:
-// DEBUG UnicodeSet: ct 0x00A39B20; 397 [\u0A81-\u0A83\u0A85-
-// DEBUG UnicodeSet: dt 0x00A39B20; 396 [\u0A81-\u0A83\u0A85-
-// Each line lists a construction (ct) or destruction (dt) event, the
-// object address, the number of outstanding objects after the event,
-// and the pattern of the object in question.
-
-// #define DEBUG_MEM
-
-#ifdef DEBUG_MEM
-#include <stdio.h>
-static int32_t _dbgCount = 0;
-
-// Debug hook (DEBUG_MEM only): logs a construction event to stdout with the
-// object address, the incremented live-object count, and up to the first 39
-// characters of the set's pattern.
-static inline void _dbgct(UnicodeSet* set) {
-    UnicodeString str;
-    set->toPattern(str, TRUE);
-    char buf[40];
-    str.extract(0, 39, buf, "");
-    // A pointer must be printed with %p and passed as void*; passing it to
-    // %X is undefined behaviour and truncates the address on 64-bit targets.
-    printf("DEBUG UnicodeSet: ct %p; %d %s\n", (void*)set, ++_dbgCount, buf);
-}
-
-// Debug hook (DEBUG_MEM only): logs a destruction event to stdout with the
-// object address, the decremented live-object count, and up to the first 39
-// characters of the set's pattern.
-static inline void _dbgdt(UnicodeSet* set) {
-    UnicodeString str;
-    set->toPattern(str, TRUE);
-    char buf[40];
-    str.extract(0, 39, buf, "");
-    // A pointer must be printed with %p and passed as void*; passing it to
-    // %X is undefined behaviour and truncates the address on 64-bit targets.
-    printf("DEBUG UnicodeSet: dt %p; %d %s\n", (void*)set, --_dbgCount, buf);
-}
-
-#else
-
-#define _dbgct(set)
-#define _dbgdt(set)
-
-#endif
-
-//----------------------------------------------------------------
-// UnicodeString in UVector support
-//----------------------------------------------------------------
-
-// UVector element-assign callback: stores in dst a newly allocated copy of
-// the UnicodeString that src points to.
-static void U_CALLCONV cloneUnicodeString(UElement *dst, UElement *src) {
-    UnicodeString *original = (UnicodeString*)src->pointer;
-    dst->pointer = new UnicodeString(*original);
-}
-
-// UVector comparison callback: orders two elements by comparing the
-// UnicodeStrings they point to with UnicodeString::compare().
-static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
-    const UnicodeString *lhs = (const UnicodeString*)t1.pointer;
-    const UnicodeString *rhs = (const UnicodeString*)t2.pointer;
-    return lhs->compare(*rhs);
-}
-
-// True if the strings vector has been allocated and is not empty.
-UBool UnicodeSet::hasStrings() const {
-    if (strings == nullptr) {
-        return FALSE;
-    }
-    return !strings->isEmpty();
-}
-
-// Number of strings in this set; 0 if the strings vector was never allocated.
-int32_t UnicodeSet::stringsSize() const {
-    if (strings != nullptr) {
-        return strings->size();
-    }
-    return 0;
-}
-
-// True if the strings vector has been allocated and contains s.
-UBool UnicodeSet::stringsContains(const UnicodeString &s) const {
-    if (strings == nullptr) {
-        return FALSE;
-    }
-    return strings->contains((void*) &s);
-}
-
-//----------------------------------------------------------------
-// Constructors &c
-//----------------------------------------------------------------
-
-/**
- * Constructs an empty set: the list holds only the terminating
- * UNICODESET_HIGH entry.
- */
-UnicodeSet::UnicodeSet() {
-    list[0] = UNICODESET_HIGH;  // terminator only
-    _dbgct(this);
-}
-
-/**
- * Constructs a set containing the given range. If <code>start >
- * end</code> then an empty set is created.
- *
- * @param start first character, inclusive, of range
- * @param end last character, inclusive, of range
- */
-UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) {
-    list[0] = UNICODESET_HIGH;  // start out empty ...
-    add(start, end);            // ... then add the requested range
-    _dbgct(this);
-}
-
-/**
- * Copy constructor: builds a set identical to o by delegating to the
- * assignment operator.
- */
-UnicodeSet::UnicodeSet(const UnicodeSet& o) : UnicodeFilter(o) {
-    operator=(o);
-    _dbgct(this);
-}
-
-// Copy-construct as thawed: copies the list, the strings and the pattern of
-// o, but neither bmpSet nor stringSpan. The second argument is unused.
-// If the strings cannot be copied the new set is marked bogus.
-UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) : UnicodeFilter(o) {
-    if (!ensureCapacity(o.len)) {
-        return;
-    }
-    // *this = o except for bmpSet and stringSpan
-    len = o.len;
-    uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32));
-    if (o.hasStrings()) {
-        UErrorCode status = U_ZERO_ERROR;
-        if (!allocateStrings(status)) {
-            setToBogus();
-            return;
-        }
-        strings->assign(*o.strings, cloneUnicodeString, status);
-        if (U_FAILURE(status)) {
-            setToBogus();
-            return;
-        }
-    }
-    if (o.pat) {
-        setPattern(o.pat, o.patLen);
-    }
-    _dbgct(this);
-}
-
-/**
- * Destructs the set: frees the heap-allocated list and buffer (if any),
- * the frozen-set helpers (bmpSet, stringSpan), the strings vector and
- * the pattern.
- */
-UnicodeSet::~UnicodeSet() {
- _dbgdt(this); // first!
- // list is only freed when it does not point at the inline stackList.
- if (list != stackList) {
- uprv_free(list);
- }
- delete bmpSet;
- // NOTE(review): buffer is compared against stackList, like list above.
- if (buffer != stackList) {
- uprv_free(buffer);
- }
- delete strings;
- delete stringSpan;
- releasePattern();
-}
-
-/**
- * Assignment: makes this object a copy of o by delegating to
- * copyFrom(o, FALSE).
- */
-UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) {
-    const UBool asThawed = FALSE;
-    return copyFrom(o, asThawed);
-}
-
-/**
- * Copies the contents of o into this set.
- * Does nothing if o is this object or if this set is frozen. This set
- * becomes bogus if o is bogus or if an allocation fails along the way.
- *
- * @param o the set to copy from
- * @param asThawed if true, o's bmpSet and stringSpan are not copied
- * @return *this
- */
-UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) {
- if (this == &o) {
- return *this;
- }
- if (isFrozen()) {
- return *this;
- }
- if (o.isBogus()) {
- setToBogus();
- return *this;
- }
- if (!ensureCapacity(o.len)) {
- // ensureCapacity will mark the UnicodeSet as Bogus if OOM failure happens.
- return *this;
- }
- len = o.len;
- uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32));
- // The new BMPSet is built from o's BMPSet together with this set's own list.
- if (o.bmpSet != nullptr && !asThawed) {
- bmpSet = new BMPSet(*o.bmpSet, list, len);
- if (bmpSet == NULL) { // Check for memory allocation error.
- setToBogus();
- return *this;
- }
- }
- if (o.hasStrings()) {
- UErrorCode status = U_ZERO_ERROR;
- // Allocate the strings vector on demand, then copy o's strings into it.
- if ((strings == nullptr && !allocateStrings(status)) ||
- (strings->assign(*o.strings, cloneUnicodeString, status), U_FAILURE(status))) {
- setToBogus();
- return *this;
- }
- } else if (hasStrings()) {
- // o has no strings: drop any strings this set still holds.
- strings->removeAllElements();
- }
- if (o.stringSpan != nullptr && !asThawed) {
- stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings);
- if (stringSpan == NULL) { // Check for memory allocation error.
- setToBogus();
- return *this;
- }
- }
- // Replace the old pattern with o's pattern, if o has one.
- releasePattern();
- if (o.pat) {
- setPattern(o.pat, o.patLen);
- }
- return *this;
-}
-
-/**
- * Returns a newly allocated copy of this object, made with the copy
- * constructor. All UnicodeMatcher objects have to support cloning so
- * that classes using UnicodeMatchers, such as Transliterator, can
- * implement cloning.
- */
-UnicodeSet* UnicodeSet::clone() const {
-    UnicodeSet *copy = new UnicodeSet(*this);
-    return copy;
-}
-
-// Returns a newly allocated copy made with the "as thawed" copy constructor.
-UnicodeSet *UnicodeSet::cloneAsThawed() const {
-    UnicodeSet *thawedCopy = new UnicodeSet(*this, TRUE);
-    return thawedCopy;
-}
-
-/**
- * Compares the specified set with this set for equality. Two sets are
- * equal when their lists are identical element by element and they
- * hold the same strings.
- *
- * @param o set to be compared for equality with this set.
- * @return <tt>true</tt> if the specified set is equal to this set.
- */
-UBool UnicodeSet::operator==(const UnicodeSet& o) const {
-    if (len != o.len) {
-        return FALSE;
-    }
-    for (int32_t i = 0; i < len; ++i) {
-        if (list[i] != o.list[i]) {
-            return FALSE;
-        }
-    }
-    const UBool thisHasStrings = hasStrings();
-    if (thisHasStrings != o.hasStrings()) {
-        return FALSE;
-    }
-    if (thisHasStrings && *strings != *o.strings) {
-        return FALSE;
-    }
-    return TRUE;
-}
-
-/**
- * Returns the hash code value for this set. The hash is computed from
- * the list length and the list entries only; strings do not take part.
- *
- * @return the hash code value for this set.
- * @see Object#hashCode()
- */
-int32_t UnicodeSet::hashCode(void) const {
-    uint32_t hash = static_cast<uint32_t>(len);
-    for (int32_t i = 0; i < len; ++i) {
-        hash = hash * 1000003u + list[i];
-    }
-    return static_cast<int32_t>(hash);
-}
-
-//----------------------------------------------------------------
-// Public API
-//----------------------------------------------------------------
-
-/**
- * Returns the number of elements in this set (its cardinality).
- * Note that the elements of a set may include both individual
- * code points and strings.
- *
- * @return the number of elements in this set (its cardinality).
- */
-int32_t UnicodeSet::size(void) const {
-    const int32_t rangeCount = getRangeCount();
-    int32_t codePoints = 0;
-    int32_t r = 0;
-    while (r < rangeCount) {
-        codePoints += getRangeEnd(r) - getRangeStart(r) + 1;
-        ++r;
-    }
-    return codePoints + stringsSize();
-}
-
-/**
- * Returns <tt>true</tt> if this set contains no elements, i.e. the list
- * holds a single entry and there are no strings.
- *
- * @return <tt>true</tt> if this set contains no elements.
- */
-UBool UnicodeSet::isEmpty(void) const {
-    if (len != 1) {
-        return FALSE;
-    }
-    return !hasStrings();
-}
-
-/**
- * Returns true if this set contains the given character.
- * Delegates to bmpSet or stringSpan when one of them is present;
- * otherwise looks the code point up in the list.
- * @param c character to be checked for containment
- * @return true if the test condition is met
- */
-UBool UnicodeSet::contains(UChar32 c) const {
-    if (bmpSet != nullptr) {
-        return bmpSet->contains(c);
-    }
-    if (stringSpan != nullptr) {
-        return stringSpan->contains(c);
-    }
-    // Don't need to check the LOW bound.
-    if (c >= UNICODESET_HIGH) {
-        return FALSE;
-    }
-    // An odd index from findCodePoint() means c is in the set.
-    return (UBool)(findCodePoint(c) & 1);
-}
-
-/**
- * Returns the smallest value i such that c < list[i]. The search
- * assumes that c is a legal value (0..HIGH-1) and that the list ends
- * with HIGH. This method performs a binary search.
- * @param c a character in the range MIN_VALUE..MAX_VALUE
- * inclusive
- * @return the smallest integer i in the range 0..len-1,
- * inclusive, such that c < list[i]
- */
-int32_t UnicodeSet::findCodePoint(UChar32 c) const {
- /* Examples:
- findCodePoint(c)
- set list[] c=0 1 3 4 7 8
- === ============== ===========
- [] [110000] 0 0 0 0 0 0
- [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2
- [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2
- [:Any:] [0, 110000] 1 1 1 1 1 1
- */
-
- // Return the smallest i such that c < list[i]. Assume
- // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
- if (c < list[0])
- return 0;
- // High runner test. c is often after the last range, so an
- // initial check for this condition pays off.
- int32_t lo = 0;
- int32_t hi = len - 1;
- if (lo >= hi || c >= list[hi-1])
- return hi;
- // invariant: c >= list[lo]
- // invariant: c < list[hi]
- for (;;) {
- // Midpoint; it equals lo once lo and hi are adjacent.
- int32_t i = (lo + hi) >> 1;
- if (i == lo) {
- break; // Found!
- } else if (c < list[i]) {
- hi = i;
- } else {
- lo = i;
- }
- }
- return hi;
-}
-
-/**
- * Returns true if this set contains every character
- * of the given range.
- * @param start first character, inclusive, of the range
- * @param end last character, inclusive, of the range
- * @return true if the test condition is met
- */
-UBool UnicodeSet::contains(UChar32 start, UChar32 end) const {
-    const int32_t i = findCodePoint(start);
-    if ((i & 1) == 0) {
-        // Even index: start itself is not in the set.
-        return FALSE;
-    }
-    // start is inside a range; end must lie before that range's limit.
-    return end < list[i];
-}
-
-/**
- * Returns <tt>true</tt> if this set contains the given string. A string
- * that consists of a single code point is looked up as that code point;
- * any other non-empty string is looked up among the set's strings. The
- * empty string is never contained.
- * @param s string to be checked for containment
- * @return <tt>true</tt> if this set contains the specified string
- */
-UBool UnicodeSet::contains(const UnicodeString& s) const {
-    if (s.length() == 0) {
-        return FALSE;
-    }
-    const int32_t cp = getSingleCP(s);
-    if (cp >= 0) {
-        return contains((UChar32) cp);
-    }
-    return stringsContains(s);
-}
-
-/**
- * Returns true if this set contains all the characters and strings
- * of the given set.
- * @param c set to be checked for containment
- * @return true if the test condition is met
- */
-UBool UnicodeSet::containsAll(const UnicodeSet& c) const {
-    // The specified set is a subset if all of its ranges are contained in
-    // this set. This could be coded more efficiently by manipulating the
-    // inversion lists directly if the need arises.
-    const int32_t rangeCount = c.getRangeCount();
-    for (int32_t r = 0; r < rangeCount; ++r) {
-        if (!contains(c.getRangeStart(r), c.getRangeEnd(r))) {
-            return FALSE;
-        }
-    }
-    if (!c.hasStrings()) {
-        return TRUE;
-    }
-    return strings != nullptr && strings->containsAll(*c.strings);
-}
-
-/**
- * Returns true if this set contains all the characters
- * of the given string, i.e. a USET_SPAN_CONTAINED span covers the
- * whole string.
- * @param s string containing characters to be checked for containment
- * @return true if the test condition is met
- */
-UBool UnicodeSet::containsAll(const UnicodeString& s) const {
-    const int32_t spanned = span(s.getBuffer(), s.length(), USET_SPAN_CONTAINED);
-    return (UBool)(spanned == s.length());
-}
-
-/**
- * Returns true if this set contains none of the characters
- * of the given range.
- * @param start first character, inclusive, of the range
- * @param end last character, inclusive, of the range
- * @return true if the test condition is met
- */
-UBool UnicodeSet::containsNone(UChar32 start, UChar32 end) const {
-    const int32_t i = findCodePoint(start);
-    if ((i & 1) != 0) {
-        // Odd index: start itself is in the set.
-        return FALSE;
-    }
-    // start is in a gap; end must lie before the next range begins.
-    return end < list[i];
-}
-
-/**
- * Returns true if this set contains none of the characters and strings
- * of the given set.
- * @param c set to be checked for containment
- * @return true if the test condition is met
- */
-UBool UnicodeSet::containsNone(const UnicodeSet& c) const {
-    // The sets are disjoint if none of the specified set's ranges overlaps
-    // this set. This could be coded more efficiently by manipulating the
-    // inversion lists directly if the need arises.
-    const int32_t rangeCount = c.getRangeCount();
-    for (int32_t r = 0; r < rangeCount; ++r) {
-        if (!containsNone(c.getRangeStart(r), c.getRangeEnd(r))) {
-            return FALSE;
-        }
-    }
-    if (strings == nullptr || !c.hasStrings()) {
-        return TRUE;
-    }
-    return strings->containsNone(*c.strings);
-}
-
-/**
- * Returns true if this set contains none of the characters
- * of the given string, i.e. a USET_SPAN_NOT_CONTAINED span covers the
- * whole string.
- * @param s string containing characters to be checked for containment
- * @return true if the test condition is met
- */
-UBool UnicodeSet::containsNone(const UnicodeString& s) const {
-    const int32_t spanned = span(s.getBuffer(), s.length(), USET_SPAN_NOT_CONTAINED);
-    return (UBool)(spanned == s.length());
-}
-
-/**
- * Returns <tt>true</tt> if this set contains any character whose low byte
- * is the given value, or any string whose first code point has that low
- * byte. This is used by <tt>RuleBasedTransliterator</tt> for indexing.
- */
-UBool UnicodeSet::matchesIndexValue(uint8_t v) const {
-    /* The index value v, in the range [0,255], is contained in this set if
-     * it is contained in any range of this set. A range either has equal
-     * high bytes, aaxx..aayy, in which case v is contained if
-     * xx <= v <= yy; or unequal high bytes, aaxx..bbyy with bb>aa, in
-     * which case v is contained if xx <= v || v <= yy. (This is identical
-     * to the time zone month containment logic.)
-     */
-    const int32_t rangeCount = getRangeCount();
-    for (int32_t r = 0; r < rangeCount; ++r) {
-        const UChar32 low = getRangeStart(r);
-        const UChar32 high = getRangeEnd(r);
-        const UBool atOrAboveLow = (low & 0xFF) <= v;
-        const UBool atOrBelowHigh = v <= (high & 0xFF);
-        const UBool sameHighBytes = (low & ~0xFF) == (high & ~0xFF);
-        if (sameHighBytes ? (atOrAboveLow && atOrBelowHigh)
-                          : (atOrAboveLow || atOrBelowHigh)) {
-            return TRUE;
-        }
-    }
-    if (hasStrings()) {
-        for (int32_t k = 0; k < strings->size(); ++k) {
-            const UnicodeString& s = *(const UnicodeString*)strings->elementAt(k);
-            // Strings are never empty here; that is enforced elsewhere.
-            const UChar32 first = s.char32At(0);
-            if ((first & 0xFF) == v) {
-                return TRUE;
-            }
-        }
-    }
-    return FALSE;
-}
-
-/**
- * Implementation of UnicodeMatcher::matches(). Always matches the
- * longest possible multichar string.
- *
- * The set's strings are tried first; if none of them matches, the call
- * falls back to UnicodeFilter::matches(). The direction is forward when
- * offset < limit and backward otherwise. On a string match, offset is
- * moved by the length of the longest matching string.
- */
-UMatchDegree UnicodeSet::matches(const Replaceable& text,
- int32_t& offset,
- int32_t limit,
- UBool incremental) {
- if (offset == limit) {
- // Strings, if any, have length != 0, so we don't worry
- // about them here. If we ever allow zero-length strings
- // we must check for them here.
- if (contains(U_ETHER)) {
- return incremental ? U_PARTIAL_MATCH : U_MATCH;
- } else {
- return U_MISMATCH;
- }
- } else {
- if (hasStrings()) { // try strings first
-
- // might separate forward and backward loops later
- // for now they are combined
-
- // TODO Improve efficiency of this, at least in the forward
- // direction, if not in both. In the forward direction we
- // can assume the strings are sorted.
-
- int32_t i;
- UBool forward = offset < limit;
-
- // firstChar is the leftmost char to match in the
- // forward direction or the rightmost char to match in
- // the reverse direction.
- UChar firstChar = text.charAt(offset);
-
- // If there are multiple strings that can match we
- // return the longest match.
- int32_t highWaterLength = 0;
-
- for (i=0; i<strings->size(); ++i) {
- const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i);
-
- //if (trial.length() == 0) {
- // return U_MATCH; // null-string always matches
- //}
- // assert(trial.length() != 0); // We ensure this elsewhere
-
- UChar c = trial.charAt(forward ? 0 : trial.length() - 1);
-
- // Strings are sorted, so we can optimize in the
- // forward direction.
- if (forward && c > firstChar) break;
- if (c != firstChar) continue;
-
- int32_t matchLen = matchRest(text, offset, limit, trial);
-
- if (incremental) {
- int32_t maxLen = forward ? limit-offset : offset-limit;
- if (matchLen == maxLen) {
- // We have successfully matched but only up to limit.
- return U_PARTIAL_MATCH;
- }
- }
-
- if (matchLen == trial.length()) {
- // We have successfully matched the whole string.
- if (matchLen > highWaterLength) {
- highWaterLength = matchLen;
- }
- // In the forward direction we know strings
- // are sorted so we can bail early.
- if (forward && matchLen < highWaterLength) {
- break;
- }
- continue;
- }
- }
-
- // We've checked all strings without a partial match.
- // If we have full matches, return the longest one.
- if (highWaterLength != 0) {
- offset += forward ? highWaterLength : -highWaterLength;
- return U_MATCH;
- }
- }
- return UnicodeFilter::matches(text, offset, limit, incremental);
- }
-}
-
-/**
- * Returns the longest match for s in text at the given position.
- * If limit > start then match forward from start+1 to limit
- * matching all characters except s.charAt(0). If limit < start,
- * go backward starting from start-1 matching all characters
- * except s.charAt(s.length()-1). This method assumes that the
- * first character, text.charAt(start), matches s, so it does not
- * check it.
- * @param text the text to match
- * @param start the first character to match. In the forward
- * direction, text.charAt(start) is matched against s.charAt(0).
- * In the reverse direction, it is matched against
- * s.charAt(s.length()-1).
- * @param limit the limit offset for matching, either last+1 in
- * the forward direction, or last-1 in the reverse direction,
- * where last is the index of the last character to match.
- * @param s the string to match against text
- * @return If part of s matches up to the limit, return |limit -
- * start|. If all of s matches before reaching the limit, return
- * s.length(). If there is a mismatch between s and text, return
- * 0
- */
-int32_t UnicodeSet::matchRest(const Replaceable& text,
- int32_t start, int32_t limit,
- const UnicodeString& s) {
- int32_t i;
- int32_t maxLen;
- int32_t slen = s.length();
- if (start < limit) {
- // Forward: compare at most min(limit - start, s.length()) code units.
- maxLen = limit - start;
- if (maxLen > slen) maxLen = slen;
- for (i = 1; i < maxLen; ++i) {
- if (text.charAt(start + i) != s.charAt(i)) return 0;
- }
- } else {
- // Backward: compare at most min(start - limit, s.length()) code units,
- // walking text and s from their right-hand ends.
- maxLen = start - limit;
- if (maxLen > slen) maxLen = slen;
- --slen; // <=> slen = s.length() - 1;
- for (i = 1; i < maxLen; ++i) {
- if (text.charAt(start - i) != s.charAt(slen - i)) return 0;
- }
- }
- return maxLen;
-}
-
-/**
- * Implementation of UnicodeMatcher::addMatchSetTo(): adds the contents
- * of this set to toUnionTo via addAll().
- */
-void UnicodeSet::addMatchSetTo(UnicodeSet& toUnionTo) const {
-    const UnicodeSet &self = *this;
-    toUnionTo.addAll(self);
-}
-
-/**
- * Returns the index of the given character within this set, where
- * the set is ordered by ascending code point. If the character
- * is not in this set, return -1. The inverse of this method is
- * <code>charAt()</code>.
- * @param c the character to look up
- * @return an index from 0..size()-1, or -1
- */
-int32_t UnicodeSet::indexOf(UChar32 c) const {
-    if (c < MIN_VALUE || c > MAX_VALUE) {
-        return -1;
-    }
-    // Number of code points in the ranges that lie entirely before c.
-    int32_t preceding = 0;
-    for (int32_t i = 0;; i += 2) {
-        const UChar32 start = list[i];
-        if (c < start) {
-            return -1;
-        }
-        const UChar32 limit = list[i + 1];
-        if (c < limit) {
-            return preceding + c - start;
-        }
-        preceding += limit - start;
-    }
-}
-
-/**
- * Returns the character at the given index within this set, where
- * the set is ordered by ascending code point. If the index is
- * out of range, return (UChar32)-1. The inverse of this method is
- * <code>indexOf()</code>.
- * @param index an index from 0..size()-1
- * @return the character at the given index, or (UChar32)-1.
- */
-UChar32 UnicodeSet::charAt(int32_t index) const {
-    if (index < 0) {
-        return (UChar32)-1;
-    }
-    // pairedLen is the largest even integer <= len: len itself for even
-    // values and len-1 for odd values. With odd values the last entry is
-    // UNICODESET_HIGH.
-    const int32_t pairedLen = len & ~1;
-    for (int32_t i = 0; i < pairedLen; i += 2) {
-        const UChar32 start = list[i];
-        const int32_t count = list[i + 1] - start;
-        if (index < count) {
-            return (UChar32)(start + index);
-        }
-        index -= count;
-    }
-    return (UChar32)-1;
-}
-
-/**
- * Make this object represent the range <code>start - end</code>.
- * If <code>start > end</code> then this object is set to an
- * empty range.
- *
- * @param start first character in the set, inclusive
- * @param end last character in the set, inclusive
- * @return this object
- */
-UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) {
-    clear();                 // drop the current contents ...
-    complement(start, end);  // ... then apply complement() to the cleared set
-    return *this;
-}
-
-/**
- * Adds the specified range to this set if it is not already
- * present. If this set already contains the specified range,
- * the call leaves this set unchanged. If <code>start > end</code>
- * then an empty range is added, leaving the set unchanged.
- * Both arguments are pinned to the valid code point range first;
- * if they are then equal, the single code point is added.
- *
- * @param start first character, inclusive, of range to be added
- * to this set.
- * @param end last character, inclusive, of range to be added
- * to this set.
- * @return this object
- */
-UnicodeSet& UnicodeSet::add(UChar32 start, UChar32 end) {
- if (pinCodePoint(start) < pinCodePoint(end)) {
- UChar32 limit = end + 1;
- // Fast path for adding a new range after the last one.
- // Odd list length: [..., lastStart, lastLimit, HIGH]
- if ((len & 1) != 0) {
- // If the list is empty, set lastLimit low enough to not be adjacent to 0.
- UChar32 lastLimit = len == 1 ? -2 : list[len - 2];
- if (lastLimit <= start && !isFrozen() && !isBogus()) {
- if (lastLimit == start) {
- // Extend the last range.
- list[len - 2] = limit;
- if (limit == UNICODESET_HIGH) {
- --len;
- }
- } else {
- // Append a new range: the old HIGH terminator slot becomes its start.
- list[len - 1] = start;
- if (limit < UNICODESET_HIGH) {
- if (ensureCapacity(len + 2)) {
- list[len++] = limit;
- list[len++] = UNICODESET_HIGH;
- }
- } else { // limit == UNICODESET_HIGH
- if (ensureCapacity(len + 1)) {
- list[len++] = UNICODESET_HIGH;
- }
- }
- }
- releasePattern();
- return *this;
- }
- }
- // This is slow. Could be much faster using findCodePoint(start)
- // and modifying the list, dealing with adjacent & overlapping ranges.
- UChar32 range[3] = { start, limit, UNICODESET_HIGH };
- add(range, 2, 0);
- } else if (start == end) {
- add(start);
- }
- return *this;
-}
-
-// #define DEBUG_US_ADD
-
-#ifdef DEBUG_US_ADD
-#include <stdio.h>
-// Debug helper (DEBUG_US_ADD only): prints c as a raw character when it is
-// <= 0xFF, otherwise in U+XXXX form.
-void dump(UChar32 c) {
-    if (c > 0xFF) {
-        printf("U+%04X", c);
-        return;
-    }
-    printf("%c", (char)c);
-}
-// Debug helper (DEBUG_US_ADD only): prints the first len entries of list as
-// a bracketed, comma-separated sequence.
-void dump(const UChar32* list, int32_t len) {
-    printf("[");
-    for (int32_t i = 0; i < len; ++i) {
-        if (i > 0) {
-            printf(", ");
-        }
-        dump(list[i]);
-    }
-    printf("]");
-}
-#endif
-
-/**
- * Adds the specified character to this set if it is not already
- * present. If this set already contains the specified character,
- * the call leaves this set unchanged. The call also does nothing if
- * the set is frozen or bogus. The character is pinned to the valid
- * code point range before it is looked up.
- *
- * @param c the character to add
- * @return this object
- */
-UnicodeSet& UnicodeSet::add(UChar32 c) {
- // find smallest i such that c < list[i]
- // if odd, then it is IN the set
- // if even, then it is OUT of the set
- int32_t i = findCodePoint(pinCodePoint(c));
-
- // already in set?
- if ((i & 1) != 0 || isFrozen() || isBogus()) return *this;
-
- // HIGH is 0x110000
- // assert(list[len-1] == HIGH);
-
- // empty = [HIGH]
- // [start_0, limit_0, start_1, limit_1, HIGH]
-
- // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
- // ^
- // list[i]
-
- // i == 0 means c is before the first range
-
-#ifdef DEBUG_US_ADD
- printf("Add of ");
- dump(c);
- printf(" found at %d", i);
- printf(": ");
- dump(list, len);
- printf(" => ");
-#endif
-
- if (c == list[i]-1) {
- // c is before start of next range
- list[i] = c;
- // if we touched the HIGH mark, then add a new one
- if (c == (UNICODESET_HIGH - 1)) {
- if (!ensureCapacity(len+1)) {
- // ensureCapacity will mark the object as Bogus if OOM failure happens.
- return *this;
- }
- list[len++] = UNICODESET_HIGH;
- }
- if (i > 0 && c == list[i-1]) {
- // collapse adjacent ranges
-
- // [..., start_k-1, c, c, limit_k, ..., HIGH]
- // ^
- // list[i]
-
- //for (int32_t k=i-1; k<len-2; ++k) {
- // list[k] = list[k+2];
- //}
- // Shift everything after the duplicated pair down by two entries.
- UChar32* dst = list + i - 1;
- UChar32* src = dst + 2;
- UChar32* srclimit = list + len;
- while (src < srclimit) *(dst++) = *(src++);
-
- len -= 2;
- }
- }
-
- else if (i > 0 && c == list[i-1]) {
- // c is after end of prior range
- list[i-1]++;
- // no need to check for collapse here
- }
-
- else {
- // At this point we know the new char is not adjacent to
- // any existing ranges, and it is not 10FFFF.
-
-
- // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
- // ^
- // list[i]
-
- // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH]
- // ^
- // list[i]
-
- if (!ensureCapacity(len+2)) {
- // ensureCapacity will mark the object as Bogus if OOM failure happens.
- return *this;
- }
-
- // Open a two-entry gap at index i for the new single-character range.
- UChar32 *p = list + i;
- uprv_memmove(p + 2, p, (len - i) * sizeof(*p));
- list[i] = c;
- list[i+1] = c+1;
- len += 2;
- }
-
-#ifdef DEBUG_US_ADD
- dump(list, len);
- printf("\n");
-
- for (i=1; i<len; ++i) {
- if (list[i] <= list[i-1]) {
- // Corrupt array!
- printf("ERROR: list has been corrupted\n");
- exit(1);
- }
- }
-#endif
-
- releasePattern();
- return *this;
-}
-
-/**
- * Adds the specified multicharacter string to this set if it is not
- * already present. If this set already contains the string,
- * the call leaves this set unchanged.
- * Thus "ch" => {"ch"}
- * A string that consists of a single code point is added as that
- * code point. Nothing happens if the set is frozen or bogus.
- * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
- * @param s the source string
- * @return the modified set, for chaining
- */
-UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
-    if (s.length() == 0 || isFrozen() || isBogus()) {
-        return *this;
-    }
-    const int32_t cp = getSingleCP(s);
-    if (cp >= 0) {
-        add((UChar32)cp);
-        return *this;
-    }
-    if (!stringsContains(s)) {
-        _add(s);
-        releasePattern();
-    }
-    return *this;
-}
-
-/**
- * Inserts a copy of the given string, in sorted order, into 'strings'.
- * The given string must have been checked by the caller to not be empty
- * and to not already be in 'strings'. Does nothing if the set is frozen
- * or bogus; marks the set bogus if an allocation or the insertion fails.
- */
-void UnicodeSet::_add(const UnicodeString& s) {
-    if (isFrozen() || isBogus()) {
-        return;
-    }
-    UErrorCode ec = U_ZERO_ERROR;
-    if (strings == nullptr && !allocateStrings(ec)) {
-        setToBogus();
-        return;
-    }
-    UnicodeString* copy = new UnicodeString(s);
-    if (copy == nullptr) { // Check for memory allocation error.
-        setToBogus();
-        return;
-    }
-    strings->sortedInsert(copy, compareUnicodeString, ec);
-    if (!U_FAILURE(ec)) {
-        return;
-    }
-    setToBogus();
-    delete copy;
-}
-
-/**
- * @return a code point IF the string consists of a single one,
- * otherwise returns -1.
- * @param s the string to test
- */
-int32_t UnicodeSet::getSingleCP(const UnicodeString& s) {
-    const int32_t length = s.length();
-    if (length > 2) {
-        return -1;
-    }
-    if (length == 1) {
-        return s.charAt(0);
-    }
-    // Remaining lengths (normally 2): a single code point only if the
-    // code units form a surrogate pair.
-    const UChar32 cp = s.char32At(0);
-    return cp > 0xFFFF ? cp : -1;
-}
-
-/**
- * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
- * If this set already any particular character, it has no effect on that character.
- * @param the source string
- * @return the modified set, for chaining
- */
-UnicodeSet& UnicodeSet::addAll(const UnicodeString& s) {
- UChar32 cp;
- for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) {
- cp = s.char32At(i);
- add(cp);
- }
- return *this;
-}
-
-/**
- * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
- * If this set already any particular character, it has no effect on that character.
- * @param the source string
- * @return the modified set, for chaining
- */
-UnicodeSet& UnicodeSet::retainAll(const UnicodeString& s) {
- UnicodeSet set;
- set.addAll(s);
- retainAll(set);
- return *this;
-}
-
-/**
- * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
- * If this set already any particular character, it has no effect on that character.
- * @param the source string
- * @return the modified set, for chaining
- */
-UnicodeSet& UnicodeSet::complementAll(const UnicodeString& s) {
- UnicodeSet set;
- set.addAll(s);
- complementAll(set);
- return *this;
-}
-
-/**
- * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
- * If this set already any particular character, it has no effect on that character.
- * @param the source string
- * @return the modified set, for chaining
- */
-UnicodeSet& UnicodeSet::removeAll(const UnicodeString& s) {
- UnicodeSet set;
- set.addAll(s);
- removeAll(set);
- return *this;
-}
-
-UnicodeSet& UnicodeSet::removeAllStrings() {
- if (!isFrozen() && hasStrings()) {
- strings->removeAllElements();
- releasePattern();
- }
- return *this;
-}
-
-
-/**
- * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
- * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
- * @param the source string
- * @return a newly created set containing the given string
- */
-UnicodeSet* U_EXPORT2 UnicodeSet::createFrom(const UnicodeString& s) {
- UnicodeSet *set = new UnicodeSet();
- if (set != NULL) { // Check for memory allocation error.
- set->add(s);
- }
- return set;
-}
-
-
-/**
- * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
- * @param the source string
- * @return a newly created set containing the given characters
- */
-UnicodeSet* U_EXPORT2 UnicodeSet::createFromAll(const UnicodeString& s) {
- UnicodeSet *set = new UnicodeSet();
- if (set != NULL) { // Check for memory allocation error.
- set->addAll(s);
- }
- return set;
-}
-
-/**
- * Retain only the elements in this set that are contained in the
- * specified range. If <code>end > start</code> then an empty range is
- * retained, leaving the set empty.
- *
- * @param start first character, inclusive, of range to be retained
- * to this set.
- * @param end last character, inclusive, of range to be retained
- * to this set.
- */
-UnicodeSet& UnicodeSet::retain(UChar32 start, UChar32 end) {
- if (pinCodePoint(start) <= pinCodePoint(end)) {
- UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
- retain(range, 2, 0);
- } else {
- clear();
- }
- return *this;
-}
-
-UnicodeSet& UnicodeSet::retain(UChar32 c) {
- return retain(c, c);
-}
-
-/**
- * Removes the specified range from this set if it is present.
- * The set will not contain the specified range once the call
- * returns. If <code>end > start</code> then an empty range is
- * removed, leaving the set unchanged.
- *
- * @param start first character, inclusive, of range to be removed
- * from this set.
- * @param end last character, inclusive, of range to be removed
- * from this set.
- */
-UnicodeSet& UnicodeSet::remove(UChar32 start, UChar32 end) {
- if (pinCodePoint(start) <= pinCodePoint(end)) {
- UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
- retain(range, 2, 2);
- }
- return *this;
-}
-
-/**
- * Removes the specified character from this set if it is present.
- * The set will not contain the specified range once the call
- * returns.
- */
-UnicodeSet& UnicodeSet::remove(UChar32 c) {
- return remove(c, c);
-}
-
-/**
- * Removes the specified string from this set if it is present.
- * The set will not contain the specified character once the call
- * returns.
- * @param the source string
- * @return the modified set, for chaining
- */
-UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
- if (s.length() == 0 || isFrozen() || isBogus()) return *this;
- int32_t cp = getSingleCP(s);
- if (cp < 0) {
- if (strings != nullptr && strings->removeElement((void*) &s)) {
- releasePattern();
- }
- } else {
- remove((UChar32)cp, (UChar32)cp);
- }
- return *this;
-}
-
-/**
- * Complements the specified range in this set. Any character in
- * the range will be removed if it is in this set, or will be
- * added if it is not in this set. If <code>end > start</code>
- * then an empty range is xor'ed, leaving the set unchanged.
- *
- * @param start first character, inclusive, of range to be removed
- * from this set.
- * @param end last character, inclusive, of range to be removed
- * from this set.
- */
-UnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) {
- if (isFrozen() || isBogus()) {
- return *this;
- }
- if (pinCodePoint(start) <= pinCodePoint(end)) {
- UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
- exclusiveOr(range, 2, 0);
- }
- releasePattern();
- return *this;
-}
-
-UnicodeSet& UnicodeSet::complement(UChar32 c) {
- return complement(c, c);
-}
-
-/**
- * This is equivalent to
- * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
- */
-UnicodeSet& UnicodeSet::complement(void) {
- if (isFrozen() || isBogus()) {
- return *this;
- }
- if (list[0] == UNICODESET_LOW) {
- uprv_memmove(list, list + 1, (size_t)(len-1)*sizeof(UChar32));
- --len;
- } else {
- if (!ensureCapacity(len+1)) {
- return *this;
- }
- uprv_memmove(list + 1, list, (size_t)len*sizeof(UChar32));
- list[0] = UNICODESET_LOW;
- ++len;
- }
- releasePattern();
- return *this;
-}
-
-/**
- * Complement the specified string in this set.
- * The set will not contain the specified string once the call
- * returns.
- * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
- * @param s the string to complement
- * @return this object, for chaining
- */
-UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
- if (s.length() == 0 || isFrozen() || isBogus()) return *this;
- int32_t cp = getSingleCP(s);
- if (cp < 0) {
- if (stringsContains(s)) {
- strings->removeElement((void*) &s);
- } else {
- _add(s);
- }
- releasePattern();
- } else {
- complement((UChar32)cp, (UChar32)cp);
- }
- return *this;
-}
-
-/**
- * Adds all of the elements in the specified set to this set if
- * they're not already present. This operation effectively
- * modifies this set so that its value is the <i>union</i> of the two
- * sets. The behavior of this operation is unspecified if the specified
- * collection is modified while the operation is in progress.
- *
- * @param c set whose elements are to be added to this set.
- * @see #add(char, char)
- */
-UnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) {
- if ( c.len>0 && c.list!=NULL ) {
- add(c.list, c.len, 0);
- }
-
- // Add strings in order
- if ( c.strings!=NULL ) {
- for (int32_t i=0; i<c.strings->size(); ++i) {
- const UnicodeString* s = (const UnicodeString*)c.strings->elementAt(i);
- if (!stringsContains(*s)) {
- _add(*s);
- }
- }
- }
- return *this;
-}
-
-/**
- * Retains only the elements in this set that are contained in the
- * specified set. In other words, removes from this set all of
- * its elements that are not contained in the specified set. This
- * operation effectively modifies this set so that its value is
- * the <i>intersection</i> of the two sets.
- *
- * @param c set that defines which elements this set will retain.
- */
-UnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) {
- if (isFrozen() || isBogus()) {
- return *this;
- }
- retain(c.list, c.len, 0);
- if (hasStrings()) {
- if (!c.hasStrings()) {
- strings->removeAllElements();
- } else {
- strings->retainAll(*c.strings);
- }
- }
- return *this;
-}
-
-/**
- * Removes from this set all of its elements that are contained in the
- * specified set. This operation effectively modifies this
- * set so that its value is the <i>asymmetric set difference</i> of
- * the two sets.
- *
- * @param c set that defines which elements will be removed from
- * this set.
- */
-UnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) {
- if (isFrozen() || isBogus()) {
- return *this;
- }
- retain(c.list, c.len, 2);
- if (hasStrings() && c.hasStrings()) {
- strings->removeAll(*c.strings);
- }
- return *this;
-}
-
-/**
- * Complements in this set all elements contained in the specified
- * set. Any character in the other set will be removed if it is
- * in this set, or will be added if it is not in this set.
- *
- * @param c set that defines which elements will be xor'ed from
- * this set.
- */
-UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) {
- if (isFrozen() || isBogus()) {
- return *this;
- }
- exclusiveOr(c.list, c.len, 0);
-
- if (c.strings != nullptr) {
- for (int32_t i=0; i<c.strings->size(); ++i) {
- void* e = c.strings->elementAt(i);
- if (strings == nullptr || !strings->removeElement(e)) {
- _add(*(const UnicodeString*)e);
- }
- }
- }
- return *this;
-}
-
-/**
- * Removes all of the elements from this set. This set will be
- * empty after this call returns.
- */
-UnicodeSet& UnicodeSet::clear(void) {
- if (isFrozen()) {
- return *this;
- }
- list[0] = UNICODESET_HIGH;
- len = 1;
- releasePattern();
- if (strings != NULL) {
- strings->removeAllElements();
- }
- // Remove bogus
- fFlags = 0;
- return *this;
-}
-
-/**
- * Iteration method that returns the number of ranges contained in
- * this set.
- * @see #getRangeStart
- * @see #getRangeEnd
- */
-int32_t UnicodeSet::getRangeCount() const {
- return len/2;
-}
-
-/**
- * Iteration method that returns the first character in the
- * specified range of this set.
- * @see #getRangeCount
- * @see #getRangeEnd
- */
-UChar32 UnicodeSet::getRangeStart(int32_t index) const {
- return list[index*2];
-}
-
-/**
- * Iteration method that returns the last character in the
- * specified range of this set.
- * @see #getRangeStart
- * @see #getRangeEnd
- */
-UChar32 UnicodeSet::getRangeEnd(int32_t index) const {
- return list[index*2 + 1] - 1;
-}
-
-const UnicodeString* UnicodeSet::getString(int32_t index) const {
- return (const UnicodeString*) strings->elementAt(index);
-}
-
-/**
- * Reallocate this objects internal structures to take up the least
- * possible space, without changing this object's value.
- */
-UnicodeSet& UnicodeSet::compact() {
- if (isFrozen() || isBogus()) {
- return *this;
- }
- // Delete buffer first to defragment memory less.
- if (buffer != stackList) {
- uprv_free(buffer);
- buffer = NULL;
- bufferCapacity = 0;
- }
- if (list == stackList) {
- // pass
- } else if (len <= INITIAL_CAPACITY) {
- uprv_memcpy(stackList, list, len * sizeof(UChar32));
- uprv_free(list);
- list = stackList;
- capacity = INITIAL_CAPACITY;
- } else if ((len + 7) < capacity) {
- // If we have more than a little unused capacity, shrink it to len.
- UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * len);
- if (temp) {
- list = temp;
- capacity = len;
- }
- // else what the heck happened?! We allocated less memory!
- // Oh well. We'll keep our original array.
- }
- if (strings != nullptr && strings->isEmpty()) {
- delete strings;
- strings = nullptr;
- }
- return *this;
-}
-
-#ifdef DEBUG_SERIALIZE
-#include <stdio.h>
-#endif
-
-/**
- * Deserialize constructor.
- */
-UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization serialization,
- UErrorCode &ec) {
-
- if(U_FAILURE(ec)) {
- setToBogus();
- return;
- }
-
- if( (serialization != kSerialized)
- || (data==NULL)
- || (dataLen < 1)) {
- ec = U_ILLEGAL_ARGUMENT_ERROR;
- setToBogus();
- return;
- }
-
- // bmp?
- int32_t headerSize = ((data[0]&0x8000)) ?2:1;
- int32_t bmpLength = (headerSize==1)?data[0]:data[1];
-
- int32_t newLength = (((data[0]&0x7FFF)-bmpLength)/2)+bmpLength;
-#ifdef DEBUG_SERIALIZE
- printf("dataLen %d headerSize %d bmpLen %d len %d. data[0]=%X/%X/%X/%X\n", dataLen,headerSize,bmpLength,newLength, data[0],data[1],data[2],data[3]);
-#endif
- if(!ensureCapacity(newLength + 1)) { // +1 for HIGH
- return;
- }
- // copy bmp
- int32_t i;
- for(i = 0; i< bmpLength;i++) {
- list[i] = data[i+headerSize];
-#ifdef DEBUG_SERIALIZE
- printf("<<16@%d[%d] %X\n", i+headerSize, i, list[i]);
-#endif
- }
- // copy smp
- for(i=bmpLength;i<newLength;i++) {
- list[i] = ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+0] << 16) +
- ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+1]);
-#ifdef DEBUG_SERIALIZE
- printf("<<32@%d+[%d] %lX\n", headerSize+bmpLength+i, i, list[i]);
-#endif
- }
- U_ASSERT(i == newLength);
- if (i == 0 || list[i - 1] != UNICODESET_HIGH) {
- list[i++] = UNICODESET_HIGH;
- }
- len = i;
-}
-
-
-int32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const {
- int32_t bmpLength, length, destLength;
-
- if (U_FAILURE(ec)) {
- return 0;
- }
-
- if (destCapacity<0 || (destCapacity>0 && dest==NULL)) {
- ec=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* count necessary 16-bit units */
- length=this->len-1; // Subtract 1 to ignore final UNICODESET_HIGH
- // assert(length>=0);
- if (length==0) {
- /* empty set */
- if (destCapacity>0) {
- *dest=0;
- } else {
- ec=U_BUFFER_OVERFLOW_ERROR;
- }
- return 1;
- }
- /* now length>0 */
-
- if (this->list[length-1]<=0xffff) {
- /* all BMP */
- bmpLength=length;
- } else if (this->list[0]>=0x10000) {
- /* all supplementary */
- bmpLength=0;
- length*=2;
- } else {
- /* some BMP, some supplementary */
- for (bmpLength=0; bmpLength<length && this->list[bmpLength]<=0xffff; ++bmpLength) {}
- length=bmpLength+2*(length-bmpLength);
- }
-#ifdef DEBUG_SERIALIZE
- printf(">> bmpLength%d length%d len%d\n", bmpLength, length, len);
-#endif
- /* length: number of 16-bit array units */
- if (length>0x7fff) {
- /* there are only 15 bits for the length in the first serialized word */
- ec=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- /*
- * total serialized length:
- * number of 16-bit array units (length) +
- * 1 length unit (always) +
- * 1 bmpLength unit (if there are supplementary values)
- */
- destLength=length+((length>bmpLength)?2:1);
- if (destLength<=destCapacity) {
- const UChar32 *p;
- int32_t i;
-
-#ifdef DEBUG_SERIALIZE
- printf("writeHdr\n");
-#endif
- *dest=(uint16_t)length;
- if (length>bmpLength) {
- *dest|=0x8000;
- *++dest=(uint16_t)bmpLength;
- }
- ++dest;
-
- /* write the BMP part of the array */
- p=this->list;
- for (i=0; i<bmpLength; ++i) {
-#ifdef DEBUG_SERIALIZE
- printf("writebmp: %x\n", (int)*p);
-#endif
- *dest++=(uint16_t)*p++;
- }
-
- /* write the supplementary part of the array */
- for (; i<length; i+=2) {
-#ifdef DEBUG_SERIALIZE
- printf("write32: %x\n", (int)*p);
-#endif
- *dest++=(uint16_t)(*p>>16);
- *dest++=(uint16_t)*p++;
- }
- } else {
- ec=U_BUFFER_OVERFLOW_ERROR;
- }
- return destLength;
-}
-
-//----------------------------------------------------------------
-// Implementation: Utility methods
-//----------------------------------------------------------------
-
-/**
- * Allocate our strings vector and return TRUE if successful.
- */
-UBool UnicodeSet::allocateStrings(UErrorCode &status) {
- if (U_FAILURE(status)) {
- return FALSE;
- }
- strings = new UVector(uprv_deleteUObject,
- uhash_compareUnicodeString, 1, status);
- if (strings == NULL) { // Check for memory allocation error.
- status = U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
- }
- if (U_FAILURE(status)) {
- delete strings;
- strings = NULL;
- return FALSE;
- }
- return TRUE;
-}
-
-int32_t UnicodeSet::nextCapacity(int32_t minCapacity) {
- // Grow exponentially to reduce the frequency of allocations.
- if (minCapacity < INITIAL_CAPACITY) {
- return minCapacity + INITIAL_CAPACITY;
- } else if (minCapacity <= 2500) {
- return 5 * minCapacity;
- } else {
- int32_t newCapacity = 2 * minCapacity;
- if (newCapacity > MAX_LENGTH) {
- newCapacity = MAX_LENGTH;
- }
- return newCapacity;
- }
-}
-
-bool UnicodeSet::ensureCapacity(int32_t newLen) {
- if (newLen > MAX_LENGTH) {
- newLen = MAX_LENGTH;
- }
- if (newLen <= capacity) {
- return true;
- }
- int32_t newCapacity = nextCapacity(newLen);
- UChar32* temp = (UChar32*) uprv_malloc(newCapacity * sizeof(UChar32));
- if (temp == NULL) {
- setToBogus(); // set the object to bogus state if an OOM failure occurred.
- return false;
- }
- // Copy only the actual contents.
- uprv_memcpy(temp, list, len * sizeof(UChar32));
- if (list != stackList) {
- uprv_free(list);
- }
- list = temp;
- capacity = newCapacity;
- return true;
-}
-
-bool UnicodeSet::ensureBufferCapacity(int32_t newLen) {
- if (newLen > MAX_LENGTH) {
- newLen = MAX_LENGTH;
- }
- if (newLen <= bufferCapacity) {
- return true;
- }
- int32_t newCapacity = nextCapacity(newLen);
- UChar32* temp = (UChar32*) uprv_malloc(newCapacity * sizeof(UChar32));
- if (temp == NULL) {
- setToBogus();
- return false;
- }
- // The buffer has no contents to be copied.
- // It is always filled from scratch after this call.
- if (buffer != stackList) {
- uprv_free(buffer);
- }
- buffer = temp;
- bufferCapacity = newCapacity;
- return true;
-}
-
-/**
- * Swap list and buffer.
- */
-void UnicodeSet::swapBuffers(void) {
- // swap list and buffer
- UChar32* temp = list;
- list = buffer;
- buffer = temp;
-
- int32_t c = capacity;
- capacity = bufferCapacity;
- bufferCapacity = c;
-}
-
-void UnicodeSet::setToBogus() {
- clear(); // Remove everything in the set.
- fFlags = kIsBogus;
-}
-
-//----------------------------------------------------------------
-// Implementation: Fundamental operators
-//----------------------------------------------------------------
-
-static inline UChar32 max(UChar32 a, UChar32 b) {
- return (a > b) ? a : b;
-}
-
-// polarity = 0, 3 is normal: x xor y
-// polarity = 1, 2: x xor ~y == x === y
-
-void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity) {
- if (isFrozen() || isBogus()) {
- return;
- }
- if (!ensureBufferCapacity(len + otherLen)) {
- return;
- }
-
- int32_t i = 0, j = 0, k = 0;
- UChar32 a = list[i++];
- UChar32 b;
- if (polarity == 1 || polarity == 2) {
- b = UNICODESET_LOW;
- if (other[j] == UNICODESET_LOW) { // skip base if already LOW
- ++j;
- b = other[j];
- }
- } else {
- b = other[j++];
- }
- // simplest of all the routines
- // sort the values, discarding identicals!
- for (;;) {
- if (a < b) {
- buffer[k++] = a;
- a = list[i++];
- } else if (b < a) {
- buffer[k++] = b;
- b = other[j++];
- } else if (a != UNICODESET_HIGH) { // at this point, a == b
- // discard both values!
- a = list[i++];
- b = other[j++];
- } else { // DONE!
- buffer[k++] = UNICODESET_HIGH;
- len = k;
- break;
- }
- }
- swapBuffers();
- releasePattern();
-}
-
-// polarity = 0 is normal: x union y
-// polarity = 2: x union ~y
-// polarity = 1: ~x union y
-// polarity = 3: ~x union ~y
-
-void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) {
- if (isFrozen() || isBogus() || other==NULL) {
- return;
- }
- if (!ensureBufferCapacity(len + otherLen)) {
- return;
- }
-
- int32_t i = 0, j = 0, k = 0;
- UChar32 a = list[i++];
- UChar32 b = other[j++];
- // change from xor is that we have to check overlapping pairs
- // polarity bit 1 means a is second, bit 2 means b is.
- for (;;) {
- switch (polarity) {
- case 0: // both first; take lower if unequal
- if (a < b) { // take a
- // Back up over overlapping ranges in buffer[]
- if (k > 0 && a <= buffer[k-1]) {
- // Pick latter end value in buffer[] vs. list[]
- a = max(list[i], buffer[--k]);
- } else {
- // No overlap
- buffer[k++] = a;
- a = list[i];
- }
- i++; // Common if/else code factored out
- polarity ^= 1;
- } else if (b < a) { // take b
- if (k > 0 && b <= buffer[k-1]) {
- b = max(other[j], buffer[--k]);
- } else {
- buffer[k++] = b;
- b = other[j];
- }
- j++;
- polarity ^= 2;
- } else { // a == b, take a, drop b
- if (a == UNICODESET_HIGH) goto loop_end;
- // This is symmetrical; it doesn't matter if
- // we backtrack with a or b. - liu
- if (k > 0 && a <= buffer[k-1]) {
- a = max(list[i], buffer[--k]);
- } else {
- // No overlap
- buffer[k++] = a;
- a = list[i];
- }
- i++;
- polarity ^= 1;
- b = other[j++];
- polarity ^= 2;
- }
- break;
- case 3: // both second; take higher if unequal, and drop other
- if (b <= a) { // take a
- if (a == UNICODESET_HIGH) goto loop_end;
- buffer[k++] = a;
- } else { // take b
- if (b == UNICODESET_HIGH) goto loop_end;
- buffer[k++] = b;
- }
- a = list[i++];
- polarity ^= 1; // factored common code
- b = other[j++];
- polarity ^= 2;
- break;
- case 1: // a second, b first; if b < a, overlap
- if (a < b) { // no overlap, take a
- buffer[k++] = a; a = list[i++]; polarity ^= 1;
- } else if (b < a) { // OVERLAP, drop b
- b = other[j++];
- polarity ^= 2;
- } else { // a == b, drop both!
- if (a == UNICODESET_HIGH) goto loop_end;
- a = list[i++];
- polarity ^= 1;
- b = other[j++];
- polarity ^= 2;
- }
- break;
- case 2: // a first, b second; if a < b, overlap
- if (b < a) { // no overlap, take b
- buffer[k++] = b;
- b = other[j++];
- polarity ^= 2;
- } else if (a < b) { // OVERLAP, drop a
- a = list[i++];
- polarity ^= 1;
- } else { // a == b, drop both!
- if (a == UNICODESET_HIGH) goto loop_end;
- a = list[i++];
- polarity ^= 1;
- b = other[j++];
- polarity ^= 2;
- }
- break;
- }
- }
- loop_end:
- buffer[k++] = UNICODESET_HIGH; // terminate
- len = k;
- swapBuffers();
- releasePattern();
-}
-
-// polarity = 0 is normal: x intersect y
-// polarity = 2: x intersect ~y == set-minus
-// polarity = 1: ~x intersect y
-// polarity = 3: ~x intersect ~y
-
-void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity) {
- if (isFrozen() || isBogus()) {
- return;
- }
- if (!ensureBufferCapacity(len + otherLen)) {
- return;
- }
-
- int32_t i = 0, j = 0, k = 0;
- UChar32 a = list[i++];
- UChar32 b = other[j++];
- // change from xor is that we have to check overlapping pairs
- // polarity bit 1 means a is second, bit 2 means b is.
- for (;;) {
- switch (polarity) {
- case 0: // both first; drop the smaller
- if (a < b) { // drop a
- a = list[i++];
- polarity ^= 1;
- } else if (b < a) { // drop b
- b = other[j++];
- polarity ^= 2;
- } else { // a == b, take one, drop other
- if (a == UNICODESET_HIGH) goto loop_end;
- buffer[k++] = a;
- a = list[i++];
- polarity ^= 1;
- b = other[j++];
- polarity ^= 2;
- }
- break;
- case 3: // both second; take lower if unequal
- if (a < b) { // take a
- buffer[k++] = a;
- a = list[i++];
- polarity ^= 1;
- } else if (b < a) { // take b
- buffer[k++] = b;
- b = other[j++];
- polarity ^= 2;
- } else { // a == b, take one, drop other
- if (a == UNICODESET_HIGH) goto loop_end;
- buffer[k++] = a;
- a = list[i++];
- polarity ^= 1;
- b = other[j++];
- polarity ^= 2;
- }
- break;
- case 1: // a second, b first;
- if (a < b) { // NO OVERLAP, drop a
- a = list[i++];
- polarity ^= 1;
- } else if (b < a) { // OVERLAP, take b
- buffer[k++] = b;
- b = other[j++];
- polarity ^= 2;
- } else { // a == b, drop both!
- if (a == UNICODESET_HIGH) goto loop_end;
- a = list[i++];
- polarity ^= 1;
- b = other[j++];
- polarity ^= 2;
- }
- break;
- case 2: // a first, b second; if a < b, overlap
- if (b < a) { // no overlap, drop b
- b = other[j++];
- polarity ^= 2;
- } else if (a < b) { // OVERLAP, take a
- buffer[k++] = a;
- a = list[i++];
- polarity ^= 1;
- } else { // a == b, drop both!
- if (a == UNICODESET_HIGH) goto loop_end;
- a = list[i++];
- polarity ^= 1;
- b = other[j++];
- polarity ^= 2;
- }
- break;
- }
- }
- loop_end:
- buffer[k++] = UNICODESET_HIGH; // terminate
- len = k;
- swapBuffers();
- releasePattern();
-}
-
-/**
- * Append the <code>toPattern()</code> representation of a
- * string to the given <code>StringBuffer</code>.
- */
-void UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s, UBool
-escapeUnprintable) {
- UChar32 cp;
- for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) {
- _appendToPat(buf, cp = s.char32At(i), escapeUnprintable);
- }
-}
-
-/**
- * Append the <code>toPattern()</code> representation of a
- * character to the given <code>StringBuffer</code>.
- */
-void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool
-escapeUnprintable) {
- if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
- // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything
- // unprintable
- if (ICU_Utility::escapeUnprintable(buf, c)) {
- return;
- }
- }
- // Okay to let ':' pass through
- switch (c) {
- case SET_OPEN:
- case SET_CLOSE:
- case HYPHEN:
- case COMPLEMENT:
- case INTERSECTION:
- case BACKSLASH:
- case OPEN_BRACE:
- case CLOSE_BRACE:
- case COLON:
- case SymbolTable::SYMBOL_REF:
- buf.append(BACKSLASH);
- break;
- default:
- // Escape whitespace
- if (PatternProps::isWhiteSpace(c)) {
- buf.append(BACKSLASH);
- }
- break;
- }
- buf.append(c);
-}
-
-/**
- * Append a string representation of this set to result. This will be
- * a cleaned version of the string passed to applyPattern(), if there
- * is one. Otherwise it will be generated.
- */
-UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
- UBool escapeUnprintable) const
-{
- if (pat != NULL) {
- int32_t i;
- int32_t backslashCount = 0;
- for (i=0; i<patLen; ) {
- UChar32 c;
- U16_NEXT(pat, i, patLen, c);
- if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
- // If the unprintable character is preceded by an odd
- // number of backslashes, then it has been escaped.
- // Before unescaping it, we delete the final
- // backslash.
- if ((backslashCount % 2) == 1) {
- result.truncate(result.length() - 1);
- }
- ICU_Utility::escapeUnprintable(result, c);
- backslashCount = 0;
- } else {
- result.append(c);
- if (c == BACKSLASH) {
- ++backslashCount;
- } else {
- backslashCount = 0;
- }
- }
- }
- return result;
- }
-
- return _generatePattern(result, escapeUnprintable);
-}
-
-/**
- * Returns a string representation of this set. If the result of
- * calling this function is passed to a UnicodeSet constructor, it
- * will produce another set that is equal to this one.
- */
-UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
- UBool escapeUnprintable) const
-{
- result.truncate(0);
- return _toPattern(result, escapeUnprintable);
-}
-
-/**
- * Generate and append a string representation of this set to result.
- * This does not use this.pat, the cleaned up copy of the string
- * passed to applyPattern().
- */
-UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
- UBool escapeUnprintable) const
-{
- result.append(SET_OPEN);
-
-// // Check against the predefined categories. We implicitly build
-// // up ALL category sets the first time toPattern() is called.
-// for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) {
-// if (*this == getCategorySet(cat)) {
-// result.append(COLON);
-// result.append(CATEGORY_NAMES, cat*2, 2);
-// return result.append(CATEGORY_CLOSE);
-// }
-// }
-
- int32_t count = getRangeCount();
-
- // If the set contains at least 2 intervals and includes both
- // MIN_VALUE and MAX_VALUE, then the inverse representation will
- // be more economical.
- if (count > 1 &&
- getRangeStart(0) == MIN_VALUE &&
- getRangeEnd(count-1) == MAX_VALUE) {
-
- // Emit the inverse
- result.append(COMPLEMENT);
-
- for (int32_t i = 1; i < count; ++i) {
- UChar32 start = getRangeEnd(i-1)+1;
- UChar32 end = getRangeStart(i)-1;
- _appendToPat(result, start, escapeUnprintable);
- if (start != end) {
- if ((start+1) != end) {
- result.append(HYPHEN);
- }
- _appendToPat(result, end, escapeUnprintable);
- }
- }
- }
-
- // Default; emit the ranges as pairs
- else {
- for (int32_t i = 0; i < count; ++i) {
- UChar32 start = getRangeStart(i);
- UChar32 end = getRangeEnd(i);
- _appendToPat(result, start, escapeUnprintable);
- if (start != end) {
- if ((start+1) != end) {
- result.append(HYPHEN);
- }
- _appendToPat(result, end, escapeUnprintable);
- }
- }
- }
-
- if (strings != nullptr) {
- for (int32_t i = 0; i<strings->size(); ++i) {
- result.append(OPEN_BRACE);
- _appendToPat(result,
- *(const UnicodeString*) strings->elementAt(i),
- escapeUnprintable);
- result.append(CLOSE_BRACE);
- }
- }
- return result.append(SET_CLOSE);
-}
-
-/**
-* Release existing cached pattern
-*/
-void UnicodeSet::releasePattern() {
- if (pat) {
- uprv_free(pat);
- pat = NULL;
- patLen = 0;
- }
-}
-
-/**
-* Set the new pattern to cache.
-*/
-void UnicodeSet::setPattern(const char16_t *newPat, int32_t newPatLen) {
- releasePattern();
- pat = (UChar *)uprv_malloc((newPatLen + 1) * sizeof(UChar));
- if (pat) {
- patLen = newPatLen;
- u_memcpy(pat, newPat, patLen);
- pat[patLen] = 0;
- }
- // else we don't care if malloc failed. This was just a nice cache.
- // We can regenerate an equivalent pattern later when requested.
-}
-
-UnicodeSet *UnicodeSet::freeze() {
- if(!isFrozen() && !isBogus()) {
- compact();
-
- // Optimize contains() and span() and similar functions.
- if (hasStrings()) {
- stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL);
- if (stringSpan == nullptr) {
- setToBogus();
- return this;
- } else if (!stringSpan->needsStringSpanUTF16()) {
- // All strings are irrelevant for span() etc. because
- // all of each string's code points are contained in this set.
- // Do not check needsStringSpanUTF8() because UTF-8 has at most as
- // many relevant strings as UTF-16.
- // (Thus needsStringSpanUTF8() implies needsStringSpanUTF16().)
- delete stringSpan;
- stringSpan = NULL;
- }
- }
- if (stringSpan == NULL) {
- // No span-relevant strings: Optimize for code point spans.
- bmpSet=new BMPSet(list, len);
- if (bmpSet == NULL) { // Check for memory allocation error.
- setToBogus();
- }
- }
- }
- return this;
-}
-
-int32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
- if(length>0 && bmpSet!=NULL) {
- return (int32_t)(bmpSet->span(s, s+length, spanCondition)-s);
- }
- if(length<0) {
- length=u_strlen(s);
- }
- if(length==0) {
- return 0;
- }
- if(stringSpan!=NULL) {
- return stringSpan->span(s, length, spanCondition);
- } else if(hasStrings()) {
- uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
- UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED :
- UnicodeSetStringSpan::FWD_UTF16_CONTAINED;
- UnicodeSetStringSpan strSpan(*this, *strings, which);
- if(strSpan.needsStringSpanUTF16()) {
- return strSpan.span(s, length, spanCondition);
- }
- }
-
- if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
- spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
- }
-
- UChar32 c;
- int32_t start=0, prev=0;
- do {
- U16_NEXT(s, start, length, c);
- if(spanCondition!=contains(c)) {
- break;
- }
- } while((prev=start)<length);
- return prev;
-}
-
-int32_t UnicodeSet::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
- if(length>0 && bmpSet!=NULL) {
- return (int32_t)(bmpSet->spanBack(s, s+length, spanCondition)-s);
- }
- if(length<0) {
- length=u_strlen(s);
- }
- if(length==0) {
- return 0;
- }
- if(stringSpan!=NULL) {
- return stringSpan->spanBack(s, length, spanCondition);
- } else if(hasStrings()) {
- uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
- UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED :
- UnicodeSetStringSpan::BACK_UTF16_CONTAINED;
- UnicodeSetStringSpan strSpan(*this, *strings, which);
- if(strSpan.needsStringSpanUTF16()) {
- return strSpan.spanBack(s, length, spanCondition);
- }
- }
-
- if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
- spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
- }
-
- UChar32 c;
- int32_t prev=length;
- do {
- U16_PREV(s, 0, length, c);
- if(spanCondition!=contains(c)) {
- break;
- }
- } while((prev=length)>0);
- return prev;
-}
-
-int32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
- if(length>0 && bmpSet!=NULL) {
- const uint8_t *s0=(const uint8_t *)s;
- return (int32_t)(bmpSet->spanUTF8(s0, length, spanCondition)-s0);
- }
- if(length<0) {
- length=(int32_t)uprv_strlen(s);
- }
- if(length==0) {
- return 0;
- }
- if(stringSpan!=NULL) {
- return stringSpan->spanUTF8((const uint8_t *)s, length, spanCondition);
- } else if(hasStrings()) {
- uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
- UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED :
- UnicodeSetStringSpan::FWD_UTF8_CONTAINED;
- UnicodeSetStringSpan strSpan(*this, *strings, which);
- if(strSpan.needsStringSpanUTF8()) {
- return strSpan.spanUTF8((const uint8_t *)s, length, spanCondition);
- }
- }
-
- if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
- spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
- }
-
- UChar32 c;
- int32_t start=0, prev=0;
- do {
- U8_NEXT_OR_FFFD(s, start, length, c);
- if(spanCondition!=contains(c)) {
- break;
- }
- } while((prev=start)<length);
- return prev;
-}
-
-int32_t UnicodeSet::spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
- if(length>0 && bmpSet!=NULL) {
- const uint8_t *s0=(const uint8_t *)s;
- return bmpSet->spanBackUTF8(s0, length, spanCondition);
- }
- if(length<0) {
- length=(int32_t)uprv_strlen(s);
- }
- if(length==0) {
- return 0;
- }
- if(stringSpan!=NULL) {
- return stringSpan->spanBackUTF8((const uint8_t *)s, length, spanCondition);
- } else if(hasStrings()) {
- uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
- UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED :
- UnicodeSetStringSpan::BACK_UTF8_CONTAINED;
- UnicodeSetStringSpan strSpan(*this, *strings, which);
- if(strSpan.needsStringSpanUTF8()) {
- return strSpan.spanBackUTF8((const uint8_t *)s, length, spanCondition);
- }
- }
-
- if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
- spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
- }
-
- UChar32 c;
- int32_t prev=length;
- do {
- U8_PREV_OR_FFFD(s, 0, length, c);
- if(spanCondition!=contains(c)) {
- break;
- }
- } while((prev=length)>0);
- return prev;
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/uniset_closure.cpp b/contrib/libs/icu/common/uniset_closure.cpp
deleted file mode 100644
index 882231ba1a5..00000000000
--- a/contrib/libs/icu/common/uniset_closure.cpp
+++ /dev/null
@@ -1,250 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: uniset_closure.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2011may30
-* created by: Markus W. Scherer
-*
-* UnicodeSet::closeOver() and related methods moved here from uniset_props.cpp
-* to simplify dependencies.
-* In particular, this depends on the BreakIterator, but the BreakIterator
-* code also builds UnicodeSets from patterns and needs uniset_props.
-*/
-
-#include "unicode/brkiter.h"
-#include "unicode/locid.h"
-#include "unicode/parsepos.h"
-#include "unicode/uniset.h"
-#include "cmemory.h"
-#include "ruleiter.h"
-#include "ucase.h"
-#include "util.h"
-#include "uvector.h"
-
-U_NAMESPACE_BEGIN
-
-// TODO memory debugging provided inside uniset.cpp
-// could be made available here but probably obsolete with use of modern
-// memory leak checker tools
-#define _dbgct(me)
-
-//----------------------------------------------------------------
-// Constructors &c
-//----------------------------------------------------------------
-
-UnicodeSet::UnicodeSet(const UnicodeString& pattern,
- uint32_t options,
- const SymbolTable* symbols,
- UErrorCode& status) {
- applyPattern(pattern, options, symbols, status);
- _dbgct(this);
-}
-
-UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
- uint32_t options,
- const SymbolTable* symbols,
- UErrorCode& status) {
- applyPattern(pattern, pos, options, symbols, status);
- _dbgct(this);
-}
-
-//----------------------------------------------------------------
-// Public API
-//----------------------------------------------------------------
-
-UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
- uint32_t options,
- const SymbolTable* symbols,
- UErrorCode& status) {
- ParsePosition pos(0);
- applyPattern(pattern, pos, options, symbols, status);
- if (U_FAILURE(status)) return *this;
-
- int32_t i = pos.getIndex();
-
- if (options & USET_IGNORE_SPACE) {
- // Skip over trailing whitespace
- ICU_Utility::skipWhitespace(pattern, i, TRUE);
- }
-
- if (i != pattern.length()) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return *this;
-}
-
-UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
- ParsePosition& pos,
- uint32_t options,
- const SymbolTable* symbols,
- UErrorCode& status) {
- if (U_FAILURE(status)) {
- return *this;
- }
- if (isFrozen()) {
- status = U_NO_WRITE_PERMISSION;
- return *this;
- }
- // Need to build the pattern in a temporary string because
- // _applyPattern calls add() etc., which set pat to empty.
- UnicodeString rebuiltPat;
- RuleCharacterIterator chars(pattern, symbols, pos);
- applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status);
- if (U_FAILURE(status)) return *this;
- if (chars.inVariable()) {
- // syntaxError(chars, "Extra chars in variable value");
- status = U_MALFORMED_SET;
- return *this;
- }
- setPattern(rebuiltPat);
- return *this;
-}
-
-// USetAdder implementation
-// Does not use uset.h to reduce code dependencies
-static void U_CALLCONV
-_set_add(USet *set, UChar32 c) {
- ((UnicodeSet *)set)->add(c);
-}
-
-static void U_CALLCONV
-_set_addRange(USet *set, UChar32 start, UChar32 end) {
- ((UnicodeSet *)set)->add(start, end);
-}
-
-static void U_CALLCONV
-_set_addString(USet *set, const UChar *str, int32_t length) {
- ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length));
-}
-
-//----------------------------------------------------------------
-// Case folding API
-//----------------------------------------------------------------
-
-// add the result of a full case mapping to the set
-// use str as a temporary string to avoid constructing one
-static inline void
-addCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString &str) {
- if(result >= 0) {
- if(result > UCASE_MAX_STRING_LENGTH) {
- // add a single-code point case mapping
- set.add(result);
- } else {
- // add a string case mapping from full with length result
- str.setTo((UBool)FALSE, full, result);
- set.add(str);
- }
- }
- // result < 0: the code point mapped to itself, no need to add it
- // see ucase.h
-}
-
-UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
- if (isFrozen() || isBogus()) {
- return *this;
- }
- if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) {
- {
- UnicodeSet foldSet(*this);
- UnicodeString str;
- USetAdder sa = {
- foldSet.toUSet(),
- _set_add,
- _set_addRange,
- _set_addString,
- NULL, // don't need remove()
- NULL // don't need removeRange()
- };
-
- // start with input set to guarantee inclusion
- // USET_CASE: remove strings because the strings will actually be reduced (folded);
- // therefore, start with no strings and add only those needed
- if ((attribute & USET_CASE_INSENSITIVE) && foldSet.hasStrings()) {
- foldSet.strings->removeAllElements();
- }
-
- int32_t n = getRangeCount();
- UChar32 result;
- const UChar *full;
-
- for (int32_t i=0; i<n; ++i) {
- UChar32 start = getRangeStart(i);
- UChar32 end = getRangeEnd(i);
-
- if (attribute & USET_CASE_INSENSITIVE) {
- // full case closure
- for (UChar32 cp=start; cp<=end; ++cp) {
- ucase_addCaseClosure(cp, &sa);
- }
- } else {
- // add case mappings
- // (does not add long s for regular s, or Kelvin for k, for example)
- for (UChar32 cp=start; cp<=end; ++cp) {
- result = ucase_toFullLower(cp, NULL, NULL, &full, UCASE_LOC_ROOT);
- addCaseMapping(foldSet, result, full, str);
-
- result = ucase_toFullTitle(cp, NULL, NULL, &full, UCASE_LOC_ROOT);
- addCaseMapping(foldSet, result, full, str);
-
- result = ucase_toFullUpper(cp, NULL, NULL, &full, UCASE_LOC_ROOT);
- addCaseMapping(foldSet, result, full, str);
-
- result = ucase_toFullFolding(cp, &full, 0);
- addCaseMapping(foldSet, result, full, str);
- }
- }
- }
- if (hasStrings()) {
- if (attribute & USET_CASE_INSENSITIVE) {
- for (int32_t j=0; j<strings->size(); ++j) {
- str = *(const UnicodeString *) strings->elementAt(j);
- str.foldCase();
- if(!ucase_addStringCaseClosure(str.getBuffer(), str.length(), &sa)) {
- foldSet.add(str); // does not map to code points: add the folded string itself
- }
- }
- } else {
- Locale root("");
-#if !UCONFIG_NO_BREAK_ITERATION
- UErrorCode status = U_ZERO_ERROR;
- BreakIterator *bi = BreakIterator::createWordInstance(root, status);
- if (U_SUCCESS(status)) {
-#endif
- const UnicodeString *pStr;
-
- for (int32_t j=0; j<strings->size(); ++j) {
- pStr = (const UnicodeString *) strings->elementAt(j);
- (str = *pStr).toLower(root);
- foldSet.add(str);
-#if !UCONFIG_NO_BREAK_ITERATION
- (str = *pStr).toTitle(bi, root);
- foldSet.add(str);
-#endif
- (str = *pStr).toUpper(root);
- foldSet.add(str);
- (str = *pStr).foldCase();
- foldSet.add(str);
- }
-#if !UCONFIG_NO_BREAK_ITERATION
- }
- delete bi;
-#endif
- }
- }
- *this = foldSet;
- }
- }
- return *this;
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/uniset_props.cpp b/contrib/libs/icu/common/uniset_props.cpp
deleted file mode 100644
index 37277fcb751..00000000000
--- a/contrib/libs/icu/common/uniset_props.cpp
+++ /dev/null
@@ -1,1174 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1999-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: uniset_props.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004aug25
-* created by: Markus W. Scherer
-*
-* Character property dependent functions moved here from uniset.cpp
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/uniset.h"
-#include "unicode/parsepos.h"
-#include "unicode/uchar.h"
-#include "unicode/uscript.h"
-#include "unicode/symtable.h"
-#include "unicode/uset.h"
-#include "unicode/locid.h"
-#include "unicode/brkiter.h"
-#include "uset_imp.h"
-#include "ruleiter.h"
-#include "cmemory.h"
-#include "ucln_cmn.h"
-#include "util.h"
-#include "uvector.h"
-#include "uprops.h"
-#include "propname.h"
-#include "normalizer2impl.h"
-#include "uinvchar.h"
-#include "uprops.h"
-#include "charstr.h"
-#include "cstring.h"
-#include "mutex.h"
-#include "umutex.h"
-#include "uassert.h"
-#include "hash.h"
-
-U_NAMESPACE_USE
-
-// Define UChar constants using hex for EBCDIC compatibility
-// Used #define to reduce private static exports and memory access time.
-#define SET_OPEN ((UChar)0x005B) /*[*/
-#define SET_CLOSE ((UChar)0x005D) /*]*/
-#define HYPHEN ((UChar)0x002D) /*-*/
-#define COMPLEMENT ((UChar)0x005E) /*^*/
-#define COLON ((UChar)0x003A) /*:*/
-#define BACKSLASH ((UChar)0x005C) /*\*/
-#define INTERSECTION ((UChar)0x0026) /*&*/
-#define UPPER_U ((UChar)0x0055) /*U*/
-#define LOWER_U ((UChar)0x0075) /*u*/
-#define OPEN_BRACE ((UChar)123) /*{*/
-#define CLOSE_BRACE ((UChar)125) /*}*/
-#define UPPER_P ((UChar)0x0050) /*P*/
-#define LOWER_P ((UChar)0x0070) /*p*/
-#define UPPER_N ((UChar)78) /*N*/
-#define EQUALS ((UChar)0x003D) /*=*/
-
-//static const UChar POSIX_OPEN[] = { SET_OPEN,COLON,0 }; // "[:"
-static const UChar POSIX_CLOSE[] = { COLON,SET_CLOSE,0 }; // ":]"
-//static const UChar PERL_OPEN[] = { BACKSLASH,LOWER_P,0 }; // "\\p"
-//static const UChar PERL_CLOSE[] = { CLOSE_BRACE,0 }; // "}"
-//static const UChar NAME_OPEN[] = { BACKSLASH,UPPER_N,0 }; // "\\N"
-static const UChar HYPHEN_RIGHT_BRACE[] = {HYPHEN,SET_CLOSE,0}; /*-]*/
-
-// Special property set IDs
-static const char ANY[] = "ANY"; // [\u0000-\U0010FFFF]
-static const char ASCII[] = "ASCII"; // [\u0000-\u007F]
-static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
-
-// Unicode name property alias
-#define NAME_PROP "na"
-#define NAME_PROP_LENGTH 2
-
-/**
- * Delimiter string used in patterns to close a category reference:
- * ":]". Example: "[:Lu:]".
- */
-//static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */
-
-// Cached sets ------------------------------------------------------------- ***
-
-U_CDECL_BEGIN
-static UBool U_CALLCONV uset_cleanup();
-
-static UnicodeSet *uni32Singleton;
-static icu::UInitOnce uni32InitOnce = U_INITONCE_INITIALIZER;
-
-/**
- * Cleanup function for UnicodeSet
- */
-static UBool U_CALLCONV uset_cleanup(void) {
- delete uni32Singleton;
- uni32Singleton = NULL;
- uni32InitOnce.reset();
- return TRUE;
-}
-
-U_CDECL_END
-
-U_NAMESPACE_BEGIN
-
-namespace {
-
-// Cache some sets for other services -------------------------------------- ***
-void U_CALLCONV createUni32Set(UErrorCode &errorCode) {
- U_ASSERT(uni32Singleton == NULL);
- uni32Singleton = new UnicodeSet(UNICODE_STRING_SIMPLE("[:age=3.2:]"), errorCode);
- if(uni32Singleton==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- } else {
- uni32Singleton->freeze();
- }
- ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup);
-}
-
-
-U_CFUNC UnicodeSet *
-uniset_getUnicode32Instance(UErrorCode &errorCode) {
- umtx_initOnce(uni32InitOnce, &createUni32Set, errorCode);
- return uni32Singleton;
-}
-
-// helper functions for matching of pattern syntax pieces ------------------ ***
-// these functions are parallel to the PERL_OPEN etc. strings above
-
-// using these functions is not only faster than UnicodeString::compare() and
-// caseCompare(), but they also make UnicodeSet work for simple patterns when
-// no Unicode properties data is available - when caseCompare() fails
-
-static inline UBool
-isPerlOpen(const UnicodeString &pattern, int32_t pos) {
- UChar c;
- return pattern.charAt(pos)==BACKSLASH && ((c=pattern.charAt(pos+1))==LOWER_P || c==UPPER_P);
-}
-
-/*static inline UBool
-isPerlClose(const UnicodeString &pattern, int32_t pos) {
- return pattern.charAt(pos)==CLOSE_BRACE;
-}*/
-
-static inline UBool
-isNameOpen(const UnicodeString &pattern, int32_t pos) {
- return pattern.charAt(pos)==BACKSLASH && pattern.charAt(pos+1)==UPPER_N;
-}
-
-static inline UBool
-isPOSIXOpen(const UnicodeString &pattern, int32_t pos) {
- return pattern.charAt(pos)==SET_OPEN && pattern.charAt(pos+1)==COLON;
-}
-
-/*static inline UBool
-isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
- return pattern.charAt(pos)==COLON && pattern.charAt(pos+1)==SET_CLOSE;
-}*/
-
-// TODO memory debugging provided inside uniset.cpp
-// could be made available here but probably obsolete with use of modern
-// memory leak checker tools
-#define _dbgct(me)
-
-} // namespace
-
-//----------------------------------------------------------------
-// Constructors &c
-//----------------------------------------------------------------
-
-/**
- * Constructs a set from the given pattern, optionally ignoring
- * white space. See the class description for the syntax of the
- * pattern language.
- * @param pattern a string specifying what characters are in the set
- */
-UnicodeSet::UnicodeSet(const UnicodeString& pattern,
- UErrorCode& status) {
- applyPattern(pattern, status);
- _dbgct(this);
-}
-
-//----------------------------------------------------------------
-// Public API
-//----------------------------------------------------------------
-
-UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
- UErrorCode& status) {
- // Equivalent to
- // return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
- // but without dependency on closeOver().
- ParsePosition pos(0);
- applyPatternIgnoreSpace(pattern, pos, NULL, status);
- if (U_FAILURE(status)) return *this;
-
- int32_t i = pos.getIndex();
- // Skip over trailing whitespace
- ICU_Utility::skipWhitespace(pattern, i, TRUE);
- if (i != pattern.length()) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return *this;
-}
-
-void
-UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern,
- ParsePosition& pos,
- const SymbolTable* symbols,
- UErrorCode& status) {
- if (U_FAILURE(status)) {
- return;
- }
- if (isFrozen()) {
- status = U_NO_WRITE_PERMISSION;
- return;
- }
- // Need to build the pattern in a temporary string because
- // _applyPattern calls add() etc., which set pat to empty.
- UnicodeString rebuiltPat;
- RuleCharacterIterator chars(pattern, symbols, pos);
- applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status);
- if (U_FAILURE(status)) return;
- if (chars.inVariable()) {
- // syntaxError(chars, "Extra chars in variable value");
- status = U_MALFORMED_SET;
- return;
- }
- setPattern(rebuiltPat);
-}
-
-/**
- * Return true if the given position, in the given pattern, appears
- * to be the start of a UnicodeSet pattern.
- */
-UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
- return ((pos+1) < pattern.length() &&
- pattern.charAt(pos) == (UChar)91/*[*/) ||
- resemblesPropertyPattern(pattern, pos);
-}
-
-//----------------------------------------------------------------
-// Implementation: Pattern parsing
-//----------------------------------------------------------------
-
-namespace {
-
-/**
- * A small all-inline class to manage a UnicodeSet pointer. Add
- * operator->() etc. as needed.
- */
-class UnicodeSetPointer {
- UnicodeSet* p;
-public:
- inline UnicodeSetPointer() : p(0) {}
- inline ~UnicodeSetPointer() { delete p; }
- inline UnicodeSet* pointer() { return p; }
- inline UBool allocate() {
- if (p == 0) {
- p = new UnicodeSet();
- }
- return p != 0;
- }
-};
-
-constexpr int32_t MAX_DEPTH = 100;
-
-} // namespace
-
-/**
- * Parse the pattern from the given RuleCharacterIterator. The
- * iterator is advanced over the parsed pattern.
- * @param chars iterator over the pattern characters. Upon return
- * it will be advanced to the first character after the parsed
- * pattern, or the end of the iteration if all characters are
- * parsed.
- * @param symbols symbol table to use to parse and dereference
- * variables, or null if none.
- * @param rebuiltPat the pattern that was parsed, rebuilt or
- * copied from the input pattern, as appropriate.
- * @param options a bit mask of zero or more of the following:
- * IGNORE_SPACE, CASE.
- */
-void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
- const SymbolTable* symbols,
- UnicodeString& rebuiltPat,
- uint32_t options,
- UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
- int32_t depth,
- UErrorCode& ec) {
- if (U_FAILURE(ec)) return;
- if (depth > MAX_DEPTH) {
- ec = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- // Syntax characters: [ ] ^ - & { }
-
- // Recognized special forms for chars, sets: c-c s-s s&s
-
- int32_t opts = RuleCharacterIterator::PARSE_VARIABLES |
- RuleCharacterIterator::PARSE_ESCAPES;
- if ((options & USET_IGNORE_SPACE) != 0) {
- opts |= RuleCharacterIterator::SKIP_WHITESPACE;
- }
-
- UnicodeString patLocal, buf;
- UBool usePat = FALSE;
- UnicodeSetPointer scratch;
- RuleCharacterIterator::Pos backup;
-
- // mode: 0=before [, 1=between [...], 2=after ]
- // lastItem: 0=none, 1=char, 2=set
- int8_t lastItem = 0, mode = 0;
- UChar32 lastChar = 0;
- UChar op = 0;
-
- UBool invert = FALSE;
-
- clear();
-
- while (mode != 2 && !chars.atEnd()) {
- U_ASSERT((lastItem == 0 && op == 0) ||
- (lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) ||
- (lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ ||
- op == INTERSECTION /*'&'*/)));
-
- UChar32 c = 0;
- UBool literal = FALSE;
- UnicodeSet* nested = 0; // alias - do not delete
-
- // -------- Check for property pattern
-
- // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed
- int8_t setMode = 0;
- if (resemblesPropertyPattern(chars, opts)) {
- setMode = 2;
- }
-
- // -------- Parse '[' of opening delimiter OR nested set.
- // If there is a nested set, use `setMode' to define how
- // the set should be parsed. If the '[' is part of the
- // opening delimiter for this pattern, parse special
- // strings "[", "[^", "[-", and "[^-". Check for stand-in
- // characters representing a nested set in the symbol
- // table.
-
- else {
- // Prepare to backup if necessary
- chars.getPos(backup);
- c = chars.next(opts, literal, ec);
- if (U_FAILURE(ec)) return;
-
- if (c == 0x5B /*'['*/ && !literal) {
- if (mode == 1) {
- chars.setPos(backup); // backup
- setMode = 1;
- } else {
- // Handle opening '[' delimiter
- mode = 1;
- patLocal.append((UChar) 0x5B /*'['*/);
- chars.getPos(backup); // prepare to backup
- c = chars.next(opts, literal, ec);
- if (U_FAILURE(ec)) return;
- if (c == 0x5E /*'^'*/ && !literal) {
- invert = TRUE;
- patLocal.append((UChar) 0x5E /*'^'*/);
- chars.getPos(backup); // prepare to backup
- c = chars.next(opts, literal, ec);
- if (U_FAILURE(ec)) return;
- }
- // Fall through to handle special leading '-';
- // otherwise restart loop for nested [], \p{}, etc.
- if (c == HYPHEN /*'-'*/) {
- literal = TRUE;
- // Fall through to handle literal '-' below
- } else {
- chars.setPos(backup); // backup
- continue;
- }
- }
- } else if (symbols != 0) {
- const UnicodeFunctor *m = symbols->lookupMatcher(c);
- if (m != 0) {
- const UnicodeSet *ms = dynamic_cast<const UnicodeSet *>(m);
- if (ms == NULL) {
- ec = U_MALFORMED_SET;
- return;
- }
- // casting away const, but `nested' won't be modified
- // (important not to modify stored set)
- nested = const_cast<UnicodeSet*>(ms);
- setMode = 3;
- }
- }
- }
-
- // -------- Handle a nested set. This either is inline in
- // the pattern or represented by a stand-in that has
- // previously been parsed and was looked up in the symbol
- // table.
-
- if (setMode != 0) {
- if (lastItem == 1) {
- if (op != 0) {
- // syntaxError(chars, "Char expected after operator");
- ec = U_MALFORMED_SET;
- return;
- }
- add(lastChar, lastChar);
- _appendToPat(patLocal, lastChar, FALSE);
- lastItem = 0;
- op = 0;
- }
-
- if (op == HYPHEN /*'-'*/ || op == INTERSECTION /*'&'*/) {
- patLocal.append(op);
- }
-
- if (nested == 0) {
- // lazy allocation
- if (!scratch.allocate()) {
- ec = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- nested = scratch.pointer();
- }
- switch (setMode) {
- case 1:
- nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec);
- break;
- case 2:
- chars.skipIgnored(opts);
- nested->applyPropertyPattern(chars, patLocal, ec);
- if (U_FAILURE(ec)) return;
- break;
- case 3: // `nested' already parsed
- nested->_toPattern(patLocal, FALSE);
- break;
- }
-
- usePat = TRUE;
-
- if (mode == 0) {
- // Entire pattern is a category; leave parse loop
- *this = *nested;
- mode = 2;
- break;
- }
-
- switch (op) {
- case HYPHEN: /*'-'*/
- removeAll(*nested);
- break;
- case INTERSECTION: /*'&'*/
- retainAll(*nested);
- break;
- case 0:
- addAll(*nested);
- break;
- }
-
- op = 0;
- lastItem = 2;
-
- continue;
- }
-
- if (mode == 0) {
- // syntaxError(chars, "Missing '['");
- ec = U_MALFORMED_SET;
- return;
- }
-
- // -------- Parse special (syntax) characters. If the
- // current character is not special, or if it is escaped,
- // then fall through and handle it below.
-
- if (!literal) {
- switch (c) {
- case 0x5D /*']'*/:
- if (lastItem == 1) {
- add(lastChar, lastChar);
- _appendToPat(patLocal, lastChar, FALSE);
- }
- // Treat final trailing '-' as a literal
- if (op == HYPHEN /*'-'*/) {
- add(op, op);
- patLocal.append(op);
- } else if (op == INTERSECTION /*'&'*/) {
- // syntaxError(chars, "Trailing '&'");
- ec = U_MALFORMED_SET;
- return;
- }
- patLocal.append((UChar) 0x5D /*']'*/);
- mode = 2;
- continue;
- case HYPHEN /*'-'*/:
- if (op == 0) {
- if (lastItem != 0) {
- op = (UChar) c;
- continue;
- } else {
- // Treat final trailing '-' as a literal
- add(c, c);
- c = chars.next(opts, literal, ec);
- if (U_FAILURE(ec)) return;
- if (c == 0x5D /*']'*/ && !literal) {
- patLocal.append(HYPHEN_RIGHT_BRACE, 2);
- mode = 2;
- continue;
- }
- }
- }
- // syntaxError(chars, "'-' not after char or set");
- ec = U_MALFORMED_SET;
- return;
- case INTERSECTION /*'&'*/:
- if (lastItem == 2 && op == 0) {
- op = (UChar) c;
- continue;
- }
- // syntaxError(chars, "'&' not after set");
- ec = U_MALFORMED_SET;
- return;
- case 0x5E /*'^'*/:
- // syntaxError(chars, "'^' not after '['");
- ec = U_MALFORMED_SET;
- return;
- case 0x7B /*'{'*/:
- if (op != 0) {
- // syntaxError(chars, "Missing operand after operator");
- ec = U_MALFORMED_SET;
- return;
- }
- if (lastItem == 1) {
- add(lastChar, lastChar);
- _appendToPat(patLocal, lastChar, FALSE);
- }
- lastItem = 0;
- buf.truncate(0);
- {
- UBool ok = FALSE;
- while (!chars.atEnd()) {
- c = chars.next(opts, literal, ec);
- if (U_FAILURE(ec)) return;
- if (c == 0x7D /*'}'*/ && !literal) {
- ok = TRUE;
- break;
- }
- buf.append(c);
- }
- if (buf.length() < 1 || !ok) {
- // syntaxError(chars, "Invalid multicharacter string");
- ec = U_MALFORMED_SET;
- return;
- }
- }
- // We have new string. Add it to set and continue;
- // we don't need to drop through to the further
- // processing
- add(buf);
- patLocal.append((UChar) 0x7B /*'{'*/);
- _appendToPat(patLocal, buf, FALSE);
- patLocal.append((UChar) 0x7D /*'}'*/);
- continue;
- case SymbolTable::SYMBOL_REF:
- // symbols nosymbols
- // [a-$] error error (ambiguous)
- // [a$] anchor anchor
- // [a-$x] var "x"* literal '$'
- // [a-$.] error literal '$'
- // *We won't get here in the case of var "x"
- {
- chars.getPos(backup);
- c = chars.next(opts, literal, ec);
- if (U_FAILURE(ec)) return;
- UBool anchor = (c == 0x5D /*']'*/ && !literal);
- if (symbols == 0 && !anchor) {
- c = SymbolTable::SYMBOL_REF;
- chars.setPos(backup);
- break; // literal '$'
- }
- if (anchor && op == 0) {
- if (lastItem == 1) {
- add(lastChar, lastChar);
- _appendToPat(patLocal, lastChar, FALSE);
- }
- add(U_ETHER);
- usePat = TRUE;
- patLocal.append((UChar) SymbolTable::SYMBOL_REF);
- patLocal.append((UChar) 0x5D /*']'*/);
- mode = 2;
- continue;
- }
- // syntaxError(chars, "Unquoted '$'");
- ec = U_MALFORMED_SET;
- return;
- }
- default:
- break;
- }
- }
-
- // -------- Parse literal characters. This includes both
- // escaped chars ("\u4E01") and non-syntax characters
- // ("a").
-
- switch (lastItem) {
- case 0:
- lastItem = 1;
- lastChar = c;
- break;
- case 1:
- if (op == HYPHEN /*'-'*/) {
- if (lastChar >= c) {
- // Don't allow redundant (a-a) or empty (b-a) ranges;
- // these are most likely typos.
- // syntaxError(chars, "Invalid range");
- ec = U_MALFORMED_SET;
- return;
- }
- add(lastChar, c);
- _appendToPat(patLocal, lastChar, FALSE);
- patLocal.append(op);
- _appendToPat(patLocal, c, FALSE);
- lastItem = 0;
- op = 0;
- } else {
- add(lastChar, lastChar);
- _appendToPat(patLocal, lastChar, FALSE);
- lastChar = c;
- }
- break;
- case 2:
- if (op != 0) {
- // syntaxError(chars, "Set expected after operator");
- ec = U_MALFORMED_SET;
- return;
- }
- lastChar = c;
- lastItem = 1;
- break;
- }
- }
-
- if (mode != 2) {
- // syntaxError(chars, "Missing ']'");
- ec = U_MALFORMED_SET;
- return;
- }
-
- chars.skipIgnored(opts);
-
- /**
- * Handle global flags (invert, case insensitivity). If this
- * pattern should be compiled case-insensitive, then we need
- * to close over case BEFORE COMPLEMENTING. This makes
- * patterns like /[^abc]/i work.
- */
- if ((options & USET_CASE_INSENSITIVE) != 0) {
- (this->*caseClosure)(USET_CASE_INSENSITIVE);
- }
- else if ((options & USET_ADD_CASE_MAPPINGS) != 0) {
- (this->*caseClosure)(USET_ADD_CASE_MAPPINGS);
- }
- if (invert) {
- complement();
- }
-
- // Use the rebuilt pattern (patLocal) only if necessary. Prefer the
- // generated pattern.
- if (usePat) {
- rebuiltPat.append(patLocal);
- } else {
- _generatePattern(rebuiltPat, FALSE);
- }
- if (isBogus() && U_SUCCESS(ec)) {
- // We likely ran out of memory. AHHH!
- ec = U_MEMORY_ALLOCATION_ERROR;
- }
-}
-
-//----------------------------------------------------------------
-// Property set implementation
-//----------------------------------------------------------------
-
-namespace {
-
-static UBool numericValueFilter(UChar32 ch, void* context) {
- return u_getNumericValue(ch) == *(double*)context;
-}
-
-static UBool generalCategoryMaskFilter(UChar32 ch, void* context) {
- int32_t value = *(int32_t*)context;
- return (U_GET_GC_MASK((UChar32) ch) & value) != 0;
-}
-
-static UBool versionFilter(UChar32 ch, void* context) {
- static const UVersionInfo none = { 0, 0, 0, 0 };
- UVersionInfo v;
- u_charAge(ch, v);
- UVersionInfo* version = (UVersionInfo*)context;
- return uprv_memcmp(&v, &none, sizeof(v)) > 0 && uprv_memcmp(&v, version, sizeof(v)) <= 0;
-}
-
-typedef struct {
- UProperty prop;
- int32_t value;
-} IntPropertyContext;
-
-static UBool intPropertyFilter(UChar32 ch, void* context) {
- IntPropertyContext* c = (IntPropertyContext*)context;
- return u_getIntPropertyValue((UChar32) ch, c->prop) == c->value;
-}
-
-static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
- return uscript_hasScript(ch, *(UScriptCode*)context);
-}
-
-} // namespace
-
-/**
- * Generic filter-based scanning code for UCD property UnicodeSets.
- */
-void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
- void* context,
- const UnicodeSet* inclusions,
- UErrorCode &status) {
- if (U_FAILURE(status)) return;
-
- // Logically, walk through all Unicode characters, noting the start
- // and end of each range for which filter.contain(c) is
- // true. Add each range to a set.
- //
- // To improve performance, use an inclusions set which
- // encodes information about character ranges that are known
- // to have identical properties.
- // inclusions contains the first characters of
- // same-value ranges for the given property.
-
- clear();
-
- UChar32 startHasProperty = -1;
- int32_t limitRange = inclusions->getRangeCount();
-
- for (int j=0; j<limitRange; ++j) {
- // get current range
- UChar32 start = inclusions->getRangeStart(j);
- UChar32 end = inclusions->getRangeEnd(j);
-
- // for all the code points in the range, process
- for (UChar32 ch = start; ch <= end; ++ch) {
- // only add to this UnicodeSet on inflection points --
- // where the hasProperty value changes to false
- if ((*filter)(ch, context)) {
- if (startHasProperty < 0) {
- startHasProperty = ch;
- }
- } else if (startHasProperty >= 0) {
- add(startHasProperty, ch-1);
- startHasProperty = -1;
- }
- }
- }
- if (startHasProperty >= 0) {
- add((UChar32)startHasProperty, (UChar32)0x10FFFF);
- }
- if (isBogus() && U_SUCCESS(status)) {
- // We likely ran out of memory. AHHH!
- status = U_MEMORY_ALLOCATION_ERROR;
- }
-}
-
-namespace {
-
-static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
- /* Note: we use ' ' in compiler code page */
- int32_t j = 0;
- char ch;
- --dstCapacity; /* make room for term. zero */
- while ((ch = *src++) != 0) {
- if (ch == ' ' && (j==0 || (j>0 && dst[j-1]==' '))) {
- continue;
- }
- if (j >= dstCapacity) return FALSE;
- dst[j++] = ch;
- }
- if (j > 0 && dst[j-1] == ' ') --j;
- dst[j] = 0;
- return TRUE;
-}
-
-} // namespace
-
-//----------------------------------------------------------------
-// Property set API
-//----------------------------------------------------------------
-
-#define FAIL(ec) UPRV_BLOCK_MACRO_BEGIN { \
- ec=U_ILLEGAL_ARGUMENT_ERROR; \
- return *this; \
-} UPRV_BLOCK_MACRO_END
-
-UnicodeSet&
-UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) {
- if (U_FAILURE(ec) || isFrozen()) { return *this; }
- if (prop == UCHAR_GENERAL_CATEGORY_MASK) {
- const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
- applyFilter(generalCategoryMaskFilter, &value, inclusions, ec);
- } else if (prop == UCHAR_SCRIPT_EXTENSIONS) {
- const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
- UScriptCode script = (UScriptCode)value;
- applyFilter(scriptExtensionsFilter, &script, inclusions, ec);
- } else if (0 <= prop && prop < UCHAR_BINARY_LIMIT) {
- if (value == 0 || value == 1) {
- const USet *set = u_getBinaryPropertySet(prop, &ec);
- if (U_FAILURE(ec)) { return *this; }
- copyFrom(*UnicodeSet::fromUSet(set), TRUE);
- if (value == 0) {
- complement();
- }
- } else {
- clear();
- }
- } else if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
- const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
- IntPropertyContext c = {prop, value};
- applyFilter(intPropertyFilter, &c, inclusions, ec);
- } else {
- ec = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return *this;
-}
-
-UnicodeSet&
-UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
- const UnicodeString& value,
- UErrorCode& ec) {
- if (U_FAILURE(ec) || isFrozen()) return *this;
-
- // prop and value used to be converted to char * using the default
- // converter instead of the invariant conversion.
- // This should not be necessary because all Unicode property and value
- // names use only invariant characters.
- // If there are any variant characters, then we won't find them anyway.
- // Checking first avoids assertion failures in the conversion.
- if( !uprv_isInvariantUString(prop.getBuffer(), prop.length()) ||
- !uprv_isInvariantUString(value.getBuffer(), value.length())
- ) {
- FAIL(ec);
- }
- CharString pname, vname;
- pname.appendInvariantChars(prop, ec);
- vname.appendInvariantChars(value, ec);
- if (U_FAILURE(ec)) return *this;
-
- UProperty p;
- int32_t v;
- UBool invert = FALSE;
-
- if (value.length() > 0) {
- p = u_getPropertyEnum(pname.data());
- if (p == UCHAR_INVALID_CODE) FAIL(ec);
-
- // Treat gc as gcm
- if (p == UCHAR_GENERAL_CATEGORY) {
- p = UCHAR_GENERAL_CATEGORY_MASK;
- }
-
- if ((p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) ||
- (p >= UCHAR_INT_START && p < UCHAR_INT_LIMIT) ||
- (p >= UCHAR_MASK_START && p < UCHAR_MASK_LIMIT)) {
- v = u_getPropertyValueEnum(p, vname.data());
- if (v == UCHAR_INVALID_CODE) {
- // Handle numeric CCC
- if (p == UCHAR_CANONICAL_COMBINING_CLASS ||
- p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS ||
- p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) {
- char* end;
- double val = uprv_strtod(vname.data(), &end);
- // Anything between 0 and 255 is valid even if unused.
- // Cast double->int only after range check.
- // We catch NaN here because comparing it with both 0 and 255 will be false
- // (as are all comparisons with NaN).
- if (*end != 0 || !(0 <= val && val <= 255) ||
- (v = (int32_t)val) != val) {
- // non-integral value or outside 0..255, or trailing junk
- FAIL(ec);
- }
- } else {
- FAIL(ec);
- }
- }
- }
-
- else {
-
- switch (p) {
- case UCHAR_NUMERIC_VALUE:
- {
- char* end;
- double val = uprv_strtod(vname.data(), &end);
- if (*end != 0) {
- FAIL(ec);
- }
- applyFilter(numericValueFilter, &val,
- CharacterProperties::getInclusionsForProperty(p, ec), ec);
- return *this;
- }
- case UCHAR_NAME:
- {
- // Must munge name, since u_charFromName() does not do
- // 'loose' matching.
- char buf[128]; // it suffices that this be > uprv_getMaxCharNameLength
- if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec);
- UChar32 ch = u_charFromName(U_EXTENDED_CHAR_NAME, buf, &ec);
- if (U_SUCCESS(ec)) {
- clear();
- add(ch);
- return *this;
- } else {
- FAIL(ec);
- }
- }
- case UCHAR_UNICODE_1_NAME:
- // ICU 49 deprecates the Unicode_1_Name property APIs.
- FAIL(ec);
- case UCHAR_AGE:
- {
- // Must munge name, since u_versionFromString() does not do
- // 'loose' matching.
- char buf[128];
- if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec);
- UVersionInfo version;
- u_versionFromString(version, buf);
- applyFilter(versionFilter, &version,
- CharacterProperties::getInclusionsForProperty(p, ec), ec);
- return *this;
- }
- case UCHAR_SCRIPT_EXTENSIONS:
- v = u_getPropertyValueEnum(UCHAR_SCRIPT, vname.data());
- if (v == UCHAR_INVALID_CODE) {
- FAIL(ec);
- }
- // fall through to calling applyIntPropertyValue()
- break;
- default:
- // p is a non-binary, non-enumerated property that we
- // don't support (yet).
- FAIL(ec);
- }
- }
- }
-
- else {
- // value is empty. Interpret as General Category, Script, or
- // Binary property.
- p = UCHAR_GENERAL_CATEGORY_MASK;
- v = u_getPropertyValueEnum(p, pname.data());
- if (v == UCHAR_INVALID_CODE) {
- p = UCHAR_SCRIPT;
- v = u_getPropertyValueEnum(p, pname.data());
- if (v == UCHAR_INVALID_CODE) {
- p = u_getPropertyEnum(pname.data());
- if (p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) {
- v = 1;
- } else if (0 == uprv_comparePropertyNames(ANY, pname.data())) {
- set(MIN_VALUE, MAX_VALUE);
- return *this;
- } else if (0 == uprv_comparePropertyNames(ASCII, pname.data())) {
- set(0, 0x7F);
- return *this;
- } else if (0 == uprv_comparePropertyNames(ASSIGNED, pname.data())) {
- // [:Assigned:]=[:^Cn:]
- p = UCHAR_GENERAL_CATEGORY_MASK;
- v = U_GC_CN_MASK;
- invert = TRUE;
- } else {
- FAIL(ec);
- }
- }
- }
- }
-
- applyIntPropertyValue(p, v, ec);
- if(invert) {
- complement();
- }
-
- if (isBogus() && U_SUCCESS(ec)) {
- // We likely ran out of memory. AHHH!
- ec = U_MEMORY_ALLOCATION_ERROR;
- }
- return *this;
-}
-
-//----------------------------------------------------------------
-// Property set patterns
-//----------------------------------------------------------------
-
-/**
- * Return true if the given position, in the given pattern, appears
- * to be the start of a property set pattern.
- */
-UBool UnicodeSet::resemblesPropertyPattern(const UnicodeString& pattern,
- int32_t pos) {
- // Patterns are at least 5 characters long
- if ((pos+5) > pattern.length()) {
- return FALSE;
- }
-
- // Look for an opening [:, [:^, \p, or \P
- return isPOSIXOpen(pattern, pos) || isPerlOpen(pattern, pos) || isNameOpen(pattern, pos);
-}
-
-/**
- * Return true if the given iterator appears to point at a
- * property pattern. Regardless of the result, return with the
- * iterator unchanged.
- * @param chars iterator over the pattern characters. Upon return
- * it will be unchanged.
- * @param iterOpts RuleCharacterIterator options
- */
-UBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars,
- int32_t iterOpts) {
- // NOTE: literal will always be FALSE, because we don't parse escapes.
- UBool result = FALSE, literal;
- UErrorCode ec = U_ZERO_ERROR;
- iterOpts &= ~RuleCharacterIterator::PARSE_ESCAPES;
- RuleCharacterIterator::Pos pos;
- chars.getPos(pos);
- UChar32 c = chars.next(iterOpts, literal, ec);
- if (c == 0x5B /*'['*/ || c == 0x5C /*'\\'*/) {
- UChar32 d = chars.next(iterOpts & ~RuleCharacterIterator::SKIP_WHITESPACE,
- literal, ec);
- result = (c == 0x5B /*'['*/) ? (d == 0x3A /*':'*/) :
- (d == 0x4E /*'N'*/ || d == 0x70 /*'p'*/ || d == 0x50 /*'P'*/);
- }
- chars.setPos(pos);
- return result && U_SUCCESS(ec);
-}
-
-/**
- * Parse the given property pattern at the given parse position.
- */
-UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
- ParsePosition& ppos,
- UErrorCode &ec) {
- int32_t pos = ppos.getIndex();
-
- UBool posix = FALSE; // true for [:pat:], false for \p{pat} \P{pat} \N{pat}
- UBool isName = FALSE; // true for \N{pat}, o/w false
- UBool invert = FALSE;
-
- if (U_FAILURE(ec)) return *this;
-
- // Minimum length is 5 characters, e.g. \p{L}
- if ((pos+5) > pattern.length()) {
- FAIL(ec);
- }
-
- // On entry, ppos should point to one of the following locations:
- // Look for an opening [:, [:^, \p, or \P
- if (isPOSIXOpen(pattern, pos)) {
- posix = TRUE;
- pos += 2;
- pos = ICU_Utility::skipWhitespace(pattern, pos);
- if (pos < pattern.length() && pattern.charAt(pos) == COMPLEMENT) {
- ++pos;
- invert = TRUE;
- }
- } else if (isPerlOpen(pattern, pos) || isNameOpen(pattern, pos)) {
- UChar c = pattern.charAt(pos+1);
- invert = (c == UPPER_P);
- isName = (c == UPPER_N);
- pos += 2;
- pos = ICU_Utility::skipWhitespace(pattern, pos);
- if (pos == pattern.length() || pattern.charAt(pos++) != OPEN_BRACE) {
- // Syntax error; "\p" or "\P" not followed by "{"
- FAIL(ec);
- }
- } else {
- // Open delimiter not seen
- FAIL(ec);
- }
-
- // Look for the matching close delimiter, either :] or }
- int32_t close;
- if (posix) {
- close = pattern.indexOf(POSIX_CLOSE, 2, pos);
- } else {
- close = pattern.indexOf(CLOSE_BRACE, pos);
- }
- if (close < 0) {
- // Syntax error; close delimiter missing
- FAIL(ec);
- }
-
- // Look for an '=' sign. If this is present, we will parse a
- // medium \p{gc=Cf} or long \p{GeneralCategory=Format}
- // pattern.
- int32_t equals = pattern.indexOf(EQUALS, pos);
- UnicodeString propName, valueName;
- if (equals >= 0 && equals < close && !isName) {
- // Equals seen; parse medium/long pattern
- pattern.extractBetween(pos, equals, propName);
- pattern.extractBetween(equals+1, close, valueName);
- }
-
- else {
- // Handle case where no '=' is seen, and \N{}
- pattern.extractBetween(pos, close, propName);
-
- // Handle \N{name}
- if (isName) {
- // This is a little inefficient since it means we have to
- // parse NAME_PROP back to UCHAR_NAME even though we already
- // know it's UCHAR_NAME. If we refactor the API to
- // support args of (UProperty, char*) then we can remove
- // NAME_PROP and make this a little more efficient.
- valueName = propName;
- propName = UnicodeString(NAME_PROP, NAME_PROP_LENGTH, US_INV);
- }
- }
-
- applyPropertyAlias(propName, valueName, ec);
-
- if (U_SUCCESS(ec)) {
- if (invert) {
- complement();
- }
-
- // Move to the limit position after the close delimiter if the
- // parse succeeded.
- ppos.setIndex(close + (posix ? 2 : 1));
- }
-
- return *this;
-}
-
-/**
- * Parse a property pattern.
- * @param chars iterator over the pattern characters. Upon return
- * it will be advanced to the first character after the parsed
- * pattern, or the end of the iteration if all characters are
- * parsed.
- * @param rebuiltPat the pattern that was parsed, rebuilt or
- * copied from the input pattern, as appropriate.
- */
-void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars,
- UnicodeString& rebuiltPat,
- UErrorCode& ec) {
- if (U_FAILURE(ec)) return;
- UnicodeString pattern;
- chars.lookahead(pattern);
- ParsePosition pos(0);
- applyPropertyPattern(pattern, pos, ec);
- if (U_FAILURE(ec)) return;
- if (pos.getIndex() == 0) {
- // syntaxError(chars, "Invalid property pattern");
- ec = U_MALFORMED_SET;
- return;
- }
- chars.jumpahead(pos.getIndex());
- rebuiltPat.append(pattern, 0, pos.getIndex());
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/unisetspan.cpp b/contrib/libs/icu/common/unisetspan.cpp
deleted file mode 100644
index 68e44d91ee7..00000000000
--- a/contrib/libs/icu/common/unisetspan.cpp
+++ /dev/null
@@ -1,1509 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2007-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: unisetspan.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2007mar01
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/uniset.h"
-#include "unicode/ustring.h"
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-#include "cmemory.h"
-#include "uvector.h"
-#include "unisetspan.h"
-
-U_NAMESPACE_BEGIN
-
-/*
- * List of offsets from the current position from where to try matching
- * a code point or a string.
- * Store offsets rather than indexes to simplify the code and use the same list
- * for both increments (in span()) and decrements (in spanBack()).
- *
- * Assumption: The maximum offset is limited, and the offsets that are stored
- * at any one time are relatively dense, that is, there are normally no gaps of
- * hundreds or thousands of offset values.
- *
- * The implementation uses a circular buffer of byte flags,
- * each indicating whether the corresponding offset is in the list.
- * This avoids inserting into a sorted list of offsets (or absolute indexes) and
- * physically moving part of the list.
- *
- * Note: In principle, the caller should setMaxLength() to the maximum of the
- * max string length and U16_LENGTH/U8_LENGTH to account for
- * "long" single code points.
- * However, this implementation uses at least a staticList with more than
- * U8_LENGTH entries anyway.
- *
- * Note: If maxLength were guaranteed to be no more than 32 or 64,
- * the list could be stored as bit flags in a single integer.
- * Rather than handling a circular buffer with a start list index,
- * the integer would simply be shifted when lower offsets are removed.
- * UnicodeSet does not have a limit on the lengths of strings.
- */
-class OffsetList { // Only ever stack-allocated, does not need to inherit UMemory.
-public:
- OffsetList() : list(staticList), capacity(0), length(0), start(0) {}
-
- ~OffsetList() {
- if(list!=staticList) {
- uprv_free(list);
- }
- }
-
- // Call exactly once if the list is to be used.
- void setMaxLength(int32_t maxLength) {
- if(maxLength<=(int32_t)sizeof(staticList)) {
- capacity=(int32_t)sizeof(staticList);
- } else {
- UBool *l=(UBool *)uprv_malloc(maxLength);
- if(l!=NULL) {
- list=l;
- capacity=maxLength;
- }
- }
- uprv_memset(list, 0, capacity);
- }
-
- void clear() {
- uprv_memset(list, 0, capacity);
- start=length=0;
- }
-
- UBool isEmpty() const {
- return (UBool)(length==0);
- }
-
- // Reduce all stored offsets by delta, used when the current position
- // moves by delta.
- // There must not be any offsets lower than delta.
- // If there is an offset equal to delta, it is removed.
- // delta=[1..maxLength]
- void shift(int32_t delta) {
- int32_t i=start+delta;
- if(i>=capacity) {
- i-=capacity;
- }
- if(list[i]) {
- list[i]=FALSE;
- --length;
- }
- start=i;
- }
-
- // Add an offset. The list must not contain it yet.
- // offset=[1..maxLength]
- void addOffset(int32_t offset) {
- int32_t i=start+offset;
- if(i>=capacity) {
- i-=capacity;
- }
- list[i]=TRUE;
- ++length;
- }
-
- // offset=[1..maxLength]
- UBool containsOffset(int32_t offset) const {
- int32_t i=start+offset;
- if(i>=capacity) {
- i-=capacity;
- }
- return list[i];
- }
-
- // Find the lowest stored offset from a non-empty list, remove it,
- // and reduce all other offsets by this minimum.
- // Returns [1..maxLength].
- int32_t popMinimum() {
- // Look for the next offset in list[start+1..capacity-1].
- int32_t i=start, result;
- while(++i<capacity) {
- if(list[i]) {
- list[i]=FALSE;
- --length;
- result=i-start;
- start=i;
- return result;
- }
- }
- // i==capacity
-
- // Wrap around and look for the next offset in list[0..start].
- // Since the list is not empty, there will be one.
- result=capacity-start;
- i=0;
- while(!list[i]) {
- ++i;
- }
- list[i]=FALSE;
- --length;
- start=i;
- return result+=i;
- }
-
-private:
- UBool *list;
- int32_t capacity;
- int32_t length;
- int32_t start;
-
- UBool staticList[16];
-};
-
-// Get the number of UTF-8 bytes for a UTF-16 (sub)string.
-static int32_t
-getUTF8Length(const UChar *s, int32_t length) {
- UErrorCode errorCode=U_ZERO_ERROR;
- int32_t length8=0;
- u_strToUTF8(NULL, 0, &length8, s, length, &errorCode);
- if(U_SUCCESS(errorCode) || errorCode==U_BUFFER_OVERFLOW_ERROR) {
- return length8;
- } else {
- // The string contains an unpaired surrogate.
- // Ignore this string.
- return 0;
- }
-}
-
-// Append the UTF-8 version of the string to t and return the appended UTF-8 length.
-static int32_t
-appendUTF8(const UChar *s, int32_t length, uint8_t *t, int32_t capacity) {
- UErrorCode errorCode=U_ZERO_ERROR;
- int32_t length8=0;
- u_strToUTF8((char *)t, capacity, &length8, s, length, &errorCode);
- if(U_SUCCESS(errorCode)) {
- return length8;
- } else {
- // The string contains an unpaired surrogate.
- // Ignore this string.
- return 0;
- }
-}
-
-static inline uint8_t
-makeSpanLengthByte(int32_t spanLength) {
- // 0xfe==UnicodeSetStringSpan::LONG_SPAN
- return spanLength<0xfe ? (uint8_t)spanLength : (uint8_t)0xfe;
-}
-
-// Construct for all variants of span(), or only for any one variant.
-// Initialize as little as possible, for single use.
-UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
- const UVector &setStrings,
- uint32_t which)
- : spanSet(0, 0x10ffff), pSpanNotSet(NULL), strings(setStrings),
- utf8Lengths(NULL), spanLengths(NULL), utf8(NULL),
- utf8Length(0),
- maxLength16(0), maxLength8(0),
- all((UBool)(which==ALL)) {
- spanSet.retainAll(set);
- if(which&NOT_CONTAINED) {
- // Default to the same sets.
- // addToSpanNotSet() will create a separate set if necessary.
- pSpanNotSet=&spanSet;
- }
-
- // Determine if the strings even need to be taken into account at all for span() etc.
- // If any string is relevant, then all strings need to be used for
- // span(longest match) but only the relevant ones for span(while contained).
- // TODO: Possible optimization: Distinguish CONTAINED vs. LONGEST_MATCH
- // and do not store UTF-8 strings if !thisRelevant and CONTAINED.
- // (Only store irrelevant UTF-8 strings for LONGEST_MATCH where they are relevant after all.)
- // Also count the lengths of the UTF-8 versions of the strings for memory allocation.
- int32_t stringsLength=strings.size();
-
- int32_t i, spanLength;
- UBool someRelevant=FALSE;
- for(i=0; i<stringsLength; ++i) {
- const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
- const UChar *s16=string.getBuffer();
- int32_t length16=string.length();
- UBool thisRelevant;
- spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
- if(spanLength<length16) { // Relevant string.
- someRelevant=thisRelevant=TRUE;
- } else {
- thisRelevant=FALSE;
- }
- if((which&UTF16) && length16>maxLength16) {
- maxLength16=length16;
- }
- if((which&UTF8) && (thisRelevant || (which&CONTAINED))) {
- int32_t length8=getUTF8Length(s16, length16);
- utf8Length+=length8;
- if(length8>maxLength8) {
- maxLength8=length8;
- }
- }
- }
- if(!someRelevant) {
- maxLength16=maxLength8=0;
- return;
- }
-
- // Freeze after checking for the need to use strings at all because freezing
- // a set takes some time and memory which are wasted if there are no relevant strings.
- if(all) {
- spanSet.freeze();
- }
-
- uint8_t *spanBackLengths;
- uint8_t *spanUTF8Lengths;
- uint8_t *spanBackUTF8Lengths;
-
- // Allocate a block of meta data.
- int32_t allocSize;
- if(all) {
- // UTF-8 lengths, 4 sets of span lengths, UTF-8 strings.
- allocSize=stringsLength*(4+1+1+1+1)+utf8Length;
- } else {
- allocSize=stringsLength; // One set of span lengths.
- if(which&UTF8) {
- // UTF-8 lengths and UTF-8 strings.
- allocSize+=stringsLength*4+utf8Length;
- }
- }
- if(allocSize<=(int32_t)sizeof(staticLengths)) {
- utf8Lengths=staticLengths;
- } else {
- utf8Lengths=(int32_t *)uprv_malloc(allocSize);
- if(utf8Lengths==NULL) {
- maxLength16=maxLength8=0; // Prevent usage by making needsStringSpanUTF16/8() return FALSE.
- return; // Out of memory.
- }
- }
-
- if(all) {
- // Store span lengths for all span() variants.
- spanLengths=(uint8_t *)(utf8Lengths+stringsLength);
- spanBackLengths=spanLengths+stringsLength;
- spanUTF8Lengths=spanBackLengths+stringsLength;
- spanBackUTF8Lengths=spanUTF8Lengths+stringsLength;
- utf8=spanBackUTF8Lengths+stringsLength;
- } else {
- // Store span lengths for only one span() variant.
- if(which&UTF8) {
- spanLengths=(uint8_t *)(utf8Lengths+stringsLength);
- utf8=spanLengths+stringsLength;
- } else {
- spanLengths=(uint8_t *)utf8Lengths;
- }
- spanBackLengths=spanUTF8Lengths=spanBackUTF8Lengths=spanLengths;
- }
-
- // Set the meta data and pSpanNotSet and write the UTF-8 strings.
- int32_t utf8Count=0; // Count UTF-8 bytes written so far.
-
- for(i=0; i<stringsLength; ++i) {
- const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
- const UChar *s16=string.getBuffer();
- int32_t length16=string.length();
- spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
- if(spanLength<length16) { // Relevant string.
- if(which&UTF16) {
- if(which&CONTAINED) {
- if(which&FWD) {
- spanLengths[i]=makeSpanLengthByte(spanLength);
- }
- if(which&BACK) {
- spanLength=length16-spanSet.spanBack(s16, length16, USET_SPAN_CONTAINED);
- spanBackLengths[i]=makeSpanLengthByte(spanLength);
- }
- } else /* not CONTAINED, not all, but NOT_CONTAINED */ {
- spanLengths[i]=spanBackLengths[i]=0; // Only store a relevant/irrelevant flag.
- }
- }
- if(which&UTF8) {
- uint8_t *s8=utf8+utf8Count;
- int32_t length8=appendUTF8(s16, length16, s8, utf8Length-utf8Count);
- utf8Count+=utf8Lengths[i]=length8;
- if(length8==0) { // Irrelevant for UTF-8 because not representable in UTF-8.
- spanUTF8Lengths[i]=spanBackUTF8Lengths[i]=(uint8_t)ALL_CP_CONTAINED;
- } else { // Relevant for UTF-8.
- if(which&CONTAINED) {
- if(which&FWD) {
- spanLength=spanSet.spanUTF8((const char *)s8, length8, USET_SPAN_CONTAINED);
- spanUTF8Lengths[i]=makeSpanLengthByte(spanLength);
- }
- if(which&BACK) {
- spanLength=length8-spanSet.spanBackUTF8((const char *)s8, length8, USET_SPAN_CONTAINED);
- spanBackUTF8Lengths[i]=makeSpanLengthByte(spanLength);
- }
- } else /* not CONTAINED, not all, but NOT_CONTAINED */ {
- spanUTF8Lengths[i]=spanBackUTF8Lengths[i]=0; // Only store a relevant/irrelevant flag.
- }
- }
- }
- if(which&NOT_CONTAINED) {
- // Add string start and end code points to the spanNotSet so that
- // a span(while not contained) stops before any string.
- UChar32 c;
- if(which&FWD) {
- int32_t len=0;
- U16_NEXT(s16, len, length16, c);
- addToSpanNotSet(c);
- }
- if(which&BACK) {
- int32_t len=length16;
- U16_PREV(s16, 0, len, c);
- addToSpanNotSet(c);
- }
- }
- } else { // Irrelevant string.
- if(which&UTF8) {
- if(which&CONTAINED) { // Only necessary for LONGEST_MATCH.
- uint8_t *s8=utf8+utf8Count;
- int32_t length8=appendUTF8(s16, length16, s8, utf8Length-utf8Count);
- utf8Count+=utf8Lengths[i]=length8;
- } else {
- utf8Lengths[i]=0;
- }
- }
- if(all) {
- spanLengths[i]=spanBackLengths[i]=
- spanUTF8Lengths[i]=spanBackUTF8Lengths[i]=
- (uint8_t)ALL_CP_CONTAINED;
- } else {
- // All spanXYZLengths pointers contain the same address.
- spanLengths[i]=(uint8_t)ALL_CP_CONTAINED;
- }
- }
- }
-
- // Finish.
- if(all) {
- pSpanNotSet->freeze();
- }
-}
-
-// Copy constructor. Assumes which==ALL for a frozen set.
-UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSetStringSpan &otherStringSpan,
- const UVector &newParentSetStrings)
- : spanSet(otherStringSpan.spanSet), pSpanNotSet(NULL), strings(newParentSetStrings),
- utf8Lengths(NULL), spanLengths(NULL), utf8(NULL),
- utf8Length(otherStringSpan.utf8Length),
- maxLength16(otherStringSpan.maxLength16), maxLength8(otherStringSpan.maxLength8),
- all(TRUE) {
- if(otherStringSpan.pSpanNotSet==&otherStringSpan.spanSet) {
- pSpanNotSet=&spanSet;
- } else {
- pSpanNotSet=otherStringSpan.pSpanNotSet->clone();
- }
-
- // Allocate a block of meta data.
- // UTF-8 lengths, 4 sets of span lengths, UTF-8 strings.
- int32_t stringsLength=strings.size();
- int32_t allocSize=stringsLength*(4+1+1+1+1)+utf8Length;
- if(allocSize<=(int32_t)sizeof(staticLengths)) {
- utf8Lengths=staticLengths;
- } else {
- utf8Lengths=(int32_t *)uprv_malloc(allocSize);
- if(utf8Lengths==NULL) {
- maxLength16=maxLength8=0; // Prevent usage by making needsStringSpanUTF16/8() return FALSE.
- return; // Out of memory.
- }
- }
-
- spanLengths=(uint8_t *)(utf8Lengths+stringsLength);
- utf8=spanLengths+stringsLength*4;
- uprv_memcpy(utf8Lengths, otherStringSpan.utf8Lengths, allocSize);
-}
-
-UnicodeSetStringSpan::~UnicodeSetStringSpan() {
- if(pSpanNotSet!=NULL && pSpanNotSet!=&spanSet) {
- delete pSpanNotSet;
- }
- if(utf8Lengths!=NULL && utf8Lengths!=staticLengths) {
- uprv_free(utf8Lengths);
- }
-}
-
-void UnicodeSetStringSpan::addToSpanNotSet(UChar32 c) {
- if(pSpanNotSet==NULL || pSpanNotSet==&spanSet) {
- if(spanSet.contains(c)) {
- return; // Nothing to do.
- }
- UnicodeSet *newSet=spanSet.cloneAsThawed();
- if(newSet==NULL) {
- return; // Out of memory.
- } else {
- pSpanNotSet=newSet;
- }
- }
- pSpanNotSet->add(c);
-}
-
-// Compare strings without any argument checks. Requires length>0.
-static inline UBool
-matches16(const UChar *s, const UChar *t, int32_t length) {
- do {
- if(*s++!=*t++) {
- return FALSE;
- }
- } while(--length>0);
- return TRUE;
-}
-
-static inline UBool
-matches8(const uint8_t *s, const uint8_t *t, int32_t length) {
- do {
- if(*s++!=*t++) {
- return FALSE;
- }
- } while(--length>0);
- return TRUE;
-}
-
-// Compare 16-bit Unicode strings (which may be malformed UTF-16)
-// at code point boundaries.
-// That is, each edge of a match must not be in the middle of a surrogate pair.
-static inline UBool
-matches16CPB(const UChar *s, int32_t start, int32_t limit, const UChar *t, int32_t length) {
- s+=start;
- limit-=start;
- return matches16(s, t, length) &&
- !(0<start && U16_IS_LEAD(s[-1]) && U16_IS_TRAIL(s[0])) &&
- !(length<limit && U16_IS_LEAD(s[length-1]) && U16_IS_TRAIL(s[length]));
-}
-
-// Does the set contain the next code point?
-// If so, return its length; otherwise return its negative length.
-static inline int32_t
-spanOne(const UnicodeSet &set, const UChar *s, int32_t length) {
- UChar c=*s, c2;
- if(c>=0xd800 && c<=0xdbff && length>=2 && U16_IS_TRAIL(c2=s[1])) {
- return set.contains(U16_GET_SUPPLEMENTARY(c, c2)) ? 2 : -2;
- }
- return set.contains(c) ? 1 : -1;
-}
-
-static inline int32_t
-spanOneBack(const UnicodeSet &set, const UChar *s, int32_t length) {
- UChar c=s[length-1], c2;
- if(c>=0xdc00 && c<=0xdfff && length>=2 && U16_IS_LEAD(c2=s[length-2])) {
- return set.contains(U16_GET_SUPPLEMENTARY(c2, c)) ? 2 : -2;
- }
- return set.contains(c) ? 1 : -1;
-}
-
-static inline int32_t
-spanOneUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {
- UChar32 c=*s;
- if(U8_IS_SINGLE(c)) {
- return set.contains(c) ? 1 : -1;
- }
- // Take advantage of non-ASCII fastpaths in U8_NEXT_OR_FFFD().
- int32_t i=0;
- U8_NEXT_OR_FFFD(s, i, length, c);
- return set.contains(c) ? i : -i;
-}
-
-static inline int32_t
-spanOneBackUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {
- UChar32 c=s[length-1];
- if(U8_IS_SINGLE(c)) {
- return set.contains(c) ? 1 : -1;
- }
- int32_t i=length-1;
- c=utf8_prevCharSafeBody(s, 0, &i, c, -3);
- length-=i;
- return set.contains(c) ? length : -length;
-}
-
-/*
- * Note: In span() when spanLength==0 (after a string match, or at the beginning
- * after an empty code point span) and in spanNot() and spanNotUTF8(),
- * string matching could use a binary search
- * because all string matches are done from the same start index.
- *
- * For UTF-8, this would require a comparison function that returns UTF-16 order.
- *
- * This optimization should not be necessary for normal UnicodeSets because
- * most sets have no strings, and most sets with strings have
- * very few very short strings.
- * For cases with many strings, it might be better to use a different API
- * and implementation with a DFA (state machine).
- */
-
-/*
- * Algorithm for span(USET_SPAN_CONTAINED)
- *
- * Theoretical algorithm:
- * - Iterate through the string, and at each code point boundary:
- * + If the code point there is in the set, then remember to continue after it.
- * + If a set string matches at the current position, then remember to continue after it.
- * + Either recursively span for each code point or string match,
- * or recursively span for all but the shortest one and
- * iteratively continue the span with the shortest local match.
- * + Remember the longest recursive span (the farthest end point).
- * + If there is no match at the current position, neither for the code point there
- * nor for any set string, then stop and return the longest recursive span length.
- *
- * Optimized implementation:
- *
- * (We assume that most sets will have very few very short strings.
- * A span using a string-less set is extremely fast.)
- *
- * Create and cache a spanSet which contains all of the single code points
- * of the original set but none of its strings.
- *
- * - Start with spanLength=spanSet.span(USET_SPAN_CONTAINED).
- * - Loop:
- * + Try to match each set string at the end of the spanLength.
- * ~ Set strings that start with set-contained code points must be matched
- * with a partial overlap because the recursive algorithm would have tried
- * to match them at every position.
- * ~ Set strings that entirely consist of set-contained code points
- * are irrelevant for span(USET_SPAN_CONTAINED) because the
- * recursive algorithm would continue after them anyway
- * and find the longest recursive match from their end.
- * ~ Rather than recursing, note each end point of a set string match.
- * + If no set string matched after spanSet.span(), then return
- * with where the spanSet.span() ended.
- * + If at least one set string matched after spanSet.span(), then
- * pop the shortest string match end point and continue
- * the loop, trying to match all set strings from there.
- * + If at least one more set string matched after a previous string match,
- * then test if the code point after the previous string match is also
- * contained in the set.
- * Continue the loop with the shortest end point of either this code point
- * or a matching set string.
- * + If no more set string matched after a previous string match,
- * then try another spanLength=spanSet.span(USET_SPAN_CONTAINED).
- * Stop if spanLength==0, otherwise continue the loop.
- *
- * By noting each end point of a set string match,
- * the function visits each string position at most once and finishes
- * in linear time.
- *
- * The recursive algorithm may visit the same string position many times
- * if multiple paths lead to it and finishes in exponential time.
- */
-
-/*
- * Algorithm for span(USET_SPAN_SIMPLE)
- *
- * Theoretical algorithm:
- * - Iterate through the string, and at each code point boundary:
- * + If the code point there is in the set, then remember to continue after it.
- * + If a set string matches at the current position, then remember to continue after it.
- * + Continue from the farthest match position and ignore all others.
- * + If there is no match at the current position,
- * then stop and return the current position.
- *
- * Optimized implementation:
- *
- * (Same assumption and spanSet as above.)
- *
- * - Start with spanLength=spanSet.span(USET_SPAN_CONTAINED).
- * - Loop:
- * + Try to match each set string at the end of the spanLength.
- * ~ Set strings that start with set-contained code points must be matched
- * with a partial overlap because the standard algorithm would have tried
- * to match them earlier.
- * ~ Set strings that entirely consist of set-contained code points
- * must be matched with a full overlap because the longest-match algorithm
- * would hide set string matches that end earlier.
- * Such set strings need not be matched earlier inside the code point span
- * because the standard algorithm would then have continued after
- * the set string match anyway.
- * ~ Remember the longest set string match (farthest end point) from the earliest
- * starting point.
- * + If no set string matched after spanSet.span(), then return
- * with where the spanSet.span() ended.
- * + If at least one set string matched, then continue the loop after the
- * longest match from the earliest position.
- * + If no more set string matched after a previous string match,
- * then try another spanLength=spanSet.span(USET_SPAN_CONTAINED).
- * Stop if spanLength==0, otherwise continue the loop.
- */
-
-int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
- if(spanCondition==USET_SPAN_NOT_CONTAINED) {
- return spanNot(s, length);
- }
- int32_t spanLength=spanSet.span(s, length, USET_SPAN_CONTAINED);
- if(spanLength==length) {
- return length;
- }
-
- // Consider strings; they may overlap with the span.
- OffsetList offsets;
- if(spanCondition==USET_SPAN_CONTAINED) {
- // Use offset list to try all possibilities.
- offsets.setMaxLength(maxLength16);
- }
- int32_t pos=spanLength, rest=length-pos;
- int32_t i, stringsLength=strings.size();
- for(;;) {
- if(spanCondition==USET_SPAN_CONTAINED) {
- for(i=0; i<stringsLength; ++i) {
- int32_t overlap=spanLengths[i];
- if(overlap==ALL_CP_CONTAINED) {
- continue; // Irrelevant string.
- }
- const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
- const UChar *s16=string.getBuffer();
- int32_t length16=string.length();
-
- // Try to match this string at pos-overlap..pos.
- if(overlap>=LONG_SPAN) {
- overlap=length16;
- // While contained: No point matching fully inside the code point span.
- U16_BACK_1(s16, 0, overlap); // Length of the string minus the last code point.
- }
- if(overlap>spanLength) {
- overlap=spanLength;
- }
- int32_t inc=length16-overlap; // Keep overlap+inc==length16.
- for(;;) {
- if(inc>rest) {
- break;
- }
- // Try to match if the increment is not listed already.
- if(!offsets.containsOffset(inc) && matches16CPB(s, pos-overlap, length, s16, length16)) {
- if(inc==rest) {
- return length; // Reached the end of the string.
- }
- offsets.addOffset(inc);
- }
- if(overlap==0) {
- break;
- }
- --overlap;
- ++inc;
- }
- }
- } else /* USET_SPAN_SIMPLE */ {
- int32_t maxInc=0, maxOverlap=0;
- for(i=0; i<stringsLength; ++i) {
- int32_t overlap=spanLengths[i];
- // For longest match, we do need to try to match even an all-contained string
- // to find the match from the earliest start.
-
- const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
- const UChar *s16=string.getBuffer();
- int32_t length16=string.length();
-
- // Try to match this string at pos-overlap..pos.
- if(overlap>=LONG_SPAN) {
- overlap=length16;
- // Longest match: Need to match fully inside the code point span
- // to find the match from the earliest start.
- }
- if(overlap>spanLength) {
- overlap=spanLength;
- }
- int32_t inc=length16-overlap; // Keep overlap+inc==length16.
- for(;;) {
- if(inc>rest || overlap<maxOverlap) {
- break;
- }
- // Try to match if the string is longer or starts earlier.
- if( (overlap>maxOverlap || /* redundant overlap==maxOverlap && */ inc>maxInc) &&
- matches16CPB(s, pos-overlap, length, s16, length16)
- ) {
- maxInc=inc; // Longest match from earliest start.
- maxOverlap=overlap;
- break;
- }
- --overlap;
- ++inc;
- }
- }
-
- if(maxInc!=0 || maxOverlap!=0) {
- // Longest-match algorithm, and there was a string match.
- // Simply continue after it.
- pos+=maxInc;
- rest-=maxInc;
- if(rest==0) {
- return length; // Reached the end of the string.
- }
- spanLength=0; // Match strings from after a string match.
- continue;
- }
- }
- // Finished trying to match all strings at pos.
-
- if(spanLength!=0 || pos==0) {
- // The position is after an unlimited code point span (spanLength!=0),
- // not after a string match.
- // The only position where spanLength==0 after a span is pos==0.
- // Otherwise, an unlimited code point span is only tried again when no
- // strings match, and if such a non-initial span fails we stop.
- if(offsets.isEmpty()) {
- return pos; // No strings matched after a span.
- }
- // Match strings from after the next string match.
- } else {
- // The position is after a string match (or a single code point).
- if(offsets.isEmpty()) {
- // No more strings matched after a previous string match.
- // Try another code point span from after the last string match.
- spanLength=spanSet.span(s+pos, rest, USET_SPAN_CONTAINED);
- if( spanLength==rest || // Reached the end of the string, or
- spanLength==0 // neither strings nor span progressed.
- ) {
- return pos+spanLength;
- }
- pos+=spanLength;
- rest-=spanLength;
- continue; // spanLength>0: Match strings from after a span.
- } else {
- // Try to match only one code point from after a string match if some
- // string matched beyond it, so that we try all possible positions
- // and don't overshoot.
- spanLength=spanOne(spanSet, s+pos, rest);
- if(spanLength>0) {
- if(spanLength==rest) {
- return length; // Reached the end of the string.
- }
- // Match strings after this code point.
- // There cannot be any increments below it because UnicodeSet strings
- // contain multiple code points.
- pos+=spanLength;
- rest-=spanLength;
- offsets.shift(spanLength);
- spanLength=0;
- continue; // Match strings from after a single code point.
- }
- // Match strings from after the next string match.
- }
- }
- int32_t minOffset=offsets.popMinimum();
- pos+=minOffset;
- rest-=minOffset;
- spanLength=0; // Match strings from after a string match.
- }
-}
-
-int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
- if(spanCondition==USET_SPAN_NOT_CONTAINED) {
- return spanNotBack(s, length);
- }
- int32_t pos=spanSet.spanBack(s, length, USET_SPAN_CONTAINED);
- if(pos==0) {
- return 0;
- }
- int32_t spanLength=length-pos;
-
- // Consider strings; they may overlap with the span.
- OffsetList offsets;
- if(spanCondition==USET_SPAN_CONTAINED) {
- // Use offset list to try all possibilities.
- offsets.setMaxLength(maxLength16);
- }
- int32_t i, stringsLength=strings.size();
- uint8_t *spanBackLengths=spanLengths;
- if(all) {
- spanBackLengths+=stringsLength;
- }
- for(;;) {
- if(spanCondition==USET_SPAN_CONTAINED) {
- for(i=0; i<stringsLength; ++i) {
- int32_t overlap=spanBackLengths[i];
- if(overlap==ALL_CP_CONTAINED) {
- continue; // Irrelevant string.
- }
- const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
- const UChar *s16=string.getBuffer();
- int32_t length16=string.length();
-
- // Try to match this string at pos-(length16-overlap)..pos-length16.
- if(overlap>=LONG_SPAN) {
- overlap=length16;
- // While contained: No point matching fully inside the code point span.
- int32_t len1=0;
- U16_FWD_1(s16, len1, overlap);
- overlap-=len1; // Length of the string minus the first code point.
- }
- if(overlap>spanLength) {
- overlap=spanLength;
- }
- int32_t dec=length16-overlap; // Keep dec+overlap==length16.
- for(;;) {
- if(dec>pos) {
- break;
- }
- // Try to match if the decrement is not listed already.
- if(!offsets.containsOffset(dec) && matches16CPB(s, pos-dec, length, s16, length16)) {
- if(dec==pos) {
- return 0; // Reached the start of the string.
- }
- offsets.addOffset(dec);
- }
- if(overlap==0) {
- break;
- }
- --overlap;
- ++dec;
- }
- }
- } else /* USET_SPAN_SIMPLE */ {
- int32_t maxDec=0, maxOverlap=0;
- for(i=0; i<stringsLength; ++i) {
- int32_t overlap=spanBackLengths[i];
- // For longest match, we do need to try to match even an all-contained string
- // to find the match from the latest end.
-
- const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
- const UChar *s16=string.getBuffer();
- int32_t length16=string.length();
-
- // Try to match this string at pos-(length16-overlap)..pos-length16.
- if(overlap>=LONG_SPAN) {
- overlap=length16;
- // Longest match: Need to match fully inside the code point span
- // to find the match from the latest end.
- }
- if(overlap>spanLength) {
- overlap=spanLength;
- }
- int32_t dec=length16-overlap; // Keep dec+overlap==length16.
- for(;;) {
- if(dec>pos || overlap<maxOverlap) {
- break;
- }
- // Try to match if the string is longer or ends later.
- if( (overlap>maxOverlap || /* redundant overlap==maxOverlap && */ dec>maxDec) &&
- matches16CPB(s, pos-dec, length, s16, length16)
- ) {
- maxDec=dec; // Longest match from latest end.
- maxOverlap=overlap;
- break;
- }
- --overlap;
- ++dec;
- }
- }
-
- if(maxDec!=0 || maxOverlap!=0) {
- // Longest-match algorithm, and there was a string match.
- // Simply continue before it.
- pos-=maxDec;
- if(pos==0) {
- return 0; // Reached the start of the string.
- }
- spanLength=0; // Match strings from before a string match.
- continue;
- }
- }
- // Finished trying to match all strings at pos.
-
- if(spanLength!=0 || pos==length) {
- // The position is before an unlimited code point span (spanLength!=0),
- // not before a string match.
- // The only position where spanLength==0 before a span is pos==length.
- // Otherwise, an unlimited code point span is only tried again when no
- // strings match, and if such a non-initial span fails we stop.
- if(offsets.isEmpty()) {
- return pos; // No strings matched before a span.
- }
- // Match strings from before the next string match.
- } else {
- // The position is before a string match (or a single code point).
- if(offsets.isEmpty()) {
- // No more strings matched before a previous string match.
- // Try another code point span from before the last string match.
- int32_t oldPos=pos;
- pos=spanSet.spanBack(s, oldPos, USET_SPAN_CONTAINED);
- spanLength=oldPos-pos;
- if( pos==0 || // Reached the start of the string, or
- spanLength==0 // neither strings nor span progressed.
- ) {
- return pos;
- }
- continue; // spanLength>0: Match strings from before a span.
- } else {
- // Try to match only one code point from before a string match if some
- // string matched beyond it, so that we try all possible positions
- // and don't overshoot.
- spanLength=spanOneBack(spanSet, s, pos);
- if(spanLength>0) {
- if(spanLength==pos) {
- return 0; // Reached the start of the string.
- }
- // Match strings before this code point.
- // There cannot be any decrements below it because UnicodeSet strings
- // contain multiple code points.
- pos-=spanLength;
- offsets.shift(spanLength);
- spanLength=0;
- continue; // Match strings from before a single code point.
- }
- // Match strings from before the next string match.
- }
- }
- pos-=offsets.popMinimum();
- spanLength=0; // Match strings from before a string match.
- }
-}
-
-int32_t UnicodeSetStringSpan::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
- if(spanCondition==USET_SPAN_NOT_CONTAINED) {
- return spanNotUTF8(s, length);
- }
- int32_t spanLength=spanSet.spanUTF8((const char *)s, length, USET_SPAN_CONTAINED);
- if(spanLength==length) {
- return length;
- }
-
- // Consider strings; they may overlap with the span.
- OffsetList offsets;
- if(spanCondition==USET_SPAN_CONTAINED) {
- // Use offset list to try all possibilities.
- offsets.setMaxLength(maxLength8);
- }
- int32_t pos=spanLength, rest=length-pos;
- int32_t i, stringsLength=strings.size();
- uint8_t *spanUTF8Lengths=spanLengths;
- if(all) {
- spanUTF8Lengths+=2*stringsLength;
- }
- for(;;) {
- const uint8_t *s8=utf8;
- int32_t length8;
- if(spanCondition==USET_SPAN_CONTAINED) {
- for(i=0; i<stringsLength; ++i) {
- length8=utf8Lengths[i];
- if(length8==0) {
- continue; // String not representable in UTF-8.
- }
- int32_t overlap=spanUTF8Lengths[i];
- if(overlap==ALL_CP_CONTAINED) {
- s8+=length8;
- continue; // Irrelevant string.
- }
-
- // Try to match this string at pos-overlap..pos.
- if(overlap>=LONG_SPAN) {
- overlap=length8;
- // While contained: No point matching fully inside the code point span.
- U8_BACK_1(s8, 0, overlap); // Length of the string minus the last code point.
- }
- if(overlap>spanLength) {
- overlap=spanLength;
- }
- int32_t inc=length8-overlap; // Keep overlap+inc==length8.
- for(;;) {
- if(inc>rest) {
- break;
- }
- // Try to match if the increment is not listed already.
- // Match at code point boundaries. (The UTF-8 strings were converted
- // from UTF-16 and are guaranteed to be well-formed.)
- if(!U8_IS_TRAIL(s[pos-overlap]) &&
- !offsets.containsOffset(inc) &&
- matches8(s+pos-overlap, s8, length8)) {
- if(inc==rest) {
- return length; // Reached the end of the string.
- }
- offsets.addOffset(inc);
- }
- if(overlap==0) {
- break;
- }
- --overlap;
- ++inc;
- }
- s8+=length8;
- }
- } else /* USET_SPAN_SIMPLE */ {
- int32_t maxInc=0, maxOverlap=0;
- for(i=0; i<stringsLength; ++i) {
- length8=utf8Lengths[i];
- if(length8==0) {
- continue; // String not representable in UTF-8.
- }
- int32_t overlap=spanUTF8Lengths[i];
- // For longest match, we do need to try to match even an all-contained string
- // to find the match from the earliest start.
-
- // Try to match this string at pos-overlap..pos.
- if(overlap>=LONG_SPAN) {
- overlap=length8;
- // Longest match: Need to match fully inside the code point span
- // to find the match from the earliest start.
- }
- if(overlap>spanLength) {
- overlap=spanLength;
- }
- int32_t inc=length8-overlap; // Keep overlap+inc==length8.
- for(;;) {
- if(inc>rest || overlap<maxOverlap) {
- break;
- }
- // Try to match if the string is longer or starts earlier.
- // Match at code point boundaries. (The UTF-8 strings were converted
- // from UTF-16 and are guaranteed to be well-formed.)
- if(!U8_IS_TRAIL(s[pos-overlap]) &&
- (overlap>maxOverlap ||
- /* redundant overlap==maxOverlap && */ inc>maxInc) &&
- matches8(s+pos-overlap, s8, length8)) {
- maxInc=inc; // Longest match from earliest start.
- maxOverlap=overlap;
- break;
- }
- --overlap;
- ++inc;
- }
- s8+=length8;
- }
-
- if(maxInc!=0 || maxOverlap!=0) {
- // Longest-match algorithm, and there was a string match.
- // Simply continue after it.
- pos+=maxInc;
- rest-=maxInc;
- if(rest==0) {
- return length; // Reached the end of the string.
- }
- spanLength=0; // Match strings from after a string match.
- continue;
- }
- }
- // Finished trying to match all strings at pos.
-
- if(spanLength!=0 || pos==0) {
- // The position is after an unlimited code point span (spanLength!=0),
- // not after a string match.
- // The only position where spanLength==0 after a span is pos==0.
- // Otherwise, an unlimited code point span is only tried again when no
- // strings match, and if such a non-initial span fails we stop.
- if(offsets.isEmpty()) {
- return pos; // No strings matched after a span.
- }
- // Match strings from after the next string match.
- } else {
- // The position is after a string match (or a single code point).
- if(offsets.isEmpty()) {
- // No more strings matched after a previous string match.
- // Try another code point span from after the last string match.
- spanLength=spanSet.spanUTF8((const char *)s+pos, rest, USET_SPAN_CONTAINED);
- if( spanLength==rest || // Reached the end of the string, or
- spanLength==0 // neither strings nor span progressed.
- ) {
- return pos+spanLength;
- }
- pos+=spanLength;
- rest-=spanLength;
- continue; // spanLength>0: Match strings from after a span.
- } else {
- // Try to match only one code point from after a string match if some
- // string matched beyond it, so that we try all possible positions
- // and don't overshoot.
- spanLength=spanOneUTF8(spanSet, s+pos, rest);
- if(spanLength>0) {
- if(spanLength==rest) {
- return length; // Reached the end of the string.
- }
- // Match strings after this code point.
- // There cannot be any increments below it because UnicodeSet strings
- // contain multiple code points.
- pos+=spanLength;
- rest-=spanLength;
- offsets.shift(spanLength);
- spanLength=0;
- continue; // Match strings from after a single code point.
- }
- // Match strings from after the next string match.
- }
- }
- int32_t minOffset=offsets.popMinimum();
- pos+=minOffset;
- rest-=minOffset;
- spanLength=0; // Match strings from after a string match.
- }
-}
-
-int32_t UnicodeSetStringSpan::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
- if(spanCondition==USET_SPAN_NOT_CONTAINED) {
- return spanNotBackUTF8(s, length);
- }
- int32_t pos=spanSet.spanBackUTF8((const char *)s, length, USET_SPAN_CONTAINED);
- if(pos==0) {
- return 0;
- }
- int32_t spanLength=length-pos;
-
- // Consider strings; they may overlap with the span.
- OffsetList offsets;
- if(spanCondition==USET_SPAN_CONTAINED) {
- // Use offset list to try all possibilities.
- offsets.setMaxLength(maxLength8);
- }
- int32_t i, stringsLength=strings.size();
- uint8_t *spanBackUTF8Lengths=spanLengths;
- if(all) {
- spanBackUTF8Lengths+=3*stringsLength;
- }
- for(;;) {
- const uint8_t *s8=utf8;
- int32_t length8;
- if(spanCondition==USET_SPAN_CONTAINED) {
- for(i=0; i<stringsLength; ++i) {
- length8=utf8Lengths[i];
- if(length8==0) {
- continue; // String not representable in UTF-8.
- }
- int32_t overlap=spanBackUTF8Lengths[i];
- if(overlap==ALL_CP_CONTAINED) {
- s8+=length8;
- continue; // Irrelevant string.
- }
-
- // Try to match this string at pos-(length8-overlap)..pos-length8.
- if(overlap>=LONG_SPAN) {
- overlap=length8;
- // While contained: No point matching fully inside the code point span.
- int32_t len1=0;
- U8_FWD_1(s8, len1, overlap);
- overlap-=len1; // Length of the string minus the first code point.
- }
- if(overlap>spanLength) {
- overlap=spanLength;
- }
- int32_t dec=length8-overlap; // Keep dec+overlap==length8.
- for(;;) {
- if(dec>pos) {
- break;
- }
- // Try to match if the decrement is not listed already.
- // Match at code point boundaries. (The UTF-8 strings were converted
- // from UTF-16 and are guaranteed to be well-formed.)
- if( !U8_IS_TRAIL(s[pos-dec]) &&
- !offsets.containsOffset(dec) &&
- matches8(s+pos-dec, s8, length8)
- ) {
- if(dec==pos) {
- return 0; // Reached the start of the string.
- }
- offsets.addOffset(dec);
- }
- if(overlap==0) {
- break;
- }
- --overlap;
- ++dec;
- }
- s8+=length8;
- }
- } else /* USET_SPAN_SIMPLE */ {
- int32_t maxDec=0, maxOverlap=0;
- for(i=0; i<stringsLength; ++i) {
- length8=utf8Lengths[i];
- if(length8==0) {
- continue; // String not representable in UTF-8.
- }
- int32_t overlap=spanBackUTF8Lengths[i];
- // For longest match, we do need to try to match even an all-contained string
- // to find the match from the latest end.
-
- // Try to match this string at pos-(length8-overlap)..pos-length8.
- if(overlap>=LONG_SPAN) {
- overlap=length8;
- // Longest match: Need to match fully inside the code point span
- // to find the match from the latest end.
- }
- if(overlap>spanLength) {
- overlap=spanLength;
- }
- int32_t dec=length8-overlap; // Keep dec+overlap==length8.
- for(;;) {
- if(dec>pos || overlap<maxOverlap) {
- break;
- }
- // Try to match if the string is longer or ends later.
- // Match at code point boundaries. (The UTF-8 strings were converted
- // from UTF-16 and are guaranteed to be well-formed.)
- if( !U8_IS_TRAIL(s[pos-dec]) &&
- (overlap>maxOverlap || /* redundant overlap==maxOverlap && */ dec>maxDec) &&
- matches8(s+pos-dec, s8, length8)
- ) {
- maxDec=dec; // Longest match from latest end.
- maxOverlap=overlap;
- break;
- }
- --overlap;
- ++dec;
- }
- s8+=length8;
- }
-
- if(maxDec!=0 || maxOverlap!=0) {
- // Longest-match algorithm, and there was a string match.
- // Simply continue before it.
- pos-=maxDec;
- if(pos==0) {
- return 0; // Reached the start of the string.
- }
- spanLength=0; // Match strings from before a string match.
- continue;
- }
- }
- // Finished trying to match all strings at pos.
-
- if(spanLength!=0 || pos==length) {
- // The position is before an unlimited code point span (spanLength!=0),
- // not before a string match.
- // The only position where spanLength==0 before a span is pos==length.
- // Otherwise, an unlimited code point span is only tried again when no
- // strings match, and if such a non-initial span fails we stop.
- if(offsets.isEmpty()) {
- return pos; // No strings matched before a span.
- }
- // Match strings from before the next string match.
- } else {
- // The position is before a string match (or a single code point).
- if(offsets.isEmpty()) {
- // No more strings matched before a previous string match.
- // Try another code point span from before the last string match.
- int32_t oldPos=pos;
- pos=spanSet.spanBackUTF8((const char *)s, oldPos, USET_SPAN_CONTAINED);
- spanLength=oldPos-pos;
- if( pos==0 || // Reached the start of the string, or
- spanLength==0 // neither strings nor span progressed.
- ) {
- return pos;
- }
- continue; // spanLength>0: Match strings from before a span.
- } else {
- // Try to match only one code point from before a string match if some
- // string matched beyond it, so that we try all possible positions
- // and don't overshoot.
- spanLength=spanOneBackUTF8(spanSet, s, pos);
- if(spanLength>0) {
- if(spanLength==pos) {
- return 0; // Reached the start of the string.
- }
- // Match strings before this code point.
- // There cannot be any decrements below it because UnicodeSet strings
- // contain multiple code points.
- pos-=spanLength;
- offsets.shift(spanLength);
- spanLength=0;
- continue; // Match strings from before a single code point.
- }
- // Match strings from before the next string match.
- }
- }
- pos-=offsets.popMinimum();
- spanLength=0; // Match strings from before a string match.
- }
-}
-
-/*
- * Algorithm for spanNot()==span(USET_SPAN_NOT_CONTAINED)
- *
- * Theoretical algorithm:
- * - Iterate through the string, and at each code point boundary:
- * + If the code point there is in the set, then return with the current position.
- * + If a set string matches at the current position, then return with the current position.
- *
- * Optimized implementation:
- *
- * (Same assumption as for span() above.)
- *
- * Create and cache a spanNotSet which contains all of the single code points
- * of the original set but none of its strings.
- * For each set string add its initial code point to the spanNotSet.
- * (Also add its final code point for spanNotBack().)
- *
- * - Loop:
- * + Do spanLength=spanNotSet.span(USET_SPAN_NOT_CONTAINED).
- * + If the current code point is in the original set, then
- * return the current position.
- * + If any set string matches at the current position, then
- * return the current position.
- * + If there is no match at the current position, neither for the code point there
- * nor for any set string, then skip this code point and continue the loop.
- * This happens for set-string-initial code points that were added to spanNotSet
- * when there is not actually a match for such a set string.
- */
-
-int32_t UnicodeSetStringSpan::spanNot(const UChar *s, int32_t length) const {
- int32_t pos=0, rest=length;
- int32_t i, stringsLength=strings.size();
- do {
- // Span until we find a code point from the set,
- // or a code point that starts or ends some string.
- i=pSpanNotSet->span(s+pos, rest, USET_SPAN_NOT_CONTAINED);
- if(i==rest) {
- return length; // Reached the end of the string.
- }
- pos+=i;
- rest-=i;
-
- // Check whether the current code point is in the original set,
- // without the string starts and ends.
- int32_t cpLength=spanOne(spanSet, s+pos, rest);
- if(cpLength>0) {
- return pos; // There is a set element at pos.
- }
-
- // Try to match the strings at pos.
- for(i=0; i<stringsLength; ++i) {
- if(spanLengths[i]==ALL_CP_CONTAINED) {
- continue; // Irrelevant string.
- }
- const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
- const UChar *s16=string.getBuffer();
- int32_t length16=string.length();
- if(length16<=rest && matches16CPB(s, pos, length, s16, length16)) {
- return pos; // There is a set element at pos.
- }
- }
-
- // The span(while not contained) ended on a string start/end which is
- // not in the original set. Skip this code point and continue.
- // cpLength<0
- pos-=cpLength;
- rest+=cpLength;
- } while(rest!=0);
- return length; // Reached the end of the string.
-}
-
-int32_t UnicodeSetStringSpan::spanNotBack(const UChar *s, int32_t length) const {
- int32_t pos=length;
- int32_t i, stringsLength=strings.size();
- do {
- // Span until we find a code point from the set,
- // or a code point that starts or ends some string.
- pos=pSpanNotSet->spanBack(s, pos, USET_SPAN_NOT_CONTAINED);
- if(pos==0) {
- return 0; // Reached the start of the string.
- }
-
- // Check whether the current code point is in the original set,
- // without the string starts and ends.
- int32_t cpLength=spanOneBack(spanSet, s, pos);
- if(cpLength>0) {
- return pos; // There is a set element at pos.
- }
-
- // Try to match the strings at pos.
- for(i=0; i<stringsLength; ++i) {
- // Use spanLengths rather than a spanBackLengths pointer because
- // it is easier and we only need to know whether the string is irrelevant
- // which is the same in either array.
- if(spanLengths[i]==ALL_CP_CONTAINED) {
- continue; // Irrelevant string.
- }
- const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
- const UChar *s16=string.getBuffer();
- int32_t length16=string.length();
- if(length16<=pos && matches16CPB(s, pos-length16, length, s16, length16)) {
- return pos; // There is a set element at pos.
- }
- }
-
- // The span(while not contained) ended on a string start/end which is
- // not in the original set. Skip this code point and continue.
- // cpLength<0
- pos+=cpLength;
- } while(pos!=0);
- return 0; // Reached the start of the string.
-}
-
-int32_t UnicodeSetStringSpan::spanNotUTF8(const uint8_t *s, int32_t length) const {
- int32_t pos=0, rest=length;
- int32_t i, stringsLength=strings.size();
- uint8_t *spanUTF8Lengths=spanLengths;
- if(all) {
- spanUTF8Lengths+=2*stringsLength;
- }
- do {
- // Span until we find a code point from the set,
- // or a code point that starts or ends some string.
- i=pSpanNotSet->spanUTF8((const char *)s+pos, rest, USET_SPAN_NOT_CONTAINED);
- if(i==rest) {
- return length; // Reached the end of the string.
- }
- pos+=i;
- rest-=i;
-
- // Check whether the current code point is in the original set,
- // without the string starts and ends.
- int32_t cpLength=spanOneUTF8(spanSet, s+pos, rest);
- if(cpLength>0) {
- return pos; // There is a set element at pos.
- }
-
- // Try to match the strings at pos.
- const uint8_t *s8=utf8;
- int32_t length8;
- for(i=0; i<stringsLength; ++i) {
- length8=utf8Lengths[i];
- // ALL_CP_CONTAINED: Irrelevant string.
- if(length8!=0 && spanUTF8Lengths[i]!=ALL_CP_CONTAINED && length8<=rest && matches8(s+pos, s8, length8)) {
- return pos; // There is a set element at pos.
- }
- s8+=length8;
- }
-
- // The span(while not contained) ended on a string start/end which is
- // not in the original set. Skip this code point and continue.
- // cpLength<0
- pos-=cpLength;
- rest+=cpLength;
- } while(rest!=0);
- return length; // Reached the end of the string.
-}
-
-int32_t UnicodeSetStringSpan::spanNotBackUTF8(const uint8_t *s, int32_t length) const {
- int32_t pos=length;
- int32_t i, stringsLength=strings.size();
- uint8_t *spanBackUTF8Lengths=spanLengths;
- if(all) {
- spanBackUTF8Lengths+=3*stringsLength;
- }
- do {
- // Span until we find a code point from the set,
- // or a code point that starts or ends some string.
- pos=pSpanNotSet->spanBackUTF8((const char *)s, pos, USET_SPAN_NOT_CONTAINED);
- if(pos==0) {
- return 0; // Reached the start of the string.
- }
-
- // Check whether the current code point is in the original set,
- // without the string starts and ends.
- int32_t cpLength=spanOneBackUTF8(spanSet, s, pos);
- if(cpLength>0) {
- return pos; // There is a set element at pos.
- }
-
- // Try to match the strings at pos.
- const uint8_t *s8=utf8;
- int32_t length8;
- for(i=0; i<stringsLength; ++i) {
- length8=utf8Lengths[i];
- // ALL_CP_CONTAINED: Irrelevant string.
- if(length8!=0 && spanBackUTF8Lengths[i]!=ALL_CP_CONTAINED && length8<=pos && matches8(s+pos-length8, s8, length8)) {
- return pos; // There is a set element at pos.
- }
- s8+=length8;
- }
-
- // The span(while not contained) ended on a string start/end which is
- // not in the original set. Skip this code point and continue.
- // cpLength<0
- pos+=cpLength;
- } while(pos!=0);
- return 0; // Reached the start of the string.
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/unisetspan.h b/contrib/libs/icu/common/unisetspan.h
deleted file mode 100644
index f1e78ff3ee2..00000000000
--- a/contrib/libs/icu/common/unisetspan.h
+++ /dev/null
@@ -1,157 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2007, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: unisetspan.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2007mar01
-* created by: Markus W. Scherer
-*/
-
-#ifndef __UNISETSPAN_H__
-#define __UNISETSPAN_H__
-
-#include "unicode/utypes.h"
-#include "unicode/uniset.h"
-
-U_NAMESPACE_BEGIN
-
-/*
- * Implement span() etc. for a set with strings.
- * Avoid recursion because of its exponential complexity.
- * Instead, try multiple paths at once and track them with an IndexList.
- */
-class UnicodeSetStringSpan : public UMemory {
-public:
- /*
- * Which span() variant will be used?
- * The object is either built for one variant and used once,
- * or built for all and may be used many times.
- */
- enum {
- FWD = 0x20,
- BACK = 0x10,
- UTF16 = 8,
- UTF8 = 4,
- CONTAINED = 2,
- NOT_CONTAINED = 1,
-
- ALL = 0x3f,
-
- FWD_UTF16_CONTAINED = FWD | UTF16 | CONTAINED,
- FWD_UTF16_NOT_CONTAINED = FWD | UTF16 | NOT_CONTAINED,
- FWD_UTF8_CONTAINED = FWD | UTF8 | CONTAINED,
- FWD_UTF8_NOT_CONTAINED = FWD | UTF8 | NOT_CONTAINED,
- BACK_UTF16_CONTAINED = BACK | UTF16 | CONTAINED,
- BACK_UTF16_NOT_CONTAINED= BACK | UTF16 | NOT_CONTAINED,
- BACK_UTF8_CONTAINED = BACK | UTF8 | CONTAINED,
- BACK_UTF8_NOT_CONTAINED = BACK | UTF8 | NOT_CONTAINED
- };
-
- UnicodeSetStringSpan(const UnicodeSet &set, const UVector &setStrings, uint32_t which);
-
- // Copy constructor. Assumes which==ALL for a frozen set.
- UnicodeSetStringSpan(const UnicodeSetStringSpan &otherStringSpan, const UVector &newParentSetStrings);
-
- ~UnicodeSetStringSpan();
-
- /*
- * Do the strings need to be checked in span() etc.?
- * @return TRUE if strings need to be checked (call span() here),
- * FALSE if not (use a BMPSet for best performance).
- */
- inline UBool needsStringSpanUTF16();
- inline UBool needsStringSpanUTF8();
-
- // For fast UnicodeSet::contains(c).
- inline UBool contains(UChar32 c) const;
-
- int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
-
- int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
-
- int32_t spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
-
- int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
-
-private:
- // Special spanLength byte values.
- enum {
- // The spanLength is >=0xfe.
- LONG_SPAN=0xfe,
- // All code points in the string are contained in the parent set.
- ALL_CP_CONTAINED=0xff
- };
-
- // Add a starting or ending string character to the spanNotSet
- // so that a character span ends before any string.
- void addToSpanNotSet(UChar32 c);
-
- int32_t spanNot(const UChar *s, int32_t length) const;
- int32_t spanNotBack(const UChar *s, int32_t length) const;
- int32_t spanNotUTF8(const uint8_t *s, int32_t length) const;
- int32_t spanNotBackUTF8(const uint8_t *s, int32_t length) const;
-
- // Set for span(). Same as parent but without strings.
- UnicodeSet spanSet;
-
- // Set for span(not contained).
- // Same as spanSet, plus characters that start or end strings.
- UnicodeSet *pSpanNotSet;
-
- // The strings of the parent set.
- const UVector &strings;
-
- // Pointer to the UTF-8 string lengths.
- // Also pointer to further allocated storage for meta data and
- // UTF-8 string contents as necessary.
- int32_t *utf8Lengths;
-
- // Pointer to the part of the (utf8Lengths) memory block that stores
- // the lengths of span(), spanBack() etc. for each string.
- uint8_t *spanLengths;
-
- // Pointer to the part of the (utf8Lengths) memory block that stores
- // the UTF-8 versions of the parent set's strings.
- uint8_t *utf8;
-
- // Number of bytes for all UTF-8 versions of strings together.
- int32_t utf8Length;
-
- // Maximum lengths of relevant strings.
- int32_t maxLength16;
- int32_t maxLength8;
-
- // Set up for all variants of span()?
- UBool all;
-
- // Memory for small numbers and lengths of strings.
- // For example, for 8 strings:
- // 8 UTF-8 lengths, 8*4 bytes span lengths, 8*2 3-byte UTF-8 characters
- // = 112 bytes = int32_t[28].
- int32_t staticLengths[32];
-};
-
-UBool UnicodeSetStringSpan::needsStringSpanUTF16() {
- return (UBool)(maxLength16!=0);
-}
-
-UBool UnicodeSetStringSpan::needsStringSpanUTF8() {
- return (UBool)(maxLength8!=0);
-}
-
-UBool UnicodeSetStringSpan::contains(UChar32 c) const {
- return spanSet.contains(c);
-}
-
-U_NAMESPACE_END
-
-#endif
diff --git a/contrib/libs/icu/common/unistr.cpp b/contrib/libs/icu/common/unistr.cpp
deleted file mode 100644
index 077b4d6ef20..00000000000
--- a/contrib/libs/icu/common/unistr.cpp
+++ /dev/null
@@ -1,1982 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 1999-2016, International Business Machines Corporation and
-* others. All Rights Reserved.
-******************************************************************************
-*
-* File unistr.cpp
-*
-* Modification History:
-*
-* Date Name Description
-* 09/25/98 stephen Creation.
-* 04/20/99 stephen Overhauled per 4/16 code review.
-* 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
-* 11/18/99 aliu Added handleReplaceBetween() to make inherit from
-* Replaceable.
-* 06/25/01 grhoten Removed the dependency on iostream
-******************************************************************************
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/appendable.h"
-#include "unicode/putil.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "unicode/ustring.h"
-#include "unicode/unistr.h"
-#include "unicode/utf.h"
-#include "unicode/utf16.h"
-#include "uelement.h"
-#include "ustr_imp.h"
-#include "umutex.h"
-#include "uassert.h"
-
-#if 0
-
-#include <iostream>
-using namespace std;
-
-//DEBUGGING
-void
-print(const UnicodeString& s,
- const char *name)
-{
- UChar c;
- cout << name << ":|";
- for(int i = 0; i < s.length(); ++i) {
- c = s[i];
- if(c>= 0x007E || c < 0x0020)
- cout << "[0x" << hex << s[i] << "]";
- else
- cout << (char) s[i];
- }
- cout << '|' << endl;
-}
-
-void
-print(const UChar *s,
- int32_t len,
- const char *name)
-{
- UChar c;
- cout << name << ":|";
- for(int i = 0; i < len; ++i) {
- c = s[i];
- if(c>= 0x007E || c < 0x0020)
- cout << "[0x" << hex << s[i] << "]";
- else
- cout << (char) s[i];
- }
- cout << '|' << endl;
-}
-// END DEBUGGING
-#endif
-
-// Local function definitions for now
-
-// need to copy areas that may overlap
-static
-inline void
-us_arrayCopy(const UChar *src, int32_t srcStart,
- UChar *dst, int32_t dstStart, int32_t count)
-{
- if(count>0) {
- uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
- }
-}
-
-// u_unescapeAt() callback to get a UChar from a UnicodeString
-U_CDECL_BEGIN
-static UChar U_CALLCONV
-UnicodeString_charAt(int32_t offset, void *context) {
- return ((icu::UnicodeString*) context)->charAt(offset);
-}
-U_CDECL_END
-
-U_NAMESPACE_BEGIN
-
-/* The Replaceable virtual destructor can't be defined in the header
- due to how AIX works with multiple definitions of virtual functions.
-*/
-Replaceable::~Replaceable() {}
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
-
-UnicodeString U_EXPORT2
-operator+ (const UnicodeString &s1, const UnicodeString &s2) {
- return
- UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
- append(s1).
- append(s2);
-}
-
-//========================================
-// Reference Counting functions, put at top of file so that optimizing compilers
-// have a chance to automatically inline.
-//========================================
-
-void
-UnicodeString::addRef() {
- umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
-}
-
-int32_t
-UnicodeString::removeRef() {
- return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
-}
-
-int32_t
-UnicodeString::refCount() const {
- return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
-}
-
-void
-UnicodeString::releaseArray() {
- if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
- uprv_free((int32_t *)fUnion.fFields.fArray - 1);
- }
-}
-
-
-
-//========================================
-// Constructors
-//========================================
-
-// The default constructor is inline in unistr.h.
-
-UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
- fUnion.fFields.fLengthAndFlags = 0;
- if(count <= 0 || (uint32_t)c > 0x10ffff) {
- // just allocate and do not do anything else
- allocate(capacity);
- } else if(c <= 0xffff) {
- int32_t length = count;
- if(capacity < length) {
- capacity = length;
- }
- if(allocate(capacity)) {
- UChar *array = getArrayStart();
- UChar unit = (UChar)c;
- for(int32_t i = 0; i < length; ++i) {
- array[i] = unit;
- }
- setLength(length);
- }
- } else { // supplementary code point, write surrogate pairs
- if(count > (INT32_MAX / 2)) {
- // We would get more than 2G UChars.
- allocate(capacity);
- return;
- }
- int32_t length = count * 2;
- if(capacity < length) {
- capacity = length;
- }
- if(allocate(capacity)) {
- UChar *array = getArrayStart();
- UChar lead = U16_LEAD(c);
- UChar trail = U16_TRAIL(c);
- for(int32_t i = 0; i < length; i += 2) {
- array[i] = lead;
- array[i + 1] = trail;
- }
- setLength(length);
- }
- }
-}
-
-UnicodeString::UnicodeString(UChar ch) {
- fUnion.fFields.fLengthAndFlags = kLength1 | kShortString;
- fUnion.fStackFields.fBuffer[0] = ch;
-}
-
-UnicodeString::UnicodeString(UChar32 ch) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- int32_t i = 0;
- UBool isError = FALSE;
- U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError);
- // We test isError so that the compiler does not complain that we don't.
- // If isError then i==0 which is what we want anyway.
- if(!isError) {
- setShortLength(i);
- }
-}
-
-UnicodeString::UnicodeString(const UChar *text) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- doAppend(text, 0, -1);
-}
-
-UnicodeString::UnicodeString(const UChar *text,
- int32_t textLength) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- doAppend(text, 0, textLength);
-}
-
-UnicodeString::UnicodeString(UBool isTerminated,
- ConstChar16Ptr textPtr,
- int32_t textLength) {
- fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
- const UChar *text = textPtr;
- if(text == NULL) {
- // treat as an empty string, do not alias
- setToEmpty();
- } else if(textLength < -1 ||
- (textLength == -1 && !isTerminated) ||
- (textLength >= 0 && isTerminated && text[textLength] != 0)
- ) {
- setToBogus();
- } else {
- if(textLength == -1) {
- // text is terminated, or else it would have failed the above test
- textLength = u_strlen(text);
- }
- setArray(const_cast<UChar *>(text), textLength,
- isTerminated ? textLength + 1 : textLength);
- }
-}
-
-UnicodeString::UnicodeString(UChar *buff,
- int32_t buffLength,
- int32_t buffCapacity) {
- fUnion.fFields.fLengthAndFlags = kWritableAlias;
- if(buff == NULL) {
- // treat as an empty string, do not alias
- setToEmpty();
- } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
- setToBogus();
- } else {
- if(buffLength == -1) {
- // fLength = u_strlen(buff); but do not look beyond buffCapacity
- const UChar *p = buff, *limit = buff + buffCapacity;
- while(p != limit && *p != 0) {
- ++p;
- }
- buffLength = (int32_t)(p - buff);
- }
- setArray(buff, buffLength, buffCapacity);
- }
-}
-
-UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- if(src==NULL) {
- // treat as an empty string
- } else {
- if(length<0) {
- length=(int32_t)uprv_strlen(src);
- }
- if(cloneArrayIfNeeded(length, length, FALSE)) {
- u_charsToUChars(src, getArrayStart(), length);
- setLength(length);
- } else {
- setToBogus();
- }
- }
-}
-
-#if U_CHARSET_IS_UTF8
-
-UnicodeString::UnicodeString(const char *codepageData) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- if(codepageData != 0) {
- setToUTF8(codepageData);
- }
-}
-
-UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- // if there's nothing to convert, do nothing
- if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
- return;
- }
- if(dataLength == -1) {
- dataLength = (int32_t)uprv_strlen(codepageData);
- }
- setToUTF8(StringPiece(codepageData, dataLength));
-}
-
-// else see unistr_cnv.cpp
-#endif
-
-UnicodeString::UnicodeString(const UnicodeString& that) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- copyFrom(that);
-}
-
-UnicodeString::UnicodeString(UnicodeString &&src) U_NOEXCEPT {
- copyFieldsFrom(src, TRUE);
-}
-
-UnicodeString::UnicodeString(const UnicodeString& that,
- int32_t srcStart) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- setTo(that, srcStart);
-}
-
-UnicodeString::UnicodeString(const UnicodeString& that,
- int32_t srcStart,
- int32_t srcLength) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- setTo(that, srcStart, srcLength);
-}
-
-// Replaceable base class clone() default implementation, does not clone
-Replaceable *
-Replaceable::clone() const {
- return NULL;
-}
-
-// UnicodeString overrides clone() with a real implementation
-UnicodeString *
-UnicodeString::clone() const {
- return new UnicodeString(*this);
-}
-
-//========================================
-// array allocation
-//========================================
-
-namespace {
-
-const int32_t kGrowSize = 128;
-
-// The number of bytes for one int32_t reference counter and capacity UChars
-// must fit into a 32-bit size_t (at least when on a 32-bit platform).
-// We also add one for the NUL terminator, to avoid reallocation in getTerminatedBuffer(),
-// and round up to a multiple of 16 bytes.
-// This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
-// (With more complicated checks we could go up to 0x7ffffffd without rounding up,
-// but that does not seem worth it.)
-const int32_t kMaxCapacity = 0x7ffffff5;
-
-int32_t getGrowCapacity(int32_t newLength) {
- int32_t growSize = (newLength >> 2) + kGrowSize;
- if(growSize <= (kMaxCapacity - newLength)) {
- return newLength + growSize;
- } else {
- return kMaxCapacity;
- }
-}
-
-} // namespace
-
-UBool
-UnicodeString::allocate(int32_t capacity) {
- if(capacity <= US_STACKBUF_SIZE) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- return TRUE;
- }
- if(capacity <= kMaxCapacity) {
- ++capacity; // for the NUL
- // Switch to size_t which is unsigned so that we can allocate up to 4GB.
- // Reference counter + UChars.
- size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
- // Round up to a multiple of 16.
- numBytes = (numBytes + 15) & ~15;
- int32_t *array = (int32_t *) uprv_malloc(numBytes);
- if(array != NULL) {
- // set initial refCount and point behind the refCount
- *array++ = 1;
- numBytes -= sizeof(int32_t);
-
- // have fArray point to the first UChar
- fUnion.fFields.fArray = (UChar *)array;
- fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR);
- fUnion.fFields.fLengthAndFlags = kLongString;
- return TRUE;
- }
- }
- fUnion.fFields.fLengthAndFlags = kIsBogus;
- fUnion.fFields.fArray = 0;
- fUnion.fFields.fCapacity = 0;
- return FALSE;
-}
-
-//========================================
-// Destructor
-//========================================
-
-#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
-static u_atomic_int32_t finalLengthCounts[0x400]; // UnicodeString::kMaxShortLength+1
-static u_atomic_int32_t beyondCount(0);
-
-U_CAPI void unistr_printLengths() {
- int32_t i;
- for(i = 0; i <= 59; ++i) {
- printf("%2d, %9d\n", i, (int32_t)finalLengthCounts[i]);
- }
- int32_t beyond = beyondCount;
- for(; i < UPRV_LENGTHOF(finalLengthCounts); ++i) {
- beyond += finalLengthCounts[i];
- }
- printf(">59, %9d\n", beyond);
-}
-#endif
-
-UnicodeString::~UnicodeString()
-{
-#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
- // Count lengths of strings at the end of their lifetime.
- // Useful for discussion of a desirable stack buffer size.
- // Count the contents length, not the optional NUL terminator nor further capacity.
- // Ignore open-buffer strings and strings which alias external storage.
- if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kReadonlyAlias|kWritableAlias)) == 0) {
- if(hasShortLength()) {
- umtx_atomic_inc(finalLengthCounts + getShortLength());
- } else {
- umtx_atomic_inc(&beyondCount);
- }
- }
-#endif
-
- releaseArray();
-}
-
-//========================================
-// Factory methods
-//========================================
-
-UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
- UnicodeString result;
- result.setToUTF8(utf8);
- return result;
-}
-
-UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
- UnicodeString result;
- int32_t capacity;
- // Most UTF-32 strings will be BMP-only and result in a same-length
- // UTF-16 string. We overestimate the capacity just slightly,
- // just in case there are a few supplementary characters.
- if(length <= US_STACKBUF_SIZE) {
- capacity = US_STACKBUF_SIZE;
- } else {
- capacity = length + (length >> 4) + 4;
- }
- do {
- UChar *utf16 = result.getBuffer(capacity);
- int32_t length16;
- UErrorCode errorCode = U_ZERO_ERROR;
- u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
- utf32, length,
- 0xfffd, // Substitution character.
- NULL, // Don't care about number of substitutions.
- &errorCode);
- result.releaseBuffer(length16);
- if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
- capacity = length16 + 1; // +1 for the terminating NUL.
- continue;
- } else if(U_FAILURE(errorCode)) {
- result.setToBogus();
- }
- break;
- } while(TRUE);
- return result;
-}
-
-//========================================
-// Assignment
-//========================================
-
-UnicodeString &
-UnicodeString::operator=(const UnicodeString &src) {
- return copyFrom(src);
-}
-
-UnicodeString &
-UnicodeString::fastCopyFrom(const UnicodeString &src) {
- return copyFrom(src, TRUE);
-}
-
-UnicodeString &
-UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
- // if assigning to ourselves, do nothing
- if(this == &src) {
- return *this;
- }
-
- // is the right side bogus?
- if(src.isBogus()) {
- setToBogus();
- return *this;
- }
-
- // delete the current contents
- releaseArray();
-
- if(src.isEmpty()) {
- // empty string - use the stack buffer
- setToEmpty();
- return *this;
- }
-
- // fLength>0 and not an "open" src.getBuffer(minCapacity)
- fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
- switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) {
- case kShortString:
- // short string using the stack buffer, do the same
- uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
- getShortLength() * U_SIZEOF_UCHAR);
- break;
- case kLongString:
- // src uses a refCounted string buffer, use that buffer with refCount
- // src is const, use a cast - we don't actually change it
- ((UnicodeString &)src).addRef();
- // copy all fields, share the reference-counted buffer
- fUnion.fFields.fArray = src.fUnion.fFields.fArray;
- fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
- if(!hasShortLength()) {
- fUnion.fFields.fLength = src.fUnion.fFields.fLength;
- }
- break;
- case kReadonlyAlias:
- if(fastCopy) {
- // src is a readonly alias, do the same
- // -> maintain the readonly alias as such
- fUnion.fFields.fArray = src.fUnion.fFields.fArray;
- fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
- if(!hasShortLength()) {
- fUnion.fFields.fLength = src.fUnion.fFields.fLength;
- }
- break;
- }
- // else if(!fastCopy) fall through to case kWritableAlias
- // -> allocate a new buffer and copy the contents
- U_FALLTHROUGH;
- case kWritableAlias: {
- // src is a writable alias; we make a copy of that instead
- int32_t srcLength = src.length();
- if(allocate(srcLength)) {
- u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
- setLength(srcLength);
- break;
- }
- // if there is not enough memory, then fall through to setting to bogus
- U_FALLTHROUGH;
- }
- default:
- // if src is bogus, set ourselves to bogus
- // do not call setToBogus() here because fArray and flags are not consistent here
- fUnion.fFields.fLengthAndFlags = kIsBogus;
- fUnion.fFields.fArray = 0;
- fUnion.fFields.fCapacity = 0;
- break;
- }
-
- return *this;
-}
-
-UnicodeString &UnicodeString::operator=(UnicodeString &&src) U_NOEXCEPT {
- // No explicit check for self move assignment, consistent with standard library.
- // Self move assignment causes no crash nor leak but might make the object bogus.
- releaseArray();
- copyFieldsFrom(src, TRUE);
- return *this;
-}
-
-// Same as move assignment except without memory management.
-void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT {
- int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
- if(lengthAndFlags & kUsingStackBuffer) {
- // Short string using the stack buffer, copy the contents.
- // Check for self assignment to prevent "overlap in memcpy" warnings,
- // although it should be harmless to copy a buffer to itself exactly.
- if(this != &src) {
- uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
- getShortLength() * U_SIZEOF_UCHAR);
- }
- } else {
- // In all other cases, copy all fields.
- fUnion.fFields.fArray = src.fUnion.fFields.fArray;
- fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
- if(!hasShortLength()) {
- fUnion.fFields.fLength = src.fUnion.fFields.fLength;
- }
- if(setSrcToBogus) {
- // Set src to bogus without releasing any memory.
- src.fUnion.fFields.fLengthAndFlags = kIsBogus;
- src.fUnion.fFields.fArray = NULL;
- src.fUnion.fFields.fCapacity = 0;
- }
- }
-}
-
-void UnicodeString::swap(UnicodeString &other) U_NOEXCEPT {
- UnicodeString temp; // Empty short string: Known not to need releaseArray().
- // Copy fields without resetting source values in between.
- temp.copyFieldsFrom(*this, FALSE);
- this->copyFieldsFrom(other, FALSE);
- other.copyFieldsFrom(temp, FALSE);
- // Set temp to an empty string so that other's memory is not released twice.
- temp.fUnion.fFields.fLengthAndFlags = kShortString;
-}
-
-//========================================
-// Miscellaneous operations
-//========================================
-
-UnicodeString UnicodeString::unescape() const {
- UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
- if (result.isBogus()) {
- return result;
- }
- const UChar *array = getBuffer();
- int32_t len = length();
- int32_t prev = 0;
- for (int32_t i=0;;) {
- if (i == len) {
- result.append(array, prev, len - prev);
- break;
- }
- if (array[i++] == 0x5C /*'\\'*/) {
- result.append(array, prev, (i - 1) - prev);
- UChar32 c = unescapeAt(i); // advances i
- if (c < 0) {
- result.remove(); // return empty string
- break; // invalid escape sequence
- }
- result.append(c);
- prev = i;
- }
- }
- return result;
-}
-
-UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
- return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
-}
-
-//========================================
-// Read-only implementation
-//========================================
-UBool
-UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
- // Requires: this & text not bogus and have same lengths.
- // Byte-wise comparison works for equality regardless of endianness.
- return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
-}
-
-int8_t
-UnicodeString::doCompare( int32_t start,
- int32_t length,
- const UChar *srcChars,
- int32_t srcStart,
- int32_t srcLength) const
-{
- // compare illegal string values
- if(isBogus()) {
- return -1;
- }
-
- // pin indices to legal values
- pinIndices(start, length);
-
- if(srcChars == NULL) {
- // treat const UChar *srcChars==NULL as an empty string
- return length == 0 ? 0 : 1;
- }
-
- // get the correct pointer
- const UChar *chars = getArrayStart();
-
- chars += start;
- srcChars += srcStart;
-
- int32_t minLength;
- int8_t lengthResult;
-
- // get the srcLength if necessary
- if(srcLength < 0) {
- srcLength = u_strlen(srcChars + srcStart);
- }
-
- // are we comparing different lengths?
- if(length != srcLength) {
- if(length < srcLength) {
- minLength = length;
- lengthResult = -1;
- } else {
- minLength = srcLength;
- lengthResult = 1;
- }
- } else {
- minLength = length;
- lengthResult = 0;
- }
-
- /*
- * note that uprv_memcmp() returns an int but we return an int8_t;
- * we need to take care not to truncate the result -
- * one way to do this is to right-shift the value to
- * move the sign bit into the lower 8 bits and making sure that this
- * does not become 0 itself
- */
-
- if(minLength > 0 && chars != srcChars) {
- int32_t result;
-
-# if U_IS_BIG_ENDIAN
- // big-endian: byte comparison works
- result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
- if(result != 0) {
- return (int8_t)(result >> 15 | 1);
- }
-# else
- // little-endian: compare UChar units
- do {
- result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
- if(result != 0) {
- return (int8_t)(result >> 15 | 1);
- }
- } while(--minLength > 0);
-# endif
- }
- return lengthResult;
-}
-
-/* String compare in code point order - doCompare() compares in code unit order. */
-int8_t
-UnicodeString::doCompareCodePointOrder(int32_t start,
- int32_t length,
- const UChar *srcChars,
- int32_t srcStart,
- int32_t srcLength) const
-{
- // compare illegal string values
- // treat const UChar *srcChars==NULL as an empty string
- if(isBogus()) {
- return -1;
- }
-
- // pin indices to legal values
- pinIndices(start, length);
-
- if(srcChars == NULL) {
- srcStart = srcLength = 0;
- }
-
- int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
- /* translate the 32-bit result into an 8-bit one */
- if(diff!=0) {
- return (int8_t)(diff >> 15 | 1);
- } else {
- return 0;
- }
-}
-
-int32_t
-UnicodeString::getLength() const {
- return length();
-}
-
-UChar
-UnicodeString::getCharAt(int32_t offset) const {
- return charAt(offset);
-}
-
-UChar32
-UnicodeString::getChar32At(int32_t offset) const {
- return char32At(offset);
-}
-
-UChar32
-UnicodeString::char32At(int32_t offset) const
-{
- int32_t len = length();
- if((uint32_t)offset < (uint32_t)len) {
- const UChar *array = getArrayStart();
- UChar32 c;
- U16_GET(array, 0, offset, len, c);
- return c;
- } else {
- return kInvalidUChar;
- }
-}
-
-int32_t
-UnicodeString::getChar32Start(int32_t offset) const {
- if((uint32_t)offset < (uint32_t)length()) {
- const UChar *array = getArrayStart();
- U16_SET_CP_START(array, 0, offset);
- return offset;
- } else {
- return 0;
- }
-}
-
-int32_t
-UnicodeString::getChar32Limit(int32_t offset) const {
- int32_t len = length();
- if((uint32_t)offset < (uint32_t)len) {
- const UChar *array = getArrayStart();
- U16_SET_CP_LIMIT(array, 0, offset, len);
- return offset;
- } else {
- return len;
- }
-}
-
-int32_t
-UnicodeString::countChar32(int32_t start, int32_t length) const {
- pinIndices(start, length);
- // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
- return u_countChar32(getArrayStart()+start, length);
-}
-
-UBool
-UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
- pinIndices(start, length);
- // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
- return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
-}
-
-int32_t
-UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
- // pin index
- int32_t len = length();
- if(index<0) {
- index=0;
- } else if(index>len) {
- index=len;
- }
-
- const UChar *array = getArrayStart();
- if(delta>0) {
- U16_FWD_N(array, index, len, delta);
- } else {
- U16_BACK_N(array, 0, index, -delta);
- }
-
- return index;
-}
-
-void
-UnicodeString::doExtract(int32_t start,
- int32_t length,
- UChar *dst,
- int32_t dstStart) const
-{
- // pin indices to legal values
- pinIndices(start, length);
-
- // do not copy anything if we alias dst itself
- const UChar *array = getArrayStart();
- if(array + start != dst + dstStart) {
- us_arrayCopy(array, start, dst, dstStart, length);
- }
-}
-
-int32_t
-UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
- UErrorCode &errorCode) const {
- int32_t len = length();
- if(U_SUCCESS(errorCode)) {
- if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- } else {
- const UChar *array = getArrayStart();
- if(len>0 && len<=destCapacity && array!=dest) {
- u_memcpy(dest, array, len);
- }
- return u_terminateUChars(dest, destCapacity, len, &errorCode);
- }
- }
-
- return len;
-}
-
-int32_t
-UnicodeString::extract(int32_t start,
- int32_t length,
- char *target,
- int32_t targetCapacity,
- enum EInvariant) const
-{
- // if the arguments are illegal, then do nothing
- if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
- return 0;
- }
-
- // pin the indices to legal values
- pinIndices(start, length);
-
- if(length <= targetCapacity) {
- u_UCharsToChars(getArrayStart() + start, target, length);
- }
- UErrorCode status = U_ZERO_ERROR;
- return u_terminateChars(target, targetCapacity, length, &status);
-}
-
-UnicodeString
-UnicodeString::tempSubString(int32_t start, int32_t len) const {
- pinIndices(start, len);
- const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
- if(array==NULL) {
- array=fUnion.fStackFields.fBuffer; // anything not NULL because that would make an empty string
- len=-2; // bogus result string
- }
- return UnicodeString(FALSE, array + start, len);
-}
-
-int32_t
-UnicodeString::toUTF8(int32_t start, int32_t len,
- char *target, int32_t capacity) const {
- pinIndices(start, len);
- int32_t length8;
- UErrorCode errorCode = U_ZERO_ERROR;
- u_strToUTF8WithSub(target, capacity, &length8,
- getBuffer() + start, len,
- 0xFFFD, // Standard substitution character.
- NULL, // Don't care about number of substitutions.
- &errorCode);
- return length8;
-}
-
-#if U_CHARSET_IS_UTF8
-
-int32_t
-UnicodeString::extract(int32_t start, int32_t len,
- char *target, uint32_t dstSize) const {
- // if the arguments are illegal, then do nothing
- if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
- return 0;
- }
- return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
-}
-
-// else see unistr_cnv.cpp
-#endif
-
-void
-UnicodeString::extractBetween(int32_t start,
- int32_t limit,
- UnicodeString& target) const {
- pinIndex(start);
- pinIndex(limit);
- doExtract(start, limit - start, target);
-}
-
-// When converting from UTF-16 to UTF-8, the result will have at most 3 times
-// as many bytes as the source has UChars.
-// The "worst cases" are writing systems like Indic, Thai and CJK with
-// 3:1 bytes:UChars.
-void
-UnicodeString::toUTF8(ByteSink &sink) const {
- int32_t length16 = length();
- if(length16 != 0) {
- char stackBuffer[1024];
- int32_t capacity = (int32_t)sizeof(stackBuffer);
- UBool utf8IsOwned = FALSE;
- char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
- 3*length16,
- stackBuffer, capacity,
- &capacity);
- int32_t length8 = 0;
- UErrorCode errorCode = U_ZERO_ERROR;
- u_strToUTF8WithSub(utf8, capacity, &length8,
- getBuffer(), length16,
- 0xFFFD, // Standard substitution character.
- NULL, // Don't care about number of substitutions.
- &errorCode);
- if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
- utf8 = (char *)uprv_malloc(length8);
- if(utf8 != NULL) {
- utf8IsOwned = TRUE;
- errorCode = U_ZERO_ERROR;
- u_strToUTF8WithSub(utf8, length8, &length8,
- getBuffer(), length16,
- 0xFFFD, // Standard substitution character.
- NULL, // Don't care about number of substitutions.
- &errorCode);
- } else {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- }
- }
- if(U_SUCCESS(errorCode)) {
- sink.Append(utf8, length8);
- sink.Flush();
- }
- if(utf8IsOwned) {
- uprv_free(utf8);
- }
- }
-}
-
-int32_t
-UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
- int32_t length32=0;
- if(U_SUCCESS(errorCode)) {
- // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
- u_strToUTF32WithSub(utf32, capacity, &length32,
- getBuffer(), length(),
- 0xfffd, // Substitution character.
- NULL, // Don't care about number of substitutions.
- &errorCode);
- }
- return length32;
-}
-
-int32_t
-UnicodeString::indexOf(const UChar *srcChars,
- int32_t srcStart,
- int32_t srcLength,
- int32_t start,
- int32_t length) const
-{
- if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
- return -1;
- }
-
- // UnicodeString does not find empty substrings
- if(srcLength < 0 && srcChars[srcStart] == 0) {
- return -1;
- }
-
- // get the indices within bounds
- pinIndices(start, length);
-
- // find the first occurrence of the substring
- const UChar *array = getArrayStart();
- const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
- if(match == NULL) {
- return -1;
- } else {
- return (int32_t)(match - array);
- }
-}
-
-int32_t
-UnicodeString::doIndexOf(UChar c,
- int32_t start,
- int32_t length) const
-{
- // pin indices
- pinIndices(start, length);
-
- // find the first occurrence of c
- const UChar *array = getArrayStart();
- const UChar *match = u_memchr(array + start, c, length);
- if(match == NULL) {
- return -1;
- } else {
- return (int32_t)(match - array);
- }
-}
-
-int32_t
-UnicodeString::doIndexOf(UChar32 c,
- int32_t start,
- int32_t length) const {
- // pin indices
- pinIndices(start, length);
-
- // find the first occurrence of c
- const UChar *array = getArrayStart();
- const UChar *match = u_memchr32(array + start, c, length);
- if(match == NULL) {
- return -1;
- } else {
- return (int32_t)(match - array);
- }
-}
-
-int32_t
-UnicodeString::lastIndexOf(const UChar *srcChars,
- int32_t srcStart,
- int32_t srcLength,
- int32_t start,
- int32_t length) const
-{
- if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
- return -1;
- }
-
- // UnicodeString does not find empty substrings
- if(srcLength < 0 && srcChars[srcStart] == 0) {
- return -1;
- }
-
- // get the indices within bounds
- pinIndices(start, length);
-
- // find the last occurrence of the substring
- const UChar *array = getArrayStart();
- const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
- if(match == NULL) {
- return -1;
- } else {
- return (int32_t)(match - array);
- }
-}
-
-int32_t
-UnicodeString::doLastIndexOf(UChar c,
- int32_t start,
- int32_t length) const
-{
- if(isBogus()) {
- return -1;
- }
-
- // pin indices
- pinIndices(start, length);
-
- // find the last occurrence of c
- const UChar *array = getArrayStart();
- const UChar *match = u_memrchr(array + start, c, length);
- if(match == NULL) {
- return -1;
- } else {
- return (int32_t)(match - array);
- }
-}
-
-int32_t
-UnicodeString::doLastIndexOf(UChar32 c,
- int32_t start,
- int32_t length) const {
- // pin indices
- pinIndices(start, length);
-
- // find the last occurrence of c
- const UChar *array = getArrayStart();
- const UChar *match = u_memrchr32(array + start, c, length);
- if(match == NULL) {
- return -1;
- } else {
- return (int32_t)(match - array);
- }
-}
-
-//========================================
-// Write implementation
-//========================================
-
-UnicodeString&
-UnicodeString::findAndReplace(int32_t start,
- int32_t length,
- const UnicodeString& oldText,
- int32_t oldStart,
- int32_t oldLength,
- const UnicodeString& newText,
- int32_t newStart,
- int32_t newLength)
-{
- if(isBogus() || oldText.isBogus() || newText.isBogus()) {
- return *this;
- }
-
- pinIndices(start, length);
- oldText.pinIndices(oldStart, oldLength);
- newText.pinIndices(newStart, newLength);
-
- if(oldLength == 0) {
- return *this;
- }
-
- while(length > 0 && length >= oldLength) {
- int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
- if(pos < 0) {
- // no more oldText's here: done
- break;
- } else {
- // we found oldText, replace it by newText and go beyond it
- replace(pos, oldLength, newText, newStart, newLength);
- length -= pos + oldLength - start;
- start = pos + newLength;
- }
- }
-
- return *this;
-}
-
-
-void
-UnicodeString::setToBogus()
-{
- releaseArray();
-
- fUnion.fFields.fLengthAndFlags = kIsBogus;
- fUnion.fFields.fArray = 0;
- fUnion.fFields.fCapacity = 0;
-}
-
-// turn a bogus string into an empty one
-void
-UnicodeString::unBogus() {
- if(fUnion.fFields.fLengthAndFlags & kIsBogus) {
- setToEmpty();
- }
-}
-
-const char16_t *
-UnicodeString::getTerminatedBuffer() {
- if(!isWritable()) {
- return nullptr;
- }
- UChar *array = getArrayStart();
- int32_t len = length();
- if(len < getCapacity()) {
- if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
- // If len<capacity on a read-only alias, then array[len] is
- // either the original NUL (if constructed with (TRUE, s, length))
- // or one of the original string contents characters (if later truncated),
- // therefore we can assume that array[len] is initialized memory.
- if(array[len] == 0) {
- return array;
- }
- } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) {
- // kRefCounted: Do not write the NUL if the buffer is shared.
- // That is mostly safe, except when the length of one copy was modified
- // without copy-on-write, e.g., via truncate(newLength) or remove(void).
- // Then the NUL would be written into the middle of another copy's string.
-
- // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
- // Do not test if there is a NUL already because it might be uninitialized memory.
- // (That would be safe, but tools like valgrind & Purify would complain.)
- array[len] = 0;
- return array;
- }
- }
- if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
- array = getArrayStart();
- array[len] = 0;
- return array;
- } else {
- return nullptr;
- }
-}
-
-// setTo() analogous to the readonly-aliasing constructor with the same signature
-UnicodeString &
-UnicodeString::setTo(UBool isTerminated,
- ConstChar16Ptr textPtr,
- int32_t textLength)
-{
- if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
- // do not modify a string that has an "open" getBuffer(minCapacity)
- return *this;
- }
-
- const UChar *text = textPtr;
- if(text == NULL) {
- // treat as an empty string, do not alias
- releaseArray();
- setToEmpty();
- return *this;
- }
-
- if( textLength < -1 ||
- (textLength == -1 && !isTerminated) ||
- (textLength >= 0 && isTerminated && text[textLength] != 0)
- ) {
- setToBogus();
- return *this;
- }
-
- releaseArray();
-
- if(textLength == -1) {
- // text is terminated, or else it would have failed the above test
- textLength = u_strlen(text);
- }
- fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
- setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
- return *this;
-}
-
-// setTo() analogous to the writable-aliasing constructor with the same signature
-UnicodeString &
-UnicodeString::setTo(UChar *buffer,
- int32_t buffLength,
- int32_t buffCapacity) {
- if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
- // do not modify a string that has an "open" getBuffer(minCapacity)
- return *this;
- }
-
- if(buffer == NULL) {
- // treat as an empty string, do not alias
- releaseArray();
- setToEmpty();
- return *this;
- }
-
- if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
- setToBogus();
- return *this;
- } else if(buffLength == -1) {
- // buffLength = u_strlen(buff); but do not look beyond buffCapacity
- const UChar *p = buffer, *limit = buffer + buffCapacity;
- while(p != limit && *p != 0) {
- ++p;
- }
- buffLength = (int32_t)(p - buffer);
- }
-
- releaseArray();
-
- fUnion.fFields.fLengthAndFlags = kWritableAlias;
- setArray(buffer, buffLength, buffCapacity);
- return *this;
-}
-
-UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
- unBogus();
- int32_t length = utf8.length();
- int32_t capacity;
- // The UTF-16 string will be at most as long as the UTF-8 string.
- if(length <= US_STACKBUF_SIZE) {
- capacity = US_STACKBUF_SIZE;
- } else {
- capacity = length + 1; // +1 for the terminating NUL.
- }
- UChar *utf16 = getBuffer(capacity);
- int32_t length16;
- UErrorCode errorCode = U_ZERO_ERROR;
- u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
- utf8.data(), length,
- 0xfffd, // Substitution character.
- NULL, // Don't care about number of substitutions.
- &errorCode);
- releaseBuffer(length16);
- if(U_FAILURE(errorCode)) {
- setToBogus();
- }
- return *this;
-}
-
-UnicodeString&
-UnicodeString::setCharAt(int32_t offset,
- UChar c)
-{
- int32_t len = length();
- if(cloneArrayIfNeeded() && len > 0) {
- if(offset < 0) {
- offset = 0;
- } else if(offset >= len) {
- offset = len - 1;
- }
-
- getArrayStart()[offset] = c;
- }
- return *this;
-}
-
-UnicodeString&
-UnicodeString::replace(int32_t start,
- int32_t _length,
- UChar32 srcChar) {
- UChar buffer[U16_MAX_LENGTH];
- int32_t count = 0;
- UBool isError = FALSE;
- U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
- // We test isError so that the compiler does not complain that we don't.
- // If isError (srcChar is not a valid code point) then count==0 which means
- // we remove the source segment rather than replacing it with srcChar.
- return doReplace(start, _length, buffer, 0, isError ? 0 : count);
-}
-
-UnicodeString&
-UnicodeString::append(UChar32 srcChar) {
- UChar buffer[U16_MAX_LENGTH];
- int32_t _length = 0;
- UBool isError = FALSE;
- U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
- // We test isError so that the compiler does not complain that we don't.
- // If isError then _length==0 which turns the doAppend() into a no-op anyway.
- return isError ? *this : doAppend(buffer, 0, _length);
-}
-
-UnicodeString&
-UnicodeString::doReplace( int32_t start,
- int32_t length,
- const UnicodeString& src,
- int32_t srcStart,
- int32_t srcLength)
-{
- // pin the indices to legal values
- src.pinIndices(srcStart, srcLength);
-
- // get the characters from src
- // and replace the range in ourselves with them
- return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
-}
-
-UnicodeString&
-UnicodeString::doReplace(int32_t start,
- int32_t length,
- const UChar *srcChars,
- int32_t srcStart,
- int32_t srcLength)
-{
- if(!isWritable()) {
- return *this;
- }
-
- int32_t oldLength = this->length();
-
- // optimize (read-only alias).remove(0, start) and .remove(start, end)
- if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) {
- if(start == 0) {
- // remove prefix by adjusting the array pointer
- pinIndex(length);
- fUnion.fFields.fArray += length;
- fUnion.fFields.fCapacity -= length;
- setLength(oldLength - length);
- return *this;
- } else {
- pinIndex(start);
- if(length >= (oldLength - start)) {
- // remove suffix by reducing the length (like truncate())
- setLength(start);
- fUnion.fFields.fCapacity = start; // not NUL-terminated any more
- return *this;
- }
- }
- }
-
- if(start == oldLength) {
- return doAppend(srcChars, srcStart, srcLength);
- }
-
- if(srcChars == 0) {
- srcLength = 0;
- } else {
- // Perform all remaining operations relative to srcChars + srcStart.
- // From this point forward, do not use srcStart.
- srcChars += srcStart;
- if (srcLength < 0) {
- // get the srcLength if necessary
- srcLength = u_strlen(srcChars);
- }
- }
-
- // pin the indices to legal values
- pinIndices(start, length);
-
- // Calculate the size of the string after the replace.
- // Avoid int32_t overflow.
- int32_t newLength = oldLength - length;
- if(srcLength > (INT32_MAX - newLength)) {
- setToBogus();
- return *this;
- }
- newLength += srcLength;
-
- // Check for insertion into ourself
- const UChar *oldArray = getArrayStart();
- if (isBufferWritable() &&
- oldArray < srcChars + srcLength &&
- srcChars < oldArray + oldLength) {
- // Copy into a new UnicodeString and start over
- UnicodeString copy(srcChars, srcLength);
- if (copy.isBogus()) {
- setToBogus();
- return *this;
- }
- return doReplace(start, length, copy.getArrayStart(), 0, srcLength);
- }
-
- // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
- // therefore we need to keep the current fArray
- UChar oldStackBuffer[US_STACKBUF_SIZE];
- if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
- // copy the stack buffer contents because it will be overwritten with
- // fUnion.fFields values
- u_memcpy(oldStackBuffer, oldArray, oldLength);
- oldArray = oldStackBuffer;
- }
-
- // clone our array and allocate a bigger array if needed
- int32_t *bufferToDelete = 0;
- if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
- FALSE, &bufferToDelete)
- ) {
- return *this;
- }
-
- // now do the replace
-
- UChar *newArray = getArrayStart();
- if(newArray != oldArray) {
- // if fArray changed, then we need to copy everything except what will change
- us_arrayCopy(oldArray, 0, newArray, 0, start);
- us_arrayCopy(oldArray, start + length,
- newArray, start + srcLength,
- oldLength - (start + length));
- } else if(length != srcLength) {
- // fArray did not change; copy only the portion that isn't changing, leaving a hole
- us_arrayCopy(oldArray, start + length,
- newArray, start + srcLength,
- oldLength - (start + length));
- }
-
- // now fill in the hole with the new string
- us_arrayCopy(srcChars, 0, newArray, start, srcLength);
-
- setLength(newLength);
-
- // delayed delete in case srcChars == fArray when we started, and
- // to keep oldArray alive for the above operations
- if (bufferToDelete) {
- uprv_free(bufferToDelete);
- }
-
- return *this;
-}
-
-// Versions of doReplace() only for append() variants.
-// doReplace() and doAppend() optimize for different cases.
-
-UnicodeString&
-UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
- if(srcLength == 0) {
- return *this;
- }
-
- // pin the indices to legal values
- src.pinIndices(srcStart, srcLength);
- return doAppend(src.getArrayStart(), srcStart, srcLength);
-}
-
-UnicodeString&
-UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength) {
- if(!isWritable() || srcLength == 0 || srcChars == NULL) {
- return *this;
- }
-
- // Perform all remaining operations relative to srcChars + srcStart.
- // From this point forward, do not use srcStart.
- srcChars += srcStart;
-
- if(srcLength < 0) {
- // get the srcLength if necessary
- if((srcLength = u_strlen(srcChars)) == 0) {
- return *this;
- }
- }
-
- int32_t oldLength = length();
- int32_t newLength;
- if (uprv_add32_overflow(oldLength, srcLength, &newLength)) {
- setToBogus();
- return *this;
- }
-
- // Check for append onto ourself
- const UChar* oldArray = getArrayStart();
- if (isBufferWritable() &&
- oldArray < srcChars + srcLength &&
- srcChars < oldArray + oldLength) {
- // Copy into a new UnicodeString and start over
- UnicodeString copy(srcChars, srcLength);
- if (copy.isBogus()) {
- setToBogus();
- return *this;
- }
- return doAppend(copy.getArrayStart(), 0, srcLength);
- }
-
- // optimize append() onto a large-enough, owned string
- if((newLength <= getCapacity() && isBufferWritable()) ||
- cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
- UChar *newArray = getArrayStart();
- // Do not copy characters when
- // UChar *buffer=str.getAppendBuffer(...);
- // is followed by
- // str.append(buffer, length);
- // or
- // str.appendString(buffer, length)
- // or similar.
- if(srcChars != newArray + oldLength) {
- us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength);
- }
- setLength(newLength);
- }
- return *this;
-}
-
-/**
- * Replaceable API
- */
-void
-UnicodeString::handleReplaceBetween(int32_t start,
- int32_t limit,
- const UnicodeString& text) {
- replaceBetween(start, limit, text);
-}
-
-/**
- * Replaceable API
- */
-void
-UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
- if (limit <= start) {
- return; // Nothing to do; avoid bogus malloc call
- }
- UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
- // Check to make sure text is not null.
- if (text != NULL) {
- extractBetween(start, limit, text, 0);
- insert(dest, text, 0, limit - start);
- uprv_free(text);
- }
-}
-
-/**
- * Replaceable API
- *
- * NOTE: This is for the Replaceable class. There is no rep.cpp,
- * so we implement this function here.
- */
-UBool Replaceable::hasMetaData() const {
- return TRUE;
-}
-
-/**
- * Replaceable API
- */
-UBool UnicodeString::hasMetaData() const {
- return FALSE;
-}
-
-UnicodeString&
-UnicodeString::doReverse(int32_t start, int32_t length) {
- if(length <= 1 || !cloneArrayIfNeeded()) {
- return *this;
- }
-
- // pin the indices to legal values
- pinIndices(start, length);
- if(length <= 1) { // pinIndices() might have shrunk the length
- return *this;
- }
-
- UChar *left = getArrayStart() + start;
- UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)
- UChar swap;
- UBool hasSupplementary = FALSE;
-
- // Before the loop we know left<right because length>=2.
- do {
- hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
- hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
- *right-- = swap;
- } while(left < right);
- // Make sure to test the middle code unit of an odd-length string.
- // Redundant if the length is even.
- hasSupplementary |= (UBool)U16_IS_LEAD(*left);
-
- /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
- if(hasSupplementary) {
- UChar swap2;
-
- left = getArrayStart() + start;
- right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
- while(left < right) {
- if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
- *left++ = swap2;
- *left++ = swap;
- } else {
- ++left;
- }
- }
- }
-
- return *this;
-}
-
-UBool
-UnicodeString::padLeading(int32_t targetLength,
- UChar padChar)
-{
- int32_t oldLength = length();
- if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
- return FALSE;
- } else {
- // move contents up by padding width
- UChar *array = getArrayStart();
- int32_t start = targetLength - oldLength;
- us_arrayCopy(array, 0, array, start, oldLength);
-
- // fill in padding character
- while(--start >= 0) {
- array[start] = padChar;
- }
- setLength(targetLength);
- return TRUE;
- }
-}
-
-UBool
-UnicodeString::padTrailing(int32_t targetLength,
- UChar padChar)
-{
- int32_t oldLength = length();
- if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
- return FALSE;
- } else {
- // fill in padding character
- UChar *array = getArrayStart();
- int32_t length = targetLength;
- while(--length >= oldLength) {
- array[length] = padChar;
- }
- setLength(targetLength);
- return TRUE;
- }
-}
-
-//========================================
-// Hashing
-//========================================
-int32_t
-UnicodeString::doHashCode() const
-{
- /* Delegate hash computation to uhash. This makes UnicodeString
- * hashing consistent with UChar* hashing. */
- int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
- if (hashCode == kInvalidHashCode) {
- hashCode = kEmptyHashCode;
- }
- return hashCode;
-}
-
-//========================================
-// External Buffer
-//========================================
-
-char16_t *
-UnicodeString::getBuffer(int32_t minCapacity) {
- if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
- fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer;
- setZeroLength();
- return getArrayStart();
- } else {
- return nullptr;
- }
-}
-
-void
-UnicodeString::releaseBuffer(int32_t newLength) {
- if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) {
- // set the new fLength
- int32_t capacity=getCapacity();
- if(newLength==-1) {
- // the new length is the string length, capped by fCapacity
- const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
- while(p<limit && *p!=0) {
- ++p;
- }
- newLength=(int32_t)(p-array);
- } else if(newLength>capacity) {
- newLength=capacity;
- }
- setLength(newLength);
- fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
- }
-}
-
-//========================================
-// Miscellaneous
-//========================================
-UBool
-UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
- int32_t growCapacity,
- UBool doCopyArray,
- int32_t **pBufferToDelete,
- UBool forceClone) {
- // default parameters need to be static, therefore
- // the defaults are -1 to have convenience defaults
- if(newCapacity == -1) {
- newCapacity = getCapacity();
- }
-
- // while a getBuffer(minCapacity) is "open",
- // prevent any modifications of the string by returning FALSE here
- // if the string is bogus, then only an assignment or similar can revive it
- if(!isWritable()) {
- return FALSE;
- }
-
- /*
- * We need to make a copy of the array if
- * the buffer is read-only, or
- * the buffer is refCounted (shared), and refCount>1, or
- * the buffer is too small.
- * Return FALSE if memory could not be allocated.
- */
- if(forceClone ||
- fUnion.fFields.fLengthAndFlags & kBufferIsReadonly ||
- (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) ||
- newCapacity > getCapacity()
- ) {
- // check growCapacity for default value and use of the stack buffer
- if(growCapacity < 0) {
- growCapacity = newCapacity;
- } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
- growCapacity = US_STACKBUF_SIZE;
- }
-
- // save old values
- UChar oldStackBuffer[US_STACKBUF_SIZE];
- UChar *oldArray;
- int32_t oldLength = length();
- int16_t flags = fUnion.fFields.fLengthAndFlags;
-
- if(flags&kUsingStackBuffer) {
- U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
- if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
- // copy the stack buffer contents because it will be overwritten with
- // fUnion.fFields values
- us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength);
- oldArray = oldStackBuffer;
- } else {
- oldArray = NULL; // no need to copy from the stack buffer to itself
- }
- } else {
- oldArray = fUnion.fFields.fArray;
- U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
- }
-
- // allocate a new array
- if(allocate(growCapacity) ||
- (newCapacity < growCapacity && allocate(newCapacity))
- ) {
- if(doCopyArray) {
- // copy the contents
- // do not copy more than what fits - it may be smaller than before
- int32_t minLength = oldLength;
- newCapacity = getCapacity();
- if(newCapacity < minLength) {
- minLength = newCapacity;
- }
- if(oldArray != NULL) {
- us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
- }
- setLength(minLength);
- } else {
- setZeroLength();
- }
-
- // release the old array
- if(flags & kRefCounted) {
- // the array is refCounted; decrement and release if 0
- u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
- if(umtx_atomic_dec(pRefCount) == 0) {
- if(pBufferToDelete == 0) {
- // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
- // is defined as volatile. (Volatile has useful non-standard behavior
- // with this compiler.)
- uprv_free((void *)pRefCount);
- } else {
- // the caller requested to delete it himself
- *pBufferToDelete = (int32_t *)pRefCount;
- }
- }
- }
- } else {
- // not enough memory for growCapacity and not even for the smaller newCapacity
- // reset the old values for setToBogus() to release the array
- if(!(flags&kUsingStackBuffer)) {
- fUnion.fFields.fArray = oldArray;
- }
- fUnion.fFields.fLengthAndFlags = flags;
- setToBogus();
- return FALSE;
- }
- }
- return TRUE;
-}
-
-// UnicodeStringAppendable ------------------------------------------------- ***
-
-UnicodeStringAppendable::~UnicodeStringAppendable() {}
-
-UBool
-UnicodeStringAppendable::appendCodeUnit(UChar c) {
- return str.doAppend(&c, 0, 1).isWritable();
-}
-
-UBool
-UnicodeStringAppendable::appendCodePoint(UChar32 c) {
- UChar buffer[U16_MAX_LENGTH];
- int32_t cLength = 0;
- UBool isError = FALSE;
- U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
- return !isError && str.doAppend(buffer, 0, cLength).isWritable();
-}
-
-UBool
-UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
- return str.doAppend(s, 0, length).isWritable();
-}
-
-UBool
-UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
- return str.cloneArrayIfNeeded(str.length() + appendCapacity);
-}
-
-UChar *
-UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
- int32_t desiredCapacityHint,
- UChar *scratch, int32_t scratchCapacity,
- int32_t *resultCapacity) {
- if(minCapacity < 1 || scratchCapacity < minCapacity) {
- *resultCapacity = 0;
- return NULL;
- }
- int32_t oldLength = str.length();
- if(minCapacity <= (kMaxCapacity - oldLength) &&
- desiredCapacityHint <= (kMaxCapacity - oldLength) &&
- str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
- *resultCapacity = str.getCapacity() - oldLength;
- return str.getArrayStart() + oldLength;
- }
- *resultCapacity = scratchCapacity;
- return scratch;
-}
-
-U_NAMESPACE_END
-
-U_NAMESPACE_USE
-
-U_CAPI int32_t U_EXPORT2
-uhash_hashUnicodeString(const UElement key) {
- const UnicodeString *str = (const UnicodeString*) key.pointer;
- return (str == NULL) ? 0 : str->hashCode();
-}
-
-// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
-// does not depend on hashtable code.
-U_CAPI UBool U_EXPORT2
-uhash_compareUnicodeString(const UElement key1, const UElement key2) {
- const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
- const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
- if (str1 == str2) {
- return TRUE;
- }
- if (str1 == NULL || str2 == NULL) {
- return FALSE;
- }
- return *str1 == *str2;
-}
-
-#ifdef U_STATIC_IMPLEMENTATION
-/*
-This should never be called. It is defined here to make sure that the
-virtual vector deleting destructor is defined within unistr.cpp.
-The vector deleting destructor is already a part of UObject,
-but defining it here makes sure that it is included with this object file.
-This makes sure that static library dependencies are kept to a minimum.
-*/
-static void uprv_UnicodeStringDummy(void) {
- delete [] (new UnicodeString[2]);
-}
-#endif
diff --git a/contrib/libs/icu/common/unistr_case.cpp b/contrib/libs/icu/common/unistr_case.cpp
deleted file mode 100644
index 2138d60c01c..00000000000
--- a/contrib/libs/icu/common/unistr_case.cpp
+++ /dev/null
@@ -1,250 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1999-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: unistr_case.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:2
-*
-* created on: 2004aug19
-* created by: Markus W. Scherer
-*
-* Case-mapping functions moved here from unistr.cpp
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/brkiter.h"
-#include "unicode/casemap.h"
-#include "unicode/edits.h"
-#include "unicode/putil.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "unicode/ustring.h"
-#include "unicode/unistr.h"
-#include "unicode/uchar.h"
-#include "uassert.h"
-#include "ucasemap_imp.h"
-#include "uelement.h"
-
-U_NAMESPACE_BEGIN
-
-//========================================
-// Read-only implementation
-//========================================
-
-int8_t
-UnicodeString::doCaseCompare(int32_t start,
- int32_t length,
- const UChar *srcChars,
- int32_t srcStart,
- int32_t srcLength,
- uint32_t options) const
-{
- // compare illegal string values
- // treat const UChar *srcChars==NULL as an empty string
- if(isBogus()) {
- return -1;
- }
-
- // pin indices to legal values
- pinIndices(start, length);
-
- if(srcChars == NULL) {
- srcStart = srcLength = 0;
- }
-
- // get the correct pointer
- const UChar *chars = getArrayStart();
-
- chars += start;
- if(srcStart!=0) {
- srcChars += srcStart;
- }
-
- if(chars != srcChars) {
- UErrorCode errorCode=U_ZERO_ERROR;
- int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
- options|U_COMPARE_IGNORE_CASE, &errorCode);
- if(result!=0) {
- return (int8_t)(result >> 24 | 1);
- }
- } else {
- // get the srcLength if necessary
- if(srcLength < 0) {
- srcLength = u_strlen(srcChars + srcStart);
- }
- if(length != srcLength) {
- return (int8_t)((length - srcLength) >> 24 | 1);
- }
- }
- return 0;
-}
-
-//========================================
-// Write implementation
-//========================================
-
-UnicodeString &
-UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
- UStringCaseMapper *stringCaseMapper) {
- if(isEmpty() || !isWritable()) {
- // nothing to do
- return *this;
- }
-
- UChar oldBuffer[2 * US_STACKBUF_SIZE];
- UChar *oldArray;
- int32_t oldLength = length();
- int32_t newLength;
- UBool writable = isBufferWritable();
- UErrorCode errorCode = U_ZERO_ERROR;
-
-#if !UCONFIG_NO_BREAK_ITERATION
- // Read-only alias to the original string contents for the titlecasing BreakIterator.
- // We cannot set the iterator simply to *this because *this is being modified.
- UnicodeString oldString;
-#endif
-
- // Try to avoid heap-allocating a new character array for this string.
- if (writable ? oldLength <= UPRV_LENGTHOF(oldBuffer) : oldLength < US_STACKBUF_SIZE) {
- // Short string: Copy the contents into a temporary buffer and
- // case-map back into the current array, or into the stack buffer.
- UChar *buffer = getArrayStart();
- int32_t capacity;
- oldArray = oldBuffer;
- u_memcpy(oldBuffer, buffer, oldLength);
- if (writable) {
- capacity = getCapacity();
- } else {
- // Switch from the read-only alias or shared heap buffer to the stack buffer.
- if (!cloneArrayIfNeeded(US_STACKBUF_SIZE, US_STACKBUF_SIZE, /* doCopyArray= */ FALSE)) {
- return *this;
- }
- U_ASSERT(fUnion.fFields.fLengthAndFlags & kUsingStackBuffer);
- buffer = fUnion.fStackFields.fBuffer;
- capacity = US_STACKBUF_SIZE;
- }
-#if !UCONFIG_NO_BREAK_ITERATION
- if (iter != nullptr) {
- oldString.setTo(FALSE, oldArray, oldLength);
- iter->setText(oldString);
- }
-#endif
- newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
- buffer, capacity,
- oldArray, oldLength, NULL, errorCode);
- if (U_SUCCESS(errorCode)) {
- setLength(newLength);
- return *this;
- } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
- // common overflow handling below
- } else {
- setToBogus();
- return *this;
- }
- } else {
- // Longer string or read-only buffer:
- // Collect only changes and then apply them to this string.
- // Case mapping often changes only small parts of a string,
- // and often does not change its length.
- oldArray = getArrayStart();
- Edits edits;
- UChar replacementChars[200];
-#if !UCONFIG_NO_BREAK_ITERATION
- if (iter != nullptr) {
- oldString.setTo(FALSE, oldArray, oldLength);
- iter->setText(oldString);
- }
-#endif
- stringCaseMapper(caseLocale, options | U_OMIT_UNCHANGED_TEXT, UCASEMAP_BREAK_ITERATOR
- replacementChars, UPRV_LENGTHOF(replacementChars),
- oldArray, oldLength, &edits, errorCode);
- if (U_SUCCESS(errorCode)) {
- // Grow the buffer at most once, not for multiple doReplace() calls.
- newLength = oldLength + edits.lengthDelta();
- if (newLength > oldLength && !cloneArrayIfNeeded(newLength, newLength)) {
- return *this;
- }
- for (Edits::Iterator ei = edits.getCoarseChangesIterator(); ei.next(errorCode);) {
- doReplace(ei.destinationIndex(), ei.oldLength(),
- replacementChars, ei.replacementIndex(), ei.newLength());
- }
- if (U_FAILURE(errorCode)) {
- setToBogus();
- }
- return *this;
- } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
- // common overflow handling below
- newLength = oldLength + edits.lengthDelta();
- } else {
- setToBogus();
- return *this;
- }
- }
-
- // Handle buffer overflow, newLength is known.
- // We need to allocate a new buffer for the internal string case mapping function.
- // This is very similar to how doReplace() keeps the old array pointer
- // and deletes the old array itself after it is done.
- // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
- int32_t *bufferToDelete = 0;
- if (!cloneArrayIfNeeded(newLength, newLength, FALSE, &bufferToDelete, TRUE)) {
- return *this;
- }
- errorCode = U_ZERO_ERROR;
- // No need to iter->setText() again: The case mapper restarts via iter->first().
- newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
- getArrayStart(), getCapacity(),
- oldArray, oldLength, NULL, errorCode);
- if (bufferToDelete) {
- uprv_free(bufferToDelete);
- }
- if (U_SUCCESS(errorCode)) {
- setLength(newLength);
- } else {
- setToBogus();
- }
- return *this;
-}
-
-UnicodeString &
-UnicodeString::foldCase(uint32_t options) {
- return caseMap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold);
-}
-
-U_NAMESPACE_END
-
-// Defined here to reduce dependencies on break iterator
-U_CAPI int32_t U_EXPORT2
-uhash_hashCaselessUnicodeString(const UElement key) {
- U_NAMESPACE_USE
- const UnicodeString *str = (const UnicodeString*) key.pointer;
- if (str == NULL) {
- return 0;
- }
- // Inefficient; a better way would be to have a hash function in
- // UnicodeString that does case folding on the fly.
- UnicodeString copy(*str);
- return copy.foldCase().hashCode();
-}
-
-// Defined here to reduce dependencies on break iterator
-U_CAPI UBool U_EXPORT2
-uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) {
- U_NAMESPACE_USE
- const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
- const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
- if (str1 == str2) {
- return TRUE;
- }
- if (str1 == NULL || str2 == NULL) {
- return FALSE;
- }
- return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
-}
diff --git a/contrib/libs/icu/common/unistr_case_locale.cpp b/contrib/libs/icu/common/unistr_case_locale.cpp
deleted file mode 100644
index f0f3048d06f..00000000000
--- a/contrib/libs/icu/common/unistr_case_locale.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: unistr_case_locale.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2011may31
-* created by: Markus W. Scherer
-*
-* Locale-sensitive case mapping functions (ones that call uloc_getDefault())
-* were moved here to break dependency cycles among parts of the common library.
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/locid.h"
-#include "unicode/ucasemap.h"
-#include "unicode/unistr.h"
-#include "ucasemap_imp.h"
-
-U_NAMESPACE_BEGIN
-
-//========================================
-// Write implementation
-//========================================
-
-UnicodeString &
-UnicodeString::toLower() {
- return caseMap(ustrcase_getCaseLocale(NULL), 0,
- UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower);
-}
-
-UnicodeString &
-UnicodeString::toLower(const Locale &locale) {
- return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0,
- UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower);
-}
-
-UnicodeString &
-UnicodeString::toUpper() {
- return caseMap(ustrcase_getCaseLocale(NULL), 0,
- UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper);
-}
-
-UnicodeString &
-UnicodeString::toUpper(const Locale &locale) {
- return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0,
- UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper);
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/unistr_cnv.cpp b/contrib/libs/icu/common/unistr_cnv.cpp
deleted file mode 100644
index 64d3c16801c..00000000000
--- a/contrib/libs/icu/common/unistr_cnv.cpp
+++ /dev/null
@@ -1,417 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1999-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: unistr_cnv.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:2
-*
-* created on: 2004aug19
-* created by: Markus W. Scherer
-*
-* Character conversion functions moved here from unistr.cpp
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/putil.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "unicode/ustring.h"
-#include "unicode/unistr.h"
-#include "unicode/ucnv.h"
-#include "ucnv_imp.h"
-#include "putilimp.h"
-#include "ustr_cnv.h"
-#include "ustr_imp.h"
-
-U_NAMESPACE_BEGIN
-
-//========================================
-// Constructors
-//========================================
-
-#if !U_CHARSET_IS_UTF8
-
-UnicodeString::UnicodeString(const char *codepageData) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- if(codepageData != 0) {
- doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), 0);
- }
-}
-
-UnicodeString::UnicodeString(const char *codepageData,
- int32_t dataLength) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- if(codepageData != 0) {
- doCodepageCreate(codepageData, dataLength, 0);
- }
-}
-
-// else see unistr.cpp
-#endif
-
-UnicodeString::UnicodeString(const char *codepageData,
- const char *codepage) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- if(codepageData != 0) {
- doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage);
- }
-}
-
-UnicodeString::UnicodeString(const char *codepageData,
- int32_t dataLength,
- const char *codepage) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- if(codepageData != 0) {
- doCodepageCreate(codepageData, dataLength, codepage);
- }
-}
-
-UnicodeString::UnicodeString(const char *src, int32_t srcLength,
- UConverter *cnv,
- UErrorCode &errorCode) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- if(U_SUCCESS(errorCode)) {
- // check arguments
- if(src==NULL) {
- // treat as an empty string, do nothing more
- } else if(srcLength<-1) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- } else {
- // get input length
- if(srcLength==-1) {
- srcLength=(int32_t)uprv_strlen(src);
- }
- if(srcLength>0) {
- if(cnv!=0) {
- // use the provided converter
- ucnv_resetToUnicode(cnv);
- doCodepageCreate(src, srcLength, cnv, errorCode);
- } else {
- // use the default converter
- cnv=u_getDefaultConverter(&errorCode);
- doCodepageCreate(src, srcLength, cnv, errorCode);
- u_releaseDefaultConverter(cnv);
- }
- }
- }
-
- if(U_FAILURE(errorCode)) {
- setToBogus();
- }
- }
-}
-
-//========================================
-// Codeset conversion
-//========================================
-
-#if !U_CHARSET_IS_UTF8
-
-int32_t
-UnicodeString::extract(int32_t start,
- int32_t length,
- char *target,
- uint32_t dstSize) const {
- return extract(start, length, target, dstSize, 0);
-}
-
-// else see unistr.cpp
-#endif
-
-int32_t
-UnicodeString::extract(int32_t start,
- int32_t length,
- char *target,
- uint32_t dstSize,
- const char *codepage) const
-{
- // if the arguments are illegal, then do nothing
- if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
- return 0;
- }
-
- // pin the indices to legal values
- pinIndices(start, length);
-
- // We need to cast dstSize to int32_t for all subsequent code.
- // I don't know why the API was defined with uint32_t but we are stuck with it.
- // Also, dstSize==0xffffffff means "unlimited" but if we use target+dstSize
- // as a limit in some functions, it may wrap around and yield a pointer
- // that compares less-than target.
- int32_t capacity;
- if(dstSize < 0x7fffffff) {
- // Assume that the capacity is real and a limit pointer won't wrap around.
- capacity = (int32_t)dstSize;
- } else {
- // Pin the capacity so that a limit pointer does not wrap around.
- char *targetLimit = (char *)U_MAX_PTR(target);
- // U_MAX_PTR(target) returns a targetLimit that is at most 0x7fffffff
- // greater than target and does not wrap around the top of the address space.
- capacity = (int32_t)(targetLimit - target);
- }
-
- // create the converter
- UConverter *converter;
- UErrorCode status = U_ZERO_ERROR;
-
- // just write the NUL if the string length is 0
- if(length == 0) {
- return u_terminateChars(target, capacity, 0, &status);
- }
-
- // if the codepage is the default, use our cache
- // if it is an empty string, then use the "invariant character" conversion
- if (codepage == 0) {
- const char *defaultName = ucnv_getDefaultName();
- if(UCNV_FAST_IS_UTF8(defaultName)) {
- return toUTF8(start, length, target, capacity);
- }
- converter = u_getDefaultConverter(&status);
- } else if (*codepage == 0) {
- // use the "invariant characters" conversion
- int32_t destLength;
- if(length <= capacity) {
- destLength = length;
- } else {
- destLength = capacity;
- }
- u_UCharsToChars(getArrayStart() + start, target, destLength);
- return u_terminateChars(target, capacity, length, &status);
- } else {
- converter = ucnv_open(codepage, &status);
- }
-
- length = doExtract(start, length, target, capacity, converter, status);
-
- // close the converter
- if (codepage == 0) {
- u_releaseDefaultConverter(converter);
- } else {
- ucnv_close(converter);
- }
-
- return length;
-}
-
-int32_t
-UnicodeString::extract(char *dest, int32_t destCapacity,
- UConverter *cnv,
- UErrorCode &errorCode) const
-{
- if(U_FAILURE(errorCode)) {
- return 0;
- }
-
- if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- // nothing to do?
- if(isEmpty()) {
- return u_terminateChars(dest, destCapacity, 0, &errorCode);
- }
-
- // get the converter
- UBool isDefaultConverter;
- if(cnv==0) {
- isDefaultConverter=TRUE;
- cnv=u_getDefaultConverter(&errorCode);
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- } else {
- isDefaultConverter=FALSE;
- ucnv_resetFromUnicode(cnv);
- }
-
- // convert
- int32_t len=doExtract(0, length(), dest, destCapacity, cnv, errorCode);
-
- // release the converter
- if(isDefaultConverter) {
- u_releaseDefaultConverter(cnv);
- }
-
- return len;
-}
-
-int32_t
-UnicodeString::doExtract(int32_t start, int32_t length,
- char *dest, int32_t destCapacity,
- UConverter *cnv,
- UErrorCode &errorCode) const
-{
- if(U_FAILURE(errorCode)) {
- if(destCapacity!=0) {
- *dest=0;
- }
- return 0;
- }
-
- const UChar *src=getArrayStart()+start, *srcLimit=src+length;
- char *originalDest=dest;
- const char *destLimit;
-
- if(destCapacity==0) {
- destLimit=dest=0;
- } else if(destCapacity==-1) {
- // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.
- destLimit=(char*)U_MAX_PTR(dest);
- // for NUL-termination, translate into highest int32_t
- destCapacity=0x7fffffff;
- } else {
- destLimit=dest+destCapacity;
- }
-
- // perform the conversion
- ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
- length=(int32_t)(dest-originalDest);
-
- // if an overflow occurs, then get the preflighting length
- if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
- char buffer[1024];
-
- destLimit=buffer+sizeof(buffer);
- do {
- dest=buffer;
- errorCode=U_ZERO_ERROR;
- ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
- length+=(int32_t)(dest-buffer);
- } while(errorCode==U_BUFFER_OVERFLOW_ERROR);
- }
-
- return u_terminateChars(originalDest, destCapacity, length, &errorCode);
-}
-
-void
-UnicodeString::doCodepageCreate(const char *codepageData,
- int32_t dataLength,
- const char *codepage)
-{
- // if there's nothing to convert, do nothing
- if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
- return;
- }
- if(dataLength == -1) {
- dataLength = (int32_t)uprv_strlen(codepageData);
- }
-
- UErrorCode status = U_ZERO_ERROR;
-
- // create the converter
- // if the codepage is the default, use our cache
- // if it is an empty string, then use the "invariant character" conversion
- UConverter *converter;
- if (codepage == 0) {
- const char *defaultName = ucnv_getDefaultName();
- if(UCNV_FAST_IS_UTF8(defaultName)) {
- setToUTF8(StringPiece(codepageData, dataLength));
- return;
- }
- converter = u_getDefaultConverter(&status);
- } else if(*codepage == 0) {
- // use the "invariant characters" conversion
- if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) {
- u_charsToUChars(codepageData, getArrayStart(), dataLength);
- setLength(dataLength);
- } else {
- setToBogus();
- }
- return;
- } else {
- converter = ucnv_open(codepage, &status);
- }
-
- // if we failed, set the appropriate flags and return
- if(U_FAILURE(status)) {
- setToBogus();
- return;
- }
-
- // perform the conversion
- doCodepageCreate(codepageData, dataLength, converter, status);
- if(U_FAILURE(status)) {
- setToBogus();
- }
-
- // close the converter
- if(codepage == 0) {
- u_releaseDefaultConverter(converter);
- } else {
- ucnv_close(converter);
- }
-}
-
-void
-UnicodeString::doCodepageCreate(const char *codepageData,
- int32_t dataLength,
- UConverter *converter,
- UErrorCode &status)
-{
- if(U_FAILURE(status)) {
- return;
- }
-
- // set up the conversion parameters
- const char *mySource = codepageData;
- const char *mySourceEnd = mySource + dataLength;
- UChar *array, *myTarget;
-
- // estimate the size needed:
- int32_t arraySize;
- if(dataLength <= US_STACKBUF_SIZE) {
- // try to use the stack buffer
- arraySize = US_STACKBUF_SIZE;
- } else {
- // 1.25 UChar's per source byte should cover most cases
- arraySize = dataLength + (dataLength >> 2);
- }
-
- // we do not care about the current contents
- UBool doCopyArray = FALSE;
- for(;;) {
- if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) {
- setToBogus();
- break;
- }
-
- // perform the conversion
- array = getArrayStart();
- myTarget = array + length();
- ucnv_toUnicode(converter, &myTarget, array + getCapacity(),
- &mySource, mySourceEnd, 0, TRUE, &status);
-
- // update the conversion parameters
- setLength((int32_t)(myTarget - array));
-
- // allocate more space and copy data, if needed
- if(status == U_BUFFER_OVERFLOW_ERROR) {
- // reset the error code
- status = U_ZERO_ERROR;
-
- // keep the previous conversion results
- doCopyArray = TRUE;
-
- // estimate the new size needed, larger than before
- // try 2 UChar's per remaining source byte
- arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource));
- } else {
- break;
- }
- }
-}
-
-U_NAMESPACE_END
-
-#endif
diff --git a/contrib/libs/icu/common/unistr_props.cpp b/contrib/libs/icu/common/unistr_props.cpp
deleted file mode 100644
index 40064757902..00000000000
--- a/contrib/libs/icu/common/unistr_props.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1999-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: unistr_props.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:2
-*
-* created on: 2004aug25
-* created by: Markus W. Scherer
-*
-* Character property dependent functions moved here from unistr.cpp
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "unicode/unistr.h"
-#include "unicode/utf16.h"
-
-U_NAMESPACE_BEGIN
-
-UnicodeString&
-UnicodeString::trim()
-{
- if(isBogus()) {
- return *this;
- }
-
- UChar *array = getArrayStart();
- UChar32 c;
- int32_t oldLength = this->length();
- int32_t i = oldLength, length;
-
- // first cut off trailing white space
- for(;;) {
- length = i;
- if(i <= 0) {
- break;
- }
- U16_PREV(array, 0, i, c);
- if(!(c == 0x20 || u_isWhitespace(c))) {
- break;
- }
- }
- if(length < oldLength) {
- setLength(length);
- }
-
- // find leading white space
- int32_t start;
- i = 0;
- for(;;) {
- start = i;
- if(i >= length) {
- break;
- }
- U16_NEXT(array, i, length, c);
- if(!(c == 0x20 || u_isWhitespace(c))) {
- break;
- }
- }
-
- // move string forward over leading white space
- if(start > 0) {
- doReplace(0, start, 0, 0, 0);
- }
-
- return *this;
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/unistr_titlecase_brkiter.cpp b/contrib/libs/icu/common/unistr_titlecase_brkiter.cpp
deleted file mode 100644
index 4969884b0dc..00000000000
--- a/contrib/libs/icu/common/unistr_titlecase_brkiter.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: unistr_titlecase_brkiter.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:2
-*
-* created on: 2011may30
-* created by: Markus W. Scherer
-*
-* Titlecasing functions that are based on BreakIterator
-* were moved here to break dependency cycles among parts of the common library.
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/brkiter.h"
-#include "unicode/locid.h"
-#include "unicode/ucasemap.h"
-#include "unicode/unistr.h"
-#include "ucasemap_imp.h"
-
-U_NAMESPACE_BEGIN
-
-UnicodeString &
-UnicodeString::toTitle(BreakIterator *iter) {
- return toTitle(iter, Locale::getDefault(), 0);
-}
-
-UnicodeString &
-UnicodeString::toTitle(BreakIterator *iter, const Locale &locale) {
- return toTitle(iter, locale, 0);
-}
-
-UnicodeString &
-UnicodeString::toTitle(BreakIterator *iter, const Locale &locale, uint32_t options) {
- LocalPointer<BreakIterator> ownedIter;
- UErrorCode errorCode = U_ZERO_ERROR;
- iter = ustrcase_getTitleBreakIterator(&locale, "", options, iter, ownedIter, errorCode);
- if (iter == nullptr) {
- setToBogus();
- return *this;
- }
- caseMap(ustrcase_getCaseLocale(locale.getBaseName()), options, iter, ustrcase_internalToTitle);
- return *this;
-}
-
-U_NAMESPACE_END
-
-#endif // !UCONFIG_NO_BREAK_ITERATION
diff --git a/contrib/libs/icu/common/unistrappender.h b/contrib/libs/icu/common/unistrappender.h
deleted file mode 100644
index 75fcb9e775f..00000000000
--- a/contrib/libs/icu/common/unistrappender.h
+++ /dev/null
@@ -1,90 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 2015, International Business Machines Corporation and
-* others. All Rights Reserved.
-******************************************************************************
-*
-* File unistrappender.h
-******************************************************************************
-*/
-
-#ifndef __UNISTRAPPENDER_H__
-#define __UNISTRAPPENDER_H__
-
-#include "unicode/unistr.h"
-#include "unicode/uobject.h"
-#include "unicode/utf16.h"
-#include "unicode/utypes.h"
-#include "cmemory.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * An optimization for the slowness of calling UnicodeString::append()
- * one character at a time in a loop. It stores appends in a buffer while
- * never actually calling append on the unicode string unless the buffer
- * fills up or is flushed.
- *
- * proper usage:
- * {
- * UnicodeStringAppender appender(astring);
- * for (int32_t i = 0; i < 100; ++i) {
- * appender.append((UChar) i);
- * }
- * // appender flushed automatically when it goes out of scope.
- * }
- */
-class UnicodeStringAppender : public UMemory {
-public:
-
- /**
- * dest is the UnicodeString being appended to. It must always
- * exist while this instance exists.
- */
- UnicodeStringAppender(UnicodeString &dest) : fDest(&dest), fIdx(0) { }
-
- inline void append(UChar x) {
- if (fIdx == UPRV_LENGTHOF(fBuffer)) {
- fDest->append(fBuffer, 0, fIdx);
- fIdx = 0;
- }
- fBuffer[fIdx++] = x;
- }
-
- inline void append(UChar32 x) {
- if (fIdx >= UPRV_LENGTHOF(fBuffer) - 1) {
- fDest->append(fBuffer, 0, fIdx);
- fIdx = 0;
- }
- U16_APPEND_UNSAFE(fBuffer, fIdx, x);
- }
-
- /**
- * Ensures that all appended characters have been written out to dest.
- */
- inline void flush() {
- if (fIdx) {
- fDest->append(fBuffer, 0, fIdx);
- }
- fIdx = 0;
- }
-
- /**
- * flush the buffer when we go out of scope.
- */
- ~UnicodeStringAppender() {
- flush();
- }
-private:
- UnicodeString *fDest;
- int32_t fIdx;
- UChar fBuffer[32];
- UnicodeStringAppender(const UnicodeStringAppender &other);
- UnicodeStringAppender &operator=(const UnicodeStringAppender &other);
-};
-
-U_NAMESPACE_END
-
-#endif
diff --git a/contrib/libs/icu/common/unorm.cpp b/contrib/libs/icu/common/unorm.cpp
deleted file mode 100644
index 2d9f46052ff..00000000000
--- a/contrib/libs/icu/common/unorm.cpp
+++ /dev/null
@@ -1,280 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (c) 1996-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-******************************************************************************
-* File unorm.cpp
-*
-* Created by: Vladimir Weinstein 12052000
-*
-* Modification history :
-*
-* Date Name Description
-* 02/01/01 synwee Added normalization quickcheck enum and method.
-* 02/12/01 synwee Commented out quickcheck util api has been approved
-* Added private method for doing FCD checks
-* 02/23/01 synwee Modified quickcheck and checkFCE to run through
-* string for codepoints < 0x300 for the normalization
-* mode NFC.
-* 05/25/01+ Markus Scherer total rewrite, implement all normalization here
-* instead of just wrappers around normlzr.cpp,
-* load unorm.dat, support Unicode 3.1 with
-* supplementary code points, etc.
-* 2009-nov..2010-jan Markus Scherer total rewrite, new Normalizer2 API & code
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_NORMALIZATION
-
-#include "unicode/udata.h"
-#include "unicode/ustring.h"
-#include "unicode/uiter.h"
-#include "unicode/unorm.h"
-#include "unicode/unorm2.h"
-#include "normalizer2impl.h"
-#include "unormimp.h"
-#include "uprops.h"
-#include "ustr_imp.h"
-
-U_NAMESPACE_USE
-
-/* quick check functions ---------------------------------------------------- */
-
-U_CAPI UNormalizationCheckResult U_EXPORT2
-unorm_quickCheck(const UChar *src,
- int32_t srcLength,
- UNormalizationMode mode,
- UErrorCode *pErrorCode) {
- const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode);
- return unorm2_quickCheck((const UNormalizer2 *)n2, src, srcLength, pErrorCode);
-}
-
-U_CAPI UNormalizationCheckResult U_EXPORT2
-unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,
- UNormalizationMode mode, int32_t options,
- UErrorCode *pErrorCode) {
- const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode);
- if(options&UNORM_UNICODE_3_2) {
- FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode));
- return unorm2_quickCheck(
- reinterpret_cast<const UNormalizer2 *>(static_cast<Normalizer2 *>(&fn2)),
- src, srcLength, pErrorCode);
- } else {
- return unorm2_quickCheck((const UNormalizer2 *)n2, src, srcLength, pErrorCode);
- }
-}
-
-U_CAPI UBool U_EXPORT2
-unorm_isNormalized(const UChar *src, int32_t srcLength,
- UNormalizationMode mode,
- UErrorCode *pErrorCode) {
- const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode);
- return unorm2_isNormalized((const UNormalizer2 *)n2, src, srcLength, pErrorCode);
-}
-
-U_CAPI UBool U_EXPORT2
-unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
- UNormalizationMode mode, int32_t options,
- UErrorCode *pErrorCode) {
- const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode);
- if(options&UNORM_UNICODE_3_2) {
- FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode));
- return unorm2_isNormalized(
- reinterpret_cast<const UNormalizer2 *>(static_cast<Normalizer2 *>(&fn2)),
- src, srcLength, pErrorCode);
- } else {
- return unorm2_isNormalized((const UNormalizer2 *)n2, src, srcLength, pErrorCode);
- }
-}
-
-/* normalize() API ---------------------------------------------------------- */
-
-/** Public API for normalizing. */
-U_CAPI int32_t U_EXPORT2
-unorm_normalize(const UChar *src, int32_t srcLength,
- UNormalizationMode mode, int32_t options,
- UChar *dest, int32_t destCapacity,
- UErrorCode *pErrorCode) {
- const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode);
- if(options&UNORM_UNICODE_3_2) {
- FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode));
- return unorm2_normalize(
- reinterpret_cast<const UNormalizer2 *>(static_cast<Normalizer2 *>(&fn2)),
- src, srcLength, dest, destCapacity, pErrorCode);
- } else {
- return unorm2_normalize((const UNormalizer2 *)n2,
- src, srcLength, dest, destCapacity, pErrorCode);
- }
-}
-
-
-/* iteration functions ------------------------------------------------------ */
-
-static int32_t
-_iterate(UCharIterator *src, UBool forward,
- UChar *dest, int32_t destCapacity,
- const Normalizer2 *n2,
- UBool doNormalize, UBool *pNeededToNormalize,
- UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(destCapacity<0 || (dest==NULL && destCapacity>0) || src==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- if(pNeededToNormalize!=NULL) {
- *pNeededToNormalize=FALSE;
- }
- if(!(forward ? src->hasNext(src) : src->hasPrevious(src))) {
- return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
- }
-
- UnicodeString buffer;
- UChar32 c;
- if(forward) {
- /* get one character and ignore its properties */
- buffer.append(uiter_next32(src));
- /* get all following characters until we see a boundary */
- while((c=uiter_next32(src))>=0) {
- if(n2->hasBoundaryBefore(c)) {
- /* back out the latest movement to stop at the boundary */
- src->move(src, -U16_LENGTH(c), UITER_CURRENT);
- break;
- } else {
- buffer.append(c);
- }
- }
- } else {
- while((c=uiter_previous32(src))>=0) {
- /* always write this character to the front of the buffer */
- buffer.insert(0, c);
- /* stop if this just-copied character is a boundary */
- if(n2->hasBoundaryBefore(c)) {
- break;
- }
- }
- }
-
- UnicodeString destString(dest, 0, destCapacity);
- if(buffer.length()>0 && doNormalize) {
- n2->normalize(buffer, destString, *pErrorCode).extract(dest, destCapacity, *pErrorCode);
- if(pNeededToNormalize!=NULL && U_SUCCESS(*pErrorCode)) {
- *pNeededToNormalize= destString!=buffer;
- }
- return destString.length();
- } else {
- /* just copy the source characters */
- return buffer.extract(dest, destCapacity, *pErrorCode);
- }
-}
-
-static int32_t
-unorm_iterate(UCharIterator *src, UBool forward,
- UChar *dest, int32_t destCapacity,
- UNormalizationMode mode, int32_t options,
- UBool doNormalize, UBool *pNeededToNormalize,
- UErrorCode *pErrorCode) {
- const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode);
- if(options&UNORM_UNICODE_3_2) {
- const UnicodeSet *uni32 = uniset_getUnicode32Instance(*pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- FilteredNormalizer2 fn2(*n2, *uni32);
- return _iterate(src, forward, dest, destCapacity,
- &fn2, doNormalize, pNeededToNormalize, pErrorCode);
- }
- return _iterate(src, forward, dest, destCapacity,
- n2, doNormalize, pNeededToNormalize, pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-unorm_previous(UCharIterator *src,
- UChar *dest, int32_t destCapacity,
- UNormalizationMode mode, int32_t options,
- UBool doNormalize, UBool *pNeededToNormalize,
- UErrorCode *pErrorCode) {
- return unorm_iterate(src, FALSE,
- dest, destCapacity,
- mode, options,
- doNormalize, pNeededToNormalize,
- pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-unorm_next(UCharIterator *src,
- UChar *dest, int32_t destCapacity,
- UNormalizationMode mode, int32_t options,
- UBool doNormalize, UBool *pNeededToNormalize,
- UErrorCode *pErrorCode) {
- return unorm_iterate(src, TRUE,
- dest, destCapacity,
- mode, options,
- doNormalize, pNeededToNormalize,
- pErrorCode);
-}
-
-/* Concatenation of normalized strings -------------------------------------- */
-
-static int32_t
-_concatenate(const UChar *left, int32_t leftLength,
- const UChar *right, int32_t rightLength,
- UChar *dest, int32_t destCapacity,
- const Normalizer2 *n2,
- UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(destCapacity<0 || (dest==NULL && destCapacity>0) ||
- left==NULL || leftLength<-1 || right==NULL || rightLength<-1) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* check for overlapping right and destination */
- if( dest!=NULL &&
- ((right>=dest && right<(dest+destCapacity)) ||
- (rightLength>0 && dest>=right && dest<(right+rightLength)))
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* allow left==dest */
- UnicodeString destString;
- if(left==dest) {
- destString.setTo(dest, leftLength, destCapacity);
- } else {
- destString.setTo(dest, 0, destCapacity);
- destString.append(left, leftLength);
- }
- return n2->append(destString, UnicodeString(rightLength<0, right, rightLength), *pErrorCode).
- extract(dest, destCapacity, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-unorm_concatenate(const UChar *left, int32_t leftLength,
- const UChar *right, int32_t rightLength,
- UChar *dest, int32_t destCapacity,
- UNormalizationMode mode, int32_t options,
- UErrorCode *pErrorCode) {
- const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode);
- if(options&UNORM_UNICODE_3_2) {
- const UnicodeSet *uni32 = uniset_getUnicode32Instance(*pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- FilteredNormalizer2 fn2(*n2, *uni32);
- return _concatenate(left, leftLength, right, rightLength,
- dest, destCapacity, &fn2, pErrorCode);
- }
- return _concatenate(left, leftLength, right, rightLength,
- dest, destCapacity, n2, pErrorCode);
-}
-
-#endif /* #if !UCONFIG_NO_NORMALIZATION */
diff --git a/contrib/libs/icu/common/unormcmp.cpp b/contrib/libs/icu/common/unormcmp.cpp
deleted file mode 100644
index 689b0b53b2d..00000000000
--- a/contrib/libs/icu/common/unormcmp.cpp
+++ /dev/null
@@ -1,640 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2001-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: unormcmp.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004sep13
-* created by: Markus W. Scherer
-*
-* unorm_compare() function moved here from unorm.cpp for better modularization.
-* Depends on both normalization and case folding.
-* Allows unorm.cpp to not depend on any character properties code.
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_NORMALIZATION
-
-#include "unicode/unorm.h"
-#include "unicode/ustring.h"
-#include "cmemory.h"
-#include "normalizer2impl.h"
-#include "ucase.h"
-#include "uprops.h"
-#include "ustr_imp.h"
-
-U_NAMESPACE_USE
-
-/* compare canonically equivalent ------------------------------------------- */
-
-/*
- * Compare two strings for canonical equivalence.
- * Further options include case-insensitive comparison and
- * code point order (as opposed to code unit order).
- *
- * In this function, canonical equivalence is optional as well.
- * If canonical equivalence is tested, then both strings must fulfill
- * the FCD check.
- *
- * Semantically, this is equivalent to
- * strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
- * where code point order, NFD and foldCase are all optional.
- *
- * String comparisons almost always yield results before processing both strings
- * completely.
- * They are generally more efficient working incrementally instead of
- * performing the sub-processing (strlen, normalization, case-folding)
- * on the entire strings first.
- *
- * It is also unnecessary to normalize identical characters.
- *
- * This function works in principle as follows:
- *
- * loop {
- * get one code unit c1 from s1 (-1 if end of source)
- * get one code unit c2 from s2 (-1 if end of source)
- *
- * if(either string finished) {
- * return result;
- * }
- * if(c1==c2) {
- * continue;
- * }
- *
- * // c1!=c2
- * try to decompose/case-fold c1/c2, and continue if one does;
- *
- * // still c1!=c2 and neither decomposes/case-folds, return result
- * return c1-c2;
- * }
- *
- * When a character decomposes, then the pointer for that source changes to
- * the decomposition, pushing the previous pointer onto a stack.
- * When the end of the decomposition is reached, then the code unit reader
- * pops the previous source from the stack.
- * (Same for case-folding.)
- *
- * This is complicated further by operating on variable-width UTF-16.
- * The top part of the loop works on code units, while lookups for decomposition
- * and case-folding need code points.
- * Code points are assembled after the equality/end-of-source part.
- * The source pointer is only advanced beyond all code units when the code point
- * actually decomposes/case-folds.
- *
- * If we were on a trail surrogate unit when assembling a code point,
- * and the code point decomposes/case-folds, then the decomposition/folding
- * result must be compared with the part of the other string that corresponds to
- * this string's lead surrogate.
- * Since we only assemble a code point when hitting a trail unit when the
- * preceding lead units were identical, we back up the other string by one unit
- * in such a case.
- *
- * The optional code point order comparison at the end works with
- * the same fix-up as the other code point order comparison functions.
- * See ustring.c and the comment near the end of this function.
- *
- * Assumption: A decomposition or case-folding result string never contains
- * a single surrogate. This is a safe assumption in the Unicode Standard.
- * Therefore, we do not need to check for surrogate pairs across
- * decomposition/case-folding boundaries.
- *
- * Further assumptions (see the verifications in tstnorm.cpp):
- * The API function checks for FCD first, while the core function
- * first case-folds and then decomposes. This requires that case-folding does not
- * un-FCD any strings.
- *
- * The API function may also NFD the input and turn off decomposition.
- * This requires that case-folding does not un-NFD strings either.
- *
- * TODO If any of the above two assumptions is violated,
- * then this entire code must be re-thought.
- * If this happens, then a simple solution is to case-fold both strings up front
- * and to turn off UNORM_INPUT_IS_FCD.
- * We already do this when not both strings are in FCD because makeFCD
- * would be a partial NFD before the case folding, which does not work.
- * Note that all of this is only a problem when case-folding _and_
- * canonical equivalence come together.
- * (Comments in unorm_compare() are more up to date than this TODO.)
- */
-
-/* stack element for previous-level source/decomposition pointers: the saved start, current position (s) and limit of a level; unorm_cmpEquivFold() stores start==NULL to mark a skipped (empty) level */
-struct CmpEquivLevel {
- const UChar *start, *s, *limit;
-};
-typedef struct CmpEquivLevel CmpEquivLevel;
-
-/**
- * Internal option for unorm_cmpEquivFold() for decomposing.
- * If not set, just do strcasecmp(). unorm_compare() always sets it before calling unorm_cmpEquivFold().
- */
-#define _COMPARE_EQUIV 0x80000
-
-/* internal function: compares s1 and s2 incrementally, case-folding (U_COMPARE_IGNORE_CASE) and/or decomposing (_COMPARE_EQUIV) code points only where the code units differ; returns 0 if the strings compare equal or *pErrorCode is a failure, otherwise a negative or positive value */
-static int32_t
-unorm_cmpEquivFold(const UChar *s1, int32_t length1,
- const UChar *s2, int32_t length2,
- uint32_t options,
- UErrorCode *pErrorCode) {
- const Normalizer2Impl *nfcImpl;
-
- /* current-level start/limit - s1/s2 as current */
- const UChar *start1, *start2, *limit1, *limit2;
-
- /* decomposition and case folding variables */
- const UChar *p;
- int32_t length;
-
- /* stacks of previous-level start/current/limit: [0] is pushed by case folding or by a decomposition at level 0, [1] by a decomposition at level 1 or as an empty (start==NULL) skipped level */
- CmpEquivLevel stack1[2], stack2[2];
-
- /* buffers for algorithmic decompositions */
- UChar decomp1[4], decomp2[4];
-
- /* case folding buffers, only use current-level start/limit */
- UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
-
- /* track which is the current level per string */
- int32_t level1, level2;
-
- /* current code units, and code points for lookups */
- UChar32 c1, c2, cp1, cp2;
-
- /* no argument error checking because this itself is not an API */
-
- /*
- * assume that at least one of the options _COMPARE_EQUIV and U_COMPARE_IGNORE_CASE is set
- * otherwise this function must behave exactly as uprv_strCompare()
- * not checking for that here makes testing this function easier
- */
-
- /* normalization/properties data loaded? */
- if((options&_COMPARE_EQUIV)!=0) {
- nfcImpl=Normalizer2Factory::getNFCImpl(*pErrorCode);
- } else {
- nfcImpl=NULL;
- }
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /* initialize */
- start1=s1;
- if(length1==-1) {
- limit1=NULL;
- } else {
- limit1=s1+length1;
- }
-
- start2=s2;
- if(length2==-1) {
- limit2=NULL;
- } else {
- limit2=s2+length2;
- }
-
- level1=level2=0;
- c1=c2=-1;
-
- /* comparison loop */
- for(;;) {
- /*
- * here a code unit value of -1 means "get another code unit"
- * below it will mean "this source is finished"
- */
-
- if(c1<0) {
- /* get next code unit from string 1, post-increment */
- for(;;) {
- if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
- if(level1==0) {
- c1=-1;
- break;
- }
- } else {
- ++s1;
- break;
- }
-
- /* reached end of level buffer, pop one level */
- do {
- --level1;
- start1=stack1[level1].start; /*Not uninitialized*/
- } while(start1==NULL);
- s1=stack1[level1].s; /*Not uninitialized*/
- limit1=stack1[level1].limit; /*Not uninitialized*/
- }
- }
-
- if(c2<0) {
- /* get next code unit from string 2, post-increment */
- for(;;) {
- if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
- if(level2==0) {
- c2=-1;
- break;
- }
- } else {
- ++s2;
- break;
- }
-
- /* reached end of level buffer, pop one level */
- do {
- --level2;
- start2=stack2[level2].start; /*Not uninitialized*/
- } while(start2==NULL);
- s2=stack2[level2].s; /*Not uninitialized*/
- limit2=stack2[level2].limit; /*Not uninitialized*/
- }
- }
-
- /*
- * compare c1 and c2
- * either variable c1, c2 is -1 only if the corresponding string is finished
- */
- if(c1==c2) {
- if(c1<0) {
- return 0; /* c1==c2==-1 indicating end of strings */
- }
- c1=c2=-1; /* make us fetch new code units */
- continue;
- } else if(c1<0) {
- return -1; /* string 1 ends before string 2 */
- } else if(c2<0) {
- return 1; /* string 2 ends before string 1 */
- }
- /* c1!=c2 && c1>=0 && c2>=0 */
-
- /* get complete code points for c1, c2 for lookups if either is a surrogate */
- cp1=c1;
- if(U_IS_SURROGATE(c1)) {
- UChar c;
-
- if(U_IS_SURROGATE_LEAD(c1)) {
- if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
- /* advance ++s1; only below if cp1 decomposes/case-folds */
- cp1=U16_GET_SUPPLEMENTARY(c1, c);
- }
- } else /* isTrail(c1) */ {
- if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
- cp1=U16_GET_SUPPLEMENTARY(c, c1);
- }
- }
- }
-
- cp2=c2;
- if(U_IS_SURROGATE(c2)) {
- UChar c;
-
- if(U_IS_SURROGATE_LEAD(c2)) {
- if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
- /* advance ++s2; only below if cp2 decomposes/case-folds */
- cp2=U16_GET_SUPPLEMENTARY(c2, c);
- }
- } else /* isTrail(c2) */ {
- if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
- cp2=U16_GET_SUPPLEMENTARY(c, c2);
- }
- }
- }
-
- /*
- * go down one level for each string
- * continue with the main loop as soon as there is a real change
- */
-
- if( level1==0 && (options&U_COMPARE_IGNORE_CASE) &&
- (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0
- ) {
- /* cp1 case-folds to the code point "length" or to p[length] */
- if(U_IS_SURROGATE(c1)) {
- if(U_IS_SURROGATE_LEAD(c1)) {
- /* advance beyond source surrogate pair if it case-folds */
- ++s1;
- } else /* isTrail(c1) */ {
- /*
- * we got a supplementary code point when hitting its trail surrogate,
- * therefore the lead surrogate must have been the same as in the other string;
- * compare this decomposition with the lead surrogate in the other string
- * remember that this simulates bulk text replacement:
- * the decomposition would replace the entire code point
- */
- --s2;
- c2=*(s2-1);
- }
- }
-
- /* push current level pointers */
- stack1[0].start=start1;
- stack1[0].s=s1;
- stack1[0].limit=limit1;
- ++level1;
-
- /* copy the folding result to fold1[] */
- if(length<=UCASE_MAX_STRING_LENGTH) {
- u_memcpy(fold1, p, length);
- } else {
- int32_t i=0;
- U16_APPEND_UNSAFE(fold1, i, length);
- length=i;
- }
-
- /* set next level pointers to case folding */
- start1=s1=fold1;
- limit1=fold1+length;
-
- /* get ready to read from the case folding result, continue with loop */
- c1=-1;
- continue;
- }
-
- if( level2==0 && (options&U_COMPARE_IGNORE_CASE) &&
- (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0
- ) {
- /* cp2 case-folds to the code point "length" or to p[length] */
- if(U_IS_SURROGATE(c2)) {
- if(U_IS_SURROGATE_LEAD(c2)) {
- /* advance beyond source surrogate pair if it case-folds */
- ++s2;
- } else /* isTrail(c2) */ {
- /*
- * we got a supplementary code point when hitting its trail surrogate,
- * therefore the lead surrogate must have been the same as in the other string;
- * compare this decomposition with the lead surrogate in the other string
- * remember that this simulates bulk text replacement:
- * the decomposition would replace the entire code point
- */
- --s1;
- c1=*(s1-1);
- }
- }
-
- /* push current level pointers */
- stack2[0].start=start2;
- stack2[0].s=s2;
- stack2[0].limit=limit2;
- ++level2;
-
- /* copy the folding result to fold2[] */
- if(length<=UCASE_MAX_STRING_LENGTH) {
- u_memcpy(fold2, p, length);
- } else {
- int32_t i=0;
- U16_APPEND_UNSAFE(fold2, i, length);
- length=i;
- }
-
- /* set next level pointers to case folding */
- start2=s2=fold2;
- limit2=fold2+length;
-
- /* get ready to read from the case folding result, continue with loop */
- c2=-1;
- continue;
- }
-
- if( level1<2 && (options&_COMPARE_EQUIV) &&
- 0!=(p=nfcImpl->getDecomposition((UChar32)cp1, decomp1, length))
- ) {
- /* cp1 decomposes into p[length] */
- if(U_IS_SURROGATE(c1)) {
- if(U_IS_SURROGATE_LEAD(c1)) {
- /* advance beyond source surrogate pair if it decomposes */
- ++s1;
- } else /* isTrail(c1) */ {
- /*
- * we got a supplementary code point when hitting its trail surrogate,
- * therefore the lead surrogate must have been the same as in the other string;
- * compare this decomposition with the lead surrogate in the other string
- * remember that this simulates bulk text replacement:
- * the decomposition would replace the entire code point
- */
- --s2;
- c2=*(s2-1);
- }
- }
-
- /* push current level pointers */
- stack1[level1].start=start1;
- stack1[level1].s=s1;
- stack1[level1].limit=limit1;
- ++level1;
-
- /* set empty intermediate level if skipped */
- if(level1<2) {
- stack1[level1++].start=NULL;
- }
-
- /* set next level pointers to decomposition */
- start1=s1=p;
- limit1=p+length;
-
- /* get ready to read from decomposition, continue with loop */
- c1=-1;
- continue;
- }
-
- if( level2<2 && (options&_COMPARE_EQUIV) &&
- 0!=(p=nfcImpl->getDecomposition((UChar32)cp2, decomp2, length))
- ) {
- /* cp2 decomposes into p[length] */
- if(U_IS_SURROGATE(c2)) {
- if(U_IS_SURROGATE_LEAD(c2)) {
- /* advance beyond source surrogate pair if it decomposes */
- ++s2;
- } else /* isTrail(c2) */ {
- /*
- * we got a supplementary code point when hitting its trail surrogate,
- * therefore the lead surrogate must have been the same as in the other string;
- * compare this decomposition with the lead surrogate in the other string
- * remember that this simulates bulk text replacement:
- * the decomposition would replace the entire code point
- */
- --s1;
- c1=*(s1-1);
- }
- }
-
- /* push current level pointers */
- stack2[level2].start=start2;
- stack2[level2].s=s2;
- stack2[level2].limit=limit2;
- ++level2;
-
- /* set empty intermediate level if skipped */
- if(level2<2) {
- stack2[level2++].start=NULL;
- }
-
- /* set next level pointers to decomposition */
- start2=s2=p;
- limit2=p+length;
-
- /* get ready to read from decomposition, continue with loop */
- c2=-1;
- continue;
- }
-
- /*
- * no decomposition/case folding, max level for both sides:
- * return difference result
- *
- * code point order comparison must not just return cp1-cp2
- * because when single surrogates are present then the surrogate pairs
- * that formed cp1 and cp2 may be from different string indexes
- *
- * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
- * c1=d800 cp1=10001 c2=dc00 cp2=10000
- * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
- *
- * therefore, use same fix-up as in ustring.c/uprv_strCompare()
- * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
- * so we have slightly different pointer/start/limit comparisons here
- */
-
- if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
- /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
- if(
- (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
- (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
- ) {
- /* part of a surrogate pair, leave >=d800 */
- } else {
- /* BMP code point - may be surrogate code point - make <d800 */
- c1-=0x2800;
- }
-
- if(
- (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
- (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
- ) {
- /* part of a surrogate pair, leave >=d800 */
- } else {
- /* BMP code point - may be surrogate code point - make <d800 */
- c2-=0x2800;
- }
- }
-
- return c1-c2;
- }
-}
-
-static
-UBool _normalize(const Normalizer2 *n2, const UChar *s, int32_t length,
-                 UnicodeString &normalized, UErrorCode *pErrorCode) {
-    /*
-     * Writes a normalized form of s into "normalized", but only if s does not
-     * already pass n2's quick check over its whole length.
-     * Returns TRUE exactly when "normalized" was filled without error.
-     */
-    UnicodeString input(length<0, s, length);
-
-    /* length of the leading part of s that passes the quick check */
-    int32_t prefixLength=n2->spanQuickCheckYes(input, *pErrorCode);
-    if(U_FAILURE(*pErrorCode) || prefixLength>=input.length()) {
-        return FALSE;  /* failure, or nothing left to normalize */
-    }
-
-    /*
-     * ICU 2.4 went further: when neither string was in FCD, both were NFD'ed
-     * and the _COMPARE_EQUIV option was turned off.
-     * Whether that is valid under the current definition of the canonical
-     * caseless match is not entirely clear, so ICU 2.6 dropped it.
-     */
-    /* start from the passing prefix, then normalize the remainder onto it */
-    UnicodeString remainder=input.tempSubString(prefixLength);
-    normalized.setTo(FALSE, input.getBuffer(), prefixLength);
-    n2->normalizeSecondAndAppend(normalized, remainder, *pErrorCode);
-    return U_SUCCESS(*pErrorCode) ? TRUE : FALSE;
-}
-
-U_CAPI int32_t U_EXPORT2
-unorm_compare(const UChar *s1, int32_t length1,
-              const UChar *s2, int32_t length2,
-              uint32_t options,
-              UErrorCode *pErrorCode) {
-    /*
-     * Compares s1 and s2 for canonical (and, per options, caseless) equivalence.
-     * A length of -1 means the string is NUL-terminated. Returns the result of
-     * unorm_cmpEquivFold(), or 0 with *pErrorCode set when something fails.
-     */
-    if(U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-    if(s1==0 || length1<-1 || s2==0 || length2<-1) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    UnicodeString fcd1, fcd2;  /* hold the normalized copies that s1/s2 may be redirected to */
-    int32_t normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT);
-    options|=_COMPARE_EQUIV;
-
-    /*
-     * UAX #21 Case Mappings, as fixed for Unicode version 4
-     * (see Jitterbug 2021), defines a canonical caseless match as
-     *   A string X is a canonical caseless match for a string Y if and only if
-     *   NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y)))
-     *
-     * For better performance, we check for FCD (or let the caller tell us that
-     * both strings are in FCD) for the inner normalization.
-     * BasicNormalizerTest::FindFoldFCDExceptions() makes sure that
-     * case-folding preserves the FCD-ness of a string.
-     * The outer normalization is then only performed by unorm_cmpEquivFold()
-     * when there is a difference.
-     *
-     * Exception: When using the Turkic case-folding option, we do perform
-     * full NFD first. This is because in the Turkic case precomposed characters
-     * with 0049 capital I or 0069 small i fold differently whether they
-     * are first decomposed or not, so an FCD check - a check only for
-     * canonical order - is not sufficient.
-     */
-    if(!(options&UNORM_INPUT_IS_FCD) || (options&U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
-        const Normalizer2 *n2;
-        if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) {
-            n2=Normalizer2::getNFDInstance(*pErrorCode);
-        } else {
-            n2=Normalizer2Factory::getFCDInstance(*pErrorCode);
-        }
-        if (U_FAILURE(*pErrorCode)) {
-            return 0;
-        }
-        if(normOptions&UNORM_UNICODE_3_2) {
-            const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode);
-            if(U_FAILURE(*pErrorCode)) {
-                return 0;  /* never dereference uni32 after a failure, as in unorm_concatenate() */
-            }
-            FilteredNormalizer2 fn2(*n2, *uni32);
-            if(_normalize(&fn2, s1, length1, fcd1, pErrorCode)) {
-                s1=fcd1.getBuffer();
-                length1=fcd1.length();
-            }
-            if(_normalize(&fn2, s2, length2, fcd2, pErrorCode)) {
-                s2=fcd2.getBuffer();
-                length2=fcd2.length();
-            }
-        } else {
-            if(_normalize(n2, s1, length1, fcd1, pErrorCode)) {
-                s1=fcd1.getBuffer();
-                length1=fcd1.length();
-            }
-            if(_normalize(n2, s2, length2, fcd2, pErrorCode)) {
-                s2=fcd2.getBuffer();
-                length2=fcd2.length();
-            }
-        }
-    }
-
-    return U_SUCCESS(*pErrorCode) ? unorm_cmpEquivFold(s1, length1, s2, length2, options, pErrorCode) : 0;
-}
-
-#endif /* #if !UCONFIG_NO_NORMALIZATION */
diff --git a/contrib/libs/icu/common/unormimp.h b/contrib/libs/icu/common/unormimp.h
deleted file mode 100644
index 88c7975cc4e..00000000000
--- a/contrib/libs/icu/common/unormimp.h
+++ /dev/null
@@ -1,488 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2001-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: unormimp.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2001may25
-* created by: Markus W. Scherer
-*/
-
-#ifndef __UNORMIMP_H__
-#define __UNORMIMP_H__
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_NORMALIZATION
-
-#include "udataswp.h"
-
-/*
- * The 2001-2010 implementation of the normalization code loads its data from
- * unorm.icu, which is generated with the gennorm tool.
- * The format of that file is described at the end of this file.
- */
-
-/* norm32 value constants: masks, shifts and thresholds for the bit fields of the 32-bit "norm32" word stored per code point */
-enum {
- /* quick check flags 0..3 set mean "no" for their forms */
- _NORM_QC_NFC=0x11, /* no|maybe */
- _NORM_QC_NFKC=0x22, /* no|maybe */
- _NORM_QC_NFD=4, /* no */
- _NORM_QC_NFKD=8, /* no */
-
- _NORM_QC_ANY_NO=0xf,
-
- /* quick check flags 4..5 mean "maybe" for their forms; test flags>=_NORM_QC_MAYBE */
- _NORM_QC_MAYBE=0x10,
- _NORM_QC_ANY_MAYBE=0x30,
-
- _NORM_QC_MASK=0x3f, /* all quick check flags, bits 5..0 */
-
- _NORM_COMBINES_FWD=0x40, /* bit 6 */
- _NORM_COMBINES_BACK=0x80, /* bit 7 */
- _NORM_COMBINES_ANY=0xc0, /* bits 7..6 */
-
- _NORM_CC_SHIFT=8, /* UnicodeData.txt combining class in bits 15..8 */
- _NORM_CC_MASK=0xff00,
-
- _NORM_EXTRA_SHIFT=16, /* 16 bits for the index to UChars and other extra data */
- _NORM_EXTRA_INDEX_TOP=0xfc00, /* start of surrogate specials after shift */
-
- _NORM_EXTRA_SURROGATE_MASK=0x3ff,
- _NORM_EXTRA_SURROGATE_TOP=0x3f0, /* hangul etc. */
-
- _NORM_EXTRA_HANGUL=_NORM_EXTRA_SURROGATE_TOP,
- _NORM_EXTRA_JAMO_L, /* 0x3f1 */
- _NORM_EXTRA_JAMO_V, /* 0x3f2 */
- _NORM_EXTRA_JAMO_T /* 0x3f3 */
-};
-
-/* norm32 value constants using >16 bits; the normalization code compares entire norm32 values with these thresholds */
-#define _NORM_MIN_SPECIAL 0xfc000000
-#define _NORM_SURROGATES_TOP 0xfff00000
-#define _NORM_MIN_HANGUL 0xfff00000
-#define _NORM_MIN_JAMO_V 0xfff20000
-#define _NORM_JAMO_V_TOP 0xfff30000
-
-/* value constants for auxTrie: the bit positions by which the _NORM_AUX_*_MASK macros shift 1 */
-enum {
- _NORM_AUX_COMP_EX_SHIFT=10,
- _NORM_AUX_UNSAFE_SHIFT=11,
- _NORM_AUX_NFC_SKIPPABLE_F_SHIFT=12
-};
-
-#define _NORM_AUX_MAX_FNC ((int32_t)1<<_NORM_AUX_COMP_EX_SHIFT) /* 1<<10 */
-/* masks for auxTrie data words; every expansion is fully parenthesized so it is safe in any expression */
-#define _NORM_AUX_FNC_MASK ((uint32_t)(_NORM_AUX_MAX_FNC-1)) /* bits 9..0 */
-#define _NORM_AUX_COMP_EX_MASK ((uint32_t)1<<_NORM_AUX_COMP_EX_SHIFT) /* bit 10 */
-#define _NORM_AUX_UNSAFE_MASK ((uint32_t)1<<_NORM_AUX_UNSAFE_SHIFT) /* bit 11 */
-#define _NORM_AUX_NFC_SKIP_F_MASK ((uint32_t)1<<_NORM_AUX_NFC_SKIPPABLE_F_SHIFT) /* bit 12 */
-
-/* canonStartSets[0..31] contains indexes for what is in the array; these enumerators (counting up from 0) name those slots */
-enum {
- _NORM_SET_INDEX_CANON_SETS_LENGTH, /* number of uint16_t in canonical starter sets */
- _NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH, /* number of uint16_t in the BMP search table (contains pairs) */
- _NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH,/* number of uint16_t in the supplementary search table (contains triplets) */
-
- /* from formatVersion 2.3: */
- _NORM_SET_INDEX_NX_CJK_COMPAT_OFFSET, /* uint16_t offset from canonStartSets[0] to the
- exclusion set for CJK compatibility characters */
- _NORM_SET_INDEX_NX_UNICODE32_OFFSET, /* uint16_t offset from canonStartSets[0] to the
- exclusion set for Unicode 3.2 characters */
- _NORM_SET_INDEX_NX_RESERVED_OFFSET, /* uint16_t offset from canonStartSets[0] to the
- end of the previous exclusion set */
-
- _NORM_SET_INDEX_TOP=32 /* number of index slots; changing this requires a new formatVersion */
-};
-
-/* more constants for canonical starter sets */
-
-/* 14 bit indexes to canonical USerializedSets */
-#define _NORM_MAX_CANON_SETS 0x4000 /* 1<<14 */
-
-/* single-code point BMP sets are encoded directly in the search table except if result=0x4000..0x7fff */
-#define _NORM_CANON_SET_BMP_MASK 0xc000 /* NOTE(review): presumably (result&MASK)==_NORM_CANON_SET_BMP_IS_INDEX tests for 0x4000..0x7fff - confirm against the lookup code */
-#define _NORM_CANON_SET_BMP_IS_INDEX 0x4000
-
-/* indexes[] value names: positions (counting up from 0) in the int32_t indexes[_NORM_INDEX_TOP] array of the data file */
-enum {
- _NORM_INDEX_TRIE_SIZE, /* number of bytes in normalization trie */
- _NORM_INDEX_UCHAR_COUNT, /* number of UChars in extra data */
-
- _NORM_INDEX_COMBINE_DATA_COUNT, /* number of uint16_t words for combining data */
- _NORM_INDEX_COMBINE_FWD_COUNT, /* number of code points that combine forward */
- _NORM_INDEX_COMBINE_BOTH_COUNT, /* number of code points that combine forward and backward */
- _NORM_INDEX_COMBINE_BACK_COUNT, /* number of code points that combine backward */
-
- _NORM_INDEX_MIN_NFC_NO_MAYBE, /* first code point with quick check NFC NO/MAYBE */
- _NORM_INDEX_MIN_NFKC_NO_MAYBE, /* first code point with quick check NFKC NO/MAYBE */
- _NORM_INDEX_MIN_NFD_NO_MAYBE, /* first code point with quick check NFD NO/MAYBE */
- _NORM_INDEX_MIN_NFKD_NO_MAYBE, /* first code point with quick check NFKD NO/MAYBE */
-
- _NORM_INDEX_FCD_TRIE_SIZE, /* number of bytes in FCD trie */
-
- _NORM_INDEX_AUX_TRIE_SIZE, /* number of bytes in the auxiliary trie */
- _NORM_INDEX_CANON_SET_COUNT, /* number of uint16_t in the array of serialized USet */
-
- _NORM_INDEX_TOP=32 /* length of indexes[]; changing this requires a new formatVersion */
-};
-
-enum {
- /* FCD check: everything below this code point is known to have a 0 lead combining class */
- _NORM_MIN_WITH_LEAD_CC=0x300 /* i.e. code points below U+0300 */
-};
-
-enum {
- /**
- * Bit 7 of the length byte for a decomposition string in extra data is
- * a flag indicating whether the decomposition string is
- * preceded by a 16-bit word with the leading and trailing cc
- * of the decomposition (like for A-umlaut);
- * if not, then both cc's are zero (like for compatibility ideographs).
- */
- _NORM_DECOMP_FLAG_LENGTH_HAS_CC=0x80,
- /**
- * Bits 6..0 of the length byte contain the actual length, not counting the cc word.
- */
- _NORM_DECOMP_LENGTH_MASK=0x7f
-};
-
-/** Constants for options flags for normalization; each constant is a distinct single bit. */
-enum {
- /** Options bit 0, do not decompose Hangul syllables. */
- UNORM_NX_HANGUL=1,
- /** Options bit 1, do not decompose CJK compatibility characters. */
- UNORM_NX_CJK_COMPAT=2
-};
-
-/**
- * Description of the format of unorm.icu version 2.3.
- *
- * Main change from version 1 to version 2:
- * Use of new, common UTrie instead of normalization-specific tries.
- * Change to version 2.1: add third/auxiliary trie with associated data.
- * Change to version 2.2: add skippable (f) flag data (_NORM_AUX_NFC_SKIP_F_MASK).
- * Change to version 2.3: add serialized sets for normalization exclusions
- * stored inside canonStartSets[]
- *
- * For more details of how to use the data structures see the code
- * in unorm.cpp (runtime normalization code) and
- * in gennorm.c and gennorm/store.c (build-time data generation).
- *
- * For the serialized format of UTrie see utrie.c/UTrieHeader.
- *
- * - Overall partition
- *
- * unorm.dat customarily begins with a UDataInfo structure, see udata.h and .c.
- * After that there are the following structures:
- *
- * int32_t indexes[_NORM_INDEX_TOP]; -- _NORM_INDEX_TOP=32, see enum in this file
- *
- * UTrie normTrie; -- size in bytes=indexes[_NORM_INDEX_TRIE_SIZE]
- *
- * uint16_t extraData[extraDataTop]; -- extraDataTop=indexes[_NORM_INDEX_UCHAR_COUNT]
- * extraData[0] contains the number of units for
- * FC_NFKC_Closure (formatVersion>=2.1)
- *
- * uint16_t combiningTable[combiningTableTop]; -- combiningTableTop=indexes[_NORM_INDEX_COMBINE_DATA_COUNT]
- * combiningTableTop may include one 16-bit padding unit
- * to make sure that fcdTrie is 32-bit-aligned
- *
- * UTrie fcdTrie; -- size in bytes=indexes[_NORM_INDEX_FCD_TRIE_SIZE]
- *
- * UTrie auxTrie; -- size in bytes=indexes[_NORM_INDEX_AUX_TRIE_SIZE]
- *
- * uint16_t canonStartSets[canonStartSetsTop] -- canonStartSetsTop=indexes[_NORM_INDEX_CANON_SET_COUNT]
- * serialized USets and binary search tables, see below
- *
- *
- * The indexes array contains lengths and sizes of the following arrays and structures
- * as well as the following values:
- * indexes[_NORM_INDEX_COMBINE_FWD_COUNT]=combineFwdTop
- * -- one more than the highest combining index computed for forward-only-combining characters
- * indexes[_NORM_INDEX_COMBINE_BOTH_COUNT]=combineBothTop-combineFwdTop
- * -- number of combining indexes computed for both-ways-combining characters
- * indexes[_NORM_INDEX_COMBINE_BACK_COUNT]=combineBackTop-combineBothTop
- * -- number of combining indexes computed for backward-only-combining characters
- *
- * indexes[_NORM_INDEX_MIN_NF*_NO_MAYBE] (where *={ C, D, KC, KD })
- * -- first code point with a quick check NF* value of NO/MAYBE
- *
- *
- * - Tries
- *
- * The main structures are two UTrie tables ("compact arrays"),
- * each with one index array and one data array.
- * See utrie.h and utrie.c.
- *
- *
- * - Tries in unorm.dat
- *
- * The first trie (normTrie above)
- * provides data for the NF* quick checks and normalization.
- * The second trie (fcdTrie above) provides data just for FCD checks.
- *
- *
- * - norm32 data words from the first trie
- *
- * The norm32Table contains one 32-bit word "norm32" per code point.
- * It contains the following bit fields:
- * 31..16 extra data index, _NORM_EXTRA_SHIFT is used to shift this field down
- * if this index is <_NORM_EXTRA_INDEX_TOP then it is an index into
- * extraData[] where variable-length normalization data for this
- * code point is found
- * if this index is <_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_SURROGATE_TOP
- * then this is a norm32 for a leading surrogate, and the index
- * value is used together with the following trailing surrogate
- * code unit in the second trie access
- * if this index is >=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_SURROGATE_TOP
- * then this is a norm32 for a "special" character,
- * i.e., the character is a Hangul syllable or a Jamo
- * see _NORM_EXTRA_HANGUL etc.
- * generally, instead of extracting this index from the norm32 and
- * comparing it with the above constants,
- * the normalization code compares the entire norm32 value
- * with _NORM_MIN_SPECIAL, _NORM_SURROGATES_TOP, _NORM_MIN_HANGUL etc.
- *
- * 15..8 combining class (cc) according to UnicodeData.txt
- *
- * 7..6 _NORM_COMBINES_ANY flags, used in composition to see if a character
- * combines with any following or preceding character(s)
- * at all
- * 7 _NORM_COMBINES_BACK
- * 6 _NORM_COMBINES_FWD
- *
- * 5..0 quick check flags, set for "no" or "maybe", with separate flags for
- * each normalization form
- * the higher bits are "maybe" flags; for NF*D there are no such flags
- * the lower bits are "no" flags for all forms, in the same order
- * as the "maybe" flags,
- * which is (MSB to LSB): NFKD NFD NFKC NFC
- * 5..4 _NORM_QC_ANY_MAYBE
- * 3..0 _NORM_QC_ANY_NO
- * see further related constants
- *
- *
- * - Extra data per code point
- *
- * "Extra data" is referenced by the index in norm32.
- * It is variable-length data. It is only present, and only those parts
- * of it are, as needed for a given character.
- * The norm32 extra data index is added to the beginning of extraData[]
- * to get to a vector of 16-bit words with data at the following offsets:
- *
- * [-1] Combining index for composition.
- * Stored only if norm32&_NORM_COMBINES_ANY .
- * [0] Lengths of the canonical and compatibility decomposition strings.
- * Stored only if there are decompositions, i.e.,
- * if norm32&(_NORM_QC_NFD|_NORM_QC_NFKD)
- * High byte: length of NFKD, or 0 if none
- * Low byte: length of NFD, or 0 if none
- * Each length byte also has another flag:
- * Bit 7 of a length byte is set if there are non-zero
- * combining classes (cc's) associated with the respective
- * decomposition. If this flag is set, then the decomposition
- * is preceded by a 16-bit word that contains the
- * leading and trailing cc's.
- * Bits 6..0 of a length byte are the length of the
- * decomposition string, not counting the cc word.
- * [1..n] NFD
- * [n+1..] NFKD
- *
- * Each of the two decompositions consists of up to two parts:
- * - The 16-bit words with the leading and trailing cc's.
- * This is only stored if bit 7 of the corresponding length byte
- * is set. In this case, at least one of the cc's is not zero.
- * High byte: leading cc==cc of the first code point in the decomposition string
- * Low byte: trailing cc==cc of the last code point in the decomposition string
- * - The decomposition string in UTF-16, with length code units.
- *
- *
- * - Combining indexes and combiningTable[]
- *
- * Combining indexes are stored at the [-1] offset of the extra data
- * if the character combines forward or backward with any other characters.
- * They are used for (re)composition in NF*C.
- * Values of combining indexes are arranged according to whether a character
- * combines forward, backward, or both ways:
- * forward-only < both ways < backward-only
- *
- * The index values for forward-only and both-ways combining characters
- * are indexes into the combiningTable[].
- * The index values for backward-only combining characters are simply
- * incremented from the preceding index values to be unique.
- *
- * In the combiningTable[], a variable-length list
- * of variable-length (back-index, code point) pair entries is stored
- * for each forward-combining character.
- *
- * These back-indexes are the combining indexes of both-ways or backward-only
- * combining characters that the forward-combining character combines with.
- *
- * Each list is sorted in ascending order of back-indexes.
- * Each list is terminated with the last back-index having bit 15 set.
- *
- * Each pair (back-index, code point) takes up either 2 or 3
- * 16-bit words.
- * The first word of a list entry is the back-index, with its bit 15 set if
- * this is the last pair in the list.
- *
- * The second word contains flags in bits 15..13 that determine
- * if there is a third word and how the combined character is encoded:
- * 15 set if there is a third word in this list entry
- * 14 set if the result is a supplementary character
- * 13 set if the result itself combines forward
- *
- * According to these bits 15..14 of the second word,
- * the result character is encoded as follows:
- * 00 or 01 The result is <=0x1fff and stored in bits 12..0 of
- * the second word.
- * 10 The result is 0x2000..0xffff and stored in the third word.
- * Bits 12..0 of the second word are not used.
- * 11 The result is a supplementary character.
- * Bits 9..0 of the leading surrogate are in bits 9..0 of
- * the second word.
- * Add 0xd800 to these bits to get the complete surrogate.
- * Bits 12..10 of the second word are not used.
- * The trailing surrogate is stored in the third word.
- *
- *
- * - FCD trie
- *
- * The FCD trie is very simple.
- * It is a folded trie with 16-bit data words.
- * In each word, the high byte contains the leading cc of the character,
- * and the low byte contains the trailing cc of the character.
- * These cc's are the cc's of the first and last code points in the
- * canonical decomposition of the character.
- *
- * Since all 16 bits are used for cc's, lead surrogates must be tested
- * by checking the code unit instead of the trie data.
- * This is done only if the 16-bit data word is not zero.
- * If the code unit is a leading surrogate and the data word is not zero,
- * then instead of cc's it contains the offset for the second trie lookup.
- *
- *
- * - Auxiliary trie and data
- *
- * The auxiliary 16-bit trie contains data for additional properties.
- * Bits
- * 15..13 reserved
- * 12 not NFC_Skippable (f) (formatVersion>=2.2)
- * 11 flag: not a safe starter for canonical closure
- * 10 composition exclusion
- * 9.. 0 index into extraData[] to FC_NFKC_Closure string
- * (not for lead surrogate),
- * or lead surrogate offset (for lead surrogate, if 9..0 not zero)
- *
- * - FC_NFKC_Closure strings in extraData[]
- *
- * Strings are either stored as a single code unit or as the length
- * followed by that many units.
- * const UChar *s=extraData+(index from auxTrie data bits 9..0);
- * int32_t length;
- * if(*s<0xff00) {
- * // s points to the single-unit string
- * length=1;
- * } else {
- * length=*s&0xff;
- * ++s;
- * }
- *
- * Conditions for "NF* Skippable" from Mark Davis' com.ibm.text.UCD.NFSkippable:
- * (used in NormalizerTransliterator)
- *
- * A skippable character is
- * a) unassigned, or ALL of the following:
- * b) of combining class 0.
- * c) not decomposed by this normalization form.
- * AND if NFC or NFKC,
- * d) can never compose with a previous character.
- * e) can never compose with a following character.
- * f) can never change if another character is added.
- * Example: a-breve might satisfy all but f, but if you
- * add an ogonek it changes to a-ogonek + breve
- *
- * a)..e) must be tested from norm32.
- * Since f) is more complicated, the (not-)NFC_Skippable flag (f) is built
- * into the auxiliary trie.
- * The same bit is used for NFC and NFKC; (c) differs for them.
- * As usual, we build the "not skippable" flags so that unassigned
- * code points get a 0 bit.
- * This bit is only valid after (a)..(e) test FALSE; test NFD_NO before (f) as well.
- * Test Hangul LV syllables entirely in code.
- *
- *
- * - structure inside canonStartSets[]
- *
- * This array maps from code points c to sets of code points (USerializedSet).
- * The result sets are the code points whose canonical decompositions start
- * with c.
- *
- * canonStartSets[] contains the following sub-arrays:
- *
- * indexes[_NORM_SET_INDEX_TOP]
- * - contains lengths of sub-arrays etc.
- *
- * startSets[indexes[_NORM_SET_INDEX_CANON_SETS_LENGTH]-_NORM_SET_INDEX_TOP]
- * - contains serialized sets (USerializedSet) of canonical starters for
- * enumerating canonically equivalent strings
- * indexes[_NORM_SET_INDEX_CANON_SETS_LENGTH] includes _NORM_SET_INDEX_TOP
- * for details about the structure see uset.c
- *
- * bmpTable[indexes[_NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH]]
- * - a sorted search table for BMP code points whose results are
- * either indexes to USerializedSets or single code points for
- * single-code point sets;
- * each entry is a pair of { code point, result } with result=(binary) yy xxxxxx xxxxxxxx
- * if yy==01 then there is a USerializedSet at canonStartSets+x
- * else build a USerializedSet with result as the single code point
- *
- * suppTable[indexes[_NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH]]
- * - a sorted search table for supplementary code points whose results are
- * either indexes to USerializedSets or single code points for
- * single-code point sets;
- * each entry is a triplet of { high16(cp), low16(cp), result }
- * each code point's high-word may contain extra data in bits 15..5:
- * if the high word has bit 15 set, then build a set with a single code point
- * which is (((high16(cp)&0x1f00)<<8)|result;
- * else there is a USerializedSet at canonStartSets+result
- *
- * FormatVersion 2.3 adds 2 serialized sets for normalization exclusions.
- * They are stored in the data file so that the runtime normalization code need
- * not depend on other properties and their data and implementation files.
- * The _NORM_SET_INDEX_NX_..._OFFSET offsets in the canonStartSets index table
- * give the location for each set.
- * There is no set stored for UNORM_NX_HANGUL because it's trivial to create
- * without using properties.
- *
- * Set contents:
- *
- * _NORM_SET_INDEX_NX_CJK_COMPAT_OFFSET (for UNORM_NX_CJK_COMPAT)
- * [[:Ideographic:]&[:NFD_QC=No:]]
- * =[CJK Ideographs]&[has canonical decomposition]
- *
- * _NORM_SET_INDEX_NX_UNICODE32_OFFSET (for UNORM_UNICODE_3_2)
- * [:^Age=3.2:]
- * =set with all code points that were not designated by the specified Unicode version
- *
- * _NORM_SET_INDEX_NX_RESERVED_OFFSET
- * This is an offset that points to where the next, future set would start.
- * Currently it indicates where the previous set ends, and thus its length.
- * The name for this enum constant may in the future be applied to different
- * index slots. In order to get the limit of a set, use its index slot and
- * the immediately following one regardless of that one's enum name.
- */
-
-#endif /* #if !UCONFIG_NO_NORMALIZATION */
-
-#endif
diff --git a/contrib/libs/icu/common/uobject.cpp b/contrib/libs/icu/common/uobject.cpp
deleted file mode 100644
index e222b2ce9b9..00000000000
--- a/contrib/libs/icu/common/uobject.cpp
+++ /dev/null
@@ -1,105 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2002-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: uobject.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002jun26
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/uobject.h"
-#include "cmemory.h"
-
-U_NAMESPACE_BEGIN
-
-#if U_OVERRIDE_CXX_ALLOCATION
-
-/*
- * Default implementation of UMemory::new/delete
- * using uprv_malloc() and uprv_free().
- *
- * For testing, this is used together with a list of imported symbols to verify
- * that ICU is not using the global ::new and ::delete operators.
- *
- * These operators can be implemented like this or any other appropriate way
- * when customizing ICU for certain environments.
- * Whenever ICU is customized in binary incompatible ways please be sure
- * to use library name suffixes to distinguish such libraries from
- * the standard build.
- *
- * Instead of just modifying these C++ new/delete operators, it is usually best
- * to modify the uprv_malloc()/uprv_free()/uprv_realloc() functions in cmemory.c.
- *
- * Memory test on Windows/MSVC 6:
- * The global operators new and delete look as follows:
- * 04F 00000000 UNDEF notype () External | ??2@YAPAXI@Z (void * __cdecl operator new(unsigned int))
- * 03F 00000000 UNDEF notype () External | ??3@YAXPAX@Z (void __cdecl operator delete(void *))
- *
- * These lines are from output generated by the MSVC 6 tool dumpbin with
- * dumpbin /symbols *.obj
- *
- * ??2@YAPAXI@Z and ??3@YAXPAX@Z are the linker symbols in the .obj
- * files and are imported from msvcrtd.dll (in a debug build).
- *
- * Make sure that with the UMemory operators new and delete defined these two symbols
- * do not appear in the dumpbin /symbols output for the ICU libraries!
- *
- * If such a symbol appears in the output then look in the preceding lines in the output
- * for which file and function calls the global new or delete operator,
- * and replace with uprv_malloc/uprv_free.
- */
-
-void * U_EXPORT2 UMemory::operator new(size_t size) U_NOEXCEPT {
- return uprv_malloc(size);
-}
-
-void U_EXPORT2 UMemory::operator delete(void *p) U_NOEXCEPT {
- if(p!=NULL) {
- uprv_free(p);
- }
-}
-
-void * U_EXPORT2 UMemory::operator new[](size_t size) U_NOEXCEPT {
- return uprv_malloc(size);
-}
-
-void U_EXPORT2 UMemory::operator delete[](void *p) U_NOEXCEPT {
- if(p!=NULL) {
- uprv_free(p);
- }
-}
-
-#if U_HAVE_DEBUG_LOCATION_NEW
-void * U_EXPORT2 UMemory::operator new(size_t size, const char* /*file*/, int /*line*/) U_NOEXCEPT {
- return UMemory::operator new(size);
-}
-
-void U_EXPORT2 UMemory::operator delete(void* p, const char* /*file*/, int /*line*/) U_NOEXCEPT {
- UMemory::operator delete(p);
-}
-#endif /* U_HAVE_DEBUG_LOCATION_NEW */
-
-
-#endif
-
-UObject::~UObject() {}
-
-UClassID UObject::getDynamicClassID() const { return NULL; }
-
-U_NAMESPACE_END
-
-U_NAMESPACE_USE
-
-U_CAPI void U_EXPORT2
-uprv_deleteUObject(void *obj) {
- delete static_cast<UObject *>(obj);
-}
diff --git a/contrib/libs/icu/common/uposixdefs.h b/contrib/libs/icu/common/uposixdefs.h
deleted file mode 100644
index 23c3f6d4667..00000000000
--- a/contrib/libs/icu/common/uposixdefs.h
+++ /dev/null
@@ -1,77 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2011-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: uposixdefs.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2011jul25
-* created by: Markus W. Scherer
-*
-* Common definitions for implementation files working with POSIX functions.
-* *Important*: #include this file before any other header files!
-*/
-
-#ifndef __UPOSIXDEFS_H__
-#define __UPOSIXDEFS_H__
-
-/*
- * Define _XOPEN_SOURCE for access to POSIX functions.
- *
- * We cannot use U_PLATFORM from platform.h/utypes.h because
- * "The Open Group Base Specifications"
- * chapter "2.2 The Compilation Environment" says:
- * "In the compilation of an application that #defines a feature test macro
- * specified by IEEE Std 1003.1-2001,
- * no header defined by IEEE Std 1003.1-2001 shall be included prior to
- * the definition of the feature test macro."
- */
-#ifdef _XOPEN_SOURCE
- /* Use the predefined value. */
-#else
- /*
- * Version 6.0:
- * The Open Group Base Specifications Issue 6 (IEEE Std 1003.1, 2004 Edition)
- * also known as
- * SUSv3 = Open Group Single UNIX Specification, Version 3 (UNIX03)
- *
- * Note: This definition used to be in C source code (e.g., putil.c)
- * and define _XOPEN_SOURCE to different values depending on __STDC_VERSION__.
- * In C++ source code (e.g., putil.cpp), __STDC_VERSION__ is not defined at all.
- */
-# define _XOPEN_SOURCE 600
-#endif
-
-/*
- * Make sure things like readlink and such functions work.
- * Poorly upgraded Solaris machines can't have this defined.
- * Cleanly installed Solaris can use this #define.
- *
- * z/OS needs this definition for timeval and to get usleep.
- */
-#if !defined(_XOPEN_SOURCE_EXTENDED) && defined(__TOS_MVS__)
-# define _XOPEN_SOURCE_EXTENDED 1
-#endif
-
-/**
- * Solaris says:
- * "...it is invalid to compile an XPG6 or a POSIX.1-2001 application with anything other
- * than a c99 or later compiler."
- * Apparently C++11 is not "or later". Work around this.
- */
-#if defined(__cplusplus) && (defined(sun) || defined(__sun)) && !defined (_STDC_C99)
-# define _STDC_C99
-#endif
-
-#if !defined _POSIX_C_SOURCE && \
- defined(__APPLE__) && defined(__MACH__) && !defined(__clang__)
-// Needed to prevent EOWNERDEAD issues with GCC on Mac
-#define _POSIX_C_SOURCE 200809L
-#endif
-
-#endif /* __UPOSIXDEFS_H__ */
diff --git a/contrib/libs/icu/common/uprops.cpp b/contrib/libs/icu/common/uprops.cpp
deleted file mode 100644
index 1604ad9a179..00000000000
--- a/contrib/libs/icu/common/uprops.cpp
+++ /dev/null
@@ -1,797 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2002-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: uprops.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002feb24
-* created by: Markus W. Scherer
-*
-* Implementations for mostly non-core Unicode character properties
-* stored in uprops.icu.
-*
-* With the APIs implemented here, almost all properties files and
-* their associated implementation files are used from this file,
-* including those for normalization and case mappings.
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "unicode/ucptrie.h"
-#include "unicode/udata.h"
-#include "unicode/unorm2.h"
-#include "unicode/uscript.h"
-#include "unicode/ustring.h"
-#include "cstring.h"
-#include "mutex.h"
-#include "normalizer2impl.h"
-#include "umutex.h"
-#include "ubidi_props.h"
-#include "uprops.h"
-#include "ucase.h"
-#include "ucln_cmn.h"
-#include "ulayout_props.h"
-#include "ustr_imp.h"
-
-U_NAMESPACE_USE
-
-// Unicode text layout properties data -----------------------------------------
-
-namespace {
-
-icu::UInitOnce gLayoutInitOnce = U_INITONCE_INITIALIZER;
-UDataMemory *gLayoutMemory = nullptr;
-
-UCPTrie *gInpcTrie = nullptr; // Indic_Positional_Category
-UCPTrie *gInscTrie = nullptr; // Indic_Syllabic_Category
-UCPTrie *gVoTrie = nullptr; // Vertical_Orientation
-
-int32_t gMaxInpcValue = 0;
-int32_t gMaxInscValue = 0;
-int32_t gMaxVoValue = 0;
-
-UBool U_CALLCONV uprops_cleanup() {
- udata_close(gLayoutMemory);
- gLayoutMemory = nullptr;
-
- ucptrie_close(gInpcTrie);
- gInpcTrie = nullptr;
- ucptrie_close(gInscTrie);
- gInscTrie = nullptr;
- ucptrie_close(gVoTrie);
- gVoTrie = nullptr;
-
- gMaxInpcValue = 0;
- gMaxInscValue = 0;
- gMaxVoValue = 0;
-
- gLayoutInitOnce.reset();
- return TRUE;
-}
-
-UBool U_CALLCONV
-ulayout_isAcceptable(void * /*context*/,
- const char * /* type */, const char * /*name*/,
- const UDataInfo *pInfo) {
- return pInfo->size >= 20 &&
- pInfo->isBigEndian == U_IS_BIG_ENDIAN &&
- pInfo->charsetFamily == U_CHARSET_FAMILY &&
- pInfo->dataFormat[0] == ULAYOUT_FMT_0 &&
- pInfo->dataFormat[1] == ULAYOUT_FMT_1 &&
- pInfo->dataFormat[2] == ULAYOUT_FMT_2 &&
- pInfo->dataFormat[3] == ULAYOUT_FMT_3 &&
- pInfo->formatVersion[0] == 1;
-}
-
-// UInitOnce singleton initialization function
-void U_CALLCONV ulayout_load(UErrorCode &errorCode) {
- gLayoutMemory = udata_openChoice(
- nullptr, ULAYOUT_DATA_TYPE, ULAYOUT_DATA_NAME,
- ulayout_isAcceptable, nullptr, &errorCode);
- if (U_FAILURE(errorCode)) { return; }
-
- const uint8_t *inBytes = (const uint8_t *)udata_getMemory(gLayoutMemory);
- const int32_t *inIndexes = (const int32_t *)inBytes;
- int32_t indexesLength = inIndexes[ULAYOUT_IX_INDEXES_LENGTH];
- if (indexesLength < 12) {
- errorCode = U_INVALID_FORMAT_ERROR; // Not enough indexes.
- return;
- }
- int32_t offset = indexesLength * 4;
- int32_t top = inIndexes[ULAYOUT_IX_INPC_TRIE_TOP];
- int32_t trieSize = top - offset;
- if (trieSize >= 16) {
- gInpcTrie = ucptrie_openFromBinary(
- UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY,
- inBytes + offset, trieSize, nullptr, &errorCode);
- }
- offset = top;
- top = inIndexes[ULAYOUT_IX_INSC_TRIE_TOP];
- trieSize = top - offset;
- if (trieSize >= 16) {
- gInscTrie = ucptrie_openFromBinary(
- UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY,
- inBytes + offset, trieSize, nullptr, &errorCode);
- }
- offset = top;
- top = inIndexes[ULAYOUT_IX_VO_TRIE_TOP];
- trieSize = top - offset;
- if (trieSize >= 16) {
- gVoTrie = ucptrie_openFromBinary(
- UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY,
- inBytes + offset, trieSize, nullptr, &errorCode);
- }
-
- uint32_t maxValues = inIndexes[ULAYOUT_IX_MAX_VALUES];
- gMaxInpcValue = maxValues >> ULAYOUT_MAX_INPC_SHIFT;
- gMaxInscValue = (maxValues >> ULAYOUT_MAX_INSC_SHIFT) & 0xff;
- gMaxVoValue = (maxValues >> ULAYOUT_MAX_VO_SHIFT) & 0xff;
-
- ucln_common_registerCleanup(UCLN_COMMON_UPROPS, uprops_cleanup);
-}
-
-UBool ulayout_ensureData(UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return FALSE; }
- umtx_initOnce(gLayoutInitOnce, &ulayout_load, errorCode);
- return U_SUCCESS(errorCode);
-}
-
-UBool ulayout_ensureData() {
- UErrorCode errorCode = U_ZERO_ERROR;
- return ulayout_ensureData(errorCode);
-}
-
-} // namespace
-
-/* general properties API functions ----------------------------------------- */
-
-struct BinaryProperty;
-
-typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UProperty which);
-
-struct BinaryProperty {
- int32_t column; // SRC_PROPSVEC column, or "source" if mask==0
- uint32_t mask;
- BinaryPropertyContains *contains;
-};
-
-static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) {
- /* systematic, directly stored properties */
- return (u_getUnicodeProperties(c, prop.column)&prop.mask)!=0;
-}
-
-static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
- return static_cast<UBool>(ucase_hasBinaryProperty(c, which));
-}
-
-static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return ubidi_isBidiControl(c);
-}
-
-static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return ubidi_isMirrored(c);
-}
-
-static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return ubidi_isJoinControl(c);
-}
-
-#if UCONFIG_NO_NORMALIZATION
-static UBool hasFullCompositionExclusion(const BinaryProperty &, UChar32, UProperty) {
- return FALSE;
-}
-#else
-static UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- // By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
- UErrorCode errorCode=U_ZERO_ERROR;
- const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
- return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c));
-}
-#endif
-
-// UCHAR_NF*_INERT properties
-#if UCONFIG_NO_NORMALIZATION
-static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) {
- return FALSE;
-}
-#else
-static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
- UErrorCode errorCode=U_ZERO_ERROR;
- const Normalizer2 *norm2=Normalizer2Factory::getInstance(
- (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode);
- return U_SUCCESS(errorCode) && norm2->isInert(c);
-}
-#endif
-
-#if UCONFIG_NO_NORMALIZATION
-static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) {
- return FALSE;
-}
-#else
-static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- UnicodeString nfd;
- UErrorCode errorCode=U_ZERO_ERROR;
- const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode);
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- if(nfcNorm2->getDecomposition(c, nfd)) {
- /* c has a decomposition */
- if(nfd.length()==1) {
- c=nfd[0]; /* single BMP code point */
- } else if(nfd.length()<=U16_MAX_LENGTH &&
- nfd.length()==U16_LENGTH(c=nfd.char32At(0))
- ) {
- /* single supplementary code point */
- } else {
- c=U_SENTINEL;
- }
- } else if(c<0) {
- return FALSE; /* protect against bad input */
- }
- if(c>=0) {
- /* single code point */
- const UChar *resultString;
- return (UBool)(ucase_toFullFolding(c, &resultString, U_FOLD_CASE_DEFAULT)>=0);
- } else {
- /* guess some large but stack-friendly capacity */
- UChar dest[2*UCASE_MAX_STRING_LENGTH];
- int32_t destLength;
- destLength=u_strFoldCase(dest, UPRV_LENGTHOF(dest),
- nfd.getBuffer(), nfd.length(),
- U_FOLD_CASE_DEFAULT, &errorCode);
- return (UBool)(U_SUCCESS(errorCode) &&
- 0!=u_strCompare(nfd.getBuffer(), nfd.length(),
- dest, destLength, FALSE));
- }
-}
-#endif
-
-#if UCONFIG_NO_NORMALIZATION
-static UBool changesWhenNFKC_Casefolded(const BinaryProperty &, UChar32, UProperty) {
- return FALSE;
-}
-#else
-static UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- UErrorCode errorCode=U_ZERO_ERROR;
- const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode);
- if(U_FAILURE(errorCode)) {
- return FALSE;
- }
- UnicodeString src(c);
- UnicodeString dest;
- {
- // The ReorderingBuffer must be in a block because its destructor
- // needs to release dest's buffer before we look at its contents.
- ReorderingBuffer buffer(*kcf, dest);
- // Small destCapacity for NFKC_CF(c).
- if(buffer.init(5, errorCode)) {
- const UChar *srcArray=src.getBuffer();
- kcf->compose(srcArray, srcArray+src.length(), FALSE,
- TRUE, buffer, errorCode);
- }
- }
- return U_SUCCESS(errorCode) && dest!=src;
-}
-#endif
-
-#if UCONFIG_NO_NORMALIZATION
-static UBool isCanonSegmentStarter(const BinaryProperty &, UChar32, UProperty) {
- return FALSE;
-}
-#else
-static UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- UErrorCode errorCode=U_ZERO_ERROR;
- const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
- return
- U_SUCCESS(errorCode) && impl->ensureCanonIterData(errorCode) &&
- impl->isCanonSegmentStarter(c);
-}
-#endif
-
-static UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return u_isalnumPOSIX(c);
-}
-
-static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return u_isblank(c);
-}
-
-static UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return u_isgraphPOSIX(c);
-}
-
-static UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return u_isprintPOSIX(c);
-}
-
-static UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return u_isxdigit(c);
-}
-
-static UBool isRegionalIndicator(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- // Property starts are a subset of lb=RI etc.
- return 0x1F1E6<=c && c<=0x1F1FF;
-}
-
-static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
- /*
- * column and mask values for binary properties from u_getUnicodeProperties().
- * Must be in order of corresponding UProperty,
- * and there must be exactly one entry per binary UProperty.
- *
- * Properties with mask==0 are handled in code.
- * For them, column is the UPropertySource value.
- */
- { 1, U_MASK(UPROPS_ALPHABETIC), defaultContains },
- { 1, U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains },
- { UPROPS_SRC_BIDI, 0, isBidiControl },
- { UPROPS_SRC_BIDI, 0, isMirrored },
- { 1, U_MASK(UPROPS_DASH), defaultContains },
- { 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultContains },
- { 1, U_MASK(UPROPS_DEPRECATED), defaultContains },
- { 1, U_MASK(UPROPS_DIACRITIC), defaultContains },
- { 1, U_MASK(UPROPS_EXTENDER), defaultContains },
- { UPROPS_SRC_NFC, 0, hasFullCompositionExclusion },
- { 1, U_MASK(UPROPS_GRAPHEME_BASE), defaultContains },
- { 1, U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains },
- { 1, U_MASK(UPROPS_GRAPHEME_LINK), defaultContains },
- { 1, U_MASK(UPROPS_HEX_DIGIT), defaultContains },
- { 1, U_MASK(UPROPS_HYPHEN), defaultContains },
- { 1, U_MASK(UPROPS_ID_CONTINUE), defaultContains },
- { 1, U_MASK(UPROPS_ID_START), defaultContains },
- { 1, U_MASK(UPROPS_IDEOGRAPHIC), defaultContains },
- { 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains },
- { 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains },
- { UPROPS_SRC_BIDI, 0, isJoinControl },
- { 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains },
- { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_LOWERCASE
- { 1, U_MASK(UPROPS_MATH), defaultContains },
- { 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains },
- { 1, U_MASK(UPROPS_QUOTATION_MARK), defaultContains },
- { 1, U_MASK(UPROPS_RADICAL), defaultContains },
- { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_SOFT_DOTTED
- { 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains },
- { 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains },
- { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_UPPERCASE
- { 1, U_MASK(UPROPS_WHITE_SPACE), defaultContains },
- { 1, U_MASK(UPROPS_XID_CONTINUE), defaultContains },
- { 1, U_MASK(UPROPS_XID_START), defaultContains },
- { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_SENSITIVE
- { 1, U_MASK(UPROPS_S_TERM), defaultContains },
- { 1, U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains },
- { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFD_INERT
- { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKD_INERT
- { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFC_INERT
- { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKC_INERT
- { UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter },
- { 1, U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains },
- { 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains },
- { UPROPS_SRC_CHAR_AND_PROPSVEC, 0, isPOSIX_alnum },
- { UPROPS_SRC_CHAR, 0, isPOSIX_blank },
- { UPROPS_SRC_CHAR, 0, isPOSIX_graph },
- { UPROPS_SRC_CHAR, 0, isPOSIX_print },
- { UPROPS_SRC_CHAR, 0, isPOSIX_xdigit },
- { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASED
- { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_IGNORABLE
- { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_LOWERCASED
- { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_UPPERCASED
- { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_TITLECASED
- { UPROPS_SRC_CASE_AND_NORM, 0, changesWhenCasefolded },
- { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_CASEMAPPED
- { UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded },
- { 2, U_MASK(UPROPS_2_EMOJI), defaultContains },
- { 2, U_MASK(UPROPS_2_EMOJI_PRESENTATION), defaultContains },
- { 2, U_MASK(UPROPS_2_EMOJI_MODIFIER), defaultContains },
- { 2, U_MASK(UPROPS_2_EMOJI_MODIFIER_BASE), defaultContains },
- { 2, U_MASK(UPROPS_2_EMOJI_COMPONENT), defaultContains },
- { 2, 0, isRegionalIndicator },
- { 1, U_MASK(UPROPS_PREPENDED_CONCATENATION_MARK), defaultContains },
- { 2, U_MASK(UPROPS_2_EXTENDED_PICTOGRAPHIC), defaultContains },
-};
-
-U_CAPI UBool U_EXPORT2
-u_hasBinaryProperty(UChar32 c, UProperty which) {
- /* c is range-checked in the functions that are called from here */
- if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) {
- /* not a known binary property */
- return FALSE;
- } else {
- const BinaryProperty &prop=binProps[which];
- return prop.contains(prop, c, which);
- }
-}
-
-struct IntProperty;
-
-typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which);
-typedef int32_t IntPropertyGetMaxValue(const IntProperty &prop, UProperty which);
-
-struct IntProperty {
- int32_t column; // SRC_PROPSVEC column, or "source" if mask==0
- uint32_t mask;
- int32_t shift; // =maxValue if getMaxValueFromShift() is used
- IntPropertyGetValue *getValue;
- IntPropertyGetMaxValue *getMaxValue;
-};
-
-static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) {
- /* systematic, directly stored properties */
- return (int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask)>>prop.shift;
-}
-
-static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) {
- return (uprv_getMaxValues(prop.column)&prop.mask)>>prop.shift;
-}
-
-static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/) {
- return prop.shift;
-}
-
-static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return (int32_t)u_charDirection(c);
-}
-
-static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return (int32_t)ubidi_getPairedBracketType(c);
-}
-
-static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) {
- return ubidi_getMaxValue(which);
-}
-
-#if UCONFIG_NO_NORMALIZATION
-static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) {
- return 0;
-}
-#else
-static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return u_getCombiningClass(c);
-}
-#endif
-
-static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return (int32_t)u_charType(c);
-}
-
-static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return ubidi_getJoiningGroup(c);
-}
-
-static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return ubidi_getJoiningType(c);
-}
-
-static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c));
- return UPROPS_NTV_GET_TYPE(ntv);
-}
-
-static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- UErrorCode errorCode=U_ZERO_ERROR;
- return (int32_t)uscript_getScript(c, &errorCode);
-}
-
-static int32_t scriptGetMaxValue(const IntProperty &/*prop*/, UProperty /*which*/) {
- uint32_t scriptX=uprv_getMaxValues(0)&UPROPS_SCRIPT_X_MASK;
- return uprops_mergeScriptCodeOrIndex(scriptX);
-}
-
-/*
- * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
- * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
- */
-static const UHangulSyllableType gcbToHst[]={
- U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */
- U_HST_NOT_APPLICABLE, /* U_GCB_CONTROL */
- U_HST_NOT_APPLICABLE, /* U_GCB_CR */
- U_HST_NOT_APPLICABLE, /* U_GCB_EXTEND */
- U_HST_LEADING_JAMO, /* U_GCB_L */
- U_HST_NOT_APPLICABLE, /* U_GCB_LF */
- U_HST_LV_SYLLABLE, /* U_GCB_LV */
- U_HST_LVT_SYLLABLE, /* U_GCB_LVT */
- U_HST_TRAILING_JAMO, /* U_GCB_T */
- U_HST_VOWEL_JAMO /* U_GCB_V */
- /*
- * Omit GCB values beyond what we need for hst.
- * The code below checks for the array length.
- */
-};
-
-static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- /* see comments on gcbToHst[] above */
- int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
- if(gcb<UPRV_LENGTHOF(gcbToHst)) {
- return gcbToHst[gcb];
- } else {
- return U_HST_NOT_APPLICABLE;
- }
-}
-
-#if UCONFIG_NO_NORMALIZATION
-static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) {
- return 0;
-}
-#else
-static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) {
- return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD));
-}
-#endif
-
-#if UCONFIG_NO_NORMALIZATION
-static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) {
- return 0;
-}
-#else
-static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return unorm_getFCD16(c)>>8;
-}
-#endif
-
-#if UCONFIG_NO_NORMALIZATION
-static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) {
- return 0;
-}
-#else
-static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return unorm_getFCD16(c)&0xff;
-}
-#endif
-
-static int32_t getInPC(const IntProperty &, UChar32 c, UProperty) {
- return ulayout_ensureData() && gInpcTrie != nullptr ? ucptrie_get(gInpcTrie, c) : 0;
-}
-
-static int32_t getInSC(const IntProperty &, UChar32 c, UProperty) {
- return ulayout_ensureData() && gInscTrie != nullptr ? ucptrie_get(gInscTrie, c) : 0;
-}
-
-static int32_t getVo(const IntProperty &, UChar32 c, UProperty) {
- return ulayout_ensureData() && gVoTrie != nullptr ? ucptrie_get(gVoTrie, c) : 0;
-}
-
-static int32_t layoutGetMaxValue(const IntProperty &/*prop*/, UProperty which) {
- if (!ulayout_ensureData()) { return 0; }
- switch (which) {
- case UCHAR_INDIC_POSITIONAL_CATEGORY:
- return gMaxInpcValue;
- case UCHAR_INDIC_SYLLABIC_CATEGORY:
- return gMaxInscValue;
- case UCHAR_VERTICAL_ORIENTATION:
- return gMaxVoValue;
- default:
- return 0;
- }
-}
-
-static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={
- /*
- * column, mask and shift values for int-value properties from u_getUnicodeProperties().
- * Must be in order of corresponding UProperty,
- * and there must be exactly one entry per int UProperty.
- *
- * Properties with mask==0 are handled in code.
- * For them, column is the UPropertySource value.
- */
- { UPROPS_SRC_BIDI, 0, 0, getBiDiClass, biDiGetMaxValue },
- { 0, UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue, defaultGetMaxValue },
- { UPROPS_SRC_NFC, 0, 0xff, getCombiningClass, getMaxValueFromShift },
- { 2, UPROPS_DT_MASK, 0, defaultGetValue, defaultGetMaxValue },
- { 0, UPROPS_EA_MASK, UPROPS_EA_SHIFT, defaultGetValue, defaultGetMaxValue },
- { UPROPS_SRC_CHAR, 0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory, getMaxValueFromShift },
- { UPROPS_SRC_BIDI, 0, 0, getJoiningGroup, biDiGetMaxValue },
- { UPROPS_SRC_BIDI, 0, 0, getJoiningType, biDiGetMaxValue },
- { 2, UPROPS_LB_MASK, UPROPS_LB_SHIFT, defaultGetValue, defaultGetMaxValue },
- { UPROPS_SRC_CHAR, 0, (int32_t)U_NT_COUNT-1, getNumericType, getMaxValueFromShift },
- { UPROPS_SRC_PROPSVEC, 0, 0, getScript, scriptGetMaxValue },
- { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1, getHangulSyllableType, getMaxValueFromShift },
- // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
- { UPROPS_SRC_NFC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift },
- // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
- { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift },
- // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE
- { UPROPS_SRC_NFC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift },
- // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE
- { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift },
- { UPROPS_SRC_NFC, 0, 0xff, getLeadCombiningClass, getMaxValueFromShift },
- { UPROPS_SRC_NFC, 0, 0xff, getTrailCombiningClass, getMaxValueFromShift },
- { 2, UPROPS_GCB_MASK, UPROPS_GCB_SHIFT, defaultGetValue, defaultGetMaxValue },
- { 2, UPROPS_SB_MASK, UPROPS_SB_SHIFT, defaultGetValue, defaultGetMaxValue },
- { 2, UPROPS_WB_MASK, UPROPS_WB_SHIFT, defaultGetValue, defaultGetMaxValue },
- { UPROPS_SRC_BIDI, 0, 0, getBiDiPairedBracketType, biDiGetMaxValue },
- { UPROPS_SRC_INPC, 0, 0, getInPC, layoutGetMaxValue },
- { UPROPS_SRC_INSC, 0, 0, getInSC, layoutGetMaxValue },
- { UPROPS_SRC_VO, 0, 0, getVo, layoutGetMaxValue },
-};
-
-U_CAPI int32_t U_EXPORT2
-u_getIntPropertyValue(UChar32 c, UProperty which) {
- if(which<UCHAR_INT_START) {
- if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) {
- const BinaryProperty &prop=binProps[which];
- return prop.contains(prop, c, which);
- }
- } else if(which<UCHAR_INT_LIMIT) {
- const IntProperty &prop=intProps[which-UCHAR_INT_START];
- return prop.getValue(prop, c, which);
- } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
- return U_MASK(u_charType(c));
- }
- return 0; // undefined
-}
-
-U_CAPI int32_t U_EXPORT2
-u_getIntPropertyMinValue(UProperty /*which*/) {
- return 0; /* all binary/enum/int properties have a minimum value of 0 */
-}
-
-U_CAPI int32_t U_EXPORT2
-u_getIntPropertyMaxValue(UProperty which) {
- if(which<UCHAR_INT_START) {
- if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) {
- return 1; // maximum TRUE for all binary properties
- }
- } else if(which<UCHAR_INT_LIMIT) {
- const IntProperty &prop=intProps[which-UCHAR_INT_START];
- return prop.getMaxValue(prop, which);
- }
- return -1; // undefined
-}
-
-U_CFUNC UPropertySource U_EXPORT2
-uprops_getSource(UProperty which) {
- if(which<UCHAR_BINARY_START) {
- return UPROPS_SRC_NONE; /* undefined */
- } else if(which<UCHAR_BINARY_LIMIT) {
- const BinaryProperty &prop=binProps[which];
- if(prop.mask!=0) {
- return UPROPS_SRC_PROPSVEC;
- } else {
- return (UPropertySource)prop.column;
- }
- } else if(which<UCHAR_INT_START) {
- return UPROPS_SRC_NONE; /* undefined */
- } else if(which<UCHAR_INT_LIMIT) {
- const IntProperty &prop=intProps[which-UCHAR_INT_START];
- if(prop.mask!=0) {
- return UPROPS_SRC_PROPSVEC;
- } else {
- return (UPropertySource)prop.column;
- }
- } else if(which<UCHAR_STRING_START) {
- switch(which) {
- case UCHAR_GENERAL_CATEGORY_MASK:
- case UCHAR_NUMERIC_VALUE:
- return UPROPS_SRC_CHAR;
-
- default:
- return UPROPS_SRC_NONE;
- }
- } else if(which<UCHAR_STRING_LIMIT) {
- switch(which) {
- case UCHAR_AGE:
- return UPROPS_SRC_PROPSVEC;
-
- case UCHAR_BIDI_MIRRORING_GLYPH:
- return UPROPS_SRC_BIDI;
-
- case UCHAR_CASE_FOLDING:
- case UCHAR_LOWERCASE_MAPPING:
- case UCHAR_SIMPLE_CASE_FOLDING:
- case UCHAR_SIMPLE_LOWERCASE_MAPPING:
- case UCHAR_SIMPLE_TITLECASE_MAPPING:
- case UCHAR_SIMPLE_UPPERCASE_MAPPING:
- case UCHAR_TITLECASE_MAPPING:
- case UCHAR_UPPERCASE_MAPPING:
- return UPROPS_SRC_CASE;
-
- case UCHAR_ISO_COMMENT:
- case UCHAR_NAME:
- case UCHAR_UNICODE_1_NAME:
- return UPROPS_SRC_NAMES;
-
- default:
- return UPROPS_SRC_NONE;
- }
- } else {
- switch(which) {
- case UCHAR_SCRIPT_EXTENSIONS:
- return UPROPS_SRC_PROPSVEC;
- default:
- return UPROPS_SRC_NONE; /* undefined */
- }
- }
-}
-
-U_CFUNC void U_EXPORT2
-uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode) {
- if (!ulayout_ensureData(*pErrorCode)) { return; }
- const UCPTrie *trie;
- switch (src) {
- case UPROPS_SRC_INPC:
- trie = gInpcTrie;
- break;
- case UPROPS_SRC_INSC:
- trie = gInscTrie;
- break;
- case UPROPS_SRC_VO:
- trie = gVoTrie;
- break;
- default:
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- if (trie == nullptr) {
- *pErrorCode = U_MISSING_RESOURCE_ERROR;
- return;
- }
-
- // Add the start code point of each same-value range of the trie.
- UChar32 start = 0, end;
- while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
- nullptr, nullptr, nullptr)) >= 0) {
- sa->add(sa->set, start);
- start = end + 1;
- }
-}
-
-#if !UCONFIG_NO_NORMALIZATION
-
-U_CAPI int32_t U_EXPORT2
-u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) {
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- // Compute the FC_NFKC_Closure on the fly:
- // We have the API for complete coverage of Unicode properties, although
- // this value by itself is not useful via API.
- // (What could be useful is a custom normalization table that combines
- // case folding and NFKC.)
- // For the derivation, see Unicode's DerivedNormalizationProps.txt.
- const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- // first: b = NFKC(Fold(a))
- UnicodeString folded1String;
- const UChar *folded1;
- int32_t folded1Length=ucase_toFullFolding(c, &folded1, U_FOLD_CASE_DEFAULT);
- if(folded1Length<0) {
- const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc);
- if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) {
- return u_terminateUChars(dest, destCapacity, 0, pErrorCode); // c does not change at all under CaseFolding+NFKC
- }
- folded1String.setTo(c);
- } else {
- if(folded1Length>UCASE_MAX_STRING_LENGTH) {
- folded1String.setTo(folded1Length);
- } else {
- folded1String.setTo(FALSE, folded1, folded1Length);
- }
- }
- UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode);
- // second: c = NFKC(Fold(b))
- UnicodeString folded2String(kc1);
- UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode);
- // if (c != b) add the mapping from a to c
- if(U_FAILURE(*pErrorCode) || kc1==kc2) {
- return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
- } else {
- return kc2.extract(dest, destCapacity, *pErrorCode);
- }
-}
-
-#endif
diff --git a/contrib/libs/icu/common/uprops.h b/contrib/libs/icu/common/uprops.h
deleted file mode 100644
index 8bf929919f2..00000000000
--- a/contrib/libs/icu/common/uprops.h
+++ /dev/null
@@ -1,504 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2002-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: uprops.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002feb24
-* created by: Markus W. Scherer
-*
-* Constants for mostly non-core Unicode character properties
-* stored in uprops.icu.
-*/
-
-#ifndef __UPROPS_H__
-#define __UPROPS_H__
-
-#include "unicode/utypes.h"
-#include "unicode/uset.h"
-#include "uset_imp.h"
-#include "udataswp.h"
-
-/* indexes[] entries */
-enum {
- UPROPS_PROPS32_INDEX,
- UPROPS_EXCEPTIONS_INDEX,
- UPROPS_EXCEPTIONS_TOP_INDEX,
-
- UPROPS_ADDITIONAL_TRIE_INDEX,
- UPROPS_ADDITIONAL_VECTORS_INDEX,
- UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX,
-
- UPROPS_SCRIPT_EXTENSIONS_INDEX,
-
- UPROPS_RESERVED_INDEX_7,
- UPROPS_RESERVED_INDEX_8,
-
- /* size of the data file (number of 32-bit units after the header) */
- UPROPS_DATA_TOP_INDEX,
-
- /* maximum values for code values in vector word 0 */
- UPROPS_MAX_VALUES_INDEX=10,
- /* maximum values for code values in vector word 2 */
- UPROPS_MAX_VALUES_2_INDEX,
-
- UPROPS_INDEX_COUNT=16
-};
-
-/* definitions for the main properties words */
-enum {
- /* general category shift==0 0 (5 bits) */
- /* reserved 5 (1 bit) */
- UPROPS_NUMERIC_TYPE_VALUE_SHIFT=6 /* 6 (10 bits) */
-};
-
-#define GET_CATEGORY(props) ((props)&0x1f)
-#define CAT_MASK(props) U_MASK(GET_CATEGORY(props))
-
-#define GET_NUMERIC_TYPE_VALUE(props) ((props)>>UPROPS_NUMERIC_TYPE_VALUE_SHIFT)
-
-/* constants for the storage form of numeric types and values */
-enum {
- /** No numeric value. */
- UPROPS_NTV_NONE=0,
- /** Decimal digits: nv=0..9 */
- UPROPS_NTV_DECIMAL_START=1,
- /** Other digits: nv=0..9 */
- UPROPS_NTV_DIGIT_START=11,
- /** Small integers: nv=0..154 */
- UPROPS_NTV_NUMERIC_START=21,
- /** Fractions: ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16 */
- UPROPS_NTV_FRACTION_START=0xb0,
- /**
- * Large integers:
- * ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33)
- * (only one significant decimal digit)
- */
- UPROPS_NTV_LARGE_START=0x1e0,
- /**
- * Sexagesimal numbers:
- * ((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4)
- */
- UPROPS_NTV_BASE60_START=0x300,
- /**
- * Fraction-20 values:
- * frac20 = ntv-0x324 = 0..0x17 -> 1|3|5|7 / 20|40|80|160|320|640
- * numerator: num = 2*(frac20&3)+1
- * denominator: den = 20<<(frac20>>2)
- */
- UPROPS_NTV_FRACTION20_START=UPROPS_NTV_BASE60_START+36, // 0x300+9*4=0x324
- /**
- * Fraction-32 values:
- * frac32 = ntv-0x34c = 0..15 -> 1|3|5|7 / 32|64|128|256
- * numerator: num = 2*(frac32&3)+1
- * denominator: den = 32<<(frac32>>2)
- */
- UPROPS_NTV_FRACTION32_START=UPROPS_NTV_FRACTION20_START+24, // 0x324+6*4=0x34c
- /** No numeric value (yet). */
- UPROPS_NTV_RESERVED_START=UPROPS_NTV_FRACTION32_START+16, // 0x34c+4*4=0x35c
-
- UPROPS_NTV_MAX_SMALL_INT=UPROPS_NTV_FRACTION_START-UPROPS_NTV_NUMERIC_START-1
-};
-
-#define UPROPS_NTV_GET_TYPE(ntv) \
- ((ntv==UPROPS_NTV_NONE) ? U_NT_NONE : \
- (ntv<UPROPS_NTV_DIGIT_START) ? U_NT_DECIMAL : \
- (ntv<UPROPS_NTV_NUMERIC_START) ? U_NT_DIGIT : \
- U_NT_NUMERIC)
-
-/* number of properties vector words */
-#define UPROPS_VECTOR_WORDS 3
-
-/*
- * Properties in vector word 0
- * Bits
- * 31..24 DerivedAge version major/minor one nibble each
- * 23..22 3..1: Bits 21..20 & 7..0 = Script_Extensions index
- * 3: Script value from Script_Extensions
- * 2: Script=Inherited
- * 1: Script=Common
- * 0: Script=bits 21..20 & 7..0
- * 21..20 Bits 9..8 of the UScriptCode, or index to Script_Extensions
- * 19..17 East Asian Width
- * 16.. 8 UBlockCode
- * 7.. 0 UScriptCode, or index to Script_Extensions
- */
-
-/* derived age: one nibble each for major and minor version numbers */
-#define UPROPS_AGE_MASK 0xff000000
-#define UPROPS_AGE_SHIFT 24
-
-/* Script_Extensions: mask includes Script */
-#define UPROPS_SCRIPT_X_MASK 0x00f000ff
-#define UPROPS_SCRIPT_X_SHIFT 22
-
-// The UScriptCode or Script_Extensions index is split across two bit fields.
-// (Starting with Unicode 13/ICU 66/2019 due to more varied Script_Extensions.)
-// Shift the high bits right by 12 to assemble the full value.
-#define UPROPS_SCRIPT_HIGH_MASK 0x00300000
-#define UPROPS_SCRIPT_HIGH_SHIFT 12
-#define UPROPS_MAX_SCRIPT 0x3ff
-
-#define UPROPS_EA_MASK 0x000e0000
-#define UPROPS_EA_SHIFT 17
-
-#define UPROPS_BLOCK_MASK 0x0001ff00
-#define UPROPS_BLOCK_SHIFT 8
-
-#define UPROPS_SCRIPT_LOW_MASK 0x000000ff
-
-/* UPROPS_SCRIPT_X_WITH_COMMON must be the lowest value that involves Script_Extensions. */
-#define UPROPS_SCRIPT_X_WITH_COMMON 0x400000
-#define UPROPS_SCRIPT_X_WITH_INHERITED 0x800000
-#define UPROPS_SCRIPT_X_WITH_OTHER 0xc00000
-
-#ifdef __cplusplus
-
-namespace {
-
-inline uint32_t uprops_mergeScriptCodeOrIndex(uint32_t scriptX) {
- return
- ((scriptX & UPROPS_SCRIPT_HIGH_MASK) >> UPROPS_SCRIPT_HIGH_SHIFT) |
- (scriptX & UPROPS_SCRIPT_LOW_MASK);
-}
-
-} // namespace
-
-#endif // __cplusplus
-
-/*
- * Properties in vector word 1
- * Each bit encodes one binary property.
- * The following constants represent the bit number, use 1<<UPROPS_XYZ.
- * UPROPS_BINARY_1_TOP<=32!
- *
- * Keep this list of property enums in sync with
- * propListNames[] in icu/source/tools/genprops/props2.c!
- *
- * ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
- */
-enum {
- UPROPS_WHITE_SPACE,
- UPROPS_DASH,
- UPROPS_HYPHEN,
- UPROPS_QUOTATION_MARK,
- UPROPS_TERMINAL_PUNCTUATION,
- UPROPS_MATH,
- UPROPS_HEX_DIGIT,
- UPROPS_ASCII_HEX_DIGIT,
- UPROPS_ALPHABETIC,
- UPROPS_IDEOGRAPHIC,
- UPROPS_DIACRITIC,
- UPROPS_EXTENDER,
- UPROPS_NONCHARACTER_CODE_POINT,
- UPROPS_GRAPHEME_EXTEND,
- UPROPS_GRAPHEME_LINK,
- UPROPS_IDS_BINARY_OPERATOR,
- UPROPS_IDS_TRINARY_OPERATOR,
- UPROPS_RADICAL,
- UPROPS_UNIFIED_IDEOGRAPH,
- UPROPS_DEFAULT_IGNORABLE_CODE_POINT,
- UPROPS_DEPRECATED,
- UPROPS_LOGICAL_ORDER_EXCEPTION,
- UPROPS_XID_START,
- UPROPS_XID_CONTINUE,
- UPROPS_ID_START, /* ICU 2.6, uprops format version 3.2 */
- UPROPS_ID_CONTINUE,
- UPROPS_GRAPHEME_BASE,
- UPROPS_S_TERM, /* new in ICU 3.0 and Unicode 4.0.1 */
- UPROPS_VARIATION_SELECTOR,
- UPROPS_PATTERN_SYNTAX, /* new in ICU 3.4 and Unicode 4.1 */
- UPROPS_PATTERN_WHITE_SPACE,
- UPROPS_PREPENDED_CONCATENATION_MARK, // new in ICU 60 and Unicode 10
- UPROPS_BINARY_1_TOP /* ==32 - full! */
-};
-
-/*
- * Properties in vector word 2
- * Bits
- * 31..26 http://www.unicode.org/reports/tr51/#Emoji_Properties
- * 25..20 Line Break
- * 19..15 Sentence Break
- * 14..10 Word Break
- * 9.. 5 Grapheme Cluster Break
- * 4.. 0 Decomposition Type
- */
-enum {
- UPROPS_2_EXTENDED_PICTOGRAPHIC=26,
- UPROPS_2_EMOJI_COMPONENT,
- UPROPS_2_EMOJI,
- UPROPS_2_EMOJI_PRESENTATION,
- UPROPS_2_EMOJI_MODIFIER,
- UPROPS_2_EMOJI_MODIFIER_BASE
-};
-
-#define UPROPS_LB_MASK 0x03f00000
-#define UPROPS_LB_SHIFT 20
-
-#define UPROPS_SB_MASK 0x000f8000
-#define UPROPS_SB_SHIFT 15
-
-#define UPROPS_WB_MASK 0x00007c00
-#define UPROPS_WB_SHIFT 10
-
-#define UPROPS_GCB_MASK 0x000003e0
-#define UPROPS_GCB_SHIFT 5
-
-#define UPROPS_DT_MASK 0x0000001f
-
-/**
- * Gets the main properties value for a code point.
- * Implemented in uchar.c for uprops.cpp.
- */
-U_CFUNC uint32_t
-u_getMainProperties(UChar32 c);
-
-/**
- * Get a properties vector word for a code point.
- * Implemented in uchar.c for uprops.cpp.
- * @return 0 if no data or illegal argument
- */
-U_CFUNC uint32_t
-u_getUnicodeProperties(UChar32 c, int32_t column);
-
-/**
- * Get the the maximum values for some enum/int properties.
- * Use the same column numbers as for u_getUnicodeProperties().
- * The returned value will contain maximum values stored in the same bit fields
- * as where the enum values are stored in the u_getUnicodeProperties()
- * return values for the same columns.
- *
- * Valid columns are those for properties words that contain enumerated values.
- * (ICU 2.6: columns 0 and 2)
- * For other column numbers, this function will return 0.
- *
- * @internal
- */
-U_CFUNC int32_t
-uprv_getMaxValues(int32_t column);
-
-/**
- * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM.
- * @internal
- */
-U_CFUNC UBool
-u_isalnumPOSIX(UChar32 c);
-
-/**
- * Checks if c is in
- * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
- * with space=\p{Whitespace} and Control=Cc.
- * Implements UCHAR_POSIX_GRAPH.
- * @internal
- */
-U_CFUNC UBool
-u_isgraphPOSIX(UChar32 c);
-
-/**
- * Checks if c is in \p{graph}\p{blank} - \p{cntrl}.
- * Implements UCHAR_POSIX_PRINT.
- * @internal
- */
-U_CFUNC UBool
-u_isprintPOSIX(UChar32 c);
-
-/** Turn a bit index into a bit flag. @internal */
-#define FLAG(n) ((uint32_t)1<<(n))
-
-/** Flags for general categories in the order of UCharCategory. @internal */
-#define _Cn FLAG(U_GENERAL_OTHER_TYPES)
-#define _Lu FLAG(U_UPPERCASE_LETTER)
-#define _Ll FLAG(U_LOWERCASE_LETTER)
-#define _Lt FLAG(U_TITLECASE_LETTER)
-#define _Lm FLAG(U_MODIFIER_LETTER)
-/* #define _Lo FLAG(U_OTHER_LETTER) -- conflicts with MS Visual Studio 9.0 xiosbase */
-#define _Mn FLAG(U_NON_SPACING_MARK)
-#define _Me FLAG(U_ENCLOSING_MARK)
-#define _Mc FLAG(U_COMBINING_SPACING_MARK)
-#define _Nd FLAG(U_DECIMAL_DIGIT_NUMBER)
-#define _Nl FLAG(U_LETTER_NUMBER)
-#define _No FLAG(U_OTHER_NUMBER)
-#define _Zs FLAG(U_SPACE_SEPARATOR)
-#define _Zl FLAG(U_LINE_SEPARATOR)
-#define _Zp FLAG(U_PARAGRAPH_SEPARATOR)
-#define _Cc FLAG(U_CONTROL_CHAR)
-#define _Cf FLAG(U_FORMAT_CHAR)
-#define _Co FLAG(U_PRIVATE_USE_CHAR)
-#define _Cs FLAG(U_SURROGATE)
-#define _Pd FLAG(U_DASH_PUNCTUATION)
-#define _Ps FLAG(U_START_PUNCTUATION)
-/* #define _Pe FLAG(U_END_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 xlocnum */
-/* #define _Pc FLAG(U_CONNECTOR_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */
-#define _Po FLAG(U_OTHER_PUNCTUATION)
-#define _Sm FLAG(U_MATH_SYMBOL)
-#define _Sc FLAG(U_CURRENCY_SYMBOL)
-#define _Sk FLAG(U_MODIFIER_SYMBOL)
-#define _So FLAG(U_OTHER_SYMBOL)
-#define _Pi FLAG(U_INITIAL_PUNCTUATION)
-/* #define _Pf FLAG(U_FINAL_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */
-
-/** Some code points. @internal */
-enum {
- TAB =0x0009,
- LF =0x000a,
- FF =0x000c,
- CR =0x000d,
- U_A =0x0041,
- U_F =0x0046,
- U_Z =0x005a,
- U_a =0x0061,
- U_f =0x0066,
- U_z =0x007a,
- DEL =0x007f,
- NL =0x0085,
- NBSP =0x00a0,
- CGJ =0x034f,
- FIGURESP=0x2007,
- HAIRSP =0x200a,
- ZWNJ =0x200c,
- ZWJ =0x200d,
- RLM =0x200f,
- NNBSP =0x202f,
- WJ =0x2060,
- INHSWAP =0x206a,
- NOMDIG =0x206f,
- U_FW_A =0xff21,
- U_FW_F =0xff26,
- U_FW_Z =0xff3a,
- U_FW_a =0xff41,
- U_FW_f =0xff46,
- U_FW_z =0xff5a,
- ZWNBSP =0xfeff
-};
-
-/**
- * Get the maximum length of a (regular/1.0/extended) character name.
- * @return 0 if no character names available.
- */
-U_CAPI int32_t U_EXPORT2
-uprv_getMaxCharNameLength(void);
-
-/**
- * Fills set with characters that are used in Unicode character names.
- * Includes all characters that are used in regular/Unicode 1.0/extended names.
- * Just empties the set if no character names are available.
- * @param sa USetAdder to receive characters.
- */
-U_CAPI void U_EXPORT2
-uprv_getCharNameCharacters(const USetAdder *sa);
-
-/**
- * Constants for which data and implementation files provide which properties.
- * Used by UnicodeSet for service-specific property enumeration.
- * @internal
- */
-enum UPropertySource {
- /** No source, not a supported property. */
- UPROPS_SRC_NONE,
- /** From uchar.c/uprops.icu main trie */
- UPROPS_SRC_CHAR,
- /** From uchar.c/uprops.icu properties vectors trie */
- UPROPS_SRC_PROPSVEC,
- /** From unames.c/unames.icu */
- UPROPS_SRC_NAMES,
- /** From ucase.c/ucase.icu */
- UPROPS_SRC_CASE,
- /** From ubidi_props.c/ubidi.icu */
- UPROPS_SRC_BIDI,
- /** From uchar.c/uprops.icu main trie as well as properties vectors trie */
- UPROPS_SRC_CHAR_AND_PROPSVEC,
- /** From ucase.c/ucase.icu as well as unorm.cpp/unorm.icu */
- UPROPS_SRC_CASE_AND_NORM,
- /** From normalizer2impl.cpp/nfc.nrm */
- UPROPS_SRC_NFC,
- /** From normalizer2impl.cpp/nfkc.nrm */
- UPROPS_SRC_NFKC,
- /** From normalizer2impl.cpp/nfkc_cf.nrm */
- UPROPS_SRC_NFKC_CF,
- /** From normalizer2impl.cpp/nfc.nrm canonical iterator data */
- UPROPS_SRC_NFC_CANON_ITER,
- // Text layout properties.
- UPROPS_SRC_INPC,
- UPROPS_SRC_INSC,
- UPROPS_SRC_VO,
- /** One more than the highest UPropertySource (UPROPS_SRC_) constant. */
- UPROPS_SRC_COUNT
-};
-typedef enum UPropertySource UPropertySource;
-
-/**
- * @see UPropertySource
- * @internal
- */
-U_CFUNC UPropertySource U_EXPORT2
-uprops_getSource(UProperty which);
-
-/**
- * Enumerate uprops.icu's main data trie and add the
- * start of each range of same properties to the set.
- * @internal
- */
-U_CFUNC void U_EXPORT2
-uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
-
-/**
- * Enumerate uprops.icu's properties vectors trie and add the
- * start of each range of same properties to the set.
- * @internal
- */
-U_CFUNC void U_EXPORT2
-upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
-
-U_CFUNC void U_EXPORT2
-uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode);
-
-/**
- * Return a set of characters for property enumeration.
- * For each two consecutive characters (start, limit) in the set,
- * all of the properties for start..limit-1 are all the same.
- *
- * @param sa USetAdder to receive result. Existing contents are lost.
- * @internal
- */
-/*U_CFUNC void U_EXPORT2
-uprv_getInclusions(const USetAdder *sa, UErrorCode *pErrorCode);
-*/
-
-/**
- * Swap the ICU Unicode character names file. See uchar.c.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-uchar_swapNames(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-#ifdef __cplusplus
-
-U_NAMESPACE_BEGIN
-
-class UnicodeSet;
-
-class CharacterProperties {
-public:
- CharacterProperties() = delete;
- static const UnicodeSet *getInclusionsForProperty(UProperty prop, UErrorCode &errorCode);
-};
-
-// implemented in uniset_props.cpp
-U_CFUNC UnicodeSet *
-uniset_getUnicode32Instance(UErrorCode &errorCode);
-
-U_NAMESPACE_END
-
-#endif
-
-#endif
diff --git a/contrib/libs/icu/common/ures_cnv.cpp b/contrib/libs/icu/common/ures_cnv.cpp
deleted file mode 100644
index 1aa58e753ce..00000000000
--- a/contrib/libs/icu/common/ures_cnv.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1997-2006, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: ures_cnv.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004aug25
-* created by: Markus W. Scherer
-*
-* Character conversion functions moved here from uresbund.c
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/putil.h"
-#include "unicode/ustring.h"
-#include "unicode/ucnv.h"
-#include "unicode/ures.h"
-#include "uinvchar.h"
-#include "ustr_cnv.h"
-
-U_CAPI UResourceBundle * U_EXPORT2
-ures_openU(const UChar *myPath,
- const char *localeID,
- UErrorCode *status)
-{
- char pathBuffer[1024];
- int32_t length;
- char *path = pathBuffer;
-
- if(status==NULL || U_FAILURE(*status)) {
- return NULL;
- }
- if(myPath==NULL) {
- path = NULL;
- }
- else {
- length=u_strlen(myPath);
- if(length>=(int32_t)sizeof(pathBuffer)) {
- *status=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- } else if(uprv_isInvariantUString(myPath, length)) {
- /*
- * the invariant converter is sufficient for package and tree names
- * and is more efficient
- */
- u_UCharsToChars(myPath, path, length+1); /* length+1 to include the NUL */
- } else {
-#if !UCONFIG_NO_CONVERSION
- /* use the default converter to support variant-character paths */
- UConverter *cnv=u_getDefaultConverter(status);
- length=ucnv_fromUChars(cnv, path, (int32_t)sizeof(pathBuffer), myPath, length, status);
- u_releaseDefaultConverter(cnv);
- if(U_FAILURE(*status)) {
- return NULL;
- }
- if(length>=(int32_t)sizeof(pathBuffer)) {
- /* not NUL-terminated - path too long */
- *status=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-#else
- /* the default converter is not available */
- *status=U_UNSUPPORTED_ERROR;
- return NULL;
-#endif
- }
- }
-
- return ures_open(path, localeID, status);
-}
diff --git a/contrib/libs/icu/common/uresbund.cpp b/contrib/libs/icu/common/uresbund.cpp
deleted file mode 100644
index 97df4a85ebe..00000000000
--- a/contrib/libs/icu/common/uresbund.cpp
+++ /dev/null
@@ -1,3090 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 1997-2016, International Business Machines Corporation and
-* others. All Rights Reserved.
-******************************************************************************
-*
-* File uresbund.cpp
-*
-* Modification History:
-*
-* Date Name Description
-* 04/01/97 aliu Creation.
-* 06/14/99 stephen Removed functions taking a filename suffix.
-* 07/20/99 stephen Changed for UResourceBundle typedef'd to void*
-* 11/09/99 weiv Added ures_getLocale()
-* March 2000 weiv Total overhaul - using data in DLLs
-* 06/20/2000 helena OS/400 port changes; mostly typecast.
-* 06/24/02 weiv Added support for resource sharing
-******************************************************************************
-*/
-
-#include "unicode/ures.h"
-#include "unicode/ustring.h"
-#include "unicode/ucnv.h"
-#include "charstr.h"
-#include "uresimp.h"
-#include "ustr_imp.h"
-#include "cwchar.h"
-#include "ucln_cmn.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "mutex.h"
-#include "uhash.h"
-#include "unicode/uenum.h"
-#include "uenumimp.h"
-#include "ulocimp.h"
-#include "umutex.h"
-#include "putilimp.h"
-#include "uassert.h"
-#include "uresdata.h"
-
-using namespace icu;
-
-/*
-Static cache for already opened resource bundles - mostly for keeping fallback info
-TODO: This cache should probably be removed when the deprecated code is
- completely removed.
-*/
-static UHashtable *cache = NULL;
-static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER;
-
-static UMutex resbMutex;
-
-/* INTERNAL: hashes an entry */
-static int32_t U_CALLCONV hashEntry(const UHashTok parm) {
- UResourceDataEntry *b = (UResourceDataEntry *)parm.pointer;
- UHashTok namekey, pathkey;
- namekey.pointer = b->fName;
- pathkey.pointer = b->fPath;
- return uhash_hashChars(namekey)+37u*uhash_hashChars(pathkey);
-}
-
-/* INTERNAL: compares two entries */
-static UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) {
- UResourceDataEntry *b1 = (UResourceDataEntry *)p1.pointer;
- UResourceDataEntry *b2 = (UResourceDataEntry *)p2.pointer;
- UHashTok name1, name2, path1, path2;
- name1.pointer = b1->fName;
- name2.pointer = b2->fName;
- path1.pointer = b1->fPath;
- path2.pointer = b2->fPath;
- return (UBool)(uhash_compareChars(name1, name2) &&
- uhash_compareChars(path1, path2));
-}
-
-
-/**
- * Internal function, gets parts of locale name according
- * to the position of '_' character
- */
-static UBool chopLocale(char *name) {
- char *i = uprv_strrchr(name, '_');
-
- if(i != NULL) {
- *i = '\0';
- return TRUE;
- }
-
- return FALSE;
-}
-
-/**
- * Internal function
- */
-static void entryIncrease(UResourceDataEntry *entry) {
- Mutex lock(&resbMutex);
- entry->fCountExisting++;
- while(entry->fParent != NULL) {
- entry = entry->fParent;
- entry->fCountExisting++;
- }
-}
-
-/**
- * Internal function. Tries to find a resource in given Resource
- * Bundle, as well as in its parents
- */
-static const ResourceData *getFallbackData(const UResourceBundle* resBundle, const char* * resTag, UResourceDataEntry* *realData, Resource *res, UErrorCode *status) {
- UResourceDataEntry *resB = resBundle->fData;
- int32_t indexR = -1;
- int32_t i = 0;
- *res = RES_BOGUS;
- if(resB != NULL) {
- if(resB->fBogus == U_ZERO_ERROR) { /* if this resource is real, */
- *res = res_getTableItemByKey(&(resB->fData), resB->fData.rootRes, &indexR, resTag); /* try to get data from there */
- i++;
- }
- if(resBundle->fHasFallback == TRUE) {
- while(*res == RES_BOGUS && resB->fParent != NULL) { /* Otherwise, we'll look in parents */
- resB = resB->fParent;
- if(resB->fBogus == U_ZERO_ERROR) {
- i++;
- *res = res_getTableItemByKey(&(resB->fData), resB->fData.rootRes, &indexR, resTag);
- }
- }
- }
-
- if(*res != RES_BOGUS) { /* If the resource is found in parents, we need to adjust the error */
- if(i>1) {
- if(uprv_strcmp(resB->fName, uloc_getDefault())==0 || uprv_strcmp(resB->fName, kRootLocaleName)==0) {
- *status = U_USING_DEFAULT_WARNING;
- } else {
- *status = U_USING_FALLBACK_WARNING;
- }
- }
- *realData = resB;
- return (&(resB->fData));
- } else { /* If resource is not found, we need to give an error */
- *status = U_MISSING_RESOURCE_ERROR;
- return NULL;
- }
- } else {
- *status = U_MISSING_RESOURCE_ERROR;
- return NULL;
- }
-}
-
-static void
-free_entry(UResourceDataEntry *entry) {
- UResourceDataEntry *alias;
- res_unload(&(entry->fData));
- if(entry->fName != NULL && entry->fName != entry->fNameBuffer) {
- uprv_free(entry->fName);
- }
- if(entry->fPath != NULL) {
- uprv_free(entry->fPath);
- }
- if(entry->fPool != NULL) {
- --entry->fPool->fCountExisting;
- }
- alias = entry->fAlias;
- if(alias != NULL) {
- while(alias->fAlias != NULL) {
- alias = alias->fAlias;
- }
- --alias->fCountExisting;
- }
- uprv_free(entry);
-}
-
-/* Works just like ucnv_flushCache() */
-static int32_t ures_flushCache()
-{
- UResourceDataEntry *resB;
- int32_t pos;
- int32_t rbDeletedNum = 0;
- const UHashElement *e;
- UBool deletedMore;
-
- /*if shared data hasn't even been lazy evaluated yet
- * return 0
- */
- Mutex lock(&resbMutex);
- if (cache == NULL) {
- return 0;
- }
-
- do {
- deletedMore = FALSE;
- /*creates an enumeration to iterate through every element in the table */
- pos = UHASH_FIRST;
- while ((e = uhash_nextElement(cache, &pos)) != NULL)
- {
- resB = (UResourceDataEntry *) e->value.pointer;
- /* Deletes only if reference counter == 0
- * Don't worry about the children of this node.
- * Those will eventually get deleted too, if not already.
- * Don't worry about the parents of this node.
- * Those will eventually get deleted too, if not already.
- */
- /* 04/05/2002 [weiv] fCountExisting should now be accurate. If it's not zero, that means that */
- /* some resource bundles are still open somewhere. */
-
- if (resB->fCountExisting == 0) {
- rbDeletedNum++;
- deletedMore = TRUE;
- uhash_removeElement(cache, e);
- free_entry(resB);
- }
- }
- /*
- * Do it again to catch bundles (aliases, pool bundle) whose fCountExisting
- * got decremented by free_entry().
- */
- } while(deletedMore);
-
- return rbDeletedNum;
-}
-
-#ifdef URES_DEBUG
-#include <stdio.h>
-
-U_CAPI UBool U_EXPORT2 ures_dumpCacheContents(void) {
- UBool cacheNotEmpty = FALSE;
- int32_t pos = UHASH_FIRST;
- const UHashElement *e;
- UResourceDataEntry *resB;
-
- Mutex lock(&resbMutex);
- if (cache == NULL) {
- fprintf(stderr,"%s:%d: RB Cache is NULL.\n", __FILE__, __LINE__);
- return FALSE;
- }
-
- while ((e = uhash_nextElement(cache, &pos)) != NULL) {
- cacheNotEmpty=TRUE;
- resB = (UResourceDataEntry *) e->value.pointer;
- fprintf(stderr,"%s:%d: RB Cache: Entry @0x%p, refcount %d, name %s:%s. Pool 0x%p, alias 0x%p, parent 0x%p\n",
- __FILE__, __LINE__,
- (void*)resB, resB->fCountExisting,
- resB->fName?resB->fName:"NULL",
- resB->fPath?resB->fPath:"NULL",
- (void*)resB->fPool,
- (void*)resB->fAlias,
- (void*)resB->fParent);
- }
-
- fprintf(stderr,"%s:%d: RB Cache still contains %d items.\n", __FILE__, __LINE__, uhash_count(cache));
- return cacheNotEmpty;
-}
-
-#endif
-
-static UBool U_CALLCONV ures_cleanup(void)
-{
- if (cache != NULL) {
- ures_flushCache();
- uhash_close(cache);
- cache = NULL;
- }
- gCacheInitOnce.reset();
- return TRUE;
-}
-
-/** INTERNAL: Initializes the cache for resources */
-static void U_CALLCONV createCache(UErrorCode &status) {
- U_ASSERT(cache == NULL);
- cache = uhash_open(hashEntry, compareEntries, NULL, &status);
- ucln_common_registerCleanup(UCLN_COMMON_URES, ures_cleanup);
-}
-
-static void initCache(UErrorCode *status) {
- umtx_initOnce(gCacheInitOnce, &createCache, *status);
-}
-
-/** INTERNAL: sets the name (locale) of the resource bundle to given name */
-
-static void setEntryName(UResourceDataEntry *res, const char *name, UErrorCode *status) {
- int32_t len = (int32_t)uprv_strlen(name);
- if(res->fName != NULL && res->fName != res->fNameBuffer) {
- uprv_free(res->fName);
- }
- if (len < (int32_t)sizeof(res->fNameBuffer)) {
- res->fName = res->fNameBuffer;
- }
- else {
- res->fName = (char *)uprv_malloc(len+1);
- }
- if(res->fName == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- } else {
- uprv_strcpy(res->fName, name);
- }
-}
-
-static UResourceDataEntry *
-getPoolEntry(const char *path, UErrorCode *status);
-
-/**
- * INTERNAL: Inits and opens an entry from a data DLL.
- * CAUTION: resbMutex must be locked when calling this function.
- */
-static UResourceDataEntry *init_entry(const char *localeID, const char *path, UErrorCode *status) {
- UResourceDataEntry *r = NULL;
- UResourceDataEntry find;
- /*int32_t hashValue;*/
- const char *name;
- char aliasName[100] = { 0 };
- int32_t aliasLen = 0;
- /*UBool isAlias = FALSE;*/
- /*UHashTok hashkey; */
-
- if(U_FAILURE(*status)) {
- return NULL;
- }
-
- /* here we try to deduce the right locale name */
- if(localeID == NULL) { /* if localeID is NULL, we're trying to open default locale */
- name = uloc_getDefault();
- } else if(*localeID == 0) { /* if localeID is "" then we try to open root locale */
- name = kRootLocaleName;
- } else { /* otherwise, we'll open what we're given */
- name = localeID;
- }
-
- find.fName = (char *)name;
- find.fPath = (char *)path;
-
- /* calculate the hash value of the entry */
- /*hashkey.pointer = (void *)&find;*/
- /*hashValue = hashEntry(hashkey);*/
-
- /* check to see if we already have this entry */
- r = (UResourceDataEntry *)uhash_get(cache, &find);
- if(r == NULL) {
- /* if the entry is not yet in the hash table, we'll try to construct a new one */
- r = (UResourceDataEntry *) uprv_malloc(sizeof(UResourceDataEntry));
- if(r == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- uprv_memset(r, 0, sizeof(UResourceDataEntry));
- /*r->fHashKey = hashValue;*/
-
- setEntryName(r, name, status);
- if (U_FAILURE(*status)) {
- uprv_free(r);
- return NULL;
- }
-
- if(path != NULL) {
- r->fPath = (char *)uprv_strdup(path);
- if(r->fPath == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- uprv_free(r);
- return NULL;
- }
- }
-
- /* this is the actual loading */
- res_load(&(r->fData), r->fPath, r->fName, status);
-
- if (U_FAILURE(*status)) {
- /* if we failed to load due to an out-of-memory error, exit early. */
- if (*status == U_MEMORY_ALLOCATION_ERROR) {
- uprv_free(r);
- return NULL;
- }
- /* we have no such entry in dll, so it will always use fallback */
- *status = U_USING_FALLBACK_WARNING;
- r->fBogus = U_USING_FALLBACK_WARNING;
- } else { /* if we have a regular entry */
- Resource aliasres;
- if (r->fData.usesPoolBundle) {
- r->fPool = getPoolEntry(r->fPath, status);
- if (U_SUCCESS(*status)) {
- const int32_t *poolIndexes = r->fPool->fData.pRoot + 1;
- if(r->fData.pRoot[1 + URES_INDEX_POOL_CHECKSUM] == poolIndexes[URES_INDEX_POOL_CHECKSUM]) {
- r->fData.poolBundleKeys = (const char *)(poolIndexes + (poolIndexes[URES_INDEX_LENGTH] & 0xff));
- r->fData.poolBundleStrings = r->fPool->fData.p16BitUnits;
- } else {
- r->fBogus = *status = U_INVALID_FORMAT_ERROR;
- }
- } else {
- r->fBogus = *status;
- }
- }
- if (U_SUCCESS(*status)) {
- /* handle the alias by trying to get out the %%Alias tag.*/
- /* We'll try to get alias string from the bundle */
- aliasres = res_getResource(&(r->fData), "%%ALIAS");
- if (aliasres != RES_BOGUS) {
- // No tracing: called during initial data loading
- const UChar *alias = res_getStringNoTrace(&(r->fData), aliasres, &aliasLen);
- if(alias != NULL && aliasLen > 0) { /* if there is actual alias - unload and load new data */
- u_UCharsToChars(alias, aliasName, aliasLen+1);
- r->fAlias = init_entry(aliasName, path, status);
- }
- }
- }
- }
-
- {
- UResourceDataEntry *oldR = NULL;
- if((oldR = (UResourceDataEntry *)uhash_get(cache, r)) == NULL) { /* if the data is not cached */
- /* just insert it in the cache */
- UErrorCode cacheStatus = U_ZERO_ERROR;
- uhash_put(cache, (void *)r, r, &cacheStatus);
- if (U_FAILURE(cacheStatus)) {
- *status = cacheStatus;
- free_entry(r);
- r = NULL;
- }
- } else {
- /* somebody have already inserted it while we were working, discard newly opened data */
- /* Also, we could get here IF we opened an alias */
- free_entry(r);
- r = oldR;
- }
- }
-
- }
- if(r != NULL) {
- /* return the real bundle */
- while(r->fAlias != NULL) {
- r = r->fAlias;
- }
- r->fCountExisting++; /* we increase its reference count */
- /* if the resource has a warning */
- /* we don't want to overwrite a status with no error */
- if(r->fBogus != U_ZERO_ERROR && U_SUCCESS(*status)) {
- *status = r->fBogus; /* set the returning status */
- }
- }
- return r;
-}
-
-static UResourceDataEntry *
-getPoolEntry(const char *path, UErrorCode *status) {
- UResourceDataEntry *poolBundle = init_entry(kPoolBundleName, path, status);
- if( U_SUCCESS(*status) &&
- (poolBundle == NULL || poolBundle->fBogus != U_ZERO_ERROR || !poolBundle->fData.isPoolBundle)
- ) {
- *status = U_INVALID_FORMAT_ERROR;
- }
- return poolBundle;
-}
-
-/* INTERNAL: */
-/* CAUTION: resbMutex must be locked when calling this function! */
-static UResourceDataEntry *
-findFirstExisting(const char* path, char* name,
- UBool *isRoot, UBool *hasChopped, UBool *isDefault, UErrorCode* status) {
- UResourceDataEntry *r = NULL;
- UBool hasRealData = FALSE;
- const char *defaultLoc = uloc_getDefault();
- *hasChopped = TRUE; /* we're starting with a fresh name */
-
- while(*hasChopped && !hasRealData) {
- r = init_entry(name, path, status);
- /* Null pointer test */
- if (U_FAILURE(*status)) {
- return NULL;
- }
- *isDefault = (UBool)(uprv_strncmp(name, defaultLoc, uprv_strlen(name)) == 0);
- hasRealData = (UBool)(r->fBogus == U_ZERO_ERROR);
- if(!hasRealData) {
- /* this entry is not real. We will discard it. */
- /* However, the parent line for this entry is */
- /* not to be used - as there might be parent */
- /* lines in cache from previous openings that */
- /* are not updated yet. */
- r->fCountExisting--;
- /*entryCloseInt(r);*/
- r = NULL;
- *status = U_USING_FALLBACK_WARNING;
- } else {
- uprv_strcpy(name, r->fName); /* this is needed for supporting aliases */
- }
-
- *isRoot = (UBool)(uprv_strcmp(name, kRootLocaleName) == 0);
-
- /*Fallback data stuff*/
- *hasChopped = chopLocale(name);
- if (*hasChopped && *name == '\0') {
- uprv_strcpy(name, "und");
- }
- }
- return r;
-}
-
-static void ures_setIsStackObject( UResourceBundle* resB, UBool state) {
- if(state) {
- resB->fMagic1 = 0;
- resB->fMagic2 = 0;
- } else {
- resB->fMagic1 = MAGIC1;
- resB->fMagic2 = MAGIC2;
- }
-}
-
-static UBool ures_isStackObject(const UResourceBundle* resB) {
- return((resB->fMagic1 == MAGIC1 && resB->fMagic2 == MAGIC2)?FALSE:TRUE);
-}
-
-
-U_CFUNC void ures_initStackObject(UResourceBundle* resB) {
- uprv_memset(resB, 0, sizeof(UResourceBundle));
- ures_setIsStackObject(resB, TRUE);
-}
-
-U_NAMESPACE_BEGIN
-
-StackUResourceBundle::StackUResourceBundle() {
- ures_initStackObject(&bundle);
-}
-
-StackUResourceBundle::~StackUResourceBundle() {
- ures_close(&bundle);
-}
-
-U_NAMESPACE_END
-
-static UBool // returns U_SUCCESS(*status)
-loadParentsExceptRoot(UResourceDataEntry *&t1,
- char name[], int32_t nameCapacity,
- UBool usingUSRData, char usrDataPath[], UErrorCode *status) {
- if (U_FAILURE(*status)) { return FALSE; }
- UBool hasChopped = TRUE;
- while (hasChopped && t1->fParent == NULL && !t1->fData.noFallback &&
- res_getResource(&t1->fData,"%%ParentIsRoot") == RES_BOGUS) {
- Resource parentRes = res_getResource(&t1->fData, "%%Parent");
- if (parentRes != RES_BOGUS) { // An explicit parent was found.
- int32_t parentLocaleLen = 0;
- // No tracing: called during initial data loading
- const UChar *parentLocaleName = res_getStringNoTrace(&(t1->fData), parentRes, &parentLocaleLen);
- if(parentLocaleName != NULL && 0 < parentLocaleLen && parentLocaleLen < nameCapacity) {
- u_UCharsToChars(parentLocaleName, name, parentLocaleLen + 1);
- if (uprv_strcmp(name, kRootLocaleName) == 0) {
- return TRUE;
- }
- }
- }
- // Insert regular parents.
- UErrorCode parentStatus = U_ZERO_ERROR;
- UResourceDataEntry *t2 = init_entry(name, t1->fPath, &parentStatus);
- if (U_FAILURE(parentStatus)) {
- *status = parentStatus;
- return FALSE;
- }
- UResourceDataEntry *u2 = NULL;
- UErrorCode usrStatus = U_ZERO_ERROR;
- if (usingUSRData) { // This code inserts user override data into the inheritance chain.
- u2 = init_entry(name, usrDataPath, &usrStatus);
- // If we failed due to out-of-memory, report that to the caller and exit early.
- if (usrStatus == U_MEMORY_ALLOCATION_ERROR) {
- *status = usrStatus;
- return FALSE;
- }
- }
-
- if (usingUSRData && U_SUCCESS(usrStatus) && u2->fBogus == U_ZERO_ERROR) {
- t1->fParent = u2;
- u2->fParent = t2;
- } else {
- t1->fParent = t2;
- if (usingUSRData) {
- // The USR override data wasn't found, set it to be deleted.
- u2->fCountExisting = 0;
- }
- }
- t1 = t2;
- hasChopped = chopLocale(name);
- }
- return TRUE;
-}
-
-static UBool // returns U_SUCCESS(*status)
-insertRootBundle(UResourceDataEntry *&t1, UErrorCode *status) {
- if (U_FAILURE(*status)) { return FALSE; }
- UErrorCode parentStatus = U_ZERO_ERROR;
- UResourceDataEntry *t2 = init_entry(kRootLocaleName, t1->fPath, &parentStatus);
- if (U_FAILURE(parentStatus)) {
- *status = parentStatus;
- return FALSE;
- }
- t1->fParent = t2;
- t1 = t2;
- return TRUE;
-}
-
-enum UResOpenType {
- /**
- * Open a resource bundle for the locale;
- * if there is not even a base language bundle, then fall back to the default locale;
- * if there is no bundle for that either, then load the root bundle.
- *
- * This is the default bundle loading behavior.
- */
- URES_OPEN_LOCALE_DEFAULT_ROOT,
- // TODO: ICU ticket #11271 "consistent default locale across locale trees"
- // Add an option to look at the main locale tree for whether to
- // fall back to root directly (if the locale has main data) or
- // fall back to the default locale first (if the locale does not even have main data).
- /**
- * Open a resource bundle for the locale;
- * if there is not even a base language bundle, then load the root bundle;
- * never fall back to the default locale.
- *
- * This is used for algorithms that have good pan-Unicode default behavior,
- * such as case mappings, collation, and segmentation (BreakIterator).
- */
- URES_OPEN_LOCALE_ROOT,
- /**
- * Open a resource bundle for the exact bundle name as requested;
- * no fallbacks, do not load parent bundles.
- *
- * This is used for supplemental (non-locale) data.
- */
- URES_OPEN_DIRECT
-};
-typedef enum UResOpenType UResOpenType;
-
-static UResourceDataEntry *entryOpen(const char* path, const char* localeID,
- UResOpenType openType, UErrorCode* status) {
- U_ASSERT(openType != URES_OPEN_DIRECT);
- UErrorCode intStatus = U_ZERO_ERROR;
- UResourceDataEntry *r = NULL;
- UResourceDataEntry *t1 = NULL;
- UBool isDefault = FALSE;
- UBool isRoot = FALSE;
- UBool hasRealData = FALSE;
- UBool hasChopped = TRUE;
- UBool usingUSRData = U_USE_USRDATA && ( path == NULL || uprv_strncmp(path,U_ICUDATA_NAME,8) == 0);
-
- char name[ULOC_FULLNAME_CAPACITY];
- char usrDataPath[96];
-
- initCache(status);
-
- if(U_FAILURE(*status)) {
- return NULL;
- }
-
- uprv_strncpy(name, localeID, sizeof(name) - 1);
- name[sizeof(name) - 1] = 0;
-
- if ( usingUSRData ) {
- if ( path == NULL ) {
- uprv_strcpy(usrDataPath, U_USRDATA_NAME);
- } else {
- uprv_strncpy(usrDataPath, path, sizeof(usrDataPath) - 1);
- usrDataPath[0] = 'u';
- usrDataPath[1] = 's';
- usrDataPath[2] = 'r';
- usrDataPath[sizeof(usrDataPath) - 1] = 0;
- }
- }
-
- Mutex lock(&resbMutex); // Lock resbMutex until the end of this function.
-
- /* We're going to skip all the locales that do not have any data */
- r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus);
-
- // If we failed due to out-of-memory, report the failure and exit early.
- if (intStatus == U_MEMORY_ALLOCATION_ERROR) {
- *status = intStatus;
- goto finish;
- }
-
- if(r != NULL) { /* if there is one real locale, we can look for parents. */
- t1 = r;
- hasRealData = TRUE;
- if ( usingUSRData ) { /* This code inserts user override data into the inheritance chain */
- UErrorCode usrStatus = U_ZERO_ERROR;
- UResourceDataEntry *u1 = init_entry(t1->fName, usrDataPath, &usrStatus);
- // If we failed due to out-of-memory, report the failure and exit early.
- if (intStatus == U_MEMORY_ALLOCATION_ERROR) {
- *status = intStatus;
- goto finish;
- }
- if ( u1 != NULL ) {
- if(u1->fBogus == U_ZERO_ERROR) {
- u1->fParent = t1;
- r = u1;
- } else {
- /* the USR override data wasn't found, set it to be deleted */
- u1->fCountExisting = 0;
- }
- }
- }
- if (hasChopped && !isRoot) {
- if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) {
- goto finish;
- }
- }
- }
-
- /* we could have reached this point without having any real data */
- /* if that is the case, we need to chain in the default locale */
- if(r==NULL && openType == URES_OPEN_LOCALE_DEFAULT_ROOT && !isDefault && !isRoot) {
- /* insert default locale */
- uprv_strcpy(name, uloc_getDefault());
- r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus);
- // If we failed due to out-of-memory, report the failure and exit early.
- if (intStatus == U_MEMORY_ALLOCATION_ERROR) {
- *status = intStatus;
- goto finish;
- }
- intStatus = U_USING_DEFAULT_WARNING;
- if(r != NULL) { /* the default locale exists */
- t1 = r;
- hasRealData = TRUE;
- isDefault = TRUE;
- // TODO: Why not if (usingUSRData) { ... } like in the non-default-locale code path?
- if (hasChopped && !isRoot) {
- if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) {
- goto finish;
- }
- }
- }
- }
-
- /* we could still have r == NULL at this point - maybe even default locale is not */
- /* present */
- if(r == NULL) {
- uprv_strcpy(name, kRootLocaleName);
- r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus);
- // If we failed due to out-of-memory, report the failure and exit early.
- if (intStatus == U_MEMORY_ALLOCATION_ERROR) {
- *status = intStatus;
- goto finish;
- }
- if(r != NULL) {
- t1 = r;
- intStatus = U_USING_DEFAULT_WARNING;
- hasRealData = TRUE;
- } else { /* we don't even have the root locale */
- *status = U_MISSING_RESOURCE_ERROR;
- goto finish;
- }
- } else if(!isRoot && uprv_strcmp(t1->fName, kRootLocaleName) != 0 &&
- t1->fParent == NULL && !r->fData.noFallback) {
- if (!insertRootBundle(t1, status)) {
- goto finish;
- }
- if(!hasRealData) {
- r->fBogus = U_USING_DEFAULT_WARNING;
- }
- }
-
- // TODO: Does this ever loop?
- while(r != NULL && !isRoot && t1->fParent != NULL) {
- t1->fParent->fCountExisting++;
- t1 = t1->fParent;
- }
-
-finish:
- if(U_SUCCESS(*status)) {
- if(intStatus != U_ZERO_ERROR) {
- *status = intStatus;
- }
- return r;
- } else {
- return NULL;
- }
-}
-
-/**
- * Version of entryOpen() and findFirstExisting() for ures_openDirect(),
- * with no fallbacks.
- * Parent and root locale bundles are loaded if
- * the requested bundle does not have the "nofallback" flag.
- */
-static UResourceDataEntry *
-entryOpenDirect(const char* path, const char* localeID, UErrorCode* status) {
- initCache(status);
- if(U_FAILURE(*status)) {
- return NULL;
- }
-
- Mutex lock(&resbMutex);
- // findFirstExisting() without fallbacks.
- UResourceDataEntry *r = init_entry(localeID, path, status);
- if(U_SUCCESS(*status)) {
- if(r->fBogus != U_ZERO_ERROR) {
- r->fCountExisting--;
- r = NULL;
- }
- } else {
- r = NULL;
- }
-
- // Some code depends on the ures_openDirect() bundle to have a parent bundle chain,
- // unless it is marked with "nofallback".
- UResourceDataEntry *t1 = r;
- if(r != NULL && uprv_strcmp(localeID, kRootLocaleName) != 0 && // not root
- r->fParent == NULL && !r->fData.noFallback &&
- uprv_strlen(localeID) < ULOC_FULLNAME_CAPACITY) {
- char name[ULOC_FULLNAME_CAPACITY];
- uprv_strcpy(name, localeID);
- if(!chopLocale(name) || uprv_strcmp(name, kRootLocaleName) == 0 ||
- loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), FALSE, NULL, status)) {
- if(uprv_strcmp(t1->fName, kRootLocaleName) != 0 && t1->fParent == NULL) {
- insertRootBundle(t1, status);
- }
- }
- if(U_FAILURE(*status)) {
- r = NULL;
- }
- }
-
- if(r != NULL) {
- // TODO: Does this ever loop?
- while(t1->fParent != NULL) {
- t1->fParent->fCountExisting++;
- t1 = t1->fParent;
- }
- }
- return r;
-}
-
-/**
- * Functions to create and destroy resource bundles.
- * CAUTION: resbMutex must be locked when calling this function.
- */
-/* INTERNAL: */
-static void entryCloseInt(UResourceDataEntry *resB) {
- UResourceDataEntry *p = resB;
-
- while(resB != NULL) {
- p = resB->fParent;
- resB->fCountExisting--;
-
- /* Entries are left in the cache. TODO: add ures_flushCache() to force a flush
- of the cache. */
-/*
- if(resB->fCountExisting <= 0) {
- uhash_remove(cache, resB);
- if(resB->fBogus == U_ZERO_ERROR) {
- res_unload(&(resB->fData));
- }
- if(resB->fName != NULL) {
- uprv_free(resB->fName);
- }
- if(resB->fPath != NULL) {
- uprv_free(resB->fPath);
- }
- uprv_free(resB);
- }
-*/
-
- resB = p;
- }
-}
-
-/**
- * API: closes a resource bundle and cleans up.
- */
-
-static void entryClose(UResourceDataEntry *resB) {
- Mutex lock(&resbMutex);
- entryCloseInt(resB);
-}
-
-/*
-U_CFUNC void ures_setResPath(UResourceBundle *resB, const char* toAdd) {
- if(resB->fResPath == NULL) {
- resB->fResPath = resB->fResBuf;
- *(resB->fResPath) = 0;
- }
- resB->fResPathLen = uprv_strlen(toAdd);
- if(RES_BUFSIZE <= resB->fResPathLen+1) {
- if(resB->fResPath == resB->fResBuf) {
- resB->fResPath = (char *)uprv_malloc((resB->fResPathLen+1)*sizeof(char));
- } else {
- resB->fResPath = (char *)uprv_realloc(resB->fResPath, (resB->fResPathLen+1)*sizeof(char));
- }
- }
- uprv_strcpy(resB->fResPath, toAdd);
-}
-*/
-static void ures_appendResPath(UResourceBundle *resB, const char* toAdd, int32_t lenToAdd, UErrorCode *status) {
- int32_t resPathLenOrig = resB->fResPathLen;
- if(resB->fResPath == NULL) {
- resB->fResPath = resB->fResBuf;
- *(resB->fResPath) = 0;
- resB->fResPathLen = 0;
- }
- resB->fResPathLen += lenToAdd;
- if(RES_BUFSIZE <= resB->fResPathLen+1) {
- if(resB->fResPath == resB->fResBuf) {
- resB->fResPath = (char *)uprv_malloc((resB->fResPathLen+1)*sizeof(char));
- /* Check that memory was allocated correctly. */
- if (resB->fResPath == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- uprv_strcpy(resB->fResPath, resB->fResBuf);
- } else {
- char *temp = (char *)uprv_realloc(resB->fResPath, (resB->fResPathLen+1)*sizeof(char));
- /* Check that memory was reallocated correctly. */
- if (temp == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- resB->fResPath = temp;
- }
- }
- uprv_strcpy(resB->fResPath + resPathLenOrig, toAdd);
-}
-
-static void ures_freeResPath(UResourceBundle *resB) {
- if (resB->fResPath && resB->fResPath != resB->fResBuf) {
- uprv_free(resB->fResPath);
- }
- resB->fResPath = NULL;
- resB->fResPathLen = 0;
-}
-
-static void
-ures_closeBundle(UResourceBundle* resB, UBool freeBundleObj)
-{
- if(resB != NULL) {
- if(resB->fData != NULL) {
- entryClose(resB->fData);
- }
- if(resB->fVersion != NULL) {
- uprv_free(resB->fVersion);
- }
- ures_freeResPath(resB);
-
- if(ures_isStackObject(resB) == FALSE && freeBundleObj) {
- uprv_free(resB);
- }
-#if 0 /*U_DEBUG*/
- else {
- /* poison the data */
- uprv_memset(resB, -1, sizeof(UResourceBundle));
- }
-#endif
- }
-}
-
-U_CAPI void U_EXPORT2
-ures_close(UResourceBundle* resB)
-{
- ures_closeBundle(resB, TRUE);
-}
-
-static UResourceBundle *init_resb_result(const ResourceData *rdata, Resource r,
- const char *key, int32_t idx, UResourceDataEntry *realData,
- const UResourceBundle *parent, int32_t noAlias,
- UResourceBundle *resB, UErrorCode *status)
-{
- if(status == NULL || U_FAILURE(*status)) {
- return resB;
- }
- if (parent == NULL) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- if(RES_GET_TYPE(r) == URES_ALIAS) { /* This is an alias, need to exchange with real data */
- if(noAlias < URES_MAX_ALIAS_LEVEL) {
- int32_t len = 0;
- const UChar *alias = res_getAlias(rdata, r, &len);
- if(len > 0) {
- /* we have an alias, now let's cut it up */
- char stackAlias[200];
- char *chAlias = NULL, *path = NULL, *locale = NULL, *keyPath = NULL;
- int32_t capacity;
-
- /*
- * Allocate enough space for both the char * version
- * of the alias and parent->fResPath.
- *
- * We do this so that res_findResource() can modify the path,
- * which allows us to remove redundant _res_findResource() variants
- * in uresdata.c.
- * res_findResource() now NUL-terminates each segment so that table keys
- * can always be compared with strcmp() instead of strncmp().
- * Saves code there and simplifies testing and code coverage.
- *
- * markus 2003oct17
- */
- ++len; /* count the terminating NUL */
- if(parent->fResPath != NULL) {
- capacity = (int32_t)uprv_strlen(parent->fResPath) + 1;
- } else {
- capacity = 0;
- }
- if(capacity < len) {
- capacity = len;
- }
- if(capacity <= (int32_t)sizeof(stackAlias)) {
- capacity = (int32_t)sizeof(stackAlias);
- chAlias = stackAlias;
- } else {
- chAlias = (char *)uprv_malloc(capacity);
- /* test for NULL */
- if(chAlias == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- }
- u_UCharsToChars(alias, chAlias, len);
-
- if(*chAlias == RES_PATH_SEPARATOR) {
- /* there is a path included */
- locale = uprv_strchr(chAlias+1, RES_PATH_SEPARATOR);
- if(locale == NULL) {
- locale = uprv_strchr(chAlias, 0); /* avoid locale == NULL to make code below work */
- } else {
- *locale = 0;
- locale++;
- }
- path = chAlias+1;
- if(uprv_strcmp(path, "LOCALE") == 0) {
- /* this is an XPath alias, starting with "/LOCALE/" */
- /* it contains the path to a resource which should be looked up */
- /* starting in the requested locale */
- keyPath = locale;
- locale = parent->fTopLevelData->fName; /* this is the requested locale's name */
- path = realData->fPath; /* we will be looking in the same package */
- } else {
- if(uprv_strcmp(path, "ICUDATA") == 0) { /* want ICU data */
- path = NULL;
- }
- keyPath = uprv_strchr(locale, RES_PATH_SEPARATOR);
- if(keyPath) {
- *keyPath = 0;
- keyPath++;
- }
- }
- } else {
- /* no path, start with a locale */
- locale = chAlias;
- keyPath = uprv_strchr(locale, RES_PATH_SEPARATOR);
- if(keyPath) {
- *keyPath = 0;
- keyPath++;
- }
- path = realData->fPath;
- }
-
-
- {
- /* got almost everything, let's try to open */
- /* first, open the bundle with real data */
- UResourceBundle *result = resB;
- const char* temp = NULL;
- UErrorCode intStatus = U_ZERO_ERROR;
- UResourceBundle *mainRes = ures_openDirect(path, locale, &intStatus);
- if(U_SUCCESS(intStatus)) {
- if(keyPath == NULL) {
- /* no key path. This means that we are going to
- * to use the corresponding resource from
- * another bundle
- */
- /* first, we are going to get a corresponding parent
- * resource to the one we are searching.
- */
- char *aKey = parent->fResPath;
- if(aKey) {
- uprv_strcpy(chAlias, aKey); /* allocated large enough above */
- aKey = chAlias;
- r = res_findResource(&(mainRes->fResData), mainRes->fRes, &aKey, &temp);
- } else {
- r = mainRes->fRes;
- }
- if(key) {
- /* we need to make keyPath from parent's fResPath and
- * current key, if there is a key associated
- */
- len = (int32_t)(uprv_strlen(key) + 1);
- if(len > capacity) {
- capacity = len;
- if(chAlias == stackAlias) {
- chAlias = (char *)uprv_malloc(capacity);
- } else {
- chAlias = (char *)uprv_realloc(chAlias, capacity);
- }
- if(chAlias == NULL) {
- ures_close(mainRes);
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- }
- uprv_memcpy(chAlias, key, len);
- aKey = chAlias;
- r = res_findResource(&(mainRes->fResData), r, &aKey, &temp);
- } else if(idx != -1) {
- /* if there is no key, but there is an index, try to get by the index */
- /* here we have either a table or an array, so get the element */
- int32_t type = RES_GET_TYPE(r);
- if(URES_IS_TABLE(type)) {
- r = res_getTableItemByIndex(&(mainRes->fResData), r, idx, (const char **)&aKey);
- } else { /* array */
- r = res_getArrayItem(&(mainRes->fResData), r, idx);
- }
- }
- if(r != RES_BOGUS) {
- result = init_resb_result(&(mainRes->fResData), r, temp, -1, mainRes->fData, mainRes, noAlias+1, resB, status);
- } else {
- *status = U_MISSING_RESOURCE_ERROR;
- result = resB;
- }
- } else {
- /* this one is a bit trickier.
- * we start finding keys, but after we resolve one alias, the path might continue.
- * Consider:
- * aliastest:alias { "testtypes/anotheralias/Sequence" }
- * anotheralias:alias { "/ICUDATA/sh/CollationElements" }
- * aliastest resource should finally have the sequence, not collation elements.
- */
- UResourceDataEntry *dataEntry = mainRes->fData;
- char stackPath[URES_MAX_BUFFER_SIZE];
- char *pathBuf = stackPath, *myPath = pathBuf;
- if(uprv_strlen(keyPath) >= UPRV_LENGTHOF(stackPath)) {
- pathBuf = (char *)uprv_malloc((uprv_strlen(keyPath)+1)*sizeof(char));
- if(pathBuf == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- ures_close(mainRes);
- return NULL;
- }
- }
- uprv_strcpy(pathBuf, keyPath);
- result = mainRes;
- /* now we have fallback following here */
- do {
- r = dataEntry->fData.rootRes;
- /* this loop handles 'found' resources over several levels */
- while(*myPath && U_SUCCESS(*status)) {
- r = res_findResource(&(dataEntry->fData), r, &myPath, &temp);
- if(r != RES_BOGUS) { /* found a resource, but it might be an indirection */
- resB = init_resb_result(&(dataEntry->fData), r, temp, -1, dataEntry, result, noAlias+1, resB, status);
- result = resB;
- if(result) {
- r = result->fRes; /* switch to a new resource, possibly a new tree */
- dataEntry = result->fData;
- }
- } else { /* no resource found, we don't really want to look anymore on this level */
- break;
- }
- }
- dataEntry = dataEntry->fParent;
- uprv_strcpy(pathBuf, keyPath);
- myPath = pathBuf;
- } while(r == RES_BOGUS && dataEntry != NULL);
- if(r == RES_BOGUS) {
- *status = U_MISSING_RESOURCE_ERROR;
- result = resB;
- }
- if(pathBuf != stackPath) {
- uprv_free(pathBuf);
- }
- }
- } else { /* we failed to open the resource we're aliasing to */
- *status = intStatus;
- }
- if(chAlias != stackAlias) {
- uprv_free(chAlias);
- }
- if(mainRes != result) {
- ures_close(mainRes);
- }
- ResourceTracer(resB).maybeTrace("getalias");
- return result;
- }
- } else {
- /* bad alias, should be an error */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return resB;
- }
- } else {
- *status = U_TOO_MANY_ALIASES_ERROR;
- return resB;
- }
- }
- if(resB == NULL) {
- resB = (UResourceBundle *)uprv_malloc(sizeof(UResourceBundle));
- /* test for NULL */
- if (resB == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- ures_setIsStackObject(resB, FALSE);
- resB->fResPath = NULL;
- resB->fResPathLen = 0;
- } else {
- if(resB->fData != NULL) {
- entryClose(resB->fData);
- }
- if(resB->fVersion != NULL) {
- uprv_free(resB->fVersion);
- }
- /*
- weiv: if stack object was passed in, it doesn't really need to be reinited,
- since the purpose of initing is to remove stack junk. However, at this point
- we would not do anything to an allocated object, so stack object should be
- treated the same
- */
- /*
- if(ures_isStackObject(resB) != FALSE) {
- ures_initStackObject(resB);
- }
- */
- if(parent != resB) {
- ures_freeResPath(resB);
- }
- }
- resB->fData = realData;
- entryIncrease(resB->fData);
- resB->fHasFallback = FALSE;
- resB->fIsTopLevel = FALSE;
- resB->fIndex = -1;
- resB->fKey = key;
- /*resB->fParentRes = parent;*/
- resB->fTopLevelData = parent->fTopLevelData;
- if(parent->fResPath && parent != resB) {
- ures_appendResPath(resB, parent->fResPath, parent->fResPathLen, status);
- }
- if(key != NULL) {
- ures_appendResPath(resB, key, (int32_t)uprv_strlen(key), status);
- if(resB->fResPath[resB->fResPathLen-1] != RES_PATH_SEPARATOR) {
- ures_appendResPath(resB, RES_PATH_SEPARATOR_S, 1, status);
- }
- } else if(idx >= 0) {
- char buf[256];
- int32_t len = T_CString_integerToString(buf, idx, 10);
- ures_appendResPath(resB, buf, len, status);
- if(resB->fResPath[resB->fResPathLen-1] != RES_PATH_SEPARATOR) {
- ures_appendResPath(resB, RES_PATH_SEPARATOR_S, 1, status);
- }
- }
- /* Make sure that Purify doesn't complain about uninitialized memory copies. */
- {
- int32_t usedLen = ((resB->fResBuf == resB->fResPath) ? resB->fResPathLen : 0);
- uprv_memset(resB->fResBuf + usedLen, 0, sizeof(resB->fResBuf) - usedLen);
- }
-
- resB->fVersion = NULL;
- resB->fRes = r;
- /*resB->fParent = parent->fRes;*/
- uprv_memmove(&resB->fResData, rdata, sizeof(ResourceData));
- resB->fSize = res_countArrayItems(&(resB->fResData), resB->fRes);
- ResourceTracer(resB).trace("get");
- return resB;
-}
-
-/**
- * Copies the bundle `original` into `r`.
- *
- * r is returned untouched when *status already indicates failure, when r and
- * original are the same object, or when original is NULL. When r is NULL a
- * new UResourceBundle is allocated with uprv_malloc(); otherwise r is first
- * released with ures_closeBundle(r, FALSE) and its stack-object flag is
- * remembered and re-applied after the copy.
- *
- * @param r        destination bundle, or NULL to have one allocated
- * @param original bundle to copy from
- * @param status   must not be NULL (it is dereferenced unconditionally); set
- *                 to U_MEMORY_ALLOCATION_ERROR if the destination cannot be
- *                 allocated
- * @return the destination bundle, or NULL on allocation failure
- */
-UResourceBundle *ures_copyResb(UResourceBundle *r, const UResourceBundle *original, UErrorCode *status) {
-    if (U_FAILURE(*status) || r == original || original == NULL) {
-        return r;
-    }
-
-    UBool wasStackObject = FALSE;
-    if (r != NULL) {
-        wasStackObject = ures_isStackObject(r);
-        ures_closeBundle(r, FALSE);
-    } else {
-        r = (UResourceBundle *)uprv_malloc(sizeof(UResourceBundle));
-        if (r == NULL) {
-            *status = U_MEMORY_ALLOCATION_ERROR;
-            return NULL;
-        }
-    }
-
-    /* Bitwise copy first, then rebuild r's resource path through
-     * ures_appendResPath() rather than keeping the copied pointer. */
-    uprv_memcpy(r, original, sizeof(UResourceBundle));
-    r->fResPath = NULL;
-    r->fResPathLen = 0;
-    if (original->fResPath != NULL) {
-        ures_appendResPath(r, original->fResPath, original->fResPathLen, status);
-    }
-    /* The memcpy overwrote r's own flag with original's; put it back. */
-    ures_setIsStackObject(r, wasStackObject);
-
-    /* r now points at the same data entry as original
-     * (entryIncrease presumably bumps its reference count). */
-    if (r->fData != NULL) {
-        entryIncrease(r->fData);
-    }
-    return r;
-}
-
-/**
- * Functions to retrieve data from resource bundles.
- */
-
-/**
- * Returns the string held by resB.
- *
- * @param resB   bundle to read; NULL yields U_ILLEGAL_ARGUMENT_ERROR
- * @param len    forwarded to res_getString() to receive the length
- * @param status in/out error code; nothing is done if it is NULL or already
- *               a failure. Set to U_RESOURCE_TYPE_MISMATCH when
- *               res_getString() returns NULL.
- * @return the string, or NULL on any error
- */
-U_CAPI const UChar* U_EXPORT2 ures_getString(const UResourceBundle* resB, int32_t* len, UErrorCode* status) {
-    if (status == NULL || U_FAILURE(*status)) {
-        return NULL;
-    }
-    if (resB == NULL) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return NULL;
-    }
-
-    const UChar *text = res_getString({resB}, &(resB->fResData), resB->fRes, len);
-    if (text == NULL) {
-        *status = U_RESOURCE_TYPE_MISMATCH;
-    }
-    return text;
-}
-
-/**
- * Converts a UTF-16 resource string to UTF-8 for the ures_getUTF8String*()
- * family.
- *
- * @param s16       UTF-16 source string
- * @param length16  number of UChars in s16
- * @param dest      caller buffer; may be NULL only when the capacity is 0
- * @param pLength   in: capacity of dest (treated as 0 when the pointer is
- *                  NULL); out: UTF-8 length
- * @param forceCopy when TRUE the result always starts exactly at dest; when
- *                  FALSE the result may be placed in the tail of dest, and an
- *                  empty source yields a pointer to a constant ""
- * @param status    in/out error code. A NULL status is tolerated and yields
- *                  NULL: the public wrappers forward the caller's pointer
- *                  unchecked, and the getters they call first already accept
- *                  NULL, so the guard belongs here.
- * @return pointer to the UTF-8 text, or NULL on error/preflighting
- */
-static const char *
-ures_toUTF8String(const UChar *s16, int32_t length16,
-                  char *dest, int32_t *pLength,
-                  UBool forceCopy,
-                  UErrorCode *status) {
-    if (status == NULL || U_FAILURE(*status)) {
-        return NULL;
-    }
-
-    int32_t capacity = (pLength != NULL) ? *pLength : 0;
-    if (capacity < 0 || (capacity > 0 && dest == NULL)) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return NULL;
-    }
-
-    if (length16 == 0) {
-        /* empty string, return as read-only pointer */
-        if (pLength != NULL) {
-            *pLength = 0;
-        }
-        if (forceCopy) {
-            u_terminateChars(dest, capacity, 0, status);
-            return dest;
-        }
-        return "";
-    }
-
-    /* We need to transform the string to the destination buffer. */
-    if (capacity < length16) {
-        /* No chance for the string to fit. Pure preflighting. */
-        return u_strToUTF8(NULL, 0, pLength, s16, length16, status);
-    }
-    if (!forceCopy && (length16 <= 0x2aaaaaaa)) {
-        /*
-         * We know the string will fit into dest because each UChar turns
-         * into at most three UTF-8 bytes. Fill the latter part of dest
-         * so that callers do not expect to use dest as a string pointer,
-         * hopefully leading to more robust code for when resource bundles
-         * may store UTF-8 natively.
-         * (In which case dest would not be used at all.)
-         *
-         * We do not do this if forceCopy=TRUE because then the caller
-         * expects the string to start exactly at dest.
-         *
-         * The test above for <= 0x2aaaaaaa prevents overflows.
-         * The +1 is for the NUL terminator.
-         */
-        int32_t maxLength = 3 * length16 + 1;
-        if (capacity > maxLength) {
-            dest += capacity - maxLength;
-            capacity = maxLength;
-        }
-    }
-    return u_strToUTF8(dest, capacity, pLength, s16, length16, status);
-}
-
-/**
- * UTF-8 variant of ures_getString(): fetches the UTF-16 string of resB and
- * hands it to ures_toUTF8String() together with the caller's buffer.
- * See ures_toUTF8String() for the meaning of dest, pLength and forceCopy.
- */
-U_CAPI const char * U_EXPORT2
-ures_getUTF8String(const UResourceBundle *resB,
-                   char *dest, int32_t *pLength,
-                   UBool forceCopy,
-                   UErrorCode *status) {
-    int32_t utf16Length;
-    const UChar *utf16 = ures_getString(resB, &utf16Length, status);
-    return ures_toUTF8String(utf16, utf16Length, dest, pLength, forceCopy, status);
-}
-
-/**
- * Returns the binary data held by resB.
- *
- * @param resB   bundle to read; NULL yields U_ILLEGAL_ARGUMENT_ERROR
- * @param len    forwarded to res_getBinary() to receive the length
- * @param status in/out error code; nothing is done if it is NULL or already
- *               a failure. Set to U_RESOURCE_TYPE_MISMATCH when
- *               res_getBinary() returns NULL.
- * @return pointer to the bytes, or NULL on any error
- */
-U_CAPI const uint8_t* U_EXPORT2 ures_getBinary(const UResourceBundle* resB, int32_t* len,
-                                               UErrorCode* status) {
-    if (status == NULL || U_FAILURE(*status)) {
-        return NULL;
-    }
-    if (resB == NULL) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return NULL;
-    }
-
-    const uint8_t *bytes = res_getBinary({resB}, &(resB->fResData), resB->fRes, len);
-    if (bytes == NULL) {
-        *status = U_RESOURCE_TYPE_MISMATCH;
-    }
-    return bytes;
-}
-
-/**
- * Returns the integer vector held by resB.
- *
- * @param resB   bundle to read; NULL yields U_ILLEGAL_ARGUMENT_ERROR
- * @param len    forwarded to res_getIntVector() to receive the element count
- * @param status in/out error code; nothing is done if it is NULL or already
- *               a failure. Set to U_RESOURCE_TYPE_MISMATCH when
- *               res_getIntVector() returns NULL.
- * @return pointer to the integers, or NULL on any error
- */
-U_CAPI const int32_t* U_EXPORT2 ures_getIntVector(const UResourceBundle* resB, int32_t* len,
-                                                  UErrorCode* status) {
-    if (status == NULL || U_FAILURE(*status)) {
-        return NULL;
-    }
-    if (resB == NULL) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return NULL;
-    }
-
-    const int32_t *vector = res_getIntVector({resB}, &(resB->fResData), resB->fRes, len);
-    if (vector == NULL) {
-        *status = U_RESOURCE_TYPE_MISMATCH;
-    }
-    return vector;
-}
-
-/**
- * Returns the value of an integer resource as a signed integer
- * (with sign extension).
- *
- * 0xffffffff is returned on every error path: NULL or failed status, a NULL
- * bundle (U_ILLEGAL_ARGUMENT_ERROR), or a resource whose type is not
- * URES_INT (U_RESOURCE_TYPE_MISMATCH).
- */
-U_CAPI int32_t U_EXPORT2 ures_getInt(const UResourceBundle* resB, UErrorCode *status) {
-    if (status == NULL || U_FAILURE(*status)) {
-        return 0xffffffff;
-    }
-
-    UErrorCode problem = U_ZERO_ERROR;
-    if (resB == NULL) {
-        problem = U_ILLEGAL_ARGUMENT_ERROR;
-    } else if (RES_GET_TYPE(resB->fRes) != URES_INT) {
-        problem = U_RESOURCE_TYPE_MISMATCH;
-    }
-    if (problem != U_ZERO_ERROR) {
-        *status = problem;
-        return 0xffffffff;
-    }
-    return res_getInt({resB}, resB->fRes);
-}
-
-/**
- * Returns the value of an integer resource as an unsigned integer.
- *
- * 0xffffffff is returned on every error path: NULL or failed status, a NULL
- * bundle (U_ILLEGAL_ARGUMENT_ERROR), or a resource whose type is not
- * URES_INT (U_RESOURCE_TYPE_MISMATCH).
- */
-U_CAPI uint32_t U_EXPORT2 ures_getUInt(const UResourceBundle* resB, UErrorCode *status) {
-    if (status == NULL || U_FAILURE(*status)) {
-        return 0xffffffff;
-    }
-
-    UErrorCode problem = U_ZERO_ERROR;
-    if (resB == NULL) {
-        problem = U_ILLEGAL_ARGUMENT_ERROR;
-    } else if (RES_GET_TYPE(resB->fRes) != URES_INT) {
-        problem = U_RESOURCE_TYPE_MISMATCH;
-    }
-    if (problem != U_ZERO_ERROR) {
-        *status = problem;
-        return 0xffffffff;
-    }
-    return res_getUInt({resB}, resB->fRes);
-}
-
-/**
- * Returns the public resource type of resB as computed by
- * res_getPublicType(), or URES_NONE when resB is NULL.
- */
-U_CAPI UResType U_EXPORT2 ures_getType(const UResourceBundle *resB) {
-    return (resB == NULL) ? URES_NONE : res_getPublicType(resB->fRes);
-}
-
-/**
- * Returns the key stored in resB (fKey), or NULL when resB is NULL.
- */
-U_CAPI const char * U_EXPORT2 ures_getKey(const UResourceBundle *resB) {
-    //
-    // TODO: Trace ures_getKey? I guess not usually.
-    //
-    // We usually get the key string to decide whether we want the value, or to
-    // make a key-value pair. Tracing the value should suffice.
-    //
-    // However, I believe we have some data (e.g., in res_index) where the key
-    // strings are the data. Tracing the enclosing table should suffice.
-    //
-    return (resB != NULL) ? resB->fKey : NULL;
-}
-
-/**
- * Returns the cached item count of resB (fSize), or 0 when resB is NULL.
- */
-U_CAPI int32_t U_EXPORT2 ures_getSize(const UResourceBundle *resB) {
-    return (resB != NULL) ? resB->fSize : 0;
-}
-
-/**
- * Returns the string for item r, which is the sIndex-th child of resB.
- *
- * A non-alias item is read directly with res_getString(). An alias item is
- * opened as a temporary bundle through ures_getByIndex(), its string is
- * fetched, and the temporary bundle is closed again before returning.
- */
-static const UChar* ures_getStringWithAlias(const UResourceBundle *resB, Resource r, int32_t sIndex, int32_t *len, UErrorCode *status) {
-    if (RES_GET_TYPE(r) != URES_ALIAS) {
-        return res_getString({resB, sIndex}, &(resB->fResData), r, len);
-    }
-
-    UResourceBundle *resolved = ures_getByIndex(resB, sIndex, NULL, status);
-    const UChar *text = ures_getString(resolved, len, status);
-    ures_close(resolved);
-    return text;
-}
-
-/**
- * Rewinds the iteration position of resB (fIndex) to -1, i.e. before the
- * first item. A NULL bundle is ignored.
- */
-U_CAPI void U_EXPORT2 ures_resetIterator(UResourceBundle *resB){
-    if (resB != NULL) {
-        resB->fIndex = -1;
-    }
-}
-
-/**
- * Tells whether the iteration position of resB is still before its last
- * item (fIndex < fSize-1). Returns FALSE for a NULL bundle.
- */
-U_CAPI UBool U_EXPORT2 ures_hasNext(const UResourceBundle *resB) {
-    return (resB == NULL) ? FALSE : (UBool)(resB->fIndex < resB->fSize-1);
-}
-
-/**
- * Advances the iteration position of resB and returns the string found there.
- *
- * @param resB   bundle being iterated; NULL yields U_ILLEGAL_ARGUMENT_ERROR
- * @param len    receives the string length
- * @param key    receives the item key when resB is a table
- * @param status in/out error code; U_INDEX_OUTOFBOUNDS_ERROR when the
- *               position is already at the last item (the position is then
- *               not advanced), U_RESOURCE_TYPE_MISMATCH when resB is an
- *               int, binary or int-vector resource
- * @return the string, or NULL on error
- */
-U_CAPI const UChar* U_EXPORT2 ures_getNextString(UResourceBundle *resB, int32_t* len, const char ** key, UErrorCode *status) {
-    if (status == NULL || U_FAILURE(*status)) {
-        return NULL;
-    }
-    if (resB == NULL) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return NULL;
-    }
-    if (resB->fIndex == resB->fSize-1) {
-        *status = U_INDEX_OUTOFBOUNDS_ERROR;
-        return NULL;
-    }
-
-    resB->fIndex++;
-    Resource item = RES_BOGUS;
-    switch (RES_GET_TYPE(resB->fRes)) {
-    case URES_STRING:
-    case URES_STRING_V2:
-        return res_getString({resB}, &(resB->fResData), resB->fRes, len);
-
-    case URES_TABLE:
-    case URES_TABLE16:
-    case URES_TABLE32:
-        item = res_getTableItemByIndex(&(resB->fResData), resB->fRes, resB->fIndex, key);
-        /* TODO: do the fallback when item is RES_BOGUS and resB->fHasFallback */
-        return ures_getStringWithAlias(resB, item, resB->fIndex, len, status);
-
-    case URES_ARRAY:
-    case URES_ARRAY16:
-        item = res_getArrayItem(&(resB->fResData), resB->fRes, resB->fIndex);
-        /* TODO: do the fallback when item is RES_BOGUS and resB->fHasFallback */
-        return ures_getStringWithAlias(resB, item, resB->fIndex, len, status);
-
-    case URES_ALIAS:
-        return ures_getStringWithAlias(resB, resB->fRes, resB->fIndex, len, status);
-
-    case URES_INT:
-    case URES_BINARY:
-    case URES_INT_VECTOR:
-        *status = U_RESOURCE_TYPE_MISMATCH;
-        break;
-
-    default:
-        /* unknown type: no string, status left untouched */
-        break;
-    }
-    return NULL;
-}
-
-/**
- * Advances the iteration position of resB and returns the item found there
- * as a bundle.
- *
- * For scalar resources (int, binary, string, int vector) the result is a
- * copy of resB itself. For tables and arrays the child at the new position
- * is wrapped by init_resb_result().
- *
- * @param resB   bundle being iterated; NULL yields U_ILLEGAL_ARGUMENT_ERROR
- * @param fillIn bundle to reuse for the result, passed through to
- *               ures_copyResb()/init_resb_result()
- * @param status in/out error code; U_INDEX_OUTOFBOUNDS_ERROR when the
- *               position is already at the last item
- * @return the resulting bundle; fillIn itself on every error path and for
- *         resource types not handled here
- */
-U_CAPI UResourceBundle* U_EXPORT2 ures_getNextResource(UResourceBundle *resB, UResourceBundle *fillIn, UErrorCode *status) {
-    if (status == NULL || U_FAILURE(*status)) {
-        return fillIn;
-    }
-    if (resB == NULL) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return fillIn;
-    }
-    if (resB->fIndex == resB->fSize-1) {
-        *status = U_INDEX_OUTOFBOUNDS_ERROR;
-        return fillIn;
-    }
-
-    resB->fIndex++;
-    const char *itemKey = NULL;   /* stays NULL for array items */
-    Resource item = RES_BOGUS;
-    switch (RES_GET_TYPE(resB->fRes)) {
-    case URES_INT:
-    case URES_BINARY:
-    case URES_STRING:
-    case URES_STRING_V2:
-    case URES_INT_VECTOR:
-        return ures_copyResb(fillIn, resB, status);
-
-    case URES_TABLE:
-    case URES_TABLE16:
-    case URES_TABLE32:
-        item = res_getTableItemByIndex(&(resB->fResData), resB->fRes, resB->fIndex, &itemKey);
-        /* TODO: do the fallback when item is RES_BOGUS and resB->fHasFallback */
-        return init_resb_result(&(resB->fResData), item, itemKey, resB->fIndex, resB->fData, resB, 0, fillIn, status);
-
-    case URES_ARRAY:
-    case URES_ARRAY16:
-        item = res_getArrayItem(&(resB->fResData), resB->fRes, resB->fIndex);
-        /* TODO: do the fallback when item is RES_BOGUS and resB->fHasFallback */
-        return init_resb_result(&(resB->fResData), item, itemKey, resB->fIndex, resB->fData, resB, 0, fillIn, status);
-
-    default:
-        break;
-    }
-    return fillIn;
-}
-
-/**
- * Returns the item at position indexR of resB as a bundle.
- *
- * For scalar resources (int, binary, string, int vector) the result is a
- * copy of resB itself. For tables and arrays the child at indexR is wrapped
- * by init_resb_result().
- *
- * @param resB   bundle to index; NULL yields U_ILLEGAL_ARGUMENT_ERROR
- * @param indexR zero-based position; values outside [0, fSize) yield
- *               U_MISSING_RESOURCE_ERROR
- * @param fillIn bundle to reuse for the result
- * @param status in/out error code
- * @return the resulting bundle; fillIn itself on every error path and for
- *         resource types not handled here
- */
-U_CAPI UResourceBundle* U_EXPORT2 ures_getByIndex(const UResourceBundle *resB, int32_t indexR, UResourceBundle *fillIn, UErrorCode *status) {
-    if (status == NULL || U_FAILURE(*status)) {
-        return fillIn;
-    }
-    if (resB == NULL) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return fillIn;
-    }
-    if (indexR < 0 || resB->fSize <= indexR) {
-        *status = U_MISSING_RESOURCE_ERROR;
-        return fillIn;
-    }
-
-    const char *itemKey = NULL;   /* stays NULL for array items */
-    Resource item = RES_BOGUS;
-    switch (RES_GET_TYPE(resB->fRes)) {
-    case URES_INT:
-    case URES_BINARY:
-    case URES_STRING:
-    case URES_STRING_V2:
-    case URES_INT_VECTOR:
-        return ures_copyResb(fillIn, resB, status);
-
-    case URES_TABLE:
-    case URES_TABLE16:
-    case URES_TABLE32:
-        item = res_getTableItemByIndex(&(resB->fResData), resB->fRes, indexR, &itemKey);
-        /* TODO: do the fallback when item is RES_BOGUS and resB->fHasFallback */
-        return init_resb_result(&(resB->fResData), item, itemKey, indexR, resB->fData, resB, 0, fillIn, status);
-
-    case URES_ARRAY:
-    case URES_ARRAY16:
-        item = res_getArrayItem(&(resB->fResData), resB->fRes, indexR);
-        /* TODO: do the fallback when item is RES_BOGUS and resB->fHasFallback */
-        return init_resb_result(&(resB->fResData), item, itemKey, indexR, resB->fData, resB, 0, fillIn, status);
-
-    default:
-        break;
-    }
-    return fillIn;
-}
-
-/**
- * Returns the string at position indexS of resB.
- *
- * @param resB   bundle to index; NULL yields U_ILLEGAL_ARGUMENT_ERROR
- * @param indexS zero-based position; values outside [0, fSize) yield
- *               U_MISSING_RESOURCE_ERROR
- * @param len    receives the string length
- * @param status in/out error code; U_RESOURCE_TYPE_MISMATCH for int, binary
- *               and int-vector resources, U_INTERNAL_PROGRAM_ERROR for any
- *               other unexpected type
- * @return the string, or NULL on error
- */
-U_CAPI const UChar* U_EXPORT2 ures_getStringByIndex(const UResourceBundle *resB, int32_t indexS, int32_t* len, UErrorCode *status) {
-    if (status == NULL || U_FAILURE(*status)) {
-        return NULL;
-    }
-    if (resB == NULL) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return NULL;
-    }
-    if (indexS < 0 || resB->fSize <= indexS) {
-        *status = U_MISSING_RESOURCE_ERROR;
-        return NULL;
-    }
-
-    const char *itemKey = NULL;
-    Resource item = RES_BOGUS;
-    switch (RES_GET_TYPE(resB->fRes)) {
-    case URES_STRING:
-    case URES_STRING_V2:
-        return res_getString({resB}, &(resB->fResData), resB->fRes, len);
-
-    case URES_TABLE:
-    case URES_TABLE16:
-    case URES_TABLE32:
-        item = res_getTableItemByIndex(&(resB->fResData), resB->fRes, indexS, &itemKey);
-        /* TODO: do the fallback when item is RES_BOGUS and resB->fHasFallback */
-        return ures_getStringWithAlias(resB, item, indexS, len, status);
-
-    case URES_ARRAY:
-    case URES_ARRAY16:
-        item = res_getArrayItem(&(resB->fResData), resB->fRes, indexS);
-        /* TODO: do the fallback when item is RES_BOGUS and resB->fHasFallback */
-        return ures_getStringWithAlias(resB, item, indexS, len, status);
-
-    case URES_ALIAS:
-        return ures_getStringWithAlias(resB, resB->fRes, indexS, len, status);
-
-    case URES_INT:
-    case URES_BINARY:
-    case URES_INT_VECTOR:
-        *status = U_RESOURCE_TYPE_MISMATCH;
-        break;
-
-    default:
-        /* must not occur */
-        *status = U_INTERNAL_PROGRAM_ERROR;
-        break;
-    }
-    return NULL;
-}
-
-/**
- * UTF-8 variant of ures_getStringByIndex(): fetches the UTF-16 string at
- * position idx of resB and hands it to ures_toUTF8String() together with the
- * caller's buffer. See ures_toUTF8String() for dest, pLength and forceCopy.
- */
-U_CAPI const char * U_EXPORT2
-ures_getUTF8StringByIndex(const UResourceBundle *resB,
-                          int32_t idx,
-                          char *dest, int32_t *pLength,
-                          UBool forceCopy,
-                          UErrorCode *status) {
-    int32_t utf16Length;
-    const UChar *utf16 = ures_getStringByIndex(resB, idx, &utf16Length, status);
-    return ures_toUTF8String(utf16, utf16Length, dest, pLength, forceCopy, status);
-}
-
-/*U_CAPI const char *ures_getResPath(UResourceBundle *resB) {
- return resB->fResPath;
-}*/
-
-/**
- * Opens the resource named by a full path and returns it in fillIn.
- *
- * The path is split at RES_PATH_SEPARATOR. If it starts with a separator,
- * the first segment is the package name and the second the locale;
- * otherwise the first segment is the locale and the package name is NULL.
- * Anything after the locale is resolved with ures_findSubResource().
- *
- * @param path   full resource path; it is copied, the caller's string is not
- *               modified. Must not be NULL (uprv_strlen is called on it).
- * @param fillIn bundle to reuse for the result
- * @param status in/out error code; U_MEMORY_ALLOCATION_ERROR if the working
- *               copy cannot be allocated, U_ILLEGAL_ARGUMENT_ERROR if a
- *               leading separator is not followed by a second one
- * @return the located bundle, or fillIn if nothing was opened
- */
-U_CAPI UResourceBundle* U_EXPORT2
-ures_findResource(const char* path, UResourceBundle *fillIn, UErrorCode *status)
-{
- UResourceBundle *first = NULL;
- UResourceBundle *result = fillIn;
- char *packageName = NULL;
- char *pathToResource = NULL, *save = NULL;
- char *locale = NULL, *localeEnd = NULL;
- int32_t length;
-
- if(status == NULL || U_FAILURE(*status)) {
- return result;
- }
-
- /* Work on a private copy: separators are overwritten with NULs below. */
- /* `save` keeps the start of the allocation for uprv_free(). */
- length = (int32_t)(uprv_strlen(path)+1);
- save = pathToResource = (char *)uprv_malloc(length*sizeof(char));
- /* test for NULL */
- if(pathToResource == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return result;
- }
- uprv_memcpy(pathToResource, path, length);
-
- locale = pathToResource;
- if(*pathToResource == RES_PATH_SEPARATOR) { /* there is a path specification */
- pathToResource++;
- packageName = pathToResource;
- pathToResource = uprv_strchr(pathToResource, RES_PATH_SEPARATOR);
- if(pathToResource == NULL) {
- /* NOTE(review): execution continues after this error; the code below */
- /* then relies on ures_open() not opening anything when *status is */
- /* already a failure - confirm. */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- } else {
- *pathToResource = 0;
- locale = pathToResource+1;
- }
- }
-
- /* Cut the locale off from the remaining key path, if there is one. */
- localeEnd = uprv_strchr(locale, RES_PATH_SEPARATOR);
- if(localeEnd != NULL) {
- *localeEnd = 0;
- }
-
- first = ures_open(packageName, locale, status);
-
- if(U_SUCCESS(*status)) {
- if(localeEnd) {
- /* more path after the locale: descend into the opened bundle */
- result = ures_findSubResource(first, localeEnd+1, fillIn, status);
- } else {
- /* the path named only a bundle: hand back a copy of it */
- result = ures_copyResb(fillIn, first, status);
- }
- ures_close(first);
- }
- uprv_free(save);
- return result;
-}
-
-/**
- * Resolves a key path relative to resB and returns the resource found.
- *
- * The path is consumed one stretch at a time by res_findResource(). Each
- * intermediate result is turned into a bundle with init_resb_result(), which
- * is where aliases get resolved, and the walk continues from that bundle
- * until the path is used up.
- *
- * @param resB   bundle to start from; NULL yields U_ILLEGAL_ARGUMENT_ERROR
- * @param path   key path; NULL yields U_ILLEGAL_ARGUMENT_ERROR. The buffer is
- *               handed to res_findResource() as a non-const char**, so it
- *               must be writable.
- * @param fillIn bundle to reuse for the result
- * @param status in/out error code; U_MISSING_RESOURCE_ERROR when a path
- *               element cannot be found
- * @return the bundle for the last resource successfully resolved, or fillIn
- *         if nothing was resolved
- */
-U_CAPI UResourceBundle* U_EXPORT2
-ures_findSubResource(const UResourceBundle *resB, char* path, UResourceBundle *fillIn, UErrorCode *status)
-{
-    Resource res = RES_BOGUS;
-    UResourceBundle *result = fillIn;
-    const char *key;
-
-    if(status == NULL || U_FAILURE(*status)) {
-        return result;
-    }
-    if(resB == NULL || path == NULL) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return result;
-    }
-
-    /* here we do looping and circular alias checking */
-    /* this loop is here because aliasing is resolved on this level, not on res level */
-    /* so, when we encounter an alias, it is not an aggregate resource, so we return */
-    do {
-        res = res_findResource(&(resB->fResData), resB->fRes, &path, &key);
-        if(res == RES_BOGUS) {
-            *status = U_MISSING_RESOURCE_ERROR;
-            break;
-        }
-        result = init_resb_result(&(resB->fResData), res, key, -1, resB->fData, resB, 0, fillIn, status);
-        if(result == NULL || U_FAILURE(*status)) {
-            /* Stop here: continuing would dereference a NULL bundle or walk
-             * from one that was not filled in, and would replace the real
-             * error with U_MISSING_RESOURCE_ERROR. */
-            break;
-        }
-        resB = result;
-    } while(*path); /* there is more stuff in the path */
-
-    return result;
-}
-/**
- * Looks up inKey in resB through ures_getByKeyWithFallback() and returns the
- * string found there.
- *
- * A value consisting of exactly three EMPTY_SET characters is treated as
- * "no value": NULL is returned, the length is 0 and *status becomes
- * U_MISSING_RESOURCE_ERROR.
- *
- * @param resB   bundle to search
- * @param inKey  key (or key path) to look up
- * @param len    optional; receives the string length on success and 0 for the
- *               EMPTY_SET marker case; left untouched on other failures
- * @param status in/out error code; nothing is done if it is NULL or already
- *               a failure
- * @return the string, or NULL on error
- */
-U_INTERNAL const UChar* U_EXPORT2
-ures_getStringByKeyWithFallback(const UResourceBundle *resB,
-                                const char* inKey,
-                                int32_t* len,
-                                UErrorCode *status) {
-    /* Same guard as the other getters in this file; without it the
-     * U_FAILURE(*status) check below dereferences a NULL status. */
-    if (status == NULL || U_FAILURE(*status)) {
-        return NULL;
-    }
-
-    UResourceBundle stack;
-    const UChar* retVal = NULL;
-    int32_t length = 0;
-    ures_initStackObject(&stack);
-    ures_getByKeyWithFallback(resB, inKey, &stack, status);
-    retVal = ures_getString(&stack, &length, status);
-    ures_close(&stack);
-    if (U_FAILURE(*status)) {
-        return NULL;
-    }
-    if (length == 3 && retVal[0] == EMPTY_SET && retVal[1] == EMPTY_SET && retVal[2] == EMPTY_SET ) {
-        retVal = NULL;
-        length = 0;
-        *status = U_MISSING_RESOURCE_ERROR;
-    }
-    if (len != NULL) {
-        *len = length;
-    }
-    return retVal;
-}
-
-/*
- Like res_getTableItemByKey but accepts full paths like "NumberElements/latn/patternsShort".
-
- The key is copied into a CharString so that separators can be overwritten
- with NULs in place. Each path part is then looked up with
- res_getTableItemByKey(), starting at `table`. Returns the resource reached
- when the whole path was consumed, otherwise RES_BOGUS (also when copying
- the key fails).
-*/
-static Resource getTableItemByKeyPath(const ResourceData *pResData, Resource table, const char *key) {
- Resource resource = table; /* The current resource */
- icu::CharString path;
- UErrorCode errorCode = U_ZERO_ERROR;
- path.append(key, errorCode);
- if (U_FAILURE(errorCode)) { return RES_BOGUS; }
- char *pathPart = path.data(); /* Path from current resource to desired resource */
- UResType type = (UResType)RES_GET_TYPE(resource); /* the current resource type */
- /* Descend one path part per iteration while the current resource is a container. */
- while (*pathPart && resource != RES_BOGUS && URES_IS_CONTAINER(type)) {
- char *nextPathPart = uprv_strchr(pathPart, RES_PATH_SEPARATOR);
- if (nextPathPart != NULL) {
- *nextPathPart = 0; /* Terminating null for this part of path. */
- nextPathPart++;
- } else {
- /* Last part: point at the terminating NUL so the loop ends. */
- nextPathPart = uprv_strchr(pathPart, 0);
- }
- int32_t t;
- const char *pathP = pathPart;
- resource = res_getTableItemByKey(pResData, resource, &t, &pathP);
- type = (UResType)RES_GET_TYPE(resource);
- pathPart = nextPathPart;
- }
- /* Path left over: the walk stopped early (missing item or non-container). */
- if (*pathPart) {
- return RES_BOGUS;
- }
- return resource;
-}
-
-/**
- * Looks up inKey (a key or a key path) in the table resB, falling back to the
- * parent data entries when the item is not present in resB itself.
- *
- * The item is first searched in resB with getTableItemByKeyPath(). If that
- * fails, each parent entry is searched in turn for "<resB path><inKey>",
- * starting from the parent's root resource. An alias met before the path is
- * fully consumed is resolved through init_resb_result() and the search
- * continues inside the alias target.
- *
- * @param resB   table bundle to search; NULL yields U_ILLEGAL_ARGUMENT_ERROR,
- *               a non-table yields U_RESOURCE_TYPE_MISMATCH
- * @param inKey  key or key path to look up
- * @param fillIn bundle to reuse for the result
- * @param status in/out error code. When the item comes from a parent entry it
- *               is set to U_USING_DEFAULT_WARNING (entry is the default or
- *               root locale) or U_USING_FALLBACK_WARNING before the result is
- *               built; U_MISSING_RESOURCE_ERROR when nothing is found.
- * @return the bundle produced by init_resb_result(), or fillIn on error
- */
-U_CAPI UResourceBundle* U_EXPORT2
-ures_getByKeyWithFallback(const UResourceBundle *resB,
-                          const char* inKey,
-                          UResourceBundle *fillIn,
-                          UErrorCode *status) {
-    Resource res = RES_BOGUS, rootRes = RES_BOGUS;
-    /*UResourceDataEntry *realData = NULL;*/
-    UResourceBundle *helper = NULL;
-
-    if (status==NULL || U_FAILURE(*status)) {
-        return fillIn;
-    }
-    if(resB == NULL) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return fillIn;
-    }
-
-    int32_t type = RES_GET_TYPE(resB->fRes);
-    if(URES_IS_TABLE(type)) {
-        res = getTableItemByKeyPath(&(resB->fResData), resB->fRes, inKey);
-        const char* key = inKey;
-        if(res == RES_BOGUS) {
-            UResourceDataEntry *dataEntry = resB->fData;
-            CharString path;
-            char *myPath = NULL;
-            const char* resPath = resB->fResPath;
-            int32_t len = resB->fResPathLen;
-            while(res == RES_BOGUS && dataEntry->fParent != NULL) { /* Otherwise, we'll look in parents */
-                dataEntry = dataEntry->fParent;
-                rootRes = dataEntry->fData.rootRes;
-
-                if(dataEntry->fBogus == U_ZERO_ERROR) {
-                    /* Rebuild the full path each time: res_findResource()
-                     * overwrites separators in the buffer it walks. */
-                    path.clear();
-                    if (len > 0) {
-                        path.append(resPath, len, *status);
-                    }
-                    path.append(inKey, *status);
-                    if (U_FAILURE(*status)) {
-                        ures_close(helper);
-                        return fillIn;
-                    }
-                    myPath = path.data();
-                    key = inKey;
-                    do {
-                        res = res_findResource(&(dataEntry->fData), rootRes, &myPath, &key);
-                        if (RES_GET_TYPE(res) == URES_ALIAS && *myPath) {
-                            /* We hit an alias, but we didn't finish following the path. */
-                            helper = init_resb_result(&(dataEntry->fData), res, NULL, -1, dataEntry, resB, 0, helper, status);
-                            /*helper = init_resb_result(&(dataEntry->fData), res, inKey, -1, dataEntry, resB, 0, helper, status);*/
-                            if(helper) {
-                                dataEntry = helper->fData;
-                                rootRes = helper->fRes;
-                                resPath = helper->fResPath;
-                                len = helper->fResPathLen;
-                            } else {
-                                break;
-                            }
-                        } else if (res == RES_BOGUS) {
-                            /* Not found in this entry: give up on it and let
-                             * the outer loop try the next parent. Looping on
-                             * would look up the leftover path segments from
-                             * rootRes (possibly matching an unrelated item),
-                             * or never terminate if the path is not advanced. */
-                            break;
-                        }
-                    } while(*myPath); /* Continue until the whole path is consumed */
-                }
-            }
-            /*const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status);*/
-            if(res != RES_BOGUS) {
-                /* check if resB->fResPath gives the right name here */
-                if(uprv_strcmp(dataEntry->fName, uloc_getDefault())==0 || uprv_strcmp(dataEntry->fName, kRootLocaleName)==0) {
-                    *status = U_USING_DEFAULT_WARNING;
-                } else {
-                    *status = U_USING_FALLBACK_WARNING;
-                }
-
-                fillIn = init_resb_result(&(dataEntry->fData), res, inKey, -1, dataEntry, resB, 0, fillIn, status);
-            } else {
-                *status = U_MISSING_RESOURCE_ERROR;
-            }
-        } else {
-            /* Found directly in resB: no fallback needed. */
-            fillIn = init_resb_result(&(resB->fResData), res, key, -1, resB->fData, resB, 0, fillIn, status);
-        }
-    }
-    else {
-        *status = U_RESOURCE_TYPE_MISMATCH;
-    }
-    /* helper is only non-NULL if an alias was followed above. */
-    ures_close(helper);
-    return fillIn;
-}
-
-namespace {
-
-/**
- * Feeds the resource of `bundle` to `sink`, then recurses into the same
- * resource path of the parent data entry, if there is a usable parent.
- *
- * @param bundle    bundle whose resource is reported first
- * @param value     reusable value object; re-pointed at each bundle visited
- * @param sink      receives sink.put(key, value, noFallback, errorCode) once
- *                  per bundle visited; noFallback is TRUE when there is no
- *                  usable parent
- * @param errorCode in/out error code; nothing is done if already a failure
- */
-void getAllItemsWithFallback(
- const UResourceBundle *bundle, ResourceDataValue &value,
- ResourceSink &sink,
- UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return; }
- // We recursively enumerate child-first,
- // only storing parent items in the absence of child items.
- // The sink needs to store a placeholder value for the no-fallback/no-inheritance marker
- // to prevent a parent item from being stored.
- //
- // It would be possible to recursively enumerate parent-first,
- // overriding parent items with child items.
- // When the sink sees the no-fallback/no-inheritance marker,
- // then it would remove the parent's item.
- // We would deserialize parent values even though they are overridden in a child bundle.
- value.setData(&bundle->fResData);
- UResourceDataEntry *parentEntry = bundle->fData->fParent;
- // A parent only counts if its entry is not marked bogus.
- UBool hasParent = parentEntry != NULL && U_SUCCESS(parentEntry->fBogus);
- value.setResource(bundle->fRes, ResourceTracer(bundle));
- sink.put(bundle->fKey, value, !hasParent, errorCode);
- if (hasParent) {
- // We might try to query the sink whether
- // any fallback from the parent bundle is still possible.
-
- // Turn the parent UResourceDataEntry into a UResourceBundle,
- // much like in ures_openWithType().
- // TODO: See if we can refactor ures_getByKeyWithFallback()
- // and pull out an inner function that takes and returns a UResourceDataEntry
- // so that we need not create UResourceBundle objects.
- UResourceBundle parentBundle;
- ures_initStackObject(&parentBundle);
- parentBundle.fTopLevelData = parentBundle.fData = parentEntry;
- // TODO: What is the difference between bundle fData and fTopLevelData?
- uprv_memcpy(&parentBundle.fResData, &parentEntry->fData, sizeof(ResourceData));
- // TODO: Try to replace bundle.fResData with just using bundle.fData->fData.
- parentBundle.fHasFallback = !parentBundle.fResData.noFallback;
- parentBundle.fIsTopLevel = TRUE;
- parentBundle.fRes = parentBundle.fResData.rootRes;
- parentBundle.fSize = res_countArrayItems(&(parentBundle.fResData), parentBundle.fRes);
- parentBundle.fIndex = -1;
- // parentBundle now references parentEntry; ures_close(&parentBundle) below
- // presumably releases that reference again.
- entryIncrease(parentEntry);
-
- // Look up the container item in the parent bundle.
- UResourceBundle containerBundle;
- ures_initStackObject(&containerBundle);
- const UResourceBundle *rb;
- UErrorCode pathErrorCode = U_ZERO_ERROR; // Ignore if parents up to root do not have this path.
- if (bundle->fResPath == NULL || *bundle->fResPath == 0) {
- // bundle is a top-level resource: the parent's root is its counterpart.
- rb = &parentBundle;
- } else {
- rb = ures_getByKeyWithFallback(&parentBundle, bundle->fResPath,
- &containerBundle, &pathErrorCode);
- }
- if (U_SUCCESS(pathErrorCode)) {
- getAllItemsWithFallback(rb, value, sink, errorCode);
- }
- // Both stack bundles are closed whether or not the lookup succeeded.
- ures_close(&containerBundle);
- ures_close(&parentBundle);
- }
-}
-
-} // namespace
-
-// Points `value` at the resource found under `path` in `bundle`
-// (with fallback), or at `bundle` itself when `path` is empty.
-//
-// Requires a ResourceDataValue fill-in, so that we need not cast from a ResourceValue.
-// Unfortunately, the caller must know which subclass to make and pass in.
-// Alternatively, we could make it as polymorphic as in Java by
-// returning a ResourceValue pointer (possibly wrapped into a LocalPointer)
-// that the caller then owns.
-//
-// Also requires a UResourceBundle fill-in, so that the value's ResourceTracer
-// can point to a non-local bundle.
-// Without tracing, the child bundle could be a function-local object.
-//
-// errorCode is set to U_ILLEGAL_ARGUMENT_ERROR for a null path; errors from
-// ures_getByKeyWithFallback() are passed through and leave `value` untouched.
-U_CAPI void U_EXPORT2
-ures_getValueWithFallback(const UResourceBundle *bundle, const char *path,
-                          UResourceBundle *tempFillIn,
-                          ResourceDataValue &value, UErrorCode &errorCode) {
-    if (U_FAILURE(errorCode)) { return; }
-    if (path == nullptr) {
-        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
-        return;
-    }
-
-    // An empty path means the bundle itself.
-    const UResourceBundle *target = bundle;
-    if (*path != 0) {
-        target = ures_getByKeyWithFallback(bundle, path, tempFillIn, &errorCode);
-        if (U_FAILURE(errorCode)) {
-            return;
-        }
-    }
-    value.setData(&target->fResData);
-    value.setResource(target->fRes, ResourceTracer(target));
-}
-
-// Reports the resource found under `path` in `bundle` (or `bundle` itself for
-// an empty path) to `sink`, followed by the corresponding resources of the
-// parent bundles; see getAllItemsWithFallback() above.
-//
-// errorCode is set to U_ILLEGAL_ARGUMENT_ERROR for a null path; errors from
-// ures_getByKeyWithFallback() are passed through and nothing is reported.
-U_CAPI void U_EXPORT2
-ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
-                             icu::ResourceSink &sink, UErrorCode &errorCode) {
-    if (U_FAILURE(errorCode)) { return; }
-    if (path == nullptr) {
-        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
-        return;
-    }
-
-    // Holds the looked-up container for as long as it is being enumerated.
-    StackUResourceBundle stackBundle;
-    const UResourceBundle *start = bundle;   // an empty path means the bundle itself
-    if (*path != 0) {
-        start = ures_getByKeyWithFallback(bundle, path, stackBundle.getAlias(), &errorCode);
-        if (U_FAILURE(errorCode)) {
-            return;
-        }
-    }
-
-    // Get all table items with fallback.
-    ResourceDataValue value;
-    getAllItemsWithFallback(start, value, sink, errorCode);
-}
-
-/**
- * Returns the item stored under inKey in the table resB as a bundle.
- *
- * The key is first looked up in resB itself. If it is missing and resB has
- * fallback enabled, getFallbackData() is consulted.
- *
- * @param resB   table bundle to search; NULL yields U_ILLEGAL_ARGUMENT_ERROR,
- *               a non-table yields U_RESOURCE_TYPE_MISMATCH
- * @param inKey  key to look up
- * @param fillIn bundle to reuse for the result
- * @param status in/out error code; U_MISSING_RESOURCE_ERROR when the key is
- *               found neither directly nor through fallback
- * @return the bundle produced by init_resb_result(), or fillIn on error
- */
-U_CAPI UResourceBundle* U_EXPORT2 ures_getByKey(const UResourceBundle *resB, const char* inKey, UResourceBundle *fillIn, UErrorCode *status) {
- Resource res = RES_BOGUS;
- UResourceDataEntry *realData = NULL;
- const char *key = inKey;
-
- if (status==NULL || U_FAILURE(*status)) {
- return fillIn;
- }
- if(resB == NULL) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return fillIn;
- }
-
- int32_t type = RES_GET_TYPE(resB->fRes);
- if(URES_IS_TABLE(type)) {
- int32_t t;
- res = res_getTableItemByKey(&(resB->fResData), resB->fRes, &t, &key);
- if(res == RES_BOGUS) {
- /* key was passed by address to the lookup above; start again from the caller's key */
- key = inKey;
- if(resB->fHasFallback == TRUE) {
- const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status);
- if(U_SUCCESS(*status)) {
- /* check if resB->fResPath gives the right name here */
- return init_resb_result(rd, res, key, -1, realData, resB, 0, fillIn, status);
- } else {
- /* any fallback failure is reported as a missing resource */
- *status = U_MISSING_RESOURCE_ERROR;
- }
- } else {
- *status = U_MISSING_RESOURCE_ERROR;
- }
- } else {
- return init_resb_result(&(resB->fResData), res, key, -1, resB->fData, resB, 0, fillIn, status);
- }
- }
-#if 0
- /* this is a kind of TODO item. If we have an array with an index table, we could do this. */
- /* not currently */
- else if(RES_GET_TYPE(resB->fRes) == URES_ARRAY && resB->fHasFallback == TRUE) {
- /* here should go a first attempt to locate the key using index table */
- const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status);
- if(U_SUCCESS(*status)) {
- return init_resb_result(rd, res, key, realData, resB, fillIn, status);
- } else {
- *status = U_MISSING_RESOURCE_ERROR;
- }
- }
-#endif
- else {
- *status = U_RESOURCE_TYPE_MISMATCH;
- }
- return fillIn;
-}
-
-/**
- * Returns the string stored under inKey in the table resB.
- *
- * The key is first looked up in resB itself. If it is missing and resB has
- * fallback enabled, getFallbackData() is consulted. An alias item is
- * resolved by opening it with ures_getByKey() and reading the string from
- * the temporary bundle.
- *
- * @param resB   table bundle to search; NULL yields U_ILLEGAL_ARGUMENT_ERROR,
- *               a non-table yields U_RESOURCE_TYPE_MISMATCH
- * @param inKey  key to look up
- * @param len    receives the string length
- * @param status in/out error code; U_MISSING_RESOURCE_ERROR when the key is
- *               not found, U_RESOURCE_TYPE_MISMATCH when the item is neither
- *               a string nor an alias
- * @return the string, or NULL on error
- */
-U_CAPI const UChar* U_EXPORT2 ures_getStringByKey(const UResourceBundle *resB, const char* inKey, int32_t* len, UErrorCode *status) {
- Resource res = RES_BOGUS;
- UResourceDataEntry *realData = NULL;
- const char* key = inKey;
-
- if (status==NULL || U_FAILURE(*status)) {
- return NULL;
- }
- if(resB == NULL) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- int32_t type = RES_GET_TYPE(resB->fRes);
- if(URES_IS_TABLE(type)) {
- int32_t t=0;
-
- res = res_getTableItemByKey(&(resB->fResData), resB->fRes, &t, &key);
-
- if(res == RES_BOGUS) {
- /* key was passed by address to the lookup above; start again from the caller's key */
- key = inKey;
- if(resB->fHasFallback == TRUE) {
- const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status);
- if(U_SUCCESS(*status)) {
- switch (RES_GET_TYPE(res)) {
- case URES_STRING:
- case URES_STRING_V2:
- return res_getString({resB, key}, rd, res, len);
- case URES_ALIAS:
- {
- /* resolve the alias through a temporary bundle */
- const UChar* result = 0;
- UResourceBundle *tempRes = ures_getByKey(resB, inKey, NULL, status);
- result = ures_getString(tempRes, len, status);
- ures_close(tempRes);
- return result;
- }
- default:
- *status = U_RESOURCE_TYPE_MISMATCH;
- }
- } else {
- /* any fallback failure is reported as a missing resource */
- *status = U_MISSING_RESOURCE_ERROR;
- }
- } else {
- *status = U_MISSING_RESOURCE_ERROR;
- }
- } else {
- /* found directly in resB */
- switch (RES_GET_TYPE(res)) {
- case URES_STRING:
- case URES_STRING_V2:
- return res_getString({resB, key}, &(resB->fResData), res, len);
- case URES_ALIAS:
- {
- /* resolve the alias through a temporary bundle */
- const UChar* result = 0;
- UResourceBundle *tempRes = ures_getByKey(resB, inKey, NULL, status);
- result = ures_getString(tempRes, len, status);
- ures_close(tempRes);
- return result;
- }
- default:
- *status = U_RESOURCE_TYPE_MISMATCH;
- }
- }
- }
-#if 0
- /* this is a kind of TODO item. If we have an array with an index table, we could do this. */
- /* not currently */
- else if(RES_GET_TYPE(resB->fRes) == URES_ARRAY && resB->fHasFallback == TRUE) {
- /* here should go a first attempt to locate the key using index table */
- const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status);
- if(U_SUCCESS(*status)) {
- // TODO: Tracing
- return res_getString(rd, res, len);
- } else {
- *status = U_MISSING_RESOURCE_ERROR;
- }
- }
-#endif
- else {
- *status = U_RESOURCE_TYPE_MISMATCH;
- }
- /* every error path ends here */
- return NULL;
-}
-
-U_CAPI const char * U_EXPORT2
-ures_getUTF8StringByKey(const UResourceBundle *resB,
- const char *key,
- char *dest, int32_t *pLength,
- UBool forceCopy,
- UErrorCode *status) {
- int32_t length16;
- const UChar *s16 = ures_getStringByKey(resB, key, &length16, status);
- return ures_toUTF8String(s16, length16, dest, pLength, forceCopy, status);
-}
-
-/* TODO: clean from here down */
-
-/**
- * INTERNAL: Get the name of the first real locale (not placeholder)
- * that has resource bundle data.
- */
-U_INTERNAL const char* U_EXPORT2
-ures_getLocaleInternal(const UResourceBundle* resourceBundle, UErrorCode* status)
-{
- if (status==NULL || U_FAILURE(*status)) {
- return NULL;
- }
- if (!resourceBundle) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- } else {
- return resourceBundle->fData->fName;
- }
-}
-
-U_CAPI const char* U_EXPORT2
-ures_getLocale(const UResourceBundle* resourceBundle,
- UErrorCode* status)
-{
- return ures_getLocaleInternal(resourceBundle, status);
-}
-
-
-U_CAPI const char* U_EXPORT2
-ures_getLocaleByType(const UResourceBundle* resourceBundle,
- ULocDataLocaleType type,
- UErrorCode* status) {
- if (status==NULL || U_FAILURE(*status)) {
- return NULL;
- }
- if (!resourceBundle) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- } else {
- switch(type) {
- case ULOC_ACTUAL_LOCALE:
- return resourceBundle->fData->fName;
- case ULOC_VALID_LOCALE:
- return resourceBundle->fTopLevelData->fName;
- case ULOC_REQUESTED_LOCALE:
- default:
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- }
-}
-
-U_CFUNC const char* ures_getName(const UResourceBundle* resB) {
- if(resB == NULL) {
- return NULL;
- }
-
- return resB->fData->fName;
-}
-
-#ifdef URES_DEBUG
-U_CFUNC const char* ures_getPath(const UResourceBundle* resB) {
- if(resB == NULL) {
- return NULL;
- }
-
- return resB->fData->fPath;
-}
-#endif
-
-static UResourceBundle*
-ures_openWithType(UResourceBundle *r, const char* path, const char* localeID,
- UResOpenType openType, UErrorCode* status) {
- if(U_FAILURE(*status)) {
- return NULL;
- }
-
- UResourceDataEntry *entry;
- if(openType != URES_OPEN_DIRECT) {
- /* first "canonicalize" the locale ID */
- char canonLocaleID[ULOC_FULLNAME_CAPACITY];
- uloc_getBaseName(localeID, canonLocaleID, UPRV_LENGTHOF(canonLocaleID), status);
- if(U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- entry = entryOpen(path, canonLocaleID, openType, status);
- } else {
- entry = entryOpenDirect(path, localeID, status);
- }
- if(U_FAILURE(*status)) {
- return NULL;
- }
- if(entry == NULL) {
- *status = U_MISSING_RESOURCE_ERROR;
- return NULL;
- }
-
- UBool isStackObject;
- if(r == NULL) {
- r = (UResourceBundle *)uprv_malloc(sizeof(UResourceBundle));
- if(r == NULL) {
- entryClose(entry);
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- isStackObject = FALSE;
- } else { // fill-in
- isStackObject = ures_isStackObject(r);
- ures_closeBundle(r, FALSE);
- }
- uprv_memset(r, 0, sizeof(UResourceBundle));
- ures_setIsStackObject(r, isStackObject);
-
- r->fTopLevelData = r->fData = entry;
- uprv_memcpy(&r->fResData, &entry->fData, sizeof(ResourceData));
- r->fHasFallback = openType != URES_OPEN_DIRECT && !r->fResData.noFallback;
- r->fIsTopLevel = TRUE;
- r->fRes = r->fResData.rootRes;
- r->fSize = res_countArrayItems(&(r->fResData), r->fRes);
- r->fIndex = -1;
-
- ResourceTracer(r).traceOpen();
-
- return r;
-}
-
-U_CAPI UResourceBundle* U_EXPORT2
-ures_open(const char* path, const char* localeID, UErrorCode* status) {
- return ures_openWithType(NULL, path, localeID, URES_OPEN_LOCALE_DEFAULT_ROOT, status);
-}
-
-U_CAPI UResourceBundle* U_EXPORT2
-ures_openNoDefault(const char* path, const char* localeID, UErrorCode* status) {
- return ures_openWithType(NULL, path, localeID, URES_OPEN_LOCALE_ROOT, status);
-}
-
-/**
- * Opens a resource bundle without "canonicalizing" the locale name. No fallback will be performed
- * or sought. However, alias substitution will happen!
- */
-U_CAPI UResourceBundle* U_EXPORT2
-ures_openDirect(const char* path, const char* localeID, UErrorCode* status) {
- return ures_openWithType(NULL, path, localeID, URES_OPEN_DIRECT, status);
-}
-
-/**
- * Internal API: This function is used to open a resource bundle
- * proper fallback chaining is executed while initialization.
- * The result is stored in cache for later fallback search.
- *
- * Same as ures_open(), but uses the fill-in parameter and does not allocate a new bundle.
- */
-U_INTERNAL void U_EXPORT2
-ures_openFillIn(UResourceBundle *r, const char* path,
- const char* localeID, UErrorCode* status) {
- if(U_SUCCESS(*status) && r == NULL) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- ures_openWithType(r, path, localeID, URES_OPEN_LOCALE_DEFAULT_ROOT, status);
-}
-
-/**
- * Same as ures_openDirect(), but uses the fill-in parameter and does not allocate a new bundle.
- */
-U_INTERNAL void U_EXPORT2
-ures_openDirectFillIn(UResourceBundle *r, const char* path, const char* localeID, UErrorCode* status) {
- if(U_SUCCESS(*status) && r == NULL) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- ures_openWithType(r, path, localeID, URES_OPEN_DIRECT, status);
-}
-
-/**
- * API: Counts members. For arrays and tables, returns number of resources.
- * For strings, returns 1.
- */
-U_CAPI int32_t U_EXPORT2
-ures_countArrayItems(const UResourceBundle* resourceBundle,
- const char* resourceKey,
- UErrorCode* status)
-{
- UResourceBundle resData;
- ures_initStackObject(&resData);
- if (status==NULL || U_FAILURE(*status)) {
- return 0;
- }
- if(resourceBundle == NULL) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- ures_getByKey(resourceBundle, resourceKey, &resData, status);
-
- if(resData.fResData.data != NULL) {
- int32_t result = res_countArrayItems(&resData.fResData, resData.fRes);
- ures_close(&resData);
- return result;
- } else {
- *status = U_MISSING_RESOURCE_ERROR;
- ures_close(&resData);
- return 0;
- }
-}
-
-/**
- * Internal function.
- * Return the version number associated with this ResourceBundle as a string.
- *
- * @param resourceBundle The resource bundle for which the version is checked.
- * @return A version number string as specified in the resource bundle or its parent.
- * The caller does not own this string.
- * @see ures_getVersion
- * @internal
- */
-U_INTERNAL const char* U_EXPORT2
-ures_getVersionNumberInternal(const UResourceBundle *resourceBundle)
-{
- if (!resourceBundle) return NULL;
-
- if(resourceBundle->fVersion == NULL) {
-
- /* If the version ID has not been built yet, then do so. Retrieve */
- /* the minor version from the file. */
- UErrorCode status = U_ZERO_ERROR;
- int32_t minor_len = 0;
- int32_t len;
-
- const UChar* minor_version = ures_getStringByKey(resourceBundle, kVersionTag, &minor_len, &status);
-
- /* Determine the length of of the final version string. This is */
- /* the length of the major part + the length of the separator */
- /* (==1) + the length of the minor part (+ 1 for the zero byte at */
- /* the end). */
-
- len = (minor_len > 0) ? minor_len : 1;
-
- /* Allocate the string, and build it up. */
- /* + 1 for zero byte */
-
-
- ((UResourceBundle *)resourceBundle)->fVersion = (char *)uprv_malloc(1 + len);
- /* Check for null pointer. */
- if (((UResourceBundle *)resourceBundle)->fVersion == NULL) {
- return NULL;
- }
-
- if(minor_len > 0) {
- u_UCharsToChars(minor_version, resourceBundle->fVersion , minor_len);
- resourceBundle->fVersion[len] = '\0';
- }
- else {
- uprv_strcpy(resourceBundle->fVersion, kDefaultMinorVersion);
- }
- }
-
- return resourceBundle->fVersion;
-}
-
-U_CAPI const char* U_EXPORT2
-ures_getVersionNumber(const UResourceBundle* resourceBundle)
-{
- return ures_getVersionNumberInternal(resourceBundle);
-}
-
-U_CAPI void U_EXPORT2 ures_getVersion(const UResourceBundle* resB, UVersionInfo versionInfo) {
- if (!resB) return;
-
- u_versionFromString(versionInfo, ures_getVersionNumberInternal(resB));
-}
-
-/** Tree support functions *******************************/
-#define INDEX_LOCALE_NAME "res_index"
-#define INDEX_TAG "InstalledLocales"
-#define DEFAULT_TAG "default"
-
-#if defined(URES_TREE_DEBUG)
-#include <stdio.h>
-#endif
-
-typedef struct ULocalesContext {
- UResourceBundle installed;
- UResourceBundle curr;
-} ULocalesContext;
-
-static void U_CALLCONV
-ures_loc_closeLocales(UEnumeration *enumerator) {
- ULocalesContext *ctx = (ULocalesContext *)enumerator->context;
- ures_close(&ctx->curr);
- ures_close(&ctx->installed);
- uprv_free(ctx);
- uprv_free(enumerator);
-}
-
-static int32_t U_CALLCONV
-ures_loc_countLocales(UEnumeration *en, UErrorCode * /*status*/) {
- ULocalesContext *ctx = (ULocalesContext *)en->context;
- return ures_getSize(&ctx->installed);
-}
-
-U_CDECL_BEGIN
-
-
-static const char * U_CALLCONV
-ures_loc_nextLocale(UEnumeration* en,
- int32_t* resultLength,
- UErrorCode* status) {
- ULocalesContext *ctx = (ULocalesContext *)en->context;
- UResourceBundle *res = &(ctx->installed);
- UResourceBundle *k = NULL;
- const char *result = NULL;
- int32_t len = 0;
- if(ures_hasNext(res) && (k = ures_getNextResource(res, &ctx->curr, status)) != 0) {
- result = ures_getKey(k);
- len = (int32_t)uprv_strlen(result);
- }
- if (resultLength) {
- *resultLength = len;
- }
- return result;
-}
-
-static void U_CALLCONV
-ures_loc_resetLocales(UEnumeration* en,
- UErrorCode* /*status*/) {
- UResourceBundle *res = &((ULocalesContext *)en->context)->installed;
- ures_resetIterator(res);
-}
-
-U_CDECL_END
-
-static const UEnumeration gLocalesEnum = {
- NULL,
- NULL,
- ures_loc_closeLocales,
- ures_loc_countLocales,
- uenum_unextDefault,
- ures_loc_nextLocale,
- ures_loc_resetLocales
-};
-
-
-U_CAPI UEnumeration* U_EXPORT2
-ures_openAvailableLocales(const char *path, UErrorCode *status)
-{
- UResourceBundle *idx = NULL;
- UEnumeration *en = NULL;
- ULocalesContext *myContext = NULL;
-
- if(U_FAILURE(*status)) {
- return NULL;
- }
- myContext = static_cast<ULocalesContext *>(uprv_malloc(sizeof(ULocalesContext)));
- en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
- if(!en || !myContext) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- uprv_free(en);
- uprv_free(myContext);
- return NULL;
- }
- uprv_memcpy(en, &gLocalesEnum, sizeof(UEnumeration));
-
- ures_initStackObject(&myContext->installed);
- ures_initStackObject(&myContext->curr);
- idx = ures_openDirect(path, INDEX_LOCALE_NAME, status);
- ures_getByKey(idx, INDEX_TAG, &myContext->installed, status);
- if(U_SUCCESS(*status)) {
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "Got %s::%s::[%s] : %s\n",
- path, INDEX_LOCALE_NAME, INDEX_TAG, ures_getKey(&myContext->installed));
-#endif
- en->context = myContext;
- } else {
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s open failed - %s\n", path, u_errorName(*status));
-#endif
- ures_close(&myContext->installed);
- uprv_free(myContext);
- uprv_free(en);
- en = NULL;
- }
-
- ures_close(idx);
-
- return en;
-}
-
-static UBool isLocaleInList(UEnumeration *locEnum, const char *locToSearch, UErrorCode *status) {
- const char *loc;
- while ((loc = uenum_next(locEnum, NULL, status)) != NULL) {
- if (uprv_strcmp(loc, locToSearch) == 0) {
- return TRUE;
- }
- }
- return FALSE;
-}
-
-U_CAPI int32_t U_EXPORT2
-ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
- const char *path, const char *resName, const char *keyword, const char *locid,
- UBool *isAvailable, UBool omitDefault, UErrorCode *status)
-{
- char kwVal[1024] = ""; /* value of keyword 'keyword' */
- char defVal[1024] = ""; /* default value for given locale */
- char defLoc[1024] = ""; /* default value for given locale */
- char base[1024] = ""; /* base locale */
- char found[1024] = "";
- char parent[1024] = "";
- char full[1024] = "";
- UResourceBundle bund1, bund2;
- UResourceBundle *res = NULL;
- UErrorCode subStatus = U_ZERO_ERROR;
- int32_t length = 0;
- if(U_FAILURE(*status)) return 0;
- uloc_getKeywordValue(locid, keyword, kwVal, 1024-1,&subStatus);
- if(!uprv_strcmp(kwVal, DEFAULT_TAG)) {
- kwVal[0]=0;
- }
- uloc_getBaseName(locid, base, 1024-1,&subStatus);
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "getFunctionalEquivalent: \"%s\" [%s=%s] in %s - %s\n",
- locid, keyword, kwVal, base, u_errorName(subStatus));
-#endif
- ures_initStackObject(&bund1);
- ures_initStackObject(&bund2);
-
-
- uprv_strcpy(parent, base);
- uprv_strcpy(found, base);
-
- if(isAvailable) {
- UEnumeration *locEnum = ures_openAvailableLocales(path, &subStatus);
- *isAvailable = TRUE;
- if (U_SUCCESS(subStatus)) {
- *isAvailable = isLocaleInList(locEnum, parent, &subStatus);
- }
- uenum_close(locEnum);
- }
-
- if(U_FAILURE(subStatus)) {
- *status = subStatus;
- return 0;
- }
-
- do {
- subStatus = U_ZERO_ERROR;
- res = ures_open(path, parent, &subStatus);
- if(((subStatus == U_USING_FALLBACK_WARNING) ||
- (subStatus == U_USING_DEFAULT_WARNING)) && isAvailable)
- {
- *isAvailable = FALSE;
- }
- isAvailable = NULL; /* only want to set this the first time around */
-
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s;%s -> %s [%s]\n", path?path:"ICUDATA", parent, u_errorName(subStatus), ures_getLocale(res, &subStatus));
-#endif
- if(U_FAILURE(subStatus)) {
- *status = subStatus;
- } else if(subStatus == U_ZERO_ERROR) {
- ures_getByKey(res,resName,&bund1, &subStatus);
- if(subStatus == U_ZERO_ERROR) {
- const UChar *defUstr;
- int32_t defLen;
- /* look for default item */
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s;%s : loaded default -> %s\n",
- path?path:"ICUDATA", parent, u_errorName(subStatus));
-#endif
- defUstr = ures_getStringByKey(&bund1, DEFAULT_TAG, &defLen, &subStatus);
- if(U_SUCCESS(subStatus) && defLen) {
- u_UCharsToChars(defUstr, defVal, u_strlen(defUstr));
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s;%s -> default %s=%s, %s\n",
- path?path:"ICUDATA", parent, keyword, defVal, u_errorName(subStatus));
-#endif
- uprv_strcpy(defLoc, parent);
- if(kwVal[0]==0) {
- uprv_strcpy(kwVal, defVal);
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s;%s -> kwVal = %s\n",
- path?path:"ICUDATA", parent, keyword, kwVal);
-#endif
- }
- }
- }
- }
-
- subStatus = U_ZERO_ERROR;
-
- if (res != NULL) {
- uprv_strcpy(found, ures_getLocaleByType(res, ULOC_VALID_LOCALE, &subStatus));
- }
-
- uloc_getParent(found,parent,sizeof(parent),&subStatus);
- ures_close(res);
- } while(!defVal[0] && *found && uprv_strcmp(found, "root") != 0 && U_SUCCESS(*status));
-
- /* Now, see if we can find the kwVal collator.. start the search over.. */
- uprv_strcpy(parent, base);
- uprv_strcpy(found, base);
-
- do {
- subStatus = U_ZERO_ERROR;
- res = ures_open(path, parent, &subStatus);
- if((subStatus == U_USING_FALLBACK_WARNING) && isAvailable) {
- *isAvailable = FALSE;
- }
- isAvailable = NULL; /* only want to set this the first time around */
-
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s;%s -> %s (looking for %s)\n",
- path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal);
-#endif
- if(U_FAILURE(subStatus)) {
- *status = subStatus;
- } else if(subStatus == U_ZERO_ERROR) {
- ures_getByKey(res,resName,&bund1, &subStatus);
-#if defined(URES_TREE_DEBUG)
-/**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, resName, u_errorName(subStatus));
-#endif
- if(subStatus == U_ZERO_ERROR) {
- ures_getByKey(&bund1, kwVal, &bund2, &subStatus);
-#if defined(URES_TREE_DEBUG)
-/**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, kwVal, u_errorName(subStatus));
-#endif
- if(subStatus == U_ZERO_ERROR) {
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s;%s -> full0 %s=%s, %s\n",
- path?path:"ICUDATA", parent, keyword, kwVal, u_errorName(subStatus));
-#endif
- uprv_strcpy(full, parent);
- if(*full == 0) {
- uprv_strcpy(full, "root");
- }
- /* now, recalculate default kw if need be */
- if(uprv_strlen(defLoc) > uprv_strlen(full)) {
- const UChar *defUstr;
- int32_t defLen;
- /* look for default item */
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s;%s -> recalculating Default0\n",
- path?path:"ICUDATA", full);
-#endif
- defUstr = ures_getStringByKey(&bund1, DEFAULT_TAG, &defLen, &subStatus);
- if(U_SUCCESS(subStatus) && defLen) {
- u_UCharsToChars(defUstr, defVal, u_strlen(defUstr));
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s;%s -> default0 %s=%s, %s\n",
- path?path:"ICUDATA", full, keyword, defVal, u_errorName(subStatus));
-#endif
- uprv_strcpy(defLoc, full);
- }
- } /* end of recalculate default KW */
-#if defined(URES_TREE_DEBUG)
- else {
- fprintf(stderr, "No trim0, %s <= %s\n", defLoc, full);
- }
-#endif
- } else {
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "err=%s in %s looking for %s\n",
- u_errorName(subStatus), parent, kwVal);
-#endif
- }
- }
- }
-
- subStatus = U_ZERO_ERROR;
-
- uprv_strcpy(found, parent);
- uloc_getParent(found,parent,1023,&subStatus);
- ures_close(res);
- } while(!full[0] && *found && U_SUCCESS(*status));
-
- if((full[0]==0) && uprv_strcmp(kwVal, defVal)) {
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "Failed to locate kw %s - try default %s\n", kwVal, defVal);
-#endif
- uprv_strcpy(kwVal, defVal);
- uprv_strcpy(parent, base);
- uprv_strcpy(found, base);
-
- do { /* search for 'default' named item */
- subStatus = U_ZERO_ERROR;
- res = ures_open(path, parent, &subStatus);
- if((subStatus == U_USING_FALLBACK_WARNING) && isAvailable) {
- *isAvailable = FALSE;
- }
- isAvailable = NULL; /* only want to set this the first time around */
-
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s;%s -> %s (looking for default %s)\n",
- path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal);
-#endif
- if(U_FAILURE(subStatus)) {
- *status = subStatus;
- } else if(subStatus == U_ZERO_ERROR) {
- ures_getByKey(res,resName,&bund1, &subStatus);
- if(subStatus == U_ZERO_ERROR) {
- ures_getByKey(&bund1, kwVal, &bund2, &subStatus);
- if(subStatus == U_ZERO_ERROR) {
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s;%s -> full1 %s=%s, %s\n", path?path:"ICUDATA",
- parent, keyword, kwVal, u_errorName(subStatus));
-#endif
- uprv_strcpy(full, parent);
- if(*full == 0) {
- uprv_strcpy(full, "root");
- }
-
- /* now, recalculate default kw if need be */
- if(uprv_strlen(defLoc) > uprv_strlen(full)) {
- const UChar *defUstr;
- int32_t defLen;
- /* look for default item */
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s;%s -> recalculating Default1\n",
- path?path:"ICUDATA", full);
-#endif
- defUstr = ures_getStringByKey(&bund1, DEFAULT_TAG, &defLen, &subStatus);
- if(U_SUCCESS(subStatus) && defLen) {
- u_UCharsToChars(defUstr, defVal, u_strlen(defUstr));
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s;%s -> default %s=%s, %s\n",
- path?path:"ICUDATA", full, keyword, defVal, u_errorName(subStatus));
-#endif
- uprv_strcpy(defLoc, full);
- }
- } /* end of recalculate default KW */
-#if defined(URES_TREE_DEBUG)
- else {
- fprintf(stderr, "No trim1, %s <= %s\n", defLoc, full);
- }
-#endif
- }
- }
- }
- subStatus = U_ZERO_ERROR;
-
- uprv_strcpy(found, parent);
- uloc_getParent(found,parent,1023,&subStatus);
- ures_close(res);
- } while(!full[0] && *found && U_SUCCESS(*status));
- }
-
- if(U_SUCCESS(*status)) {
- if(!full[0]) {
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "Still could not load keyword %s=%s\n", keyword, kwVal);
-#endif
- *status = U_MISSING_RESOURCE_ERROR;
- } else if(omitDefault) {
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr,"Trim? full=%s, defLoc=%s, found=%s\n", full, defLoc, found);
-#endif
- if(uprv_strlen(defLoc) <= uprv_strlen(full)) {
- /* found the keyword in a *child* of where the default tag was present. */
- if(!uprv_strcmp(kwVal, defVal)) { /* if the requested kw is default, */
- /* and the default is in or in an ancestor of the current locale */
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "Removing unneeded var %s=%s\n", keyword, kwVal);
-#endif
- kwVal[0]=0;
- }
- }
- }
- uprv_strcpy(found, full);
- if(kwVal[0]) {
- uprv_strcat(found, "@");
- uprv_strcat(found, keyword);
- uprv_strcat(found, "=");
- uprv_strcat(found, kwVal);
- } else if(!omitDefault) {
- uprv_strcat(found, "@");
- uprv_strcat(found, keyword);
- uprv_strcat(found, "=");
- uprv_strcat(found, defVal);
- }
- }
- /* we found the default locale - no need to repeat it.*/
-
- ures_close(&bund1);
- ures_close(&bund2);
-
- length = (int32_t)uprv_strlen(found);
-
- if(U_SUCCESS(*status)) {
- int32_t copyLength = uprv_min(length, resultCapacity);
- if(copyLength>0) {
- uprv_strncpy(result, found, copyLength);
- }
- if(length == 0) {
- *status = U_MISSING_RESOURCE_ERROR;
- }
- } else {
- length = 0;
- result[0]=0;
- }
- return u_terminateChars(result, resultCapacity, length, status);
-}
-
-U_CAPI UEnumeration* U_EXPORT2
-ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status)
-{
-#define VALUES_BUF_SIZE 2048
-#define VALUES_LIST_SIZE 512
-
- char valuesBuf[VALUES_BUF_SIZE];
- int32_t valuesIndex = 0;
- const char *valuesList[VALUES_LIST_SIZE];
- int32_t valuesCount = 0;
-
- const char *locale;
- int32_t locLen;
-
- UEnumeration *locs = NULL;
-
- UResourceBundle item;
- UResourceBundle subItem;
-
- ures_initStackObject(&item);
- ures_initStackObject(&subItem);
- locs = ures_openAvailableLocales(path, status);
-
- if(U_FAILURE(*status)) {
- ures_close(&item);
- ures_close(&subItem);
- return NULL;
- }
-
- valuesBuf[0]=0;
- valuesBuf[1]=0;
-
- while((locale = uenum_next(locs, &locLen, status)) != 0) {
- UResourceBundle *bund = NULL;
- UResourceBundle *subPtr = NULL;
- UErrorCode subStatus = U_ZERO_ERROR; /* don't fail if a bundle is unopenable */
- bund = ures_openDirect(path, locale, &subStatus);
-
-#if defined(URES_TREE_DEBUG)
- if(!bund || U_FAILURE(subStatus)) {
- fprintf(stderr, "%s-%s values: Can't open %s locale - skipping. (%s)\n",
- path?path:"<ICUDATA>", keyword, locale, u_errorName(subStatus));
- }
-#endif
-
- ures_getByKey(bund, keyword, &item, &subStatus);
-
- if(!bund || U_FAILURE(subStatus)) {
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s-%s values: Can't find in %s - skipping. (%s)\n",
- path?path:"<ICUDATA>", keyword, locale, u_errorName(subStatus));
-#endif
- ures_close(bund);
- bund = NULL;
- continue;
- }
-
- while((subPtr = ures_getNextResource(&item,&subItem,&subStatus)) != 0
- && U_SUCCESS(subStatus)) {
- const char *k;
- int32_t i;
- k = ures_getKey(subPtr);
-
-#if defined(URES_TREE_DEBUG)
- /* fprintf(stderr, "%s | %s | %s | %s\n", path?path:"<ICUDATA>", keyword, locale, k); */
-#endif
- if(k == NULL || *k == 0 ||
- uprv_strcmp(k, DEFAULT_TAG) == 0 || uprv_strncmp(k, "private-", 8) == 0) {
- // empty or "default" or unlisted type
- continue;
- }
- for(i=0; i<valuesCount; i++) {
- if(!uprv_strcmp(valuesList[i],k)) {
- k = NULL; /* found duplicate */
- break;
- }
- }
- if(k != NULL) {
- int32_t kLen = (int32_t)uprv_strlen(k);
- if((valuesCount >= (VALUES_LIST_SIZE-1)) || /* no more space in list .. */
- ((valuesIndex+kLen+1+1) >= VALUES_BUF_SIZE)) { /* no more space in buffer (string + 2 nulls) */
- *status = U_ILLEGAL_ARGUMENT_ERROR; /* out of space.. */
- } else {
- uprv_strcpy(valuesBuf+valuesIndex, k);
- valuesList[valuesCount++] = valuesBuf+valuesIndex;
- valuesIndex += kLen;
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s | %s | %s | [%s] (UNIQUE)\n",
- path?path:"<ICUDATA>", keyword, locale, k);
-#endif
- valuesBuf[valuesIndex++] = 0; /* terminate */
- }
- }
- }
- ures_close(bund);
- }
- valuesBuf[valuesIndex++] = 0; /* terminate */
-
- ures_close(&item);
- ures_close(&subItem);
- uenum_close(locs);
-#if defined(URES_TREE_DEBUG)
- fprintf(stderr, "%s: size %d, #%d\n", u_errorName(*status),
- valuesIndex, valuesCount);
-#endif
- return uloc_openKeywordList(valuesBuf, valuesIndex, status);
-}
-#if 0
-/* This code isn't needed, and given the documentation warnings the implementation is suspect */
-U_INTERNAL UBool U_EXPORT2
-ures_equal(const UResourceBundle* res1, const UResourceBundle* res2){
- if(res1==NULL || res2==NULL){
- return res1==res2; /* pointer comparision */
- }
- if(res1->fKey==NULL|| res2->fKey==NULL){
- return (res1->fKey==res2->fKey);
- }else{
- if(uprv_strcmp(res1->fKey, res2->fKey)!=0){
- return FALSE;
- }
- }
- if(uprv_strcmp(res1->fData->fName, res2->fData->fName)!=0){
- return FALSE;
- }
- if(res1->fData->fPath == NULL|| res2->fData->fPath==NULL){
- return (res1->fData->fPath == res2->fData->fPath);
- }else{
- if(uprv_strcmp(res1->fData->fPath, res2->fData->fPath)!=0){
- return FALSE;
- }
- }
- if(uprv_strcmp(res1->fData->fParent->fName, res2->fData->fParent->fName)!=0){
- return FALSE;
- }
- if(uprv_strcmp(res1->fData->fParent->fPath, res2->fData->fParent->fPath)!=0){
- return FALSE;
- }
- if(uprv_strncmp(res1->fResPath, res2->fResPath, res1->fResPathLen)!=0){
- return FALSE;
- }
- if(res1->fRes != res2->fRes){
- return FALSE;
- }
- return TRUE;
-}
-U_INTERNAL UResourceBundle* U_EXPORT2
-ures_clone(const UResourceBundle* res, UErrorCode* status){
- UResourceBundle* bundle = NULL;
- UResourceBundle* ret = NULL;
- if(U_FAILURE(*status) || res == NULL){
- return NULL;
- }
- bundle = ures_open(res->fData->fPath, res->fData->fName, status);
- if(res->fResPath!=NULL){
- ret = ures_findSubResource(bundle, res->fResPath, NULL, status);
- ures_close(bundle);
- }else{
- ret = bundle;
- }
- return ret;
-}
-U_INTERNAL const UResourceBundle* U_EXPORT2
-ures_getParentBundle(const UResourceBundle* res){
- if(res==NULL){
- return NULL;
- }
- return res->fParentRes;
-}
-#endif
-
-U_INTERNAL void U_EXPORT2
-ures_getVersionByKey(const UResourceBundle* res, const char *key, UVersionInfo ver, UErrorCode *status) {
- const UChar *str;
- int32_t len;
- str = ures_getStringByKey(res, key, &len, status);
- if(U_SUCCESS(*status)) {
- u_versionFromUString(ver, str);
- }
-}
-
-/* eof */
diff --git a/contrib/libs/icu/common/uresdata.cpp b/contrib/libs/icu/common/uresdata.cpp
deleted file mode 100644
index b3c2e2e27cc..00000000000
--- a/contrib/libs/icu/common/uresdata.cpp
+++ /dev/null
@@ -1,1518 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 1999-2016, International Business Machines Corporation
-* and others. All Rights Reserved.
-*******************************************************************************
-* file name: uresdata.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999dec08
-* created by: Markus W. Scherer
-* Modification History:
-*
-* Date Name Description
-* 06/20/2000 helena OS/400 port changes; mostly typecast.
-* 06/24/02 weiv Added support for resource sharing
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/udata.h"
-#include "unicode/ustring.h"
-#include "unicode/utf16.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "resource.h"
-#include "uarrsort.h"
-#include "uassert.h"
-#include "ucol_swp.h"
-#include "udataswp.h"
-#include "uinvchar.h"
-#include "uresdata.h"
-#include "uresimp.h"
-#include "utracimp.h"
-
-/*
- * Resource access helpers
- */
-
-/* get a const char* pointer to the key with the keyOffset byte offset from pRoot */
-#define RES_GET_KEY16(pResData, keyOffset) \
- ((keyOffset)<(pResData)->localKeyLimit ? \
- (const char *)(pResData)->pRoot+(keyOffset) : \
- (pResData)->poolBundleKeys+(keyOffset)-(pResData)->localKeyLimit)
-
-#define RES_GET_KEY32(pResData, keyOffset) \
- ((keyOffset)>=0 ? \
- (const char *)(pResData)->pRoot+(keyOffset) : \
- (pResData)->poolBundleKeys+((keyOffset)&0x7fffffff))
-
-#define URESDATA_ITEM_NOT_FOUND -1
-
-/* empty resources, returned when the resource offset is 0 */
-static const uint16_t gEmpty16=0;
-
-static const struct {
- int32_t length;
- int32_t res;
-} gEmpty32={ 0, 0 };
-
-static const struct {
- int32_t length;
- UChar nul;
- UChar pad;
-} gEmptyString={ 0, 0, 0 };
-
-/*
- * All the type-access functions assume that
- * the resource is of the expected type.
- */
-
-static int32_t
-_res_findTableItem(const ResourceData *pResData, const uint16_t *keyOffsets, int32_t length,
- const char *key, const char **realKey) {
- const char *tableKey;
- int32_t mid, start, limit;
- int result;
-
- /* do a binary search for the key */
- start=0;
- limit=length;
- while(start<limit) {
- mid = (start + limit) / 2;
- tableKey = RES_GET_KEY16(pResData, keyOffsets[mid]);
- if (pResData->useNativeStrcmp) {
- result = uprv_strcmp(key, tableKey);
- } else {
- result = uprv_compareInvCharsAsAscii(key, tableKey);
- }
- if (result < 0) {
- limit = mid;
- } else if (result > 0) {
- start = mid + 1;
- } else {
- /* We found it! */
- *realKey=tableKey;
- return mid;
- }
- }
- return URESDATA_ITEM_NOT_FOUND; /* not found or table is empty. */
-}
-
-static int32_t
-_res_findTable32Item(const ResourceData *pResData, const int32_t *keyOffsets, int32_t length,
- const char *key, const char **realKey) {
- const char *tableKey;
- int32_t mid, start, limit;
- int result;
-
- /* do a binary search for the key */
- start=0;
- limit=length;
- while(start<limit) {
- mid = (start + limit) / 2;
- tableKey = RES_GET_KEY32(pResData, keyOffsets[mid]);
- if (pResData->useNativeStrcmp) {
- result = uprv_strcmp(key, tableKey);
- } else {
- result = uprv_compareInvCharsAsAscii(key, tableKey);
- }
- if (result < 0) {
- limit = mid;
- } else if (result > 0) {
- start = mid + 1;
- } else {
- /* We found it! */
- *realKey=tableKey;
- return mid;
- }
- }
- return URESDATA_ITEM_NOT_FOUND; /* not found or table is empty. */
-}
-
-/* helper for res_load() ---------------------------------------------------- */
-
-static UBool U_CALLCONV
-isAcceptable(void *context,
- const char * /*type*/, const char * /*name*/,
- const UDataInfo *pInfo) {
- uprv_memcpy(context, pInfo->formatVersion, 4);
- return (UBool)(
- pInfo->size>=20 &&
- pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
- pInfo->charsetFamily==U_CHARSET_FAMILY &&
- pInfo->sizeofUChar==U_SIZEOF_UCHAR &&
- pInfo->dataFormat[0]==0x52 && /* dataFormat="ResB" */
- pInfo->dataFormat[1]==0x65 &&
- pInfo->dataFormat[2]==0x73 &&
- pInfo->dataFormat[3]==0x42 &&
- (1<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=3));
-}
-
-/* semi-public functions ---------------------------------------------------- */
-
-static void
-res_init(ResourceData *pResData,
- UVersionInfo formatVersion, const void *inBytes, int32_t length,
- UErrorCode *errorCode) {
- UResType rootType;
-
- /* get the root resource */
- pResData->pRoot=(const int32_t *)inBytes;
- pResData->rootRes=(Resource)*pResData->pRoot;
- pResData->p16BitUnits=&gEmpty16;
-
- /* formatVersion 1.1 must have a root item and at least 5 indexes */
- if(length>=0 && (length/4)<((formatVersion[0]==1 && formatVersion[1]==0) ? 1 : 1+5)) {
- *errorCode=U_INVALID_FORMAT_ERROR;
- res_unload(pResData);
- return;
- }
-
- /* currently, we accept only resources that have a Table as their roots */
- rootType=(UResType)RES_GET_TYPE(pResData->rootRes);
- if(!URES_IS_TABLE(rootType)) {
- *errorCode=U_INVALID_FORMAT_ERROR;
- res_unload(pResData);
- return;
- }
-
- if(formatVersion[0]==1 && formatVersion[1]==0) {
- pResData->localKeyLimit=0x10000; /* greater than any 16-bit key string offset */
- } else {
- /* bundles with formatVersion 1.1 and later contain an indexes[] array */
- const int32_t *indexes=pResData->pRoot+1;
- int32_t indexLength=indexes[URES_INDEX_LENGTH]&0xff;
- if(indexLength<=URES_INDEX_MAX_TABLE_LENGTH) {
- *errorCode=U_INVALID_FORMAT_ERROR;
- res_unload(pResData);
- return;
- }
- if( length>=0 &&
- (length<((1+indexLength)<<2) ||
- length<(indexes[URES_INDEX_BUNDLE_TOP]<<2))
- ) {
- *errorCode=U_INVALID_FORMAT_ERROR;
- res_unload(pResData);
- return;
- }
- if(indexes[URES_INDEX_KEYS_TOP]>(1+indexLength)) {
- pResData->localKeyLimit=indexes[URES_INDEX_KEYS_TOP]<<2;
- }
- if(formatVersion[0]>=3) {
- // In formatVersion 1, the indexLength took up this whole int.
- // In version 2, bits 31..8 were reserved and always 0.
- // In version 3, they contain bits 23..0 of the poolStringIndexLimit.
- // Bits 27..24 are in indexes[URES_INDEX_ATTRIBUTES] bits 15..12.
- pResData->poolStringIndexLimit=(int32_t)((uint32_t)indexes[URES_INDEX_LENGTH]>>8);
- }
- if(indexLength>URES_INDEX_ATTRIBUTES) {
- int32_t att=indexes[URES_INDEX_ATTRIBUTES];
- pResData->noFallback=(UBool)(att&URES_ATT_NO_FALLBACK);
- pResData->isPoolBundle=(UBool)((att&URES_ATT_IS_POOL_BUNDLE)!=0);
- pResData->usesPoolBundle=(UBool)((att&URES_ATT_USES_POOL_BUNDLE)!=0);
- pResData->poolStringIndexLimit|=(att&0xf000)<<12; // bits 15..12 -> 27..24
- pResData->poolStringIndex16Limit=(int32_t)((uint32_t)att>>16);
- }
- if((pResData->isPoolBundle || pResData->usesPoolBundle) && indexLength<=URES_INDEX_POOL_CHECKSUM) {
- *errorCode=U_INVALID_FORMAT_ERROR;
- res_unload(pResData);
- return;
- }
- if( indexLength>URES_INDEX_16BIT_TOP &&
- indexes[URES_INDEX_16BIT_TOP]>indexes[URES_INDEX_KEYS_TOP]
- ) {
- pResData->p16BitUnits=(const uint16_t *)(pResData->pRoot+indexes[URES_INDEX_KEYS_TOP]);
- }
- }
-
- if(formatVersion[0]==1 || U_CHARSET_FAMILY==U_ASCII_FAMILY) {
- /*
- * formatVersion 1: compare key strings in native-charset order
- * formatVersion 2 and up: compare key strings in ASCII order
- */
- pResData->useNativeStrcmp=TRUE;
- }
-}
-
-U_CAPI void U_EXPORT2
-res_read(ResourceData *pResData,
- const UDataInfo *pInfo, const void *inBytes, int32_t length,
- UErrorCode *errorCode) {
- UVersionInfo formatVersion;
-
- uprv_memset(pResData, 0, sizeof(ResourceData));
- if(U_FAILURE(*errorCode)) {
- return;
- }
- if(!isAcceptable(formatVersion, NULL, NULL, pInfo)) {
- *errorCode=U_INVALID_FORMAT_ERROR;
- return;
- }
- res_init(pResData, formatVersion, inBytes, length, errorCode);
-}
-
-U_CFUNC void
-res_load(ResourceData *pResData,
- const char *path, const char *name, UErrorCode *errorCode) {
- UVersionInfo formatVersion;
-
- uprv_memset(pResData, 0, sizeof(ResourceData));
-
- /* load the ResourceBundle file */
- pResData->data=udata_openChoice(path, "res", name, isAcceptable, formatVersion, errorCode);
- if(U_FAILURE(*errorCode)) {
- return;
- }
-
- /* get its memory and initialize *pResData */
- res_init(pResData, formatVersion, udata_getMemory(pResData->data), -1, errorCode);
-}
-
-U_CFUNC void
-res_unload(ResourceData *pResData) {
- if(pResData->data!=NULL) {
- udata_close(pResData->data);
- pResData->data=NULL;
- }
-}
-
-static const int8_t gPublicTypes[URES_LIMIT] = {
- URES_STRING,
- URES_BINARY,
- URES_TABLE,
- URES_ALIAS,
-
- URES_TABLE, /* URES_TABLE32 */
- URES_TABLE, /* URES_TABLE16 */
- URES_STRING, /* URES_STRING_V2 */
- URES_INT,
-
- URES_ARRAY,
- URES_ARRAY, /* URES_ARRAY16 */
- URES_NONE,
- URES_NONE,
-
- URES_NONE,
- URES_NONE,
- URES_INT_VECTOR,
- URES_NONE
-};
-
-U_CAPI UResType U_EXPORT2
-res_getPublicType(Resource res) {
- return (UResType)gPublicTypes[RES_GET_TYPE(res)];
-}
-
-U_CAPI const UChar * U_EXPORT2
-res_getStringNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength) {
- const UChar *p;
- uint32_t offset=RES_GET_OFFSET(res);
- int32_t length;
- if(RES_GET_TYPE(res)==URES_STRING_V2) {
- int32_t first;
- if((int32_t)offset<pResData->poolStringIndexLimit) {
- p=(const UChar *)pResData->poolBundleStrings+offset;
- } else {
- p=(const UChar *)pResData->p16BitUnits+(offset-pResData->poolStringIndexLimit);
- }
- first=*p;
- if(!U16_IS_TRAIL(first)) {
- length=u_strlen(p);
- } else if(first<0xdfef) {
- length=first&0x3ff;
- ++p;
- } else if(first<0xdfff) {
- length=((first-0xdfef)<<16)|p[1];
- p+=2;
- } else {
- length=((int32_t)p[1]<<16)|p[2];
- p+=3;
- }
- } else if(res==offset) /* RES_GET_TYPE(res)==URES_STRING */ {
- const int32_t *p32= res==0 ? &gEmptyString.length : pResData->pRoot+res;
- length=*p32++;
- p=(const UChar *)p32;
- } else {
- p=NULL;
- length=0;
- }
- if(pLength) {
- *pLength=length;
- }
- return p;
-}
-
-namespace {
-
-/**
- * CLDR string value (three empty-set symbols)=={2205, 2205, 2205}
- * prevents fallback to the parent bundle.
- * TODO: combine with other code that handles this marker, use EMPTY_SET constant.
- * TODO: maybe move to uresbund.cpp?
- */
-UBool isNoInheritanceMarker(const ResourceData *pResData, Resource res) {
- uint32_t offset=RES_GET_OFFSET(res);
- if (offset == 0) {
- // empty string
- } else if (res == offset) {
- const int32_t *p32=pResData->pRoot+res;
- int32_t length=*p32;
- const UChar *p=(const UChar *)p32;
- return length == 3 && p[2] == 0x2205 && p[3] == 0x2205 && p[4] == 0x2205;
- } else if (RES_GET_TYPE(res) == URES_STRING_V2) {
- const UChar *p;
- if((int32_t)offset<pResData->poolStringIndexLimit) {
- p=(const UChar *)pResData->poolBundleStrings+offset;
- } else {
- p=(const UChar *)pResData->p16BitUnits+(offset-pResData->poolStringIndexLimit);
- }
- int32_t first=*p;
- if (first == 0x2205) { // implicit length
- return p[1] == 0x2205 && p[2] == 0x2205 && p[3] == 0;
- } else if (first == 0xdc03) { // explicit length 3 (should not occur)
- return p[1] == 0x2205 && p[2] == 0x2205 && p[3] == 0x2205;
- } else {
- // Assume that the string has not been stored with more length units than necessary.
- return FALSE;
- }
- }
- return FALSE;
-}
-
-int32_t getStringArray(const ResourceData *pResData, const icu::ResourceArray &array,
- icu::UnicodeString *dest, int32_t capacity,
- UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- if(dest == NULL ? capacity != 0 : capacity < 0) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- int32_t length = array.getSize();
- if(length == 0) {
- return 0;
- }
- if(length > capacity) {
- errorCode = U_BUFFER_OVERFLOW_ERROR;
- return length;
- }
- for(int32_t i = 0; i < length; ++i) {
- int32_t sLength;
- // No tracing: handled by the caller
- const UChar *s = res_getStringNoTrace(pResData, array.internalGetResource(pResData, i), &sLength);
- if(s == NULL) {
- errorCode = U_RESOURCE_TYPE_MISMATCH;
- return 0;
- }
- dest[i].setTo(TRUE, s, sLength);
- }
- return length;
-}
-
-} // namespace
-
-U_CAPI const UChar * U_EXPORT2
-res_getAlias(const ResourceData *pResData, Resource res, int32_t *pLength) {
- const UChar *p;
- uint32_t offset=RES_GET_OFFSET(res);
- int32_t length;
- if(RES_GET_TYPE(res)==URES_ALIAS) {
- const int32_t *p32= offset==0 ? &gEmptyString.length : pResData->pRoot+offset;
- length=*p32++;
- p=(const UChar *)p32;
- } else {
- p=NULL;
- length=0;
- }
- if(pLength) {
- *pLength=length;
- }
- return p;
-}
-
-U_CAPI const uint8_t * U_EXPORT2
-res_getBinaryNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength) {
- const uint8_t *p;
- uint32_t offset=RES_GET_OFFSET(res);
- int32_t length;
- if(RES_GET_TYPE(res)==URES_BINARY) {
- const int32_t *p32= offset==0 ? (const int32_t*)&gEmpty32 : pResData->pRoot+offset;
- length=*p32++;
- p=(const uint8_t *)p32;
- } else {
- p=NULL;
- length=0;
- }
- if(pLength) {
- *pLength=length;
- }
- return p;
-}
-
-
-U_CAPI const int32_t * U_EXPORT2
-res_getIntVectorNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength) {
- const int32_t *p;
- uint32_t offset=RES_GET_OFFSET(res);
- int32_t length;
- if(RES_GET_TYPE(res)==URES_INT_VECTOR) {
- p= offset==0 ? (const int32_t *)&gEmpty32 : pResData->pRoot+offset;
- length=*p++;
- } else {
- p=NULL;
- length=0;
- }
- if(pLength) {
- *pLength=length;
- }
- return p;
-}
-
-U_CAPI int32_t U_EXPORT2
-res_countArrayItems(const ResourceData *pResData, Resource res) {
- uint32_t offset=RES_GET_OFFSET(res);
- switch(RES_GET_TYPE(res)) {
- case URES_STRING:
- case URES_STRING_V2:
- case URES_BINARY:
- case URES_ALIAS:
- case URES_INT:
- case URES_INT_VECTOR:
- return 1;
- case URES_ARRAY:
- case URES_TABLE32:
- return offset==0 ? 0 : *(pResData->pRoot+offset);
- case URES_TABLE:
- return offset==0 ? 0 : *((const uint16_t *)(pResData->pRoot+offset));
- case URES_ARRAY16:
- case URES_TABLE16:
- return pResData->p16BitUnits[offset];
- default:
- return 0;
- }
-}
-
-U_NAMESPACE_BEGIN
-
-ResourceDataValue::~ResourceDataValue() {}
-
-UResType ResourceDataValue::getType() const {
- return res_getPublicType(res);
-}
-
-const UChar *ResourceDataValue::getString(int32_t &length, UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- const UChar *s = res_getString(fTraceInfo, &getData(), res, &length);
- if(s == NULL) {
- errorCode = U_RESOURCE_TYPE_MISMATCH;
- }
- return s;
-}
-
-const UChar *ResourceDataValue::getAliasString(int32_t &length, UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- const UChar *s = res_getAlias(&getData(), res, &length);
- if(s == NULL) {
- errorCode = U_RESOURCE_TYPE_MISMATCH;
- }
- return s;
-}
-
-int32_t ResourceDataValue::getInt(UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- if(RES_GET_TYPE(res) != URES_INT) {
- errorCode = U_RESOURCE_TYPE_MISMATCH;
- }
- return res_getInt(fTraceInfo, res);
-}
-
-uint32_t ResourceDataValue::getUInt(UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- if(RES_GET_TYPE(res) != URES_INT) {
- errorCode = U_RESOURCE_TYPE_MISMATCH;
- }
- return res_getUInt(fTraceInfo, res);
-}
-
-const int32_t *ResourceDataValue::getIntVector(int32_t &length, UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- const int32_t *iv = res_getIntVector(fTraceInfo, &getData(), res, &length);
- if(iv == NULL) {
- errorCode = U_RESOURCE_TYPE_MISMATCH;
- }
- return iv;
-}
-
-const uint8_t *ResourceDataValue::getBinary(int32_t &length, UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- return NULL;
- }
- const uint8_t *b = res_getBinary(fTraceInfo, &getData(), res, &length);
- if(b == NULL) {
- errorCode = U_RESOURCE_TYPE_MISMATCH;
- }
- return b;
-}
-
-ResourceArray ResourceDataValue::getArray(UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- return ResourceArray();
- }
- const uint16_t *items16 = NULL;
- const Resource *items32 = NULL;
- uint32_t offset=RES_GET_OFFSET(res);
- int32_t length = 0;
- switch(RES_GET_TYPE(res)) {
- case URES_ARRAY:
- if (offset!=0) { // empty if offset==0
- items32 = (const Resource *)getData().pRoot+offset;
- length = *items32++;
- }
- break;
- case URES_ARRAY16:
- items16 = getData().p16BitUnits+offset;
- length = *items16++;
- break;
- default:
- errorCode = U_RESOURCE_TYPE_MISMATCH;
- return ResourceArray();
- }
- return ResourceArray(items16, items32, length, fTraceInfo);
-}
-
-ResourceTable ResourceDataValue::getTable(UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- return ResourceTable();
- }
- const uint16_t *keys16 = NULL;
- const int32_t *keys32 = NULL;
- const uint16_t *items16 = NULL;
- const Resource *items32 = NULL;
- uint32_t offset = RES_GET_OFFSET(res);
- int32_t length = 0;
- switch(RES_GET_TYPE(res)) {
- case URES_TABLE:
- if (offset != 0) { // empty if offset==0
- keys16 = (const uint16_t *)(getData().pRoot+offset);
- length = *keys16++;
- items32 = (const Resource *)(keys16+length+(~length&1));
- }
- break;
- case URES_TABLE16:
- keys16 = getData().p16BitUnits+offset;
- length = *keys16++;
- items16 = keys16 + length;
- break;
- case URES_TABLE32:
- if (offset != 0) { // empty if offset==0
- keys32 = getData().pRoot+offset;
- length = *keys32++;
- items32 = (const Resource *)keys32 + length;
- }
- break;
- default:
- errorCode = U_RESOURCE_TYPE_MISMATCH;
- return ResourceTable();
- }
- return ResourceTable(keys16, keys32, items16, items32, length, fTraceInfo);
-}
-
-UBool ResourceDataValue::isNoInheritanceMarker() const {
- return ::isNoInheritanceMarker(&getData(), res);
-}
-
-int32_t ResourceDataValue::getStringArray(UnicodeString *dest, int32_t capacity,
- UErrorCode &errorCode) const {
- return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode);
-}
-
-int32_t ResourceDataValue::getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity,
- UErrorCode &errorCode) const {
- if(URES_IS_ARRAY(res)) {
- return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode);
- }
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- if(dest == NULL ? capacity != 0 : capacity < 0) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- if(capacity < 1) {
- errorCode = U_BUFFER_OVERFLOW_ERROR;
- return 1;
- }
- int32_t sLength;
- const UChar *s = res_getString(fTraceInfo, &getData(), res, &sLength);
- if(s != NULL) {
- dest[0].setTo(TRUE, s, sLength);
- return 1;
- }
- errorCode = U_RESOURCE_TYPE_MISMATCH;
- return 0;
-}
-
-UnicodeString ResourceDataValue::getStringOrFirstOfArray(UErrorCode &errorCode) const {
- UnicodeString us;
- if(U_FAILURE(errorCode)) {
- return us;
- }
- int32_t sLength;
- const UChar *s = res_getString(fTraceInfo, &getData(), res, &sLength);
- if(s != NULL) {
- us.setTo(TRUE, s, sLength);
- return us;
- }
- ResourceArray array = getArray(errorCode);
- if(U_FAILURE(errorCode)) {
- return us;
- }
- if(array.getSize() > 0) {
- // Tracing is already performed above (unimportant for trace that this is an array)
- s = res_getStringNoTrace(&getData(), array.internalGetResource(&getData(), 0), &sLength);
- if(s != NULL) {
- us.setTo(TRUE, s, sLength);
- return us;
- }
- }
- errorCode = U_RESOURCE_TYPE_MISMATCH;
- return us;
-}
-
-U_NAMESPACE_END
-
-static Resource
-makeResourceFrom16(const ResourceData *pResData, int32_t res16) {
- if(res16<pResData->poolStringIndex16Limit) {
- // Pool string, nothing to do.
- } else {
- // Local string, adjust the 16-bit offset to a regular one,
- // with a larger pool string index limit.
- res16=res16-pResData->poolStringIndex16Limit+pResData->poolStringIndexLimit;
- }
- return URES_MAKE_RESOURCE(URES_STRING_V2, res16);
-}
-
-U_CAPI Resource U_EXPORT2
-res_getTableItemByKey(const ResourceData *pResData, Resource table,
- int32_t *indexR, const char **key) {
- uint32_t offset=RES_GET_OFFSET(table);
- int32_t length;
- int32_t idx;
- if(key == NULL || *key == NULL) {
- return RES_BOGUS;
- }
- switch(RES_GET_TYPE(table)) {
- case URES_TABLE: {
- if (offset!=0) { /* empty if offset==0 */
- const uint16_t *p= (const uint16_t *)(pResData->pRoot+offset);
- length=*p++;
- *indexR=idx=_res_findTableItem(pResData, p, length, *key, key);
- if(idx>=0) {
- const Resource *p32=(const Resource *)(p+length+(~length&1));
- return p32[idx];
- }
- }
- break;
- }
- case URES_TABLE16: {
- const uint16_t *p=pResData->p16BitUnits+offset;
- length=*p++;
- *indexR=idx=_res_findTableItem(pResData, p, length, *key, key);
- if(idx>=0) {
- return makeResourceFrom16(pResData, p[length+idx]);
- }
- break;
- }
- case URES_TABLE32: {
- if (offset!=0) { /* empty if offset==0 */
- const int32_t *p= pResData->pRoot+offset;
- length=*p++;
- *indexR=idx=_res_findTable32Item(pResData, p, length, *key, key);
- if(idx>=0) {
- return (Resource)p[length+idx];
- }
- }
- break;
- }
- default:
- break;
- }
- return RES_BOGUS;
-}
-
-U_CAPI Resource U_EXPORT2
-res_getTableItemByIndex(const ResourceData *pResData, Resource table,
- int32_t indexR, const char **key) {
- uint32_t offset=RES_GET_OFFSET(table);
- int32_t length;
- if (indexR < 0) {
- return RES_BOGUS;
- }
- switch(RES_GET_TYPE(table)) {
- case URES_TABLE: {
- if (offset != 0) { /* empty if offset==0 */
- const uint16_t *p= (const uint16_t *)(pResData->pRoot+offset);
- length=*p++;
- if(indexR<length) {
- const Resource *p32=(const Resource *)(p+length+(~length&1));
- if(key!=NULL) {
- *key=RES_GET_KEY16(pResData, p[indexR]);
- }
- return p32[indexR];
- }
- }
- break;
- }
- case URES_TABLE16: {
- const uint16_t *p=pResData->p16BitUnits+offset;
- length=*p++;
- if(indexR<length) {
- if(key!=NULL) {
- *key=RES_GET_KEY16(pResData, p[indexR]);
- }
- return makeResourceFrom16(pResData, p[length+indexR]);
- }
- break;
- }
- case URES_TABLE32: {
- if (offset != 0) { /* empty if offset==0 */
- const int32_t *p= pResData->pRoot+offset;
- length=*p++;
- if(indexR<length) {
- if(key!=NULL) {
- *key=RES_GET_KEY32(pResData, p[indexR]);
- }
- return (Resource)p[length+indexR];
- }
- }
- break;
- }
- default:
- break;
- }
- return RES_BOGUS;
-}
-
-U_CAPI Resource U_EXPORT2
-res_getResource(const ResourceData *pResData, const char *key) {
- const char *realKey=key;
- int32_t idx;
- return res_getTableItemByKey(pResData, pResData->rootRes, &idx, &realKey);
-}
-
-
-UBool icu::ResourceTable::getKeyAndValue(int32_t i,
- const char *&key, icu::ResourceValue &value) const {
- if(0 <= i && i < length) {
- icu::ResourceDataValue &rdValue = static_cast<icu::ResourceDataValue &>(value);
- if (keys16 != nullptr) {
- key = RES_GET_KEY16(&rdValue.getData(), keys16[i]);
- } else {
- key = RES_GET_KEY32(&rdValue.getData(), keys32[i]);
- }
- Resource res;
- if (items16 != nullptr) {
- res = makeResourceFrom16(&rdValue.getData(), items16[i]);
- } else {
- res = items32[i];
- }
- // Note: the ResourceTracer keeps a reference to the field of this
- // ResourceTable. This is OK because the ResourceTable should remain
- // alive for the duration that fields are being read from it
- // (including nested fields).
- rdValue.setResource(res, ResourceTracer(fTraceInfo, key));
- return TRUE;
- }
- return FALSE;
-}
-
-UBool icu::ResourceTable::findValue(const char *key, ResourceValue &value) const {
- icu::ResourceDataValue &rdValue = static_cast<icu::ResourceDataValue &>(value);
- const char *realKey = nullptr;
- int32_t i;
- if (keys16 != nullptr) {
- i = _res_findTableItem(&rdValue.getData(), keys16, length, key, &realKey);
- } else {
- i = _res_findTable32Item(&rdValue.getData(), keys32, length, key, &realKey);
- }
- if (i >= 0) {
- Resource res;
- if (items16 != nullptr) {
- res = makeResourceFrom16(&rdValue.getData(), items16[i]);
- } else {
- res = items32[i];
- }
- // Same note about lifetime as in getKeyAndValue().
- rdValue.setResource(res, ResourceTracer(fTraceInfo, key));
- return TRUE;
- }
- return FALSE;
-}
-
-U_CAPI Resource U_EXPORT2
-res_getArrayItem(const ResourceData *pResData, Resource array, int32_t indexR) {
- uint32_t offset=RES_GET_OFFSET(array);
- if (indexR < 0) {
- return RES_BOGUS;
- }
- switch(RES_GET_TYPE(array)) {
- case URES_ARRAY: {
- if (offset!=0) { /* empty if offset==0 */
- const int32_t *p= pResData->pRoot+offset;
- if(indexR<*p) {
- return (Resource)p[1+indexR];
- }
- }
- break;
- }
- case URES_ARRAY16: {
- const uint16_t *p=pResData->p16BitUnits+offset;
- if(indexR<*p) {
- return makeResourceFrom16(pResData, p[1+indexR]);
- }
- break;
- }
- default:
- break;
- }
- return RES_BOGUS;
-}
-
-uint32_t icu::ResourceArray::internalGetResource(const ResourceData *pResData, int32_t i) const {
- if (items16 != NULL) {
- return makeResourceFrom16(pResData, items16[i]);
- } else {
- return items32[i];
- }
-}
-
-UBool icu::ResourceArray::getValue(int32_t i, icu::ResourceValue &value) const {
- if(0 <= i && i < length) {
- icu::ResourceDataValue &rdValue = static_cast<icu::ResourceDataValue &>(value);
- // Note: the ResourceTracer keeps a reference to the field of this
- // ResourceArray. This is OK because the ResourceArray should remain
- // alive for the duration that fields are being read from it
- // (including nested fields).
- rdValue.setResource(
- internalGetResource(&rdValue.getData(), i),
- ResourceTracer(fTraceInfo, i));
- return TRUE;
- }
- return FALSE;
-}
-
-U_CFUNC Resource
-res_findResource(const ResourceData *pResData, Resource r, char** path, const char** key) {
- char *pathP = *path, *nextSepP = *path;
- char *closeIndex = NULL;
- Resource t1 = r;
- Resource t2;
- int32_t indexR = 0;
- UResType type = (UResType)RES_GET_TYPE(t1);
-
- /* if you come in with an empty path, you'll be getting back the same resource */
- if(!uprv_strlen(pathP)) {
- return r;
- }
-
- /* one needs to have an aggregate resource in order to search in it */
- if(!URES_IS_CONTAINER(type)) {
- return RES_BOGUS;
- }
-
- while(nextSepP && *pathP && t1 != RES_BOGUS && URES_IS_CONTAINER(type)) {
- /* Iteration stops if: the path has been consumed, we found a non-existing
- * resource (t1 == RES_BOGUS) or we found a scalar resource (including alias)
- */
- nextSepP = uprv_strchr(pathP, RES_PATH_SEPARATOR);
- /* if there are more separators, terminate string
- * and set path to the remaining part of the string
- */
- if(nextSepP != NULL) {
- if(nextSepP == pathP) {
- // Empty key string.
- return RES_BOGUS;
- }
- *nextSepP = 0; /* overwrite the separator with a NUL to terminate the key */
- *path = nextSepP+1;
- } else {
- *path = uprv_strchr(pathP, 0);
- }
-
- /* if the resource is a table */
- /* try the key based access */
- if(URES_IS_TABLE(type)) {
- *key = pathP;
- t2 = res_getTableItemByKey(pResData, t1, &indexR, key);
- if(t2 == RES_BOGUS) {
- /* if we fail to get the resource by key, maybe we got an index */
- indexR = uprv_strtol(pathP, &closeIndex, 10);
- if(indexR >= 0 && *closeIndex == 0) {
- /* if we indeed have an index, try to get the item by index */
- t2 = res_getTableItemByIndex(pResData, t1, indexR, key);
- } // else t2 is already RES_BOGUS
- }
- } else if(URES_IS_ARRAY(type)) {
- indexR = uprv_strtol(pathP, &closeIndex, 10);
- if(indexR >= 0 && *closeIndex == 0) {
- t2 = res_getArrayItem(pResData, t1, indexR);
- } else {
- t2 = RES_BOGUS; /* have an array, but don't have a valid index */
- }
- *key = NULL;
- } else { /* can't do much here, except setting t2 to bogus */
- t2 = RES_BOGUS;
- }
- t1 = t2;
- type = (UResType)RES_GET_TYPE(t1);
- /* position pathP to next resource key/index */
- pathP = *path;
- }
-
- return t1;
-}
-
-/* resource bundle swapping ------------------------------------------------- */
-
-/*
- * Need to always enumerate the entire item tree,
- * track the lowest address of any item to use as the limit for char keys[],
- * track the highest address of any item to return the size of the data.
- *
- * We should have thought of storing those in the data...
- * It is possible to extend the data structure by putting additional values
- * in places that are inaccessible by ordinary enumeration of the item tree.
- * For example, additional integers could be stored at the beginning or
- * end of the key strings; this could be indicated by a minor version number,
- * and the data swapping would have to know about these values.
- *
- * The data structure does not forbid keys to be shared, so we must swap
- * all keys once instead of each key when it is referenced.
- *
- * These swapping functions assume that a resource bundle always has a length
- * that is a multiple of 4 bytes.
- * Currently, this is trivially true because genrb writes bundle tree leaves
- * physically first, before their branches, so that the root table with its
- * array of resource items (uint32_t values) is always last.
- */
-
-/* definitions for table sorting ------------------------ */
-
-/*
- * row of a temporary array
- *
- * gets platform-endian key string indexes and sorting indexes;
- * after sorting this array by keys, the actual key/value arrays are permutated
- * according to the sorting indexes
- */
-typedef struct Row {
- int32_t keyIndex, sortIndex;
-} Row;
-
-static int32_t U_CALLCONV
-ures_compareRows(const void *context, const void *left, const void *right) {
- const char *keyChars=(const char *)context;
- return (int32_t)uprv_strcmp(keyChars+((const Row *)left)->keyIndex,
- keyChars+((const Row *)right)->keyIndex);
-}
-
-typedef struct TempTable {
- const char *keyChars;
- Row *rows;
- int32_t *resort;
- uint32_t *resFlags;
- int32_t localKeyLimit;
- uint8_t majorFormatVersion;
-} TempTable;
-
-enum {
- STACK_ROW_CAPACITY=200
-};
-
-/* The table item key string is not locally available. */
-static const char *const gUnknownKey="";
-
-/* resource table key for collation binaries: "%%CollationBin" */
-static const UChar gCollationBinKey[]={
- 0x25, 0x25,
- 0x43, 0x6f, 0x6c, 0x6c, 0x61, 0x74, 0x69, 0x6f, 0x6e,
- 0x42, 0x69, 0x6e,
- 0
-};
-
-/*
- * swap one resource item
- */
-static void
-ures_swapResource(const UDataSwapper *ds,
- const Resource *inBundle, Resource *outBundle,
- Resource res, /* caller swaps res itself */
- const char *key,
- TempTable *pTempTable,
- UErrorCode *pErrorCode) {
- const Resource *p;
- Resource *q;
- int32_t offset, count;
-
- switch(RES_GET_TYPE(res)) {
- case URES_TABLE16:
- case URES_STRING_V2:
- case URES_INT:
- case URES_ARRAY16:
- /* integer, or points to 16-bit units, nothing to do here */
- return;
- default:
- break;
- }
-
- /* all other types use an offset to point to their data */
- offset=(int32_t)RES_GET_OFFSET(res);
- if(offset==0) {
- /* special offset indicating an empty item */
- return;
- }
- if(pTempTable->resFlags[offset>>5]&((uint32_t)1<<(offset&0x1f))) {
- /* we already swapped this resource item */
- return;
- } else {
- /* mark it as swapped now */
- pTempTable->resFlags[offset>>5]|=((uint32_t)1<<(offset&0x1f));
- }
-
- p=inBundle+offset;
- q=outBundle+offset;
-
- switch(RES_GET_TYPE(res)) {
- case URES_ALIAS:
- /* physically same value layout as string, fall through */
- U_FALLTHROUGH;
- case URES_STRING:
- count=udata_readInt32(ds, (int32_t)*p);
- /* swap length */
- ds->swapArray32(ds, p, 4, q, pErrorCode);
- /* swap each UChar (the terminating NUL would not change) */
- ds->swapArray16(ds, p+1, 2*count, q+1, pErrorCode);
- break;
- case URES_BINARY:
- count=udata_readInt32(ds, (int32_t)*p);
- /* swap length */
- ds->swapArray32(ds, p, 4, q, pErrorCode);
- /* no need to swap or copy bytes - ures_swap() copied them all */
-
- /* swap known formats */
-#if !UCONFIG_NO_COLLATION
- if( key!=NULL && /* the binary is in a table */
- (key!=gUnknownKey ?
- /* its table key string is "%%CollationBin" */
- 0==ds->compareInvChars(ds, key, -1,
- gCollationBinKey, UPRV_LENGTHOF(gCollationBinKey)-1) :
- /* its table key string is unknown but it looks like a collation binary */
- ucol_looksLikeCollationBinary(ds, p+1, count))
- ) {
- ucol_swap(ds, p+1, count, q+1, pErrorCode);
- }
-#endif
- break;
- case URES_TABLE:
- case URES_TABLE32:
- {
- const uint16_t *pKey16;
- uint16_t *qKey16;
-
- const int32_t *pKey32;
- int32_t *qKey32;
-
- Resource item;
- int32_t i, oldIndex;
-
- if(RES_GET_TYPE(res)==URES_TABLE) {
- /* get table item count */
- pKey16=(const uint16_t *)p;
- qKey16=(uint16_t *)q;
- count=ds->readUInt16(*pKey16);
-
- pKey32=qKey32=NULL;
-
- /* swap count */
- ds->swapArray16(ds, pKey16++, 2, qKey16++, pErrorCode);
-
- offset+=((1+count)+1)/2;
- } else {
- /* get table item count */
- pKey32=(const int32_t *)p;
- qKey32=(int32_t *)q;
- count=udata_readInt32(ds, *pKey32);
-
- pKey16=qKey16=NULL;
-
- /* swap count */
- ds->swapArray32(ds, pKey32++, 4, qKey32++, pErrorCode);
-
- offset+=1+count;
- }
-
- if(count==0) {
- break;
- }
-
- p=inBundle+offset; /* pointer to table resources */
- q=outBundle+offset;
-
- /* recurse */
- for(i=0; i<count; ++i) {
- const char *itemKey=gUnknownKey;
- if(pKey16!=NULL) {
- int32_t keyOffset=ds->readUInt16(pKey16[i]);
- if(keyOffset<pTempTable->localKeyLimit) {
- itemKey=(const char *)outBundle+keyOffset;
- }
- } else {
- int32_t keyOffset=udata_readInt32(ds, pKey32[i]);
- if(keyOffset>=0) {
- itemKey=(const char *)outBundle+keyOffset;
- }
- }
- item=ds->readUInt32(p[i]);
- ures_swapResource(ds, inBundle, outBundle, item, itemKey, pTempTable, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "ures_swapResource(table res=%08x)[%d].recurse(%08x) failed\n",
- res, i, item);
- return;
- }
- }
-
- if(pTempTable->majorFormatVersion>1 || ds->inCharset==ds->outCharset) {
- /* no need to sort, just swap the offset/value arrays */
- if(pKey16!=NULL) {
- ds->swapArray16(ds, pKey16, count*2, qKey16, pErrorCode);
- ds->swapArray32(ds, p, count*4, q, pErrorCode);
- } else {
- /* swap key offsets and items as one array */
- ds->swapArray32(ds, pKey32, count*2*4, qKey32, pErrorCode);
- }
- break;
- }
-
- /*
- * We need to sort tables by outCharset key strings because they
- * sort differently for different charset families.
- * ures_swap() already set pTempTable->keyChars appropriately.
- * First we set up a temporary table with the key indexes and
- * sorting indexes and sort that.
- * Then we permutate and copy/swap the actual values.
- */
- if(pKey16!=NULL) {
- for(i=0; i<count; ++i) {
- pTempTable->rows[i].keyIndex=ds->readUInt16(pKey16[i]);
- pTempTable->rows[i].sortIndex=i;
- }
- } else {
- for(i=0; i<count; ++i) {
- pTempTable->rows[i].keyIndex=udata_readInt32(ds, pKey32[i]);
- pTempTable->rows[i].sortIndex=i;
- }
- }
- uprv_sortArray(pTempTable->rows, count, sizeof(Row),
- ures_compareRows, pTempTable->keyChars,
- FALSE, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "ures_swapResource(table res=%08x).uprv_sortArray(%d items) failed\n",
- res, count);
- return;
- }
-
- /*
- * copy/swap/permutate items
- *
- * If we swap in-place, then the permutation must use another
- * temporary array (pTempTable->resort)
- * before the results are copied to the outBundle.
- */
- /* keys */
- if(pKey16!=NULL) {
- uint16_t *rKey16;
-
- if(pKey16!=qKey16) {
- rKey16=qKey16;
- } else {
- rKey16=(uint16_t *)pTempTable->resort;
- }
- for(i=0; i<count; ++i) {
- oldIndex=pTempTable->rows[i].sortIndex;
- ds->swapArray16(ds, pKey16+oldIndex, 2, rKey16+i, pErrorCode);
- }
- if(qKey16!=rKey16) {
- uprv_memcpy(qKey16, rKey16, 2*count);
- }
- } else {
- int32_t *rKey32;
-
- if(pKey32!=qKey32) {
- rKey32=qKey32;
- } else {
- rKey32=pTempTable->resort;
- }
- for(i=0; i<count; ++i) {
- oldIndex=pTempTable->rows[i].sortIndex;
- ds->swapArray32(ds, pKey32+oldIndex, 4, rKey32+i, pErrorCode);
- }
- if(qKey32!=rKey32) {
- uprv_memcpy(qKey32, rKey32, 4*count);
- }
- }
-
- /* resources */
- {
- Resource *r;
-
-
- if(p!=q) {
- r=q;
- } else {
- r=(Resource *)pTempTable->resort;
- }
- for(i=0; i<count; ++i) {
- oldIndex=pTempTable->rows[i].sortIndex;
- ds->swapArray32(ds, p+oldIndex, 4, r+i, pErrorCode);
- }
- if(q!=r) {
- uprv_memcpy(q, r, 4*count);
- }
- }
- }
- break;
- case URES_ARRAY:
- {
- Resource item;
- int32_t i;
-
- count=udata_readInt32(ds, (int32_t)*p);
- /* swap length */
- ds->swapArray32(ds, p++, 4, q++, pErrorCode);
-
- /* recurse */
- for(i=0; i<count; ++i) {
- item=ds->readUInt32(p[i]);
- ures_swapResource(ds, inBundle, outBundle, item, NULL, pTempTable, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "ures_swapResource(array res=%08x)[%d].recurse(%08x) failed\n",
- res, i, item);
- return;
- }
- }
-
- /* swap items */
- ds->swapArray32(ds, p, 4*count, q, pErrorCode);
- }
- break;
- case URES_INT_VECTOR:
- count=udata_readInt32(ds, (int32_t)*p);
- /* swap length and each integer */
- ds->swapArray32(ds, p, 4*(1+count), q, pErrorCode);
- break;
- default:
- /* also catches RES_BOGUS */
- *pErrorCode=U_UNSUPPORTED_ERROR;
- break;
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-ures_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const UDataInfo *pInfo;
- const Resource *inBundle;
- Resource rootRes;
- int32_t headerSize, maxTableLength;
- /*
- * Overview:
- * - udata_swapDataHeader() is called first and yields headerSize.
- * - Only "ResB" data with formatVersion 1.1+, 2.x or 3.x is accepted.
- * - length<0: none of the code below writes to outData; only the
- * result size is computed from the indexes.
- * - length>=0: the key strings, the 16-bit units, the resource items and
- * finally the root resource plus indexes are swapped, in that order.
- * Returns 0 for every error detected before the resource items are swapped,
- * otherwise headerSize+4*top. A failure inside ures_swapResource() is only
- * logged here: *pErrorCode stays set but the size is still returned.
- */
- Row rows[STACK_ROW_CAPACITY];
- int32_t resort[STACK_ROW_CAPACITY];
- TempTable tempTable;
-
- const int32_t *inIndexes;
-
- /* the following integers count Resource item offsets (4 bytes each), not bytes */
- int32_t bundleLength, indexLength, keysBottom, keysTop, resBottom, top;
-
- /* udata_swapDataHeader checks the arguments */
- headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /* check data format and format version */
- pInfo=(const UDataInfo *)((const char *)inData+4);
- if(!(
- pInfo->dataFormat[0]==0x52 && /* dataFormat="ResB" */
- pInfo->dataFormat[1]==0x65 &&
- pInfo->dataFormat[2]==0x73 &&
- pInfo->dataFormat[3]==0x42 &&
- /* formatVersion 1.1+ or 2.x or 3.x */
- ((pInfo->formatVersion[0]==1 && pInfo->formatVersion[1]>=1) ||
- pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3)
- )) {
- udata_printError(ds, "ures_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a resource bundle\n",
- pInfo->dataFormat[0], pInfo->dataFormat[1],
- pInfo->dataFormat[2], pInfo->dataFormat[3],
- pInfo->formatVersion[0], pInfo->formatVersion[1]);
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
- tempTable.majorFormatVersion=pInfo->formatVersion[0];
-
- /* a resource bundle must contain at least one resource item */
- if(length<0) {
- bundleLength=-1; /* marker: no real length given, skip the length checks below */
- } else {
- bundleLength=(length-headerSize)/4;
-
- /* formatVersion 1.1 must have a root item and at least 5 indexes */
- if(bundleLength<(1+5)) {
- udata_printError(ds, "ures_swap(): too few bytes (%d after header) for a resource bundle\n",
- length-headerSize);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- }
-
- inBundle=(const Resource *)((const char *)inData+headerSize);
- rootRes=ds->readUInt32(*inBundle);
-
- /* formatVersion 1.1 adds the indexes[] array */
- inIndexes=(const int32_t *)(inBundle+1);
-
- indexLength=udata_readInt32(ds, inIndexes[URES_INDEX_LENGTH])&0xff;
- if(indexLength<=URES_INDEX_MAX_TABLE_LENGTH) { /* indexes[] must include the [URES_INDEX_MAX_TABLE_LENGTH] slot that is read below */
- udata_printError(ds, "ures_swap(): too few indexes for a 1.1+ resource bundle\n");
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- keysBottom=1+indexLength;
- keysTop=udata_readInt32(ds, inIndexes[URES_INDEX_KEYS_TOP]);
- if(indexLength>URES_INDEX_16BIT_TOP) { /* indexes[] is long enough to carry the 16-bit-units top */
- resBottom=udata_readInt32(ds, inIndexes[URES_INDEX_16BIT_TOP]);
- } else {
- resBottom=keysTop; /* no 16-bit-units area: keysTop<resBottom is false further down */
- }
- top=udata_readInt32(ds, inIndexes[URES_INDEX_BUNDLE_TOP]);
- maxTableLength=udata_readInt32(ds, inIndexes[URES_INDEX_MAX_TABLE_LENGTH]);
-
- if(0<=bundleLength && bundleLength<top) { /* only checked when a real length was given (length>=0) */
- udata_printError(ds, "ures_swap(): resource top %d exceeds bundle length %d\n",
- top, bundleLength);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- if(keysTop>(1+indexLength)) { /* the key string area between keysBottom and keysTop is not empty */
- tempTable.localKeyLimit=keysTop<<2; /* keysTop counts 4-byte units; the limit is a byte offset */
- } else {
- tempTable.localKeyLimit=0;
- }
-
- if(length>=0) {
- Resource *outBundle=(Resource *)((char *)outData+headerSize);
-
- /* track which resources we have already swapped */
- uint32_t stackResFlags[STACK_ROW_CAPACITY];
- int32_t resFlagsLength;
-
- /*
- * We need one bit per 4 resource bundle bytes so that we can track
- * every possible Resource for whether we have swapped it already.
- * Multiple Resource words can refer to the same bundle offsets
- * for sharing identical values.
- * We could optimize this by allocating only for locations above
- * where Resource values are stored (above keys & strings).
- */
- resFlagsLength=(length+31)>>5; /* number of bytes needed */
- resFlagsLength=(resFlagsLength+3)&~3; /* multiple of 4 bytes for uint32_t */
- if(resFlagsLength<=(int32_t)sizeof(stackResFlags)) { /* small enough: use the stack buffer */
- tempTable.resFlags=stackResFlags;
- } else {
- tempTable.resFlags=(uint32_t *)uprv_malloc(resFlagsLength);
- if(tempTable.resFlags==NULL) {
- udata_printError(ds, "ures_swap(): unable to allocate memory for tracking resources\n");
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- }
- uprv_memset(tempTable.resFlags, 0, resFlagsLength);
-
- /* copy the bundle for binary and inaccessible data */
- if(inData!=outData) {
- uprv_memcpy(outBundle, inBundle, 4*top);
- }
-
- /* swap the key strings, but not the padding bytes (0xaa) after the last string and its NUL */
- udata_swapInvStringBlock(ds, inBundle+keysBottom, 4*(keysTop-keysBottom),
- outBundle+keysBottom, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "ures_swap().udata_swapInvStringBlock(keys[%d]) failed\n", 4*(keysTop-keysBottom));
- return 0;
- }
-
- /* swap the 16-bit units (strings, table16, array16) */
- if(keysTop<resBottom) {
- ds->swapArray16(ds, inBundle+keysTop, (resBottom-keysTop)*4, outBundle+keysTop, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "ures_swap().swapArray16(16-bit units[%d]) failed\n", 2*(resBottom-keysTop));
- return 0;
- }
- }
-
- /* allocate the temporary table for sorting resource tables */
- tempTable.keyChars=(const char *)outBundle; /* sort by outCharset */
- if(tempTable.majorFormatVersion>1 || maxTableLength<=STACK_ROW_CAPACITY) { /* ures_swapResource() does not sort tables when majorFormatVersion>1, so the stack arrays suffice */
- tempTable.rows=rows;
- tempTable.resort=resort;
- } else {
- tempTable.rows=(Row *)uprv_malloc(maxTableLength*sizeof(Row)+maxTableLength*4); /* one allocation: maxTableLength Rows followed by maxTableLength 4-byte resort slots */
- if(tempTable.rows==NULL) {
- udata_printError(ds, "ures_swap(): unable to allocate memory for sorting tables (max length: %d)\n",
- maxTableLength);
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- if(tempTable.resFlags!=stackResFlags) {
- uprv_free(tempTable.resFlags);
- }
- return 0;
- }
- tempTable.resort=(int32_t *)(tempTable.rows+maxTableLength);
- }
-
- /* swap the resources */
- ures_swapResource(ds, inBundle, outBundle, rootRes, NULL, &tempTable, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "ures_swapResource(root res=%08x) failed\n",
- rootRes);
- }
- /* no early return above: the heap buffers are released on success and on failure */
- if(tempTable.rows!=rows) {
- uprv_free(tempTable.rows);
- }
- if(tempTable.resFlags!=stackResFlags) {
- uprv_free(tempTable.resFlags);
- }
-
- /* swap the root resource and indexes */
- ds->swapArray32(ds, inBundle, keysBottom*4, outBundle, pErrorCode);
- }
-
- return headerSize+4*top;
-}
diff --git a/contrib/libs/icu/common/uresdata.h b/contrib/libs/icu/common/uresdata.h
deleted file mode 100644
index d1b67babf29..00000000000
--- a/contrib/libs/icu/common/uresdata.h
+++ /dev/null
@@ -1,565 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 1999-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-******************************************************************************
-* file name: uresdata.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999dec08
-* created by: Markus W. Scherer
-* 06/24/02 weiv Added support for resource sharing
-*/
-
-#ifndef __RESDATA_H__
-#define __RESDATA_H__
-
-#include "unicode/utypes.h"
-#include "unicode/udata.h"
-#include "unicode/ures.h"
-#include "putilimp.h"
-#include "udataswp.h"
-
-/**
- * Numeric constants for internal-only types of resource items.
- * These must use different numeric values than UResType constants
- * because they are used together.
- * Internal types are never returned by ures_getType().
- */
-typedef enum { /* these values are compared against RES_GET_TYPE(res), i.e. the top 4 bits of a Resource */
- /** Include a negative value so that the compiler uses the same int type as for UResType. */
- URES_INTERNAL_NONE=-1,
-
- /** Resource type constant for tables with 32-bit count, key offsets and values. */
- URES_TABLE32=4,
-
- /**
- * Resource type constant for tables with 16-bit count, key offsets and values.
- * All values are URES_STRING_V2 strings.
- */
- URES_TABLE16=5,
-
- /** Resource type constant for 16-bit Unicode strings in formatVersion 2. */
- URES_STRING_V2=6,
-
- /**
- * Resource type constant for arrays with 16-bit count and values.
- * All values are URES_STRING_V2 strings.
- */
- URES_ARRAY16=9
-
- /* Resource type 15 is not defined but effectively used by RES_BOGUS=0xffffffff. */
-} UResInternalType;
-
-/*
- * A Resource is a 32-bit value that has 2 bit fields:
- * 31..28 4-bit type, see enum below
- * 27..0 28-bit four-byte-offset or value according to the type
- */
-typedef uint32_t Resource;
-
-#define RES_BOGUS 0xffffffff
-#define RES_MAX_OFFSET 0x0fffffff
-
-#define RES_GET_TYPE(res) ((int32_t)((res)>>28UL))
-#define RES_GET_OFFSET(res) ((res)&0x0fffffff)
-#define RES_GET_POINTER(pRoot, res) ((pRoot)+RES_GET_OFFSET(res))
-
-/* get signed and unsigned integer values directly from the Resource handle
- * NOTE: For proper tracing, please use the inline res_getInt()/res_getUInt() functions (C++ only)
- */
-#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
-# define RES_GET_INT_NO_TRACE(res) (((int32_t)((res)<<4L))>>4L)
-#else
-# define RES_GET_INT_NO_TRACE(res) (int32_t)(((res)&0x08000000) ? (res)|0xf0000000 : (res)&0x07ffffff)
-#endif
-
-#define RES_GET_UINT_NO_TRACE(res) ((res)&0x0fffffff)
-
-#define URES_IS_ARRAY(type) ((int32_t)(type)==URES_ARRAY || (int32_t)(type)==URES_ARRAY16)
-#define URES_IS_TABLE(type) ((int32_t)(type)==URES_TABLE || (int32_t)(type)==URES_TABLE16 || (int32_t)(type)==URES_TABLE32)
-#define URES_IS_CONTAINER(type) (URES_IS_TABLE(type) || URES_IS_ARRAY(type))
-
-#define URES_MAKE_RESOURCE(type, offset) (((Resource)(type)<<28)|(Resource)(offset))
-#define URES_MAKE_EMPTY_RESOURCE(type) ((Resource)(type)<<28)
-
-/* indexes[] value names; indexes are generally 32-bit (Resource) indexes */
-enum {
- /**
- * [0] contains the length of indexes[]
- * which is at most URES_INDEX_TOP of the latest format version
- *
- * formatVersion==1: all bits contain the length of indexes[]
- * but the length is much less than 0xff;
- * formatVersion>1:
- * only bits 7..0 contain the length of indexes[],
- * bits 31..8 are reserved and set to 0
- * formatVersion>=3:
- * bits 31..8 poolStringIndexLimit bits 23..0
- */
- URES_INDEX_LENGTH,
- /**
- * [1] contains the top of the key strings,
- * same as the bottom of resources or UTF-16 strings, rounded up
- */
- URES_INDEX_KEYS_TOP,
- /** [2] contains the top of all resources */
- URES_INDEX_RESOURCES_TOP,
- /**
- * [3] contains the top of the bundle,
- * in case it were ever different from [2]
- */
- URES_INDEX_BUNDLE_TOP,
- /** [4] max. length of any table */
- URES_INDEX_MAX_TABLE_LENGTH,
- /**
- * [5] attributes bit set, see URES_ATT_* (new in formatVersion 1.2)
- *
- * formatVersion>=3:
- * bits 31..16 poolStringIndex16Limit
- * bits 15..12 poolStringIndexLimit bits 27..24
- */
- URES_INDEX_ATTRIBUTES,
- /**
- * [6] top of the 16-bit units (UTF-16 string v2 UChars, URES_TABLE16, URES_ARRAY16),
- * rounded up (new in formatVersion 2.0, ICU 4.4)
- */
- URES_INDEX_16BIT_TOP,
- /** [7] checksum of the pool bundle (new in formatVersion 2.0, ICU 4.4) */
- URES_INDEX_POOL_CHECKSUM,
- URES_INDEX_TOP /* not a slot itself: the number of slots named above (8 in this header) */
-};
-
-/*
- * Nofallback attribute, attribute bit 0 in indexes[URES_INDEX_ATTRIBUTES].
- * New in formatVersion 1.2 (ICU 3.6).
- *
- * If set, then this resource bundle is a standalone bundle.
- * If not set, then the bundle participates in locale fallback, eventually
- * all the way to the root bundle.
- * If indexes[] is missing or too short, then the attribute cannot be determined
- * reliably. Dependency checking should ignore such bundles, and loading should
- * use fallbacks.
- */
-#define URES_ATT_NO_FALLBACK 1
-
-/*
- * Attributes for bundles that are, or use, a pool bundle.
- * A pool bundle provides key strings that are shared among several other bundles
- * to reduce their total size.
- * New in formatVersion 2 (ICU 4.4).
- */
-#define URES_ATT_IS_POOL_BUNDLE 2
-#define URES_ATT_USES_POOL_BUNDLE 4
-
-/*
- * File format for .res resource bundle files
- *
- * ICU 56: New in formatVersion 3 compared with 2: -------------
- *
- * Resource bundles can optionally use shared string-v2 values
- * stored in the pool bundle.
- * If so, then the indexes[] contain two new values
- * in previously-unused bits of existing indexes[] slots:
- * - poolStringIndexLimit:
- * String-v2 offsets (in 32-bit Resource words) below this limit
- * point to pool bundle string-v2 values.
- * - poolStringIndex16Limit:
- * Resource16 string-v2 offsets below this limit
- * point to pool bundle string-v2 values.
- * Guarantee: poolStringIndex16Limit <= poolStringIndexLimit
- *
- * The local bundle's poolStringIndexLimit is greater than
- * any pool bundle string index used in the local bundle.
- * The poolStringIndexLimit should not be greater than
- * the maximum possible pool bundle string index.
- *
- * The maximum possible pool bundle string index is the index to the last non-NUL
- * pool string character, due to suffix sharing.
- *
- * In the pool bundle, there is no structure that lists the strings.
- * (The root resource is an empty Table.)
- * If the strings need to be enumerated (as genrb --usePoolBundle does),
- * then iterate through the pool bundle's 16-bit-units array from the beginning.
- * Stop at the end of the array, or when an explicit or implicit string length
- * would lead beyond the end of the array,
- * or when an apparent string is not NUL-terminated.
- * (Future genrb version might terminate the strings with
- * what looks like a large explicit string length.)
- *
- * ICU 4.4: New in formatVersion 2 compared with 1.3: -------------
- *
- * Three new resource types -- String-v2, Table16 and Array16 -- have their
- * values stored in a new array of 16-bit units between the table key strings
- * and the start of the other resources.
- *
- * genrb eliminates duplicates among Unicode string-v2 values.
- * Multiple Unicode strings may use the same offset and string data,
- * or a short string may point to the suffix of a longer string. ("Suffix sharing")
- * For example, one string "abc" may be reused for another string "bc" by pointing
- * to the second character. (Short strings-v2 are NUL-terminated
- * and not preceded by an explicit length value.)
- *
- * It is allowed for all resource types to share values.
- * The swapper code (ures_swap()) has been modified so that it swaps each item
- * exactly once.
- *
- * A resource bundle may use a special pool bundle. Some or all of the table key strings
- * of the using-bundle are omitted, and the key string offsets for such key strings refer
- * to offsets in the pool bundle.
- * The using-bundle's and the pool-bundle's indexes[URES_INDEX_POOL_CHECKSUM] values
- * must match.
- * Two bits in indexes[URES_INDEX_ATTRIBUTES] indicate whether a resource bundle
- * is or uses a pool bundle.
- *
- * Table key strings must be compared in ASCII order, even if they are not
- * stored in ASCII.
- *
- * New in formatVersion 1.3 compared with 1.2: -------------
- *
- * genrb eliminates duplicates among key strings.
- * Multiple table items may share one key string, or one item may point
- * to the suffix of another's key string. ("Suffix sharing")
- * For example, one key "abc" may be reused for another key "bc" by pointing
- * to the second character. (Key strings are NUL-terminated.)
- *
- * -------------
- *
- * An ICU4C resource bundle file (.res) is a binary, memory-mappable file
- * with nested, hierarchical data structures.
- * It physically contains the following:
- *
- * Resource root; -- 32-bit Resource item, root item for this bundle's tree;
- * currently, the root item must be a table or table32 resource item
- * int32_t indexes[indexes[0]]; -- array of indexes for friendly
- * reading and swapping; see URES_INDEX_* above
- * new in formatVersion 1.1 (ICU 2.8)
- * char keys[]; -- characters for key strings
- * (formatVersion 1.0: up to 65k of characters; 1.1: <2G)
- * (minus the space for root and indexes[]),
- * which consist of invariant characters (ASCII/EBCDIC) and are NUL-terminated;
- * padded to multiple of 4 bytes for 4-alignment of the following data
- * uint16_t 16BitUnits[]; -- resources that are stored entirely as sequences of 16-bit units
- * (new in formatVersion 2/ICU 4.4)
- * data is indexed by the offset values in 16-bit resource types,
- * with offset 0 pointing to the beginning of this array;
- * there is a 0 at offset 0, for empty resources;
- * padded to multiple of 4 bytes for 4-alignment of the following data
- * data; -- data directly and indirectly indexed by the root item;
- * the structure is determined by walking the tree
- *
- * Each resource bundle item has a 32-bit Resource handle (see typedef above)
- * which contains the item type number in its upper 4 bits (31..28) and either
- * an offset or a direct value in its lower 28 bits (27..0).
- * The order of items is undefined and only determined by walking the tree.
- * Leaves of the tree may be stored first or last or anywhere in between,
- * and it is in theory possible to have unreferenced holes in the file.
- *
- * 16-bit-unit values:
- * Starting with formatVersion 2/ICU 4.4, some resources are stored in a special
- * array of 16-bit units. Each resource value is a sequence of 16-bit units,
- * with no per-resource padding to a 4-byte boundary.
- * 16-bit container types (Table16 and Array16) contain Resource16 values
- * which are offsets to String-v2 resources in the same 16-bit-units array.
- *
- * Direct values:
- * - Empty Unicode strings have an offset value of 0 in the Resource handle itself.
- * - Starting with formatVersion 2/ICU 4.4, an offset value of 0 for
- * _any_ resource type indicates an empty value.
- * - Integer values are 28-bit values stored in the Resource handle itself;
- * the interpretation of unsigned vs. signed integers is up to the application.
- *
- * All other types and values use 28-bit offsets to point to the item's data.
- * The offset is an index to the first 32-bit word of the value, relative to the
- * start of the resource data (i.e., the root item handle is at offset 0).
- * To get byte offsets, the offset is multiplied by 4 (or shifted left by 2 bits).
- * All resource item values are 4-aligned.
- *
- * New in formatVersion 2/ICU 4.4: Some types use offsets into the 16-bit-units array,
- * indexing 16-bit units in that array.
- *
- * The structures (memory layouts) for the values for each item type are listed
- * in the table below.
- *
- * Nested, hierarchical structures: -------------
- *
- * Table items contain key-value pairs where the keys are offsets to char * key strings.
- * The values of these pairs are either Resource handles or
- * offsets into the 16-bit-units array, depending on the table type.
- *
- * Array items are simple vectors of Resource handles,
- * or of offsets into the 16-bit-units array, depending on the array type.
- *
- * Table key string offsets: -------
- *
- * Key string offsets are relative to the start of the resource data (of the root handle),
- * i.e., the first string has an offset of 4+sizeof(indexes).
- * (After the 4-byte root handle and after the indexes array.)
- *
- * If the resource bundle uses a pool bundle, then some key strings are stored
- * in the pool bundle rather than in the local bundle itself.
- * - In a Table or Table16, the 16-bit key string offset is local if it is
- * less than indexes[URES_INDEX_KEYS_TOP]<<2.
- * Otherwise, subtract indexes[URES_INDEX_KEYS_TOP]<<2 to get the offset into
- * the pool bundle key strings.
- * - In a Table32, the 32-bit key string offset is local if it is non-negative.
- * Otherwise, reset bit 31 to get the pool key string offset.
- *
- * Unlike the local offset, the pool key offset is relative to
- * the start of the key strings, not to the start of the bundle.
- *
- * An alias item is special (and new in ICU 2.4): --------------
- *
- * Its memory layout is just like for a UnicodeString, but at runtime it resolves to
- * another resource bundle's item according to the path in the string.
- * This is used to share items across bundles that are in different lookup/fallback
- * chains (e.g., large collation data among zh_TW and zh_HK).
- * This saves space (for large items) and maintenance effort (less duplication of data).
- *
- * --------------------------------------------------------------------------
- *
- * Resource types:
- *
- * Most resources have their values stored at four-byte offsets from the start
- * of the resource data. These values are at least 4-aligned.
- * Some resource values are stored directly in the offset field of the Resource itself.
- * See UResType in unicode/ures.h for enumeration constants for Resource types.
- *
- * Some resources have their values stored as sequences of 16-bit units,
- * at 2-byte offsets from the start of a contiguous 16-bit-unit array between
- * the table key strings and the other resources. (new in formatVersion 2/ICU 4.4)
- * At offset 0 of that array is a 16-bit zero value for empty 16-bit resources.
- *
- * Resource16 values in Table16 and Array16 are 16-bit offsets to String-v2
- * resources, with the offsets relative to the start of the 16-bit-units array.
- * Starting with formatVersion 3/ICU 56, if offset<poolStringIndex16Limit
- * then use the pool bundle's 16-bit-units array,
- * otherwise subtract that limit and use the local 16-bit-units array.
- *
- * Type Name Memory layout of values
- * (in parentheses: scalar, non-offset values)
- *
- * 0 Unicode String: int32_t length, UChar[length], (UChar)0, (padding)
- * or (empty string ("") if offset==0)
- * 1 Binary: int32_t length, uint8_t[length], (padding)
- * - the start of the bytes is 16-aligned -
- * 2 Table: uint16_t count, uint16_t keyStringOffsets[count], (uint16_t padding), Resource[count]
- * 3 Alias: (physically same value layout as string, new in ICU 2.4)
- * 4 Table32: int32_t count, int32_t keyStringOffsets[count], Resource[count]
- * (new in formatVersion 1.1/ICU 2.8)
- * 5 Table16: uint16_t count, uint16_t keyStringOffsets[count], Resource16[count]
- * (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4)
- * 6 Unicode String-v2:UChar[length], (UChar)0; length determined by the first UChar:
- * - if first is not a trail surrogate, then the length is implicit
- * and u_strlen() needs to be called
- * - if first<0xdfef then length=first&0x3ff (and skip first)
- * - if first<0xdfff then length=((first-0xdfef)<<16) | second UChar
- * - if first==0xdfff then length=((second UChar)<<16) | third UChar
- * (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4)
- *
- * Starting with formatVersion 3/ICU 56, if offset<poolStringIndexLimit
- * then use the pool bundle's 16-bit-units array,
- * otherwise subtract that limit and use the local 16-bit-units array.
- * (Note different limits for Resource16 vs. Resource.)
- *
- * 7 Integer: (28-bit offset is integer value)
- * 8 Array: int32_t count, Resource[count]
- * 9 Array16: uint16_t count, Resource16[count]
- * (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4)
- * 14 Integer Vector: int32_t length, int32_t[length]
- * 15 Reserved: This value denotes special purpose resources and is for internal use.
- *
- * Note that there are 3 types with data vector values:
- * - Vectors of 8-bit bytes stored as type Binary.
- * - Vectors of 16-bit words stored as type Unicode String or Unicode String-v2
- * (no value restrictions, all values 0..ffff allowed!).
- * - Vectors of 32-bit words stored as type Integer Vector.
- */
-
-/*
- * Structure for a single, memory-mapped ResourceBundle.
- */
-typedef struct ResourceData {
- UDataMemory *data; /* NOTE(review): presumably the loaded data backing this bundle (cf. res_load/res_unload) - confirm */
- const int32_t *pRoot; /* NOTE(review): presumably the start of the resource data, where the root item is at offset 0 - confirm */
- const uint16_t *p16BitUnits; /* NOTE(review): presumably the 16-bit-units array of formatVersion 2+ - confirm */
- const char *poolBundleKeys; /* NOTE(review): presumably the key strings of the pool bundle, if one is used - confirm */
- Resource rootRes; /* NOTE(review): presumably the root Resource handle - confirm */
- int32_t localKeyLimit; /* NOTE(review): ures_swap() sets its own localKeyLimit to keysTop<<2 or 0; presumably the same byte limit - confirm */
- const uint16_t *poolBundleStrings; /* NOTE(review): presumably the 16-bit-units array of the pool bundle - confirm */
- int32_t poolStringIndexLimit; /* see "poolStringIndexLimit" in the file format comment of this header */
- int32_t poolStringIndex16Limit; /* see "poolStringIndex16Limit" in the file format comment of this header */
- UBool noFallback; /* see URES_ATT_NO_FALLBACK */
- UBool isPoolBundle; /* NOTE(review): presumably mirrors URES_ATT_IS_POOL_BUNDLE - confirm */
- UBool usesPoolBundle; /* NOTE(review): presumably mirrors URES_ATT_USES_POOL_BUNDLE - confirm */
- UBool useNativeStrcmp;
-} ResourceData;
-
-/*
- * Read a resource bundle from memory.
- */
-U_INTERNAL void U_EXPORT2
-res_read(ResourceData *pResData,
- const UDataInfo *pInfo, const void *inBytes, int32_t length,
- UErrorCode *errorCode);
-
-/*
- * Load a resource bundle file.
- * The ResourceData structure must be allocated externally.
- */
-U_CFUNC void
-res_load(ResourceData *pResData,
- const char *path, const char *name, UErrorCode *errorCode);
-
-/*
- * Release a resource bundle file.
- * This does not release the ResourceData structure itself.
- */
-U_CFUNC void
-res_unload(ResourceData *pResData);
-
-U_INTERNAL UResType U_EXPORT2
-res_getPublicType(Resource res);
-
-///////////////////////////////////////////////////////////////////////////
-// To enable tracing, use the inline versions of the res_get* functions. //
-///////////////////////////////////////////////////////////////////////////
-
-/*
- * Return a pointer to a zero-terminated, const UChar* string
- * and set its length in *pLength.
- * Returns NULL if not found.
- */
-U_INTERNAL const UChar * U_EXPORT2
-res_getStringNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength);
-
-U_INTERNAL const uint8_t * U_EXPORT2
-res_getBinaryNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength);
-
-U_INTERNAL const int32_t * U_EXPORT2
-res_getIntVectorNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength);
-
-U_INTERNAL const UChar * U_EXPORT2
-res_getAlias(const ResourceData *pResData, Resource res, int32_t *pLength);
-
-U_INTERNAL Resource U_EXPORT2
-res_getResource(const ResourceData *pResData, const char *key);
-
-U_INTERNAL int32_t U_EXPORT2
-res_countArrayItems(const ResourceData *pResData, Resource res);
-
-U_INTERNAL Resource U_EXPORT2
-res_getArrayItem(const ResourceData *pResData, Resource array, int32_t indexS);
-
-U_INTERNAL Resource U_EXPORT2
-res_getTableItemByIndex(const ResourceData *pResData, Resource table, int32_t indexS, const char ** key);
-
-U_INTERNAL Resource U_EXPORT2
-res_getTableItemByKey(const ResourceData *pResData, Resource table, int32_t *indexS, const char* * key);
-
-/**
- * Iterates over the path and stops when a scalar resource is found.
- * Follows aliases.
- * Modifies the contents of *path (replacing separators with NULs),
- * and also moves *path forward while it finds items.
- *
- * @param path input: "CollationElements/Sequence" or "zoneStrings/3/2" etc.;
- * output: points to the part that has not yet been processed
- */
-U_CFUNC Resource res_findResource(const ResourceData *pResData, Resource r,
- char** path, const char** key);
-
-#ifdef __cplusplus
-
-#include "resource.h"
-#include "restrace.h"
-
-U_NAMESPACE_BEGIN
-
-inline const UChar* res_getString(const ResourceTracer& traceInfo,
- const ResourceData *pResData, Resource res, int32_t *pLength) {
- traceInfo.trace("string"); /* notify the tracer first, then delegate to the NoTrace variant */
- return res_getStringNoTrace(pResData, res, pLength);
-}
-
-inline const uint8_t* res_getBinary(const ResourceTracer& traceInfo,
- const ResourceData *pResData, Resource res, int32_t *pLength) {
- traceInfo.trace("binary"); /* notify the tracer first, then delegate to the NoTrace variant */
- return res_getBinaryNoTrace(pResData, res, pLength);
-}
-
-inline const int32_t* res_getIntVector(const ResourceTracer& traceInfo,
- const ResourceData *pResData, Resource res, int32_t *pLength) {
- traceInfo.trace("intvector"); /* notify the tracer first, then delegate to the NoTrace variant */
- return res_getIntVectorNoTrace(pResData, res, pLength);
-}
-
-inline int32_t res_getInt(const ResourceTracer& traceInfo, Resource res) {
- traceInfo.trace("int"); /* notify the tracer first; the value itself comes from the Resource word */
- return RES_GET_INT_NO_TRACE(res);
-}
-
-inline uint32_t res_getUInt(const ResourceTracer& traceInfo, Resource res) {
- traceInfo.trace("uint"); /* notify the tracer first; the value is the low 28 bits of the Resource word */
- return RES_GET_UINT_NO_TRACE(res);
-}
-
-class ResourceDataValue : public ResourceValue {
-public:
- ResourceDataValue() :
- res(static_cast<Resource>(URES_NONE)),
- fTraceInfo() {} // resData is not in the initializer list: it stays uninitialized until setData() is called
- virtual ~ResourceDataValue();
-
- void setData(const ResourceData *data) {
- resData = *data; // copies the whole struct by value; data must not be NULL
- }
-
- void setResource(Resource r, ResourceTracer&& traceInfo) {
- res = r;
- fTraceInfo = traceInfo; // traceInfo is a named rvalue reference, so this is a copy assignment, not a move
- }
-
- const ResourceData &getData() const { return resData; }
- virtual UResType getType() const;
- virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const;
- virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const;
- virtual int32_t getInt(UErrorCode &errorCode) const;
- virtual uint32_t getUInt(UErrorCode &errorCode) const;
- virtual const int32_t *getIntVector(int32_t &length, UErrorCode &errorCode) const;
- virtual const uint8_t *getBinary(int32_t &length, UErrorCode &errorCode) const;
- virtual ResourceArray getArray(UErrorCode &errorCode) const;
- virtual ResourceTable getTable(UErrorCode &errorCode) const;
- virtual UBool isNoInheritanceMarker() const;
- virtual int32_t getStringArray(UnicodeString *dest, int32_t capacity,
- UErrorCode &errorCode) const;
- virtual int32_t getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity,
- UErrorCode &errorCode) const;
- virtual UnicodeString getStringOrFirstOfArray(UErrorCode &errorCode) const;
-
-private:
- // TODO(ICU-20769): If UResourceBundle.fResData becomes a pointer,
- // then remove this value field again and just store a pResData pointer.
- ResourceData resData;
- Resource res;
- ResourceTracer fTraceInfo;
-};
-
-U_NAMESPACE_END
-
-#endif /* __cplusplus */
-
-/**
- * Swap an ICU resource bundle. See udataswp.h.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-ures_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-#endif
diff --git a/contrib/libs/icu/common/uresimp.h b/contrib/libs/icu/common/uresimp.h
deleted file mode 100644
index f453ddc004a..00000000000
--- a/contrib/libs/icu/common/uresimp.h
+++ /dev/null
@@ -1,364 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2000-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#ifndef URESIMP_H
-#define URESIMP_H
-
-#include "unicode/ures.h"
-#include "unicode/utypes.h"
-
-#include "uresdata.h"
-
-#define kRootLocaleName "root"
-#define kPoolBundleName "pool"
-
-/*
- The default minor version and the version separator must be exactly one
- character long.
-*/
-
-#define kDefaultMinorVersion "0"
-#define kVersionSeparator "."
-#define kVersionTag "Version"
-
-#define MAGIC1 19700503
-#define MAGIC2 19641227
-
-#define URES_MAX_ALIAS_LEVEL 256
-#define URES_MAX_BUFFER_SIZE 256
-
-#define EMPTY_SET 0x2205
-
-struct UResourceDataEntry;
-typedef struct UResourceDataEntry UResourceDataEntry;
-
-/*
- * Note: If we wanted to make this structure smaller, then we could try
- * to use one UResourceDataEntry pointer for fAlias and fPool, with a separate
- * flag to distinguish whether this struct is for a real bundle with a pool,
- * or for an alias entry for which we won't use the pool after loading.
- */
-struct UResourceDataEntry {
- char *fName; /* name of the locale for bundle - still to decide whether it is original or fallback */
- char *fPath; /* path to bundle - used for distinguishing between resources with the same name */
- UResourceDataEntry *fParent; /*next resource in fallback chain*/
- UResourceDataEntry *fAlias;
- UResourceDataEntry *fPool;
- ResourceData fData; /* data for low level access */
- char fNameBuffer[3]; /* A small buffer of free space for fName. The free space is due to struct padding. */
- uint32_t fCountExisting; /* how much is this resource used */
- UErrorCode fBogus;
- /* int32_t fHashKey;*/ /* for faster access in the hashtable */
-};
-
-#define RES_BUFSIZE 64
-#define RES_PATH_SEPARATOR '/'
-#define RES_PATH_SEPARATOR_S "/"
-
-struct UResourceBundle {
- const char *fKey; /*tag*/
- UResourceDataEntry *fData; /*for low-level access*/
- char *fVersion;
- UResourceDataEntry *fTopLevelData; /* for getting the valid locale */
- char *fResPath; /* full path to the resource: "zh_TW/CollationElements/Sequence" */
- // TODO(ICU-20769): Try to change the by-value fResData into a pointer,
- // with the struct in only one place for each bundle.
- // Also replace class ResourceDataValue.resData with a pResData pointer again.
- ResourceData fResData;
- char fResBuf[RES_BUFSIZE];
- int32_t fResPathLen;
- Resource fRes;
- UBool fHasFallback;
- UBool fIsTopLevel;
- uint32_t fMagic1; /* For determining if it's a stack object */
- uint32_t fMagic2; /* For determining if it's a stack object */
- int32_t fIndex;
- int32_t fSize;
-
- /*const UResourceBundle *fParentRes;*/ /* needed to get the actual locale for a child resource */
-};
-
-U_CAPI void U_EXPORT2 ures_initStackObject(UResourceBundle* resB);
-
-#ifdef __cplusplus
-
-U_NAMESPACE_BEGIN
-
-/**
- * \class StackUResourceBundle
- * "Smart pointer" like class, closes a UResourceBundle via ures_close().
- *
- * This code:
- *
- * StackUResourceBundle bundle;
- * foo(bundle.getAlias());
- *
- * Is equivalent to this code:
- *
- * UResourceBundle bundle;
- * ures_initStackObject(&bundle);
- * foo(&bundle);
- * ures_close(&bundle);
- *
- * @see LocalUResourceBundlePointer
- * @internal
- */
-class U_COMMON_API StackUResourceBundle {
-public:
- // No heap allocation. Use only on the stack.
- static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
- static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
-#if U_HAVE_PLACEMENT_NEW
- static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
-#endif
-
- StackUResourceBundle();
- ~StackUResourceBundle();
-
- UResourceBundle* getAlias() { return &bundle; }
-
- UResourceBundle& ref() { return bundle; }
- const UResourceBundle& ref() const { return bundle; }
-
- StackUResourceBundle(const StackUResourceBundle&) = delete;
- StackUResourceBundle& operator=(const StackUResourceBundle&) = delete;
-
- StackUResourceBundle(StackUResourceBundle&&) = delete;
- StackUResourceBundle& operator=(StackUResourceBundle&&) = delete;
-
-private:
- UResourceBundle bundle;
-};
-
-U_NAMESPACE_END
-
-#endif /* __cplusplus */
-
-/**
- * Opens a resource bundle for the locale;
- * if there is not even a base language bundle, then loads the root bundle;
- * never falls back to the default locale.
- *
- * This is used for algorithms that have good pan-Unicode default behavior,
- * such as case mappings, collation, and segmentation (BreakIterator).
- */
-U_CAPI UResourceBundle* U_EXPORT2
-ures_openNoDefault(const char* path, const char* localeID, UErrorCode* status);
-
-/* Some getters used by the copy constructor */
-U_CFUNC const char* ures_getName(const UResourceBundle* resB);
-#ifdef URES_DEBUG
-U_CFUNC const char* ures_getPath(const UResourceBundle* resB);
-/**
- * If anything was in the RB cache, dump it to the screen.
- * @return TRUE if there was anything into the cache
- */
-U_CAPI UBool U_EXPORT2 ures_dumpCacheContents(void);
-#endif
-/*U_CFUNC void ures_appendResPath(UResourceBundle *resB, const char* toAdd, int32_t lenToAdd);*/
-/*U_CFUNC void ures_setResPath(UResourceBundle *resB, const char* toAdd);*/
-/*U_CFUNC void ures_freeResPath(UResourceBundle *resB);*/
-
-/* Candidates for export */
-U_CFUNC UResourceBundle *ures_copyResb(UResourceBundle *r, const UResourceBundle *original, UErrorCode *status);
-
-/**
- * Returns a resource that can be located using the pathToResource argument. One needs optional package, locale
- * and path inside the locale, for example: "/myData/en/zoneStrings/3". Keys and indexes are supported. Keys
- * need to reference data in named structures, while indexes can reference both named and anonymous resources.
- * Features a fill-in parameter.
- *
- * Note, this function does NOT have a syntax for specifying items within a tree. May want to consider a
- * syntax that delineates between package/tree and resource.
- *
- * @param pathToResource a path that will lead to the requested resource
- * @param fillIn if NULL a new UResourceBundle struct is allocated and must be deleted by the caller.
- * Alternatively, you can supply a struct to be filled by this function.
- * @param status fills in the outgoing error code.
- * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it
- */
-U_CAPI UResourceBundle* U_EXPORT2
-ures_findResource(const char* pathToResource,
- UResourceBundle *fillIn, UErrorCode *status);
-
-/**
- * Returns a sub resource that can be located using the pathToResource argument. One needs a path inside
- * the supplied resource, for example, if you have "en_US" resource bundle opened, you might ask for
- * "zoneStrings/3". Keys and indexes are supported. Keys
- * need to reference data in named structures, while indexes can reference both
- * named and anonymous resources.
- * Features a fill-in parameter.
- *
- * @param resourceBundle a resource
- * @param pathToResource a path that will lead to the requested resource
- * @param fillIn if NULL a new UResourceBundle struct is allocated and must be deleted by the caller.
- * Alternatively, you can supply a struct to be filled by this function.
- * @param status fills in the outgoing error code.
- * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it
- */
-U_CAPI UResourceBundle* U_EXPORT2
-ures_findSubResource(const UResourceBundle *resB,
- char* pathToResource,
- UResourceBundle *fillIn, UErrorCode *status);
-
-/**
- * Returns a functionally equivalent locale (considering keywords) for the specified keyword.
- * @param result fillin for the equivalent locale
- * @param resultCapacity capacity of the fillin buffer
- * @param path path to the tree, or NULL for ICU data
- * @param resName top level resource. Example: "collations"
- * @param keyword locale keyword. Example: "collation"
- * @param locid The requested locale
- * @param isAvailable If non-null, pointer to fillin parameter that indicates whether the
- * requested locale was available. The locale is defined as 'available' if it physically
- * exists within the specified tree.
- * @param omitDefault if TRUE, omit keyword and value if default. 'de_DE\@collation=standard' -> 'de_DE'
- * @param status error code
- * @return the actual buffer size needed for the full locale. If it's greater
- * than resultCapacity, the returned full name will be truncated and an error code will be returned.
- */
-U_CAPI int32_t U_EXPORT2
-ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
- const char *path, const char *resName, const char *keyword, const char *locid,
- UBool *isAvailable, UBool omitDefault, UErrorCode *status);
-
-/**
- * Given a tree path and keyword, return a string enumeration of all possible values for that keyword.
- * @param path path to the tree, or NULL for ICU data
- * @param keyword a particular keyword to consider, must match a top level resource name
- * within the tree.
- * @param status error code
- */
-U_CAPI UEnumeration* U_EXPORT2
-ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status);
-
-
-/**
- * Get a resource with multi-level fallback. Normally only the top level resources will
- * fallback to its parent. This performs fallback on subresources. For example, when a table
- * is defined in a resource bundle and a parent resource bundle, normally no fallback occurs
- * on the sub-resources because the table is defined in the current resource bundle, but this
- * function can perform fallback on the sub-resources of the table.
- * @param resB a resource
- * @param inKey a key associated with the requested resource
- * @param fillIn if NULL a new UResourceBundle struct is allocated and must be deleted by the caller.
- * Alternatively, you can supply a struct to be filled by this function.
- * @param status: fills in the outgoing error code
- * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
- * could be a non-failing error
- * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
- * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it
- */
-U_CAPI UResourceBundle* U_EXPORT2
-ures_getByKeyWithFallback(const UResourceBundle *resB,
- const char* inKey,
- UResourceBundle *fillIn,
- UErrorCode *status);
-
-
-/**
- * Get a String with multi-level fallback. Normally only the top level resources will
- * fallback to its parent. This performs fallback on subresources. For example, when a table
- * is defined in a resource bundle and a parent resource bundle, normally no fallback occurs
- * on the sub-resources because the table is defined in the current resource bundle, but this
- * function can perform fallback on the sub-resources of the table.
- * @param resB a resource
- * @param inKey a key associated with the requested resource
- * @param status: fills in the outgoing error code
- * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
- * could be a non-failing error
- * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
- * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it
- */
-U_CAPI const UChar* U_EXPORT2
-ures_getStringByKeyWithFallback(const UResourceBundle *resB,
- const char* inKey,
- int32_t* len,
- UErrorCode *status);
-
-#ifdef __cplusplus
-
-U_CAPI void U_EXPORT2
-ures_getValueWithFallback(const UResourceBundle *bundle, const char *path,
- UResourceBundle *tempFillIn,
- icu::ResourceDataValue &value, UErrorCode &errorCode);
-
-U_CAPI void U_EXPORT2
-ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
- icu::ResourceSink &sink, UErrorCode &errorCode);
-
-#endif /* __cplusplus */
-
-/**
- * Get a version number by key
- * @param resB bundle containing version number
- * @param key the key for the version number
- * @param ver fillin for the version number
- * @param status error code
- */
-U_CAPI void U_EXPORT2
-ures_getVersionByKey(const UResourceBundle *resB,
- const char *key,
- UVersionInfo ver,
- UErrorCode *status);
-
-
-/**
- * Internal function.
- * Return the version number associated with this ResourceBundle as a string.
- *
- * @param resourceBundle The resource bundle for which the version is checked.
- * @return A version number string as specified in the resource bundle or its parent.
- * The caller does not own this string.
- * @see ures_getVersion
- */
-U_CAPI const char* U_EXPORT2
-ures_getVersionNumberInternal(const UResourceBundle *resourceBundle);
-
-/**
- * Return the name of the Locale associated with this ResourceBundle. This API allows
- * you to query for the real locale of the resource. For example, if you requested
- * "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned.
- * For subresources, the locale where this resource comes from will be returned.
- * If fallback has occured, getLocale will reflect this.
- *
- * This internal version avoids deprecated-warnings in ICU code.
- *
- * @param resourceBundle resource bundle in question
- * @param status just for catching illegal arguments
- * @return A Locale name
- */
-U_CAPI const char* U_EXPORT2
-ures_getLocaleInternal(const UResourceBundle* resourceBundle,
- UErrorCode* status);
-
-/**
- * Same as ures_openDirect() but uses the fill-in parameter instead of allocating a new bundle.
- *
- * @param r The existing UResourceBundle to fill in. If NULL then status will be
- * set to U_ILLEGAL_ARGUMENT_ERROR.
- * @param packageName The packageName and locale together point to an ICU udata object,
- * as defined by <code> udata_open( packageName, "res", locale, err) </code>
- * or equivalent. Typically, packageName will refer to a (.dat) file, or to
- * a package registered with udata_setAppData(). Using a full file or directory
- * pathname for packageName is deprecated. If NULL, ICU data will be used.
- * @param locale specifies the locale for which we want to open the resource
- * if NULL, the default locale will be used. If strlen(locale) == 0
- * root locale will be used.
- * @param status The error code.
- * @see ures_openDirect
- * @internal
- */
-U_CAPI void U_EXPORT2
-ures_openDirectFillIn(UResourceBundle *r,
- const char *packageName,
- const char *locale,
- UErrorCode *status);
-
-#endif /*URESIMP_H*/
diff --git a/contrib/libs/icu/common/ureslocs.h b/contrib/libs/icu/common/ureslocs.h
deleted file mode 100644
index f7c3344ef20..00000000000
--- a/contrib/libs/icu/common/ureslocs.h
+++ /dev/null
@@ -1,27 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2009-2014 International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#ifndef __URESLOCS_H__
-#define __URESLOCS_H__
-
-#include "unicode/utypes.h"
-#include "unicode/udata.h"
-
-U_CDECL_BEGIN
-
-
-#define U_ICUDATA_LANG U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "lang"
-#define U_ICUDATA_REGION U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "region"
-#define U_ICUDATA_CURR U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "curr"
-#define U_ICUDATA_ZONE U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "zone"
-#define U_ICUDATA_UNIT U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "unit"
-
-U_CDECL_END
-
-#endif
diff --git a/contrib/libs/icu/common/usc_impl.cpp b/contrib/libs/icu/common/usc_impl.cpp
deleted file mode 100644
index 111029b9749..00000000000
--- a/contrib/libs/icu/common/usc_impl.cpp
+++ /dev/null
@@ -1,361 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1999-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*
-* File USC_IMPL.C
-*
-* Modification History:
-*
-* Date Name Description
-* 07/08/2002 Eric Mader Creation.
-******************************************************************************
-*/
-
-#include "unicode/uscript.h"
-#include "usc_impl.h"
-#include "cmemory.h"
-
-#define PAREN_STACK_DEPTH 32
-
-#define MOD(sp) ((sp) % PAREN_STACK_DEPTH)
-#define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH)
-#define INC(sp,count) (MOD((sp) + (count)))
-#define INC1(sp) (INC(sp, 1))
-#define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count)))
-#define DEC1(sp) (DEC(sp, 1))
-#define STACK_IS_EMPTY(scriptRun) ((scriptRun)->pushCount <= 0)
-#define STACK_IS_NOT_EMPTY(scriptRun) (! STACK_IS_EMPTY(scriptRun))
-#define TOP(scriptRun) ((scriptRun)->parenStack[(scriptRun)->parenSP])
-#define SYNC_FIXUP(scriptRun) ((scriptRun)->fixupCount = 0)
-
-struct ParenStackEntry
-{
- int32_t pairIndex;
- UScriptCode scriptCode;
-};
-
-struct UScriptRun
-{
- int32_t textLength;
- const UChar *textArray;
-
- int32_t scriptStart;
- int32_t scriptLimit;
- UScriptCode scriptCode;
-
- struct ParenStackEntry parenStack[PAREN_STACK_DEPTH];
- int32_t parenSP;
- int32_t pushCount;
- int32_t fixupCount;
-};
-
-static int8_t highBit(int32_t value);
-
-static const UChar32 pairedChars[] = {
- 0x0028, 0x0029, /* ascii paired punctuation */
- 0x003c, 0x003e,
- 0x005b, 0x005d,
- 0x007b, 0x007d,
- 0x00ab, 0x00bb, /* guillemets */
- 0x2018, 0x2019, /* general punctuation */
- 0x201c, 0x201d,
- 0x2039, 0x203a,
- 0x3008, 0x3009, /* chinese paired punctuation */
- 0x300a, 0x300b,
- 0x300c, 0x300d,
- 0x300e, 0x300f,
- 0x3010, 0x3011,
- 0x3014, 0x3015,
- 0x3016, 0x3017,
- 0x3018, 0x3019,
- 0x301a, 0x301b
-};
-
-static void push(UScriptRun *scriptRun, int32_t pairIndex, UScriptCode scriptCode)
-{
- scriptRun->pushCount = LIMIT_INC(scriptRun->pushCount);
- scriptRun->fixupCount = LIMIT_INC(scriptRun->fixupCount);
-
- scriptRun->parenSP = INC1(scriptRun->parenSP);
- scriptRun->parenStack[scriptRun->parenSP].pairIndex = pairIndex;
- scriptRun->parenStack[scriptRun->parenSP].scriptCode = scriptCode;
-}
-
-static void pop(UScriptRun *scriptRun)
-{
- if (STACK_IS_EMPTY(scriptRun)) {
- return;
- }
-
- if (scriptRun->fixupCount > 0) {
- scriptRun->fixupCount -= 1;
- }
-
- scriptRun->pushCount -= 1;
- scriptRun->parenSP = DEC1(scriptRun->parenSP);
-
- /* If the stack is now empty, reset the stack
- pointers to their initial values.
- */
- if (STACK_IS_EMPTY(scriptRun)) {
- scriptRun->parenSP = -1;
- }
-}
-
-static void fixup(UScriptRun *scriptRun, UScriptCode scriptCode)
-{
- int32_t fixupSP = DEC(scriptRun->parenSP, scriptRun->fixupCount);
-
- while (scriptRun->fixupCount-- > 0) {
- fixupSP = INC1(fixupSP);
- scriptRun->parenStack[fixupSP].scriptCode = scriptCode;
- }
-}
-
-static int8_t
-highBit(int32_t value)
-{
- int8_t bit = 0;
-
- if (value <= 0) {
- return -32;
- }
-
- if (value >= 1 << 16) {
- value >>= 16;
- bit += 16;
- }
-
- if (value >= 1 << 8) {
- value >>= 8;
- bit += 8;
- }
-
- if (value >= 1 << 4) {
- value >>= 4;
- bit += 4;
- }
-
- if (value >= 1 << 2) {
- value >>= 2;
- bit += 2;
- }
-
- if (value >= 1 << 1) {
- //value >>= 1;
- bit += 1;
- }
-
- return bit;
-}
-
-static int32_t
-getPairIndex(UChar32 ch)
-{
- int32_t pairedCharCount = UPRV_LENGTHOF(pairedChars);
- int32_t pairedCharPower = 1 << highBit(pairedCharCount);
- int32_t pairedCharExtra = pairedCharCount - pairedCharPower;
-
- int32_t probe = pairedCharPower;
- int32_t pairIndex = 0;
-
- if (ch >= pairedChars[pairedCharExtra]) {
- pairIndex = pairedCharExtra;
- }
-
- while (probe > (1 << 0)) {
- probe >>= 1;
-
- if (ch >= pairedChars[pairIndex + probe]) {
- pairIndex += probe;
- }
- }
-
- if (pairedChars[pairIndex] != ch) {
- pairIndex = -1;
- }
-
- return pairIndex;
-}
-
-static UBool
-sameScript(UScriptCode scriptOne, UScriptCode scriptTwo)
-{
- return scriptOne <= USCRIPT_INHERITED || scriptTwo <= USCRIPT_INHERITED || scriptOne == scriptTwo;
-}
-
-U_CAPI UScriptRun * U_EXPORT2
-uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode)
-{
- UScriptRun *result = NULL;
-
- if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
- return NULL;
- }
-
- result = (UScriptRun *)uprv_malloc(sizeof (UScriptRun));
-
- if (result == NULL) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- uscript_setRunText(result, src, length, pErrorCode);
-
- /* Release the UScriptRun if uscript_setRunText() returns an error */
- if (U_FAILURE(*pErrorCode)) {
- uprv_free(result);
- result = NULL;
- }
-
- return result;
-}
-
-U_CAPI void U_EXPORT2
-uscript_closeRun(UScriptRun *scriptRun)
-{
- if (scriptRun != NULL) {
- uprv_free(scriptRun);
- }
-}
-
-U_CAPI void U_EXPORT2
-uscript_resetRun(UScriptRun *scriptRun)
-{
- if (scriptRun != NULL) {
- scriptRun->scriptStart = 0;
- scriptRun->scriptLimit = 0;
- scriptRun->scriptCode = USCRIPT_INVALID_CODE;
- scriptRun->parenSP = -1;
- scriptRun->pushCount = 0;
- scriptRun->fixupCount = 0;
- }
-}
-
-U_CAPI void U_EXPORT2
-uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode)
-{
- if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- if (scriptRun == NULL || length < 0 || ((src == NULL) != (length == 0))) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- scriptRun->textArray = src;
- scriptRun->textLength = length;
-
- uscript_resetRun(scriptRun);
-}
-
-U_CAPI UBool U_EXPORT2
-uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript)
-{
- UErrorCode error = U_ZERO_ERROR;
-
- /* if we've fallen off the end of the text, we're done */
- if (scriptRun == NULL || scriptRun->scriptLimit >= scriptRun->textLength) {
- return FALSE;
- }
-
- SYNC_FIXUP(scriptRun);
- scriptRun->scriptCode = USCRIPT_COMMON;
-
- for (scriptRun->scriptStart = scriptRun->scriptLimit; scriptRun->scriptLimit < scriptRun->textLength; scriptRun->scriptLimit += 1) {
- UChar high = scriptRun->textArray[scriptRun->scriptLimit];
- UChar32 ch = high;
- UScriptCode sc;
- int32_t pairIndex;
-
- /*
- * if the character is a high surrogate and it's not the last one
- * in the text, see if it's followed by a low surrogate
- */
- if (high >= 0xD800 && high <= 0xDBFF && scriptRun->scriptLimit < scriptRun->textLength - 1) {
- UChar low = scriptRun->textArray[scriptRun->scriptLimit + 1];
-
- /*
- * if it is followed by a low surrogate,
- * consume it and form the full character
- */
- if (low >= 0xDC00 && low <= 0xDFFF) {
- ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000;
- scriptRun->scriptLimit += 1;
- }
- }
-
- sc = uscript_getScript(ch, &error);
- pairIndex = getPairIndex(ch);
-
- /*
- * Paired character handling:
- *
- * if it's an open character, push it onto the stack.
- * if it's a close character, find the matching open on the
- * stack, and use that script code. Any non-matching open
- * characters above it on the stack will be poped.
- */
- if (pairIndex >= 0) {
- if ((pairIndex & 1) == 0) {
- push(scriptRun, pairIndex, scriptRun->scriptCode);
- } else {
- int32_t pi = pairIndex & ~1;
-
- while (STACK_IS_NOT_EMPTY(scriptRun) && TOP(scriptRun).pairIndex != pi) {
- pop(scriptRun);
- }
-
- if (STACK_IS_NOT_EMPTY(scriptRun)) {
- sc = TOP(scriptRun).scriptCode;
- }
- }
- }
-
- if (sameScript(scriptRun->scriptCode, sc)) {
- if (scriptRun->scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
- scriptRun->scriptCode = sc;
-
- fixup(scriptRun, scriptRun->scriptCode);
- }
-
- /*
- * if this character is a close paired character,
- * pop the matching open character from the stack
- */
- if (pairIndex >= 0 && (pairIndex & 1) != 0) {
- pop(scriptRun);
- }
- } else {
- /*
- * if the run broke on a surrogate pair,
- * end it before the high surrogate
- */
- if (ch >= 0x10000) {
- scriptRun->scriptLimit -= 1;
- }
-
- break;
- }
- }
-
-
- if (pRunStart != NULL) {
- *pRunStart = scriptRun->scriptStart;
- }
-
- if (pRunLimit != NULL) {
- *pRunLimit = scriptRun->scriptLimit;
- }
-
- if (pRunScript != NULL) {
- *pRunScript = scriptRun->scriptCode;
- }
-
- return TRUE;
-}
diff --git a/contrib/libs/icu/common/usc_impl.h b/contrib/libs/icu/common/usc_impl.h
deleted file mode 100644
index 44899649d4c..00000000000
--- a/contrib/libs/icu/common/usc_impl.h
+++ /dev/null
@@ -1,139 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1999-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*
-* File USC_IMPL.H
-*
-* Modification History:
-*
-* Date Name Description
-* 07/08/2002 Eric Mader Creation.
-******************************************************************************
-*/
-
-#ifndef USC_IMPL_H
-#define USC_IMPL_H
-#include "unicode/utypes.h"
-#include "unicode/uscript.h"
-
-/**
- * <code>UScriptRun</code> is used to find runs of characters in
- * the same script. It implements a simple iterator over an array
- * of characters. The iterator will resolve script-neutral characters
- * like punctuation into the script of the surrounding characters.
- *
- * The iterator will try to match paired punctuation. If it sees an
- * opening punctuation character, it will remember the script that
- * was assigned to that character, and assign the same script to the
- * matching closing punctuation.
- *
- * Scripts are chosen based on the <code>UScriptCode</code> enumeration.
- * No attempt is made to combine related scripts into a single run. In
- * particular, Hiragana, Katakana, and Han characters will appear in seperate
- * runs.
-
- * Here is an example of how to iterate over script runs:
- * <pre>
- * \code
- * void printScriptRuns(const UChar *text, int32_t length)
- * {
- * UErrorCode error = U_ZERO_ERROR;
- * UScriptRun *scriptRun = uscript_openRun(text, testLength, &error);
- * int32_t start = 0, limit = 0;
- * UScriptCode code = USCRIPT_INVALID_CODE;
- *
- * while (uscript_nextRun(&start, &limit, &code)) {
- * printf("Script '%s' from %d to %d.\n", uscript_getName(code), start, limit);
- * }
- *
- * uscript_closeRun(scriptRun);
- * }
- * </pre>
- */
-struct UScriptRun;
-
-typedef struct UScriptRun UScriptRun;
-
-/**
- * Create a <code>UScriptRun</code> object for iterating over the given text. This object must
- * be freed using <code>uscript_closeRun()</code>. Note that this object does not copy the source text,
- * only the pointer to it. You must make sure that the pointer remains valid until you call
- * <code>uscript_closeRun()</code> or <code>uscript_setRunText()</code>.
- *
- * @param src is the address of the array of characters over which to iterate.
- * if <code>src == NULL</code> and <code>length == 0</code>,
- * an empty <code>UScriptRun</code> object will be returned.
- *
- * @param length is the number of characters over which to iterate.
- *
- * @param pErrorCode is a pointer to a valid <code>UErrorCode</code> value. If this value
- * indicates a failure on entry, the function will immediately return.
- * On exit the value will indicate the success of the operation.
- *
- * @return the address of <code>UScriptRun</code> object which will iterate over the text,
- * or <code>NULL</code> if the operation failed.
- */
-U_CAPI UScriptRun * U_EXPORT2
-uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode);
-
-/**
- * Frees the given <code>UScriptRun</code> object and any storage associated with it.
- * On return, scriptRun no longer points to a valid <code>UScriptRun</code> object.
- *
- * @param scriptRun is the <code>UScriptRun</code> object which will be freed.
- */
-U_CAPI void U_EXPORT2
-uscript_closeRun(UScriptRun *scriptRun);
-
-/**
- * Reset the <code>UScriptRun</code> object so that it will start iterating from
- * the beginning.
- *
- * @param scriptRun is the address of the <code>UScriptRun</code> object to be reset.
- */
-U_CAPI void U_EXPORT2
-uscript_resetRun(UScriptRun *scriptRun);
-
-/**
- * Change the text over which the given <code>UScriptRun</code> object iterates.
- *
- * @param scriptRun is the <code>UScriptRun</code> object which will be changed.
- *
- * @param src is the address of the new array of characters over which to iterate.
- * If <code>src == NULL</code> and <code>length == 0</code>,
- * the <code>UScriptRun</code> object will become empty.
- *
- * @param length is the new number of characters over which to iterate
- *
- * @param pErrorCode is a pointer to a valid <code>UErrorCode</code> value. If this value
- * indicates a failure on entry, the function will immediately return.
- * On exit the value will indicate the success of the operation.
- */
-U_CAPI void U_EXPORT2
-uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode);
-
-/**
- * Advance the <code>UScriptRun</code> object to the next script run, return the start and limit
- * offsets, and the script of the run.
- *
- * @param scriptRun is the address of the <code>UScriptRun</code> object.
- *
- * @param pRunStart is a pointer to the variable to receive the starting offset of the next run.
- * This pointer can be <code>NULL</code> if the value is not needed.
- *
- * @param pRunLimit is a pointer to the variable to receive the limit offset of the next run.
- * This pointer can be <code>NULL</code> if the value is not needed.
- *
- * @param pRunScript is a pointer to the variable to receive the UScriptCode for the
- * script of the current run. This pointer can be <code>NULL</code> if the value is not needed.
- *
- * @return true if there was another script run.
- */
-U_CAPI UBool U_EXPORT2
-uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript);
-
-#endif
diff --git a/contrib/libs/icu/common/uscript.cpp b/contrib/libs/icu/common/uscript.cpp
deleted file mode 100644
index f8bd7e7fdd1..00000000000
--- a/contrib/libs/icu/common/uscript.cpp
+++ /dev/null
@@ -1,149 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1997-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*
-* File USCRIPT.C
-*
-* Modification History:
-*
-* Date Name Description
-* 07/06/2001 Ram Creation.
-******************************************************************************
-*/
-
-#include "unicode/uchar.h"
-#include "unicode/uscript.h"
-#include "unicode/uloc.h"
-#include "bytesinkutil.h"
-#include "charstr.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "ulocimp.h"
-
-static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
-static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
-static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
-
-static int32_t
-setCodes(const UScriptCode *src, int32_t length,
- UScriptCode *dest, int32_t capacity, UErrorCode *err) {
- int32_t i;
- if(U_FAILURE(*err)) { return 0; }
- if(length > capacity) {
- *err = U_BUFFER_OVERFLOW_ERROR;
- return length;
- }
- for(i = 0; i < length; ++i) {
- dest[i] = src[i];
- }
- return length;
-}
-
-static int32_t
-setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
- if(U_FAILURE(*err)) { return 0; }
- if(1 > capacity) {
- *err = U_BUFFER_OVERFLOW_ERROR;
- return 1;
- }
- scripts[0] = script;
- return 1;
-}
-
-static int32_t
-getCodesFromLocale(const char *locale,
- UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
- UErrorCode internalErrorCode = U_ZERO_ERROR;
- char lang[8] = {0};
- char script[8] = {0};
- int32_t scriptLength;
- if(U_FAILURE(*err)) { return 0; }
- // Multi-script languages, equivalent to the LocaleScript data
- // that we used to load from locale resource bundles.
- /*length = */ uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &internalErrorCode);
- if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
- return 0;
- }
- if(0 == uprv_strcmp(lang, "ja")) {
- return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err);
- }
- if(0 == uprv_strcmp(lang, "ko")) {
- return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err);
- }
- scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &internalErrorCode);
- if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
- return 0;
- }
- if(0 == uprv_strcmp(lang, "zh") && 0 == uprv_strcmp(script, "Hant")) {
- return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err);
- }
- // Explicit script code.
- if(scriptLength != 0) {
- UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
- if(scriptCode != USCRIPT_INVALID_CODE) {
- if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) {
- scriptCode = USCRIPT_HAN;
- }
- return setOneCode(scriptCode, scripts, capacity, err);
- }
- }
- return 0;
-}
-
-/* TODO: this is a bad API and should be deprecated, ticket #11141 */
-U_CAPI int32_t U_EXPORT2
-uscript_getCode(const char* nameOrAbbrOrLocale,
- UScriptCode* fillIn,
- int32_t capacity,
- UErrorCode* err){
- UBool triedCode;
- UErrorCode internalErrorCode;
- int32_t length;
-
- if(U_FAILURE(*err)) {
- return 0;
- }
- if(nameOrAbbrOrLocale==NULL ||
- (fillIn == NULL ? capacity != 0 : capacity < 0)) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- triedCode = FALSE;
- if(uprv_strchr(nameOrAbbrOrLocale, '-')==NULL && uprv_strchr(nameOrAbbrOrLocale, '_')==NULL ){
- /* try long and abbreviated script names first */
- UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
- if(code!=USCRIPT_INVALID_CODE) {
- return setOneCode(code, fillIn, capacity, err);
- }
- triedCode = TRUE;
- }
- internalErrorCode = U_ZERO_ERROR;
- length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err);
- if(U_FAILURE(*err) || length != 0) {
- return length;
- }
- icu::CharString likely;
- {
- icu::CharStringByteSink sink(&likely);
- ulocimp_addLikelySubtags(nameOrAbbrOrLocale, sink, &internalErrorCode);
- }
- if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) {
- length = getCodesFromLocale(likely.data(), fillIn, capacity, err);
- if(U_FAILURE(*err) || length != 0) {
- return length;
- }
- }
- if(!triedCode) {
- /* still not found .. try long and abbreviated script names again */
- UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
- if(code!=USCRIPT_INVALID_CODE) {
- return setOneCode(code, fillIn, capacity, err);
- }
- }
- return 0;
-}
diff --git a/contrib/libs/icu/common/uscript_props.cpp b/contrib/libs/icu/common/uscript_props.cpp
deleted file mode 100644
index 25d287b57a3..00000000000
--- a/contrib/libs/icu/common/uscript_props.cpp
+++ /dev/null
@@ -1,302 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2013-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: uscript_props.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2013feb16
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/unistr.h"
-#include "unicode/uscript.h"
-#include "unicode/utf16.h"
-#include "ustr_imp.h"
-#include "cmemory.h"
-
-namespace {
-
-// Script metadata (script properties).
-// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
-
-// 0 = NOT_ENCODED, no sample character, default false script properties.
-// Bits 20.. 0: sample character
-
-// Bits 23..21: usage
-const int32_t UNKNOWN = 1 << 21;
-const int32_t EXCLUSION = 2 << 21;
-const int32_t LIMITED_USE = 3 << 21;
-// st int32_t ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10
-const int32_t RECOMMENDED = 5 << 21;
-
-// Bits 31..24: Single-bit flags
-const int32_t RTL = 1 << 24;
-const int32_t LB_LETTERS = 1 << 25;
-const int32_t CASED = 1 << 26;
-
-const int32_t SCRIPT_PROPS[] = {
- // Begin copy-paste output from
- // tools/trunk/unicode/py/parsescriptmetadata.py
- 0x0040 | RECOMMENDED, // Zyyy
- 0x0308 | RECOMMENDED, // Zinh
- 0x0628 | RECOMMENDED | RTL, // Arab
- 0x0531 | RECOMMENDED | CASED, // Armn
- 0x0995 | RECOMMENDED, // Beng
- 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo
- 0x13C4 | LIMITED_USE | CASED, // Cher
- 0x03E2 | EXCLUSION | CASED, // Copt
- 0x042F | RECOMMENDED | CASED, // Cyrl
- 0x10414 | EXCLUSION | CASED, // Dsrt
- 0x0905 | RECOMMENDED, // Deva
- 0x12A0 | RECOMMENDED, // Ethi
- 0x10D3 | RECOMMENDED, // Geor
- 0x10330 | EXCLUSION, // Goth
- 0x03A9 | RECOMMENDED | CASED, // Grek
- 0x0A95 | RECOMMENDED, // Gujr
- 0x0A15 | RECOMMENDED, // Guru
- 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani
- 0xAC00 | RECOMMENDED, // Hang
- 0x05D0 | RECOMMENDED | RTL, // Hebr
- 0x304B | RECOMMENDED | LB_LETTERS, // Hira
- 0x0C95 | RECOMMENDED, // Knda
- 0x30AB | RECOMMENDED | LB_LETTERS, // Kana
- 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr
- 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo
- 0x004C | RECOMMENDED | CASED, // Latn
- 0x0D15 | RECOMMENDED, // Mlym
- 0x1826 | EXCLUSION, // Mong
- 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr
- 0x168F | EXCLUSION, // Ogam
- 0x10300 | EXCLUSION, // Ital
- 0x0B15 | RECOMMENDED, // Orya
- 0x16A0 | EXCLUSION, // Runr
- 0x0D85 | RECOMMENDED, // Sinh
- 0x0710 | LIMITED_USE | RTL, // Syrc
- 0x0B95 | RECOMMENDED, // Taml
- 0x0C15 | RECOMMENDED, // Telu
- 0x078C | RECOMMENDED | RTL, // Thaa
- 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai
- 0x0F40 | RECOMMENDED, // Tibt
- 0x14C0 | LIMITED_USE, // Cans
- 0xA288 | LIMITED_USE | LB_LETTERS, // Yiii
- 0x1703 | EXCLUSION, // Tglg
- 0x1723 | EXCLUSION, // Hano
- 0x1743 | EXCLUSION, // Buhd
- 0x1763 | EXCLUSION, // Tagb
- 0x280E | UNKNOWN, // Brai
- 0x10800 | EXCLUSION | RTL, // Cprt
- 0x1900 | LIMITED_USE, // Limb
- 0x10000 | EXCLUSION, // Linb
- 0x10480 | EXCLUSION, // Osma
- 0x10450 | EXCLUSION, // Shaw
- 0x1950 | LIMITED_USE | LB_LETTERS, // Tale
- 0x10380 | EXCLUSION, // Ugar
- 0,
- 0x1A00 | EXCLUSION, // Bugi
- 0x2C00 | EXCLUSION | CASED, // Glag
- 0x10A00 | EXCLUSION | RTL, // Khar
- 0xA800 | LIMITED_USE, // Sylo
- 0x1980 | LIMITED_USE | LB_LETTERS, // Talu
- 0x2D30 | LIMITED_USE, // Tfng
- 0x103A0 | EXCLUSION, // Xpeo
- 0x1B05 | LIMITED_USE, // Bali
- 0x1BC0 | LIMITED_USE, // Batk
- 0,
- 0x11005 | EXCLUSION, // Brah
- 0xAA00 | LIMITED_USE, // Cham
- 0,
- 0,
- 0,
- 0,
- 0x13153 | EXCLUSION, // Egyp
- 0,
- 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans
- 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant
- 0x16B1C | EXCLUSION, // Hmng
- 0x10CA1 | EXCLUSION | RTL | CASED, // Hung
- 0,
- 0xA984 | LIMITED_USE, // Java
- 0xA90A | LIMITED_USE, // Kali
- 0,
- 0,
- 0x1C00 | LIMITED_USE, // Lepc
- 0x10647 | EXCLUSION, // Lina
- 0x0840 | LIMITED_USE | RTL, // Mand
- 0,
- 0x10980 | EXCLUSION | RTL, // Mero
- 0x07CA | LIMITED_USE | RTL, // Nkoo
- 0x10C00 | EXCLUSION | RTL, // Orkh
- 0x1036B | EXCLUSION, // Perm
- 0xA840 | EXCLUSION, // Phag
- 0x10900 | EXCLUSION | RTL, // Phnx
- 0x16F00 | LIMITED_USE, // Plrd
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0xA549 | LIMITED_USE, // Vaii
- 0,
- 0x12000 | EXCLUSION, // Xsux
- 0,
- 0xFDD0 | UNKNOWN, // Zzzz
- 0x102A0 | EXCLUSION, // Cari
- 0x304B | RECOMMENDED | LB_LETTERS, // Jpan
- 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana
- 0x10280 | EXCLUSION, // Lyci
- 0x10920 | EXCLUSION | RTL, // Lydi
- 0x1C5A | LIMITED_USE, // Olck
- 0xA930 | EXCLUSION, // Rjng
- 0xA882 | LIMITED_USE, // Saur
- 0x1D850 | EXCLUSION, // Sgnw
- 0x1B83 | LIMITED_USE, // Sund
- 0,
- 0xABC0 | LIMITED_USE, // Mtei
- 0x10840 | EXCLUSION | RTL, // Armi
- 0x10B00 | EXCLUSION | RTL, // Avst
- 0x11103 | LIMITED_USE, // Cakm
- 0xAC00 | RECOMMENDED, // Kore
- 0x11083 | EXCLUSION, // Kthi
- 0x10AD8 | EXCLUSION | RTL, // Mani
- 0x10B60 | EXCLUSION | RTL, // Phli
- 0x10B8F | EXCLUSION | RTL, // Phlp
- 0,
- 0x10B40 | EXCLUSION | RTL, // Prti
- 0x0800 | EXCLUSION | RTL, // Samr
- 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt
- 0,
- 0,
- 0xA6A0 | LIMITED_USE, // Bamu
- 0xA4D0 | LIMITED_USE, // Lisu
- 0,
- 0x10A60 | EXCLUSION | RTL, // Sarb
- 0x16AE6 | EXCLUSION, // Bass
- 0x1BC20 | EXCLUSION, // Dupl
- 0x10500 | EXCLUSION, // Elba
- 0x11315 | EXCLUSION, // Gran
- 0,
- 0,
- 0x1E802 | EXCLUSION | RTL, // Mend
- 0x109A0 | EXCLUSION | RTL, // Merc
- 0x10A95 | EXCLUSION | RTL, // Narb
- 0x10896 | EXCLUSION | RTL, // Nbat
- 0x10873 | EXCLUSION | RTL, // Palm
- 0x112BE | EXCLUSION, // Sind
- 0x118B4 | EXCLUSION | CASED, // Wara
- 0,
- 0,
- 0x16A4F | EXCLUSION, // Mroo
- 0x1B1C4 | EXCLUSION | LB_LETTERS, // Nshu
- 0x11183 | EXCLUSION, // Shrd
- 0x110D0 | EXCLUSION, // Sora
- 0x11680 | EXCLUSION, // Takr
- 0x18229 | EXCLUSION | LB_LETTERS, // Tang
- 0,
- 0x14400 | EXCLUSION, // Hluw
- 0x11208 | EXCLUSION, // Khoj
- 0x11484 | EXCLUSION, // Tirh
- 0x10537 | EXCLUSION, // Aghb
- 0x11152 | EXCLUSION, // Mahj
- 0x11717 | EXCLUSION | LB_LETTERS, // Ahom
- 0x108F4 | EXCLUSION | RTL, // Hatr
- 0x1160E | EXCLUSION, // Modi
- 0x1128F | EXCLUSION, // Mult
- 0x11AC0 | EXCLUSION, // Pauc
- 0x1158E | EXCLUSION, // Sidd
- 0x1E909 | LIMITED_USE | RTL | CASED, // Adlm
- 0x11C0E | EXCLUSION, // Bhks
- 0x11C72 | EXCLUSION, // Marc
- 0x11412 | LIMITED_USE, // Newa
- 0x104B5 | LIMITED_USE | CASED, // Osge
- 0x5B57 | RECOMMENDED | LB_LETTERS, // Hanb
- 0x1112 | RECOMMENDED, // Jamo
- 0,
- 0x11D10 | EXCLUSION, // Gonm
- 0x11A5C | EXCLUSION, // Soyo
- 0x11A0B | EXCLUSION, // Zanb
- 0x1180B | EXCLUSION, // Dogr
- 0x11D71 | LIMITED_USE, // Gong
- 0x11EE5 | EXCLUSION, // Maka
- 0x16E40 | EXCLUSION | CASED, // Medf
- 0x10D12 | LIMITED_USE | RTL, // Rohg
- 0x10F42 | EXCLUSION | RTL, // Sogd
- 0x10F19 | EXCLUSION | RTL, // Sogo
- 0x10FF1 | EXCLUSION | RTL, // Elym
- 0x1E108 | LIMITED_USE, // Hmnp
- 0x119CE | EXCLUSION, // Nand
- 0x1E2E1 | LIMITED_USE, // Wcho
- 0x10FBF | EXCLUSION | RTL, // Chrs
- 0x1190C | EXCLUSION, // Diak
- 0x18C65 | EXCLUSION | LB_LETTERS, // Kits
- 0x10E88 | EXCLUSION | RTL, // Yezi
- // End copy-paste from parsescriptmetadata.py
-};
-
-int32_t getScriptProps(UScriptCode script) {
- if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) {
- return SCRIPT_PROPS[script];
- } else {
- return 0;
- }
-}
-
-} // namespace
-
-U_CAPI int32_t U_EXPORT2
-uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) { return 0; }
- if(capacity < 0 || (capacity > 0 && dest == NULL)) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- int32_t sampleChar = getScriptProps(script) & 0x1fffff;
- int32_t length;
- if(sampleChar == 0) {
- length = 0;
- } else {
- length = U16_LENGTH(sampleChar);
- if(length <= capacity) {
- int32_t i = 0;
- U16_APPEND_UNSAFE(dest, i, sampleChar);
- }
- }
- return u_terminateUChars(dest, capacity, length, pErrorCode);
-}
-
-U_COMMON_API icu::UnicodeString U_EXPORT2
-uscript_getSampleUnicodeString(UScriptCode script) {
- icu::UnicodeString sample;
- int32_t sampleChar = getScriptProps(script) & 0x1fffff;
- if(sampleChar != 0) {
- sample.append(sampleChar);
- }
- return sample;
-}
-
-U_CAPI UScriptUsage U_EXPORT2
-uscript_getUsage(UScriptCode script) {
- return (UScriptUsage)((getScriptProps(script) >> 21) & 7);
-}
-
-U_CAPI UBool U_EXPORT2
-uscript_isRightToLeft(UScriptCode script) {
- return (getScriptProps(script) & RTL) != 0;
-}
-
-U_CAPI UBool U_EXPORT2
-uscript_breaksBetweenLetters(UScriptCode script) {
- return (getScriptProps(script) & LB_LETTERS) != 0;
-}
-
-U_CAPI UBool U_EXPORT2
-uscript_isCased(UScriptCode script) {
- return (getScriptProps(script) & CASED) != 0;
-}
diff --git a/contrib/libs/icu/common/uset.cpp b/contrib/libs/icu/common/uset.cpp
deleted file mode 100644
index eae7981d52f..00000000000
--- a/contrib/libs/icu/common/uset.cpp
+++ /dev/null
@@ -1,641 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2002-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: uset.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002mar07
-* created by: Markus W. Scherer
-*
-* There are functions to efficiently serialize a USet into an array of uint16_t
-* and functions to use such a serialized form efficiently without
-* instantiating a new USet.
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-#include "unicode/uset.h"
-#include "unicode/uniset.h"
-#include "cmemory.h"
-#include "unicode/ustring.h"
-#include "unicode/parsepos.h"
-
-U_NAMESPACE_USE
-
-U_CAPI USet* U_EXPORT2
-uset_openEmpty() {
- return (USet*) new UnicodeSet();
-}
-
-U_CAPI USet* U_EXPORT2
-uset_open(UChar32 start, UChar32 end) {
- return (USet*) new UnicodeSet(start, end);
-}
-
-U_CAPI void U_EXPORT2
-uset_close(USet* set) {
- delete (UnicodeSet*) set;
-}
-
-U_CAPI USet * U_EXPORT2
-uset_clone(const USet *set) {
- return (USet*) (((UnicodeSet*) set)->UnicodeSet::clone());
-}
-
-U_CAPI UBool U_EXPORT2
-uset_isFrozen(const USet *set) {
- return ((UnicodeSet*) set)->UnicodeSet::isFrozen();
-}
-
-U_CAPI void U_EXPORT2
-uset_freeze(USet *set) {
- ((UnicodeSet*) set)->UnicodeSet::freeze();
-}
-
-U_CAPI USet * U_EXPORT2
-uset_cloneAsThawed(const USet *set) {
- return (USet*) (((UnicodeSet*) set)->UnicodeSet::cloneAsThawed());
-}
-
-U_CAPI void U_EXPORT2
-uset_set(USet* set,
- UChar32 start, UChar32 end) {
- ((UnicodeSet*) set)->UnicodeSet::set(start, end);
-}
-
-U_CAPI void U_EXPORT2
-uset_addAll(USet* set, const USet *additionalSet) {
- ((UnicodeSet*) set)->UnicodeSet::addAll(*((const UnicodeSet*)additionalSet));
-}
-
-U_CAPI void U_EXPORT2
-uset_add(USet* set, UChar32 c) {
- ((UnicodeSet*) set)->UnicodeSet::add(c);
-}
-
-U_CAPI void U_EXPORT2
-uset_addRange(USet* set, UChar32 start, UChar32 end) {
- ((UnicodeSet*) set)->UnicodeSet::add(start, end);
-}
-
-U_CAPI void U_EXPORT2
-uset_addString(USet* set, const UChar* str, int32_t strLen) {
- // UnicodeString handles -1 for strLen
- UnicodeString s(strLen<0, str, strLen);
- ((UnicodeSet*) set)->UnicodeSet::add(s);
-}
-
-U_CAPI void U_EXPORT2
-uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen) {
- // UnicodeString handles -1 for strLen
- UnicodeString s(str, strLen);
- ((UnicodeSet*) set)->UnicodeSet::addAll(s);
-}
-
-U_CAPI void U_EXPORT2
-uset_remove(USet* set, UChar32 c) {
- ((UnicodeSet*) set)->UnicodeSet::remove(c);
-}
-
-U_CAPI void U_EXPORT2
-uset_removeRange(USet* set, UChar32 start, UChar32 end) {
- ((UnicodeSet*) set)->UnicodeSet::remove(start, end);
-}
-
-U_CAPI void U_EXPORT2
-uset_removeString(USet* set, const UChar* str, int32_t strLen) {
- UnicodeString s(strLen==-1, str, strLen);
- ((UnicodeSet*) set)->UnicodeSet::remove(s);
-}
-
-U_CAPI void U_EXPORT2
-uset_removeAll(USet* set, const USet* remove) {
- ((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove);
-}
-
-U_CAPI void U_EXPORT2
-uset_retain(USet* set, UChar32 start, UChar32 end) {
- ((UnicodeSet*) set)->UnicodeSet::retain(start, end);
-}
-
-U_CAPI void U_EXPORT2
-uset_retainAll(USet* set, const USet* retain) {
- ((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain);
-}
-
-U_CAPI void U_EXPORT2
-uset_compact(USet* set) {
- ((UnicodeSet*) set)->UnicodeSet::compact();
-}
-
-U_CAPI void U_EXPORT2
-uset_complement(USet* set) {
- ((UnicodeSet*) set)->UnicodeSet::complement();
-}
-
-U_CAPI void U_EXPORT2
-uset_complementAll(USet* set, const USet* complement) {
- ((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement);
-}
-
-U_CAPI void U_EXPORT2
-uset_clear(USet* set) {
- ((UnicodeSet*) set)->UnicodeSet::clear();
-}
-
-U_CAPI void U_EXPORT2
-uset_removeAllStrings(USet* set) {
- ((UnicodeSet*) set)->UnicodeSet::removeAllStrings();
-}
-
-U_CAPI UBool U_EXPORT2
-uset_isEmpty(const USet* set) {
- return ((const UnicodeSet*) set)->UnicodeSet::isEmpty();
-}
-
-U_CAPI UBool U_EXPORT2
-uset_contains(const USet* set, UChar32 c) {
- return ((const UnicodeSet*) set)->UnicodeSet::contains(c);
-}
-
-U_CAPI UBool U_EXPORT2
-uset_containsRange(const USet* set, UChar32 start, UChar32 end) {
- return ((const UnicodeSet*) set)->UnicodeSet::contains(start, end);
-}
-
-U_CAPI UBool U_EXPORT2
-uset_containsString(const USet* set, const UChar* str, int32_t strLen) {
- UnicodeString s(strLen==-1, str, strLen);
- return ((const UnicodeSet*) set)->UnicodeSet::contains(s);
-}
-
-U_CAPI UBool U_EXPORT2
-uset_containsAll(const USet* set1, const USet* set2) {
- return ((const UnicodeSet*) set1)->UnicodeSet::containsAll(* (const UnicodeSet*) set2);
-}
-
-U_CAPI UBool U_EXPORT2
-uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen) {
- // Create a string alias, since nothing is being added to the set.
- UnicodeString s(strLen==-1, str, strLen);
- return ((const UnicodeSet*) set)->UnicodeSet::containsAll(s);
-}
-
-U_CAPI UBool U_EXPORT2
-uset_containsNone(const USet* set1, const USet* set2) {
- return ((const UnicodeSet*) set1)->UnicodeSet::containsNone(* (const UnicodeSet*) set2);
-}
-
-U_CAPI UBool U_EXPORT2
-uset_containsSome(const USet* set1, const USet* set2) {
- return ((const UnicodeSet*) set1)->UnicodeSet::containsSome(* (const UnicodeSet*) set2);
-}
-
-U_CAPI int32_t U_EXPORT2
-uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {
- return ((UnicodeSet*) set)->UnicodeSet::span(s, length, spanCondition);
-}
-
-U_CAPI int32_t U_EXPORT2
-uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {
- return ((UnicodeSet*) set)->UnicodeSet::spanBack(s, length, spanCondition);
-}
-
-U_CAPI int32_t U_EXPORT2
-uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
- return ((UnicodeSet*) set)->UnicodeSet::spanUTF8(s, length, spanCondition);
-}
-
-U_CAPI int32_t U_EXPORT2
-uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
- return ((UnicodeSet*) set)->UnicodeSet::spanBackUTF8(s, length, spanCondition);
-}
-
-U_CAPI UBool U_EXPORT2
-uset_equals(const USet* set1, const USet* set2) {
- return *(const UnicodeSet*)set1 == *(const UnicodeSet*)set2;
-}
-
-U_CAPI int32_t U_EXPORT2
-uset_indexOf(const USet* set, UChar32 c) {
- return ((UnicodeSet*) set)->UnicodeSet::indexOf(c);
-}
-
-U_CAPI UChar32 U_EXPORT2
-uset_charAt(const USet* set, int32_t index) {
- return ((UnicodeSet*) set)->UnicodeSet::charAt(index);
-}
-
-U_CAPI int32_t U_EXPORT2
-uset_size(const USet* set) {
- return ((const UnicodeSet*) set)->UnicodeSet::size();
-}
-
-U_NAMESPACE_BEGIN
-/**
- * This class only exists to provide access to the UnicodeSet private
- * USet support API. Declaring a class a friend is more portable than
- * trying to declare extern "C" functions as friends.
- */
-class USetAccess /* not : public UObject because all methods are static */ {
-public:
- /* Try to have the compiler inline these*/
- inline static int32_t getStringCount(const UnicodeSet& set) {
- return set.stringsSize();
- }
- inline static const UnicodeString* getString(const UnicodeSet& set,
- int32_t i) {
- return set.getString(i);
- }
-private:
- /* do not instantiate*/
- USetAccess();
-};
-U_NAMESPACE_END
-
-U_CAPI int32_t U_EXPORT2
-uset_getItemCount(const USet* uset) {
- const UnicodeSet& set = *(const UnicodeSet*)uset;
- return set.getRangeCount() + USetAccess::getStringCount(set);
-}
-
-U_CAPI int32_t U_EXPORT2
-uset_getItem(const USet* uset, int32_t itemIndex,
- UChar32* start, UChar32* end,
- UChar* str, int32_t strCapacity,
- UErrorCode* ec) {
- if (U_FAILURE(*ec)) return 0;
- const UnicodeSet& set = *(const UnicodeSet*)uset;
- int32_t rangeCount;
-
- if (itemIndex < 0) {
- *ec = U_ILLEGAL_ARGUMENT_ERROR;
- return -1;
- } else if (itemIndex < (rangeCount = set.getRangeCount())) {
- *start = set.getRangeStart(itemIndex);
- *end = set.getRangeEnd(itemIndex);
- return 0;
- } else {
- itemIndex -= rangeCount;
- if (itemIndex < USetAccess::getStringCount(set)) {
- const UnicodeString* s = USetAccess::getString(set, itemIndex);
- return s->extract(str, strCapacity, *ec);
- } else {
- *ec = U_INDEX_OUTOFBOUNDS_ERROR;
- return -1;
- }
- }
-}
-
-//U_CAPI int32_t U_EXPORT2
-//uset_getRangeCount(const USet* set) {
-// return ((const UnicodeSet*) set)->getRangeCount();
-//}
-//
-//U_CAPI UBool U_EXPORT2
-//uset_getRange(const USet* set, int32_t rangeIndex,
-// UChar32* pStart, UChar32* pEnd) {
-// if ((uint32_t) rangeIndex >= (uint32_t) uset_getRangeCount(set)) {
-// return FALSE;
-// }
-// const UnicodeSet* us = (const UnicodeSet*) set;
-// *pStart = us->getRangeStart(rangeIndex);
-// *pEnd = us->getRangeEnd(rangeIndex);
-// return TRUE;
-//}
-
-/*
- * Serialize a USet into 16-bit units.
- * Store BMP code points as themselves with one 16-bit unit each.
- *
- * Important: the code points in the array are in ascending order,
- * therefore all BMP code points precede all supplementary code points.
- *
- * Store each supplementary code point in 2 16-bit units,
- * simply with higher-then-lower 16-bit halfs.
- *
- * Precede the entire list with the length.
- * If there are supplementary code points, then set bit 15 in the length
- * and add the bmpLength between it and the array.
- *
- * In other words:
- * - all BMP: (length=bmpLength) BMP, .., BMP
- * - some supplementary: (length|0x8000) (bmpLength<length) BMP, .., BMP, supp-high, supp-low, ..
- */
-U_CAPI int32_t U_EXPORT2
-uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* ec) {
- if (ec==NULL || U_FAILURE(*ec)) {
- return 0;
- }
-
- return ((const UnicodeSet*) set)->UnicodeSet::serialize(dest, destCapacity,* ec);
-}
-
-U_CAPI UBool U_EXPORT2
-uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength) {
- int32_t length;
-
- if(fillSet==NULL) {
- return FALSE;
- }
- if(src==NULL || srcLength<=0) {
- fillSet->length=fillSet->bmpLength=0;
- return FALSE;
- }
-
- length=*src++;
- if(length&0x8000) {
- /* there are supplementary values */
- length&=0x7fff;
- if(srcLength<(2+length)) {
- fillSet->length=fillSet->bmpLength=0;
- return FALSE;
- }
- fillSet->bmpLength=*src++;
- } else {
- /* only BMP values */
- if(srcLength<(1+length)) {
- fillSet->length=fillSet->bmpLength=0;
- return FALSE;
- }
- fillSet->bmpLength=length;
- }
- fillSet->array=src;
- fillSet->length=length;
- return TRUE;
-}
-
-U_CAPI void U_EXPORT2
-uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c) {
- if(fillSet==NULL || (uint32_t)c>0x10ffff) {
- return;
- }
-
- fillSet->array=fillSet->staticArray;
- if(c<0xffff) {
- fillSet->bmpLength=fillSet->length=2;
- fillSet->staticArray[0]=(uint16_t)c;
- fillSet->staticArray[1]=(uint16_t)c+1;
- } else if(c==0xffff) {
- fillSet->bmpLength=1;
- fillSet->length=3;
- fillSet->staticArray[0]=0xffff;
- fillSet->staticArray[1]=1;
- fillSet->staticArray[2]=0;
- } else if(c<0x10ffff) {
- fillSet->bmpLength=0;
- fillSet->length=4;
- fillSet->staticArray[0]=(uint16_t)(c>>16);
- fillSet->staticArray[1]=(uint16_t)c;
- ++c;
- fillSet->staticArray[2]=(uint16_t)(c>>16);
- fillSet->staticArray[3]=(uint16_t)c;
- } else /* c==0x10ffff */ {
- fillSet->bmpLength=0;
- fillSet->length=2;
- fillSet->staticArray[0]=0x10;
- fillSet->staticArray[1]=0xffff;
- }
-}
-
-U_CAPI UBool U_EXPORT2
-uset_serializedContains(const USerializedSet* set, UChar32 c) {
- const uint16_t* array;
-
- if(set==NULL || (uint32_t)c>0x10ffff) {
- return FALSE;
- }
-
- array=set->array;
- if(c<=0xffff) {
- /* find c in the BMP part */
- int32_t lo = 0;
- int32_t hi = set->bmpLength-1;
- if (c < array[0]) {
- hi = 0;
- } else if (c < array[hi]) {
- for(;;) {
- int32_t i = (lo + hi) >> 1;
- if (i == lo) {
- break; // Done!
- } else if (c < array[i]) {
- hi = i;
- } else {
- lo = i;
- }
- }
- } else {
- hi += 1;
- }
- return (UBool)(hi&1);
- } else {
- /* find c in the supplementary part */
- uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c;
- int32_t base = set->bmpLength;
- int32_t lo = 0;
- int32_t hi = set->length - 2 - base;
- if (high < array[base] || (high==array[base] && low<array[base+1])) {
- hi = 0;
- } else if (high < array[base+hi] || (high==array[base+hi] && low<array[base+hi+1])) {
- for (;;) {
- int32_t i = ((lo + hi) >> 1) & ~1; // Guarantee even result
- int32_t iabs = i + base;
- if (i == lo) {
- break; // Done!
- } else if (high < array[iabs] || (high==array[iabs] && low<array[iabs+1])) {
- hi = i;
- } else {
- lo = i;
- }
- }
- } else {
- hi += 2;
- }
- /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
- return (UBool)(((hi+(base<<1))&2)!=0);
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-uset_getSerializedRangeCount(const USerializedSet* set) {
- if(set==NULL) {
- return 0;
- }
-
- return (set->bmpLength+(set->length-set->bmpLength)/2+1)/2;
-}
-
-U_CAPI UBool U_EXPORT2
-uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
- UChar32* pStart, UChar32* pEnd) {
- const uint16_t* array;
- int32_t bmpLength, length;
-
- if(set==NULL || rangeIndex<0 || pStart==NULL || pEnd==NULL) {
- return FALSE;
- }
-
- array=set->array;
- length=set->length;
- bmpLength=set->bmpLength;
-
- rangeIndex*=2; /* address start/limit pairs */
- if(rangeIndex<bmpLength) {
- *pStart=array[rangeIndex++];
- if(rangeIndex<bmpLength) {
- *pEnd=array[rangeIndex]-1;
- } else if(rangeIndex<length) {
- *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
- } else {
- *pEnd=0x10ffff;
- }
- return TRUE;
- } else {
- rangeIndex-=bmpLength;
- rangeIndex*=2; /* address pairs of pairs of units */
- length-=bmpLength;
- if(rangeIndex<length) {
- array+=bmpLength;
- *pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
- rangeIndex+=2;
- if(rangeIndex<length) {
- *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
- } else {
- *pEnd=0x10ffff;
- }
- return TRUE;
- } else {
- return FALSE;
- }
- }
-}
-
-// TODO The old, internal uset.c had an efficient uset_containsOne function.
-// Returned the one and only code point, or else -1 or something.
-// Consider adding such a function to both C and C++ UnicodeSet/uset.
-// See tools/gennorm/store.c for usage, now usetContainsOne there.
-
-// TODO Investigate incorporating this code into UnicodeSet to improve
-// efficiency.
-// ---
-// #define USET_GROW_DELTA 20
-//
-// static int32_t
-// findChar(const UChar32* array, int32_t length, UChar32 c) {
-// int32_t i;
-//
-// /* check the last range limit first for more efficient appending */
-// if(length>0) {
-// if(c>=array[length-1]) {
-// return length;
-// }
-//
-// /* do not check the last range limit again in the loop below */
-// --length;
-// }
-//
-// for(i=0; i<length && c>=array[i]; ++i) {}
-// return i;
-// }
-//
-// static UBool
-// addRemove(USet* set, UChar32 c, int32_t doRemove) {
-// int32_t i, length, more;
-//
-// if(set==NULL || (uint32_t)c>0x10ffff) {
-// return FALSE;
-// }
-//
-// length=set->length;
-// i=findChar(set->array, length, c);
-// if((i&1)^doRemove) {
-// /* c is already in the set */
-// return TRUE;
-// }
-//
-// /* how many more array items do we need? */
-// if(i<length && (c+1)==set->array[i]) {
-// /* c is just before the following range, extend that in-place by one */
-// set->array[i]=c;
-// if(i>0) {
-// --i;
-// if(c==set->array[i]) {
-// /* the previous range collapsed, remove it */
-// set->length=length-=2;
-// if(i<length) {
-// uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
-// }
-// }
-// }
-// return TRUE;
-// } else if(i>0 && c==set->array[i-1]) {
-// /* c is just after the previous range, extend that in-place by one */
-// if(++c<=0x10ffff) {
-// set->array[i-1]=c;
-// if(i<length && c==set->array[i]) {
-// /* the following range collapsed, remove it */
-// --i;
-// set->length=length-=2;
-// if(i<length) {
-// uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
-// }
-// }
-// } else {
-// /* extend the previous range (had limit 0x10ffff) to the end of Unicode */
-// set->length=i-1;
-// }
-// return TRUE;
-// } else if(i==length && c==0x10ffff) {
-// /* insert one range limit c */
-// more=1;
-// } else {
-// /* insert two range limits c, c+1 */
-// more=2;
-// }
-//
-// /* insert <more> range limits */
-// if(length+more>set->capacity) {
-// /* reallocate */
-// int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA;
-// UChar32* newArray=(UChar32* )uprv_malloc(newCapacity*4);
-// if(newArray==NULL) {
-// return FALSE;
-// }
-// set->capacity=newCapacity;
-// uprv_memcpy(newArray, set->array, length*4);
-//
-// if(set->array!=set->staticBuffer) {
-// uprv_free(set->array);
-// }
-// set->array=newArray;
-// }
-//
-// if(i<length) {
-// uprv_memmove(set->array+i+more, set->array+i, (length-i)*4);
-// }
-// set->array[i]=c;
-// if(more==2) {
-// set->array[i+1]=c+1;
-// }
-// set->length+=more;
-//
-// return TRUE;
-// }
-//
-// U_CAPI UBool U_EXPORT2
-// uset_add(USet* set, UChar32 c) {
-// return addRemove(set, c, 0);
-// }
-//
-// U_CAPI void U_EXPORT2
-// uset_remove(USet* set, UChar32 c) {
-// addRemove(set, c, 1);
-// }
diff --git a/contrib/libs/icu/common/uset_imp.h b/contrib/libs/icu/common/uset_imp.h
deleted file mode 100644
index 7233b9303c3..00000000000
--- a/contrib/libs/icu/common/uset_imp.h
+++ /dev/null
@@ -1,62 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2004-2007, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: uset_imp.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004sep07
-* created by: Markus W. Scherer
-*
-* Internal USet definitions.
-*/
-
-#ifndef __USET_IMP_H__
-#define __USET_IMP_H__
-
-#include "unicode/utypes.h"
-#include "unicode/uset.h"
-
-U_CDECL_BEGIN
-
-typedef void U_CALLCONV
-USetAdd(USet *set, UChar32 c);
-
-typedef void U_CALLCONV
-USetAddRange(USet *set, UChar32 start, UChar32 end);
-
-typedef void U_CALLCONV
-USetAddString(USet *set, const UChar *str, int32_t length);
-
-typedef void U_CALLCONV
-USetRemove(USet *set, UChar32 c);
-
-typedef void U_CALLCONV
-USetRemoveRange(USet *set, UChar32 start, UChar32 end);
-
-/**
- * Interface for adding items to a USet, to keep low-level code from
- * statically depending on the USet implementation.
- * Calls will look like sa->add(sa->set, c);
- */
-struct USetAdder {
- USet *set;
- USetAdd *add;
- USetAddRange *addRange;
- USetAddString *addString;
- USetRemove *remove;
- USetRemoveRange *removeRange;
-};
-typedef struct USetAdder USetAdder;
-
-U_CDECL_END
-
-#endif
-
diff --git a/contrib/libs/icu/common/uset_props.cpp b/contrib/libs/icu/common/uset_props.cpp
deleted file mode 100644
index f08e760b10d..00000000000
--- a/contrib/libs/icu/common/uset_props.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2002-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: uset_props.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004aug30
-* created by: Markus W. Scherer
-*
-* C wrappers around UnicodeSet functions that are implemented in
-* uniset_props.cpp, split off for modularization.
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-#include "unicode/uset.h"
-#include "unicode/uniset.h"
-#include "cmemory.h"
-#include "unicode/ustring.h"
-#include "unicode/parsepos.h"
-
-U_NAMESPACE_USE
-
-U_CAPI USet* U_EXPORT2
-uset_openPattern(const UChar* pattern, int32_t patternLength,
- UErrorCode* ec)
-{
- UnicodeString pat(patternLength==-1, pattern, patternLength);
- UnicodeSet* set = new UnicodeSet(pat, *ec);
- /* test for NULL */
- if(set == 0) {
- *ec = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
-
- if (U_FAILURE(*ec)) {
- delete set;
- set = NULL;
- }
- return (USet*) set;
-}
-
-U_CAPI USet* U_EXPORT2
-uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
- uint32_t options,
- UErrorCode* ec)
-{
- UnicodeString pat(patternLength==-1, pattern, patternLength);
- UnicodeSet* set = new UnicodeSet(pat, options, NULL, *ec);
- /* test for NULL */
- if(set == 0) {
- *ec = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
-
- if (U_FAILURE(*ec)) {
- delete set;
- set = NULL;
- }
- return (USet*) set;
-}
-
-
-U_CAPI int32_t U_EXPORT2
-uset_applyPattern(USet *set,
- const UChar *pattern, int32_t patternLength,
- uint32_t options,
- UErrorCode *status){
-
- // status code needs to be checked since we
- // dereference it
- if(status == NULL || U_FAILURE(*status)){
- return 0;
- }
-
- // check only the set paramenter
- // if pattern is NULL or null terminate
- // UnicodeString constructor takes care of it
- if(set == NULL){
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- UnicodeString pat(pattern, patternLength);
-
- ParsePosition pos;
-
- ((UnicodeSet*) set)->applyPattern(pat, pos, options, NULL, *status);
-
- return pos.getIndex();
-}
-
-U_CAPI void U_EXPORT2
-uset_applyIntPropertyValue(USet* set,
- UProperty prop, int32_t value, UErrorCode* ec) {
- ((UnicodeSet*) set)->applyIntPropertyValue(prop, value, *ec);
-}
-
-U_CAPI void U_EXPORT2
-uset_applyPropertyAlias(USet* set,
- const UChar *prop, int32_t propLength,
- const UChar *value, int32_t valueLength,
- UErrorCode* ec) {
-
- UnicodeString p(prop, propLength);
- UnicodeString v(value, valueLength);
-
- ((UnicodeSet*) set)->applyPropertyAlias(p, v, *ec);
-}
-
-U_CAPI UBool U_EXPORT2
-uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
- int32_t pos) {
-
- UnicodeString pat(pattern, patternLength);
-
- return ((pos+1) < pat.length() &&
- pat.charAt(pos) == (UChar)91/*[*/) ||
- UnicodeSet::resemblesPattern(pat, pos);
-}
-
-U_CAPI int32_t U_EXPORT2
-uset_toPattern(const USet* set,
- UChar* result, int32_t resultCapacity,
- UBool escapeUnprintable,
- UErrorCode* ec) {
- UnicodeString pat;
- ((const UnicodeSet*) set)->toPattern(pat, escapeUnprintable);
- return pat.extract(result, resultCapacity, *ec);
-}
-
-U_CAPI void U_EXPORT2
-uset_closeOver(USet* set, int32_t attributes) {
- ((UnicodeSet*) set)->UnicodeSet::closeOver(attributes);
-}
diff --git a/contrib/libs/icu/common/usetiter.cpp b/contrib/libs/icu/common/usetiter.cpp
deleted file mode 100644
index 79151690494..00000000000
--- a/contrib/libs/icu/common/usetiter.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2002-2006, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-#include "unicode/usetiter.h"
-#include "unicode/uniset.h"
-#include "unicode/unistr.h"
-#include "uvector.h"
-
-U_NAMESPACE_BEGIN
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSetIterator)
-
-/**
- * Create an iterator
- * @param set set to iterate over
- */
-UnicodeSetIterator::UnicodeSetIterator(const UnicodeSet& uSet) {
- cpString = NULL;
- reset(uSet);
-}
-
-/**
- * Create an iterator. Convenience for when the contents are to be set later.
- */
-UnicodeSetIterator::UnicodeSetIterator() {
- this->set = NULL;
- cpString = NULL;
- reset();
-}
-
-UnicodeSetIterator::~UnicodeSetIterator() {
- delete cpString;
-}
-
-/**
- * Returns the next element in the set.
- * @return true if there was another element in the set.
- * if so, if codepoint == IS_STRING, the value is a string in the string field
- * else the value is a single code point in the codepoint field.
- * <br>You are guaranteed that the codepoints are in sorted order, and the strings are in sorted order,
- * and that all code points are returned before any strings are returned.
- * <br>Note also that the codepointEnd is undefined after calling this method.
- */
-UBool UnicodeSetIterator::next() {
- if (nextElement <= endElement) {
- codepoint = codepointEnd = nextElement++;
- string = NULL;
- return TRUE;
- }
- if (range < endRange) {
- loadRange(++range);
- codepoint = codepointEnd = nextElement++;
- string = NULL;
- return TRUE;
- }
-
- if (nextString >= stringCount) return FALSE;
- codepoint = (UChar32)IS_STRING; // signal that value is actually a string
- string = (const UnicodeString*) set->strings->elementAt(nextString++);
- return TRUE;
-}
-
-/**
- * @return true if there was another element in the set.
- * if so, if codepoint == IS_STRING, the value is a string in the string field
- * else the value is a range of codepoints in the <codepoint, codepointEnd> fields.
- * <br>Note that the codepoints are in sorted order, and the strings are in sorted order,
- * and that all code points are returned before any strings are returned.
- * <br>You are guaranteed that the ranges are in sorted order, and the strings are in sorted order,
- * and that all ranges are returned before any strings are returned.
- * <br>You are also guaranteed that ranges are disjoint and non-contiguous.
- * <br>Note also that the codepointEnd is undefined after calling this method.
- */
-UBool UnicodeSetIterator::nextRange() {
- string = NULL;
- if (nextElement <= endElement) {
- codepointEnd = endElement;
- codepoint = nextElement;
- nextElement = endElement+1;
- return TRUE;
- }
- if (range < endRange) {
- loadRange(++range);
- codepointEnd = endElement;
- codepoint = nextElement;
- nextElement = endElement+1;
- return TRUE;
- }
-
- if (nextString >= stringCount) return FALSE;
- codepoint = (UChar32)IS_STRING; // signal that value is actually a string
- string = (const UnicodeString*) set->strings->elementAt(nextString++);
- return TRUE;
-}
-
-/**
- *@param set the set to iterate over. This allows reuse of the iterator.
- */
-void UnicodeSetIterator::reset(const UnicodeSet& uSet) {
- this->set = &uSet;
- reset();
-}
-
-/**
- * Resets to the start, to allow the iteration to start over again.
- */
-void UnicodeSetIterator::reset() {
- if (set == NULL) {
- // Set up indices to empty iteration
- endRange = -1;
- stringCount = 0;
- } else {
- endRange = set->getRangeCount() - 1;
- stringCount = set->stringsSize();
- }
- range = 0;
- endElement = -1;
- nextElement = 0;
- if (endRange >= 0) {
- loadRange(range);
- }
- nextString = 0;
- string = NULL;
-}
-
-void UnicodeSetIterator::loadRange(int32_t iRange) {
- nextElement = set->getRangeStart(iRange);
- endElement = set->getRangeEnd(iRange);
-}
-
-
-const UnicodeString& UnicodeSetIterator::getString() {
- if (string==NULL && codepoint!=(UChar32)IS_STRING) {
- if (cpString == NULL) {
- cpString = new UnicodeString();
- }
- if (cpString != NULL) {
- cpString->setTo((UChar32)codepoint);
- }
- string = cpString;
- }
- return *string;
-}
-
-U_NAMESPACE_END
-
-//eof
diff --git a/contrib/libs/icu/common/ushape.cpp b/contrib/libs/icu/common/ushape.cpp
deleted file mode 100644
index ae13b5c1183..00000000000
--- a/contrib/libs/icu/common/ushape.cpp
+++ /dev/null
@@ -1,1728 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- ******************************************************************************
- *
- * Copyright (C) 2000-2016, International Business Machines
- * Corporation and others. All Rights Reserved.
- *
- ******************************************************************************
- * file name: ushape.cpp
- * encoding: UTF-8
- * tab size: 8 (not used)
- * indentation:4
- *
- * created on: 2000jun29
- * created by: Markus W. Scherer
- *
- * Arabic letter shaping implemented by Ayman Roshdy
- */
-
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "unicode/ustring.h"
-#include "unicode/ushape.h"
-#include "cmemory.h"
-#include "putilimp.h"
-#include "ustr_imp.h"
-#include "ubidi_props.h"
-#include "uassert.h"
-
-/*
- * This implementation is designed for 16-bit Unicode strings.
- * The main assumption is that the Arabic characters and their
- * presentation forms each fit into a single UChar.
- * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII
- * characters.
- */
-
-/*
- * ### TODO in general for letter shaping:
- * - the letter shaping code is UTF-16-unaware; needs update
- * + especially invertBuffer()?!
- * - needs to handle the "Arabic Tail" that is used in some legacy codepages
- * as a glyph fragment of wide-glyph letters
- * + IBM Unicode conversion tables map it to U+200B (ZWSP)
- * + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms
- * + Unicode 3.2 added U+FE73 ARABIC TAIL FRAGMENT
- */
-
-/* definitions for Arabic letter shaping ------------------------------------ */
-
-#define IRRELEVANT 4
-#define LAMTYPE 16
-#define ALEFTYPE 32
-#define LINKR 1
-#define LINKL 2
-#define APRESENT 8
-#define SHADDA 64
-#define CSHADDA 128
-#define COMBINE (SHADDA+CSHADDA)
-
-#define HAMZAFE_CHAR 0xfe80
-#define HAMZA06_CHAR 0x0621
-#define YEH_HAMZA_CHAR 0x0626
-#define YEH_HAMZAFE_CHAR 0xFE89
-#define LAMALEF_SPACE_SUB 0xFFFF
-#define TASHKEEL_SPACE_SUB 0xFFFE
-#define NEW_TAIL_CHAR 0xFE73
-#define OLD_TAIL_CHAR 0x200B
-#define LAM_CHAR 0x0644
-#define SPACE_CHAR 0x0020
-#define SHADDA_CHAR 0xFE7C
-#define TATWEEL_CHAR 0x0640
-#define SHADDA_TATWEEL_CHAR 0xFE7D
-#define SHADDA06_CHAR 0x0651
-
-#define SHAPE_MODE 0
-#define DESHAPE_MODE 1
-
-struct uShapeVariables {
- UChar tailChar;
- uint32_t uShapeLamalefBegin;
- uint32_t uShapeLamalefEnd;
- uint32_t uShapeTashkeelBegin;
- uint32_t uShapeTashkeelEnd;
- int spacesRelativeToTextBeginEnd;
-};
-
-static const uint8_t tailFamilyIsolatedFinal[] = {
- /* FEB1 */ 1,
- /* FEB2 */ 1,
- /* FEB3 */ 0,
- /* FEB4 */ 0,
- /* FEB5 */ 1,
- /* FEB6 */ 1,
- /* FEB7 */ 0,
- /* FEB8 */ 0,
- /* FEB9 */ 1,
- /* FEBA */ 1,
- /* FEBB */ 0,
- /* FEBC */ 0,
- /* FEBD */ 1,
- /* FEBE */ 1
-};
-
-static const uint8_t tashkeelMedial[] = {
- /* FE70 */ 0,
- /* FE71 */ 1,
- /* FE72 */ 0,
- /* FE73 */ 0,
- /* FE74 */ 0,
- /* FE75 */ 0,
- /* FE76 */ 0,
- /* FE77 */ 1,
- /* FE78 */ 0,
- /* FE79 */ 1,
- /* FE7A */ 0,
- /* FE7B */ 1,
- /* FE7C */ 0,
- /* FE7D */ 1,
- /* FE7E */ 0,
- /* FE7F */ 1
-};
-
-static const UChar yehHamzaToYeh[] =
-{
-/* isolated*/ 0xFEEF,
-/* final */ 0xFEF0
-};
-
-static const uint8_t IrrelevantPos[] = {
- 0x0, 0x2, 0x4, 0x6,
- 0x8, 0xA, 0xC, 0xE
-};
-
-
-static const UChar convertLamAlef[] =
-{
-/*FEF5*/ 0x0622,
-/*FEF6*/ 0x0622,
-/*FEF7*/ 0x0623,
-/*FEF8*/ 0x0623,
-/*FEF9*/ 0x0625,
-/*FEFA*/ 0x0625,
-/*FEFB*/ 0x0627,
-/*FEFC*/ 0x0627
-};
-
-static const UChar araLink[178]=
-{
- 1 + 32 + 256 * 0x11,/*0x0622*/
- 1 + 32 + 256 * 0x13,/*0x0623*/
- 1 + 256 * 0x15,/*0x0624*/
- 1 + 32 + 256 * 0x17,/*0x0625*/
- 1 + 2 + 256 * 0x19,/*0x0626*/
- 1 + 32 + 256 * 0x1D,/*0x0627*/
- 1 + 2 + 256 * 0x1F,/*0x0628*/
- 1 + 256 * 0x23,/*0x0629*/
- 1 + 2 + 256 * 0x25,/*0x062A*/
- 1 + 2 + 256 * 0x29,/*0x062B*/
- 1 + 2 + 256 * 0x2D,/*0x062C*/
- 1 + 2 + 256 * 0x31,/*0x062D*/
- 1 + 2 + 256 * 0x35,/*0x062E*/
- 1 + 256 * 0x39,/*0x062F*/
- 1 + 256 * 0x3B,/*0x0630*/
- 1 + 256 * 0x3D,/*0x0631*/
- 1 + 256 * 0x3F,/*0x0632*/
- 1 + 2 + 256 * 0x41,/*0x0633*/
- 1 + 2 + 256 * 0x45,/*0x0634*/
- 1 + 2 + 256 * 0x49,/*0x0635*/
- 1 + 2 + 256 * 0x4D,/*0x0636*/
- 1 + 2 + 256 * 0x51,/*0x0637*/
- 1 + 2 + 256 * 0x55,/*0x0638*/
- 1 + 2 + 256 * 0x59,/*0x0639*/
- 1 + 2 + 256 * 0x5D,/*0x063A*/
- 0, 0, 0, 0, 0, /*0x063B-0x063F*/
- 1 + 2, /*0x0640*/
- 1 + 2 + 256 * 0x61,/*0x0641*/
- 1 + 2 + 256 * 0x65,/*0x0642*/
- 1 + 2 + 256 * 0x69,/*0x0643*/
- 1 + 2 + 16 + 256 * 0x6D,/*0x0644*/
- 1 + 2 + 256 * 0x71,/*0x0645*/
- 1 + 2 + 256 * 0x75,/*0x0646*/
- 1 + 2 + 256 * 0x79,/*0x0647*/
- 1 + 256 * 0x7D,/*0x0648*/
- 1 + 256 * 0x7F,/*0x0649*/
- 1 + 2 + 256 * 0x81,/*0x064A*/
- 4 + 256 * 1, /*0x064B*/
- 4 + 128 + 256 * 1, /*0x064C*/
- 4 + 128 + 256 * 1, /*0x064D*/
- 4 + 128 + 256 * 1, /*0x064E*/
- 4 + 128 + 256 * 1, /*0x064F*/
- 4 + 128 + 256 * 1, /*0x0650*/
- 4 + 64 + 256 * 3, /*0x0651*/
- 4 + 256 * 1, /*0x0652*/
- 4 + 256 * 7, /*0x0653*/
- 4 + 256 * 8, /*0x0654*/
- 4 + 256 * 8, /*0x0655*/
- 4 + 256 * 1, /*0x0656*/
- 0, 0, 0, 0, 0, /*0x0657-0x065B*/
- 1 + 256 * 0x85,/*0x065C*/
- 1 + 256 * 0x87,/*0x065D*/
- 1 + 256 * 0x89,/*0x065E*/
- 1 + 256 * 0x8B,/*0x065F*/
- 0, 0, 0, 0, 0, /*0x0660-0x0664*/
- 0, 0, 0, 0, 0, /*0x0665-0x0669*/
- 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/
- 4 + 256 * 6, /*0x0670*/
- 1 + 8 + 256 * 0x00,/*0x0671*/
- 1 + 32, /*0x0672*/
- 1 + 32, /*0x0673*/
- 0, /*0x0674*/
- 1 + 32, /*0x0675*/
- 1, 1, /*0x0676-0x0677*/
- 1 + 2, /*0x0678*/
- 1 + 2 + 8 + 256 * 0x16,/*0x0679*/
- 1 + 2 + 8 + 256 * 0x0E,/*0x067A*/
- 1 + 2 + 8 + 256 * 0x02,/*0x067B*/
- 1+2, 1+2, /*0x67C-0x067D*/
- 1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/
- 1+2, 1+2, 1+2+8+256 * 0x2A, 1+2, /*0x0684-0x0687*/
- 1 + 8 + 256 * 0x38,/*0x0688*/
- 1, 1, 1, /*0x0689-0x068B*/
- 1 + 8 + 256 * 0x34,/*0x068C*/
- 1 + 8 + 256 * 0x32,/*0x068D*/
- 1 + 8 + 256 * 0x36,/*0x068E*/
- 1, 1, /*0x068F-0x0690*/
- 1 + 8 + 256 * 0x3C,/*0x0691*/
- 1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1, /*0x0692-0x0699*/
- 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/
- 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/
- 1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06AD*/
- 1+2, 1+2, 1+2, 1+2, /*0x06A4-0x06AD*/
- 1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/
- 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/
- 1+2, 1+2, /*0x06B8-0x06B9*/
- 1 + 8 + 256 * 0x4E,/*0x06BA*/
- 1 + 2 + 8 + 256 * 0x50,/*0x06BB*/
- 1+2, 1+2, /*0x06BC-0x06BD*/
- 1 + 2 + 8 + 256 * 0x5A,/*0x06BE*/
- 1+2, /*0x06BF*/
- 1 + 8 + 256 * 0x54,/*0x06C0*/
- 1 + 2 + 8 + 256 * 0x56,/*0x06C1*/
- 1, 1, 1, /*0x06C2-0x06C4*/
- 1 + 8 + 256 * 0x90,/*0x06C5*/
- 1 + 8 + 256 * 0x89,/*0x06C6*/
- 1 + 8 + 256 * 0x87,/*0x06C7*/
- 1 + 8 + 256 * 0x8B,/*0x06C8*/
- 1 + 8 + 256 * 0x92,/*0x06C9*/
- 1, /*0x06CA*/
- 1 + 8 + 256 * 0x8E,/*0x06CB*/
- 1 + 2 + 8 + 256 * 0xAC,/*0x06CC*/
- 1, /*0x06CD*/
- 1+2, 1+2, /*0x06CE-0x06CF*/
- 1 + 2 + 8 + 256 * 0x94,/*0x06D0*/
- 1+2, /*0x06D1*/
- 1 + 8 + 256 * 0x5E,/*0x06D2*/
- 1 + 8 + 256 * 0x60 /*0x06D3*/
-};
-
-static const uint8_t presALink[] = {
-/***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/
-/*FB5*/ 0, 1, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0,
-/*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-/*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0,
-/*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
-/*FB9*/ 2,1 + 2, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-/*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-/*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-/*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-/*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-/*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-/*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2,
-/*FC0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-/*FC1*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-/*FC2*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-/*FC3*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-/*FC4*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-/*FC5*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4,
-/*FC6*/ 4, 4, 4
-};
-
-static const uint8_t presBLink[]=
-{
-/***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/
-/*FE7*/1 + 2,1 + 2,1 + 2, 0,1 + 2, 0,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,
-/*FE8*/ 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2,1 + 2, 0, 1, 0,
-/*FE9*/ 1, 2,1 + 2, 0, 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
-/*FEA*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0,
-/*FEB*/ 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
-/*FEC*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
-/*FED*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
-/*FEE*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0,
-/*FEF*/ 1, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0
-};
-
-static const UChar convertFBto06[] =
-{
-/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
-/*FB5*/ 0x671, 0x671, 0x67B, 0x67B, 0x67B, 0x67B, 0x67E, 0x67E, 0x67E, 0x67E, 0, 0, 0, 0, 0x67A, 0x67A,
-/*FB6*/ 0x67A, 0x67A, 0, 0, 0, 0, 0x679, 0x679, 0x679, 0x679, 0, 0, 0, 0, 0, 0,
-/*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x686, 0x686, 0x686, 0x686, 0, 0,
-/*FB8*/ 0, 0, 0x68D, 0x68D, 0x68C, 0x68C, 0x68E, 0x68E, 0x688, 0x688, 0x698, 0x698, 0x691, 0x691, 0x6A9, 0x6A9,
-/*FB9*/ 0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF, 0, 0, 0, 0, 0, 0, 0, 0, 0x6BA, 0x6BA,
-/*FBA*/ 0x6BB, 0x6BB, 0x6BB, 0x6BB, 0x6C0, 0x6C0, 0x6C1, 0x6C1, 0x6C1, 0x6C1, 0x6BE, 0x6BE, 0x6BE, 0x6BE, 0x6d2, 0x6D2,
-/*FBB*/ 0x6D3, 0x6D3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-/*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-/*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0x6C7, 0x6C7, 0x6C6, 0x6C6, 0x6C8, 0x6C8, 0, 0x6CB, 0x6CB,
-/*FBE*/ 0x6C5, 0x6C5, 0x6C9, 0x6C9, 0x6D0, 0x6D0, 0x6D0, 0x6D0, 0, 0, 0, 0, 0, 0, 0, 0,
-/*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x6CC, 0x6CC, 0x6CC, 0x6CC
-};
-
-static const UChar convertFEto06[] =
-{
-/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
-/*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
-/*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628,
-/*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C,
-/*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632,
-/*FEB*/ 0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636,
-/*FEC*/ 0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A,
-/*FED*/ 0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644,
-/*FEE*/ 0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649,
-/*FEF*/ 0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F
-};
-
-static const uint8_t shapeTable[4][4][4]=
-{
- { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} },
- { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} },
- { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} },
- { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }
-};
-
-/*
- * This function shapes European digits to Arabic-Indic digits
- * in-place, writing over the input characters.
- * Since we know that we are only looking for BMP code points,
- * we can safely just work with code units (again, at least UTF-16).
- */
-static void
-_shapeToArabicDigitsWithContext(UChar *s, int32_t length,
- UChar digitBase,
- UBool isLogical, UBool lastStrongWasAL) {
- int32_t i;
- UChar c;
-
- digitBase-=0x30;
-
- /* the iteration direction depends on the type of input */
- if(isLogical) {
- for(i=0; i<length; ++i) {
- c=s[i];
- switch(ubidi_getClass(c)) {
- case U_LEFT_TO_RIGHT: /* L */
- case U_RIGHT_TO_LEFT: /* R */
- lastStrongWasAL=FALSE;
- break;
- case U_RIGHT_TO_LEFT_ARABIC: /* AL */
- lastStrongWasAL=TRUE;
- break;
- case U_EUROPEAN_NUMBER: /* EN */
- if(lastStrongWasAL && (uint32_t)(c-0x30)<10) {
- s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */
- }
- break;
- default :
- break;
- }
- }
- } else {
- for(i=length; i>0; /* pre-decrement in the body */) {
- c=s[--i];
- switch(ubidi_getClass(c)) {
- case U_LEFT_TO_RIGHT: /* L */
- case U_RIGHT_TO_LEFT: /* R */
- lastStrongWasAL=FALSE;
- break;
- case U_RIGHT_TO_LEFT_ARABIC: /* AL */
- lastStrongWasAL=TRUE;
- break;
- case U_EUROPEAN_NUMBER: /* EN */
- if(lastStrongWasAL && (uint32_t)(c-0x30)<10) {
- s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */
- }
- break;
- default :
- break;
- }
- }
- }
-}
-
-/*
- *Name : invertBuffer
- *Function : This function inverts the buffer, it's used
- * in case the user specifies the buffer to be
- * U_SHAPE_TEXT_DIRECTION_LOGICAL
- */
-static void
-invertBuffer(UChar *buffer, int32_t size, uint32_t /*options*/, int32_t lowlimit, int32_t highlimit) {
- UChar temp;
- int32_t i=0,j=0;
- for(i=lowlimit,j=size-highlimit-1;i<j;i++,j--) {
- temp = buffer[i];
- buffer[i] = buffer[j];
- buffer[j] = temp;
- }
-}
-
-/*
- *Name : changeLamAlef
- *Function : Converts the Alef characters into an equivalent
- * LamAlef location in the 0x06xx Range, this is an
- * intermediate stage in the operation of the program
- * later it'll be converted into the 0xFExx LamAlefs
- * in the shaping function.
- */
-static inline UChar
-changeLamAlef(UChar ch) {
- switch(ch) {
- case 0x0622 :
- return 0x065C;
- case 0x0623 :
- return 0x065D;
- case 0x0625 :
- return 0x065E;
- case 0x0627 :
- return 0x065F;
- }
- return 0;
-}
-
-/*
- *Name : getLink
- *Function : Resolves the link between the characters as
- * Arabic characters have four forms :
- * Isolated, Initial, Middle and Final Form
- */
-static UChar
-getLink(UChar ch) {
- if(ch >= 0x0622 && ch <= 0x06D3) {
- return(araLink[ch-0x0622]);
- } else if(ch == 0x200D) {
- return(3);
- } else if(ch >= 0x206D && ch <= 0x206F) {
- return(4);
- }else if(ch >= 0xFB50 && ch <= 0xFC62) {
- return(presALink[ch-0xFB50]);
- } else if(ch >= 0xFE70 && ch <= 0xFEFC) {
- return(presBLink[ch-0xFE70]);
- }else {
- return(0);
- }
-}
-
-/*
- *Name : countSpaces
- *Function : Counts the number of spaces
- * at each end of the logical buffer
- */
-static void
-countSpaces(UChar *dest, int32_t size, uint32_t /*options*/, int32_t *spacesCountl, int32_t *spacesCountr) {
- int32_t i = 0;
- int32_t countl = 0,countr = 0;
- while((dest[i] == SPACE_CHAR) && (countl < size)) {
- countl++;
- i++;
- }
- if (countl < size) { /* the entire buffer is not all space */
- while(dest[size-1] == SPACE_CHAR) {
- countr++;
- size--;
- }
- }
- *spacesCountl = countl;
- *spacesCountr = countr;
-}
-
-/*
- *Name : isTashkeelChar
- *Function : Returns 1 for Tashkeel characters in 06 range else return 0
- */
-static inline int32_t
-isTashkeelChar(UChar ch) {
- return (int32_t)( ch>=0x064B && ch<= 0x0652 );
-}
-
-/*
- *Name : isTashkeelCharFE
- *Function : Returns 1 for Tashkeel characters in FE range else return 0
- */
-static inline int32_t
-isTashkeelCharFE(UChar ch) {
- return (int32_t)( ch>=0xFE70 && ch<= 0xFE7F );
-}
-
-/*
- *Name : isAlefChar
- *Function : Returns 1 for Alef characters else return 0
- */
-static inline int32_t
-isAlefChar(UChar ch) {
- return (int32_t)( (ch==0x0622)||(ch==0x0623)||(ch==0x0625)||(ch==0x0627) );
-}
-
-/*
- *Name : isLamAlefChar
- *Function : Returns 1 for LamAlef characters else return 0
- */
-static inline int32_t
-isLamAlefChar(UChar ch) {
- return (int32_t)((ch>=0xFEF5)&&(ch<=0xFEFC) );
-}
-
-/*BIDI
- *Name : isTailChar
- *Function : returns 1 if the character matches one of the tail characters (0xfe73 or 0x200b) otherwise returns 0
- */
-
-static inline int32_t
-isTailChar(UChar ch) {
- if(ch == OLD_TAIL_CHAR || ch == NEW_TAIL_CHAR){
- return 1;
- }else{
- return 0;
- }
-}
-
-/*BIDI
- *Name : isSeenTailFamilyChar
- *Function : returns 1 if the character is a seen family isolated character
- * in the FE range otherwise returns 0
- */
-
-static inline int32_t
-isSeenTailFamilyChar(UChar ch) {
- if(ch >= 0xfeb1 && ch < 0xfebf){
- return tailFamilyIsolatedFinal [ch - 0xFEB1];
- }else{
- return 0;
- }
-}
-
- /* Name : isSeenFamilyChar
- * Function : returns 1 if the character is a seen family character in the Unicode
- * 06 range otherwise returns 0
- */
-
-static inline int32_t
-isSeenFamilyChar(UChar ch){
- if(ch >= 0x633 && ch <= 0x636){
- return 1;
- }else {
- return 0;
- }
-}
-
-/*Start of BIDI*/
-/*
- *Name : isAlefMaksouraChar
- *Function : returns 1 if the character is a Alef Maksoura Final or isolated
- * otherwise returns 0
- */
-static inline int32_t
-isAlefMaksouraChar(UChar ch) {
- return (int32_t)( (ch == 0xFEEF) || ( ch == 0xFEF0) || (ch == 0x0649));
-}
-
-/*
- * Name : isYehHamzaChar
- * Function : returns 1 if the character is a yehHamza isolated or yehhamza
- * final is found otherwise returns 0
- */
-static inline int32_t
-isYehHamzaChar(UChar ch) {
- if((ch==0xFE89)||(ch==0xFE8A)){
- return 1;
- }else{
- return 0;
- }
-}
-
- /*
- * Name: isTashkeelOnTatweelChar
- * Function: Checks if the Tashkeel Character is on Tatweel or not,if the
- * Tashkeel on tatweel (FE range), it returns 1 else if the
- * Tashkeel with shadda on tatweel (FC range)return 2 otherwise
- * returns 0
- */
-static inline int32_t
-isTashkeelOnTatweelChar(UChar ch){
- if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75 && ch != SHADDA_TATWEEL_CHAR)
- {
- return tashkeelMedial [ch - 0xFE70];
- }else if( (ch >= 0xfcf2 && ch <= 0xfcf4) || (ch == SHADDA_TATWEEL_CHAR)) {
- return 2;
- }else{
- return 0;
- }
-}
-
-/*
- * Name: isIsolatedTashkeelChar
- * Function: Checks if the Tashkeel Character is in the isolated form
- * (i.e. Unicode FE range) returns 1 else if the Tashkeel
- * with shadda is in the isolated form (i.e. Unicode FC range)
- * returns 2 otherwise returns 0
- */
-static inline int32_t
-isIsolatedTashkeelChar(UChar ch){
- if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75){
- return (1 - tashkeelMedial [ch - 0xFE70]);
- }else if(ch >= 0xfc5e && ch <= 0xfc63){
- return 1;
- }else{
- return 0;
- }
-}
-
-
-
-
-/*
- *Name : calculateSize
- *Function : This function calculates the destSize to be used in preflighting
- * when the destSize is equal to 0
- * It is used also to calculate the new destsize in case the
- * destination buffer will be resized.
- */
-
-static int32_t
-calculateSize(const UChar *source, int32_t sourceLength,
-int32_t destSize,uint32_t options) {
- int32_t i = 0;
-
- int lamAlefOption = 0;
- int tashkeelOption = 0;
-
- destSize = sourceLength;
-
- if (((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE ||
- ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED )) &&
- ((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE )){
- lamAlefOption = 1;
- }
- if((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE &&
- ((options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ) ){
- tashkeelOption = 1;
- }
-
- if(lamAlefOption || tashkeelOption){
- if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) {
- for(i=0;i<sourceLength;i++) {
- if( ((isAlefChar(source[i]))&& (i<(sourceLength-1)) &&(source[i+1] == LAM_CHAR)) || (isTashkeelCharFE(source[i])) ) {
- destSize--;
- }
- }
- }else if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL) {
- for(i=0;i<sourceLength;i++) {
- if( ( (source[i] == LAM_CHAR) && (i<(sourceLength-1)) && (isAlefChar(source[i+1]))) || (isTashkeelCharFE(source[i])) ) {
- destSize--;
- }
- }
- }
- }
-
- if ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE){
- if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){
- for(i=0;i<sourceLength;i++) {
- if(isLamAlefChar(source[i]))
- destSize++;
- }
- }
- }
-
- return destSize;
-}
-
-/*
- *Name : handleTashkeelWithTatweel
- *Function : Replaces Tashkeel as following:
- * Case 1 :if the Tashkeel on tatweel, replace it with Tatweel.
- * Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace
- * it with Shadda on Tatweel.
- * Case 3: if the Tashkeel is isolated replace it with Space.
- *
- */
-static int32_t
-handleTashkeelWithTatweel(UChar *dest, int32_t sourceLength,
- int32_t /*destSize*/, uint32_t /*options*/,
- UErrorCode * /*pErrorCode*/) {
- int i;
- for(i = 0; i < sourceLength; i++){
- if((isTashkeelOnTatweelChar(dest[i]) == 1)){
- dest[i] = TATWEEL_CHAR;
- }else if((isTashkeelOnTatweelChar(dest[i]) == 2)){
- dest[i] = SHADDA_TATWEEL_CHAR;
- }else if(isIsolatedTashkeelChar(dest[i]) && dest[i] != SHADDA_CHAR){
- dest[i] = SPACE_CHAR;
- }
- }
- return sourceLength;
-}
-
-
-
-/*
- *Name : handleGeneratedSpaces
- *Function : The shapeUnicode function converts Lam + Alef into LamAlef + space,
- * and Tashkeel to space.
- * handleGeneratedSpaces function puts these generated spaces
- * according to the options the user specifies. LamAlef and Tashkeel
- * spaces can be replaced at begin, at end, at near or decrease the
- * buffer size.
- *
- * There is also Auto option for LamAlef and tashkeel, which will put
- * the spaces at end of the buffer (or end of text if the user used
- * the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END).
- *
- * If the text type was visual_LTR and the option
- * U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END
- * option will place the space at the beginning of the buffer and
- * BEGIN will place the space at the end of the buffer.
- */
-
-static int32_t
-handleGeneratedSpaces(UChar *dest, int32_t sourceLength,
- int32_t destSize,
- uint32_t options,
- UErrorCode *pErrorCode,struct uShapeVariables shapeVars ) {
-
- int32_t i = 0, j = 0;
- int32_t count = 0;
- UChar *tempbuffer=NULL;
-
- int lamAlefOption = 0;
- int tashkeelOption = 0;
- int shapingMode = SHAPE_MODE;
-
- if (shapingMode == 0){
- if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE ){
- lamAlefOption = 1;
- }
- if ( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ){
- tashkeelOption = 1;
- }
- }
-
- tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
- /* Test for NULL */
- if(tempbuffer == NULL) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
-
-
- if (lamAlefOption || tashkeelOption){
- uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
-
- i = j = 0; count = 0;
- while(i < sourceLength) {
- if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) ||
- (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){
- j--;
- count++;
- } else {
- tempbuffer[j] = dest[i];
- }
- i++;
- j++;
- }
-
- while(count >= 0) {
- tempbuffer[i] = 0x0000;
- i--;
- count--;
- }
-
- u_memcpy(dest, tempbuffer, sourceLength);
- destSize = u_strlen(dest);
- }
-
- lamAlefOption = 0;
-
- if (shapingMode == 0){
- if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR ){
- lamAlefOption = 1;
- }
- }
-
- if (lamAlefOption){
- /* Lam+Alef is already shaped into LamAlef + FFFF */
- i = 0;
- while(i < sourceLength) {
- if(lamAlefOption&&dest[i] == LAMALEF_SPACE_SUB){
- dest[i] = SPACE_CHAR;
- }
- i++;
- }
- destSize = sourceLength;
- }
- lamAlefOption = 0;
- tashkeelOption = 0;
-
- if (shapingMode == 0) {
- if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin) ||
- (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO )
- && (shapeVars.spacesRelativeToTextBeginEnd==1)) ) {
- lamAlefOption = 1;
- }
- if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelBegin ) {
- tashkeelOption = 1;
- }
- }
-
- if(lamAlefOption || tashkeelOption){
- uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
-
- i = j = sourceLength; count = 0;
-
- while(i >= 0) {
- if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) ||
- (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){
- j++;
- count++;
- }else {
- tempbuffer[j] = dest[i];
- }
- i--;
- j--;
- }
-
- for(i=0 ;i < count; i++){
- tempbuffer[i] = SPACE_CHAR;
- }
-
- u_memcpy(dest, tempbuffer, sourceLength);
- destSize = sourceLength;
- }
-
-
-
- lamAlefOption = 0;
- tashkeelOption = 0;
-
- if (shapingMode == 0) {
- if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd) ||
- (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO )
- && (shapeVars.spacesRelativeToTextBeginEnd==0)) ) {
- lamAlefOption = 1;
- }
- if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelEnd ){
- tashkeelOption = 1;
- }
- }
-
- if(lamAlefOption || tashkeelOption){
- uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
-
- i = j = 0; count = 0;
- while(i < sourceLength) {
- if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) ||
- (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){
- j--;
- count++;
- }else {
- tempbuffer[j] = dest[i];
- }
- i++;
- j++;
- }
-
- while(count >= 0) {
- tempbuffer[i] = SPACE_CHAR;
- i--;
- count--;
- }
-
- u_memcpy(dest, tempbuffer, sourceLength);
- destSize = sourceLength;
- }
-
-
- if(tempbuffer){
- uprv_free(tempbuffer);
- }
-
- return destSize;
-}
-
-/*
- *Name :expandCompositCharAtBegin
- *Function :Expands the LamAlef character to Lam and Alef consuming the required
- * space from beginning of the buffer. If the text type was visual_LTR
- * and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected
- * the spaces will be located at end of buffer.
- * If there are no spaces to expand the LamAlef, an error
- * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
- */
-
-static int32_t
-expandCompositCharAtBegin(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) {
- int32_t i = 0,j = 0;
- int32_t countl = 0;
- UChar *tempbuffer=NULL;
-
- tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
-
- /* Test for NULL */
- if(tempbuffer == NULL) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
-
- uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
-
- i = 0;
- while(dest[i] == SPACE_CHAR) {
- countl++;
- i++;
- }
-
- i = j = sourceLength-1;
-
- while(i >= 0 && j >= 0) {
- if( countl>0 && isLamAlefChar(dest[i])) {
- tempbuffer[j] = LAM_CHAR;
- /* to ensure the array index is within the range */
- U_ASSERT(dest[i] >= 0xFEF5u
- && dest[i]-0xFEF5u < UPRV_LENGTHOF(convertLamAlef));
- tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ];
- j--;
- countl--;
- }else {
- if( countl == 0 && isLamAlefChar(dest[i]) ) {
- *pErrorCode=U_NO_SPACE_AVAILABLE;
- }
- tempbuffer[j] = dest[i];
- }
- i--;
- j--;
- }
- u_memcpy(dest, tempbuffer, sourceLength);
-
- uprv_free(tempbuffer);
-
- destSize = sourceLength;
- return destSize;
-}
-
-/*
- *Name : expandCompositCharAtEnd
- *Function : Expands the LamAlef character to Lam and Alef consuming the
- * required space from end of the buffer. If the text type was
- * Visual LTR and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END
- * was used, the spaces will be consumed from begin of buffer. If
- * there are no spaces to expand the LamAlef, an error
- * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
- */
-
-static int32_t
-expandCompositCharAtEnd(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) {
- int32_t i = 0,j = 0;
-
- int32_t countr = 0;
- int32_t inpsize = sourceLength;
-
- UChar *tempbuffer=NULL;
- tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
-
- /* Test for NULL */
- if(tempbuffer == NULL) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
-
- uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
-
- while(dest[inpsize-1] == SPACE_CHAR) {
- countr++;
- inpsize--;
- }
-
- i = sourceLength - countr - 1;
- j = sourceLength - 1;
-
- while(i >= 0 && j >= 0) {
- if( countr>0 && isLamAlefChar(dest[i]) ) {
- tempbuffer[j] = LAM_CHAR;
- tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ];
- j--;
- countr--;
- }else {
- if ((countr == 0) && isLamAlefChar(dest[i]) ) {
- *pErrorCode=U_NO_SPACE_AVAILABLE;
- }
- tempbuffer[j] = dest[i];
- }
- i--;
- j--;
- }
-
- if(countr > 0) {
- u_memmove(tempbuffer, tempbuffer+countr, sourceLength);
- if(u_strlen(tempbuffer) < sourceLength) {
- for(i=sourceLength-1;i>=sourceLength-countr;i--) {
- tempbuffer[i] = SPACE_CHAR;
- }
- }
- }
- u_memcpy(dest, tempbuffer, sourceLength);
-
- uprv_free(tempbuffer);
-
- destSize = sourceLength;
- return destSize;
-}
-
-/*
- *Name : expandCompositCharAtNear
- *Function : Expands the LamAlef character into Lam + Alef, YehHamza character
- * into Yeh + Hamza, SeenFamily character into SeenFamily character
- * + Tail, while consuming the space next to the character.
- * If there are no spaces next to the character, an error
- * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
- */
-
-static int32_t
-expandCompositCharAtNear(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode,
- int yehHamzaOption, int seenTailOption, int lamAlefOption, struct uShapeVariables shapeVars) {
- int32_t i = 0;
-
-
- UChar lamalefChar, yehhamzaChar;
-
- for(i = 0 ;i<=sourceLength-1;i++) {
- if (seenTailOption && isSeenTailFamilyChar(dest[i])) {
- if ((i>0) && (dest[i-1] == SPACE_CHAR) ) {
- dest[i-1] = shapeVars.tailChar;
- }else {
- *pErrorCode=U_NO_SPACE_AVAILABLE;
- }
- }else if(yehHamzaOption && (isYehHamzaChar(dest[i])) ) {
- if ((i>0) && (dest[i-1] == SPACE_CHAR) ) {
- yehhamzaChar = dest[i];
- dest[i] = yehHamzaToYeh[yehhamzaChar - YEH_HAMZAFE_CHAR];
- dest[i-1] = HAMZAFE_CHAR;
- }else {
-
- *pErrorCode=U_NO_SPACE_AVAILABLE;
- }
- }else if(lamAlefOption && isLamAlefChar(dest[i+1])) {
- if(dest[i] == SPACE_CHAR){
- lamalefChar = dest[i+1];
- dest[i+1] = LAM_CHAR;
- dest[i] = convertLamAlef[ lamalefChar - 0xFEF5 ];
- }else {
- *pErrorCode=U_NO_SPACE_AVAILABLE;
- }
- }
- }
- destSize = sourceLength;
- return destSize;
-}
- /*
- * Name : expandCompositChar
- * Function : LamAlef, need special handling, since it expands from one
- * character into two characters while shaping or deshaping.
- * In order to expand it, near or far spaces according to the
- * options user specifies. Also buffer size can be increased.
- *
- * For SeenFamily characters and YehHamza only the near option is
- * supported, while for LamAlef we can take spaces from begin, end,
- * near or even increase the buffer size.
- * There is also the Auto option for LamAlef only, which will first
- * search for a space at end, begin then near, respectively.
- * If there are no spaces to expand these characters, an error will be set to
- * U_NO_SPACE_AVAILABLE as defined in utypes.h
- */
-
-static int32_t
-expandCompositChar(UChar *dest, int32_t sourceLength,
- int32_t destSize,uint32_t options,
- UErrorCode *pErrorCode, int shapingMode,struct uShapeVariables shapeVars) {
-
- int32_t i = 0,j = 0;
-
- UChar *tempbuffer=NULL;
- int yehHamzaOption = 0;
- int seenTailOption = 0;
- int lamAlefOption = 0;
-
- if (shapingMode == 1){
- if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO){
-
- if(shapeVars.spacesRelativeToTextBeginEnd == 0) {
- destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
-
- if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
- *pErrorCode = U_ZERO_ERROR;
- destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
- }
- }else {
- destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
-
- if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
- *pErrorCode = U_ZERO_ERROR;
- destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
- }
- }
-
- if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
- *pErrorCode = U_ZERO_ERROR;
- destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption,
- seenTailOption, 1,shapeVars);
- }
- }
- }
-
- if (shapingMode == 1){
- if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd){
- destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
- }
- }
-
- if (shapingMode == 1){
- if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin){
- destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
- }
- }
-
- if (shapingMode == 0){
- if ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR){
- yehHamzaOption = 1;
- }
- if ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR){
- seenTailOption = 1;
- }
- }
- if (shapingMode == 1) {
- if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR) {
- lamAlefOption = 1;
- }
- }
-
-
- if (yehHamzaOption || seenTailOption || lamAlefOption){
- destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption,
- seenTailOption,lamAlefOption,shapeVars);
- }
-
-
- if (shapingMode == 1){
- if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){
- destSize = calculateSize(dest,sourceLength,destSize,options);
- tempbuffer = (UChar *)uprv_malloc((destSize+1)*U_SIZEOF_UCHAR);
-
- /* Test for NULL */
- if(tempbuffer == NULL) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
-
- uprv_memset(tempbuffer, 0, (destSize+1)*U_SIZEOF_UCHAR);
-
- i = j = 0;
- while(i < destSize && j < destSize) {
- if(isLamAlefChar(dest[i]) ) {
- tempbuffer[j] = convertLamAlef[ dest[i] - 0xFEF5 ];
- tempbuffer[j+1] = LAM_CHAR;
- j++;
- }else {
- tempbuffer[j] = dest[i];
- }
- i++;
- j++;
- }
-
- u_memcpy(dest, tempbuffer, destSize);
- }
- }
-
- if(tempbuffer) {
- uprv_free(tempbuffer);
- }
- return destSize;
-}
-
-/*
- *Name : shapeUnicode
- *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped
- * arabic Unicode buffer in FExx Range
- */
-static int32_t
-shapeUnicode(UChar *dest, int32_t sourceLength,
- int32_t destSize,uint32_t options,
- UErrorCode *pErrorCode,
- int tashkeelFlag, struct uShapeVariables shapeVars) {
-
- int32_t i, iend;
- int32_t step;
- int32_t lastPos,Nx, Nw;
- unsigned int Shape;
- int32_t lamalef_found = 0;
- int32_t seenfamFound = 0, yehhamzaFound =0, tashkeelFound = 0;
- UChar prevLink = 0, lastLink = 0, currLink, nextLink = 0;
- UChar wLamalef;
-
- /*
- * Converts the input buffer from FExx Range into 06xx Range
- * to make sure that all characters are in the 06xx range
- * even the lamalef is converted to the special region in
- * the 06xx range
- */
- if ((options & U_SHAPE_PRESERVE_PRESENTATION_MASK) == U_SHAPE_PRESERVE_PRESENTATION_NOOP) {
- for (i = 0; i < sourceLength; i++) {
- UChar inputChar = dest[i];
- if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) {
- UChar c = convertFBto06 [ (inputChar - 0xFB50) ];
- if (c != 0)
- dest[i] = c;
- } else if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) {
- dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ;
- } else {
- dest[i] = inputChar ;
- }
- }
- }
-
-
- /* sets the index to the end of the buffer, together with the step point to -1 */
- i = sourceLength - 1;
- iend = -1;
- step = -1;
-
- /*
- * This function resolves the link between the characters .
- * Arabic characters have four forms :
- * Isolated Form, Initial Form, Middle Form and Final Form
- */
- currLink = getLink(dest[i]);
-
- lastPos = i;
- Nx = -2, Nw = 0;
-
- while (i != iend) {
- /* If high byte of currLink > 0 then more than one shape */
- if ((currLink & 0xFF00) > 0 || (getLink(dest[i]) & IRRELEVANT) != 0) {
- Nw = i + step;
- while (Nx < 0) { /* we need to know about next char */
- if(Nw == iend) {
- nextLink = 0;
- Nx = 3000;
- } else {
- nextLink = getLink(dest[Nw]);
- if((nextLink & IRRELEVANT) == 0) {
- Nx = Nw;
- } else {
- Nw = Nw + step;
- }
- }
- }
-
- if ( ((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0) ) {
- lamalef_found = 1;
- wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */
- if ( wLamalef != 0) {
- dest[i] = LAMALEF_SPACE_SUB; /* The default case is to drop the Alef and replace */
- dest[lastPos] =wLamalef; /* it by LAMALEF_SPACE_SUB which is the last character in the */
- i=lastPos; /* unicode private use area, this is done to make */
- } /* sure that removeLamAlefSpaces() handles only the */
- lastLink = prevLink; /* spaces generated during lamalef generation. */
- currLink = getLink(wLamalef); /* LAMALEF_SPACE_SUB is added here and is replaced by spaces */
- } /* in removeLamAlefSpaces() */
-
- if ((i > 0) && (dest[i-1] == SPACE_CHAR)){
- if ( isSeenFamilyChar(dest[i])) {
- seenfamFound = 1;
- } else if (dest[i] == YEH_HAMZA_CHAR) {
- yehhamzaFound = 1;
- }
- }
- else if(i==0){
- if ( isSeenFamilyChar(dest[i])){
- seenfamFound = 1;
- } else if (dest[i] == YEH_HAMZA_CHAR) {
- yehhamzaFound = 1;
- }
- }
-
- /*
- * get the proper shape according to link ability of neighbors
- * and of character; depends on the order of the shapes
- * (isolated, initial, middle, final) in the compatibility area
- */
- Shape = shapeTable[nextLink & (LINKR + LINKL)]
- [lastLink & (LINKR + LINKL)]
- [currLink & (LINKR + LINKL)];
-
- if ((currLink & (LINKR+LINKL)) == 1) {
- Shape &= 1;
- } else if(isTashkeelChar(dest[i])) {
- if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) &&
- dest[i] != 0x064C && dest[i] != 0x064D )
- {
- Shape = 1;
- if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE ) {
- Shape = 0;
- }
- } else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR){
- Shape = 1;
- } else {
- Shape = 0;
- }
- }
- if ((dest[i] ^ 0x0600) < 0x100) {
- if ( isTashkeelChar(dest[i]) ){
- if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR){
- dest[i] = TASHKEEL_SPACE_SUB;
- tashkeelFound = 1;
- } else {
- /* to ensure the array index is within the range */
- U_ASSERT(dest[i] >= 0x064Bu
- && dest[i]-0x064Bu < UPRV_LENGTHOF(IrrelevantPos));
- dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + static_cast<UChar>(Shape);
- }
- }else if ((currLink & APRESENT) > 0) {
- dest[i] = (UChar)(0xFB50 + (currLink >> 8) + Shape);
- }else if ((currLink >> 8) > 0 && (currLink & IRRELEVANT) == 0) {
- dest[i] = (UChar)(0xFE70 + (currLink >> 8) + Shape);
- }
- }
- }
-
- /* move one notch forward */
- if ((currLink & IRRELEVANT) == 0) {
- prevLink = lastLink;
- lastLink = currLink;
- lastPos = i;
- }
-
- i = i + step;
- if (i == Nx) {
- currLink = nextLink;
- Nx = -2;
- } else if(i != iend) {
- currLink = getLink(dest[i]);
- }
- }
- destSize = sourceLength;
- if ( (lamalef_found != 0 ) || (tashkeelFound != 0) ){
- destSize = handleGeneratedSpaces(dest,sourceLength,destSize,options,pErrorCode, shapeVars);
- }
-
- if ( (seenfamFound != 0) || (yehhamzaFound != 0) ) {
- destSize = expandCompositChar(dest, sourceLength,destSize,options,pErrorCode, SHAPE_MODE,shapeVars);
- }
- return destSize;
-}
-
-/*
- *Name : deShapeUnicode
- *Function : Converts an Arabic Unicode buffer in FExx Range into unshaped
- * arabic Unicode buffer in 06xx Range
- */
-static int32_t
-deShapeUnicode(UChar *dest, int32_t sourceLength,
- int32_t destSize,uint32_t options,
- UErrorCode *pErrorCode, struct uShapeVariables shapeVars) {
- int32_t i = 0;
- int32_t lamalef_found = 0;
- int32_t yehHamzaComposeEnabled = 0;
- int32_t seenComposeEnabled = 0;
-
- yehHamzaComposeEnabled = ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR) ? 1 : 0;
- seenComposeEnabled = ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR)? 1 : 0;
-
- /*
- *This for loop changes the buffer from the Unicode FE range to
- *the Unicode 06 range
- */
-
- for(i = 0; i < sourceLength; i++) {
- UChar inputChar = dest[i];
- if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { /* FBxx Arabic range */
- UChar c = convertFBto06 [ (inputChar - 0xFB50) ];
- if (c != 0)
- dest[i] = c;
- } else if( (yehHamzaComposeEnabled == 1) && ((inputChar == HAMZA06_CHAR) || (inputChar == HAMZAFE_CHAR))
- && (i < (sourceLength - 1)) && isAlefMaksouraChar(dest[i+1] )) {
- dest[i] = SPACE_CHAR;
- dest[i+1] = YEH_HAMZA_CHAR;
- } else if ( (seenComposeEnabled == 1) && (isTailChar(inputChar)) && (i< (sourceLength - 1))
- && (isSeenTailFamilyChar(dest[i+1])) ) {
- dest[i] = SPACE_CHAR;
- } else if (( inputChar >= 0xFE70) && (inputChar <= 0xFEF4 )) { /* FExx Arabic range */
- dest[i] = convertFEto06 [ (inputChar - 0xFE70) ];
- } else {
- dest[i] = inputChar ;
- }
-
- if( isLamAlefChar(dest[i]) )
- lamalef_found = 1;
- }
-
- destSize = sourceLength;
- if (lamalef_found != 0){
- destSize = expandCompositChar(dest,sourceLength,destSize,options,pErrorCode,DESHAPE_MODE, shapeVars);
- }
- return destSize;
-}
-
-/*
- ****************************************
- * u_shapeArabic
- ****************************************
- */
-
-U_CAPI int32_t U_EXPORT2
-u_shapeArabic(const UChar *source, int32_t sourceLength,
- UChar *dest, int32_t destCapacity,
- uint32_t options,
- UErrorCode *pErrorCode) {
-
- int32_t destLength;
- struct uShapeVariables shapeVars = { OLD_TAIL_CHAR,U_SHAPE_LAMALEF_BEGIN,U_SHAPE_LAMALEF_END,U_SHAPE_TASHKEEL_BEGIN,U_SHAPE_TASHKEEL_END,0};
-
- /* usual error checking */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */
- if( source==NULL || sourceLength<-1 || (dest==NULL && destCapacity!=0) || destCapacity<0 ||
- (((options&U_SHAPE_TASHKEEL_MASK) > 0) &&
- ((options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) ) ||
- (((options&U_SHAPE_TASHKEEL_MASK) > 0) &&
- ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE)) ||
- (options&U_SHAPE_DIGIT_TYPE_RESERVED)==U_SHAPE_DIGIT_TYPE_RESERVED ||
- (options&U_SHAPE_DIGITS_MASK)==U_SHAPE_DIGITS_RESERVED ||
- ((options&U_SHAPE_LAMALEF_MASK) != U_SHAPE_LAMALEF_RESIZE &&
- (options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) != 0) ||
- ((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) == U_SHAPE_AGGREGATE_TASHKEEL &&
- (options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) != U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)
- )
- {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- /* Validate lamalef options */
- if(((options&U_SHAPE_LAMALEF_MASK) > 0)&&
- !(((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_BEGIN) ||
- ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_END ) ||
- ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE )||
- ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_AUTO) ||
- ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_NEAR)))
- {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- /* Validate Tashkeel options */
- if(((options&U_SHAPE_TASHKEEL_MASK) > 0)&&
- !(((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_BEGIN) ||
- ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_END )
- ||((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE )||
- ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)))
- {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- /* determine the source length */
- if(sourceLength==-1) {
- sourceLength=u_strlen(source);
- }
- if(sourceLength<=0) {
- return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
- }
-
- /* check that source and destination do not overlap */
- if( dest!=NULL &&
- ((source<=dest && dest<source+sourceLength) ||
- (dest<=source && source<dest+destCapacity))) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* Does Options contain the new Seen Tail Unicode code point option */
- if ( (options&U_SHAPE_TAIL_TYPE_MASK) == U_SHAPE_TAIL_NEW_UNICODE){
- shapeVars.tailChar = NEW_TAIL_CHAR;
- }else {
- shapeVars.tailChar = OLD_TAIL_CHAR;
- }
-
- if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) {
- UChar buffer[300];
- UChar *tempbuffer, *tempsource = NULL;
- int32_t outputSize, spacesCountl=0, spacesCountr=0;
-
- if((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK)>0) {
- int32_t logical_order = (options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL;
- int32_t aggregate_tashkeel =
- (options&(U_SHAPE_AGGREGATE_TASHKEEL_MASK+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)) ==
- (U_SHAPE_AGGREGATE_TASHKEEL+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED);
- int step=logical_order?1:-1;
- int j=logical_order?-1:2*sourceLength;
- int i=logical_order?-1:sourceLength;
- int end=logical_order?sourceLength:-1;
- int aggregation_possible = 1;
- UChar prev = 0;
- UChar prevLink, currLink = 0;
- int newSourceLength = 0;
- tempsource = (UChar *)uprv_malloc(2*sourceLength*U_SIZEOF_UCHAR);
- if(tempsource == NULL) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
-
- while ((i+=step) != end) {
- prevLink = currLink;
- currLink = getLink(source[i]);
- if (aggregate_tashkeel && ((prevLink|currLink)&COMBINE) == COMBINE && aggregation_possible) {
- aggregation_possible = 0;
- tempsource[j] = (prev<source[i]?prev:source[i])-0x064C+0xFC5E;
- currLink = getLink(tempsource[j]);
- } else {
- aggregation_possible = 1;
- tempsource[j+=step] = source[i];
- prev = source[i];
- newSourceLength++;
- }
- }
- source = tempsource+(logical_order?0:j);
- sourceLength = newSourceLength;
- }
-
- /* calculate destination size */
- /* TODO: do we ever need to do this pure preflighting? */
- if(((options&U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE) ||
- ((options&U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE)) {
- outputSize=calculateSize(source,sourceLength,destCapacity,options);
- } else {
- outputSize=sourceLength;
- }
-
- if(outputSize>destCapacity) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- if (tempsource != NULL) uprv_free(tempsource);
- return outputSize;
- }
-
- /*
- * need a temporary buffer of size max(outputSize, sourceLength)
- * because at first we copy source->temp
- */
- if(sourceLength>outputSize) {
- outputSize=sourceLength;
- }
-
- /* Start of Arabic letter shaping part */
- if(outputSize<=UPRV_LENGTHOF(buffer)) {
- outputSize=UPRV_LENGTHOF(buffer);
- tempbuffer=buffer;
- } else {
- tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR);
-
- /*Test for NULL*/
- if(tempbuffer == NULL) {
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- if (tempsource != NULL) uprv_free(tempsource);
- return 0;
- }
- }
- u_memcpy(tempbuffer, source, sourceLength);
- if (tempsource != NULL){
- uprv_free(tempsource);
- }
-
- if(sourceLength<outputSize) {
- uprv_memset(tempbuffer+sourceLength, 0, (outputSize-sourceLength)*U_SIZEOF_UCHAR);
- }
-
- if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) {
- countSpaces(tempbuffer,sourceLength,options,&spacesCountl,&spacesCountr);
- invertBuffer(tempbuffer,sourceLength,options,spacesCountl,spacesCountr);
- }
-
- if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) {
- if((options&U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK) == U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END) {
- shapeVars.spacesRelativeToTextBeginEnd = 1;
- shapeVars.uShapeLamalefBegin = U_SHAPE_LAMALEF_END;
- shapeVars.uShapeLamalefEnd = U_SHAPE_LAMALEF_BEGIN;
- shapeVars.uShapeTashkeelBegin = U_SHAPE_TASHKEEL_END;
- shapeVars.uShapeTashkeelEnd = U_SHAPE_TASHKEEL_BEGIN;
- }
- }
-
- switch(options&U_SHAPE_LETTERS_MASK) {
- case U_SHAPE_LETTERS_SHAPE :
- if( (options&U_SHAPE_TASHKEEL_MASK)> 0
- && ((options&U_SHAPE_TASHKEEL_MASK) !=U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)) {
- /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */
- destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,2,shapeVars);
- }else {
- /* default Call the shaping function with tashkeel flag == 1 */
- destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,1,shapeVars);
-
- /*After shaping text check if user wants to remove tashkeel and replace it with tatweel*/
- if( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL){
- destLength = handleTashkeelWithTatweel(tempbuffer,destLength,destCapacity,options,pErrorCode);
- }
- }
- break;
- case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED :
- /* Call the shaping function with tashkeel flag == 0 */
- destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,0,shapeVars);
- break;
-
- case U_SHAPE_LETTERS_UNSHAPE :
- /* Call the deshaping function */
- destLength = deShapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,shapeVars);
- break;
- default :
- /* will never occur because of validity checks above */
- destLength = 0;
- break;
- }
-
- /*
- * TODO: (markus 2002aug01)
- * For as long as we always preflight the outputSize above
- * we should U_ASSERT(outputSize==destLength)
- * except for the adjustment above before the tempbuffer allocation
- */
-
- if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) {
- countSpaces(tempbuffer,destLength,options,&spacesCountl,&spacesCountr);
- invertBuffer(tempbuffer,destLength,options,spacesCountl,spacesCountr);
- }
- u_memcpy(dest, tempbuffer, uprv_min(destLength, destCapacity));
-
- if(tempbuffer!=buffer) {
- uprv_free(tempbuffer);
- }
-
- if(destLength>destCapacity) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- return destLength;
- }
-
- /* End of Arabic letter shaping part */
- } else {
- /*
- * No letter shaping:
- * just make sure the destination is large enough and copy the string.
- */
- if(destCapacity<sourceLength) {
- /* this catches preflighting, too */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- return sourceLength;
- }
- u_memcpy(dest, source, sourceLength);
- destLength=sourceLength;
- }
-
- /*
- * Perform number shaping.
- * With UTF-16 or UTF-32, the length of the string is constant.
- * The easiest way to do this is to operate on the destination and
- * "shape" the digits in-place.
- */
- if((options&U_SHAPE_DIGITS_MASK)!=U_SHAPE_DIGITS_NOOP) {
- UChar digitBase;
- int32_t i;
-
- /* select the requested digit group */
- switch(options&U_SHAPE_DIGIT_TYPE_MASK) {
- case U_SHAPE_DIGIT_TYPE_AN:
- digitBase=0x660; /* Unicode: "Arabic-Indic digits" */
- break;
- case U_SHAPE_DIGIT_TYPE_AN_EXTENDED:
- digitBase=0x6f0; /* Unicode: "Eastern Arabic-Indic digits (Persian and Urdu)" */
- break;
- default:
- /* will never occur because of validity checks above */
- digitBase=0;
- break;
- }
-
- /* perform the requested operation */
- switch(options&U_SHAPE_DIGITS_MASK) {
- case U_SHAPE_DIGITS_EN2AN:
- /* add (digitBase-'0') to each European (ASCII) digit code point */
- digitBase-=0x30;
- for(i=0; i<destLength; ++i) {
- if(((uint32_t)dest[i]-0x30)<10) {
- dest[i]+=digitBase;
- }
- }
- break;
- case U_SHAPE_DIGITS_AN2EN:
- /* subtract (digitBase-'0') from each Arabic digit code point */
- for(i=0; i<destLength; ++i) {
- if(((uint32_t)dest[i]-(uint32_t)digitBase)<10) {
- dest[i]-=digitBase-0x30;
- }
- }
- break;
- case U_SHAPE_DIGITS_ALEN2AN_INIT_LR:
- _shapeToArabicDigitsWithContext(dest, destLength,
- digitBase,
- (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
- FALSE);
- break;
- case U_SHAPE_DIGITS_ALEN2AN_INIT_AL:
- _shapeToArabicDigitsWithContext(dest, destLength,
- digitBase,
- (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
- TRUE);
- break;
- default:
- /* will never occur because of validity checks above */
- break;
- }
- }
-
- return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
-}
diff --git a/contrib/libs/icu/common/usprep.cpp b/contrib/libs/icu/common/usprep.cpp
deleted file mode 100644
index 8351a773706..00000000000
--- a/contrib/libs/icu/common/usprep.cpp
+++ /dev/null
@@ -1,871 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- *******************************************************************************
- *
- * Copyright (C) 2003-2016, International Business Machines
- * Corporation and others. All Rights Reserved.
- *
- *******************************************************************************
- * file name: usprep.cpp
- * encoding: UTF-8
- * tab size: 8 (not used)
- * indentation:4
- *
- * created on: 2003jul2
- * created by: Ram Viswanadha
- */
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_IDNA
-
-#include "unicode/usprep.h"
-
-#include "unicode/normalizer2.h"
-#include "unicode/ustring.h"
-#include "unicode/uchar.h"
-#include "unicode/uversion.h"
-#include "umutex.h"
-#include "cmemory.h"
-#include "sprpimpl.h"
-#include "ustr_imp.h"
-#include "uhash.h"
-#include "cstring.h"
-#include "udataswp.h"
-#include "ucln_cmn.h"
-#include "ubidi_props.h"
-#include "uprops.h"
-
-U_NAMESPACE_USE
-
-U_CDECL_BEGIN
-
-/*
-Static cache for already opened StringPrep profiles
-*/
-static UHashtable *SHARED_DATA_HASHTABLE = NULL;
-static icu::UInitOnce gSharedDataInitOnce = U_INITONCE_INITIALIZER;
-
-static UMutex usprepMutex;
-/* format version of spp file */
-//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
-
-/* the Unicode version of the sprep data */
-static UVersionInfo dataVersion={ 0, 0, 0, 0 };
-
-/* Profile names must be aligned to UStringPrepProfileType */
-static const char * const PROFILE_NAMES[] = {
- "rfc3491", /* USPREP_RFC3491_NAMEPREP */
- "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */
- "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */
- "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */
- "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
- "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
- "rfc3722", /* USPREP_RFC3722_ISCSI */
- "rfc3920node", /* USPREP_RFC3920_NODEPREP */
- "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */
- "rfc4011", /* USPREP_RFC4011_MIB */
- "rfc4013", /* USPREP_RFC4013_SASLPREP */
- "rfc4505", /* USPREP_RFC4505_TRACE */
- "rfc4518", /* USPREP_RFC4518_LDAP */
- "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */
-};
-
-static UBool U_CALLCONV
-isSPrepAcceptable(void * /* context */,
- const char * /* type */,
- const char * /* name */,
- const UDataInfo *pInfo) {
- if(
- pInfo->size>=20 &&
- pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
- pInfo->charsetFamily==U_CHARSET_FAMILY &&
- pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
- pInfo->dataFormat[1]==0x50 &&
- pInfo->dataFormat[2]==0x52 &&
- pInfo->dataFormat[3]==0x50 &&
- pInfo->formatVersion[0]==3 &&
- pInfo->formatVersion[2]==UTRIE_SHIFT &&
- pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
- ) {
- //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
- uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
-static int32_t U_CALLCONV
-getSPrepFoldingOffset(uint32_t data) {
-
- return (int32_t)data;
-
-}
-
-/* hashes an entry */
-static int32_t U_CALLCONV
-hashEntry(const UHashTok parm) {
- UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
- UHashTok namekey, pathkey;
- namekey.pointer = b->name;
- pathkey.pointer = b->path;
- uint32_t unsignedHash = static_cast<uint32_t>(uhash_hashChars(namekey)) +
- 37u * static_cast<uint32_t>(uhash_hashChars(pathkey));
- return static_cast<int32_t>(unsignedHash);
-}
-
-/* compares two entries */
-static UBool U_CALLCONV
-compareEntries(const UHashTok p1, const UHashTok p2) {
- UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
- UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
- UHashTok name1, name2, path1, path2;
- name1.pointer = b1->name;
- name2.pointer = b2->name;
- path1.pointer = b1->path;
- path2.pointer = b2->path;
- return ((UBool)(uhash_compareChars(name1, name2) &
- uhash_compareChars(path1, path2)));
-}
-
-static void
-usprep_unload(UStringPrepProfile* data){
- udata_close(data->sprepData);
-}
-
-static int32_t
-usprep_internal_flushCache(UBool noRefCount){
- UStringPrepProfile *profile = NULL;
- UStringPrepKey *key = NULL;
- int32_t pos = UHASH_FIRST;
- int32_t deletedNum = 0;
- const UHashElement *e;
-
- /*
- * if shared data hasn't even been lazy evaluated yet
- * return 0
- */
- umtx_lock(&usprepMutex);
- if (SHARED_DATA_HASHTABLE == NULL) {
- umtx_unlock(&usprepMutex);
- return 0;
- }
-
- /*creates an enumeration to iterate through every element in the table */
- while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
- {
- profile = (UStringPrepProfile *) e->value.pointer;
- key = (UStringPrepKey *) e->key.pointer;
-
- if ((noRefCount== FALSE && profile->refCount == 0) ||
- noRefCount== TRUE) {
- deletedNum++;
- uhash_removeElement(SHARED_DATA_HASHTABLE, e);
-
- /* unload the data */
- usprep_unload(profile);
-
- if(key->name != NULL) {
- uprv_free(key->name);
- key->name=NULL;
- }
- if(key->path != NULL) {
- uprv_free(key->path);
- key->path=NULL;
- }
- uprv_free(profile);
- uprv_free(key);
- }
-
- }
- umtx_unlock(&usprepMutex);
-
- return deletedNum;
-}
-
-/* Works just like ucnv_flushCache()
-static int32_t
-usprep_flushCache(){
- return usprep_internal_flushCache(FALSE);
-}
-*/
-
-static UBool U_CALLCONV usprep_cleanup(void){
- if (SHARED_DATA_HASHTABLE != NULL) {
- usprep_internal_flushCache(TRUE);
- if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
- uhash_close(SHARED_DATA_HASHTABLE);
- SHARED_DATA_HASHTABLE = NULL;
- }
- }
- gSharedDataInitOnce.reset();
- return (SHARED_DATA_HASHTABLE == NULL);
-}
-U_CDECL_END
-
-
-/** Initializes the cache for resources */
-static void U_CALLCONV
-createCache(UErrorCode &status) {
- SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
- if (U_FAILURE(status)) {
- SHARED_DATA_HASHTABLE = NULL;
- }
- ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
-}
-
-static void
-initCache(UErrorCode *status) {
- umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
-}
-
-static UBool U_CALLCONV
-loadData(UStringPrepProfile* profile,
- const char* path,
- const char* name,
- const char* type,
- UErrorCode* errorCode) {
- /* load Unicode SPREP data from file */
- UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
- UDataMemory *dataMemory;
- const int32_t *p=NULL;
- const uint8_t *pb;
- UVersionInfo normUnicodeVersion;
- int32_t normUniVer, sprepUniVer, normCorrVer;
-
- if(errorCode==NULL || U_FAILURE(*errorCode)) {
- return 0;
- }
-
- /* open the data outside the mutex block */
- //TODO: change the path
- dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
- if(U_FAILURE(*errorCode)) {
- return FALSE;
- }
-
- p=(const int32_t *)udata_getMemory(dataMemory);
- pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
- utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
- _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
-
-
- if(U_FAILURE(*errorCode)) {
- udata_close(dataMemory);
- return FALSE;
- }
-
- /* in the mutex block, set the data for this process */
- umtx_lock(&usprepMutex);
- if(profile->sprepData==NULL) {
- profile->sprepData=dataMemory;
- dataMemory=NULL;
- uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
- uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
- } else {
- p=(const int32_t *)udata_getMemory(profile->sprepData);
- }
- umtx_unlock(&usprepMutex);
- /* initialize some variables */
- profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
-
- u_getUnicodeVersion(normUnicodeVersion);
- normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
- (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
- sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
- (dataVersion[2] << 8 ) + (dataVersion[3]);
- normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
-
- if(U_FAILURE(*errorCode)){
- udata_close(dataMemory);
- return FALSE;
- }
- if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
- normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
- ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
- ){
- *errorCode = U_INVALID_FORMAT_ERROR;
- udata_close(dataMemory);
- return FALSE;
- }
- profile->isDataLoaded = TRUE;
-
- /* if a different thread set it first, then close the extra data */
- if(dataMemory!=NULL) {
- udata_close(dataMemory); /* NULL if it was set correctly */
- }
-
-
- return profile->isDataLoaded;
-}
-
-static UStringPrepProfile*
-usprep_getProfile(const char* path,
- const char* name,
- UErrorCode *status){
-
- UStringPrepProfile* profile = NULL;
-
- initCache(status);
-
- if(U_FAILURE(*status)){
- return NULL;
- }
-
- UStringPrepKey stackKey;
- /*
- * const is cast way to save malloc, strcpy and free calls
- * we use the passed in pointers for fetching the data from the
- * hash table which is safe
- */
- stackKey.name = (char*) name;
- stackKey.path = (char*) path;
-
- /* fetch the data from the cache */
- umtx_lock(&usprepMutex);
- profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
- if(profile != NULL) {
- profile->refCount++;
- }
- umtx_unlock(&usprepMutex);
-
- if(profile == NULL) {
- /* else load the data and put the data in the cache */
- LocalMemory<UStringPrepProfile> newProfile;
- if(newProfile.allocateInsteadAndReset() == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- /* load the data */
- if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
- return NULL;
- }
-
- /* get the options */
- newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
- newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
-
- LocalMemory<UStringPrepKey> key;
- LocalMemory<char> keyName;
- LocalMemory<char> keyPath;
- if( key.allocateInsteadAndReset() == NULL ||
- keyName.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(name)+1)) == NULL ||
- (path != NULL &&
- keyPath.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(path)+1)) == NULL)
- ) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- usprep_unload(newProfile.getAlias());
- return NULL;
- }
-
- umtx_lock(&usprepMutex);
- // If another thread already inserted the same key/value, refcount and cleanup our thread data
- profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
- if(profile != NULL) {
- profile->refCount++;
- usprep_unload(newProfile.getAlias());
- }
- else {
- /* initialize the key members */
- key->name = keyName.orphan();
- uprv_strcpy(key->name, name);
- if(path != NULL){
- key->path = keyPath.orphan();
- uprv_strcpy(key->path, path);
- }
- profile = newProfile.orphan();
-
- /* add the data object to the cache */
- profile->refCount = 1;
- uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
- }
- umtx_unlock(&usprepMutex);
- }
-
- return profile;
-}
-
-U_CAPI UStringPrepProfile* U_EXPORT2
-usprep_open(const char* path,
- const char* name,
- UErrorCode* status){
-
- if(status == NULL || U_FAILURE(*status)){
- return NULL;
- }
-
- /* initialize the profile struct members */
- return usprep_getProfile(path,name,status);
-}
-
-U_CAPI UStringPrepProfile* U_EXPORT2
-usprep_openByType(UStringPrepProfileType type,
- UErrorCode* status) {
- if(status == NULL || U_FAILURE(*status)){
- return NULL;
- }
- int32_t index = (int32_t)type;
- if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- return usprep_open(NULL, PROFILE_NAMES[index], status);
-}
-
-U_CAPI void U_EXPORT2
-usprep_close(UStringPrepProfile* profile){
- if(profile==NULL){
- return;
- }
-
- umtx_lock(&usprepMutex);
- /* decrement the ref count*/
- if(profile->refCount > 0){
- profile->refCount--;
- }
- umtx_unlock(&usprepMutex);
-
-}
-
-U_CFUNC void
-uprv_syntaxError(const UChar* rules,
- int32_t pos,
- int32_t rulesLen,
- UParseError* parseError){
- if(parseError == NULL){
- return;
- }
- parseError->offset = pos;
- parseError->line = 0 ; // we are not using line numbers
-
- // for pre-context
- int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
- int32_t limit = pos;
-
- u_memcpy(parseError->preContext,rules+start,limit-start);
- //null terminate the buffer
- parseError->preContext[limit-start] = 0;
-
- // for post-context; include error rules[pos]
- start = pos;
- limit = start + (U_PARSE_CONTEXT_LEN-1);
- if (limit > rulesLen) {
- limit = rulesLen;
- }
- if (start < rulesLen) {
- u_memcpy(parseError->postContext,rules+start,limit-start);
- }
- //null terminate the buffer
- parseError->postContext[limit-start]= 0;
-}
-
-
-static inline UStringPrepType
-getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
-
- UStringPrepType type;
- if(trieWord == 0){
- /*
- * Initial value stored in the mapping table
- * just return USPREP_TYPE_LIMIT .. so that
- * the source codepoint is copied to the destination
- */
- type = USPREP_TYPE_LIMIT;
- isIndex =FALSE;
- value = 0;
- }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
- type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
- isIndex =FALSE;
- value = 0;
- }else{
- /* get the type */
- type = USPREP_MAP;
- /* ascertain if the value is index or delta */
- if(trieWord & 0x02){
- isIndex = TRUE;
- value = trieWord >> 2; //mask off the lower 2 bits and shift
- }else{
- isIndex = FALSE;
- value = (int16_t)trieWord;
- value = (value >> 2);
- }
-
- if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
- type = USPREP_DELETE;
- isIndex =FALSE;
- value = 0;
- }
- }
- return type;
-}
-
-// TODO: change to writing to UnicodeString not UChar *
-static int32_t
-usprep_map( const UStringPrepProfile* profile,
- const UChar* src, int32_t srcLength,
- UChar* dest, int32_t destCapacity,
- int32_t options,
- UParseError* parseError,
- UErrorCode* status ){
-
- uint16_t result;
- int32_t destIndex=0;
- int32_t srcIndex;
- UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
- UStringPrepType type;
- int16_t value;
- UBool isIndex;
- const int32_t* indexes = profile->indexes;
-
- // no error checking the caller check for error and arguments
- // no string length check the caller finds out the string length
-
- for(srcIndex=0;srcIndex<srcLength;){
- UChar32 ch;
-
- U16_NEXT(src,srcIndex,srcLength,ch);
-
- result=0;
-
- UTRIE_GET16(&profile->sprepTrie,ch,result);
-
- type = getValues(result, value, isIndex);
-
- // check if the source codepoint is unassigned
- if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
-
- uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
- *status = U_STRINGPREP_UNASSIGNED_ERROR;
- return 0;
-
- }else if(type == USPREP_MAP){
-
- int32_t index, length;
-
- if(isIndex){
- index = value;
- if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
- index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
- length = 1;
- }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
- index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
- length = 2;
- }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
- index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
- length = 3;
- }else{
- length = profile->mappingData[index++];
-
- }
-
- /* copy mapping to destination */
- for(int32_t i=0; i< length; i++){
- if(destIndex < destCapacity ){
- dest[destIndex] = profile->mappingData[index+i];
- }
- destIndex++; /* for pre-flighting */
- }
- continue;
- }else{
- // subtract the delta to arrive at the code point
- ch -= value;
- }
-
- }else if(type==USPREP_DELETE){
- // just consume the codepoint and contine
- continue;
- }
- //copy the code point into destination
- if(ch <= 0xFFFF){
- if(destIndex < destCapacity ){
- dest[destIndex] = (UChar)ch;
- }
- destIndex++;
- }else{
- if(destIndex+1 < destCapacity ){
- dest[destIndex] = U16_LEAD(ch);
- dest[destIndex+1] = U16_TRAIL(ch);
- }
- destIndex +=2;
- }
-
- }
-
- return u_terminateUChars(dest, destCapacity, destIndex, status);
-}
-
-/*
- 1) Map -- For each character in the input, check if it has a mapping
- and, if so, replace it with its mapping.
-
- 2) Normalize -- Possibly normalize the result of step 1 using Unicode
- normalization.
-
- 3) Prohibit -- Check for any characters that are not allowed in the
- output. If any are found, return an error.
-
- 4) Check bidi -- Possibly check for right-to-left characters, and if
- any are found, make sure that the whole string satisfies the
- requirements for bidirectional strings. If the string does not
- satisfy the requirements for bidirectional strings, return an
- error.
- [Unicode3.2] defines several bidirectional categories; each character
- has one bidirectional category assigned to it. For the purposes of
- the requirements below, an "RandALCat character" is a character that
- has Unicode bidirectional categories "R" or "AL"; an "LCat character"
- is a character that has Unicode bidirectional category "L". Note
-
-
- that there are many characters which fall in neither of the above
- definitions; Latin digits (<U+0030> through <U+0039>) are examples of
- this because they have bidirectional category "EN".
-
- In any profile that specifies bidirectional character handling, all
- three of the following requirements MUST be met:
-
- 1) The characters in section 5.8 MUST be prohibited.
-
- 2) If a string contains any RandALCat character, the string MUST NOT
- contain any LCat character.
-
- 3) If a string contains any RandALCat character, a RandALCat
- character MUST be the first character of the string, and a
- RandALCat character MUST be the last character of the string.
-*/
-U_CAPI int32_t U_EXPORT2
-usprep_prepare( const UStringPrepProfile* profile,
- const UChar* src, int32_t srcLength,
- UChar* dest, int32_t destCapacity,
- int32_t options,
- UParseError* parseError,
- UErrorCode* status ){
-
- // check error status
- if(U_FAILURE(*status)){
- return 0;
- }
-
- //check arguments
- if(profile==NULL ||
- (src==NULL ? srcLength!=0 : srcLength<-1) ||
- (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
- *status=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- //get the string length
- if(srcLength < 0){
- srcLength = u_strlen(src);
- }
- // map
- UnicodeString s1;
- UChar *b1 = s1.getBuffer(srcLength);
- if(b1==NULL){
- *status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- int32_t b1Len = usprep_map(profile, src, srcLength,
- b1, s1.getCapacity(), options, parseError, status);
- s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
-
- if(*status == U_BUFFER_OVERFLOW_ERROR){
- // redo processing of string
- /* we do not have enough room so grow the buffer*/
- b1 = s1.getBuffer(b1Len);
- if(b1==NULL){
- *status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
-
- *status = U_ZERO_ERROR; // reset error
- b1Len = usprep_map(profile, src, srcLength,
- b1, s1.getCapacity(), options, parseError, status);
- s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
- }
- if(U_FAILURE(*status)){
- return 0;
- }
-
- // normalize
- UnicodeString s2;
- if(profile->doNFKC){
- const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
- FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
- if(U_FAILURE(*status)){
- return 0;
- }
- fn2.normalize(s1, s2, *status);
- }else{
- s2.fastCopyFrom(s1);
- }
- if(U_FAILURE(*status)){
- return 0;
- }
-
- // Prohibit and checkBiDi in one pass
- const UChar *b2 = s2.getBuffer();
- int32_t b2Len = s2.length();
- UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
- UBool leftToRight=FALSE, rightToLeft=FALSE;
- int32_t rtlPos =-1, ltrPos =-1;
-
- for(int32_t b2Index=0; b2Index<b2Len;){
- UChar32 ch = 0;
- U16_NEXT(b2, b2Index, b2Len, ch);
-
- uint16_t result;
- UTRIE_GET16(&profile->sprepTrie,ch,result);
-
- int16_t value;
- UBool isIndex;
- UStringPrepType type = getValues(result, value, isIndex);
-
- if( type == USPREP_PROHIBITED ||
- ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
- ){
- *status = U_STRINGPREP_PROHIBITED_ERROR;
- uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError);
- return 0;
- }
-
- if(profile->checkBiDi) {
- direction = ubidi_getClass(ch);
- if(firstCharDir == U_CHAR_DIRECTION_COUNT){
- firstCharDir = direction;
- }
- if(direction == U_LEFT_TO_RIGHT){
- leftToRight = TRUE;
- ltrPos = b2Index-1;
- }
- if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
- rightToLeft = TRUE;
- rtlPos = b2Index-1;
- }
- }
- }
- if(profile->checkBiDi == TRUE){
- // satisfy 2
- if( leftToRight == TRUE && rightToLeft == TRUE){
- *status = U_STRINGPREP_CHECK_BIDI_ERROR;
- uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
- return 0;
- }
-
- //satisfy 3
- if( rightToLeft == TRUE &&
- !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
- (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
- ){
- *status = U_STRINGPREP_CHECK_BIDI_ERROR;
- uprv_syntaxError(b2, rtlPos, b2Len, parseError);
- return FALSE;
- }
- }
- return s2.extract(dest, destCapacity, *status);
-}
-
-
-/* data swapping ------------------------------------------------------------ */
-
-U_CAPI int32_t U_EXPORT2
-usprep_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const UDataInfo *pInfo;
- int32_t headerSize;
-
- const uint8_t *inBytes;
- uint8_t *outBytes;
-
- const int32_t *inIndexes;
- int32_t indexes[16];
-
- int32_t i, offset, count, size;
-
- /* udata_swapDataHeader checks the arguments */
- headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /* check data format and format version */
- pInfo=(const UDataInfo *)((const char *)inData+4);
- if(!(
- pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
- pInfo->dataFormat[1]==0x50 &&
- pInfo->dataFormat[2]==0x52 &&
- pInfo->dataFormat[3]==0x50 &&
- pInfo->formatVersion[0]==3
- )) {
- udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
- pInfo->dataFormat[0], pInfo->dataFormat[1],
- pInfo->dataFormat[2], pInfo->dataFormat[3],
- pInfo->formatVersion[0]);
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return 0;
- }
-
- inBytes=(const uint8_t *)inData+headerSize;
- outBytes=(uint8_t *)outData+headerSize;
-
- inIndexes=(const int32_t *)inBytes;
-
- if(length>=0) {
- length-=headerSize;
- if(length<16*4) {
- udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
- length);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- }
-
- /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
- for(i=0; i<16; ++i) {
- indexes[i]=udata_readInt32(ds, inIndexes[i]);
- }
-
- /* calculate the total length of the data */
- size=
- 16*4+ /* size of indexes[] */
- indexes[_SPREP_INDEX_TRIE_SIZE]+
- indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
-
- if(length>=0) {
- if(length<size) {
- udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
- length);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- /* copy the data for inaccessible bytes */
- if(inBytes!=outBytes) {
- uprv_memcpy(outBytes, inBytes, size);
- }
-
- offset=0;
-
- /* swap the int32_t indexes[] */
- count=16*4;
- ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
- offset+=count;
-
- /* swap the UTrie */
- count=indexes[_SPREP_INDEX_TRIE_SIZE];
- utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
- offset+=count;
-
- /* swap the uint16_t mappingTable[] */
- count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
- ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
- //offset+=count;
- }
-
- return headerSize+size;
-}
-
-#endif /* #if !UCONFIG_NO_IDNA */
diff --git a/contrib/libs/icu/common/ustack.cpp b/contrib/libs/icu/common/ustack.cpp
deleted file mode 100644
index fb314b0ebe5..00000000000
--- a/contrib/libs/icu/common/ustack.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2003-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#include "uvector.h"
-
-U_NAMESPACE_BEGIN
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UStack)
-
-UStack::UStack(UErrorCode &status) :
- UVector(status)
-{
-}
-
-UStack::UStack(int32_t initialCapacity, UErrorCode &status) :
- UVector(initialCapacity, status)
-{
-}
-
-UStack::UStack(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status) :
- UVector(d, c, status)
-{
-}
-
-UStack::UStack(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status) :
- UVector(d, c, initialCapacity, status)
-{
-}
-
-UStack::~UStack() {}
-
-void* UStack::pop(void) {
- int32_t n = size() - 1;
- void* result = 0;
- if (n >= 0) {
- result = elementAt(n);
- removeElementAt(n);
- }
- return result;
-}
-
-int32_t UStack::popi(void) {
- int32_t n = size() - 1;
- int32_t result = 0;
- if (n >= 0) {
- result = elementAti(n);
- removeElementAt(n);
- }
- return result;
-}
-
-int32_t UStack::search(void* obj) const {
- int32_t i = indexOf(obj);
- return (i >= 0) ? size() - i : i;
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/ustr_cnv.cpp b/contrib/libs/icu/common/ustr_cnv.cpp
deleted file mode 100644
index 9a25a9905a2..00000000000
--- a/contrib/libs/icu/common/ustr_cnv.cpp
+++ /dev/null
@@ -1,256 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1998-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: ustr_cnv.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004aug24
-* created by: Markus W. Scherer
-*
-* Character conversion functions moved here from ustring.c
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/ustring.h"
-#include "unicode/ucnv.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "umutex.h"
-#include "ustr_cnv.h"
-#include "ucnv_bld.h"
-
-/* mutexed access to a shared default converter ----------------------------- */
-
-static UConverter *gDefaultConverter = NULL;
-
-U_CAPI UConverter* U_EXPORT2
-u_getDefaultConverter(UErrorCode *status)
-{
- UConverter *converter = NULL;
-
- if (gDefaultConverter != NULL) {
- icu::umtx_lock(NULL);
-
- /* need to check to make sure it wasn't taken out from under us */
- if (gDefaultConverter != NULL) {
- converter = gDefaultConverter;
- gDefaultConverter = NULL;
- }
- icu::umtx_unlock(NULL);
- }
-
- /* if the cache was empty, create a converter */
- if(converter == NULL) {
- converter = ucnv_open(NULL, status);
- if(U_FAILURE(*status)) {
- ucnv_close(converter);
- converter = NULL;
- }
- }
-
- return converter;
-}
-
-U_CAPI void U_EXPORT2
-u_releaseDefaultConverter(UConverter *converter)
-{
- if(gDefaultConverter == NULL) {
- if (converter != NULL) {
- ucnv_reset(converter);
- }
- ucnv_enableCleanup();
- icu::umtx_lock(NULL);
- if(gDefaultConverter == NULL) {
- gDefaultConverter = converter;
- converter = NULL;
- }
- icu::umtx_unlock(NULL);
- }
-
- if(converter != NULL) {
- ucnv_close(converter);
- }
-}
-
-U_CAPI void U_EXPORT2
-u_flushDefaultConverter()
-{
- UConverter *converter = NULL;
-
- if (gDefaultConverter != NULL) {
- icu::umtx_lock(NULL);
-
- /* need to check to make sure it wasn't taken out from under us */
- if (gDefaultConverter != NULL) {
- converter = gDefaultConverter;
- gDefaultConverter = NULL;
- }
- icu::umtx_unlock(NULL);
- }
-
- /* if the cache was populated, flush it */
- if(converter != NULL) {
- ucnv_close(converter);
- }
-}
-
-
-/* conversions between char* and UChar* ------------------------------------- */
-
-/* maximum string length for u_uastrcpy() and u_austrcpy() implementations */
-#define MAX_STRLEN 0x0FFFFFFF
-
-/*
- returns the minimum of (the length of the null-terminated string) and n.
-*/
-static int32_t u_astrnlen(const char *s1, int32_t n)
-{
- int32_t len = 0;
-
- if (s1)
- {
- while (n-- && *(s1++))
- {
- len++;
- }
- }
- return len;
-}
-
-U_CAPI UChar* U_EXPORT2
-u_uastrncpy(UChar *ucs1,
- const char *s2,
- int32_t n)
-{
- UChar *target = ucs1;
- UErrorCode err = U_ZERO_ERROR;
- UConverter *cnv = u_getDefaultConverter(&err);
- if(U_SUCCESS(err) && cnv != NULL) {
- ucnv_reset(cnv);
- ucnv_toUnicode(cnv,
- &target,
- ucs1+n,
- &s2,
- s2+u_astrnlen(s2, n),
- NULL,
- TRUE,
- &err);
- ucnv_reset(cnv); /* be good citizens */
- u_releaseDefaultConverter(cnv);
- if(U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR) ) {
- *ucs1 = 0; /* failure */
- }
- if(target < (ucs1+n)) { /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */
- *target = 0; /* terminate */
- }
- } else {
- *ucs1 = 0;
- }
- return ucs1;
-}
-
-U_CAPI UChar* U_EXPORT2
-u_uastrcpy(UChar *ucs1,
- const char *s2 )
-{
- UErrorCode err = U_ZERO_ERROR;
- UConverter *cnv = u_getDefaultConverter(&err);
- if(U_SUCCESS(err) && cnv != NULL) {
- ucnv_toUChars(cnv,
- ucs1,
- MAX_STRLEN,
- s2,
- (int32_t)uprv_strlen(s2),
- &err);
- u_releaseDefaultConverter(cnv);
- if(U_FAILURE(err)) {
- *ucs1 = 0;
- }
- } else {
- *ucs1 = 0;
- }
- return ucs1;
-}
-
-/*
- returns the minimum of (the length of the null-terminated string) and n.
-*/
-static int32_t u_ustrnlen(const UChar *ucs1, int32_t n)
-{
- int32_t len = 0;
-
- if (ucs1)
- {
- while (n-- && *(ucs1++))
- {
- len++;
- }
- }
- return len;
-}
-
-U_CAPI char* U_EXPORT2
-u_austrncpy(char *s1,
- const UChar *ucs2,
- int32_t n)
-{
- char *target = s1;
- UErrorCode err = U_ZERO_ERROR;
- UConverter *cnv = u_getDefaultConverter(&err);
- if(U_SUCCESS(err) && cnv != NULL) {
- ucnv_reset(cnv);
- ucnv_fromUnicode(cnv,
- &target,
- s1+n,
- &ucs2,
- ucs2+u_ustrnlen(ucs2, n),
- NULL,
- TRUE,
- &err);
- ucnv_reset(cnv); /* be good citizens */
- u_releaseDefaultConverter(cnv);
- if(U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR) ) {
- *s1 = 0; /* failure */
- }
- if(target < (s1+n)) { /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */
- *target = 0; /* terminate */
- }
- } else {
- *s1 = 0;
- }
- return s1;
-}
-
-U_CAPI char* U_EXPORT2
-u_austrcpy(char *s1,
- const UChar *ucs2 )
-{
- UErrorCode err = U_ZERO_ERROR;
- UConverter *cnv = u_getDefaultConverter(&err);
- if(U_SUCCESS(err) && cnv != NULL) {
- int32_t len = ucnv_fromUChars(cnv,
- s1,
- MAX_STRLEN,
- ucs2,
- -1,
- &err);
- u_releaseDefaultConverter(cnv);
- s1[len] = 0;
- } else {
- *s1 = 0;
- }
- return s1;
-}
-
-#endif
diff --git a/contrib/libs/icu/common/ustr_cnv.h b/contrib/libs/icu/common/ustr_cnv.h
deleted file mode 100644
index 861e3ebff06..00000000000
--- a/contrib/libs/icu/common/ustr_cnv.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1999-2010, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* file name: ustr_cnv.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004Aug27
-* created by: George Rhoten
-*/
-
-#ifndef USTR_CNV_IMP_H
-#define USTR_CNV_IMP_H
-
-#include "unicode/utypes.h"
-#include "unicode/ucnv.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-/**
- * Get the default converter. This is a commonly used converter
- * that is used for the ustring and UnicodeString API.
- * Remember to use the u_releaseDefaultConverter when you are done.
- * @internal
- */
-U_CAPI UConverter* U_EXPORT2
-u_getDefaultConverter(UErrorCode *status);
-
-
-/**
- * Release the default converter to the converter cache.
- * @internal
- */
-U_CAPI void U_EXPORT2
-u_releaseDefaultConverter(UConverter *converter);
-
-/**
- * Flush the default converter, if cached.
- * @internal
- */
-U_CAPI void U_EXPORT2
-u_flushDefaultConverter(void);
-
-#endif
-
-#endif
diff --git a/contrib/libs/icu/common/ustr_imp.h b/contrib/libs/icu/common/ustr_imp.h
deleted file mode 100644
index 07170922d20..00000000000
--- a/contrib/libs/icu/common/ustr_imp.h
+++ /dev/null
@@ -1,155 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1999-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* file name: ustr_imp.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2001jan30
-* created by: Markus W. Scherer
-*/
-
-#ifndef __USTR_IMP_H__
-#define __USTR_IMP_H__
-
-#include "unicode/utypes.h"
-#include "unicode/utf8.h"
-
-/**
- * Internal option for unorm_cmpEquivFold() for strncmp style.
- * If set, checks for both string length and terminating NUL.
- */
-#define _STRNCMP_STYLE 0x1000
-
-/**
- * Compare two strings in code point order or code unit order.
- * Works in strcmp style (both lengths -1),
- * strncmp style (lengths equal and >=0, flag TRUE),
- * and memcmp/UnicodeString style (at least one length >=0).
- */
-U_CFUNC int32_t U_EXPORT2
-uprv_strCompare(const UChar *s1, int32_t length1,
- const UChar *s2, int32_t length2,
- UBool strncmpStyle, UBool codePointOrder);
-
-U_INTERNAL int32_t U_EXPORT2
-ustr_hashUCharsN(const UChar *str, int32_t length);
-
-U_INTERNAL int32_t U_EXPORT2
-ustr_hashCharsN(const char *str, int32_t length);
-
-U_INTERNAL int32_t U_EXPORT2
-ustr_hashICharsN(const char *str, int32_t length);
-
-/**
- * Convert an ASCII-range lowercase character to uppercase.
- *
- * @param c A UChar.
- * @return If UChar is a lowercase ASCII character, returns the uppercase version.
- * Otherwise, returns the input character.
- */
-U_INTERNAL UChar U_EXPORT2
-u_asciiToUpper(UChar c);
-
-// TODO: Add u_asciiToLower if/when there is a need for it.
-
-/**
- * NUL-terminate a UChar * string if possible.
- * If length < destCapacity then NUL-terminate.
- * If length == destCapacity then do not terminate but set U_STRING_NOT_TERMINATED_WARNING.
- * If length > destCapacity then do not terminate but set U_BUFFER_OVERFLOW_ERROR.
- *
- * @param dest Destination buffer, can be NULL if destCapacity==0.
- * @param destCapacity Number of UChars available at dest.
- * @param length Number of UChars that were (to be) written to dest.
- * @param pErrorCode ICU error code.
- * @return length
- */
-U_INTERNAL int32_t U_EXPORT2
-u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
-
-/**
- * NUL-terminate a char * string if possible.
- * Same as u_terminateUChars() but for a different string type.
- */
-U_INTERNAL int32_t U_EXPORT2
-u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
-
-/**
- * NUL-terminate a UChar32 * string if possible.
- * Same as u_terminateUChars() but for a different string type.
- */
-U_INTERNAL int32_t U_EXPORT2
-u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
-
-/**
- * NUL-terminate a wchar_t * string if possible.
- * Same as u_terminateUChars() but for a different string type.
- */
-U_INTERNAL int32_t U_EXPORT2
-u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
-
-/**
- * Counts the bytes of any whole valid sequence for a UTF-8 lead byte.
- * Returns 1 for ASCII 0..0x7f.
- * Returns 0 for 0x80..0xc1 as well as for 0xf5..0xff.
- * leadByte might be evaluated multiple times.
- *
- * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
- * @return 0..4
- */
-#define U8_COUNT_BYTES(leadByte) \
- (U8_IS_SINGLE(leadByte) ? 1 : U8_COUNT_BYTES_NON_ASCII(leadByte))
-
-/**
- * Counts the bytes of any whole valid sequence for a UTF-8 lead byte.
- * Returns 0 for 0x00..0xc1 as well as for 0xf5..0xff.
- * leadByte might be evaluated multiple times.
- *
- * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
- * @return 0 or 2..4
- */
-#define U8_COUNT_BYTES_NON_ASCII(leadByte) \
- (U8_IS_LEAD(leadByte) ? ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+2 : 0)
-
-#ifdef __cplusplus
-
-U_NAMESPACE_BEGIN
-
-class UTF8 {
-public:
- UTF8() = delete; // all static
-
- /**
- * Is t a valid UTF-8 trail byte?
- *
- * @param prev Must be the preceding lead byte if i==1 and length>=3;
- * otherwise ignored.
- * @param t The i-th byte following the lead byte.
- * @param i The index (1..3) of byte t in the byte sequence. 0<i<length
- * @param length The length (2..4) of the byte sequence according to the lead byte.
- * @return TRUE if t is a valid trail byte in this context.
- */
- static inline UBool isValidTrail(int32_t prev, uint8_t t, int32_t i, int32_t length) {
- // The first trail byte after a 3- or 4-byte lead byte
- // needs to be validated together with its lead byte.
- if (length <= 2 || i > 1) {
- return U8_IS_TRAIL(t);
- } else if (length == 3) {
- return U8_IS_VALID_LEAD3_AND_T1(prev, t);
- } else { // length == 4
- return U8_IS_VALID_LEAD4_AND_T1(prev, t);
- }
- }
-};
-
-U_NAMESPACE_END
-
-#endif // __cplusplus
-
-#endif
diff --git a/contrib/libs/icu/common/ustr_titlecase_brkiter.cpp b/contrib/libs/icu/common/ustr_titlecase_brkiter.cpp
deleted file mode 100644
index 457905eb60d..00000000000
--- a/contrib/libs/icu/common/ustr_titlecase_brkiter.cpp
+++ /dev/null
@@ -1,237 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: ustr_titlecase_brkiter.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2011may30
-* created by: Markus W. Scherer
-*
-* Titlecasing functions that are based on BreakIterator
-* were moved here to break dependency cycles among parts of the common library.
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/brkiter.h"
-#include "unicode/casemap.h"
-#include "unicode/chariter.h"
-#include "unicode/localpointer.h"
-#include "unicode/ubrk.h"
-#include "unicode/ucasemap.h"
-#include "unicode/utext.h"
-#include "cmemory.h"
-#include "uassert.h"
-#include "ucase.h"
-#include "ucasemap_imp.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * Whole-string BreakIterator.
- * Titlecasing only calls setText(), first(), and next().
- * We implement the rest only to satisfy the abstract interface.
- */
-class WholeStringBreakIterator : public BreakIterator {
-public:
- WholeStringBreakIterator() : BreakIterator(), length(0) {}
- ~WholeStringBreakIterator() U_OVERRIDE;
- UBool operator==(const BreakIterator&) const U_OVERRIDE;
- WholeStringBreakIterator *clone() const U_OVERRIDE;
- static UClassID U_EXPORT2 getStaticClassID();
- UClassID getDynamicClassID() const U_OVERRIDE;
- CharacterIterator &getText() const U_OVERRIDE;
- UText *getUText(UText *fillIn, UErrorCode &errorCode) const U_OVERRIDE;
- void setText(const UnicodeString &text) U_OVERRIDE;
- void setText(UText *text, UErrorCode &errorCode) U_OVERRIDE;
- void adoptText(CharacterIterator* it) U_OVERRIDE;
- int32_t first() U_OVERRIDE;
- int32_t last() U_OVERRIDE;
- int32_t previous() U_OVERRIDE;
- int32_t next() U_OVERRIDE;
- int32_t current() const U_OVERRIDE;
- int32_t following(int32_t offset) U_OVERRIDE;
- int32_t preceding(int32_t offset) U_OVERRIDE;
- UBool isBoundary(int32_t offset) U_OVERRIDE;
- int32_t next(int32_t n) U_OVERRIDE;
- WholeStringBreakIterator *createBufferClone(void *stackBuffer, int32_t &BufferSize,
- UErrorCode &errorCode) U_OVERRIDE;
- WholeStringBreakIterator &refreshInputText(UText *input, UErrorCode &errorCode) U_OVERRIDE;
-
-private:
- int32_t length;
-};
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(WholeStringBreakIterator)
-
-WholeStringBreakIterator::~WholeStringBreakIterator() {}
-UBool WholeStringBreakIterator::operator==(const BreakIterator&) const { return FALSE; }
-WholeStringBreakIterator *WholeStringBreakIterator::clone() const { return nullptr; }
-
-CharacterIterator &WholeStringBreakIterator::getText() const {
- UPRV_UNREACHABLE; // really should not be called
-}
-UText *WholeStringBreakIterator::getUText(UText * /*fillIn*/, UErrorCode &errorCode) const {
- if (U_SUCCESS(errorCode)) {
- errorCode = U_UNSUPPORTED_ERROR;
- }
- return nullptr;
-}
-
-void WholeStringBreakIterator::setText(const UnicodeString &text) {
- length = text.length();
-}
-void WholeStringBreakIterator::setText(UText *text, UErrorCode &errorCode) {
- if (U_SUCCESS(errorCode)) {
- int64_t length64 = utext_nativeLength(text);
- if (length64 <= INT32_MAX) {
- length = (int32_t)length64;
- } else {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- }
- }
-}
-void WholeStringBreakIterator::adoptText(CharacterIterator*) {
- UPRV_UNREACHABLE; // should not be called
-}
-
-int32_t WholeStringBreakIterator::first() { return 0; }
-int32_t WholeStringBreakIterator::last() { return length; }
-int32_t WholeStringBreakIterator::previous() { return 0; }
-int32_t WholeStringBreakIterator::next() { return length; }
-int32_t WholeStringBreakIterator::current() const { return 0; }
-int32_t WholeStringBreakIterator::following(int32_t /*offset*/) { return length; }
-int32_t WholeStringBreakIterator::preceding(int32_t /*offset*/) { return 0; }
-UBool WholeStringBreakIterator::isBoundary(int32_t /*offset*/) { return FALSE; }
-int32_t WholeStringBreakIterator::next(int32_t /*n*/) { return length; }
-
-WholeStringBreakIterator *WholeStringBreakIterator::createBufferClone(
- void * /*stackBuffer*/, int32_t & /*BufferSize*/, UErrorCode &errorCode) {
- if (U_SUCCESS(errorCode)) {
- errorCode = U_UNSUPPORTED_ERROR;
- }
- return nullptr;
-}
-WholeStringBreakIterator &WholeStringBreakIterator::refreshInputText(
- UText * /*input*/, UErrorCode &errorCode) {
- if (U_SUCCESS(errorCode)) {
- errorCode = U_UNSUPPORTED_ERROR;
- }
- return *this;
-}
-
-U_CFUNC
-BreakIterator *ustrcase_getTitleBreakIterator(
- const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter,
- LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return nullptr; }
- options &= U_TITLECASE_ITERATOR_MASK;
- if (options != 0 && iter != nullptr) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- if (iter == nullptr) {
- switch (options) {
- case 0:
- iter = BreakIterator::createWordInstance(
- locale != nullptr ? *locale : Locale(locID), errorCode);
- break;
- case U_TITLECASE_WHOLE_STRING:
- iter = new WholeStringBreakIterator();
- if (iter == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- }
- break;
- case U_TITLECASE_SENTENCES:
- iter = BreakIterator::createSentenceInstance(
- locale != nullptr ? *locale : Locale(locID), errorCode);
- break;
- default:
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- break;
- }
- ownedIter.adoptInstead(iter);
- }
- return iter;
-}
-
-int32_t CaseMap::toTitle(
- const char *locale, uint32_t options, BreakIterator *iter,
- const UChar *src, int32_t srcLength,
- UChar *dest, int32_t destCapacity, Edits *edits,
- UErrorCode &errorCode) {
- LocalPointer<BreakIterator> ownedIter;
- iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode);
- if(iter==NULL) {
- return 0;
- }
- UnicodeString s(srcLength<0, src, srcLength);
- iter->setText(s);
- return ustrcase_map(
- ustrcase_getCaseLocale(locale), options, iter,
- dest, destCapacity,
- src, srcLength,
- ustrcase_internalToTitle, edits, errorCode);
-}
-
-U_NAMESPACE_END
-
-U_NAMESPACE_USE
-
-U_CAPI int32_t U_EXPORT2
-u_strToTitle(UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- UBreakIterator *titleIter,
- const char *locale,
- UErrorCode *pErrorCode) {
- LocalPointer<BreakIterator> ownedIter;
- BreakIterator *iter = ustrcase_getTitleBreakIterator(
- nullptr, locale, 0, reinterpret_cast<BreakIterator *>(titleIter),
- ownedIter, *pErrorCode);
- if (iter == nullptr) {
- return 0;
- }
- UnicodeString s(srcLength<0, src, srcLength);
- iter->setText(s);
- return ustrcase_mapWithOverlap(
- ustrcase_getCaseLocale(locale), 0, iter,
- dest, destCapacity,
- src, srcLength,
- ustrcase_internalToTitle, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-ucasemap_toTitle(UCaseMap *csm,
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if (csm->iter == NULL) {
- LocalPointer<BreakIterator> ownedIter;
- BreakIterator *iter = ustrcase_getTitleBreakIterator(
- nullptr, csm->locale, csm->options, nullptr, ownedIter, *pErrorCode);
- if (iter == nullptr) {
- return 0;
- }
- csm->iter = ownedIter.orphan();
- }
- UnicodeString s(srcLength<0, src, srcLength);
- csm->iter->setText(s);
- return ustrcase_map(
- csm->caseLocale, csm->options, csm->iter,
- dest, destCapacity,
- src, srcLength,
- ustrcase_internalToTitle, NULL, *pErrorCode);
-}
-
-#endif // !UCONFIG_NO_BREAK_ITERATION
diff --git a/contrib/libs/icu/common/ustr_wcs.cpp b/contrib/libs/icu/common/ustr_wcs.cpp
deleted file mode 100644
index e9f278e9691..00000000000
--- a/contrib/libs/icu/common/ustr_wcs.cpp
+++ /dev/null
@@ -1,535 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2001-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: ustr_wcs.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004sep07
-* created by: Markus W. Scherer
-*
-* u_strToWCS() and u_strFromWCS() functions
-* moved here from ustrtrns.c for better modularization.
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/ustring.h"
-#include "cstring.h"
-#include "cwchar.h"
-#include "cmemory.h"
-#include "ustr_imp.h"
-#include "ustr_cnv.h"
-
-#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
-
-#define _STACK_BUFFER_CAPACITY 1000
-#define _BUFFER_CAPACITY_MULTIPLIER 2
-
-#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
-// TODO: We should use CharString for char buffers and UnicodeString for UChar buffers.
-// Then we could change this to work only with wchar_t buffers.
-static inline UBool
-u_growAnyBufferFromStatic(void *context,
- void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
- int32_t length, int32_t size) {
- // Use char* not void* to avoid the compiler's strict-aliasing assumptions
- // and related warnings.
- char *newBuffer=(char *)uprv_malloc(reqCapacity*size);
- if(newBuffer!=NULL) {
- if(length>0) {
- uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size);
- }
- *pCapacity=reqCapacity;
- } else {
- *pCapacity=0;
- }
-
- /* release the old pBuffer if it was not statically allocated */
- if(*pBuffer!=(char *)context) {
- uprv_free(*pBuffer);
- }
-
- *pBuffer=newBuffer;
- return (UBool)(newBuffer!=NULL);
-}
-
-/* helper function */
-static wchar_t*
-_strToWCS(wchar_t *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const UChar *src,
- int32_t srcLength,
- UErrorCode *pErrorCode){
-
- char stackBuffer [_STACK_BUFFER_CAPACITY];
- char* tempBuf = stackBuffer;
- int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
- char* tempBufLimit = stackBuffer + tempBufCapacity;
- UConverter* conv = NULL;
- char* saveBuf = tempBuf;
- wchar_t* intTarget=NULL;
- int32_t intTargetCapacity=0;
- int count=0,retVal=0;
-
- const UChar *pSrcLimit =NULL;
- const UChar *pSrc = src;
-
- conv = u_getDefaultConverter(pErrorCode);
-
- if(U_FAILURE(*pErrorCode)){
- return NULL;
- }
-
- if(srcLength == -1){
- srcLength = u_strlen(pSrc);
- }
-
- pSrcLimit = pSrc + srcLength;
-
- for(;;) {
- /* reset the error state */
- *pErrorCode = U_ZERO_ERROR;
-
- /* convert to chars using default converter */
- ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
- count =(tempBuf - saveBuf);
-
- /* This should rarely occur */
- if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
- tempBuf = saveBuf;
-
- /* we dont have enough room on the stack grow the buffer */
- int32_t newCapacity = 2 * srcLength;
- if(newCapacity <= tempBufCapacity) {
- newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
- }
- if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
- newCapacity, count, 1)) {
- goto cleanup;
- }
-
- saveBuf = tempBuf;
- tempBufLimit = tempBuf + tempBufCapacity;
- tempBuf = tempBuf + count;
-
- } else {
- break;
- }
- }
-
- if(U_FAILURE(*pErrorCode)){
- goto cleanup;
- }
-
- /* done with conversion null terminate the char buffer */
- if(count>=tempBufCapacity){
- tempBuf = saveBuf;
- /* we dont have enough room on the stack grow the buffer */
- if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
- count+1, count, 1)) {
- goto cleanup;
- }
- saveBuf = tempBuf;
- }
-
- saveBuf[count]=0;
-
-
- /* allocate more space than required
- * here we assume that every char requires
- * no more than 2 wchar_ts
- */
- intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
- intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
-
- if(intTarget){
-
- int32_t nulLen = 0;
- int32_t remaining = intTargetCapacity;
- wchar_t* pIntTarget=intTarget;
- tempBuf = saveBuf;
-
- /* now convert the mbs to wcs */
- for(;;){
-
- /* we can call the system API since we are sure that
- * there is atleast 1 null in the input
- */
- retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
-
- if(retVal==-1){
- *pErrorCode = U_INVALID_CHAR_FOUND;
- break;
- }else if(retVal== remaining){/* should never occur */
- int numWritten = (pIntTarget-intTarget);
- u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
- &intTargetCapacity,
- intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
- numWritten,
- sizeof(wchar_t));
- pIntTarget = intTarget;
- remaining=intTargetCapacity;
-
- if(nulLen!=count){ /*there are embedded nulls*/
- pIntTarget+=numWritten;
- remaining-=numWritten;
- }
-
- }else{
- int32_t nulVal;
- /*scan for nulls */
- /* we donot check for limit since tempBuf is null terminated */
- while(tempBuf[nulLen++] != 0){
- }
- nulVal = (nulLen < srcLength) ? 1 : 0;
- pIntTarget = pIntTarget + retVal+nulVal;
- remaining -=(retVal+nulVal);
-
- /* check if we have reached the source limit*/
- if(nulLen>=(count)){
- break;
- }
- }
- }
- count = (int32_t)(pIntTarget-intTarget);
-
- if(0 < count && count <= destCapacity){
- uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t));
- }
-
- if(pDestLength){
- *pDestLength = count;
- }
-
- /* free the allocated memory */
- uprv_free(intTarget);
-
- }else{
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- }
-cleanup:
- /* are we still using stack buffer */
- if(stackBuffer != saveBuf){
- uprv_free(saveBuf);
- }
- u_terminateWChars(dest,destCapacity,count,pErrorCode);
-
- u_releaseDefaultConverter(conv);
-
- return dest;
-}
-#endif
-
-U_CAPI wchar_t* U_EXPORT2
-u_strToWCS(wchar_t *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const UChar *src,
- int32_t srcLength,
- UErrorCode *pErrorCode){
-
- /* args check */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
- return NULL;
- }
-
- if( (src==NULL && srcLength!=0) || srcLength < -1 ||
- (destCapacity<0) || (dest == NULL && destCapacity > 0)
- ) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
-#ifdef U_WCHAR_IS_UTF16
- /* wchar_t is UTF-16 just do a memcpy */
- if(srcLength == -1){
- srcLength = u_strlen(src);
- }
- if(0 < srcLength && srcLength <= destCapacity){
- u_memcpy((UChar *)dest, src, srcLength);
- }
- if(pDestLength){
- *pDestLength = srcLength;
- }
-
- u_terminateUChars((UChar *)dest,destCapacity,srcLength,pErrorCode);
-
- return dest;
-
-#elif defined U_WCHAR_IS_UTF32
-
- return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
- src, srcLength, pErrorCode);
-
-#else
-
- return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
-
-#endif
-
-}
-
-#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
-/* helper function */
-static UChar*
-_strFromWCS( UChar *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const wchar_t *src,
- int32_t srcLength,
- UErrorCode *pErrorCode)
-{
- int32_t retVal =0, count =0 ;
- UConverter* conv = NULL;
- UChar* pTarget = NULL;
- UChar* pTargetLimit = NULL;
- UChar* target = NULL;
-
- UChar uStack [_STACK_BUFFER_CAPACITY];
-
- wchar_t wStack[_STACK_BUFFER_CAPACITY];
- wchar_t* pWStack = wStack;
-
-
- char cStack[_STACK_BUFFER_CAPACITY];
- int32_t cStackCap = _STACK_BUFFER_CAPACITY;
- char* pCSrc=cStack;
- char* pCSave=pCSrc;
- char* pCSrcLimit=NULL;
-
- const wchar_t* pSrc = src;
- const wchar_t* pSrcLimit = NULL;
-
- if(srcLength ==-1){
- /* if the wchar_t source is null terminated we can safely
- * assume that there are no embedded nulls, this is a fast
- * path for null terminated strings.
- */
- for(;;){
- /* convert wchars to chars */
- retVal = uprv_wcstombs(pCSrc,src, cStackCap);
-
- if(retVal == -1){
- *pErrorCode = U_ILLEGAL_CHAR_FOUND;
- goto cleanup;
- }else if(retVal >= (cStackCap-1)){
- /* Should rarely occur */
- u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
- cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
- pCSave = pCSrc;
- }else{
- /* converted every thing */
- pCSrc = pCSrc+retVal;
- break;
- }
- }
-
- }else{
- /* here the source is not null terminated
- * so it may have nulls embeded and we need to
- * do some extra processing
- */
- int32_t remaining =cStackCap;
-
- pSrcLimit = src + srcLength;
-
- for(;;){
- int32_t nulLen = 0;
-
- /* find nulls in the string */
- while(nulLen<srcLength && pSrc[nulLen++]!=0){
- }
-
- if((pSrc+nulLen) < pSrcLimit){
- /* check if we have enough room in pCSrc */
- if(remaining < (nulLen * MB_CUR_MAX)){
- /* should rarely occur */
- int32_t len = (pCSrc-pCSave);
- pCSrc = pCSave;
- /* we do not have enough room so grow the buffer*/
- u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
- _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
-
- pCSave = pCSrc;
- pCSrc = pCSave+len;
- remaining = cStackCap-(pCSrc - pCSave);
- }
-
- /* we have found a null so convert the
- * chunk from begining of non-null char to null
- */
- retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
-
- if(retVal==-1){
- /* an error occurred bail out */
- *pErrorCode = U_ILLEGAL_CHAR_FOUND;
- goto cleanup;
- }
-
- pCSrc += retVal+1 /* already null terminated */;
-
- pSrc += nulLen; /* skip past the null */
- srcLength-=nulLen; /* decrement the srcLength */
- remaining -= (pCSrc-pCSave);
-
-
- }else{
- /* the source is not null terminated and we are
- * end of source so we copy the source to a temp buffer
- * null terminate it and convert wchar_ts to chars
- */
- if(nulLen >= _STACK_BUFFER_CAPACITY){
- /* Should rarely occcur */
- /* allocate new buffer buffer */
- pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
- if(pWStack==NULL){
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- goto cleanup;
- }
- }
- if(nulLen>0){
- /* copy the contents to tempStack */
- uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t));
- }
-
- /* null terminate the tempBuffer */
- pWStack[nulLen] =0 ;
-
- if(remaining < (nulLen * MB_CUR_MAX)){
- /* Should rarely occur */
- int32_t len = (pCSrc-pCSave);
- pCSrc = pCSave;
- /* we do not have enough room so grow the buffer*/
- u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
- cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
-
- pCSave = pCSrc;
- pCSrc = pCSave+len;
- remaining = cStackCap-(pCSrc - pCSave);
- }
- /* convert to chars */
- retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
-
- pCSrc += retVal;
- pSrc += nulLen;
- srcLength-=nulLen; /* decrement the srcLength */
- break;
- }
- }
- }
-
- /* OK..now we have converted from wchar_ts to chars now
- * convert chars to UChars
- */
- pCSrcLimit = pCSrc;
- pCSrc = pCSave;
- pTarget = target= dest;
- pTargetLimit = dest + destCapacity;
-
- conv= u_getDefaultConverter(pErrorCode);
-
- if(U_FAILURE(*pErrorCode)|| conv==NULL){
- goto cleanup;
- }
-
- for(;;) {
-
- *pErrorCode = U_ZERO_ERROR;
-
- /* convert to stack buffer*/
- ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
-
- /* increment count to number written to stack */
- count+= pTarget - target;
-
- if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
- target = uStack;
- pTarget = uStack;
- pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
- } else {
- break;
- }
-
- }
-
- if(pDestLength){
- *pDestLength =count;
- }
-
- u_terminateUChars(dest,destCapacity,count,pErrorCode);
-
-cleanup:
-
- if(cStack != pCSave){
- uprv_free(pCSave);
- }
-
- if(wStack != pWStack){
- uprv_free(pWStack);
- }
-
- u_releaseDefaultConverter(conv);
-
- return dest;
-}
-#endif
-
-U_CAPI UChar* U_EXPORT2
-u_strFromWCS(UChar *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const wchar_t *src,
- int32_t srcLength,
- UErrorCode *pErrorCode)
-{
-
- /* args check */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
- return NULL;
- }
-
- if( (src==NULL && srcLength!=0) || srcLength < -1 ||
- (destCapacity<0) || (dest == NULL && destCapacity > 0)
- ) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
-#ifdef U_WCHAR_IS_UTF16
- /* wchar_t is UTF-16 just do a memcpy */
- if(srcLength == -1){
- srcLength = u_strlen((const UChar *)src);
- }
- if(0 < srcLength && srcLength <= destCapacity){
- u_memcpy(dest, (const UChar *)src, srcLength);
- }
- if(pDestLength){
- *pDestLength = srcLength;
- }
-
- u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
-
- return dest;
-
-#elif defined U_WCHAR_IS_UTF32
-
- return u_strFromUTF32(dest, destCapacity, pDestLength,
- (UChar32*)src, srcLength, pErrorCode);
-
-#else
-
- return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
-
-#endif
-
-}
-
-#endif /* #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) && !UCONFIG_NO_CONVERSION */
diff --git a/contrib/libs/icu/common/ustrcase.cpp b/contrib/libs/icu/common/ustrcase.cpp
deleted file mode 100644
index 618e847c65d..00000000000
--- a/contrib/libs/icu/common/ustrcase.cpp
+++ /dev/null
@@ -1,1818 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2001-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: ustrcase.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002feb20
-* created by: Markus W. Scherer
-*
-* Implementation file for string casing C API functions.
-* Uses functions from uchar.c for basic functionality that requires access
-* to the Unicode Character Database (uprops.dat).
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/brkiter.h"
-#include "unicode/casemap.h"
-#include "unicode/edits.h"
-#include "unicode/stringoptions.h"
-#include "unicode/ustring.h"
-#include "unicode/ucasemap.h"
-#include "unicode/ubrk.h"
-#include "unicode/utf.h"
-#include "unicode/utf16.h"
-#include "cmemory.h"
-#include "ucase.h"
-#include "ucasemap_imp.h"
-#include "ustr_imp.h"
-#include "uassert.h"
-
-U_NAMESPACE_BEGIN
-
-namespace {
-
-int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity, // Final status check shared by the case-mapping entry points; always returns destIndex unchanged.
- Edits *edits, UErrorCode &errorCode) {
- if (U_SUCCESS(errorCode)) { // an earlier failure is never overwritten
- if (destIndex > destCapacity) {
- errorCode = U_BUFFER_OVERFLOW_ERROR; // the result is longer than the destination buffer
- } else if (edits != NULL) {
- edits->copyErrorTo(errorCode); // consulted only when there was no overflow
- }
- }
- return destIndex;
-}
-
-/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH.
- *
- * The encoding of "result", as decoded by the code below:
- *   result < 0                          the code point ~result is appended unchanged
- *   result <= UCASE_MAX_STRING_LENGTH   "result" UChars are appended from s
- *   otherwise                           result is a single code point to append
- *
- * cpLength is the length passed to edits->addUnchanged()/addReplace() for the source side.
- * Returns the new destIndex, or -1 if destIndex+length would exceed INT32_MAX.
- * When the output does not fit, nothing is written for that item but
- * destIndex still advances by its length.
- */
-inline int32_t
-appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
- int32_t result, const UChar *s,
- int32_t cpLength, uint32_t options, icu::Edits *edits) {
- UChar32 c;
- int32_t length;
-
- /* decode the result */
- if(result<0) {
- /* (not) original code point */
- if(edits!=NULL) {
- edits->addUnchanged(cpLength);
- }
- if(options & U_OMIT_UNCHANGED_TEXT) {
- return destIndex; // unchanged text is recorded in edits but not written
- }
- c=~result;
- if(destIndex<destCapacity && c<=0xffff) { // BMP slightly-fastpath
- dest[destIndex++]=(UChar)c;
- return destIndex;
- }
- length=cpLength;
- } else {
- if(result<=UCASE_MAX_STRING_LENGTH) {
- c=U_SENTINEL; // negative c selects the string branch below
- length=result;
- } else if(destIndex<destCapacity && result<=0xffff) { // BMP slightly-fastpath
- dest[destIndex++]=(UChar)result;
- if(edits!=NULL) {
- edits->addReplace(cpLength, 1);
- }
- return destIndex;
- } else {
- c=result;
- length=U16_LENGTH(c);
- }
- if(edits!=NULL) {
- edits->addReplace(cpLength, length);
- }
- }
- if(length>(INT32_MAX-destIndex)) {
- return -1; // integer overflow
- }
-
- if(destIndex<destCapacity) {
- /* append the result */
- if(c>=0) {
- /* code point */
- UBool isError=FALSE;
- U16_APPEND(dest, destIndex, destCapacity, c, isError);
- if(isError) {
- /* overflow, nothing written */
- destIndex+=length;
- }
- } else {
- /* string */
- if((destIndex+length)<=destCapacity) {
- while(length>0) {
- dest[destIndex++]=*s++;
- --length;
- }
- } else {
- /* overflow */
- destIndex+=length;
- }
- }
- } else {
- /* preflight */
- destIndex+=length;
- }
- return destIndex;
-}
-
-inline int32_t // Appends one UChar if it fits; returns destIndex+1, or -1 if destIndex cannot be incremented.
-appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
- if(destIndex<destCapacity) {
- dest[destIndex]=c;
- } else if(destIndex==INT32_MAX) { // nothing is written past capacity; only the count advances
- return -1; // integer overflow
- }
- return destIndex+1;
-}
-
-int32_t // Records and copies a run of unchanged source text; returns the new destIndex or -1 on integer overflow.
-appendNonEmptyUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
- const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
- if(edits!=NULL) {
- edits->addUnchanged(length); // recorded before the omit check, so edits see the run either way
- }
- if(options & U_OMIT_UNCHANGED_TEXT) {
- return destIndex; // caller asked for changed text only
- }
- if(length>(INT32_MAX-destIndex)) {
- return -1; // integer overflow
- }
- if((destIndex+length)<=destCapacity) { // copy only when the whole run fits
- u_memcpy(dest+destIndex, s, length);
- }
- return destIndex + length; // advances even when the run was not copied
-}
-
-inline int32_t // Wrapper around appendNonEmptyUnchanged() that returns early for length<=0.
-appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
- const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
- if (length <= 0) {
- return destIndex; // empty run: no edits record, no copy
- }
- return appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits);
-}
-
-UChar32 U_CALLCONV // Context iterator over UTF-16 text; returns the next code point in the current direction, or U_SENTINEL at the text boundary.
-utf16_caseContextIterator(void *context, int8_t dir) { // context is a UCaseContext*; dir<0 / dir>0 restart backward / forward, dir==0 continues
- UCaseContext *csc=(UCaseContext *)context;
- UChar32 c;
-
- if(dir<0) {
- /* reset for backward iteration */
- csc->index=csc->cpStart; // start just before the current code point
- csc->dir=dir;
- } else if(dir>0) {
- /* reset for forward iteration */
- csc->index=csc->cpLimit; // start just after the current code point
- csc->dir=dir;
- } else {
- /* continue current iteration direction */
- dir=csc->dir;
- }
-
- if(dir<0) {
- if(csc->start<csc->index) {
- U16_PREV((const UChar *)csc->p, csc->start, csc->index, c);
- return c;
- }
- } else {
- if(csc->index<csc->limit) {
- U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c);
- return c;
- }
- }
- return U_SENTINEL; // no more text in this direction
-}
-
-/**
- * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
- * caseLocale < 0: Case-folds [srcStart..srcLimit[.
- *
- * Output is written to dest starting at index 0 of dest.
- * Returns the number of UChars the result needs. On integer overflow of that
- * count it sets errorCode to U_INDEX_OUTOFBOUNDS_ERROR and returns 0.
- * csc is only used when caseLocale >= 0.
- */
-int32_t toLower(int32_t caseLocale, uint32_t options,
- UChar *dest, int32_t destCapacity,
- const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
- icu::Edits *edits, UErrorCode &errorCode) {
- const int8_t *latinToLower; // per-code-unit delta table used by the fast path below
- if (caseLocale == UCASE_LOC_ROOT ||
- (caseLocale >= 0 ?
- !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
- (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
- latinToLower = LatinCase::TO_LOWER_NORMAL;
- } else {
- latinToLower = LatinCase::TO_LOWER_TR_LT;
- }
- const UTrie2 *trie = ucase_getTrie();
- int32_t destIndex = 0;
- int32_t prev = srcStart; // start of the pending run of unchanged source text
- int32_t srcIndex = srcStart;
- for (;;) {
- // fast path for simple cases
- UChar lead = 0;
- while (srcIndex < srcLimit) {
- lead = src[srcIndex];
- int32_t delta;
- if (lead < LatinCase::LONG_S) {
- int8_t d = latinToLower[lead];
- if (d == LatinCase::EXC) { break; } // table marks this unit for the slow path
- ++srcIndex;
- if (d == 0) { continue; } // no mapping: the unit stays in the unchanged run
- delta = d;
- } else if (lead >= 0xd800) {
- break; // surrogate or higher
- } else {
- uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
- if (UCASE_HAS_EXCEPTION(props)) { break; }
- ++srcIndex;
- if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
- continue;
- }
- }
- lead += static_cast<UChar>(delta);
- destIndex = appendUnchanged(dest, destIndex, destCapacity, // flush the unchanged run that precedes the mapped unit
- src + prev, srcIndex - 1 - prev, options, edits);
- if (destIndex >= 0) {
- destIndex = appendUChar(dest, destIndex, destCapacity, lead);
- if (edits != nullptr) {
- edits->addReplace(1, 1);
- }
- }
- if (destIndex < 0) {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- prev = srcIndex;
- }
- if (srcIndex >= srcLimit) {
- break;
- }
- // slow path
- int32_t cpStart = srcIndex++;
- UChar trail;
- UChar32 c;
- if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) {
- c = U16_GET_SUPPLEMENTARY(lead, trail);
- ++srcIndex;
- } else {
- c = lead; // BMP code unit, or an unpaired surrogate taken as-is
- }
- const UChar *s;
- if (caseLocale >= 0) {
- csc->cpStart = cpStart;
- csc->cpLimit = srcIndex;
- c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale);
- } else {
- c = ucase_toFullFolding(c, &s, options);
- }
- if (c >= 0) { // a negative result is left in the pending unchanged run
- destIndex = appendUnchanged(dest, destIndex, destCapacity,
- src + prev, cpStart - prev, options, edits);
- if (destIndex >= 0) {
- destIndex = appendResult(dest, destIndex, destCapacity, c, s,
- srcIndex - cpStart, options, edits);
- }
- if (destIndex < 0) {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- prev = srcIndex;
- }
- }
- destIndex = appendUnchanged(dest, destIndex, destCapacity, // flush the trailing unchanged run
- src + prev, srcIndex - prev, options, edits);
- if (destIndex < 0) {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- return destIndex;
-}
-
-int32_t toUpper(int32_t caseLocale, uint32_t options, // Uppercases src[0..srcLength[ into dest; returns the result length, or 0 with U_INDEX_OUTOFBOUNDS_ERROR on integer overflow.
- UChar *dest, int32_t destCapacity,
- const UChar *src, UCaseContext *csc, int32_t srcLength,
- icu::Edits *edits, UErrorCode &errorCode) {
- const int8_t *latinToUpper; // per-code-unit delta table used by the fast path below
- if (caseLocale == UCASE_LOC_TURKISH) {
- latinToUpper = LatinCase::TO_UPPER_TR;
- } else {
- latinToUpper = LatinCase::TO_UPPER_NORMAL;
- }
- const UTrie2 *trie = ucase_getTrie();
- int32_t destIndex = 0;
- int32_t prev = 0; // start of the pending run of unchanged source text
- int32_t srcIndex = 0;
- for (;;) {
- // fast path for simple cases
- UChar lead = 0;
- while (srcIndex < srcLength) {
- lead = src[srcIndex];
- int32_t delta;
- if (lead < LatinCase::LONG_S) {
- int8_t d = latinToUpper[lead];
- if (d == LatinCase::EXC) { break; } // table marks this unit for the slow path
- ++srcIndex;
- if (d == 0) { continue; } // no mapping: the unit stays in the unchanged run
- delta = d;
- } else if (lead >= 0xd800) {
- break; // surrogate or higher
- } else {
- uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
- if (UCASE_HAS_EXCEPTION(props)) { break; }
- ++srcIndex;
- if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
- continue;
- }
- }
- lead += static_cast<UChar>(delta);
- destIndex = appendUnchanged(dest, destIndex, destCapacity, // flush the unchanged run that precedes the mapped unit
- src + prev, srcIndex - 1 - prev, options, edits);
- if (destIndex >= 0) {
- destIndex = appendUChar(dest, destIndex, destCapacity, lead);
- if (edits != nullptr) {
- edits->addReplace(1, 1);
- }
- }
- if (destIndex < 0) {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- prev = srcIndex;
- }
- if (srcIndex >= srcLength) {
- break;
- }
- // slow path
- int32_t cpStart;
- csc->cpStart = cpStart = srcIndex++;
- UChar trail;
- UChar32 c;
- if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) {
- c = U16_GET_SUPPLEMENTARY(lead, trail);
- ++srcIndex;
- } else {
- c = lead; // BMP code unit, or an unpaired surrogate taken as-is
- }
- csc->cpLimit = srcIndex;
- const UChar *s;
- c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale);
- if (c >= 0) { // a negative result is left in the pending unchanged run
- destIndex = appendUnchanged(dest, destIndex, destCapacity,
- src + prev, cpStart - prev, options, edits);
- if (destIndex >= 0) {
- destIndex = appendResult(dest, destIndex, destCapacity, c, s,
- srcIndex - cpStart, options, edits);
- }
- if (destIndex < 0) {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- prev = srcIndex;
- }
- }
- destIndex = appendUnchanged(dest, destIndex, destCapacity, // flush the trailing unchanged run
- src + prev, srcIndex - prev, options, edits);
- if (destIndex < 0) {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- return destIndex;
-}
-
-} // namespace
-
-U_NAMESPACE_END
-
-U_NAMESPACE_USE
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-U_CFUNC int32_t U_CALLCONV // Titlecases src into dest, one segment per boundary reported by iter; returns the result length.
-ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter,
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- icu::Edits *edits,
- UErrorCode &errorCode) {
- if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) {
- return 0;
- }
-
- /* set up local variables */
- UCaseContext csc=UCASECONTEXT_INITIALIZER;
- csc.p=(void *)src;
- csc.limit=srcLength;
- int32_t destIndex=0;
- int32_t prev=0; // start of the current segment
- UBool isFirstIndex=TRUE;
-
- /* titlecasing loop */
- while(prev<srcLength) {
- /* find next index where to titlecase */
- int32_t index;
- if(isFirstIndex) {
- isFirstIndex=FALSE;
- index=iter->first();
- } else {
- index=iter->next();
- }
- if(index==UBRK_DONE || index>srcLength) {
- index=srcLength; // clamp: the last segment ends at the end of the text
- }
-
- /*
- * Segment [prev..index[ into 3 parts:
- * a) skipped characters (copy as-is) [prev..titleStart[
- * b) first letter (titlecase) [titleStart..titleLimit[
- * c) subsequent characters (lowercase) [titleLimit..index[
- */
- if(prev<index) {
- // Find and copy skipped characters [prev..titleStart[
- int32_t titleStart=prev;
- int32_t titleLimit=prev;
- UChar32 c;
- U16_NEXT(src, titleLimit, index, c);
- if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
- // Adjust the titlecasing index to the next cased character,
- // or to the next letter/number/symbol/private use.
- // Stop with titleStart<titleLimit<=index
- // if there is a character to be titlecased,
- // or else stop with titleStart==titleLimit==index.
- UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
- while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
- titleStart=titleLimit;
- if(titleLimit==index) {
- break;
- }
- U16_NEXT(src, titleLimit, index, c);
- }
- if (prev < titleStart) {
- destIndex=appendUnchanged(dest, destIndex, destCapacity,
- src+prev, titleStart-prev, options, edits);
- if(destIndex<0) {
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- }
- }
-
- if(titleStart<titleLimit) {
- /* titlecase c which is from [titleStart..titleLimit[ */
- csc.cpStart=titleStart;
- csc.cpLimit=titleLimit;
- const UChar *s;
- c=ucase_toFullTitle(c, utf16_caseContextIterator, &csc, &s, caseLocale);
- destIndex=appendResult(dest, destIndex, destCapacity, c, s,
- titleLimit-titleStart, options, edits);
- if(destIndex<0) {
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- /* Special case Dutch IJ titlecasing */
- if (titleStart+1 < index &&
- caseLocale == UCASE_LOC_DUTCH &&
- (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) { // source starts with 'I' or 'i'
- if (src[titleStart+1] == 0x006A) { // followed by 'j': write 'J' as well
- destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
- if(destIndex<0) {
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- if(edits!=NULL) {
- edits->addReplace(1, 1);
- }
- titleLimit++;
- } else if (src[titleStart+1] == 0x004A) {
- // Keep the capital J from getting lowercased.
- destIndex=appendUnchanged(dest, destIndex, destCapacity,
- src+titleStart+1, 1, options, edits);
- if(destIndex<0) {
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- titleLimit++;
- }
- }
-
- /* lowercase [titleLimit..index[ */
- if(titleLimit<index) {
- if((options&U_TITLECASE_NO_LOWERCASE)==0) {
- /* Normal operation: Lowercase the rest of the word. */
- destIndex+= // toLower() writes relative to dest+destIndex and returns the length it produced
- toLower(
- caseLocale, options,
- dest+destIndex, destCapacity-destIndex,
- src, &csc, titleLimit, index,
- edits, errorCode);
- if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
- errorCode=U_ZERO_ERROR; // cleared here; overflow is re-checked once at the end of this function
- }
- if(U_FAILURE(errorCode)) {
- return destIndex;
- }
- } else {
- /* Optionally just copy the rest of the word unchanged. */
- destIndex=appendUnchanged(dest, destIndex, destCapacity,
- src+titleLimit, index-titleLimit, options, edits);
- if(destIndex<0) {
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- }
- }
- }
- }
-
- prev=index;
- }
-
- return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
-}
-
-#endif // !UCONFIG_NO_BREAK_ITERATION
-
-U_NAMESPACE_BEGIN
-namespace GreekUpper {
-
-// Data generated by prototype code, see
-// http://site.icu-project.org/design/case/greek-upper
-// TODO: Move this data into ucase.icu.
-static const uint16_t data0370[] = { // one entry per code point; getLetterData() indexes it with c - 0x370; 0 means no data
- // U+0370..03FF
- 0x0370, // U+0370
- 0x0370,
- 0x0372,
- 0x0372,
- 0,
- 0,
- 0x0376,
- 0x0376,
- 0,
- 0,
- 0x037A,
- 0x03FD,
- 0x03FE,
- 0x03FF,
- 0,
- 0x037F,
- 0, // U+0380
- 0,
- 0,
- 0,
- 0,
- 0,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0,
- 0x0395 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0,
- 0x039F | HAS_VOWEL | HAS_ACCENT,
- 0,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // U+0390
- 0x0391 | HAS_VOWEL,
- 0x0392,
- 0x0393,
- 0x0394,
- 0x0395 | HAS_VOWEL,
- 0x0396,
- 0x0397 | HAS_VOWEL,
- 0x0398,
- 0x0399 | HAS_VOWEL,
- 0x039A,
- 0x039B,
- 0x039C,
- 0x039D,
- 0x039E,
- 0x039F | HAS_VOWEL,
- 0x03A0, // U+03A0
- 0x03A1,
- 0,
- 0x03A3,
- 0x03A4,
- 0x03A5 | HAS_VOWEL,
- 0x03A6,
- 0x03A7,
- 0x03A8,
- 0x03A9 | HAS_VOWEL,
- 0x0399 | HAS_VOWEL | HAS_DIALYTIKA,
- 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0395 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // U+03B0
- 0x0391 | HAS_VOWEL,
- 0x0392,
- 0x0393,
- 0x0394,
- 0x0395 | HAS_VOWEL,
- 0x0396,
- 0x0397 | HAS_VOWEL,
- 0x0398,
- 0x0399 | HAS_VOWEL,
- 0x039A,
- 0x039B,
- 0x039C,
- 0x039D,
- 0x039E,
- 0x039F | HAS_VOWEL,
- 0x03A0, // U+03C0
- 0x03A1,
- 0x03A3,
- 0x03A3,
- 0x03A4,
- 0x03A5 | HAS_VOWEL,
- 0x03A6,
- 0x03A7,
- 0x03A8,
- 0x03A9 | HAS_VOWEL,
- 0x0399 | HAS_VOWEL | HAS_DIALYTIKA,
- 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,
- 0x039F | HAS_VOWEL | HAS_ACCENT,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03CF,
- 0x0392, // U+03D0
- 0x0398,
- 0x03D2,
- 0x03D2 | HAS_ACCENT,
- 0x03D2 | HAS_DIALYTIKA,
- 0x03A6,
- 0x03A0,
- 0x03CF,
- 0x03D8,
- 0x03D8,
- 0x03DA,
- 0x03DA,
- 0x03DC,
- 0x03DC,
- 0x03DE,
- 0x03DE,
- 0x03E0, // U+03E0
- 0x03E0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0x039A, // U+03F0
- 0x03A1,
- 0x03F9,
- 0x037F,
- 0x03F4,
- 0x0395 | HAS_VOWEL,
- 0,
- 0x03F7,
- 0x03F7,
- 0x03F9,
- 0x03FA,
- 0x03FA,
- 0x03FC,
- 0x03FD,
- 0x03FE,
- 0x03FF,
-};
-
-static const uint16_t data1F00[] = { // one entry per code point; getLetterData() indexes it with c - 0x1f00; 0 means no data
- // U+1F00..1FFF
- 0x0391 | HAS_VOWEL, // U+1F00
- 0x0391 | HAS_VOWEL,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0391 | HAS_VOWEL,
- 0x0391 | HAS_VOWEL,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0395 | HAS_VOWEL, // U+1F10
- 0x0395 | HAS_VOWEL,
- 0x0395 | HAS_VOWEL | HAS_ACCENT,
- 0x0395 | HAS_VOWEL | HAS_ACCENT,
- 0x0395 | HAS_VOWEL | HAS_ACCENT,
- 0x0395 | HAS_VOWEL | HAS_ACCENT,
- 0,
- 0,
- 0x0395 | HAS_VOWEL,
- 0x0395 | HAS_VOWEL,
- 0x0395 | HAS_VOWEL | HAS_ACCENT,
- 0x0395 | HAS_VOWEL | HAS_ACCENT,
- 0x0395 | HAS_VOWEL | HAS_ACCENT,
- 0x0395 | HAS_VOWEL | HAS_ACCENT,
- 0,
- 0,
- 0x0397 | HAS_VOWEL, // U+1F20
- 0x0397 | HAS_VOWEL,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL,
- 0x0397 | HAS_VOWEL,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL, // U+1F30
- 0x0399 | HAS_VOWEL,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL,
- 0x0399 | HAS_VOWEL,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x039F | HAS_VOWEL, // U+1F40
- 0x039F | HAS_VOWEL,
- 0x039F | HAS_VOWEL | HAS_ACCENT,
- 0x039F | HAS_VOWEL | HAS_ACCENT,
- 0x039F | HAS_VOWEL | HAS_ACCENT,
- 0x039F | HAS_VOWEL | HAS_ACCENT,
- 0,
- 0,
- 0x039F | HAS_VOWEL,
- 0x039F | HAS_VOWEL,
- 0x039F | HAS_VOWEL | HAS_ACCENT,
- 0x039F | HAS_VOWEL | HAS_ACCENT,
- 0x039F | HAS_VOWEL | HAS_ACCENT,
- 0x039F | HAS_VOWEL | HAS_ACCENT,
- 0,
- 0,
- 0x03A5 | HAS_VOWEL, // U+1F50
- 0x03A5 | HAS_VOWEL,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0,
- 0x03A5 | HAS_VOWEL,
- 0,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL, // U+1F60
- 0x03A9 | HAS_VOWEL,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL,
- 0x03A9 | HAS_VOWEL,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_ACCENT, // U+1F70
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0395 | HAS_VOWEL | HAS_ACCENT,
- 0x0395 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x039F | HAS_VOWEL | HAS_ACCENT,
- 0x039F | HAS_VOWEL | HAS_ACCENT,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0,
- 0,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // U+1F80
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // U+1F90
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // U+1FA0
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0391 | HAS_VOWEL, // U+1FB0
- 0x0391 | HAS_VOWEL,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0391 | HAS_VOWEL,
- 0x0391 | HAS_VOWEL,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_ACCENT,
- 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
- 0,
- 0x0399 | HAS_VOWEL,
- 0,
- 0, // U+1FC0
- 0,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x0395 | HAS_VOWEL | HAS_ACCENT,
- 0x0395 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_ACCENT,
- 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
- 0,
- 0,
- 0,
- 0x0399 | HAS_VOWEL, // U+1FD0
- 0x0399 | HAS_VOWEL,
- 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
- 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
- 0,
- 0,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
- 0x0399 | HAS_VOWEL,
- 0x0399 | HAS_VOWEL,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0x0399 | HAS_VOWEL | HAS_ACCENT,
- 0,
- 0,
- 0,
- 0,
- 0x03A5 | HAS_VOWEL, // U+1FE0
- 0x03A5 | HAS_VOWEL,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
- 0x03A1,
- 0x03A1,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
- 0x03A5 | HAS_VOWEL,
- 0x03A5 | HAS_VOWEL,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0x03A5 | HAS_VOWEL | HAS_ACCENT,
- 0x03A1,
- 0,
- 0,
- 0,
- 0, // U+1FF0
- 0,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
- 0x039F | HAS_VOWEL | HAS_ACCENT,
- 0x039F | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_ACCENT,
- 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
- 0,
- 0,
- 0,
-};
-
-// U+2126 Ohm sign: the single entry getLetterData() returns for c == 0x2126.
-static const uint16_t data2126 = 0x03A9 | HAS_VOWEL;
-
-uint32_t getLetterData(UChar32 c) { // Looks up the table entry for c in data0370 / data1F00 / data2126; returns 0 for every other code point.
- if (c < 0x370 || 0x2126 < c || (0x3ff < c && c < 0x1f00)) { // quick reject: below, above, or between the two tables
- return 0;
- } else if (c <= 0x3ff) {
- return data0370[c - 0x370];
- } else if (c <= 0x1fff) {
- return data1F00[c - 0x1f00];
- } else if (c == 0x2126) {
- return data2126;
- } else {
- return 0; // U+2000..U+2125
- }
-}
-
-uint32_t getDiacriticData(UChar32 c) { // Maps a combining mark to HAS_* flag bits; returns 0 for any code point not listed below.
- switch (c) {
- case 0x0300: // varia
- case 0x0301: // tonos = oxia
- case 0x0342: // perispomeni
- case 0x0302: // circumflex can look like perispomeni
- case 0x0303: // tilde can look like perispomeni
- case 0x0311: // inverted breve can look like perispomeni
- return HAS_ACCENT;
- case 0x0308: // dialytika = diaeresis
- return HAS_COMBINING_DIALYTIKA;
- case 0x0344: // dialytika tonos
- return HAS_COMBINING_DIALYTIKA | HAS_ACCENT;
- case 0x0345: // ypogegrammeni = iota subscript
- return HAS_YPOGEGRAMMENI;
- case 0x0304: // macron
- case 0x0306: // breve
- case 0x0313: // comma above
- case 0x0314: // reversed comma above
- case 0x0343: // koronis
- return HAS_OTHER_GREEK_DIACRITIC;
- default:
- return 0; // the caller's diacritic-skipping loop stops on 0
- }
-}
-
-UBool isFollowedByCasedLetter(const UChar *s, int32_t i, int32_t length) { // TRUE if the first non-case-ignorable code point in s[i..length[ is cased.
- while (i < length) {
- UChar32 c;
- U16_NEXT(s, i, length, c);
- int32_t type = ucase_getTypeOrIgnorable(c);
- if ((type & UCASE_IGNORABLE) != 0) {
- // Case-ignorable, continue with the loop.
- } else if (type != UCASE_NONE) {
- return TRUE; // Followed by cased letter.
- } else {
- return FALSE; // Uncased and not case-ignorable.
- }
- }
- return FALSE; // Not followed by cased letter.
-}
-
-/**
- * Greek string uppercasing with a state machine.
- * Probably simpler than a stateless function that has to figure out complex context-before
- * for each character.
- * TODO: Try to re-consolidate one way or another with the non-Greek function.
- *
- * Returns the length of the result. On integer overflow of that length it sets
- * errorCode to U_INDEX_OUTOFBOUNDS_ERROR and returns 0.
- * Buffer overflow is not reported here; the caller compares the returned
- * length against destCapacity.
- */
-int32_t toUpper(uint32_t options,
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- Edits *edits,
- UErrorCode &errorCode) {
- int32_t destIndex=0;
- uint32_t state = 0; // AFTER_CASED / AFTER_VOWEL_WITH_ACCENT bits carried over from the previous character
- for (int32_t i = 0; i < srcLength;) {
- int32_t nextIndex = i;
- UChar32 c;
- U16_NEXT(src, nextIndex, srcLength, c);
- uint32_t nextState = 0;
- int32_t type = ucase_getTypeOrIgnorable(c);
- if ((type & UCASE_IGNORABLE) != 0) {
- // c is case-ignorable
- nextState |= (state & AFTER_CASED);
- } else if (type != UCASE_NONE) {
- // c is cased
- nextState |= AFTER_CASED;
- }
- uint32_t data = getLetterData(c);
- if (data > 0) { // c has an entry in the Greek tables
- uint32_t upper = data & UPPER_MASK;
- // Add a dialytika to this iota or ypsilon vowel
- // if we removed a tonos from the previous vowel,
- // and that previous vowel did not also have (or gain) a dialytika.
- // Adding one only to the final vowel in a longer sequence
- // (which does not occur in normal writing) would require lookahead.
- // Set the same flag as for preserving an existing dialytika.
- if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
- (upper == 0x399 || upper == 0x3A5)) {
- data |= HAS_DIALYTIKA;
- }
- int32_t numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota.
- if ((data & HAS_YPOGEGRAMMENI) != 0) {
- numYpogegrammeni = 1;
- }
- // Skip combining diacritics after this Greek letter.
- while (nextIndex < srcLength) {
- uint32_t diacriticData = getDiacriticData(src[nextIndex]);
- if (diacriticData != 0) {
- data |= diacriticData;
- if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
- ++numYpogegrammeni;
- }
- ++nextIndex;
- } else {
- break; // not a Greek diacritic
- }
- }
- if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
- nextState |= AFTER_VOWEL_WITH_ACCENT;
- }
- // Map according to Greek rules.
- UBool addTonos = FALSE;
- if (upper == 0x397 &&
- (data & HAS_ACCENT) != 0 &&
- numYpogegrammeni == 0 &&
- (state & AFTER_CASED) == 0 &&
- !isFollowedByCasedLetter(src, nextIndex, srcLength)) {
- // Keep disjunctive "or" with (only) a tonos.
- // We use the same "word boundary" conditions as for the Final_Sigma test.
- if (i == nextIndex) { // NOTE(review): U16_NEXT above already advanced nextIndex past c, so this looks unreachable - confirm intent
- upper = 0x389; // Preserve the precomposed form.
- } else {
- addTonos = TRUE;
- }
- } else if ((data & HAS_DIALYTIKA) != 0) {
- // Preserve a vowel with dialytika in precomposed form if it exists.
- if (upper == 0x399) {
- upper = 0x3AA;
- data &= ~HAS_EITHER_DIALYTIKA;
- } else if (upper == 0x3A5) {
- upper = 0x3AB;
- data &= ~HAS_EITHER_DIALYTIKA;
- }
- }
-
- UBool change;
- if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
- change = TRUE; // common, simple usage
- } else {
- // Find out first whether we are changing the text.
- change = src[i] != upper || numYpogegrammeni > 0;
- int32_t i2 = i + 1; // walks the source positions the output units are compared against
- if ((data & HAS_EITHER_DIALYTIKA) != 0) {
- change |= i2 >= nextIndex || src[i2] != 0x308;
- ++i2;
- }
- if (addTonos) {
- change |= i2 >= nextIndex || src[i2] != 0x301;
- ++i2;
- }
- int32_t oldLength = nextIndex - i;
- int32_t newLength = (i2 - i) + numYpogegrammeni;
- change |= oldLength != newLength;
- if (change) {
- if (edits != NULL) {
- edits->addReplace(oldLength, newLength);
- }
- } else {
- if (edits != NULL) {
- edits->addUnchanged(oldLength);
- }
- // Write unchanged text?
- change = (options & U_OMIT_UNCHANGED_TEXT) == 0;
- }
- }
-
- if (change) { // from here on "change" means: write this character's output
- destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);
- if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
- destIndex=appendUChar(dest, destIndex, destCapacity, 0x308); // restore or add a dialytika
- }
- if (destIndex >= 0 && addTonos) {
- destIndex=appendUChar(dest, destIndex, destCapacity, 0x301);
- }
- while (destIndex >= 0 && numYpogegrammeni > 0) {
- destIndex=appendUChar(dest, destIndex, destCapacity, 0x399);
- --numYpogegrammeni;
- }
- if(destIndex<0) {
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- }
- } else {
- const UChar *s; // no Greek table data: use the generic full uppercase mapping without a context iterator
- c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);
- destIndex = appendResult(dest, destIndex, destCapacity, c, s,
- nextIndex - i, options, edits);
- if (destIndex < 0) {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- }
- i = nextIndex;
- state = nextState;
- }
-
- return destIndex;
-}
-
-} // namespace GreekUpper
-U_NAMESPACE_END
-
-/* functions available in the common library (for unistr_case.cpp) */
-
-U_CFUNC int32_t U_CALLCONV // Lowercases all of src via toLower(), then applies the overflow/edits status check; returns the result length.
-ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- icu::Edits *edits,
- UErrorCode &errorCode) {
- UCaseContext csc=UCASECONTEXT_INITIALIZER;
- csc.p=(void *)src; // context iterator reads from the whole source string
- csc.limit=srcLength;
- int32_t destIndex = toLower(
- caseLocale, options,
- dest, destCapacity,
- src, &csc, 0, srcLength,
- edits, errorCode);
- return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
-}
-
-U_CFUNC int32_t U_CALLCONV // Uppercases all of src, then applies the overflow/edits status check; returns the result length.
-ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- icu::Edits *edits,
- UErrorCode &errorCode) {
- int32_t destIndex;
- if (caseLocale == UCASE_LOC_GREEK) { // Greek uses its own state-machine implementation
- destIndex = GreekUpper::toUpper(options, dest, destCapacity,
- src, srcLength, edits, errorCode);
- } else {
- UCaseContext csc=UCASECONTEXT_INITIALIZER;
- csc.p=(void *)src;
- csc.limit=srcLength;
- destIndex = toUpper(
- caseLocale, options,
- dest, destCapacity,
- src, &csc, srcLength,
- edits, errorCode);
- }
- return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
-}
-
-U_CFUNC int32_t U_CALLCONV
-ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- icu::Edits *edits,
- UErrorCode &errorCode) {
- int32_t destIndex = toLower(
- -1, options,
- dest, destCapacity,
- src, nullptr, 0, srcLength,
- edits, errorCode);
- return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
-}
-
-U_CFUNC int32_t
-ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- UStringCaseMapper *stringCaseMapper,
- icu::Edits *edits,
- UErrorCode &errorCode) {
- int32_t destLength;
-
- /* check argument values */
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- if( destCapacity<0 ||
- (dest==NULL && destCapacity>0) ||
- src==NULL ||
- srcLength<-1
- ) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* get the string length */
- if(srcLength==-1) {
- srcLength=u_strlen(src);
- }
-
- /* check for overlapping source and destination */
- if( dest!=NULL &&
- ((src>=dest && src<(dest+destCapacity)) ||
- (dest>=src && dest<(src+srcLength)))
- ) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
- edits->reset();
- }
- destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
- dest, destCapacity, src, srcLength, edits, errorCode);
- return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
-}
-
-U_CFUNC int32_t
-ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- UStringCaseMapper *stringCaseMapper,
- UErrorCode &errorCode) {
- UChar buffer[300];
- UChar *temp;
-
- int32_t destLength;
-
- /* check argument values */
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- if( destCapacity<0 ||
- (dest==NULL && destCapacity>0) ||
- src==NULL ||
- srcLength<-1
- ) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* get the string length */
- if(srcLength==-1) {
- srcLength=u_strlen(src);
- }
-
- /* check for overlapping source and destination */
- if( dest!=NULL &&
- ((src>=dest && src<(dest+destCapacity)) ||
- (dest>=src && dest<(src+srcLength)))
- ) {
- /* overlap: provide a temporary destination buffer and later copy the result */
- if(destCapacity<=UPRV_LENGTHOF(buffer)) {
- /* the stack buffer is large enough */
- temp=buffer;
- } else {
- /* allocate a buffer */
- temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
- if(temp==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- }
- } else {
- temp=dest;
- }
-
- destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
- temp, destCapacity, src, srcLength, NULL, errorCode);
- if(temp!=dest) {
- /* copy the result string to the destination buffer */
- if (U_SUCCESS(errorCode) && 0 < destLength && destLength <= destCapacity) {
- u_memmove(dest, temp, destLength);
- }
- if(temp!=buffer) {
- uprv_free(temp);
- }
- }
-
- return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
-}
-
-/* public API functions */
-
-U_CAPI int32_t U_EXPORT2
-u_strFoldCase(UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- uint32_t options,
- UErrorCode *pErrorCode) {
- return ustrcase_mapWithOverlap(
- UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
- dest, destCapacity,
- src, srcLength,
- ustrcase_internalFold, *pErrorCode);
-}
-
-U_NAMESPACE_BEGIN
-
-int32_t CaseMap::fold(
- uint32_t options,
- const UChar *src, int32_t srcLength,
- UChar *dest, int32_t destCapacity, Edits *edits,
- UErrorCode &errorCode) {
- return ustrcase_map(
- UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
- dest, destCapacity,
- src, srcLength,
- ustrcase_internalFold, edits, errorCode);
-}
-
-U_NAMESPACE_END
-
-/* case-insensitive string comparisons -------------------------------------- */
-
-/*
- * This function is a copy of unorm_cmpEquivFold() minus the parts for
- * canonical equivalence.
- * Keep the functions in sync, and see there for how this works.
- * The duplication is for modularization:
- * It makes caseless (but not canonical caseless) matches independent of
- * the normalization code.
- */
-
-/* stack element for previous-level source/decomposition pointers */
-struct CmpEquivLevel {
- const UChar *start, *s, *limit;
-};
-typedef struct CmpEquivLevel CmpEquivLevel;
-
-/**
- * Internal implementation code comparing string with case fold.
- * This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch().
- *
- * @param s1 input string 1
- * @param length1 length of string 1, or -1 (NULL terminated)
- * @param s2 input string 2
- * @param length2 length of string 2, or -1 (NULL terminated)
- * @param options compare options
- * @param matchLen1 (output) length of partial prefix match in s1
- * @param matchLen2 (output) length of partial prefix match in s2
- * @param pErrorCode receives error status
- * @return The result of comparison
- */
-static int32_t _cmpFold(
- const UChar *s1, int32_t length1,
- const UChar *s2, int32_t length2,
- uint32_t options,
- int32_t *matchLen1, int32_t *matchLen2,
- UErrorCode *pErrorCode) {
- int32_t cmpRes = 0;
-
- /* current-level start/limit - s1/s2 as current */
- const UChar *start1, *start2, *limit1, *limit2;
-
- /* points to the original start address */
- const UChar *org1, *org2;
-
- /* points to the end of match + 1 */
- const UChar *m1, *m2;
-
- /* case folding variables */
- const UChar *p;
- int32_t length;
-
- /* stacks of previous-level start/current/limit */
- CmpEquivLevel stack1[2], stack2[2];
-
- /* case folding buffers, only use current-level start/limit */
- UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
-
- /* track which is the current level per string */
- int32_t level1, level2;
-
- /* current code units, and code points for lookups */
- UChar32 c1, c2, cp1, cp2;
-
- /* no argument error checking because this itself is not an API */
-
- /*
- * assume that at least the option U_COMPARE_IGNORE_CASE is set
- * otherwise this function would have to behave exactly as uprv_strCompare()
- */
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /* initialize */
- if(matchLen1) {
- U_ASSERT(matchLen2 !=NULL);
- *matchLen1=0;
- *matchLen2=0;
- }
-
- start1=m1=org1=s1;
- if(length1==-1) {
- limit1=NULL;
- } else {
- limit1=s1+length1;
- }
-
- start2=m2=org2=s2;
- if(length2==-1) {
- limit2=NULL;
- } else {
- limit2=s2+length2;
- }
-
- level1=level2=0;
- c1=c2=-1;
-
- /* comparison loop */
- for(;;) {
- /*
- * here a code unit value of -1 means "get another code unit"
- * below it will mean "this source is finished"
- */
-
- if(c1<0) {
- /* get next code unit from string 1, post-increment */
- for(;;) {
- if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
- if(level1==0) {
- c1=-1;
- break;
- }
- } else {
- ++s1;
- break;
- }
-
- /* reached end of level buffer, pop one level */
- do {
- --level1;
- start1=stack1[level1].start; /*Not uninitialized*/
- } while(start1==NULL);
- s1=stack1[level1].s; /*Not uninitialized*/
- limit1=stack1[level1].limit; /*Not uninitialized*/
- }
- }
-
- if(c2<0) {
- /* get next code unit from string 2, post-increment */
- for(;;) {
- if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
- if(level2==0) {
- c2=-1;
- break;
- }
- } else {
- ++s2;
- break;
- }
-
- /* reached end of level buffer, pop one level */
- do {
- --level2;
- start2=stack2[level2].start; /*Not uninitialized*/
- } while(start2==NULL);
- s2=stack2[level2].s; /*Not uninitialized*/
- limit2=stack2[level2].limit; /*Not uninitialized*/
- }
- }
-
- /*
- * compare c1 and c2
- * either variable c1, c2 is -1 only if the corresponding string is finished
- */
- if(c1==c2) {
- const UChar *next1, *next2;
-
- if(c1<0) {
- cmpRes=0; /* c1==c2==-1 indicating end of strings */
- break;
- }
-
- /*
- * Note: Move the match positions in both strings at the same time
- * only when corresponding code point(s) in the original strings
- * are fully consumed. For example, when comparing s1="Fust" and
- * s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matches
- * the first code point in the case-folded data. But the second "s"
- * has no matching code point in s1, so this implementation returns
- * 2 as the prefix match length ("Fu").
- */
- next1=next2=NULL;
- if(level1==0) {
- next1=s1;
- } else if(s1==limit1) {
- /* Note: This implementation only use a single level of stack.
- * If this code needs to be changed to use multiple levels
- * of stacks, the code above should check if the current
- * code is at the end of all stacks.
- */
- U_ASSERT(level1==1);
-
- /* is s1 at the end of the current stack? */
- next1=stack1[0].s;
- }
-
- if (next1!=NULL) {
- if(level2==0) {
- next2=s2;
- } else if(s2==limit2) {
- U_ASSERT(level2==1);
-
- /* is s2 at the end of the current stack? */
- next2=stack2[0].s;
- }
- if(next2!=NULL) {
- m1=next1;
- m2=next2;
- }
- }
- c1=c2=-1; /* make us fetch new code units */
- continue;
- } else if(c1<0) {
- cmpRes=-1; /* string 1 ends before string 2 */
- break;
- } else if(c2<0) {
- cmpRes=1; /* string 2 ends before string 1 */
- break;
- }
- /* c1!=c2 && c1>=0 && c2>=0 */
-
- /* get complete code points for c1, c2 for lookups if either is a surrogate */
- cp1=c1;
- if(U_IS_SURROGATE(c1)) {
- UChar c;
-
- if(U_IS_SURROGATE_LEAD(c1)) {
- if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
- /* advance ++s1; only below if cp1 decomposes/case-folds */
- cp1=U16_GET_SUPPLEMENTARY(c1, c);
- }
- } else /* isTrail(c1) */ {
- if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
- cp1=U16_GET_SUPPLEMENTARY(c, c1);
- }
- }
- }
-
- cp2=c2;
- if(U_IS_SURROGATE(c2)) {
- UChar c;
-
- if(U_IS_SURROGATE_LEAD(c2)) {
- if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
- /* advance ++s2; only below if cp2 decomposes/case-folds */
- cp2=U16_GET_SUPPLEMENTARY(c2, c);
- }
- } else /* isTrail(c2) */ {
- if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
- cp2=U16_GET_SUPPLEMENTARY(c, c2);
- }
- }
- }
-
- /*
- * go down one level for each string
- * continue with the main loop as soon as there is a real change
- */
-
- if( level1==0 &&
- (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0
- ) {
- /* cp1 case-folds to the code point "length" or to p[length] */
- if(U_IS_SURROGATE(c1)) {
- if(U_IS_SURROGATE_LEAD(c1)) {
- /* advance beyond source surrogate pair if it case-folds */
- ++s1;
- } else /* isTrail(c1) */ {
- /*
- * we got a supplementary code point when hitting its trail surrogate,
- * therefore the lead surrogate must have been the same as in the other string;
- * compare this decomposition with the lead surrogate in the other string
- * remember that this simulates bulk text replacement:
- * the decomposition would replace the entire code point
- */
- --s2;
- --m2;
- c2=*(s2-1);
- }
- }
-
- /* push current level pointers */
- stack1[0].start=start1;
- stack1[0].s=s1;
- stack1[0].limit=limit1;
- ++level1;
-
- /* copy the folding result to fold1[] */
- if(length<=UCASE_MAX_STRING_LENGTH) {
- u_memcpy(fold1, p, length);
- } else {
- int32_t i=0;
- U16_APPEND_UNSAFE(fold1, i, length);
- length=i;
- }
-
- /* set next level pointers to case folding */
- start1=s1=fold1;
- limit1=fold1+length;
-
- /* get ready to read from decomposition, continue with loop */
- c1=-1;
- continue;
- }
-
- if( level2==0 &&
- (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0
- ) {
- /* cp2 case-folds to the code point "length" or to p[length] */
- if(U_IS_SURROGATE(c2)) {
- if(U_IS_SURROGATE_LEAD(c2)) {
- /* advance beyond source surrogate pair if it case-folds */
- ++s2;
- } else /* isTrail(c2) */ {
- /*
- * we got a supplementary code point when hitting its trail surrogate,
- * therefore the lead surrogate must have been the same as in the other string;
- * compare this decomposition with the lead surrogate in the other string
- * remember that this simulates bulk text replacement:
- * the decomposition would replace the entire code point
- */
- --s1;
- --m2;
- c1=*(s1-1);
- }
- }
-
- /* push current level pointers */
- stack2[0].start=start2;
- stack2[0].s=s2;
- stack2[0].limit=limit2;
- ++level2;
-
- /* copy the folding result to fold2[] */
- if(length<=UCASE_MAX_STRING_LENGTH) {
- u_memcpy(fold2, p, length);
- } else {
- int32_t i=0;
- U16_APPEND_UNSAFE(fold2, i, length);
- length=i;
- }
-
- /* set next level pointers to case folding */
- start2=s2=fold2;
- limit2=fold2+length;
-
- /* get ready to read from decomposition, continue with loop */
- c2=-1;
- continue;
- }
-
- /*
- * no decomposition/case folding, max level for both sides:
- * return difference result
- *
- * code point order comparison must not just return cp1-cp2
- * because when single surrogates are present then the surrogate pairs
- * that formed cp1 and cp2 may be from different string indexes
- *
- * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
- * c1=d800 cp1=10001 c2=dc00 cp2=10000
- * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
- *
- * therefore, use same fix-up as in ustring.c/uprv_strCompare()
- * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
- * so we have slightly different pointer/start/limit comparisons here
- */
-
- if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
- /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
- if(
- (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
- (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
- ) {
- /* part of a surrogate pair, leave >=d800 */
- } else {
- /* BMP code point - may be surrogate code point - make <d800 */
- c1-=0x2800;
- }
-
- if(
- (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
- (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
- ) {
- /* part of a surrogate pair, leave >=d800 */
- } else {
- /* BMP code point - may be surrogate code point - make <d800 */
- c2-=0x2800;
- }
- }
-
- cmpRes=c1-c2;
- break;
- }
-
- if(matchLen1) {
- *matchLen1=static_cast<int32_t>(m1-org1);
- *matchLen2=static_cast<int32_t>(m2-org2);
- }
- return cmpRes;
-}
-
-/* internal function */
-U_CFUNC int32_t
-u_strcmpFold(const UChar *s1, int32_t length1,
- const UChar *s2, int32_t length2,
- uint32_t options,
- UErrorCode *pErrorCode) {
- return _cmpFold(s1, length1, s2, length2, options, NULL, NULL, pErrorCode);
-}
-
-/* public API functions */
-
-U_CAPI int32_t U_EXPORT2
-u_strCaseCompare(const UChar *s1, int32_t length1,
- const UChar *s2, int32_t length2,
- uint32_t options,
- UErrorCode *pErrorCode) {
- /* argument checking */
- if(pErrorCode==0 || U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- return u_strcmpFold(s1, length1, s2, length2,
- options|U_COMPARE_IGNORE_CASE,
- pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) {
- UErrorCode errorCode=U_ZERO_ERROR;
- return u_strcmpFold(s1, -1, s2, -1,
- options|U_COMPARE_IGNORE_CASE,
- &errorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) {
- UErrorCode errorCode=U_ZERO_ERROR;
- return u_strcmpFold(s1, length, s2, length,
- options|U_COMPARE_IGNORE_CASE,
- &errorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
- UErrorCode errorCode=U_ZERO_ERROR;
- return u_strcmpFold(s1, n, s2, n,
- options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE),
- &errorCode);
-}
-
-/* internal API - detect length of shared prefix */
-U_CAPI void
-u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
- const UChar *s2, int32_t length2,
- uint32_t options,
- int32_t *matchLen1, int32_t *matchLen2,
- UErrorCode *pErrorCode) {
- _cmpFold(s1, length1, s2, length2, options,
- matchLen1, matchLen2, pErrorCode);
-}
diff --git a/contrib/libs/icu/common/ustrcase_locale.cpp b/contrib/libs/icu/common/ustrcase_locale.cpp
deleted file mode 100644
index 2ecd24f03ec..00000000000
--- a/contrib/libs/icu/common/ustrcase_locale.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: ustrcase_locale.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2011may31
-* created by: Markus W. Scherer
-*
-* Locale-sensitive case mapping functions (ones that call uloc_getDefault())
-* were moved here to break dependency cycles among parts of the common library.
-*/
-
-#include "unicode/utypes.h"
-#include "uassert.h"
-#include "unicode/brkiter.h"
-#include "unicode/casemap.h"
-#include "unicode/ucasemap.h"
-#include "unicode/uloc.h"
-#include "unicode/ustring.h"
-#include "ucase.h"
-#include "ucasemap_imp.h"
-
-U_CFUNC int32_t
-ustrcase_getCaseLocale(const char *locale) {
- if (locale == NULL) {
- locale = uloc_getDefault();
- }
- if (*locale == 0) {
- return UCASE_LOC_ROOT;
- } else {
- return ucase_getCaseLocale(locale);
- }
-}
-
-/* public API functions */
-
-U_CAPI int32_t U_EXPORT2
-u_strToLower(UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- const char *locale,
- UErrorCode *pErrorCode) {
- return ustrcase_mapWithOverlap(
- ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL
- dest, destCapacity,
- src, srcLength,
- ustrcase_internalToLower, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-u_strToUpper(UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- const char *locale,
- UErrorCode *pErrorCode) {
- return ustrcase_mapWithOverlap(
- ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL
- dest, destCapacity,
- src, srcLength,
- ustrcase_internalToUpper, *pErrorCode);
-}
-
-U_NAMESPACE_BEGIN
-
-int32_t CaseMap::toLower(
- const char *locale, uint32_t options,
- const UChar *src, int32_t srcLength,
- UChar *dest, int32_t destCapacity, Edits *edits,
- UErrorCode &errorCode) {
- return ustrcase_map(
- ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
- dest, destCapacity,
- src, srcLength,
- ustrcase_internalToLower, edits, errorCode);
-}
-
-int32_t CaseMap::toUpper(
- const char *locale, uint32_t options,
- const UChar *src, int32_t srcLength,
- UChar *dest, int32_t destCapacity, Edits *edits,
- UErrorCode &errorCode) {
- return ustrcase_map(
- ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
- dest, destCapacity,
- src, srcLength,
- ustrcase_internalToUpper, edits, errorCode);
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/ustrenum.cpp b/contrib/libs/icu/common/ustrenum.cpp
deleted file mode 100644
index ed23eaa232e..00000000000
--- a/contrib/libs/icu/common/ustrenum.cpp
+++ /dev/null
@@ -1,398 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2002-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Author: Alan Liu
-* Created: November 11 2002
-* Since: ICU 2.4
-**********************************************************************
-*/
-#include "utypeinfo.h" // for 'typeid' to work
-
-#include "unicode/ustring.h"
-#include "unicode/strenum.h"
-#include "unicode/putil.h"
-#include "uenumimp.h"
-#include "ustrenum.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "uassert.h"
-
-U_NAMESPACE_BEGIN
-// StringEnumeration implementation ---------------------------------------- ***
-
-StringEnumeration::StringEnumeration()
- : chars(charsBuffer), charsCapacity(sizeof(charsBuffer)) {
-}
-
-StringEnumeration::~StringEnumeration() {
- if (chars != NULL && chars != charsBuffer) {
- uprv_free(chars);
- }
-}
-
-// StringEnumeration base class clone() default implementation, does not clone
-StringEnumeration *
-StringEnumeration::clone() const {
- return NULL;
-}
-
-const char *
-StringEnumeration::next(int32_t *resultLength, UErrorCode &status) {
- const UnicodeString *s=snext(status);
- if(U_SUCCESS(status) && s!=NULL) {
- unistr=*s;
- ensureCharsCapacity(unistr.length()+1, status);
- if(U_SUCCESS(status)) {
- if(resultLength!=NULL) {
- *resultLength=unistr.length();
- }
- unistr.extract(0, INT32_MAX, chars, charsCapacity, US_INV);
- return chars;
- }
- }
-
- return NULL;
-}
-
-const UChar *
-StringEnumeration::unext(int32_t *resultLength, UErrorCode &status) {
- const UnicodeString *s=snext(status);
- if(U_SUCCESS(status) && s!=NULL) {
- unistr=*s;
- if(resultLength!=NULL) {
- *resultLength=unistr.length();
- }
- return unistr.getTerminatedBuffer();
- }
-
- return NULL;
-}
-
-const UnicodeString *
-StringEnumeration::snext(UErrorCode &status) {
- int32_t length;
- const char *s=next(&length, status);
- return setChars(s, length, status);
-}
-
-void
-StringEnumeration::ensureCharsCapacity(int32_t capacity, UErrorCode &status) {
- if(U_SUCCESS(status) && capacity>charsCapacity) {
- if(capacity<(charsCapacity+charsCapacity/2)) {
- // avoid allocation thrashing
- capacity=charsCapacity+charsCapacity/2;
- }
- if(chars!=charsBuffer) {
- uprv_free(chars);
- }
- chars=(char *)uprv_malloc(capacity);
- if(chars==NULL) {
- chars=charsBuffer;
- charsCapacity=sizeof(charsBuffer);
- status=U_MEMORY_ALLOCATION_ERROR;
- } else {
- charsCapacity=capacity;
- }
- }
-}
-
-UnicodeString *
-StringEnumeration::setChars(const char *s, int32_t length, UErrorCode &status) {
- if(U_SUCCESS(status) && s!=NULL) {
- if(length<0) {
- length=(int32_t)uprv_strlen(s);
- }
-
- UChar *buffer=unistr.getBuffer(length+1);
- if(buffer!=NULL) {
- u_charsToUChars(s, buffer, length);
- buffer[length]=0;
- unistr.releaseBuffer(length);
- return &unistr;
- } else {
- status=U_MEMORY_ALLOCATION_ERROR;
- }
- }
-
- return NULL;
-}
-UBool
-StringEnumeration::operator==(const StringEnumeration& that)const {
- return typeid(*this) == typeid(that);
-}
-
-UBool
-StringEnumeration::operator!=(const StringEnumeration& that)const {
- return !operator==(that);
-}
-
-// UStringEnumeration implementation --------------------------------------- ***
-
-UStringEnumeration * U_EXPORT2
-UStringEnumeration::fromUEnumeration(
- UEnumeration *uenumToAdopt, UErrorCode &status) {
- if (U_FAILURE(status)) {
- uenum_close(uenumToAdopt);
- return NULL;
- }
- UStringEnumeration *result = new UStringEnumeration(uenumToAdopt);
- if (result == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- uenum_close(uenumToAdopt);
- return NULL;
- }
- return result;
-}
-
-UStringEnumeration::UStringEnumeration(UEnumeration* _uenum) :
- uenum(_uenum) {
- U_ASSERT(_uenum != 0);
-}
-
-UStringEnumeration::~UStringEnumeration() {
- uenum_close(uenum);
-}
-
-int32_t UStringEnumeration::count(UErrorCode& status) const {
- return uenum_count(uenum, &status);
-}
-
-const char *UStringEnumeration::next(int32_t *resultLength, UErrorCode &status) {
- return uenum_next(uenum, resultLength, &status);
-}
-
-const UnicodeString* UStringEnumeration::snext(UErrorCode& status) {
- int32_t length;
- const UChar* str = uenum_unext(uenum, &length, &status);
- if (str == 0 || U_FAILURE(status)) {
- return 0;
- }
- return &unistr.setTo(str, length);
-}
-
-void UStringEnumeration::reset(UErrorCode& status) {
- uenum_reset(uenum, &status);
-}
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UStringEnumeration)
-U_NAMESPACE_END
-
-// C wrapper --------------------------------------------------------------- ***
-
-#define THIS(en) ((icu::StringEnumeration*)(en->context))
-
-U_CDECL_BEGIN
-
-/**
- * Wrapper API to make StringEnumeration look like UEnumeration.
- */
-static void U_CALLCONV
-ustrenum_close(UEnumeration* en) {
- delete THIS(en);
- uprv_free(en);
-}
-
-/**
- * Wrapper API to make StringEnumeration look like UEnumeration.
- */
-static int32_t U_CALLCONV
-ustrenum_count(UEnumeration* en,
- UErrorCode* ec)
-{
- return THIS(en)->count(*ec);
-}
-
-/**
- * Wrapper API to make StringEnumeration look like UEnumeration.
- */
-static const UChar* U_CALLCONV
-ustrenum_unext(UEnumeration* en,
- int32_t* resultLength,
- UErrorCode* ec)
-{
- return THIS(en)->unext(resultLength, *ec);
-}
-
-/**
- * Wrapper API to make StringEnumeration look like UEnumeration.
- */
-static const char* U_CALLCONV
-ustrenum_next(UEnumeration* en,
- int32_t* resultLength,
- UErrorCode* ec)
-{
- return THIS(en)->next(resultLength, *ec);
-}
-
-/**
- * Wrapper API to make StringEnumeration look like UEnumeration.
- */
-static void U_CALLCONV
-ustrenum_reset(UEnumeration* en,
- UErrorCode* ec)
-{
- THIS(en)->reset(*ec);
-}
-
-/**
- * Pseudo-vtable for UEnumeration wrapper around StringEnumeration.
- * The StringEnumeration pointer will be stored in 'context'.
- */
-static const UEnumeration USTRENUM_VT = {
- NULL,
- NULL, // store StringEnumeration pointer here
- ustrenum_close,
- ustrenum_count,
- ustrenum_unext,
- ustrenum_next,
- ustrenum_reset
-};
-
-U_CDECL_END
-
-/**
- * Given a StringEnumeration, wrap it in a UEnumeration. The
- * StringEnumeration is adopted; after this call, the caller must not
- * delete it (regardless of error status).
- */
-U_CAPI UEnumeration* U_EXPORT2
-uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec) {
- UEnumeration* result = NULL;
- if (U_SUCCESS(*ec) && adopted != NULL) {
- result = (UEnumeration*) uprv_malloc(sizeof(UEnumeration));
- if (result == NULL) {
- *ec = U_MEMORY_ALLOCATION_ERROR;
- } else {
- uprv_memcpy(result, &USTRENUM_VT, sizeof(USTRENUM_VT));
- result->context = adopted;
- }
- }
- if (result == NULL) {
- delete adopted;
- }
- return result;
-}
-
-// C wrapper --------------------------------------------------------------- ***
-
-U_CDECL_BEGIN
-
-typedef struct UCharStringEnumeration {
- UEnumeration uenum;
- int32_t index, count;
-} UCharStringEnumeration;
-
-static void U_CALLCONV
-ucharstrenum_close(UEnumeration* en) {
- uprv_free(en);
-}
-
-static int32_t U_CALLCONV
-ucharstrenum_count(UEnumeration* en,
- UErrorCode* /*ec*/) {
- return ((UCharStringEnumeration*)en)->count;
-}
-
-static const UChar* U_CALLCONV
-ucharstrenum_unext(UEnumeration* en,
- int32_t* resultLength,
- UErrorCode* /*ec*/) {
- UCharStringEnumeration *e = (UCharStringEnumeration*) en;
- if (e->index >= e->count) {
- return NULL;
- }
- const UChar* result = ((const UChar**)e->uenum.context)[e->index++];
- if (resultLength) {
- *resultLength = (int32_t)u_strlen(result);
- }
- return result;
-}
-
-
-static const char* U_CALLCONV
-ucharstrenum_next(UEnumeration* en,
- int32_t* resultLength,
- UErrorCode* /*ec*/) {
- UCharStringEnumeration *e = (UCharStringEnumeration*) en;
- if (e->index >= e->count) {
- return NULL;
- }
- const char* result = ((const char**)e->uenum.context)[e->index++];
- if (resultLength) {
- *resultLength = (int32_t)uprv_strlen(result);
- }
- return result;
-}
-
-static void U_CALLCONV
-ucharstrenum_reset(UEnumeration* en,
- UErrorCode* /*ec*/) {
- ((UCharStringEnumeration*)en)->index = 0;
-}
-
-static const UEnumeration UCHARSTRENUM_VT = {
- NULL,
- NULL, // store StringEnumeration pointer here
- ucharstrenum_close,
- ucharstrenum_count,
- uenum_unextDefault,
- ucharstrenum_next,
- ucharstrenum_reset
-};
-
-static const UEnumeration UCHARSTRENUM_U_VT = {
- NULL,
- NULL, // store StringEnumeration pointer here
- ucharstrenum_close,
- ucharstrenum_count,
- ucharstrenum_unext,
- uenum_nextDefault,
- ucharstrenum_reset
-};
-
-U_CDECL_END
-
-U_CAPI UEnumeration* U_EXPORT2
-uenum_openCharStringsEnumeration(const char* const strings[], int32_t count,
- UErrorCode* ec) {
- UCharStringEnumeration* result = NULL;
- if (U_SUCCESS(*ec) && count >= 0 && (count == 0 || strings != 0)) {
- result = (UCharStringEnumeration*) uprv_malloc(sizeof(UCharStringEnumeration));
- if (result == NULL) {
- *ec = U_MEMORY_ALLOCATION_ERROR;
- } else {
- U_ASSERT((char*)result==(char*)(&result->uenum));
- uprv_memcpy(result, &UCHARSTRENUM_VT, sizeof(UCHARSTRENUM_VT));
- result->uenum.context = (void*)strings;
- result->index = 0;
- result->count = count;
- }
- }
- return (UEnumeration*) result;
-}
-
-U_CAPI UEnumeration* U_EXPORT2
-uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count,
- UErrorCode* ec) {
- UCharStringEnumeration* result = NULL;
- if (U_SUCCESS(*ec) && count >= 0 && (count == 0 || strings != 0)) {
- result = (UCharStringEnumeration*) uprv_malloc(sizeof(UCharStringEnumeration));
- if (result == NULL) {
- *ec = U_MEMORY_ALLOCATION_ERROR;
- } else {
- U_ASSERT((char*)result==(char*)(&result->uenum));
- uprv_memcpy(result, &UCHARSTRENUM_U_VT, sizeof(UCHARSTRENUM_U_VT));
- result->uenum.context = (void*)strings;
- result->index = 0;
- result->count = count;
- }
- }
- return (UEnumeration*) result;
-}
-
-
-// end C Wrapper
diff --git a/contrib/libs/icu/common/ustrenum.h b/contrib/libs/icu/common/ustrenum.h
deleted file mode 100644
index a82162e2bde..00000000000
--- a/contrib/libs/icu/common/ustrenum.h
+++ /dev/null
@@ -1,87 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2002-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Author: Alan Liu
-* Created: November 11 2002
-* Since: ICU 2.4
-**********************************************************************
-*/
-#ifndef _USTRENUM_H_
-#define _USTRENUM_H_
-
-#include "unicode/uenum.h"
-#include "unicode/strenum.h"
-
-//----------------------------------------------------------------------
-U_NAMESPACE_BEGIN
-
-/**
- * A wrapper to make a UEnumeration into a StringEnumeration. The
- * wrapper adopts the UEnumeration is wraps.
- */
-class U_COMMON_API UStringEnumeration : public StringEnumeration {
-
-public:
- /**
- * Constructor. This constructor adopts its UEnumeration
- * argument.
- * @param uenum a UEnumeration object. This object takes
- * ownership of 'uenum' and will close it in its destructor. The
- * caller must not call uenum_close on 'uenum' after calling this
- * constructor.
- */
- UStringEnumeration(UEnumeration* uenum);
-
- /**
- * Destructor. This closes the UEnumeration passed in to the
- * constructor.
- */
- virtual ~UStringEnumeration();
-
- /**
- * Return the number of elements that the iterator traverses.
- * @param status the error code.
- * @return number of elements in the iterator.
- */
- virtual int32_t count(UErrorCode& status) const;
-
- virtual const char* next(int32_t *resultLength, UErrorCode& status);
-
- /**
- * Returns the next element a UnicodeString*. If there are no
- * more elements, returns NULL.
- * @param status the error code.
- * @return a pointer to the string, or NULL.
- */
- virtual const UnicodeString* snext(UErrorCode& status);
-
- /**
- * Resets the iterator.
- * @param status the error code.
- */
- virtual void reset(UErrorCode& status);
-
- /**
- * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
- */
- virtual UClassID getDynamicClassID() const;
-
- /**
- * ICU4C "poor man's RTTI", returns a UClassID for this ICU class.
- */
- static UClassID U_EXPORT2 getStaticClassID();
-
- static UStringEnumeration * U_EXPORT2 fromUEnumeration(
- UEnumeration *enumToAdopt, UErrorCode &status);
-private:
- UEnumeration *uenum; // owned
-};
-
-U_NAMESPACE_END
-
-#endif
-
diff --git a/contrib/libs/icu/common/ustrfmt.cpp b/contrib/libs/icu/common/ustrfmt.cpp
deleted file mode 100644
index 1a9b15a59fe..00000000000
--- a/contrib/libs/icu/common/ustrfmt.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2001-2006, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#include "cstring.h"
-#include "ustrfmt.h"
-
-
-/***
- * Fills in a UChar* string with the radix-based representation of a
- * uint32_t number padded with zeroes to minwidth. The result
- * will be null terminated if there is room.
- *
- * @param buffer UChar buffer to receive result
- * @param capacity capacity of buffer
- * @param i the unsigned number to be formatted
- * @param radix the radix from 2..36
- * @param minwidth the minimum width. If the result is narrower than
- * this, '0's will be added on the left. Must be <=
- * capacity.
- * @return the length of the result, not including any terminating
- * null
- */
-U_CAPI int32_t U_EXPORT2
-uprv_itou (UChar * buffer, int32_t capacity,
- uint32_t i, uint32_t radix, int32_t minwidth)
-{
- int32_t length = 0;
- int digit;
- int32_t j;
- UChar temp;
-
- do{
- digit = (int)(i % radix);
- buffer[length++]=(UChar)(digit<=9?(0x0030+digit):(0x0030+digit+7));
- i=i/radix;
- } while(i && length<capacity);
-
- while (length < minwidth){
- buffer[length++] = (UChar) 0x0030;/*zero padding */
- }
- /* null terminate the buffer */
- if(length<capacity){
- buffer[length] = (UChar) 0x0000;
- }
-
- /* Reverses the string */
- for (j = 0; j < (length / 2); j++){
- temp = buffer[(length-1) - j];
- buffer[(length-1) - j] = buffer[j];
- buffer[j] = temp;
- }
- return length;
-}
diff --git a/contrib/libs/icu/common/ustrfmt.h b/contrib/libs/icu/common/ustrfmt.h
deleted file mode 100644
index 53eb0557e45..00000000000
--- a/contrib/libs/icu/common/ustrfmt.h
+++ /dev/null
@@ -1,19 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2001-2006, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#ifndef USTRFMT_H
-#define USTRFMT_H
-
-#include "unicode/utypes.h"
-
-U_CAPI int32_t U_EXPORT2
-uprv_itou (UChar * buffer, int32_t capacity, uint32_t i, uint32_t radix, int32_t minwidth);
-
-
-#endif
diff --git a/contrib/libs/icu/common/ustring.cpp b/contrib/libs/icu/common/ustring.cpp
deleted file mode 100644
index de43d22ccca..00000000000
--- a/contrib/libs/icu/common/ustring.cpp
+++ /dev/null
@@ -1,1529 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1998-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* File ustring.cpp
-*
-* Modification History:
-*
-* Date Name Description
-* 12/07/98 bertrand Creation.
-******************************************************************************
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/putil.h"
-#include "unicode/uchar.h"
-#include "unicode/ustring.h"
-#include "unicode/utf16.h"
-#include "cstring.h"
-#include "cwchar.h"
-#include "cmemory.h"
-#include "ustr_imp.h"
-
-/* ANSI string.h - style functions ------------------------------------------ */
-
-/* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar */
-#define U_BMP_MAX 0xffff
-
-/* Forward binary string search functions ----------------------------------- */
-
-/*
- * Test if a substring match inside a string is at code point boundaries.
- * All pointers refer to the same buffer.
- * The limit pointer may be NULL, all others must be real pointers.
- */
-static inline UBool
-isMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) {
- if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) {
- /* the leading edge of the match is in the middle of a surrogate pair */
- return FALSE;
- }
- if(U16_IS_LEAD(*(matchLimit-1)) && match!=limit && U16_IS_TRAIL(*matchLimit)) {
- /* the trailing edge of the match is in the middle of a surrogate pair */
- return FALSE;
- }
- return TRUE;
-}
-
-U_CAPI UChar * U_EXPORT2
-u_strFindFirst(const UChar *s, int32_t length,
- const UChar *sub, int32_t subLength) {
- const UChar *start, *p, *q, *subLimit;
- UChar c, cs, cq;
-
- if(sub==NULL || subLength<-1) {
- return (UChar *)s;
- }
- if(s==NULL || length<-1) {
- return NULL;
- }
-
- start=s;
-
- if(length<0 && subLength<0) {
- /* both strings are NUL-terminated */
- if((cs=*sub++)==0) {
- return (UChar *)s;
- }
- if(*sub==0 && !U16_IS_SURROGATE(cs)) {
- /* the substring consists of a single, non-surrogate BMP code point */
- return u_strchr(s, cs);
- }
-
- while((c=*s++)!=0) {
- if(c==cs) {
- /* found first substring UChar, compare rest */
- p=s;
- q=sub;
- for(;;) {
- if((cq=*q)==0) {
- if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
- return (UChar *)(s-1); /* well-formed match */
- } else {
- break; /* no match because surrogate pair is split */
- }
- }
- if((c=*p)==0) {
- return NULL; /* no match, and none possible after s */
- }
- if(c!=cq) {
- break; /* no match */
- }
- ++p;
- ++q;
- }
- }
- }
-
- /* not found */
- return NULL;
- }
-
- if(subLength<0) {
- subLength=u_strlen(sub);
- }
- if(subLength==0) {
- return (UChar *)s;
- }
-
- /* get sub[0] to search for it fast */
- cs=*sub++;
- --subLength;
- subLimit=sub+subLength;
-
- if(subLength==0 && !U16_IS_SURROGATE(cs)) {
- /* the substring consists of a single, non-surrogate BMP code point */
- return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
- }
-
- if(length<0) {
- /* s is NUL-terminated */
- while((c=*s++)!=0) {
- if(c==cs) {
- /* found first substring UChar, compare rest */
- p=s;
- q=sub;
- for(;;) {
- if(q==subLimit) {
- if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
- return (UChar *)(s-1); /* well-formed match */
- } else {
- break; /* no match because surrogate pair is split */
- }
- }
- if((c=*p)==0) {
- return NULL; /* no match, and none possible after s */
- }
- if(c!=*q) {
- break; /* no match */
- }
- ++p;
- ++q;
- }
- }
- }
- } else {
- const UChar *limit, *preLimit;
-
- /* subLength was decremented above */
- if(length<=subLength) {
- return NULL; /* s is shorter than sub */
- }
-
- limit=s+length;
-
- /* the substring must start before preLimit */
- preLimit=limit-subLength;
-
- while(s!=preLimit) {
- c=*s++;
- if(c==cs) {
- /* found first substring UChar, compare rest */
- p=s;
- q=sub;
- for(;;) {
- if(q==subLimit) {
- if(isMatchAtCPBoundary(start, s-1, p, limit)) {
- return (UChar *)(s-1); /* well-formed match */
- } else {
- break; /* no match because surrogate pair is split */
- }
- }
- if(*p!=*q) {
- break; /* no match */
- }
- ++p;
- ++q;
- }
- }
- }
- }
-
- /* not found */
- return NULL;
-}
-
-U_CAPI UChar * U_EXPORT2
-u_strstr(const UChar *s, const UChar *substring) {
- return u_strFindFirst(s, -1, substring, -1);
-}
-
-U_CAPI UChar * U_EXPORT2
-u_strchr(const UChar *s, UChar c) {
- if(U16_IS_SURROGATE(c)) {
- /* make sure to not find half of a surrogate pair */
- return u_strFindFirst(s, -1, &c, 1);
- } else {
- UChar cs;
-
- /* trivial search for a BMP code point */
- for(;;) {
- if((cs=*s)==c) {
- return (UChar *)s;
- }
- if(cs==0) {
- return NULL;
- }
- ++s;
- }
- }
-}
-
-U_CAPI UChar * U_EXPORT2
-u_strchr32(const UChar *s, UChar32 c) {
- if((uint32_t)c<=U_BMP_MAX) {
- /* find BMP code point */
- return u_strchr(s, (UChar)c);
- } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
- /* find supplementary code point as surrogate pair */
- UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
-
- while((cs=*s++)!=0) {
- if(cs==lead && *s==trail) {
- return (UChar *)(s-1);
- }
- }
- return NULL;
- } else {
- /* not a Unicode code point, not findable */
- return NULL;
- }
-}
-
-U_CAPI UChar * U_EXPORT2
-u_memchr(const UChar *s, UChar c, int32_t count) {
- if(count<=0) {
- return NULL; /* no string */
- } else if(U16_IS_SURROGATE(c)) {
- /* make sure to not find half of a surrogate pair */
- return u_strFindFirst(s, count, &c, 1);
- } else {
- /* trivial search for a BMP code point */
- const UChar *limit=s+count;
- do {
- if(*s==c) {
- return (UChar *)s;
- }
- } while(++s!=limit);
- return NULL;
- }
-}
-
-U_CAPI UChar * U_EXPORT2
-u_memchr32(const UChar *s, UChar32 c, int32_t count) {
- if((uint32_t)c<=U_BMP_MAX) {
- /* find BMP code point */
- return u_memchr(s, (UChar)c, count);
- } else if(count<2) {
- /* too short for a surrogate pair */
- return NULL;
- } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
- /* find supplementary code point as surrogate pair */
- const UChar *limit=s+count-1; /* -1 so that we do not need a separate check for the trail unit */
- UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
-
- do {
- if(*s==lead && *(s+1)==trail) {
- return (UChar *)s;
- }
- } while(++s!=limit);
- return NULL;
- } else {
- /* not a Unicode code point, not findable */
- return NULL;
- }
-}
-
-/* Backward binary string search functions ---------------------------------- */
-
-U_CAPI UChar * U_EXPORT2
-u_strFindLast(const UChar *s, int32_t length,
- const UChar *sub, int32_t subLength) {
- const UChar *start, *limit, *p, *q, *subLimit;
- UChar c, cs;
-
- if(sub==NULL || subLength<-1) {
- return (UChar *)s;
- }
- if(s==NULL || length<-1) {
- return NULL;
- }
-
- /*
- * This implementation is more lazy than the one for u_strFindFirst():
- * There is no special search code for NUL-terminated strings.
- * It does not seem to be worth it for searching substrings to
- * search forward and find all matches like in u_strrchr() and similar.
- * Therefore, we simply get both string lengths and search backward.
- *
- * markus 2002oct23
- */
-
- if(subLength<0) {
- subLength=u_strlen(sub);
- }
- if(subLength==0) {
- return (UChar *)s;
- }
-
- /* get sub[subLength-1] to search for it fast */
- subLimit=sub+subLength;
- cs=*(--subLimit);
- --subLength;
-
- if(subLength==0 && !U16_IS_SURROGATE(cs)) {
- /* the substring consists of a single, non-surrogate BMP code point */
- return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length);
- }
-
- if(length<0) {
- length=u_strlen(s);
- }
-
- /* subLength was decremented above */
- if(length<=subLength) {
- return NULL; /* s is shorter than sub */
- }
-
- start=s;
- limit=s+length;
-
- /* the substring must start no later than s+subLength */
- s+=subLength;
-
- while(s!=limit) {
- c=*(--limit);
- if(c==cs) {
- /* found last substring UChar, compare rest */
- p=limit;
- q=subLimit;
- for(;;) {
- if(q==sub) {
- if(isMatchAtCPBoundary(start, p, limit+1, start+length)) {
- return (UChar *)p; /* well-formed match */
- } else {
- break; /* no match because surrogate pair is split */
- }
- }
- if(*(--p)!=*(--q)) {
- break; /* no match */
- }
- }
- }
- }
-
- /* not found */
- return NULL;
-}
-
-U_CAPI UChar * U_EXPORT2
-u_strrstr(const UChar *s, const UChar *substring) {
- return u_strFindLast(s, -1, substring, -1);
-}
-
-U_CAPI UChar * U_EXPORT2
-u_strrchr(const UChar *s, UChar c) {
- if(U16_IS_SURROGATE(c)) {
- /* make sure to not find half of a surrogate pair */
- return u_strFindLast(s, -1, &c, 1);
- } else {
- const UChar *result=NULL;
- UChar cs;
-
- /* trivial search for a BMP code point */
- for(;;) {
- if((cs=*s)==c) {
- result=s;
- }
- if(cs==0) {
- return (UChar *)result;
- }
- ++s;
- }
- }
-}
-
-U_CAPI UChar * U_EXPORT2
-u_strrchr32(const UChar *s, UChar32 c) {
- if((uint32_t)c<=U_BMP_MAX) {
- /* find BMP code point */
- return u_strrchr(s, (UChar)c);
- } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
- /* find supplementary code point as surrogate pair */
- const UChar *result=NULL;
- UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
-
- while((cs=*s++)!=0) {
- if(cs==lead && *s==trail) {
- result=s-1;
- }
- }
- return (UChar *)result;
- } else {
- /* not a Unicode code point, not findable */
- return NULL;
- }
-}
-
-U_CAPI UChar * U_EXPORT2
-u_memrchr(const UChar *s, UChar c, int32_t count) {
- if(count<=0) {
- return NULL; /* no string */
- } else if(U16_IS_SURROGATE(c)) {
- /* make sure to not find half of a surrogate pair */
- return u_strFindLast(s, count, &c, 1);
- } else {
- /* trivial search for a BMP code point */
- const UChar *limit=s+count;
- do {
- if(*(--limit)==c) {
- return (UChar *)limit;
- }
- } while(s!=limit);
- return NULL;
- }
-}
-
-U_CAPI UChar * U_EXPORT2
-u_memrchr32(const UChar *s, UChar32 c, int32_t count) {
- if((uint32_t)c<=U_BMP_MAX) {
- /* find BMP code point */
- return u_memrchr(s, (UChar)c, count);
- } else if(count<2) {
- /* too short for a surrogate pair */
- return NULL;
- } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
- /* find supplementary code point as surrogate pair */
- const UChar *limit=s+count-1;
- UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
-
- do {
- if(*limit==trail && *(limit-1)==lead) {
- return (UChar *)(limit-1);
- }
- } while(s!=--limit);
- return NULL;
- } else {
- /* not a Unicode code point, not findable */
- return NULL;
- }
-}
-
-/* Tokenization functions --------------------------------------------------- */
-
-/*
- * Match each code point in a string against each code point in the matchSet.
- * Return the index of the first string code point that
- * is (polarity==TRUE) or is not (FALSE) contained in the matchSet.
- * Return -(string length)-1 if there is no such code point.
- */
-static int32_t
-_matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) {
- int32_t matchLen, matchBMPLen, strItr, matchItr;
- UChar32 stringCh, matchCh;
- UChar c, c2;
-
- /* first part of matchSet contains only BMP code points */
- matchBMPLen = 0;
- while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) {
- ++matchBMPLen;
- }
-
- /* second part of matchSet contains BMP and supplementary code points */
- matchLen = matchBMPLen;
- while(matchSet[matchLen] != 0) {
- ++matchLen;
- }
-
- for(strItr = 0; (c = string[strItr]) != 0;) {
- ++strItr;
- if(U16_IS_SINGLE(c)) {
- if(polarity) {
- for(matchItr = 0; matchItr < matchLen; ++matchItr) {
- if(c == matchSet[matchItr]) {
- return strItr - 1; /* one matches */
- }
- }
- } else {
- for(matchItr = 0; matchItr < matchLen; ++matchItr) {
- if(c == matchSet[matchItr]) {
- goto endloop;
- }
- }
- return strItr - 1; /* none matches */
- }
- } else {
- /*
- * No need to check for string length before U16_IS_TRAIL
- * because c2 could at worst be the terminating NUL.
- */
- if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
- ++strItr;
- stringCh = U16_GET_SUPPLEMENTARY(c, c2);
- } else {
- stringCh = c; /* unpaired trail surrogate */
- }
-
- if(polarity) {
- for(matchItr = matchBMPLen; matchItr < matchLen;) {
- U16_NEXT(matchSet, matchItr, matchLen, matchCh);
- if(stringCh == matchCh) {
- return strItr - U16_LENGTH(stringCh); /* one matches */
- }
- }
- } else {
- for(matchItr = matchBMPLen; matchItr < matchLen;) {
- U16_NEXT(matchSet, matchItr, matchLen, matchCh);
- if(stringCh == matchCh) {
- goto endloop;
- }
- }
- return strItr - U16_LENGTH(stringCh); /* none matches */
- }
- }
-endloop:
- /* wish C had continue with labels like Java... */;
- }
-
- /* Didn't find it. */
- return -strItr-1;
-}
-
-/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
-U_CAPI UChar * U_EXPORT2
-u_strpbrk(const UChar *string, const UChar *matchSet)
-{
- int32_t idx = _matchFromSet(string, matchSet, TRUE);
- if(idx >= 0) {
- return (UChar *)string + idx;
- } else {
- return NULL;
- }
-}
-
-/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
-U_CAPI int32_t U_EXPORT2
-u_strcspn(const UChar *string, const UChar *matchSet)
-{
- int32_t idx = _matchFromSet(string, matchSet, TRUE);
- if(idx >= 0) {
- return idx;
- } else {
- return -idx - 1; /* == u_strlen(string) */
- }
-}
-
-/* Search for a codepoint in a string that does not match one of the matchSet codepoints. */
-U_CAPI int32_t U_EXPORT2
-u_strspn(const UChar *string, const UChar *matchSet)
-{
- int32_t idx = _matchFromSet(string, matchSet, FALSE);
- if(idx >= 0) {
- return idx;
- } else {
- return -idx - 1; /* == u_strlen(string) */
- }
-}
-
-/* ----- Text manipulation functions --- */
-
-U_CAPI UChar* U_EXPORT2
-u_strtok_r(UChar *src,
- const UChar *delim,
- UChar **saveState)
-{
- UChar *tokSource;
- UChar *nextToken;
- uint32_t nonDelimIdx;
-
- /* If saveState is NULL, the user messed up. */
- if (src != NULL) {
- tokSource = src;
- *saveState = src; /* Set to "src" in case there are no delimiters */
- }
- else if (*saveState) {
- tokSource = *saveState;
- }
- else {
- /* src == NULL && *saveState == NULL */
- /* This shouldn't happen. We already finished tokenizing. */
- return NULL;
- }
-
- /* Skip initial delimiters */
- nonDelimIdx = u_strspn(tokSource, delim);
- tokSource = &tokSource[nonDelimIdx];
-
- if (*tokSource) {
- nextToken = u_strpbrk(tokSource, delim);
- if (nextToken != NULL) {
- /* Create a token */
- *(nextToken++) = 0;
- *saveState = nextToken;
- return tokSource;
- }
- else if (*saveState) {
- /* Return the last token */
- *saveState = NULL;
- return tokSource;
- }
- }
- else {
- /* No tokens were found. Only delimiters were left. */
- *saveState = NULL;
- }
- return NULL;
-}
-
-/* Miscellaneous functions -------------------------------------------------- */
-
-U_CAPI UChar* U_EXPORT2
-u_strcat(UChar *dst,
- const UChar *src)
-{
- UChar *anchor = dst; /* save a pointer to start of dst */
-
- while(*dst != 0) { /* To end of first string */
- ++dst;
- }
- while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */
- }
-
- return anchor;
-}
-
-U_CAPI UChar* U_EXPORT2
-u_strncat(UChar *dst,
- const UChar *src,
- int32_t n )
-{
- if(n > 0) {
- UChar *anchor = dst; /* save a pointer to start of dst */
-
- while(*dst != 0) { /* To end of first string */
- ++dst;
- }
- while((*dst = *src) != 0) { /* copy string 2 over */
- ++dst;
- if(--n == 0) {
- *dst = 0;
- break;
- }
- ++src;
- }
-
- return anchor;
- } else {
- return dst;
- }
-}
-
-/* ----- Text property functions --- */
-
-U_CAPI int32_t U_EXPORT2
-u_strcmp(const UChar *s1,
- const UChar *s2)
-{
- UChar c1, c2;
-
- for(;;) {
- c1=*s1++;
- c2=*s2++;
- if (c1 != c2 || c1 == 0) {
- break;
- }
- }
- return (int32_t)c1 - (int32_t)c2;
-}
-
-U_CFUNC int32_t U_EXPORT2
-uprv_strCompare(const UChar *s1, int32_t length1,
- const UChar *s2, int32_t length2,
- UBool strncmpStyle, UBool codePointOrder) {
- const UChar *start1, *start2, *limit1, *limit2;
- UChar c1, c2;
-
- /* setup for fix-up */
- start1=s1;
- start2=s2;
-
- /* compare identical prefixes - they do not need to be fixed up */
- if(length1<0 && length2<0) {
- /* strcmp style, both NUL-terminated */
- if(s1==s2) {
- return 0;
- }
-
- for(;;) {
- c1=*s1;
- c2=*s2;
- if(c1!=c2) {
- break;
- }
- if(c1==0) {
- return 0;
- }
- ++s1;
- ++s2;
- }
-
- /* setup for fix-up */
- limit1=limit2=NULL;
- } else if(strncmpStyle) {
- /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */
- if(s1==s2) {
- return 0;
- }
-
- limit1=start1+length1;
-
- for(;;) {
- /* both lengths are same, check only one limit */
- if(s1==limit1) {
- return 0;
- }
-
- c1=*s1;
- c2=*s2;
- if(c1!=c2) {
- break;
- }
- if(c1==0) {
- return 0;
- }
- ++s1;
- ++s2;
- }
-
- /* setup for fix-up */
- limit2=start2+length1; /* use length1 here, too, to enforce assumption */
- } else {
- /* memcmp/UnicodeString style, both length-specified */
- int32_t lengthResult;
-
- if(length1<0) {
- length1=u_strlen(s1);
- }
- if(length2<0) {
- length2=u_strlen(s2);
- }
-
- /* limit1=start1+min(lenght1, length2) */
- if(length1<length2) {
- lengthResult=-1;
- limit1=start1+length1;
- } else if(length1==length2) {
- lengthResult=0;
- limit1=start1+length1;
- } else /* length1>length2 */ {
- lengthResult=1;
- limit1=start1+length2;
- }
-
- if(s1==s2) {
- return lengthResult;
- }
-
- for(;;) {
- /* check pseudo-limit */
- if(s1==limit1) {
- return lengthResult;
- }
-
- c1=*s1;
- c2=*s2;
- if(c1!=c2) {
- break;
- }
- ++s1;
- ++s2;
- }
-
- /* setup for fix-up */
- limit1=start1+length1;
- limit2=start2+length2;
- }
-
- /* if both values are in or above the surrogate range, fix them up */
- if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
- /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
- if(
- (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) ||
- (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1)))
- ) {
- /* part of a surrogate pair, leave >=d800 */
- } else {
- /* BMP code point - may be surrogate code point - make <d800 */
- c1-=0x2800;
- }
-
- if(
- (c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) ||
- (U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1)))
- ) {
- /* part of a surrogate pair, leave >=d800 */
- } else {
- /* BMP code point - may be surrogate code point - make <d800 */
- c2-=0x2800;
- }
- }
-
- /* now c1 and c2 are in the requested (code unit or code point) order */
- return (int32_t)c1-(int32_t)c2;
-}
-
-/*
- * Compare two strings as presented by UCharIterators.
- * Use code unit or code point order.
- * When the function returns, it is undefined where the iterators
- * have stopped.
- */
-U_CAPI int32_t U_EXPORT2
-u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
- UChar32 c1, c2;
-
- /* argument checking */
- if(iter1==NULL || iter2==NULL) {
- return 0; /* bad arguments */
- }
- if(iter1==iter2) {
- return 0; /* identical iterators */
- }
-
- /* reset iterators to start? */
- iter1->move(iter1, 0, UITER_START);
- iter2->move(iter2, 0, UITER_START);
-
- /* compare identical prefixes - they do not need to be fixed up */
- for(;;) {
- c1=iter1->next(iter1);
- c2=iter2->next(iter2);
- if(c1!=c2) {
- break;
- }
- if(c1==-1) {
- return 0;
- }
- }
-
- /* if both values are in or above the surrogate range, fix them up */
- if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
- /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
- if(
- (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) ||
- (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
- ) {
- /* part of a surrogate pair, leave >=d800 */
- } else {
- /* BMP code point - may be surrogate code point - make <d800 */
- c1-=0x2800;
- }
-
- if(
- (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) ||
- (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
- ) {
- /* part of a surrogate pair, leave >=d800 */
- } else {
- /* BMP code point - may be surrogate code point - make <d800 */
- c2-=0x2800;
- }
- }
-
- /* now c1 and c2 are in the requested (code unit or code point) order */
- return (int32_t)c1-(int32_t)c2;
-}
-
-#if 0
-/*
- * u_strCompareIter() does not leave the iterators _on_ the different units.
- * This is possible but would cost a few extra indirect function calls to back
- * up if the last unit (c1 or c2 respectively) was >=0.
- *
- * Consistently leaving them _behind_ the different units is not an option
- * because the current "unit" is the end of the string if that is reached,
- * and in such a case the iterator does not move.
- * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
- * of their strings. Calling previous() on each does not move them to where
- * the comparison fails.
- *
- * So the simplest semantics is to not define where the iterators end up.
- *
- * The following fragment is part of what would need to be done for backing up.
- */
-void fragment {
- /* iff a surrogate is part of a surrogate pair, leave >=d800 */
- if(c1<=0xdbff) {
- if(!U16_IS_TRAIL(iter1->current(iter1))) {
- /* lead surrogate code point - make <d800 */
- c1-=0x2800;
- }
- } else if(c1<=0xdfff) {
- int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
- iter1->previous(iter1); /* ==c1 */
- if(!U16_IS_LEAD(iter1->previous(iter1))) {
- /* trail surrogate code point - make <d800 */
- c1-=0x2800;
- }
- /* go back to behind where the difference is */
- iter1->move(iter1, idx, UITER_ZERO);
- } else /* 0xe000<=c1<=0xffff */ {
- /* BMP code point - make <d800 */
- c1-=0x2800;
- }
-}
-#endif
-
-U_CAPI int32_t U_EXPORT2
-u_strCompare(const UChar *s1, int32_t length1,
- const UChar *s2, int32_t length2,
- UBool codePointOrder) {
- /* argument checking */
- if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
- return 0;
- }
- return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder);
-}
-
-/* String compare in code point order - u_strcmp() compares in code unit order. */
-U_CAPI int32_t U_EXPORT2
-u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
- return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE);
-}
-
-U_CAPI int32_t U_EXPORT2
-u_strncmp(const UChar *s1,
- const UChar *s2,
- int32_t n)
-{
- if(n > 0) {
- int32_t rc;
- for(;;) {
- rc = (int32_t)*s1 - (int32_t)*s2;
- if(rc != 0 || *s1 == 0 || --n == 0) {
- return rc;
- }
- ++s1;
- ++s2;
- }
- } else {
- return 0;
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) {
- return uprv_strCompare(s1, n, s2, n, TRUE, TRUE);
-}
-
-U_CAPI UChar* U_EXPORT2
-u_strcpy(UChar *dst,
- const UChar *src)
-{
- UChar *anchor = dst; /* save a pointer to start of dst */
-
- while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */
- }
-
- return anchor;
-}
-
-U_CAPI UChar* U_EXPORT2
-u_strncpy(UChar *dst,
- const UChar *src,
- int32_t n)
-{
- UChar *anchor = dst; /* save a pointer to start of dst */
-
- /* copy string 2 over */
- while(n > 0 && (*(dst++) = *(src++)) != 0) {
- --n;
- }
-
- return anchor;
-}
-
-U_CAPI int32_t U_EXPORT2
-u_strlen(const UChar *s)
-{
-#if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
- return (int32_t)uprv_wcslen((const wchar_t *)s);
-#else
- const UChar *t = s;
- while(*t != 0) {
- ++t;
- }
- return t - s;
-#endif
-}
-
-U_CAPI int32_t U_EXPORT2
-u_countChar32(const UChar *s, int32_t length) {
- int32_t count;
-
- if(s==NULL || length<-1) {
- return 0;
- }
-
- count=0;
- if(length>=0) {
- while(length>0) {
- ++count;
- if(U16_IS_LEAD(*s) && length>=2 && U16_IS_TRAIL(*(s+1))) {
- s+=2;
- length-=2;
- } else {
- ++s;
- --length;
- }
- }
- } else /* length==-1 */ {
- UChar c;
-
- for(;;) {
- if((c=*s++)==0) {
- break;
- }
- ++count;
-
- /*
- * sufficient to look ahead one because of UTF-16;
- * safe to look ahead one because at worst that would be the terminating NUL
- */
- if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
- ++s;
- }
- }
- }
- return count;
-}
-
-U_CAPI UBool U_EXPORT2
-u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
-
- if(number<0) {
- return TRUE;
- }
- if(s==NULL || length<-1) {
- return FALSE;
- }
-
- if(length==-1) {
- /* s is NUL-terminated */
- UChar c;
-
- /* count code points until they exceed */
- for(;;) {
- if((c=*s++)==0) {
- return FALSE;
- }
- if(number==0) {
- return TRUE;
- }
- if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
- ++s;
- }
- --number;
- }
- } else {
- /* length>=0 known */
- const UChar *limit;
- int32_t maxSupplementary;
-
- /* s contains at least (length+1)/2 code points: <=2 UChars per cp */
- if(((length+1)/2)>number) {
- return TRUE;
- }
-
- /* check if s does not even contain enough UChars */
- maxSupplementary=length-number;
- if(maxSupplementary<=0) {
- return FALSE;
- }
- /* there are maxSupplementary=length-number more UChars than asked-for code points */
-
- /*
- * count code points until they exceed and also check that there are
- * no more than maxSupplementary supplementary code points (UChar pairs)
- */
- limit=s+length;
- for(;;) {
- if(s==limit) {
- return FALSE;
- }
- if(number==0) {
- return TRUE;
- }
- if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) {
- ++s;
- if(--maxSupplementary<=0) {
- /* too many pairs - too few code points */
- return FALSE;
- }
- }
- --number;
- }
- }
-}
-
-U_CAPI UChar * U_EXPORT2
-u_memcpy(UChar *dest, const UChar *src, int32_t count) {
- if(count > 0) {
- uprv_memcpy(dest, src, (size_t)count*U_SIZEOF_UCHAR);
- }
- return dest;
-}
-
-U_CAPI UChar * U_EXPORT2
-u_memmove(UChar *dest, const UChar *src, int32_t count) {
- if(count > 0) {
- uprv_memmove(dest, src, (size_t)count*U_SIZEOF_UCHAR);
- }
- return dest;
-}
-
-U_CAPI UChar * U_EXPORT2
-u_memset(UChar *dest, UChar c, int32_t count) {
- if(count > 0) {
- UChar *ptr = dest;
- UChar *limit = dest + count;
-
- while (ptr < limit) {
- *(ptr++) = c;
- }
- }
- return dest;
-}
-
-U_CAPI int32_t U_EXPORT2
-u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) {
- if(count > 0) {
- const UChar *limit = buf1 + count;
- int32_t result;
-
- while (buf1 < limit) {
- result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2;
- if (result != 0) {
- return result;
- }
- buf1++;
- buf2++;
- }
- }
- return 0;
-}
-
-U_CAPI int32_t U_EXPORT2
-u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
- return uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
-}
-
-/* u_unescape & support fns ------------------------------------------------- */
-
-/* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
-static const UChar UNESCAPE_MAP[] = {
- /*" 0x22, 0x22 */
- /*' 0x27, 0x27 */
- /*? 0x3F, 0x3F */
- /*\ 0x5C, 0x5C */
- /*a*/ 0x61, 0x07,
- /*b*/ 0x62, 0x08,
- /*e*/ 0x65, 0x1b,
- /*f*/ 0x66, 0x0c,
- /*n*/ 0x6E, 0x0a,
- /*r*/ 0x72, 0x0d,
- /*t*/ 0x74, 0x09,
- /*v*/ 0x76, 0x0b
-};
-enum { UNESCAPE_MAP_LENGTH = UPRV_LENGTHOF(UNESCAPE_MAP) };
-
-/* Convert one octal digit to a numeric value 0..7, or -1 on failure */
-static int8_t _digit8(UChar c) {
- if (c >= 0x0030 && c <= 0x0037) {
- return (int8_t)(c - 0x0030);
- }
- return -1;
-}
-
-/* Convert one hex digit to a numeric value 0..F, or -1 on failure */
-static int8_t _digit16(UChar c) {
- if (c >= 0x0030 && c <= 0x0039) {
- return (int8_t)(c - 0x0030);
- }
- if (c >= 0x0041 && c <= 0x0046) {
- return (int8_t)(c - (0x0041 - 10));
- }
- if (c >= 0x0061 && c <= 0x0066) {
- return (int8_t)(c - (0x0061 - 10));
- }
- return -1;
-}
-
-/* Parse a single escape sequence. Although this method deals in
- * UChars, it does not use C++ or UnicodeString. This allows it to
- * be used from C contexts. */
-U_CAPI UChar32 U_EXPORT2
-u_unescapeAt(UNESCAPE_CHAR_AT charAt,
- int32_t *offset,
- int32_t length,
- void *context) {
-
- int32_t start = *offset;
- UChar c;
- UChar32 result = 0;
- int8_t n = 0;
- int8_t minDig = 0;
- int8_t maxDig = 0;
- int8_t bitsPerDigit = 4;
- int8_t dig;
- int32_t i;
- UBool braces = FALSE;
-
- /* Check that offset is in range */
- if (*offset < 0 || *offset >= length) {
- goto err;
- }
-
- /* Fetch first UChar after '\\' */
- c = charAt((*offset)++, context);
-
- /* Convert hexadecimal and octal escapes */
- switch (c) {
- case 0x0075 /*'u'*/:
- minDig = maxDig = 4;
- break;
- case 0x0055 /*'U'*/:
- minDig = maxDig = 8;
- break;
- case 0x0078 /*'x'*/:
- minDig = 1;
- if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) {
- ++(*offset);
- braces = TRUE;
- maxDig = 8;
- } else {
- maxDig = 2;
- }
- break;
- default:
- dig = _digit8(c);
- if (dig >= 0) {
- minDig = 1;
- maxDig = 3;
- n = 1; /* Already have first octal digit */
- bitsPerDigit = 3;
- result = dig;
- }
- break;
- }
- if (minDig != 0) {
- while (*offset < length && n < maxDig) {
- c = charAt(*offset, context);
- dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
- if (dig < 0) {
- break;
- }
- result = (result << bitsPerDigit) | dig;
- ++(*offset);
- ++n;
- }
- if (n < minDig) {
- goto err;
- }
- if (braces) {
- if (c != 0x7D /*}*/) {
- goto err;
- }
- ++(*offset);
- }
- if (result < 0 || result >= 0x110000) {
- goto err;
- }
- /* If an escape sequence specifies a lead surrogate, see if
- * there is a trail surrogate after it, either as an escape or
- * as a literal. If so, join them up into a supplementary.
- */
- if (*offset < length && U16_IS_LEAD(result)) {
- int32_t ahead = *offset + 1;
- c = charAt(*offset, context);
- if (c == 0x5C /*'\\'*/ && ahead < length) {
- c = (UChar) u_unescapeAt(charAt, &ahead, length, context);
- }
- if (U16_IS_TRAIL(c)) {
- *offset = ahead;
- result = U16_GET_SUPPLEMENTARY(result, c);
- }
- }
- return result;
- }
-
- /* Convert C-style escapes in table */
- for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
- if (c == UNESCAPE_MAP[i]) {
- return UNESCAPE_MAP[i+1];
- } else if (c < UNESCAPE_MAP[i]) {
- break;
- }
- }
-
- /* Map \cX to control-X: X & 0x1F */
- if (c == 0x0063 /*'c'*/ && *offset < length) {
- c = charAt((*offset)++, context);
- if (U16_IS_LEAD(c) && *offset < length) {
- UChar c2 = charAt(*offset, context);
- if (U16_IS_TRAIL(c2)) {
- ++(*offset);
- c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); /* [sic] */
- }
- }
- return 0x1F & c;
- }
-
- /* If no special forms are recognized, then consider
- * the backslash to generically escape the next character.
- * Deal with surrogate pairs. */
- if (U16_IS_LEAD(c) && *offset < length) {
- UChar c2 = charAt(*offset, context);
- if (U16_IS_TRAIL(c2)) {
- ++(*offset);
- return U16_GET_SUPPLEMENTARY(c, c2);
- }
- }
- return c;
-
- err:
- /* Invalid escape sequence */
- *offset = start; /* Reset to initial value */
- return (UChar32)0xFFFFFFFF;
-}
-
-/* u_unescapeAt() callback to return a UChar from a char* */
-static UChar U_CALLCONV
-_charPtr_charAt(int32_t offset, void *context) {
- UChar c16;
- /* It would be more efficient to access the invariant tables
- * directly but there is no API for that. */
- u_charsToUChars(((char*) context) + offset, &c16, 1);
- return c16;
-}
-
-/* Append an escape-free segment of the text; used by u_unescape() */
-static void _appendUChars(UChar *dest, int32_t destCapacity,
- const char *src, int32_t srcLen) {
- if (destCapacity < 0) {
- destCapacity = 0;
- }
- if (srcLen > destCapacity) {
- srcLen = destCapacity;
- }
- u_charsToUChars(src, dest, srcLen);
-}
-
-/* Do an invariant conversion of char* -> UChar*, with escape parsing */
-U_CAPI int32_t U_EXPORT2
-u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
- const char *segment = src;
- int32_t i = 0;
- char c;
-
- while ((c=*src) != 0) {
- /* '\\' intentionally written as compiler-specific
- * character constant to correspond to compiler-specific
- * char* constants. */
- if (c == '\\') {
- int32_t lenParsed = 0;
- UChar32 c32;
- if (src != segment) {
- if (dest != NULL) {
- _appendUChars(dest + i, destCapacity - i,
- segment, (int32_t)(src - segment));
- }
- i += (int32_t)(src - segment);
- }
- ++src; /* advance past '\\' */
- c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src);
- if (lenParsed == 0) {
- goto err;
- }
- src += lenParsed; /* advance past escape seq. */
- if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) {
- U16_APPEND_UNSAFE(dest, i, c32);
- } else {
- i += U16_LENGTH(c32);
- }
- segment = src;
- } else {
- ++src;
- }
- }
- if (src != segment) {
- if (dest != NULL) {
- _appendUChars(dest + i, destCapacity - i,
- segment, (int32_t)(src - segment));
- }
- i += (int32_t)(src - segment);
- }
- if (dest != NULL && i < destCapacity) {
- dest[i] = 0;
- }
- return i;
-
- err:
- if (dest != NULL && destCapacity > 0) {
- *dest = 0;
- }
- return 0;
-}
-
-/* NUL-termination of strings ----------------------------------------------- */
-
-/**
- * NUL-terminate a string no matter what its type.
- * Set warning and error codes accordingly.
- */
-#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) UPRV_BLOCK_MACRO_BEGIN { \
- if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) { \
- /* not a public function, so no complete argument checking */ \
- \
- if(length<0) { \
- /* assume that the caller handles this */ \
- } else if(length<destCapacity) { \
- /* NUL-terminate the string, the NUL fits */ \
- dest[length]=0; \
- /* unset the not-terminated warning but leave all others */ \
- if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { \
- *pErrorCode=U_ZERO_ERROR; \
- } \
- } else if(length==destCapacity) { \
- /* unable to NUL-terminate, but the string itself fit - set a warning code */ \
- *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; \
- } else /* length>destCapacity */ { \
- /* even the string itself did not fit - set an error code */ \
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR; \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-U_CAPI UChar U_EXPORT2
-u_asciiToUpper(UChar c) {
- if (u'a' <= c && c <= u'z') {
- c = c + u'A' - u'a';
- }
- return c;
-}
-
-U_CAPI int32_t U_EXPORT2
-u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
- __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
- return length;
-}
-
-U_CAPI int32_t U_EXPORT2
-u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
- __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
- return length;
-}
-
-U_CAPI int32_t U_EXPORT2
-u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
- __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
- return length;
-}
-
-U_CAPI int32_t U_EXPORT2
-u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
- __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
- return length;
-}
-
-// Compute the hash code for a string -------------------------------------- ***
-
-// Moved here from uhash.c so that UnicodeString::hashCode() does not depend
-// on UHashtable code.
-
-/*
- Compute the hash by iterating sparsely over about 32 (up to 63)
- characters spaced evenly through the string. For each character,
- multiply the previous hash value by a prime number and add the new
- character in, like a linear congruential random number generator,
- producing a pseudorandom deterministic value well distributed over
- the output range. [LIU]
-*/
-
-#define STRING_HASH(TYPE, STR, STRLEN, DEREF) UPRV_BLOCK_MACRO_BEGIN { \
- uint32_t hash = 0; \
- const TYPE *p = (const TYPE*) STR; \
- if (p != NULL) { \
- int32_t len = (int32_t)(STRLEN); \
- int32_t inc = ((len - 32) / 32) + 1; \
- const TYPE *limit = p + len; \
- while (p<limit) { \
- hash = (hash * 37) + DEREF; \
- p += inc; \
- } \
- } \
- return static_cast<int32_t>(hash); \
-} UPRV_BLOCK_MACRO_END
-
-/* Used by UnicodeString to compute its hashcode - Not public API. */
-U_CAPI int32_t U_EXPORT2
-ustr_hashUCharsN(const UChar *str, int32_t length) {
- STRING_HASH(UChar, str, length, *p);
-}
-
-U_CAPI int32_t U_EXPORT2
-ustr_hashCharsN(const char *str, int32_t length) {
- STRING_HASH(uint8_t, str, length, *p);
-}
-
-U_CAPI int32_t U_EXPORT2
-ustr_hashICharsN(const char *str, int32_t length) {
- STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));
-}
diff --git a/contrib/libs/icu/common/ustrtrns.cpp b/contrib/libs/icu/common/ustrtrns.cpp
deleted file mode 100644
index 5dc032c02fb..00000000000
--- a/contrib/libs/icu/common/ustrtrns.cpp
+++ /dev/null
@@ -1,1451 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2001-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* File ustrtrns.cpp
-*
-* Modification History:
-*
-* Date Name Description
-* 9/10/2001 Ram Creation.
-******************************************************************************
-*/
-
-/*******************************************************************************
- *
- * u_strTo* and u_strFrom* APIs
- * WCS functions moved to ustr_wcs.c for better modularization
- *
- *******************************************************************************
- */
-
-
-#include "unicode/putil.h"
-#include "unicode/ustring.h"
-#include "unicode/utf.h"
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "ustr_imp.h"
-#include "uassert.h"
-
-U_CAPI UChar* U_EXPORT2
-u_strFromUTF32WithSub(UChar *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const UChar32 *src,
- int32_t srcLength,
- UChar32 subchar, int32_t *pNumSubstitutions,
- UErrorCode *pErrorCode) {
- const UChar32 *srcLimit;
- UChar32 ch;
- UChar *destLimit;
- UChar *pDest;
- int32_t reqLength;
- int32_t numSubstitutions;
-
- /* args check */
- if(U_FAILURE(*pErrorCode)){
- return NULL;
- }
- if( (src==NULL && srcLength!=0) || srcLength < -1 ||
- (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
- subchar > 0x10ffff || U_IS_SURROGATE(subchar)
- ) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- if(pNumSubstitutions != NULL) {
- *pNumSubstitutions = 0;
- }
-
- pDest = dest;
- destLimit = (dest!=NULL)?(dest + destCapacity):NULL;
- reqLength = 0;
- numSubstitutions = 0;
-
- if(srcLength < 0) {
- /* simple loop for conversion of a NUL-terminated BMP string */
- while((ch=*src) != 0 &&
- ((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff))) {
- ++src;
- if(pDest < destLimit) {
- *pDest++ = (UChar)ch;
- } else {
- ++reqLength;
- }
- }
- srcLimit = src;
- if(ch != 0) {
- /* "complicated" case, find the end of the remaining string */
- while(*++srcLimit != 0) {}
- }
- } else {
- srcLimit = (src!=NULL)?(src + srcLength):NULL;
- }
-
- /* convert with length */
- while(src < srcLimit) {
- ch = *src++;
- do {
- /* usually "loops" once; twice only for writing subchar */
- if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
- if(pDest < destLimit) {
- *pDest++ = (UChar)ch;
- } else {
- ++reqLength;
- }
- break;
- } else if(0x10000 <= ch && ch <= 0x10ffff) {
- if(pDest!=NULL && ((pDest + 2) <= destLimit)) {
- *pDest++ = U16_LEAD(ch);
- *pDest++ = U16_TRAIL(ch);
- } else {
- reqLength += 2;
- }
- break;
- } else if((ch = subchar) < 0) {
- /* surrogate code point, or not a Unicode code point at all */
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
- } else {
- ++numSubstitutions;
- }
- } while(TRUE);
- }
-
- reqLength += (int32_t)(pDest - dest);
- if(pDestLength) {
- *pDestLength = reqLength;
- }
- if(pNumSubstitutions != NULL) {
- *pNumSubstitutions = numSubstitutions;
- }
-
- /* Terminate the buffer */
- u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
-
- return dest;
-}
-
-U_CAPI UChar* U_EXPORT2
-u_strFromUTF32(UChar *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const UChar32 *src,
- int32_t srcLength,
- UErrorCode *pErrorCode) {
- return u_strFromUTF32WithSub(
- dest, destCapacity, pDestLength,
- src, srcLength,
- U_SENTINEL, NULL,
- pErrorCode);
-}
-
-U_CAPI UChar32* U_EXPORT2
-u_strToUTF32WithSub(UChar32 *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const UChar *src,
- int32_t srcLength,
- UChar32 subchar, int32_t *pNumSubstitutions,
- UErrorCode *pErrorCode) {
- const UChar *srcLimit;
- UChar32 ch;
- UChar ch2;
- UChar32 *destLimit;
- UChar32 *pDest;
- int32_t reqLength;
- int32_t numSubstitutions;
-
- /* args check */
- if(U_FAILURE(*pErrorCode)){
- return NULL;
- }
- if( (src==NULL && srcLength!=0) || srcLength < -1 ||
- (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
- subchar > 0x10ffff || U_IS_SURROGATE(subchar)
- ) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- if(pNumSubstitutions != NULL) {
- *pNumSubstitutions = 0;
- }
-
- pDest = dest;
- destLimit = (dest!=NULL)?(dest + destCapacity):NULL;
- reqLength = 0;
- numSubstitutions = 0;
-
- if(srcLength < 0) {
- /* simple loop for conversion of a NUL-terminated BMP string */
- while((ch=*src) != 0 && !U16_IS_SURROGATE(ch)) {
- ++src;
- if(pDest < destLimit) {
- *pDest++ = ch;
- } else {
- ++reqLength;
- }
- }
- srcLimit = src;
- if(ch != 0) {
- /* "complicated" case, find the end of the remaining string */
- while(*++srcLimit != 0) {}
- }
- } else {
- srcLimit = (src!=NULL)?(src + srcLength):NULL;
- }
-
- /* convert with length */
- while(src < srcLimit) {
- ch = *src++;
- if(!U16_IS_SURROGATE(ch)) {
- /* write or count ch below */
- } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) {
- ++src;
- ch = U16_GET_SUPPLEMENTARY(ch, ch2);
- } else if((ch = subchar) < 0) {
- /* unpaired surrogate */
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
- } else {
- ++numSubstitutions;
- }
- if(pDest < destLimit) {
- *pDest++ = ch;
- } else {
- ++reqLength;
- }
- }
-
- reqLength += (int32_t)(pDest - dest);
- if(pDestLength) {
- *pDestLength = reqLength;
- }
- if(pNumSubstitutions != NULL) {
- *pNumSubstitutions = numSubstitutions;
- }
-
- /* Terminate the buffer */
- u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode);
-
- return dest;
-}
-
-U_CAPI UChar32* U_EXPORT2
-u_strToUTF32(UChar32 *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const UChar *src,
- int32_t srcLength,
- UErrorCode *pErrorCode) {
- return u_strToUTF32WithSub(
- dest, destCapacity, pDestLength,
- src, srcLength,
- U_SENTINEL, NULL,
- pErrorCode);
-}
-
-U_CAPI UChar* U_EXPORT2
-u_strFromUTF8WithSub(UChar *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const char* src,
- int32_t srcLength,
- UChar32 subchar, int32_t *pNumSubstitutions,
- UErrorCode *pErrorCode){
- /* args check */
- if(U_FAILURE(*pErrorCode)) {
- return NULL;
- }
- if( (src==NULL && srcLength!=0) || srcLength < -1 ||
- (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
- subchar > 0x10ffff || U_IS_SURROGATE(subchar)
- ) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- if(pNumSubstitutions!=NULL) {
- *pNumSubstitutions=0;
- }
- UChar *pDest = dest;
- UChar *pDestLimit = dest+destCapacity;
- int32_t reqLength = 0;
- int32_t numSubstitutions=0;
-
- /*
- * Inline processing of UTF-8 byte sequences:
- *
- * Byte sequences for the most common characters are handled inline in
- * the conversion loops. In order to reduce the path lengths for those
- * characters, the tests are arranged in a kind of binary search.
- * ASCII (<=0x7f) is checked first, followed by the dividing point
- * between 2- and 3-byte sequences (0xe0).
- * The 3-byte branch is tested first to speed up CJK text.
- * The compiler should combine the subtractions for the two tests for 0xe0.
- * Each branch then tests for the other end of its range.
- */
-
- if(srcLength < 0){
- /*
- * Transform a NUL-terminated string.
- * The code explicitly checks for NULs only in the lead byte position.
- * A NUL byte in the trail byte position fails the trail byte range check anyway.
- */
- int32_t i;
- UChar32 c;
- for(i = 0; (c = (uint8_t)src[i]) != 0 && (pDest < pDestLimit);) {
- // modified copy of U8_NEXT()
- ++i;
- if(U8_IS_SINGLE(c)) {
- *pDest++=(UChar)c;
- } else {
- uint8_t __t1, __t2;
- if( /* handle U+0800..U+FFFF inline */
- (0xe0<=(c) && (c)<0xf0) &&
- U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
- (__t2=src[(i)+1]-0x80)<=0x3f) {
- *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2;
- i+=2;
- } else if( /* handle U+0080..U+07FF inline */
- ((c)<0xe0 && (c)>=0xc2) &&
- (__t1=src[i]-0x80)<=0x3f) {
- *pDest++ = (((c)&0x1f)<<6)|__t1;
- ++(i);
- } else {
- /* function call for "complicated" and error cases */
- (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), -1, c, -1);
- if(c<0 && (++numSubstitutions, c = subchar) < 0) {
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
- } else if(c<=0xFFFF) {
- *(pDest++)=(UChar)c;
- } else {
- *(pDest++)=U16_LEAD(c);
- if(pDest<pDestLimit) {
- *(pDest++)=U16_TRAIL(c);
- } else {
- reqLength++;
- break;
- }
- }
- }
- }
- }
-
- /* Pre-flight the rest of the string. */
- while((c = (uint8_t)src[i]) != 0) {
- // modified copy of U8_NEXT()
- ++i;
- if(U8_IS_SINGLE(c)) {
- ++reqLength;
- } else {
- uint8_t __t1, __t2;
- if( /* handle U+0800..U+FFFF inline */
- (0xe0<=(c) && (c)<0xf0) &&
- U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
- (__t2=src[(i)+1]-0x80)<=0x3f) {
- ++reqLength;
- i+=2;
- } else if( /* handle U+0080..U+07FF inline */
- ((c)<0xe0 && (c)>=0xc2) &&
- (__t1=src[i]-0x80)<=0x3f) {
- ++reqLength;
- ++(i);
- } else {
- /* function call for "complicated" and error cases */
- (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), -1, c, -1);
- if(c<0 && (++numSubstitutions, c = subchar) < 0) {
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
- }
- reqLength += U16_LENGTH(c);
- }
- }
- }
- } else /* srcLength >= 0 */ {
- /* Faster loop without ongoing checking for srcLength and pDestLimit. */
- int32_t i = 0;
- UChar32 c;
- for(;;) {
- /*
- * Each iteration of the inner loop progresses by at most 3 UTF-8
- * bytes and one UChar, for most characters.
- * For supplementary code points (4 & 2), which are rare,
- * there is an additional adjustment.
- */
- int32_t count = (int32_t)(pDestLimit - pDest);
- int32_t count2 = (srcLength - i) / 3;
- if(count > count2) {
- count = count2; /* min(remaining dest, remaining src/3) */
- }
- if(count < 3) {
- /*
- * Too much overhead if we get near the end of the string,
- * continue with the next loop.
- */
- break;
- }
-
- do {
- // modified copy of U8_NEXT()
- c = (uint8_t)src[i++];
- if(U8_IS_SINGLE(c)) {
- *pDest++=(UChar)c;
- } else {
- uint8_t __t1, __t2;
- if( /* handle U+0800..U+FFFF inline */
- (0xe0<=(c) && (c)<0xf0) &&
- ((i)+1)<srcLength &&
- U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
- (__t2=src[(i)+1]-0x80)<=0x3f) {
- *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2;
- i+=2;
- } else if( /* handle U+0080..U+07FF inline */
- ((c)<0xe0 && (c)>=0xc2) &&
- ((i)!=srcLength) &&
- (__t1=src[i]-0x80)<=0x3f) {
- *pDest++ = (((c)&0x1f)<<6)|__t1;
- ++(i);
- } else {
- if(c >= 0xf0 || subchar > 0xffff) {
- // We may read up to four bytes and write up to two UChars,
- // which we didn't account for with computing count,
- // so we adjust it here.
- if(--count == 0) {
- --i; // back out byte c
- break;
- }
- }
-
- /* function call for "complicated" and error cases */
- (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1);
- if(c<0 && (++numSubstitutions, c = subchar) < 0) {
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
- } else if(c<=0xFFFF) {
- *(pDest++)=(UChar)c;
- } else {
- *(pDest++)=U16_LEAD(c);
- *(pDest++)=U16_TRAIL(c);
- }
- }
- }
- } while(--count > 0);
- }
-
- while(i < srcLength && (pDest < pDestLimit)) {
- // modified copy of U8_NEXT()
- c = (uint8_t)src[i++];
- if(U8_IS_SINGLE(c)) {
- *pDest++=(UChar)c;
- } else {
- uint8_t __t1, __t2;
- if( /* handle U+0800..U+FFFF inline */
- (0xe0<=(c) && (c)<0xf0) &&
- ((i)+1)<srcLength &&
- U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
- (__t2=src[(i)+1]-0x80)<=0x3f) {
- *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2;
- i+=2;
- } else if( /* handle U+0080..U+07FF inline */
- ((c)<0xe0 && (c)>=0xc2) &&
- ((i)!=srcLength) &&
- (__t1=src[i]-0x80)<=0x3f) {
- *pDest++ = (((c)&0x1f)<<6)|__t1;
- ++(i);
- } else {
- /* function call for "complicated" and error cases */
- (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1);
- if(c<0 && (++numSubstitutions, c = subchar) < 0) {
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
- } else if(c<=0xFFFF) {
- *(pDest++)=(UChar)c;
- } else {
- *(pDest++)=U16_LEAD(c);
- if(pDest<pDestLimit) {
- *(pDest++)=U16_TRAIL(c);
- } else {
- reqLength++;
- break;
- }
- }
- }
- }
- }
-
- /* Pre-flight the rest of the string. */
- while(i < srcLength) {
- // modified copy of U8_NEXT()
- c = (uint8_t)src[i++];
- if(U8_IS_SINGLE(c)) {
- ++reqLength;
- } else {
- uint8_t __t1, __t2;
- if( /* handle U+0800..U+FFFF inline */
- (0xe0<=(c) && (c)<0xf0) &&
- ((i)+1)<srcLength &&
- U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
- (__t2=src[(i)+1]-0x80)<=0x3f) {
- ++reqLength;
- i+=2;
- } else if( /* handle U+0080..U+07FF inline */
- ((c)<0xe0 && (c)>=0xc2) &&
- ((i)!=srcLength) &&
- (__t1=src[i]-0x80)<=0x3f) {
- ++reqLength;
- ++(i);
- } else {
- /* function call for "complicated" and error cases */
- (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1);
- if(c<0 && (++numSubstitutions, c = subchar) < 0) {
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
- }
- reqLength += U16_LENGTH(c);
- }
- }
- }
- }
-
- reqLength+=(int32_t)(pDest - dest);
-
- if(pNumSubstitutions!=NULL) {
- *pNumSubstitutions=numSubstitutions;
- }
-
- if(pDestLength){
- *pDestLength = reqLength;
- }
-
- /* Terminate the buffer */
- u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
-
- return dest;
-}
-
-U_CAPI UChar* U_EXPORT2
-u_strFromUTF8(UChar *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const char* src,
- int32_t srcLength,
- UErrorCode *pErrorCode){
- return u_strFromUTF8WithSub(
- dest, destCapacity, pDestLength,
- src, srcLength,
- U_SENTINEL, NULL,
- pErrorCode);
-}
-
-U_CAPI UChar * U_EXPORT2
-u_strFromUTF8Lenient(UChar *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const char *src,
- int32_t srcLength,
- UErrorCode *pErrorCode) {
- UChar *pDest = dest;
- UChar32 ch;
- int32_t reqLength = 0;
- uint8_t* pSrc = (uint8_t*) src;
-
- /* args check */
- if(U_FAILURE(*pErrorCode)){
- return NULL;
- }
-
- if( (src==NULL && srcLength!=0) || srcLength < -1 ||
- (destCapacity<0) || (dest == NULL && destCapacity > 0)
- ) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- if(srcLength < 0) {
- /* Transform a NUL-terminated string. */
- UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL;
- uint8_t t1, t2, t3; /* trail bytes */
-
- while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) {
- if(ch < 0xc0) {
- /*
- * ASCII, or a trail byte in lead position which is treated like
- * a single-byte sequence for better character boundary
- * resynchronization after illegal sequences.
- */
- *pDest++=(UChar)ch;
- ++pSrc;
- continue;
- } else if(ch < 0xe0) { /* U+0080..U+07FF */
- if((t1 = pSrc[1]) != 0) {
- /* 0x3080 = (0xc0 << 6) + 0x80 */
- *pDest++ = (UChar)((ch << 6) + t1 - 0x3080);
- pSrc += 2;
- continue;
- }
- } else if(ch < 0xf0) { /* U+0800..U+FFFF */
- if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0) {
- /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
- /* 0x2080 = (0x80 << 6) + 0x80 */
- *pDest++ = (UChar)((ch << 12) + (t1 << 6) + t2 - 0x2080);
- pSrc += 3;
- continue;
- }
- } else /* f0..f4 */ { /* U+10000..U+10FFFF */
- if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0 && (t3 = pSrc[3]) != 0) {
- pSrc += 4;
- /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
- ch = (ch << 18) + (t1 << 12) + (t2 << 6) + t3 - 0x3c82080;
- *(pDest++) = U16_LEAD(ch);
- if(pDest < pDestLimit) {
- *(pDest++) = U16_TRAIL(ch);
- } else {
- reqLength = 1;
- break;
- }
- continue;
- }
- }
-
- /* truncated character at the end */
- *pDest++ = 0xfffd;
- while(*++pSrc != 0) {}
- break;
- }
-
- /* Pre-flight the rest of the string. */
- while((ch = *pSrc) != 0) {
- if(ch < 0xc0) {
- /*
- * ASCII, or a trail byte in lead position which is treated like
- * a single-byte sequence for better character boundary
- * resynchronization after illegal sequences.
- */
- ++reqLength;
- ++pSrc;
- continue;
- } else if(ch < 0xe0) { /* U+0080..U+07FF */
- if(pSrc[1] != 0) {
- ++reqLength;
- pSrc += 2;
- continue;
- }
- } else if(ch < 0xf0) { /* U+0800..U+FFFF */
- if(pSrc[1] != 0 && pSrc[2] != 0) {
- ++reqLength;
- pSrc += 3;
- continue;
- }
- } else /* f0..f4 */ { /* U+10000..U+10FFFF */
- if(pSrc[1] != 0 && pSrc[2] != 0 && pSrc[3] != 0) {
- reqLength += 2;
- pSrc += 4;
- continue;
- }
- }
-
- /* truncated character at the end */
- ++reqLength;
- break;
- }
- } else /* srcLength >= 0 */ {
- const uint8_t *pSrcLimit = (pSrc!=NULL)?(pSrc + srcLength):NULL;
-
- /*
- * This function requires that if srcLength is given, then it must be
- * destCapatity >= srcLength so that we need not check for
- * destination buffer overflow in the loop.
- */
- if(destCapacity < srcLength) {
- if(pDestLength != NULL) {
- *pDestLength = srcLength; /* this likely overestimates the true destLength! */
- }
- *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
- return NULL;
- }
-
- if((pSrcLimit - pSrc) >= 4) {
- pSrcLimit -= 3; /* temporarily reduce pSrcLimit */
-
- /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */
- do {
- ch = *pSrc++;
- if(ch < 0xc0) {
- /*
- * ASCII, or a trail byte in lead position which is treated like
- * a single-byte sequence for better character boundary
- * resynchronization after illegal sequences.
- */
- *pDest++=(UChar)ch;
- } else if(ch < 0xe0) { /* U+0080..U+07FF */
- /* 0x3080 = (0xc0 << 6) + 0x80 */
- *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
- } else if(ch < 0xf0) { /* U+0800..U+FFFF */
- /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
- /* 0x2080 = (0x80 << 6) + 0x80 */
- ch = (ch << 12) + (*pSrc++ << 6);
- *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
- } else /* f0..f4 */ { /* U+10000..U+10FFFF */
- /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
- ch = (ch << 18) + (*pSrc++ << 12);
- ch += *pSrc++ << 6;
- ch += *pSrc++ - 0x3c82080;
- *(pDest++) = U16_LEAD(ch);
- *(pDest++) = U16_TRAIL(ch);
- }
- } while(pSrc < pSrcLimit);
-
- pSrcLimit += 3; /* restore original pSrcLimit */
- }
-
- while(pSrc < pSrcLimit) {
- ch = *pSrc++;
- if(ch < 0xc0) {
- /*
- * ASCII, or a trail byte in lead position which is treated like
- * a single-byte sequence for better character boundary
- * resynchronization after illegal sequences.
- */
- *pDest++=(UChar)ch;
- continue;
- } else if(ch < 0xe0) { /* U+0080..U+07FF */
- if(pSrc < pSrcLimit) {
- /* 0x3080 = (0xc0 << 6) + 0x80 */
- *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
- continue;
- }
- } else if(ch < 0xf0) { /* U+0800..U+FFFF */
- if((pSrcLimit - pSrc) >= 2) {
- /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
- /* 0x2080 = (0x80 << 6) + 0x80 */
- ch = (ch << 12) + (*pSrc++ << 6);
- *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
- pSrc += 3;
- continue;
- }
- } else /* f0..f4 */ { /* U+10000..U+10FFFF */
- if((pSrcLimit - pSrc) >= 3) {
- /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
- ch = (ch << 18) + (*pSrc++ << 12);
- ch += *pSrc++ << 6;
- ch += *pSrc++ - 0x3c82080;
- *(pDest++) = U16_LEAD(ch);
- *(pDest++) = U16_TRAIL(ch);
- pSrc += 4;
- continue;
- }
- }
-
- /* truncated character at the end */
- *pDest++ = 0xfffd;
- break;
- }
- }
-
- reqLength+=(int32_t)(pDest - dest);
-
- if(pDestLength){
- *pDestLength = reqLength;
- }
-
- /* Terminate the buffer */
- u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
-
- return dest;
-}
-
-static inline uint8_t *
-_appendUTF8(uint8_t *pDest, UChar32 c) {
- /* it is 0<=c<=0x10ffff and not a surrogate if called by a validating function */
- if((c)<=0x7f) {
- *pDest++=(uint8_t)c;
- } else if(c<=0x7ff) {
- *pDest++=(uint8_t)((c>>6)|0xc0);
- *pDest++=(uint8_t)((c&0x3f)|0x80);
- } else if(c<=0xffff) {
- *pDest++=(uint8_t)((c>>12)|0xe0);
- *pDest++=(uint8_t)(((c>>6)&0x3f)|0x80);
- *pDest++=(uint8_t)(((c)&0x3f)|0x80);
- } else /* if((uint32_t)(c)<=0x10ffff) */ {
- *pDest++=(uint8_t)(((c)>>18)|0xf0);
- *pDest++=(uint8_t)((((c)>>12)&0x3f)|0x80);
- *pDest++=(uint8_t)((((c)>>6)&0x3f)|0x80);
- *pDest++=(uint8_t)(((c)&0x3f)|0x80);
- }
- return pDest;
-}
-
-
-U_CAPI char* U_EXPORT2
-u_strToUTF8WithSub(char *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const UChar *pSrc,
- int32_t srcLength,
- UChar32 subchar, int32_t *pNumSubstitutions,
- UErrorCode *pErrorCode){
- int32_t reqLength=0;
- uint32_t ch=0,ch2=0;
- uint8_t *pDest = (uint8_t *)dest;
- uint8_t *pDestLimit = (pDest!=NULL)?(pDest + destCapacity):NULL;
- int32_t numSubstitutions;
-
- /* args check */
- if(U_FAILURE(*pErrorCode)){
- return NULL;
- }
-
- if( (pSrc==NULL && srcLength!=0) || srcLength < -1 ||
- (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
- subchar > 0x10ffff || U_IS_SURROGATE(subchar)
- ) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- if(pNumSubstitutions!=NULL) {
- *pNumSubstitutions=0;
- }
- numSubstitutions=0;
-
- if(srcLength==-1) {
- while((ch=*pSrc)!=0) {
- ++pSrc;
- if(ch <= 0x7f) {
- if(pDest<pDestLimit) {
- *pDest++ = (uint8_t)ch;
- } else {
- reqLength = 1;
- break;
- }
- } else if(ch <= 0x7ff) {
- if((pDestLimit - pDest) >= 2) {
- *pDest++=(uint8_t)((ch>>6)|0xc0);
- *pDest++=(uint8_t)((ch&0x3f)|0x80);
- } else {
- reqLength = 2;
- break;
- }
- } else if(ch <= 0xd7ff || ch >= 0xe000) {
- if((pDestLimit - pDest) >= 3) {
- *pDest++=(uint8_t)((ch>>12)|0xe0);
- *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
- *pDest++=(uint8_t)((ch&0x3f)|0x80);
- } else {
- reqLength = 3;
- break;
- }
- } else /* ch is a surrogate */ {
- int32_t length;
-
- /*need not check for NUL because NUL fails U16_IS_TRAIL() anyway*/
- if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
- ++pSrc;
- ch=U16_GET_SUPPLEMENTARY(ch, ch2);
- } else if(subchar>=0) {
- ch=subchar;
- ++numSubstitutions;
- } else {
- /* Unicode 3.2 forbids surrogate code points in UTF-8 */
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
- }
-
- length = U8_LENGTH(ch);
- if((pDestLimit - pDest) >= length) {
- /* convert and append*/
- pDest=_appendUTF8(pDest, ch);
- } else {
- reqLength = length;
- break;
- }
- }
- }
- while((ch=*pSrc++)!=0) {
- if(ch<=0x7f) {
- ++reqLength;
- } else if(ch<=0x7ff) {
- reqLength+=2;
- } else if(!U16_IS_SURROGATE(ch)) {
- reqLength+=3;
- } else if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
- ++pSrc;
- reqLength+=4;
- } else if(subchar>=0) {
- reqLength+=U8_LENGTH(subchar);
- ++numSubstitutions;
- } else {
- /* Unicode 3.2 forbids surrogate code points in UTF-8 */
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
- }
- }
- } else {
- const UChar *pSrcLimit = (pSrc!=NULL)?(pSrc+srcLength):NULL;
- int32_t count;
-
- /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
- for(;;) {
- /*
- * Each iteration of the inner loop progresses by at most 3 UTF-8
- * bytes and one UChar, for most characters.
- * For supplementary code points (4 & 2), which are rare,
- * there is an additional adjustment.
- */
- count = (int32_t)((pDestLimit - pDest) / 3);
- srcLength = (int32_t)(pSrcLimit - pSrc);
- if(count > srcLength) {
- count = srcLength; /* min(remaining dest/3, remaining src) */
- }
- if(count < 3) {
- /*
- * Too much overhead if we get near the end of the string,
- * continue with the next loop.
- */
- break;
- }
- do {
- ch=*pSrc++;
- if(ch <= 0x7f) {
- *pDest++ = (uint8_t)ch;
- } else if(ch <= 0x7ff) {
- *pDest++=(uint8_t)((ch>>6)|0xc0);
- *pDest++=(uint8_t)((ch&0x3f)|0x80);
- } else if(ch <= 0xd7ff || ch >= 0xe000) {
- *pDest++=(uint8_t)((ch>>12)|0xe0);
- *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
- *pDest++=(uint8_t)((ch&0x3f)|0x80);
- } else /* ch is a surrogate */ {
- /*
- * We will read two UChars and probably output four bytes,
- * which we didn't account for with computing count,
- * so we adjust it here.
- */
- if(--count == 0) {
- --pSrc; /* undo ch=*pSrc++ for the lead surrogate */
- break; /* recompute count */
- }
-
- if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
- ++pSrc;
- ch=U16_GET_SUPPLEMENTARY(ch, ch2);
-
- /* writing 4 bytes per 2 UChars is ok */
- *pDest++=(uint8_t)((ch>>18)|0xf0);
- *pDest++=(uint8_t)(((ch>>12)&0x3f)|0x80);
- *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
- *pDest++=(uint8_t)((ch&0x3f)|0x80);
- } else {
- /* Unicode 3.2 forbids surrogate code points in UTF-8 */
- if(subchar>=0) {
- ch=subchar;
- ++numSubstitutions;
- } else {
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
- }
-
- /* convert and append*/
- pDest=_appendUTF8(pDest, ch);
- }
- }
- } while(--count > 0);
- }
-
- while(pSrc<pSrcLimit) {
- ch=*pSrc++;
- if(ch <= 0x7f) {
- if(pDest<pDestLimit) {
- *pDest++ = (uint8_t)ch;
- } else {
- reqLength = 1;
- break;
- }
- } else if(ch <= 0x7ff) {
- if((pDestLimit - pDest) >= 2) {
- *pDest++=(uint8_t)((ch>>6)|0xc0);
- *pDest++=(uint8_t)((ch&0x3f)|0x80);
- } else {
- reqLength = 2;
- break;
- }
- } else if(ch <= 0xd7ff || ch >= 0xe000) {
- if((pDestLimit - pDest) >= 3) {
- *pDest++=(uint8_t)((ch>>12)|0xe0);
- *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
- *pDest++=(uint8_t)((ch&0x3f)|0x80);
- } else {
- reqLength = 3;
- break;
- }
- } else /* ch is a surrogate */ {
- int32_t length;
-
- if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) {
- ++pSrc;
- ch=U16_GET_SUPPLEMENTARY(ch, ch2);
- } else if(subchar>=0) {
- ch=subchar;
- ++numSubstitutions;
- } else {
- /* Unicode 3.2 forbids surrogate code points in UTF-8 */
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
- }
-
- length = U8_LENGTH(ch);
- if((pDestLimit - pDest) >= length) {
- /* convert and append*/
- pDest=_appendUTF8(pDest, ch);
- } else {
- reqLength = length;
- break;
- }
- }
- }
- while(pSrc<pSrcLimit) {
- ch=*pSrc++;
- if(ch<=0x7f) {
- ++reqLength;
- } else if(ch<=0x7ff) {
- reqLength+=2;
- } else if(!U16_IS_SURROGATE(ch)) {
- reqLength+=3;
- } else if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) {
- ++pSrc;
- reqLength+=4;
- } else if(subchar>=0) {
- reqLength+=U8_LENGTH(subchar);
- ++numSubstitutions;
- } else {
- /* Unicode 3.2 forbids surrogate code points in UTF-8 */
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
- }
- }
- }
-
- reqLength+=(int32_t)(pDest - (uint8_t *)dest);
-
- if(pNumSubstitutions!=NULL) {
- *pNumSubstitutions=numSubstitutions;
- }
-
- if(pDestLength){
- *pDestLength = reqLength;
- }
-
- /* Terminate the buffer */
- u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
- return dest;
-}
-
-U_CAPI char* U_EXPORT2
-u_strToUTF8(char *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const UChar *pSrc,
- int32_t srcLength,
- UErrorCode *pErrorCode){
- return u_strToUTF8WithSub(
- dest, destCapacity, pDestLength,
- pSrc, srcLength,
- U_SENTINEL, NULL,
- pErrorCode);
-}
-
-U_CAPI UChar* U_EXPORT2
-u_strFromJavaModifiedUTF8WithSub(
- UChar *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const char *src,
- int32_t srcLength,
- UChar32 subchar, int32_t *pNumSubstitutions,
- UErrorCode *pErrorCode) {
- /* args check */
- if(U_FAILURE(*pErrorCode)) {
- return NULL;
- }
- if( (src==NULL && srcLength!=0) || srcLength < -1 ||
- (dest==NULL && destCapacity!=0) || destCapacity<0 ||
- subchar > 0x10ffff || U_IS_SURROGATE(subchar)
- ) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- if(pNumSubstitutions!=NULL) {
- *pNumSubstitutions=0;
- }
- UChar *pDest = dest;
- UChar *pDestLimit = dest+destCapacity;
- int32_t reqLength = 0;
- int32_t numSubstitutions=0;
-
- if(srcLength < 0) {
- /*
- * Transform a NUL-terminated ASCII string.
- * Handle non-ASCII strings with slower code.
- */
- UChar32 c;
- while(((c = (uint8_t)*src) != 0) && c <= 0x7f && (pDest < pDestLimit)) {
- *pDest++=(UChar)c;
- ++src;
- }
- if(c == 0) {
- reqLength=(int32_t)(pDest - dest);
- if(pDestLength) {
- *pDestLength = reqLength;
- }
-
- /* Terminate the buffer */
- u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
- return dest;
- }
- srcLength = static_cast<int32_t>(uprv_strlen(src));
- }
-
- /* Faster loop without ongoing checking for srcLength and pDestLimit. */
- UChar32 ch;
- uint8_t t1, t2;
- int32_t i = 0;
- for(;;) {
- int32_t count = (int32_t)(pDestLimit - pDest);
- int32_t count2 = srcLength - i;
- if(count >= count2 && srcLength > 0 && U8_IS_SINGLE(*src)) {
- /* fast ASCII loop */
- int32_t start = i;
- uint8_t b;
- while(i < srcLength && U8_IS_SINGLE(b = src[i])) {
- *pDest++=b;
- ++i;
- }
- int32_t delta = i - start;
- count -= delta;
- count2 -= delta;
- }
- /*
- * Each iteration of the inner loop progresses by at most 3 UTF-8
- * bytes and one UChar.
- */
- if(subchar > 0xFFFF) {
- break;
- }
- count2 /= 3;
- if(count > count2) {
- count = count2; /* min(remaining dest, remaining src/3) */
- }
- if(count < 3) {
- /*
- * Too much overhead if we get near the end of the string,
- * continue with the next loop.
- */
- break;
- }
- do {
- ch = (uint8_t)src[i++];
- if(U8_IS_SINGLE(ch)) {
- *pDest++=(UChar)ch;
- } else {
- if(ch >= 0xe0) {
- if( /* handle U+0000..U+FFFF inline */
- ch <= 0xef &&
- (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f &&
- (t2 = (uint8_t)(src[i+1] - 0x80)) <= 0x3f
- ) {
- /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
- *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
- i += 2;
- continue;
- }
- } else {
- if( /* handle U+0000..U+07FF inline */
- ch >= 0xc0 &&
- (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f
- ) {
- *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
- ++i;
- continue;
- }
- }
-
- if(subchar < 0) {
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
- } else if(subchar > 0xffff && --count == 0) {
- /*
- * We need to write two UChars, adjusted count for that,
- * and ran out of space.
- */
- --i; // back out byte ch
- break;
- } else {
- /* function call for error cases */
- utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1);
- ++numSubstitutions;
- *(pDest++)=(UChar)subchar;
- }
- }
- } while(--count > 0);
- }
-
- while(i < srcLength && (pDest < pDestLimit)) {
- ch = (uint8_t)src[i++];
- if(U8_IS_SINGLE(ch)){
- *pDest++=(UChar)ch;
- } else {
- if(ch >= 0xe0) {
- if( /* handle U+0000..U+FFFF inline */
- ch <= 0xef &&
- (i+1) < srcLength &&
- (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f &&
- (t2 = (uint8_t)(src[i+1] - 0x80)) <= 0x3f
- ) {
- /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
- *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
- i += 2;
- continue;
- }
- } else {
- if( /* handle U+0000..U+07FF inline */
- ch >= 0xc0 &&
- i < srcLength &&
- (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f
- ) {
- *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
- ++i;
- continue;
- }
- }
-
- if(subchar < 0) {
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
- } else {
- /* function call for error cases */
- utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1);
- ++numSubstitutions;
- if(subchar<=0xFFFF) {
- *(pDest++)=(UChar)subchar;
- } else {
- *(pDest++)=U16_LEAD(subchar);
- if(pDest<pDestLimit) {
- *(pDest++)=U16_TRAIL(subchar);
- } else {
- reqLength++;
- break;
- }
- }
- }
- }
- }
-
- /* Pre-flight the rest of the string. */
- while(i < srcLength) {
- ch = (uint8_t)src[i++];
- if(U8_IS_SINGLE(ch)) {
- reqLength++;
- } else {
- if(ch >= 0xe0) {
- if( /* handle U+0000..U+FFFF inline */
- ch <= 0xef &&
- (i+1) < srcLength &&
- (uint8_t)(src[i] - 0x80) <= 0x3f &&
- (uint8_t)(src[i+1] - 0x80) <= 0x3f
- ) {
- reqLength++;
- i += 2;
- continue;
- }
- } else {
- if( /* handle U+0000..U+07FF inline */
- ch >= 0xc0 &&
- i < srcLength &&
- (uint8_t)(src[i] - 0x80) <= 0x3f
- ) {
- reqLength++;
- ++i;
- continue;
- }
- }
-
- if(subchar < 0) {
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
- } else {
- /* function call for error cases */
- utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1);
- ++numSubstitutions;
- reqLength+=U16_LENGTH(ch);
- }
- }
- }
-
- if(pNumSubstitutions!=NULL) {
- *pNumSubstitutions=numSubstitutions;
- }
-
- reqLength+=(int32_t)(pDest - dest);
- if(pDestLength) {
- *pDestLength = reqLength;
- }
-
- /* Terminate the buffer */
- u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
- return dest;
-}
-
-U_CAPI char* U_EXPORT2
-u_strToJavaModifiedUTF8(
- char *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const UChar *src,
- int32_t srcLength,
- UErrorCode *pErrorCode) {
- int32_t reqLength=0;
- uint32_t ch=0;
- uint8_t *pDest = (uint8_t *)dest;
- uint8_t *pDestLimit = pDest + destCapacity;
- const UChar *pSrcLimit;
- int32_t count;
-
- /* args check */
- if(U_FAILURE(*pErrorCode)){
- return NULL;
- }
- if( (src==NULL && srcLength!=0) || srcLength < -1 ||
- (dest==NULL && destCapacity!=0) || destCapacity<0
- ) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- if(srcLength==-1) {
- /* Convert NUL-terminated ASCII, then find the string length. */
- while((ch=*src)<=0x7f && ch != 0 && pDest<pDestLimit) {
- *pDest++ = (uint8_t)ch;
- ++src;
- }
- if(ch == 0) {
- reqLength=(int32_t)(pDest - (uint8_t *)dest);
- if(pDestLength) {
- *pDestLength = reqLength;
- }
-
- /* Terminate the buffer */
- u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
- return dest;
- }
- srcLength = u_strlen(src);
- }
-
- /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
- pSrcLimit = (src!=NULL)?(src+srcLength):NULL;
- for(;;) {
- count = (int32_t)(pDestLimit - pDest);
- srcLength = (int32_t)(pSrcLimit - src);
- if(count >= srcLength && srcLength > 0 && *src <= 0x7f) {
- /* fast ASCII loop */
- const UChar *prevSrc = src;
- int32_t delta;
- while(src < pSrcLimit && (ch = *src) <= 0x7f && ch != 0) {
- *pDest++=(uint8_t)ch;
- ++src;
- }
- delta = (int32_t)(src - prevSrc);
- count -= delta;
- srcLength -= delta;
- }
- /*
- * Each iteration of the inner loop progresses by at most 3 UTF-8
- * bytes and one UChar.
- */
- count /= 3;
- if(count > srcLength) {
- count = srcLength; /* min(remaining dest/3, remaining src) */
- }
- if(count < 3) {
- /*
- * Too much overhead if we get near the end of the string,
- * continue with the next loop.
- */
- break;
- }
- do {
- ch=*src++;
- if(ch <= 0x7f && ch != 0) {
- *pDest++ = (uint8_t)ch;
- } else if(ch <= 0x7ff) {
- *pDest++=(uint8_t)((ch>>6)|0xc0);
- *pDest++=(uint8_t)((ch&0x3f)|0x80);
- } else {
- *pDest++=(uint8_t)((ch>>12)|0xe0);
- *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
- *pDest++=(uint8_t)((ch&0x3f)|0x80);
- }
- } while(--count > 0);
- }
-
- while(src<pSrcLimit) {
- ch=*src++;
- if(ch <= 0x7f && ch != 0) {
- if(pDest<pDestLimit) {
- *pDest++ = (uint8_t)ch;
- } else {
- reqLength = 1;
- break;
- }
- } else if(ch <= 0x7ff) {
- if((pDestLimit - pDest) >= 2) {
- *pDest++=(uint8_t)((ch>>6)|0xc0);
- *pDest++=(uint8_t)((ch&0x3f)|0x80);
- } else {
- reqLength = 2;
- break;
- }
- } else {
- if((pDestLimit - pDest) >= 3) {
- *pDest++=(uint8_t)((ch>>12)|0xe0);
- *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
- *pDest++=(uint8_t)((ch&0x3f)|0x80);
- } else {
- reqLength = 3;
- break;
- }
- }
- }
- while(src<pSrcLimit) {
- ch=*src++;
- if(ch <= 0x7f && ch != 0) {
- ++reqLength;
- } else if(ch<=0x7ff) {
- reqLength+=2;
- } else {
- reqLength+=3;
- }
- }
-
- reqLength+=(int32_t)(pDest - (uint8_t *)dest);
- if(pDestLength){
- *pDestLength = reqLength;
- }
-
- /* Terminate the buffer */
- u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
- return dest;
-}
diff --git a/contrib/libs/icu/common/utext.cpp b/contrib/libs/icu/common/utext.cpp
deleted file mode 100644
index 763b6684fba..00000000000
--- a/contrib/libs/icu/common/utext.cpp
+++ /dev/null
@@ -1,2877 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2005-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: utext.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2005apr12
-* created by: Markus W. Scherer
-*/
-
-#include <cstddef>
-
-#include "unicode/utypes.h"
-#include "unicode/ustring.h"
-#include "unicode/unistr.h"
-#include "unicode/chariter.h"
-#include "unicode/utext.h"
-#include "unicode/utf.h"
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-#include "ustr_imp.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "uassert.h"
-#include "putilimp.h"
-
-U_NAMESPACE_USE
-
-#define I32_FLAG(bitIndex) ((int32_t)1<<(bitIndex))
-
-
-static UBool
-utext_access(UText *ut, int64_t index, UBool forward) {
- return ut->pFuncs->access(ut, index, forward);
-}
-
-
-
-U_CAPI UBool U_EXPORT2
-utext_moveIndex32(UText *ut, int32_t delta) {
- UChar32 c;
- if (delta > 0) {
- do {
- if(ut->chunkOffset>=ut->chunkLength && !utext_access(ut, ut->chunkNativeLimit, TRUE)) {
- return FALSE;
- }
- c = ut->chunkContents[ut->chunkOffset];
- if (U16_IS_SURROGATE(c)) {
- c = utext_next32(ut);
- if (c == U_SENTINEL) {
- return FALSE;
- }
- } else {
- ut->chunkOffset++;
- }
- } while(--delta>0);
-
- } else if (delta<0) {
- do {
- if(ut->chunkOffset<=0 && !utext_access(ut, ut->chunkNativeStart, FALSE)) {
- return FALSE;
- }
- c = ut->chunkContents[ut->chunkOffset-1];
- if (U16_IS_SURROGATE(c)) {
- c = utext_previous32(ut);
- if (c == U_SENTINEL) {
- return FALSE;
- }
- } else {
- ut->chunkOffset--;
- }
- } while(++delta<0);
- }
-
- return TRUE;
-}
-
-
-U_CAPI int64_t U_EXPORT2
-utext_nativeLength(UText *ut) {
- return ut->pFuncs->nativeLength(ut);
-}
-
-
-U_CAPI UBool U_EXPORT2
-utext_isLengthExpensive(const UText *ut) {
- UBool r = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)) != 0;
- return r;
-}
-
-
-U_CAPI int64_t U_EXPORT2
-utext_getNativeIndex(const UText *ut) {
- if(ut->chunkOffset <= ut->nativeIndexingLimit) {
- return ut->chunkNativeStart+ut->chunkOffset;
- } else {
- return ut->pFuncs->mapOffsetToNative(ut);
- }
-}
-
-
-U_CAPI void U_EXPORT2
-utext_setNativeIndex(UText *ut, int64_t index) {
- if(index<ut->chunkNativeStart || index>=ut->chunkNativeLimit) {
- // The desired position is outside of the current chunk.
- // Access the new position. Assume a forward iteration from here,
- // which will also be optimimum for a single random access.
- // Reverse iterations may suffer slightly.
- ut->pFuncs->access(ut, index, TRUE);
- } else if((int32_t)(index - ut->chunkNativeStart) <= ut->nativeIndexingLimit) {
- // utf-16 indexing.
- ut->chunkOffset=(int32_t)(index-ut->chunkNativeStart);
- } else {
- ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index);
- }
- // The convention is that the index must always be on a code point boundary.
- // Adjust the index position if it is in the middle of a surrogate pair.
- if (ut->chunkOffset<ut->chunkLength) {
- UChar c= ut->chunkContents[ut->chunkOffset];
- if (U16_IS_TRAIL(c)) {
- if (ut->chunkOffset==0) {
- ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE);
- }
- if (ut->chunkOffset>0) {
- UChar lead = ut->chunkContents[ut->chunkOffset-1];
- if (U16_IS_LEAD(lead)) {
- ut->chunkOffset--;
- }
- }
- }
- }
-}
-
-
-
-U_CAPI int64_t U_EXPORT2
-utext_getPreviousNativeIndex(UText *ut) {
- //
- // Fast-path the common case.
- // Common means current position is not at the beginning of a chunk
- // and the preceding character is not supplementary.
- //
- int32_t i = ut->chunkOffset - 1;
- int64_t result;
- if (i >= 0) {
- UChar c = ut->chunkContents[i];
- if (U16_IS_TRAIL(c) == FALSE) {
- if (i <= ut->nativeIndexingLimit) {
- result = ut->chunkNativeStart + i;
- } else {
- ut->chunkOffset = i;
- result = ut->pFuncs->mapOffsetToNative(ut);
- ut->chunkOffset++;
- }
- return result;
- }
- }
-
- // If at the start of text, simply return 0.
- if (ut->chunkOffset==0 && ut->chunkNativeStart==0) {
- return 0;
- }
-
- // Harder, less common cases. We are at a chunk boundary, or on a surrogate.
- // Keep it simple, use other functions to handle the edges.
- //
- utext_previous32(ut);
- result = UTEXT_GETNATIVEINDEX(ut);
- utext_next32(ut);
- return result;
-}
-
-
-//
-// utext_current32. Get the UChar32 at the current position.
-// UText iteration position is always on a code point boundary,
-// never on the trail half of a surrogate pair.
-//
-U_CAPI UChar32 U_EXPORT2
-utext_current32(UText *ut) {
- UChar32 c;
- if (ut->chunkOffset==ut->chunkLength) {
- // Current position is just off the end of the chunk.
- if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) {
- // Off the end of the text.
- return U_SENTINEL;
- }
- }
-
- c = ut->chunkContents[ut->chunkOffset];
- if (U16_IS_LEAD(c) == FALSE) {
- // Normal, non-supplementary case.
- return c;
- }
-
- //
- // Possible supplementary char.
- //
- UChar32 trail = 0;
- UChar32 supplementaryC = c;
- if ((ut->chunkOffset+1) < ut->chunkLength) {
- // The trail surrogate is in the same chunk.
- trail = ut->chunkContents[ut->chunkOffset+1];
- } else {
- // The trail surrogate is in a different chunk.
- // Because we must maintain the iteration position, we need to switch forward
- // into the new chunk, get the trail surrogate, then revert the chunk back to the
- // original one.
- // An edge case to be careful of: the entire text may end with an unpaired
- // leading surrogate. The attempt to access the trail will fail, but
- // the original position before the unpaired lead still needs to be restored.
- int64_t nativePosition = ut->chunkNativeLimit;
- int32_t originalOffset = ut->chunkOffset;
- if (ut->pFuncs->access(ut, nativePosition, TRUE)) {
- trail = ut->chunkContents[ut->chunkOffset];
- }
- UBool r = ut->pFuncs->access(ut, nativePosition, FALSE); // reverse iteration flag loads preceding chunk
- U_ASSERT(r==TRUE);
- ut->chunkOffset = originalOffset;
- if(!r) {
- return U_SENTINEL;
- }
- }
-
- if (U16_IS_TRAIL(trail)) {
- supplementaryC = U16_GET_SUPPLEMENTARY(c, trail);
- }
- return supplementaryC;
-
-}
-
-
-U_CAPI UChar32 U_EXPORT2
-utext_char32At(UText *ut, int64_t nativeIndex) {
- UChar32 c = U_SENTINEL;
-
- // Fast path the common case.
- if (nativeIndex>=ut->chunkNativeStart && nativeIndex < ut->chunkNativeStart + ut->nativeIndexingLimit) {
- ut->chunkOffset = (int32_t)(nativeIndex - ut->chunkNativeStart);
- c = ut->chunkContents[ut->chunkOffset];
- if (U16_IS_SURROGATE(c) == FALSE) {
- return c;
- }
- }
-
-
- utext_setNativeIndex(ut, nativeIndex);
- if (nativeIndex>=ut->chunkNativeStart && ut->chunkOffset<ut->chunkLength) {
- c = ut->chunkContents[ut->chunkOffset];
- if (U16_IS_SURROGATE(c)) {
- // For surrogates, let current32() deal with the complications
- // of supplementaries that may span chunk boundaries.
- c = utext_current32(ut);
- }
- }
- return c;
-}
-
-
-U_CAPI UChar32 U_EXPORT2
-utext_next32(UText *ut) {
- UChar32 c;
-
- if (ut->chunkOffset >= ut->chunkLength) {
- if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) {
- return U_SENTINEL;
- }
- }
-
- c = ut->chunkContents[ut->chunkOffset++];
- if (U16_IS_LEAD(c) == FALSE) {
- // Normal case, not supplementary.
- // (A trail surrogate seen here is just returned as is, as a surrogate value.
- // It cannot be part of a pair.)
- return c;
- }
-
- if (ut->chunkOffset >= ut->chunkLength) {
- if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) {
- // c is an unpaired lead surrogate at the end of the text.
- // return it as it is.
- return c;
- }
- }
- UChar32 trail = ut->chunkContents[ut->chunkOffset];
- if (U16_IS_TRAIL(trail) == FALSE) {
- // c was an unpaired lead surrogate, not at the end of the text.
- // return it as it is (unpaired). Iteration position is on the
- // following character, possibly in the next chunk, where the
- // trail surrogate would have been if it had existed.
- return c;
- }
-
- UChar32 supplementary = U16_GET_SUPPLEMENTARY(c, trail);
- ut->chunkOffset++; // move iteration position over the trail surrogate.
- return supplementary;
- }
-
-
-U_CAPI UChar32 U_EXPORT2
-utext_previous32(UText *ut) {
- UChar32 c;
-
- if (ut->chunkOffset <= 0) {
- if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) {
- return U_SENTINEL;
- }
- }
- ut->chunkOffset--;
- c = ut->chunkContents[ut->chunkOffset];
- if (U16_IS_TRAIL(c) == FALSE) {
- // Normal case, not supplementary.
- // (A lead surrogate seen here is just returned as is, as a surrogate value.
- // It cannot be part of a pair.)
- return c;
- }
-
- if (ut->chunkOffset <= 0) {
- if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) {
- // c is an unpaired trail surrogate at the start of the text.
- // return it as it is.
- return c;
- }
- }
-
- UChar32 lead = ut->chunkContents[ut->chunkOffset-1];
- if (U16_IS_LEAD(lead) == FALSE) {
- // c was an unpaired trail surrogate, not at the end of the text.
- // return it as it is (unpaired). Iteration position is at c
- return c;
- }
-
- UChar32 supplementary = U16_GET_SUPPLEMENTARY(lead, c);
- ut->chunkOffset--; // move iteration position over the lead surrogate.
- return supplementary;
-}
-
-
-
-U_CAPI UChar32 U_EXPORT2
-utext_next32From(UText *ut, int64_t index) {
- UChar32 c = U_SENTINEL;
-
- if(index<ut->chunkNativeStart || index>=ut->chunkNativeLimit) {
- // Desired position is outside of the current chunk.
- if(!ut->pFuncs->access(ut, index, TRUE)) {
- // no chunk available here
- return U_SENTINEL;
- }
- } else if (index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) {
- // Desired position is in chunk, with direct 1:1 native to UTF16 indexing
- ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);
- } else {
- // Desired position is in chunk, with non-UTF16 indexing.
- ut->chunkOffset = ut->pFuncs->mapNativeIndexToUTF16(ut, index);
- }
-
- c = ut->chunkContents[ut->chunkOffset++];
- if (U16_IS_SURROGATE(c)) {
- // Surrogates. Many edge cases. Use other functions that already
- // deal with the problems.
- utext_setNativeIndex(ut, index);
- c = utext_next32(ut);
- }
- return c;
-}
-
-
-U_CAPI UChar32 U_EXPORT2
-utext_previous32From(UText *ut, int64_t index) {
- //
- // Return the character preceding the specified index.
- // Leave the iteration position at the start of the character that was returned.
- //
- UChar32 cPrev; // The character preceding cCurr, which is what we will return.
-
- // Address the chunk containg the position preceding the incoming index
- // A tricky edge case:
- // We try to test the requested native index against the chunkNativeStart to determine
- // whether the character preceding the one at the index is in the current chunk.
- // BUT, this test can fail with UTF-8 (or any other multibyte encoding), when the
- // requested index is on something other than the first position of the first char.
- //
- if(index<=ut->chunkNativeStart || index>ut->chunkNativeLimit) {
- // Requested native index is outside of the current chunk.
- if(!ut->pFuncs->access(ut, index, FALSE)) {
- // no chunk available here
- return U_SENTINEL;
- }
- } else if(index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) {
- // Direct UTF-16 indexing.
- ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);
- } else {
- ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index);
- if (ut->chunkOffset==0 && !ut->pFuncs->access(ut, index, FALSE)) {
- // no chunk available here
- return U_SENTINEL;
- }
- }
-
- //
- // Simple case with no surrogates.
- //
- ut->chunkOffset--;
- cPrev = ut->chunkContents[ut->chunkOffset];
-
- if (U16_IS_SURROGATE(cPrev)) {
- // Possible supplementary. Many edge cases.
- // Let other functions do the heavy lifting.
- utext_setNativeIndex(ut, index);
- cPrev = utext_previous32(ut);
- }
- return cPrev;
-}
-
-
-U_CAPI int32_t U_EXPORT2
-utext_extract(UText *ut,
- int64_t start, int64_t limit,
- UChar *dest, int32_t destCapacity,
- UErrorCode *status) {
- return ut->pFuncs->extract(ut, start, limit, dest, destCapacity, status);
- }
-
-
-
-U_CAPI UBool U_EXPORT2
-utext_equals(const UText *a, const UText *b) {
- if (a==NULL || b==NULL ||
- a->magic != UTEXT_MAGIC ||
- b->magic != UTEXT_MAGIC) {
- // Null or invalid arguments don't compare equal to anything.
- return FALSE;
- }
-
- if (a->pFuncs != b->pFuncs) {
- // Different types of text providers.
- return FALSE;
- }
-
- if (a->context != b->context) {
- // Different sources (different strings)
- return FALSE;
- }
- if (utext_getNativeIndex(a) != utext_getNativeIndex(b)) {
- // Different current position in the string.
- return FALSE;
- }
-
- return TRUE;
-}
-
-U_CAPI UBool U_EXPORT2
-utext_isWritable(const UText *ut)
-{
- UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) != 0;
- return b;
-}
-
-
-U_CAPI void U_EXPORT2
-utext_freeze(UText *ut) {
- // Zero out the WRITABLE flag.
- ut->providerProperties &= ~(I32_FLAG(UTEXT_PROVIDER_WRITABLE));
-}
-
-
-U_CAPI UBool U_EXPORT2
-utext_hasMetaData(const UText *ut)
-{
- UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA)) != 0;
- return b;
-}
-
-
-
-U_CAPI int32_t U_EXPORT2
-utext_replace(UText *ut,
- int64_t nativeStart, int64_t nativeLimit,
- const UChar *replacementText, int32_t replacementLength,
- UErrorCode *status)
-{
- if (U_FAILURE(*status)) {
- return 0;
- }
- if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == 0) {
- *status = U_NO_WRITE_PERMISSION;
- return 0;
- }
- int32_t i = ut->pFuncs->replace(ut, nativeStart, nativeLimit, replacementText, replacementLength, status);
- return i;
-}
-
-U_CAPI void U_EXPORT2
-utext_copy(UText *ut,
- int64_t nativeStart, int64_t nativeLimit,
- int64_t destIndex,
- UBool move,
- UErrorCode *status)
-{
- if (U_FAILURE(*status)) {
- return;
- }
- if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == 0) {
- *status = U_NO_WRITE_PERMISSION;
- return;
- }
- ut->pFuncs->copy(ut, nativeStart, nativeLimit, destIndex, move, status);
-}
-
-
-
-U_CAPI UText * U_EXPORT2
-utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status) {
- if (U_FAILURE(*status)) {
- return dest;
- }
- UText *result = src->pFuncs->clone(dest, src, deep, status);
- if (U_FAILURE(*status)) {
- return result;
- }
- if (result == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return result;
- }
- if (readOnly) {
- utext_freeze(result);
- }
- return result;
-}
-
-
-
-//------------------------------------------------------------------------------
-//
-// UText common functions implementation
-//
-//------------------------------------------------------------------------------
-
-//
-// UText.flags bit definitions
-//
-enum {
- UTEXT_HEAP_ALLOCATED = 1, // 1 if ICU has allocated this UText struct on the heap.
- // 0 if caller provided storage for the UText.
-
- UTEXT_EXTRA_HEAP_ALLOCATED = 2, // 1 if ICU has allocated extra storage as a separate
- // heap block.
- // 0 if there is no separate allocation. Either no extra
- // storage was requested, or it is appended to the end
- // of the main UText storage.
-
- UTEXT_OPEN = 4 // 1 if this UText is currently open
- // 0 if this UText is not open.
-};
-
-
-//
-// Extended form of a UText. The purpose is to aid in computing the total size required
-// when a provider asks for a UText to be allocated with extra storage.
-
-struct ExtendedUText {
- UText ut;
- std::max_align_t extension;
-};
-
-static const UText emptyText = UTEXT_INITIALIZER;
-
-U_CAPI UText * U_EXPORT2
-utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status) {
- if (U_FAILURE(*status)) {
- return ut;
- }
-
- if (ut == NULL) {
- // We need to heap-allocate storage for the new UText
- int32_t spaceRequired = sizeof(UText);
- if (extraSpace > 0) {
- spaceRequired = sizeof(ExtendedUText) + extraSpace - sizeof(std::max_align_t);
- }
- ut = (UText *)uprv_malloc(spaceRequired);
- if (ut == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- } else {
- *ut = emptyText;
- ut->flags |= UTEXT_HEAP_ALLOCATED;
- if (spaceRequired>0) {
- ut->extraSize = extraSpace;
- ut->pExtra = &((ExtendedUText *)ut)->extension;
- }
- }
- } else {
- // We have been supplied with an already existing UText.
- // Verify that it really appears to be a UText.
- if (ut->magic != UTEXT_MAGIC) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return ut;
- }
- // If the ut is already open and there's a provider supplied close
- // function, call it.
- if ((ut->flags & UTEXT_OPEN) && ut->pFuncs->close != NULL) {
- ut->pFuncs->close(ut);
- }
- ut->flags &= ~UTEXT_OPEN;
-
- // If extra space was requested by our caller, check whether
- // sufficient already exists, and allocate new if needed.
- if (extraSpace > ut->extraSize) {
- // Need more space. If there is existing separately allocated space,
- // delete it first, then allocate new space.
- if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) {
- uprv_free(ut->pExtra);
- ut->extraSize = 0;
- }
- ut->pExtra = uprv_malloc(extraSpace);
- if (ut->pExtra == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- } else {
- ut->extraSize = extraSpace;
- ut->flags |= UTEXT_EXTRA_HEAP_ALLOCATED;
- }
- }
- }
- if (U_SUCCESS(*status)) {
- ut->flags |= UTEXT_OPEN;
-
- // Initialize all remaining fields of the UText.
- //
- ut->context = NULL;
- ut->chunkContents = NULL;
- ut->p = NULL;
- ut->q = NULL;
- ut->r = NULL;
- ut->a = 0;
- ut->b = 0;
- ut->c = 0;
- ut->chunkOffset = 0;
- ut->chunkLength = 0;
- ut->chunkNativeStart = 0;
- ut->chunkNativeLimit = 0;
- ut->nativeIndexingLimit = 0;
- ut->providerProperties = 0;
- ut->privA = 0;
- ut->privB = 0;
- ut->privC = 0;
- ut->privP = NULL;
- if (ut->pExtra!=NULL && ut->extraSize>0)
- uprv_memset(ut->pExtra, 0, ut->extraSize);
-
- }
- return ut;
-}
-
-
-U_CAPI UText * U_EXPORT2
-utext_close(UText *ut) {
- if (ut==NULL ||
- ut->magic != UTEXT_MAGIC ||
- (ut->flags & UTEXT_OPEN) == 0)
- {
- // The supplied ut is not an open UText.
- // Do nothing.
- return ut;
- }
-
- // If the provider gave us a close function, call it now.
- // This will clean up anything allocated specifically by the provider.
- if (ut->pFuncs->close != NULL) {
- ut->pFuncs->close(ut);
- }
- ut->flags &= ~UTEXT_OPEN;
-
- // If we (the framework) allocated the UText or subsidiary storage,
- // delete it.
- if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) {
- uprv_free(ut->pExtra);
- ut->pExtra = NULL;
- ut->flags &= ~UTEXT_EXTRA_HEAP_ALLOCATED;
- ut->extraSize = 0;
- }
-
- // Zero out function table of the closed UText. This is a defensive move,
- // inteded to cause applications that inadvertantly use a closed
- // utext to crash with null pointer errors.
- ut->pFuncs = NULL;
-
- if (ut->flags & UTEXT_HEAP_ALLOCATED) {
- // This UText was allocated by UText setup. We need to free it.
- // Clear magic, so we can detect if the user messes up and immediately
- // tries to reopen another UText using the deleted storage.
- ut->magic = 0;
- uprv_free(ut);
- ut = NULL;
- }
- return ut;
-}
-
-
-
-
-//
-// invalidateChunk Reset a chunk to have no contents, so that the next call
-// to access will cause new data to load.
-// This is needed when copy/move/replace operate directly on the
-// backing text, potentially putting it out of sync with the
-// contents in the chunk.
-//
-static void
-invalidateChunk(UText *ut) {
- ut->chunkLength = 0;
- ut->chunkNativeLimit = 0;
- ut->chunkNativeStart = 0;
- ut->chunkOffset = 0;
- ut->nativeIndexingLimit = 0;
-}
-
-//
-// pinIndex Do range pinning on a native index parameter.
-// 64 bit pinning is done in place.
-// 32 bit truncated result is returned as a convenience for
-// use in providers that don't need 64 bits.
-static int32_t
-pinIndex(int64_t &index, int64_t limit) {
- if (index<0) {
- index = 0;
- } else if (index > limit) {
- index = limit;
- }
- return (int32_t)index;
-}
-
-
-U_CDECL_BEGIN
-
-//
-// Pointer relocation function,
-// a utility used by shallow clone.
-// Adjust a pointer that refers to something within one UText (the source)
-// to refer to the same relative offset within a another UText (the target)
-//
-static void adjustPointer(UText *dest, const void **destPtr, const UText *src) {
- // convert all pointers to (char *) so that byte address arithmetic will work.
- char *dptr = (char *)*destPtr;
- char *dUText = (char *)dest;
- char *sUText = (char *)src;
-
- if (dptr >= (char *)src->pExtra && dptr < ((char*)src->pExtra)+src->extraSize) {
- // target ptr was to something within the src UText's pExtra storage.
- // relocate it into the target UText's pExtra region.
- *destPtr = ((char *)dest->pExtra) + (dptr - (char *)src->pExtra);
- } else if (dptr>=sUText && dptr < sUText+src->sizeOfStruct) {
- // target ptr was pointing to somewhere within the source UText itself.
- // Move it to the same offset within the target UText.
- *destPtr = dUText + (dptr-sUText);
- }
-}
-
-
-//
-// shallowTextClone   Generic copy-the-UText-by-value clone.
-//                    Usable directly as the clone function of some UText types,
-//                    and as the first step of provider specific clone functions.
-//
-//   Returns NULL if *status already indicates failure on entry, otherwise
-//   whatever utext_setup() produced. The returned UText never claims
-//   ownership of the underlying text.
-//
-static UText * U_CALLCONV
-shallowTextClone(UText * dest, const UText * src, UErrorCode * status) {
-    if (U_FAILURE(*status)) {
-        return NULL;
-    }
-    const int32_t extraBytes = src->extraSize;
-
-    // The generic utext_setup() allocates storage where required, with as
-    // much extra space as the source UText has.
-    dest = utext_setup(dest, extraBytes, status);
-    if (U_FAILURE(*status)) {
-        return dest;
-    }
-
-    // The flags (how the UText was allocated) and the extra storage pointer
-    // established by utext_setup() must survive the struct copy below, so
-    // remember them here and put them back afterwards.
-    void *const   setupExtra = dest->pExtra;
-    const int32_t setupFlags = dest->flags;
-
-    // Copy the UText struct by value, limited to the smaller of the two
-    // struct sizes, then copy any extra storage.
-    int structBytes = dest->sizeOfStruct;
-    if (src->sizeOfStruct < structBytes) {
-        structBytes = src->sizeOfStruct;
-    }
-    uprv_memcpy(dest, src, structBytes);
-    dest->pExtra = setupExtra;
-    dest->flags  = setupFlags;
-    if (extraBytes > 0) {
-        uprv_memcpy(dest->pExtra, src->pExtra, extraBytes);
-    }
-
-    // Pointer fields that referred to the source UText (or its extra storage)
-    // are redirected to the corresponding place in the clone.
-    const void **relocatable[] = {
-        &dest->context,
-        &dest->p,
-        &dest->q,
-        &dest->r,
-        (const void **)&dest->chunkContents
-    };
-    const int32_t relocatableCount = (int32_t)(sizeof(relocatable) / sizeof(relocatable[0]));
-    for (int32_t i = 0; i < relocatableCount; ++i) {
-        adjustPointer(dest, relocatable[i], src);
-    }
-
-    // A shallow clone does _not_ own the underlying text storage,
-    // whether or not the source did.
-    dest->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
-
-    return dest;
-}
-
-
-U_CDECL_END
-
-
-
-//------------------------------------------------------------------------------
-//
-// UText implementation for UTF-8 char * strings (read-only)
-// Limitation: string length must be <= 0x7fffffff in length.
-// (length must fit in an int32_t variable)
-//
-// Use of UText data members:
-// context pointer to UTF-8 string
-// utext.b is the input string length (bytes).
-// utext.c Length scanned so far in string
-// (for optimizing finding length of zero terminated strings.)
-// utext.p pointer to the current buffer
-// utext.q pointer to the other buffer.
-//
-//------------------------------------------------------------------------------
-
-// Chunk size.
-// Must be less than 85 (256/3), because of byte mapping from UChar indexes to native indexes.
-// Worst case is three native bytes to one UChar. (Supplementaries are 4 native bytes
-// to two UChars.)
-// The longest illegal byte sequence treated as a single error (and converted to U+FFFD)
-// is a three-byte sequence (truncated four-byte sequence).
-//
-enum { UTF8_TEXT_CHUNK_SIZE=32 }; // Nominal number of UChars per chunk; the arrays in UTF8Buf are sized from it.
-
-//
-// UTF8Buf Two of these structs will be set up in the UText's extra allocated space.
-// Each contains the UChar chunk buffer, the to and from native maps, and
-// header info.
-//
-// Because backwards iteration fills the buffers starting at the end and
-// working towards the front, the filled part of the buffers may not begin
-// at the start of the available storage for the buffers.
-//
-// Buffer size is bigger than the specified UTF8_TEXT_CHUNK_SIZE to allow for
-// the last character added being a supplementary, and thus requiring a surrogate
-// pair. Doing this is simpler than checking for the edge case.
-//
-
-struct UTF8Buf {
- int32_t bufNativeStart; // Native index of first char in UChar buf
- int32_t bufNativeLimit; // Native index following last char in buf.
- int32_t bufStartIdx; // First filled position in buf.
- int32_t bufLimitIdx; // Limit of filled range in buf.
- int32_t bufNILimit; // Limit of native indexing part of buf
- int32_t toUCharsMapStart; // Native index corresponding to
- // mapToUChars[0].
- // Set to bufNativeStart when filling forwards.
- // Set to computed value when filling backwards.
-
- UChar buf[UTF8_TEXT_CHUNK_SIZE+4]; // The UChar buffer. Requires one extra position beyond
- // the chunk size, to allow for surrogate at the end.
- // Length must be identical to mapToNative array, below,
- // because of the way indexing works when the array is
- // filled backwards during a reverse iteration. Thus,
- // the additional extra size.
- uint8_t mapToNative[UTF8_TEXT_CHUNK_SIZE+4]; // map UChar index in buf to
- // native offset from toUCharsMapStart.
- // Requires two extra slots,
- // one for a supplementary starting in the last normal position,
- // and one for an entry for the buffer limit position.
- uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from toUCharsMapStart to
- // corresponding index in buf.
- int32_t align; // NOTE(review): never referenced in the code visible here; presumably alignment padding - confirm.
-};
-
-U_CDECL_BEGIN
-
-//
-// utf8TextLength
-//
-//   Return the length of the string, in bytes.
-//   A negative ut->b marks a zero terminated string whose end has not been
-//   found yet; in that case scan on from the point already reached (ut->c)
-//   to the terminating NUL, cache the result in ut->b and clear the
-//   "length is expensive" provider property.
-//
-static int64_t U_CALLCONV
-utf8TextLength(UText *ut) {
-    if (ut->b >= 0) {
-        // Length is already known.
-        return ut->b;
-    }
-
-    const char *text = (const char *)ut->context;
-    const char *end  = text + ut->c;
-    while (*end != 0) {
-        ++end;
-    }
-
-    // Strings of 2 GB or more cannot be handled by this provider;
-    // the reported length is clipped to 0x7fffffff.
-    const int64_t byteLength = end - text;
-    ut->b = (byteLength < 0x7fffffff) ? (int32_t)byteLength : 0x7fffffff;
-    ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
-    return ut->b;
-}
-
-
-
-
-
-
-static UBool U_CALLCONV
-utf8TextAccess(UText *ut, int64_t index, UBool forward) {
- // Make the chunk cover native (byte) index 'index' for iteration in the given direction; returns FALSE when there is no text in that direction.
- // Apologies to those who are allergic to goto statements.
- // Consider each goto to a labelled block to be the equivalent of
- // call the named block as if it were a function();
- // return;
- // The labelled blocks are swapBuffers, swapBuffersAndFail, makeStubBuffer, fillForward and fillReverse.
- const uint8_t *s8=(const uint8_t *)ut->context;
- UTF8Buf *u8b = NULL; // buffer being examined or installed; ut->p is the current buffer, ut->q the alternate
- int32_t length = ut->b; // Length of original utf-8
- int32_t ix= (int32_t)index; // Requested index, trimmed to 32 bits.
- int32_t mapIndex = 0; // index into u8b->mapToUChars for the requested native index
- if (index<0) {
- ix=0;
- } else if (index > 0x7fffffff) {
- // Strings with 64 bit lengths not supported by this UTF-8 provider.
- ix = 0x7fffffff;
- }
-
- // Pin requested index to the string length.
- if (ix>length) {
- if (length>=0) {
- ix=length;
- } else if (ix>=ut->c) {
- // Zero terminated string, and requested index is beyond
- // the region that has already been scanned.
- // Scan up to either the end of the string or to the
- // requested position, whichever comes first.
- while (ut->c<ix && s8[ut->c]!=0) {
- ut->c++;
- }
- // TODO: support for null terminated string length > 32 bits.
- if (s8[ut->c] == 0) {
- // We just found the actual length of the string.
- // Trim the requested index back to that.
- ix = ut->c;
- ut->b = ut->c;
- length = ut->c;
- ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
- }
- }
- }
-
- //
- // Dispatch to the appropriate action for a forward iteration request.
- //
- if (forward) {
- if (ix==ut->chunkNativeLimit) {
- // Check for normal sequential iteration cases first.
- if (ix==length) {
- // Just reached end of string
- // Don't swap buffers, but do set the
- // current buffer position.
- ut->chunkOffset = ut->chunkLength;
- return FALSE;
- } else {
- // End of current buffer.
- // check whether other buffer already has what we need.
- UTF8Buf *altB = (UTF8Buf *)ut->q;
- if (ix>=altB->bufNativeStart && ix<altB->bufNativeLimit) {
- goto swapBuffers;
- }
- }
- }
-
- // A random access. Desired index could be in either or neither buf.
- // For optimizing the order of testing, first check for the index
- // being in the other buffer. This will be the case for uses that
- // move back and forth over a fairly limited range
- {
- u8b = (UTF8Buf *)ut->q; // the alternate buffer
- if (ix>=u8b->bufNativeStart && ix<u8b->bufNativeLimit) {
- // Requested index is in the other buffer.
- goto swapBuffers;
- }
- if (ix == length) {
- // Requested index is end-of-string.
- // (this is the case of randomly seeking to the end.
- // The case of iterating off the end is handled earlier.)
- if (ix == ut->chunkNativeLimit) {
- // Current buffer extends up to the end of the string.
- // Leave it as the current buffer.
- ut->chunkOffset = ut->chunkLength;
- return FALSE;
- }
- if (ix == u8b->bufNativeLimit) {
- // Alternate buffer extends to the end of string.
- // Swap it in as the current buffer.
- goto swapBuffersAndFail;
- }
-
- // Neither existing buffer extends to the end of the string.
- goto makeStubBuffer;
- }
-
- if (ix<ut->chunkNativeStart || ix>=ut->chunkNativeLimit) {
- // Requested index is in neither buffer.
- goto fillForward;
- }
-
- // Requested index is in this buffer.
- u8b = (UTF8Buf *)ut->p; // the current buffer
- mapIndex = ix - u8b->toUCharsMapStart;
- U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
- ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
- return TRUE;
-
- }
- }
-
-
- //
- // Dispatch to the appropriate action for a
- // Backwards Direction iteration request.
- //
- if (ix==ut->chunkNativeStart) {
- // Check for normal sequential iteration cases first.
- if (ix==0) {
- // Just reached the start of string
- // Don't swap buffers, but do set the
- // current buffer position.
- ut->chunkOffset = 0;
- return FALSE;
- } else {
- // Start of current buffer.
- // check whether other buffer already has what we need.
- UTF8Buf *altB = (UTF8Buf *)ut->q;
- if (ix>altB->bufNativeStart && ix<=altB->bufNativeLimit) {
- goto swapBuffers;
- }
- }
- }
-
- // A random access. Desired index could be in either or neither buf.
- // For optimizing the order of testing,
- // Most likely case: in the other buffer.
- // Second most likely: in neither buffer.
- // Unlikely, but must work: in the current buffer.
- u8b = (UTF8Buf *)ut->q; // the alternate buffer
- if (ix>u8b->bufNativeStart && ix<=u8b->bufNativeLimit) {
- // Requested index is in the other buffer.
- goto swapBuffers;
- }
- // Requested index is start-of-string.
- // (this is the case of randomly seeking to the start.
- // The case of iterating off the start is handled earlier.)
- if (ix==0) {
- if (u8b->bufNativeStart==0) {
- // Alternate buffer contains the data for the start string.
- // Make it be the current buffer.
- goto swapBuffersAndFail;
- } else {
- // Request for data before the start of string,
- // neither buffer is usable.
- // set up a zero-length buffer.
- goto makeStubBuffer;
- }
- }
-
- if (ix<=ut->chunkNativeStart || ix>ut->chunkNativeLimit) {
- // Requested index is in neither buffer.
- goto fillReverse;
- }
-
- // Requested index is in this buffer.
- // Set the utf16 buffer index.
- u8b = (UTF8Buf *)ut->p;
- mapIndex = ix - u8b->toUCharsMapStart;
- ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
- if (ut->chunkOffset==0) {
- // This occurs when the first character in the text is
- // a multi-byte UTF-8 char, and the requested index is to
- // one of the trailing bytes. Because there is no preceding
- // character, this access fails. We can't pick up on the
- // situation sooner because the requested index is not zero.
- return FALSE;
- } else {
- return TRUE;
- }
-
-
-
-swapBuffers:
- // The alternate buffer (ut->q) has the string data that was requested.
- // Swap the primary and alternate buffers, and set the
- // chunk index into the new primary buffer.
- {
- u8b = (UTF8Buf *)ut->q;
- ut->q = ut->p;
- ut->p = u8b;
- ut->chunkContents = &u8b->buf[u8b->bufStartIdx];
- ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx;
- ut->chunkNativeStart = u8b->bufNativeStart;
- ut->chunkNativeLimit = u8b->bufNativeLimit;
- ut->nativeIndexingLimit = u8b->bufNILimit;
-
- // Index into the (now current) chunk
- // Use the map to set the chunk index. It's more trouble than it's worth
- // to check whether native indexing can be used.
- U_ASSERT(ix>=u8b->bufNativeStart);
- U_ASSERT(ix<=u8b->bufNativeLimit);
- mapIndex = ix - u8b->toUCharsMapStart;
- U_ASSERT(mapIndex>=0);
- U_ASSERT(mapIndex<(int32_t)sizeof(u8b->mapToUChars));
- ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
-
- return TRUE;
- }
-
-
- swapBuffersAndFail:
- // We got a request for either the start or end of the string,
- // with iteration continuing in the out-of-bounds direction.
- // The alternate buffer already contains the data up to the
- // start/end.
- // Swap the buffers, then return failure, indicating that we couldn't
- // make things correct for continuing the iteration in the requested
- // direction. The position & buffer are correct should the
- // user decide to iterate in the opposite direction.
- u8b = (UTF8Buf *)ut->q;
- ut->q = ut->p;
- ut->p = u8b;
- ut->chunkContents = &u8b->buf[u8b->bufStartIdx];
- ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx;
- ut->chunkNativeStart = u8b->bufNativeStart;
- ut->chunkNativeLimit = u8b->bufNativeLimit;
- ut->nativeIndexingLimit = u8b->bufNILimit;
-
- // Index into the (now current) chunk
- // For this function (swapBuffersAndFail), the requested index
- // will always be at either the start or end of the chunk.
- if (ix==u8b->bufNativeLimit) {
- ut->chunkOffset = ut->chunkLength;
- } else {
- ut->chunkOffset = 0;
- U_ASSERT(ix == u8b->bufNativeStart);
- }
- return FALSE;
-
-makeStubBuffer:
- // The user has done a seek/access past the start or end
- // of the string. Rather than loading data that is likely
- // to never be used, just set up a zero-length buffer at
- // the position.
- u8b = (UTF8Buf *)ut->q;
- u8b->bufNativeStart = ix;
- u8b->bufNativeLimit = ix;
- u8b->bufStartIdx = 0;
- u8b->bufLimitIdx = 0;
- u8b->bufNILimit = 0;
- u8b->toUCharsMapStart = ix;
- u8b->mapToNative[0] = 0;
- u8b->mapToUChars[0] = 0;
- goto swapBuffersAndFail;
-
-
-
-fillForward:
- {
- // Move the incoming index to a code point boundary.
- U8_SET_CP_START(s8, 0, ix);
-
- // Swap the UText buffers.
- // We want to fill what was previously the alternate buffer,
- // and make what was the current buffer be the new alternate.
- UTF8Buf *u8b_swap = (UTF8Buf *)ut->q;
- ut->q = ut->p;
- ut->p = u8b_swap;
-
- int32_t strLen = ut->b;
- UBool nulTerminated = FALSE;
- if (strLen < 0) {
- strLen = 0x7fffffff;
- nulTerminated = TRUE;
- }
-
- UChar *buf = u8b_swap->buf;
- uint8_t *mapToNative = u8b_swap->mapToNative;
- uint8_t *mapToUChars = u8b_swap->mapToUChars;
- int32_t destIx = 0;
- int32_t srcIx = ix;
- UBool seenNonAscii = FALSE;
- UChar32 c = 0;
-
- // Fill the chunk buffer and mapping arrays.
- while (destIx<UTF8_TEXT_CHUNK_SIZE) {
- c = s8[srcIx];
- if (c>0 && c<0x80) {
- // Special case ASCII range for speed.
- // zero is excluded to simplify bounds checking.
- buf[destIx] = (UChar)c;
- mapToNative[destIx] = (uint8_t)(srcIx - ix);
- mapToUChars[srcIx-ix] = (uint8_t)destIx;
- srcIx++;
- destIx++;
- } else {
- // General case, handle everything.
- if (seenNonAscii == FALSE) {
- seenNonAscii = TRUE;
- u8b_swap->bufNILimit = destIx;
- }
-
- int32_t cIx = srcIx;
- int32_t dIx = destIx;
- int32_t dIxSaved = destIx;
- U8_NEXT_OR_FFFD(s8, srcIx, strLen, c);
- if (c==0 && nulTerminated) {
- srcIx--;
- break;
- }
-
- U16_APPEND_UNSAFE(buf, destIx, c);
- do {
- mapToNative[dIx++] = (uint8_t)(cIx - ix);
- } while (dIx < destIx);
-
- do {
- mapToUChars[cIx++ - ix] = (uint8_t)dIxSaved;
- } while (cIx < srcIx);
- }
- if (srcIx>=strLen) {
- break;
- }
-
- }
-
- // store Native <--> Chunk Map entries for the end of the buffer.
- // There is no actual character here, but the index position is valid.
- mapToNative[destIx] = (uint8_t)(srcIx - ix);
- mapToUChars[srcIx - ix] = (uint8_t)destIx;
-
- // fill in Buffer descriptor
- u8b_swap->bufNativeStart = ix;
- u8b_swap->bufNativeLimit = srcIx;
- u8b_swap->bufStartIdx = 0;
- u8b_swap->bufLimitIdx = destIx;
- if (seenNonAscii == FALSE) {
- u8b_swap->bufNILimit = destIx;
- }
- u8b_swap->toUCharsMapStart = u8b_swap->bufNativeStart;
-
- // Set UText chunk to refer to this buffer.
- ut->chunkContents = buf;
- ut->chunkOffset = 0;
- ut->chunkLength = u8b_swap->bufLimitIdx;
- ut->chunkNativeStart = u8b_swap->bufNativeStart;
- ut->chunkNativeLimit = u8b_swap->bufNativeLimit;
- ut->nativeIndexingLimit = u8b_swap->bufNILimit;
-
- // For zero terminated strings, keep track of the maximum point
- // scanned so far.
- if (nulTerminated && srcIx>ut->c) {
- ut->c = srcIx;
- if (c==0) {
- // We scanned to the end.
- // Remember the actual length.
- ut->b = srcIx;
- ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
- }
- }
- return TRUE;
- }
-
-
-fillReverse:
- {
- // Move the incoming index to a code point boundary.
- // Can only do this if the incoming index is somewhere in the interior of the string.
- // If index is at the end, there is no character there to look at.
- if (ix != ut->b) {
- // Note: this function will only move the index back if it is on a trail byte
- // and there is a preceding lead byte and the sequence from the lead
- // through this trail could be part of a valid UTF-8 sequence
- // Otherwise the index remains unchanged.
- U8_SET_CP_START(s8, 0, ix);
- }
-
- // Swap the UText buffers.
- // We want to fill what was previously the alternate buffer,
- // and make what was the current buffer be the new alternate.
- UTF8Buf *u8b_swap = (UTF8Buf *)ut->q;
- ut->q = ut->p;
- ut->p = u8b_swap;
-
- UChar *buf = u8b_swap->buf;
- uint8_t *mapToNative = u8b_swap->mapToNative;
- uint8_t *mapToUChars = u8b_swap->mapToUChars;
- int32_t toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1;
- // Note that toUCharsMapStart can be negative. Happens when the remaining
- // text from current position to the beginning is less than the buffer size.
- // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
- int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region
- // at end of buffer to leave room
- // for a surrogate pair at the
- // buffer start.
- int32_t srcIx = ix;
- int32_t bufNILimit = destIx;
- UChar32 c;
-
- // Map to/from Native Indexes, fill in for the position at the end of
- // the buffer.
- //
- mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
- mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
-
- // Fill the chunk buffer
- // Work backwards, filling from the end of the buffer towards the front.
- //
- while (destIx>2 && (srcIx - toUCharsMapStart > 5) && (srcIx > 0)) {
- srcIx--;
- destIx--;
-
- // Get last byte of the UTF-8 character
- c = s8[srcIx];
- if (c<0x80) {
- // Special case ASCII range for speed.
- buf[destIx] = (UChar)c;
- U_ASSERT(toUCharsMapStart <= srcIx);
- mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
- mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
- } else {
- // General case, handle everything non-ASCII.
-
- int32_t sIx = srcIx; // ix of last byte of multi-byte u8 char
-
- // Get the full character from the UTF8 string.
- // use code derived from the macros in utf8.h
- // Leaves srcIx pointing at the first byte of the UTF-8 char.
- //
- c=utf8_prevCharSafeBody(s8, 0, &srcIx, c, -3);
- // leaves srcIx at first byte of the multi-byte char.
-
- // Store the character in UTF-16 buffer.
- if (c<0x10000) {
- buf[destIx] = (UChar)c;
- mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
- } else {
- buf[destIx] = U16_TRAIL(c);
- mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
- buf[--destIx] = U16_LEAD(c);
- mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
- }
-
- // Fill in the map from native indexes to UChars buf index.
- do {
- mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
- } while (sIx >= srcIx);
- U_ASSERT(toUCharsMapStart <= (srcIx+1));
-
- // Set native indexing limit to be the current position.
- // We are processing a non-ascii, non-native-indexing char now;
- // the limit will be here if the rest of the chars to be
- // added to this buffer are ascii.
- bufNILimit = destIx;
- }
- }
- u8b_swap->bufNativeStart = srcIx;
- u8b_swap->bufNativeLimit = ix;
- u8b_swap->bufStartIdx = destIx;
- u8b_swap->bufLimitIdx = UTF8_TEXT_CHUNK_SIZE+2;
- u8b_swap->bufNILimit = bufNILimit - u8b_swap->bufStartIdx;
- u8b_swap->toUCharsMapStart = toUCharsMapStart;
-
- ut->chunkContents = &buf[u8b_swap->bufStartIdx];
- ut->chunkLength = u8b_swap->bufLimitIdx - u8b_swap->bufStartIdx;
- ut->chunkOffset = ut->chunkLength;
- ut->chunkNativeStart = u8b_swap->bufNativeStart;
- ut->chunkNativeLimit = u8b_swap->bufNativeLimit;
- ut->nativeIndexingLimit = u8b_swap->bufNILimit;
- return TRUE;
- }
-
-}
-
-
-
-//
-// utext_strFromUTF8
-//
-//   A cut-down variant of u_strFromUTF8: converts srcLength bytes of UTF-8
-//   to UTF-16, substituting a replacement character for invalid input
-//   instead of failing. NUL terminated input is not supported; srcLength
-//   is required.
-//
-//   At most destCapacity UChars are written to dest. The number of UChars
-//   the complete conversion needs is stored through pDestLength (when it is
-//   not NULL), and u_terminateUChars() is left to terminate the buffer and
-//   set *pErrorCode. Returns dest.
-//
-static UChar*
-utext_strFromUTF8(UChar *dest,
-                  int32_t destCapacity,
-                  int32_t *pDestLength,
-                  const char* src,
-                  int32_t srcLength, // required. NUL terminated not supported.
-                  UErrorCode *pErrorCode
-                  )
-{
-    const uint8_t *bytes    = (const uint8_t *)src;
-    UChar         *out      = dest;
-    UChar *const   outLimit = (dest != NULL) ? (dest + destCapacity) : NULL;
-    int32_t        srcIx    = 0;
-    int32_t        unwritten = 0;   // UChars that did not fit into dest
-
-    // Phase 1: convert for as long as there is room in the destination.
-    while (srcIx < srcLength && out < outLimit) {
-        UChar32 cp = bytes[srcIx++];
-        if (cp > 0x7f) {
-            cp = utf8_nextCharSafeBody(bytes, &srcIx, srcLength, cp, -3);
-        }
-        if (U_IS_BMP(cp)) {
-            *out++ = (UChar)cp;
-            continue;
-        }
-        // Supplementary code point: needs a surrogate pair.
-        *out++ = U16_LEAD(cp);
-        if (out >= outLimit) {
-            // Only the lead surrogate fit; account for the missing trail.
-            ++unwritten;
-            break;
-        }
-        *out++ = U16_TRAIL(cp);
-    }
-
-    // Phase 2: the destination is full (or absent); just count what the
-    // remaining input would need.
-    while (srcIx < srcLength) {
-        UChar32 cp = bytes[srcIx++];
-        if (cp > 0x7f) {
-            cp = utf8_nextCharSafeBody(bytes, &srcIx, srcLength, cp, -3);
-        }
-        unwritten += U16_LENGTH(cp);
-    }
-
-    const int32_t required = unwritten + (int32_t)(out - dest);
-    if (pDestLength) {
-        *pDestLength = required;
-    }
-
-    /* Terminate the buffer */
-    u_terminateUChars(dest, destCapacity, required, pErrorCode);
-
-    return dest;
-}
-
-
-
-//
-// utf8TextExtract
-//
-//   Extract function of the UTF-8 provider: converts the text between the
-//   native (byte) indexes start and limit to UTF-16 in dest.
-//
-//   start and limit are pinned to [0, length] and, where they fall inside
-//   the range covered so far by the chunk, moved back to a code point boundary.
-//   Sets U_ILLEGAL_ARGUMENT_ERROR for a bad dest/destCapacity and
-//   U_INDEX_OUTOFBOUNDS_ERROR when start is beyond limit; returns 0 in both
-//   cases. Otherwise returns the number of UChars the extracted text
-//   requires and leaves the iteration position at limit.
-//
-static int32_t U_CALLCONV
-utf8TextExtract(UText *ut,
-                int64_t start, int64_t limit,
-                UChar *dest, int32_t destCapacity,
-                UErrorCode *pErrorCode) {
-    if(U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    // ut->b is negative for a NUL terminated string whose end has not been
-    // located yet. Pinning against that raw value would turn every index
-    // into -1, so get the real length (scanning for the NUL if needed).
-    int32_t length  = (int32_t)utf8TextLength(ut);
-    int32_t start32 = pinIndex(start, length);
-    int32_t limit32 = pinIndex(limit, length);
-
-    if(start32>limit32) {
-        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-        return 0;
-    }
-
-
-    // adjust the incoming indexes to land on code point boundaries if needed.
-    // adjust by no more than three, because that is the largest number of trail bytes
-    // in a well formed UTF8 character.
-    // The chunkNativeLimit tests keep the byte at the index from being read
-    // unless the index lies below the limit of the current chunk.
-    const uint8_t *buf = (const uint8_t *)ut->context;
-    int i;
-    if (start32 < ut->chunkNativeLimit) {
-        for (i=0; i<3; i++) {
-            if (U8_IS_SINGLE(buf[start32]) || U8_IS_LEAD(buf[start32]) || start32==0) {
-                break;
-            }
-            start32--;
-        }
-    }
-
-    if (limit32 < ut->chunkNativeLimit) {
-        for (i=0; i<3; i++) {
-            if (U8_IS_SINGLE(buf[limit32]) || U8_IS_LEAD(buf[limit32]) || limit32==0) {
-                break;
-            }
-            limit32--;
-        }
-    }
-
-    // Do the actual extract.
-    int32_t destLength=0;
-    utext_strFromUTF8(dest, destCapacity, &destLength,
-                      (const char *)ut->context+start32, limit32-start32,
-                      pErrorCode);
-    // Leave the iteration position at the end of the extracted range.
-    utf8TextAccess(ut, limit32, TRUE);
-    return destLength;
-}
-
-//
-// utf8TextMapOffsetToNative
-//
-//   Translate the current chunk (UTF-16) offset, ut->chunkOffset, into the
-//   corresponding native index by way of the current buffer's mapToNative
-//   table.
-//
-static int64_t U_CALLCONV
-utf8TextMapOffsetToNative(const UText *ut) {
-    const UTF8Buf *current = (const UTF8Buf *)ut->p;
-    U_ASSERT(ut->chunkOffset>ut->nativeIndexingLimit && ut->chunkOffset<=ut->chunkLength);
-
-    // Chunk offsets are relative to the first filled slot of the buffer;
-    // map entries are relative to toUCharsMapStart.
-    const int32_t bufIndex     = ut->chunkOffset + current->bufStartIdx;
-    const int32_t nativeOffset = current->mapToNative[bufIndex] + current->toUCharsMapStart;
-    U_ASSERT(nativeOffset >= ut->chunkNativeStart && nativeOffset <= ut->chunkNativeLimit);
-    return nativeOffset;
-}
-
-//
-// utf8TextMapIndexToUTF16
-//
-//   Translate a native index lying within the current chunk into the
-//   corresponding chunk (UTF-16) offset by way of the current buffer's
-//   mapToUChars table.
-//
-static int32_t U_CALLCONV
-utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) {
-    U_ASSERT(index64 <= 0x7fffffff);
-    const int32_t  nativeIndex = (int32_t)index64;
-    const UTF8Buf *current     = (const UTF8Buf *)ut->p;
-    U_ASSERT(nativeIndex>=ut->chunkNativeStart+ut->nativeIndexingLimit);
-    U_ASSERT(nativeIndex<=ut->chunkNativeLimit);
-
-    // Map slots are relative to toUCharsMapStart; the stored values are
-    // positions in buf and are made chunk relative with bufStartIdx.
-    const int32_t mapSlot = nativeIndex - current->toUCharsMapStart;
-    U_ASSERT(mapSlot < (int32_t)sizeof(UTF8Buf::mapToUChars));
-    const int32_t chunkOffset = current->mapToUChars[mapSlot] - current->bufStartIdx;
-    U_ASSERT(chunkOffset>=0 && chunkOffset<=ut->chunkLength);
-    return chunkOffset;
-}
-
-//
-// utf8TextClone
-//
-//   Clone function of the UTF-8 provider.
-//   A shallow clone shares the source's string. A deep clone gets its own
-//   NUL terminated copy of the string, which the clone then owns
-//   (UTEXT_PROVIDER_OWNS_TEXT) and which utf8TextClose() frees.
-//   If the copy cannot be allocated, *status is set to
-//   U_MEMORY_ALLOCATION_ERROR and dest is returned as the shallow clone.
-//
-static UText * U_CALLCONV
-utf8TextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
-{
-    // First do a generic shallow clone. Does everything needed for the UText struct itself.
-    dest = shallowTextClone(dest, src, status);
-
-    // For deep clones, make a copy of the string.
-    // The copied storage is owned by the newly created clone.
-    //
-    // TODO: There is an issue with using utext_nativeLength().
-    //       That function is non-const in cases where the input was NUL terminated
-    //       and the length has not yet been determined.
-    //       This function (clone()) is const.
-    //       There is potentially a thread safety issue lurking here.
-    //
-    if (deep && U_SUCCESS(*status)) {
-        int32_t len = (int32_t)utext_nativeLength((UText *)src);
-        // Size the allocation in size_t so that len+1 cannot overflow int32_t.
-        char *copyStr = (char *)uprv_malloc((size_t)len + 1);
-        if (copyStr == NULL) {
-            *status = U_MEMORY_ALLOCATION_ERROR;
-        } else {
-            // Copy exactly len bytes and terminate the copy explicitly.
-            // A source opened with an explicit length need not have a NUL
-            // (or any readable byte) at context[len], so that byte must not
-            // be read.
-            uprv_memcpy(copyStr, src->context, len);
-            copyStr[len] = 0;
-            dest->context = copyStr;
-            dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
-        }
-    }
-    return dest;
-}
-
-
-//
-// utf8TextClose
-//
-//   The generic UText close does most of the work. The only provider
-//   specific task is freeing the UTF-8 string when this UText owns it,
-//   which is the case for a UText produced by a deep clone.
-//
-static void U_CALLCONV
-utf8TextClose(UText *ut) {
-    const UBool ownsText = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) != 0;
-    if (!ownsText) {
-        return;
-    }
-    uprv_free((char *)ut->context);
-    ut->context = NULL;
-}
-
-U_CDECL_END
-
-
-static const struct UTextFuncs utf8Funcs = // Function table installed into ut->pFuncs by utext_openUTF8().
-{
- sizeof(UTextFuncs), // size of this function table
- 0, 0, 0, // Reserved alignment padding
- utf8TextClone, // clone (shallow or deep)
- utf8TextLength, // native length; scans for the NUL when not yet known
- utf8TextAccess, // position the chunk at a native index
- utf8TextExtract, // extract a native index range as UTF-16
- NULL, /* replace: not provided, the UTF-8 provider is read-only */
- NULL, /* copy: not provided, the UTF-8 provider is read-only */
- utf8TextMapOffsetToNative, // chunk offset -> native index
- utf8TextMapIndexToUTF16, // native index -> chunk offset
- utf8TextClose, // frees the string if this UText owns it
- NULL, // spare 1
- NULL, // spare 2
- NULL // spare 3
-};
-
-
-// Text used for a UText opened on a NULL string of length zero.
-static const char gEmptyString[] = {0};
-
-//
-// utext_openUTF8   Open a read-only UText over a UTF-8 string.
-//
-//   length is the string length in bytes, or -1 for a NUL terminated string.
-//   A NULL string is accepted only together with a length of zero.
-//   Sets U_ILLEGAL_ARGUMENT_ERROR and returns NULL for any other NULL string
-//   and for a length below -1 or above INT32_MAX. Returns NULL if *status
-//   already indicates failure on entry, and whatever utext_setup() returned
-//   if that call fails.
-//
-U_CAPI UText * U_EXPORT2
-utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status) {
-    if(U_FAILURE(*status)) {
-        return NULL;
-    }
-
-    // An absent string of length zero is treated as the empty string.
-    if(s==NULL && length==0) {
-        s = gEmptyString;
-    }
-
-    const UBool lengthInRange = (length >= -1 && length <= INT32_MAX);
-    if(s==NULL || !lengthInRange) {
-        *status=U_ILLEGAL_ARGUMENT_ERROR;
-        return NULL;
-    }
-
-    // Extra storage holds the two UTF8Buf chunk buffers.
-    ut = utext_setup(ut, sizeof(UTF8Buf) * 2, status);
-    if (U_FAILURE(*status)) {
-        return ut;
-    }
-
-    const int32_t length32 = (int32_t)length;
-    ut->pFuncs  = &utf8Funcs;
-    ut->context = s;
-    ut->b       = length32;
-    if (length32 < 0) {
-        // NUL terminated: nothing has been scanned yet, and finding the
-        // length will require a scan.
-        ut->c = 0;
-        ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
-    } else {
-        ut->c = length32;
-    }
-
-    // p is the current buffer, q the alternate one; they sit back to back
-    // in the extra storage.
-    ut->p = ut->pExtra;
-    ut->q = (char *)ut->pExtra + sizeof(UTF8Buf);
-    return ut;
-
-}
-
-
-
-
-
-
-
-
-//------------------------------------------------------------------------------
-//
-// UText implementation wrapper for Replaceable (read/write)
-//
-// Use of UText data members:
-// context pointer to Replaceable.
-// p pointer to Replaceable if it is owned by the UText.
-//
-//------------------------------------------------------------------------------
-
-
-
-// Number of UChars fetched from the Replaceable per chunk. Minimum chunk size for this implementation: 3
-// to allow for possible trimming for code point boundaries
-enum { REP_TEXT_CHUNK_SIZE=10 };
-
-struct ReplExtra {
- /*
- * Chunk UChars; repTextAccess() extracts text from the Replaceable into this array.
- * +1 to simplify filling with surrogate pair at the end.
- */
- UChar s[REP_TEXT_CHUNK_SIZE+1];
-};
-
-
-U_CDECL_BEGIN
-
-//
-// repTextClone
-//
-//   Clone function of the Replaceable provider.
-//   A shallow clone shares the source's Replaceable. A deep clone gets its
-//   own copy of the Replaceable, which the clone then owns
-//   (UTEXT_PROVIDER_OWNS_TEXT, deleted by repTextClose()) and which is
-//   writable even when the source is not.
-//   If the Replaceable cannot be cloned, *status is set to
-//   U_MEMORY_ALLOCATION_ERROR and dest is returned as the shallow clone.
-//
-static UText * U_CALLCONV
-repTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
-    // First do a generic shallow clone. Does everything needed for the UText struct itself.
-    dest = shallowTextClone(dest, src, status);
-
-    // For deep clones, make a copy of the Replaceable.
-    // The copied Replaceable storage is owned by the newly created UText clone.
-    //
-    if (deep && U_SUCCESS(*status)) {
-        const Replaceable *replSrc = (const Replaceable *)src->context;
-        Replaceable *replCopy = replSrc->clone();
-        if (replCopy == NULL) {
-            // clone() yields NULL when the subclass does not implement it
-            // or when it fails. Do not install a NULL context flagged as
-            // owned; report the failure and keep the shallow clone as is.
-            *status = U_MEMORY_ALLOCATION_ERROR;
-        } else {
-            dest->context = replCopy;
-            dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
-
-            // with deep clone, the copy is writable, even when the source is not.
-            dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
-        }
-    }
-    return dest;
-}
-
-
-//
-// repTextClose
-//
-//   The generic UText close does most of the work. The only provider
-//   specific task is deleting the Replaceable when this UText owns it,
-//   which is the case for a UText produced by a deep clone.
-//
-static void U_CALLCONV
-repTextClose(UText *ut) {
-    const UBool ownsText = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) != 0;
-    if (!ownsText) {
-        return;
-    }
-    delete (Replaceable *)ut->context;
-    ut->context = NULL;
-}
-
-
-//
-// repTextLength
-//
-//   The native length is simply the length reported by the Replaceable
-//   held in ut->context.
-//
-static int64_t U_CALLCONV
-repTextLength(UText *ut) {
-    const Replaceable *rep = (const Replaceable *)ut->context;
-    const int32_t replLength = rep->length();
-    return replLength;
-}
-
-
-static UBool U_CALLCONV
-repTextAccess(UText *ut, int64_t index, UBool forward) { // Make the chunk cover 'index' for iteration in the given direction; FALSE when already at that end of the text.
- const Replaceable *rep=(const Replaceable *)ut->context;
- int32_t length=rep->length(); // Full length of the input text (bigger than a chunk)
-
- // clip the requested index to the limits of the text.
- int32_t index32 = pinIndex(index, length); // note: pinIndex() also clamps 'index' itself, in place
- U_ASSERT(index<=INT32_MAX);
-
-
- /*
- * Compute start/limit boundaries around index, for a segment of text
- * to be extracted.
- * To allow for the possibility that our user gave an index to the trailing
- * half of a surrogate pair, we must request one extra preceding UChar when
- * going in the forward direction. This will ensure that the buffer has the
- * entire code point at the specified index.
- */
- if(forward) {
-
- if (index32>=ut->chunkNativeStart && index32<ut->chunkNativeLimit) {
- // Buffer already contains the requested position.
- ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);
- return TRUE;
- }
- if (index32>=length && ut->chunkNativeLimit==length) {
- // Request for end of string, and buffer already extends up to it.
- // Can't get the data, but don't change the buffer.
- ut->chunkOffset = length - (int32_t)ut->chunkNativeStart;
- return FALSE;
- }
-
- ut->chunkNativeLimit = index + REP_TEXT_CHUNK_SIZE - 1;
- // Going forward, so we want to have the buffer with stuff at and beyond
- // the requested index. The -1 gets us one code point before the
- // requested index also, to handle the case of the index being on
- // a trail surrogate of a surrogate pair.
- if(ut->chunkNativeLimit > length) {
- ut->chunkNativeLimit = length;
- }
- // unless buffer ran off end, start is index-1.
- ut->chunkNativeStart = ut->chunkNativeLimit - REP_TEXT_CHUNK_SIZE;
- if(ut->chunkNativeStart < 0) {
- ut->chunkNativeStart = 0;
- }
- } else {
- // Reverse iteration. Fill buffer with data preceding the requested index.
- if (index32>ut->chunkNativeStart && index32<=ut->chunkNativeLimit) {
- // Requested position already in buffer.
- ut->chunkOffset = index32 - (int32_t)ut->chunkNativeStart;
- return TRUE;
- }
- if (index32==0 && ut->chunkNativeStart==0) {
- // Request for start, buffer already begins at start.
- // No data, but keep the buffer as is.
- ut->chunkOffset = 0;
- return FALSE;
- }
-
- // Figure out the bounds of the chunk to extract for reverse iteration.
- // Need to worry about chunk not splitting surrogate pairs, and while still
- // containing the data we need.
- // Fix by requesting a chunk that includes an extra UChar at the end.
- // If this turns out to be a lead surrogate, we can lop it off and still have
- // the data we wanted.
- ut->chunkNativeStart = index32 + 1 - REP_TEXT_CHUNK_SIZE;
- if (ut->chunkNativeStart < 0) {
- ut->chunkNativeStart = 0;
- }
-
- ut->chunkNativeLimit = index32 + 1;
- if (ut->chunkNativeLimit > length) {
- ut->chunkNativeLimit = length;
- }
- }
-
- // Extract the new chunk of text from the Replaceable source.
- ReplExtra *ex = (ReplExtra *)ut->pExtra;
- // UnicodeString with its buffer a writable alias to the chunk buffer
- UnicodeString buffer(ex->s, 0 /*buffer length*/, REP_TEXT_CHUNK_SIZE /*buffer capacity*/);
- rep->extractBetween((int32_t)ut->chunkNativeStart, (int32_t)ut->chunkNativeLimit, buffer);
-
- ut->chunkContents = ex->s;
- ut->chunkLength = (int32_t)(ut->chunkNativeLimit - ut->chunkNativeStart);
- ut->chunkOffset = (int32_t)(index32 - ut->chunkNativeStart);
-
- // Surrogate pairs from the input text must not span chunk boundaries.
- // If end of chunk could be the start of a surrogate, trim it off.
- if (ut->chunkNativeLimit < length &&
- U16_IS_LEAD(ex->s[ut->chunkLength-1])) {
- ut->chunkLength--;
- ut->chunkNativeLimit--;
- if (ut->chunkOffset > ut->chunkLength) {
- ut->chunkOffset = ut->chunkLength;
- }
- }
-
- // if the first UChar in the chunk could be the trailing half of a surrogate pair,
- // trim it off.
- if(ut->chunkNativeStart>0 && U16_IS_TRAIL(ex->s[0])) {
- ++(ut->chunkContents);
- ++(ut->chunkNativeStart);
- --(ut->chunkLength);
- --(ut->chunkOffset);
- }
-
- // adjust the index/chunkOffset to a code point boundary
- U16_SET_CP_START(ut->chunkContents, 0, ut->chunkOffset);
-
- // Use fast indexing for get/setNativeIndex(): every chunk offset in this buffer is treated as natively indexable.
- ut->nativeIndexingLimit = ut->chunkLength;
-
- return TRUE;
-}
-
-
-
-static int32_t U_CALLCONV
-repTextExtract(UText *ut,
- int64_t start, int64_t limit,
- UChar *dest, int32_t destCapacity,
- UErrorCode *status) {
- const Replaceable *rep=(const Replaceable *)ut->context;
- int32_t length=rep->length();
-
- if(U_FAILURE(*status)) {
- return 0;
- }
- if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
- *status=U_ILLEGAL_ARGUMENT_ERROR;
- }
- if(start>limit) {
- *status=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- int32_t start32 = pinIndex(start, length);
- int32_t limit32 = pinIndex(limit, length);
-
- // adjust start, limit if they point to trail half of surrogates
- if (start32<length && U16_IS_TRAIL(rep->charAt(start32)) &&
- U_IS_SUPPLEMENTARY(rep->char32At(start32))){
- start32--;
- }
- if (limit32<length && U16_IS_TRAIL(rep->charAt(limit32)) &&
- U_IS_SUPPLEMENTARY(rep->char32At(limit32))){
- limit32--;
- }
-
- length=limit32-start32;
- if(length>destCapacity) {
- limit32 = start32 + destCapacity;
- }
- UnicodeString buffer(dest, 0, destCapacity); // writable alias
- rep->extractBetween(start32, limit32, buffer);
- repTextAccess(ut, limit32, TRUE);
-
- return u_terminateUChars(dest, destCapacity, length, status);
-}
-
-static int32_t U_CALLCONV
-repTextReplace(UText *ut,
- int64_t start, int64_t limit,
- const UChar *src, int32_t length,
- UErrorCode *status) {
- Replaceable *rep=(Replaceable *)ut->context;
- int32_t oldLength;
-
- if(U_FAILURE(*status)) {
- return 0;
- }
- if(src==NULL && length!=0) {
- *status=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- oldLength=rep->length(); // will subtract from new length
- if(start>limit ) {
- *status=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- int32_t start32 = pinIndex(start, oldLength);
- int32_t limit32 = pinIndex(limit, oldLength);
-
- // Snap start & limit to code point boundaries.
- if (start32<oldLength && U16_IS_TRAIL(rep->charAt(start32)) &&
- start32>0 && U16_IS_LEAD(rep->charAt(start32-1)))
- {
- start32--;
- }
- if (limit32<oldLength && U16_IS_LEAD(rep->charAt(limit32-1)) &&
- U16_IS_TRAIL(rep->charAt(limit32)))
- {
- limit32++;
- }
-
- // Do the actual replace operation using methods of the Replaceable class
- UnicodeString replStr((UBool)(length<0), src, length); // read-only alias
- rep->handleReplaceBetween(start32, limit32, replStr);
- int32_t newLength = rep->length();
- int32_t lengthDelta = newLength - oldLength;
-
- // Is the UText chunk buffer OK?
- if (ut->chunkNativeLimit > start32) {
- // this replace operation may have impacted the current chunk.
- // invalidate it, which will force a reload on the next access.
- invalidateChunk(ut);
- }
-
- // set the iteration position to the end of the newly inserted replacement text.
- int32_t newIndexPos = limit32 + lengthDelta;
- repTextAccess(ut, newIndexPos, TRUE);
-
- return lengthDelta;
-}
-
-
-static void U_CALLCONV
-repTextCopy(UText *ut,
- int64_t start, int64_t limit,
- int64_t destIndex,
- UBool move,
- UErrorCode *status)
-{
- Replaceable *rep=(Replaceable *)ut->context;
- int32_t length=rep->length();
-
- if(U_FAILURE(*status)) {
- return;
- }
- if (start>limit || (start<destIndex && destIndex<limit))
- {
- *status=U_INDEX_OUTOFBOUNDS_ERROR;
- return;
- }
-
- int32_t start32 = pinIndex(start, length);
- int32_t limit32 = pinIndex(limit, length);
- int32_t destIndex32 = pinIndex(destIndex, length);
-
- // TODO: snap input parameters to code point boundaries.
-
- if(move) {
- // move: copy to destIndex, then replace original with nothing
- int32_t segLength=limit32-start32;
- rep->copy(start32, limit32, destIndex32);
- if(destIndex32<start32) {
- start32+=segLength;
- limit32+=segLength;
- }
- rep->handleReplaceBetween(start32, limit32, UnicodeString());
- } else {
- // copy
- rep->copy(start32, limit32, destIndex32);
- }
-
- // If the change to the text touched the region in the chunk buffer,
- // invalidate the buffer.
- int32_t firstAffectedIndex = destIndex32;
- if (move && start32<firstAffectedIndex) {
- firstAffectedIndex = start32;
- }
- if (firstAffectedIndex < ut->chunkNativeLimit) {
- // changes may have affected range covered by the chunk
- invalidateChunk(ut);
- }
-
- // Put iteration position at the newly inserted (moved) block,
- int32_t nativeIterIndex = destIndex32 + limit32 - start32;
- if (move && destIndex32>start32) {
- // moved a block of text towards the end of the string.
- nativeIterIndex = destIndex32;
- }
-
- // Set position, reload chunk if needed.
- repTextAccess(ut, nativeIterIndex, TRUE);
-}
-
-static const struct UTextFuncs repFuncs =
-{
- sizeof(UTextFuncs),
- 0, 0, 0, // Reserved alignment padding
- repTextClone,
- repTextLength,
- repTextAccess,
- repTextExtract,
- repTextReplace,
- repTextCopy,
- NULL, // MapOffsetToNative,
- NULL, // MapIndexToUTF16,
- repTextClose,
- NULL, // spare 1
- NULL, // spare 2
- NULL // spare 3
-};
-
-
-U_CAPI UText * U_EXPORT2
-utext_openReplaceable(UText *ut, Replaceable *rep, UErrorCode *status)
-{
- if(U_FAILURE(*status)) {
- return NULL;
- }
- if(rep==NULL) {
- *status=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- ut = utext_setup(ut, sizeof(ReplExtra), status);
- if(U_FAILURE(*status)) {
- return ut;
- }
-
- ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_WRITABLE);
- if(rep->hasMetaData()) {
- ut->providerProperties |=I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA);
- }
-
- ut->pFuncs = &repFuncs;
- ut->context = rep;
- return ut;
-}
-
-U_CDECL_END
-
-
-
-
-
-
-
-
-//------------------------------------------------------------------------------
-//
-// UText implementation for UnicodeString (read/write) and
-// for const UnicodeString (read only)
-// (same implementation, only the flags are different)
-//
-// Use of UText data members:
-// context pointer to UnicodeString
-// p pointer to UnicodeString IF this UText owns the string
-// and it must be deleted on close(). NULL otherwise.
-//
-//------------------------------------------------------------------------------
-
-U_CDECL_BEGIN
-
-
-static UText * U_CALLCONV
-unistrTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
- // First do a generic shallow clone. Does everything needed for the UText struct itself.
- dest = shallowTextClone(dest, src, status);
-
- // For deep clones, make a copy of the UnicodeSring.
- // The copied UnicodeString storage is owned by the newly created UText clone.
- // A non-NULL pointer in UText.p is the signal to the close() function to delete
- // the UText.
- //
- if (deep && U_SUCCESS(*status)) {
- const UnicodeString *srcString = (const UnicodeString *)src->context;
- dest->context = new UnicodeString(*srcString);
- dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
-
- // with deep clone, the copy is writable, even when the source is not.
- dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
- }
- return dest;
-}
-
-static void U_CALLCONV
-unistrTextClose(UText *ut) {
- // Most of the work of close is done by the generic UText framework close.
- // All that needs to be done here is delete the UnicodeString if the UText
- // owns it. This occurs if the UText was created by cloning.
- if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {
- UnicodeString *str = (UnicodeString *)ut->context;
- delete str;
- ut->context = NULL;
- }
-}
-
-
-static int64_t U_CALLCONV
-unistrTextLength(UText *t) {
- return ((const UnicodeString *)t->context)->length();
-}
-
-
-static UBool U_CALLCONV
-unistrTextAccess(UText *ut, int64_t index, UBool forward) {
- int32_t length = ut->chunkLength;
- ut->chunkOffset = pinIndex(index, length);
-
- // Check whether request is at the start or end
- UBool retVal = (forward && index<length) || (!forward && index>0);
- return retVal;
-}
-
-
-
-static int32_t U_CALLCONV
-unistrTextExtract(UText *t,
- int64_t start, int64_t limit,
- UChar *dest, int32_t destCapacity,
- UErrorCode *pErrorCode) {
- const UnicodeString *us=(const UnicodeString *)t->context;
- int32_t length=us->length();
-
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- }
- if(start<0 || start>limit) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- int32_t start32 = start<length ? us->getChar32Start((int32_t)start) : length;
- int32_t limit32 = limit<length ? us->getChar32Start((int32_t)limit) : length;
-
- length=limit32-start32;
- if (destCapacity>0 && dest!=NULL) {
- int32_t trimmedLength = length;
- if(trimmedLength>destCapacity) {
- trimmedLength=destCapacity;
- }
- us->extract(start32, trimmedLength, dest);
- t->chunkOffset = start32+trimmedLength;
- } else {
- t->chunkOffset = start32;
- }
- u_terminateUChars(dest, destCapacity, length, pErrorCode);
- return length;
-}
-
-static int32_t U_CALLCONV
-unistrTextReplace(UText *ut,
- int64_t start, int64_t limit,
- const UChar *src, int32_t length,
- UErrorCode *pErrorCode) {
- UnicodeString *us=(UnicodeString *)ut->context;
- int32_t oldLength;
-
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(src==NULL && length!=0) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- }
- if(start>limit) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- oldLength=us->length();
- int32_t start32 = pinIndex(start, oldLength);
- int32_t limit32 = pinIndex(limit, oldLength);
- if (start32 < oldLength) {
- start32 = us->getChar32Start(start32);
- }
- if (limit32 < oldLength) {
- limit32 = us->getChar32Start(limit32);
- }
-
- // replace
- us->replace(start32, limit32-start32, src, length);
- int32_t newLength = us->length();
-
- // Update the chunk description.
- ut->chunkContents = us->getBuffer();
- ut->chunkLength = newLength;
- ut->chunkNativeLimit = newLength;
- ut->nativeIndexingLimit = newLength;
-
- // Set iteration position to the point just following the newly inserted text.
- int32_t lengthDelta = newLength - oldLength;
- ut->chunkOffset = limit32 + lengthDelta;
-
- return lengthDelta;
-}
-
-static void U_CALLCONV
-unistrTextCopy(UText *ut,
- int64_t start, int64_t limit,
- int64_t destIndex,
- UBool move,
- UErrorCode *pErrorCode) {
- UnicodeString *us=(UnicodeString *)ut->context;
- int32_t length=us->length();
-
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- int32_t start32 = pinIndex(start, length);
- int32_t limit32 = pinIndex(limit, length);
- int32_t destIndex32 = pinIndex(destIndex, length);
-
- if( start32>limit32 || (start32<destIndex32 && destIndex32<limit32)) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return;
- }
-
- if(move) {
- // move: copy to destIndex, then remove original
- int32_t segLength=limit32-start32;
- us->copy(start32, limit32, destIndex32);
- if(destIndex32<start32) {
- start32+=segLength;
- }
- us->remove(start32, segLength);
- } else {
- // copy
- us->copy(start32, limit32, destIndex32);
- }
-
- // update chunk description, set iteration position.
- ut->chunkContents = us->getBuffer();
- if (move==FALSE) {
- // copy operation, string length grows
- ut->chunkLength += limit32-start32;
- ut->chunkNativeLimit = ut->chunkLength;
- ut->nativeIndexingLimit = ut->chunkLength;
- }
-
- // Iteration position to end of the newly inserted text.
- ut->chunkOffset = destIndex32+limit32-start32;
- if (move && destIndex32>start32) {
- ut->chunkOffset = destIndex32;
- }
-
-}
-
-static const struct UTextFuncs unistrFuncs =
-{
- sizeof(UTextFuncs),
- 0, 0, 0, // Reserved alignment padding
- unistrTextClone,
- unistrTextLength,
- unistrTextAccess,
- unistrTextExtract,
- unistrTextReplace,
- unistrTextCopy,
- NULL, // MapOffsetToNative,
- NULL, // MapIndexToUTF16,
- unistrTextClose,
- NULL, // spare 1
- NULL, // spare 2
- NULL // spare 3
-};
-
-
-
-U_CDECL_END
-
-
-U_CAPI UText * U_EXPORT2
-utext_openUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) {
- ut = utext_openConstUnicodeString(ut, s, status);
- if (U_SUCCESS(*status)) {
- ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
- }
- return ut;
-}
-
-
-
-U_CAPI UText * U_EXPORT2
-utext_openConstUnicodeString(UText *ut, const UnicodeString *s, UErrorCode *status) {
- if (U_SUCCESS(*status) && s->isBogus()) {
- // The UnicodeString is bogus, but we still need to detach the UText
- // from whatever it was hooked to before, if anything.
- utext_openUChars(ut, NULL, 0, status);
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return ut;
- }
- ut = utext_setup(ut, 0, status);
- // note: use the standard (writable) function table for UnicodeString.
- // The flag settings disable writing, so having the functions in
- // the table is harmless.
- if (U_SUCCESS(*status)) {
- ut->pFuncs = &unistrFuncs;
- ut->context = s;
- ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);
- ut->chunkContents = s->getBuffer();
- ut->chunkLength = s->length();
- ut->chunkNativeStart = 0;
- ut->chunkNativeLimit = ut->chunkLength;
- ut->nativeIndexingLimit = ut->chunkLength;
- }
- return ut;
-}
-
-//------------------------------------------------------------------------------
-//
-// UText implementation for const UChar * strings
-//
-// Use of UText data members:
-// context pointer to UnicodeString
-// a length. -1 if not yet known.
-//
-// TODO: support 64 bit lengths.
-//
-//------------------------------------------------------------------------------
-
-U_CDECL_BEGIN
-
-
-static UText * U_CALLCONV
-ucstrTextClone(UText *dest, const UText * src, UBool deep, UErrorCode * status) {
- // First do a generic shallow clone.
- dest = shallowTextClone(dest, src, status);
-
- // For deep clones, make a copy of the string.
- // The copied storage is owned by the newly created clone.
- // A non-NULL pointer in UText.p is the signal to the close() function to delete
- // it.
- //
- if (deep && U_SUCCESS(*status)) {
- U_ASSERT(utext_nativeLength(dest) < INT32_MAX);
- int32_t len = (int32_t)utext_nativeLength(dest);
-
- // The cloned string IS going to be NUL terminated, whether or not the original was.
- const UChar *srcStr = (const UChar *)src->context;
- UChar *copyStr = (UChar *)uprv_malloc((len+1) * sizeof(UChar));
- if (copyStr == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- } else {
- int64_t i;
- for (i=0; i<len; i++) {
- copyStr[i] = srcStr[i];
- }
- copyStr[len] = 0;
- dest->context = copyStr;
- dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
- }
- }
- return dest;
-}
-
-
-static void U_CALLCONV
-ucstrTextClose(UText *ut) {
- // Most of the work of close is done by the generic UText framework close.
- // All that needs to be done here is delete the string if the UText
- // owns it. This occurs if the UText was created by cloning.
- if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {
- UChar *s = (UChar *)ut->context;
- uprv_free(s);
- ut->context = NULL;
- }
-}
-
-
-
-static int64_t U_CALLCONV
-ucstrTextLength(UText *ut) {
- if (ut->a < 0) {
- // null terminated, we don't yet know the length. Scan for it.
- // Access is not convenient for doing this
- // because the current interation postion can't be changed.
- const UChar *str = (const UChar *)ut->context;
- for (;;) {
- if (str[ut->chunkNativeLimit] == 0) {
- break;
- }
- ut->chunkNativeLimit++;
- }
- ut->a = ut->chunkNativeLimit;
- ut->chunkLength = (int32_t)ut->chunkNativeLimit;
- ut->nativeIndexingLimit = ut->chunkLength;
- ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
- }
- return ut->a;
-}
-
-
-static UBool U_CALLCONV
-ucstrTextAccess(UText *ut, int64_t index, UBool forward) {
- const UChar *str = (const UChar *)ut->context;
-
- // pin the requested index to the bounds of the string,
- // and set current iteration position.
- if (index<0) {
- index = 0;
- } else if (index < ut->chunkNativeLimit) {
- // The request data is within the chunk as it is known so far.
- // Put index on a code point boundary.
- U16_SET_CP_START(str, 0, index);
- } else if (ut->a >= 0) {
- // We know the length of this string, and the user is requesting something
- // at or beyond the length. Pin the requested index to the length.
- index = ut->a;
- } else {
- // Null terminated string, length not yet known, and the requested index
- // is beyond where we have scanned so far.
- // Scan to 32 UChars beyond the requested index. The strategy here is
- // to avoid fully scanning a long string when the caller only wants to
- // see a few characters at its beginning.
- int32_t scanLimit = (int32_t)index + 32;
- if ((index + 32)>INT32_MAX || (index + 32)<0 ) { // note: int64 expression
- scanLimit = INT32_MAX;
- }
-
- int32_t chunkLimit = (int32_t)ut->chunkNativeLimit;
- for (; chunkLimit<scanLimit; chunkLimit++) {
- if (str[chunkLimit] == 0) {
- // We found the end of the string. Remember it, pin the requested index to it,
- // and bail out of here.
- ut->a = chunkLimit;
- ut->chunkLength = chunkLimit;
- ut->nativeIndexingLimit = chunkLimit;
- if (index >= chunkLimit) {
- index = chunkLimit;
- } else {
- U16_SET_CP_START(str, 0, index);
- }
-
- ut->chunkNativeLimit = chunkLimit;
- ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
- goto breakout;
- }
- }
- // We scanned through the next batch of UChars without finding the end.
- U16_SET_CP_START(str, 0, index);
- if (chunkLimit == INT32_MAX) {
- // Scanned to the limit of a 32 bit length.
- // Forceably trim the overlength string back so length fits in int32
- // TODO: add support for 64 bit strings.
- ut->a = chunkLimit;
- ut->chunkLength = chunkLimit;
- ut->nativeIndexingLimit = chunkLimit;
- if (index > chunkLimit) {
- index = chunkLimit;
- }
- ut->chunkNativeLimit = chunkLimit;
- ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
- } else {
- // The endpoint of a chunk must not be left in the middle of a surrogate pair.
- // If the current end is on a lead surrogate, back the end up by one.
- // It doesn't matter if the end char happens to be an unpaired surrogate,
- // and it's simpler not to worry about it.
- if (U16_IS_LEAD(str[chunkLimit-1])) {
- --chunkLimit;
- }
- // Null-terminated chunk with end still unknown.
- // Update the chunk length to reflect what has been scanned thus far.
- // That the full length is still unknown is (still) flagged by
- // ut->a being < 0.
- ut->chunkNativeLimit = chunkLimit;
- ut->nativeIndexingLimit = chunkLimit;
- ut->chunkLength = chunkLimit;
- }
-
- }
-breakout:
- U_ASSERT(index<=INT32_MAX);
- ut->chunkOffset = (int32_t)index;
-
- // Check whether request is at the start or end
- UBool retVal = (forward && index<ut->chunkNativeLimit) || (!forward && index>0);
- return retVal;
-}
-
-
-
-static int32_t U_CALLCONV
-ucstrTextExtract(UText *ut,
- int64_t start, int64_t limit,
- UChar *dest, int32_t destCapacity,
- UErrorCode *pErrorCode)
-{
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- //const UChar *s=(const UChar *)ut->context;
- int32_t si, di;
-
- int32_t start32;
- int32_t limit32;
-
- // Access the start. Does two things we need:
- // Pins 'start' to the length of the string, if it came in out-of-bounds.
- // Snaps 'start' to the beginning of a code point.
- ucstrTextAccess(ut, start, TRUE);
- const UChar *s=ut->chunkContents;
- start32 = ut->chunkOffset;
-
- int32_t strLength=(int32_t)ut->a;
- if (strLength >= 0) {
- limit32 = pinIndex(limit, strLength);
- } else {
- limit32 = pinIndex(limit, INT32_MAX);
- }
- di = 0;
- for (si=start32; si<limit32; si++) {
- if (strLength<0 && s[si]==0) {
- // Just hit the end of a null-terminated string.
- ut->a = si; // set string length for this UText
- ut->chunkNativeLimit = si;
- ut->chunkLength = si;
- ut->nativeIndexingLimit = si;
- strLength = si;
- limit32 = si;
- break;
- }
- U_ASSERT(di>=0); /* to ensure di never exceeds INT32_MAX, which must not happen logically */
- if (di<destCapacity) {
- // only store if there is space.
- dest[di] = s[si];
- } else {
- if (strLength>=0) {
- // We have filled the destination buffer, and the string length is known.
- // Cut the loop short. There is no need to scan string termination.
- di = limit32 - start32;
- si = limit32;
- break;
- }
- }
- di++;
- }
-
- // If the limit index points to a lead surrogate of a pair,
- // add the corresponding trail surrogate to the destination.
- if (si>0 && U16_IS_LEAD(s[si-1]) &&
- ((si<strLength || strLength<0) && U16_IS_TRAIL(s[si])))
- {
- if (di<destCapacity) {
- // store only if there is space in the output buffer.
- dest[di++] = s[si];
- }
- si++;
- }
-
- // Put iteration position at the point just following the extracted text
- if (si <= ut->chunkNativeLimit) {
- ut->chunkOffset = si;
- } else {
- ucstrTextAccess(ut, si, TRUE);
- }
-
- // Add a terminating NUL if space in the buffer permits,
- // and set the error status as required.
- u_terminateUChars(dest, destCapacity, di, pErrorCode);
- return di;
-}
-
-static const struct UTextFuncs ucstrFuncs =
-{
- sizeof(UTextFuncs),
- 0, 0, 0, // Reserved alignment padding
- ucstrTextClone,
- ucstrTextLength,
- ucstrTextAccess,
- ucstrTextExtract,
- NULL, // Replace
- NULL, // Copy
- NULL, // MapOffsetToNative,
- NULL, // MapIndexToUTF16,
- ucstrTextClose,
- NULL, // spare 1
- NULL, // spare 2
- NULL, // spare 3
-};
-
-U_CDECL_END
-
-static const UChar gEmptyUString[] = {0};
-
-U_CAPI UText * U_EXPORT2
-utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status) {
- if (U_FAILURE(*status)) {
- return NULL;
- }
- if(s==NULL && length==0) {
- s = gEmptyUString;
- }
- if (s==NULL || length < -1 || length>INT32_MAX) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- ut = utext_setup(ut, 0, status);
- if (U_SUCCESS(*status)) {
- ut->pFuncs = &ucstrFuncs;
- ut->context = s;
- ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);
- if (length==-1) {
- ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
- }
- ut->a = length;
- ut->chunkContents = s;
- ut->chunkNativeStart = 0;
- ut->chunkNativeLimit = length>=0? length : 0;
- ut->chunkLength = (int32_t)ut->chunkNativeLimit;
- ut->chunkOffset = 0;
- ut->nativeIndexingLimit = ut->chunkLength;
- }
- return ut;
-}
-
-
-//------------------------------------------------------------------------------
-//
-// UText implementation for text from ICU CharacterIterators
-//
-// Use of UText data members:
-// context pointer to the CharacterIterator
-// a length of the full text.
-// p pointer to buffer 1
-// b start index of local buffer 1 contents
-// q pointer to buffer 2
-// c start index of local buffer 2 contents
-// r pointer to the character iterator if the UText owns it.
-// Null otherwise.
-//
-//------------------------------------------------------------------------------
-#define CIBufSize 16
-
-U_CDECL_BEGIN
-static void U_CALLCONV
-charIterTextClose(UText *ut) {
- // Most of the work of close is done by the generic UText framework close.
- // All that needs to be done here is delete the CharacterIterator if the UText
- // owns it. This occurs if the UText was created by cloning.
- CharacterIterator *ci = (CharacterIterator *)ut->r;
- delete ci;
- ut->r = NULL;
-}
-
-static int64_t U_CALLCONV
-charIterTextLength(UText *ut) {
- return (int32_t)ut->a;
-}
-
-static UBool U_CALLCONV
-charIterTextAccess(UText *ut, int64_t index, UBool forward) {
- CharacterIterator *ci = (CharacterIterator *)ut->context;
-
- int32_t clippedIndex = (int32_t)index;
- if (clippedIndex<0) {
- clippedIndex=0;
- } else if (clippedIndex>=ut->a) {
- clippedIndex=(int32_t)ut->a;
- }
- int32_t neededIndex = clippedIndex;
- if (!forward && neededIndex>0) {
- // reverse iteration, want the position just before what was asked for.
- neededIndex--;
- } else if (forward && neededIndex==ut->a && neededIndex>0) {
- // Forward iteration, don't ask for something past the end of the text.
- neededIndex--;
- }
-
- // Find the native index of the start of the buffer containing what we want.
- neededIndex -= neededIndex % CIBufSize;
-
- UChar *buf = NULL;
- UBool needChunkSetup = TRUE;
- int i;
- if (ut->chunkNativeStart == neededIndex) {
- // The buffer we want is already the current chunk.
- needChunkSetup = FALSE;
- } else if (ut->b == neededIndex) {
- // The first buffer (buffer p) has what we need.
- buf = (UChar *)ut->p;
- } else if (ut->c == neededIndex) {
- // The second buffer (buffer q) has what we need.
- buf = (UChar *)ut->q;
- } else {
- // Neither buffer already has what we need.
- // Load new data from the character iterator.
- // Use the buf that is not the current buffer.
- buf = (UChar *)ut->p;
- if (ut->p == ut->chunkContents) {
- buf = (UChar *)ut->q;
- }
- ci->setIndex(neededIndex);
- for (i=0; i<CIBufSize; i++) {
- buf[i] = ci->nextPostInc();
- if (i+neededIndex > ut->a) {
- break;
- }
- }
- }
-
- // We have a buffer with the data we need.
- // Set it up as the current chunk, if it wasn't already.
- if (needChunkSetup) {
- ut->chunkContents = buf;
- ut->chunkLength = CIBufSize;
- ut->chunkNativeStart = neededIndex;
- ut->chunkNativeLimit = neededIndex + CIBufSize;
- if (ut->chunkNativeLimit > ut->a) {
- ut->chunkNativeLimit = ut->a;
- ut->chunkLength = (int32_t)(ut->chunkNativeLimit)-(int32_t)(ut->chunkNativeStart);
- }
- ut->nativeIndexingLimit = ut->chunkLength;
- U_ASSERT(ut->chunkOffset>=0 && ut->chunkOffset<=CIBufSize);
- }
- ut->chunkOffset = clippedIndex - (int32_t)ut->chunkNativeStart;
- UBool success = (forward? ut->chunkOffset<ut->chunkLength : ut->chunkOffset>0);
- return success;
-}
-
-static UText * U_CALLCONV
-charIterTextClone(UText *dest, const UText *src, UBool deep, UErrorCode * status) {
- if (U_FAILURE(*status)) {
- return NULL;
- }
-
- if (deep) {
- // There is no CharacterIterator API for cloning the underlying text storage.
- *status = U_UNSUPPORTED_ERROR;
- return NULL;
- } else {
- CharacterIterator *srcCI =(CharacterIterator *)src->context;
- srcCI = srcCI->clone();
- dest = utext_openCharacterIterator(dest, srcCI, status);
- if (U_FAILURE(*status)) {
- return dest;
- }
- // cast off const on getNativeIndex.
- // For CharacterIterator based UTexts, this is safe, the operation is const.
- int64_t ix = utext_getNativeIndex((UText *)src);
- utext_setNativeIndex(dest, ix);
- dest->r = srcCI; // flags that this UText owns the CharacterIterator
- }
- return dest;
-}
-
-static int32_t U_CALLCONV
-charIterTextExtract(UText *ut,
- int64_t start, int64_t limit,
- UChar *dest, int32_t destCapacity,
- UErrorCode *status)
-{
- if(U_FAILURE(*status)) {
- return 0;
- }
- if(destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) {
- *status=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- int32_t length = (int32_t)ut->a;
- int32_t start32 = pinIndex(start, length);
- int32_t limit32 = pinIndex(limit, length);
- int32_t desti = 0;
- int32_t srci;
- int32_t copyLimit;
-
- CharacterIterator *ci = (CharacterIterator *)ut->context;
- ci->setIndex32(start32); // Moves ix to lead of surrogate pair, if needed.
- srci = ci->getIndex();
- copyLimit = srci;
- while (srci<limit32) {
- UChar32 c = ci->next32PostInc();
- int32_t len = U16_LENGTH(c);
- U_ASSERT(desti+len>0); /* to ensure desti+len never exceeds MAX_INT32, which must not happen logically */
- if (desti+len <= destCapacity) {
- U16_APPEND_UNSAFE(dest, desti, c);
- copyLimit = srci+len;
- } else {
- desti += len;
- *status = U_BUFFER_OVERFLOW_ERROR;
- }
- srci += len;
- }
-
- charIterTextAccess(ut, copyLimit, TRUE);
-
- u_terminateUChars(dest, destCapacity, desti, status);
- return desti;
-}
-
-static const struct UTextFuncs charIterFuncs =
-{
- sizeof(UTextFuncs),
- 0, 0, 0, // Reserved alignment padding
- charIterTextClone,
- charIterTextLength,
- charIterTextAccess,
- charIterTextExtract,
- NULL, // Replace
- NULL, // Copy
- NULL, // MapOffsetToNative,
- NULL, // MapIndexToUTF16,
- charIterTextClose,
- NULL, // spare 1
- NULL, // spare 2
- NULL // spare 3
-};
-U_CDECL_END
-
-
-U_CAPI UText * U_EXPORT2
-utext_openCharacterIterator(UText *ut, CharacterIterator *ci, UErrorCode *status) {
- if (U_FAILURE(*status)) {
- return NULL;
- }
-
- if (ci->startIndex() > 0) {
- // No support for CharacterIterators that do not start indexing from zero.
- *status = U_UNSUPPORTED_ERROR;
- return NULL;
- }
-
- // Extra space in UText for 2 buffers of CIBufSize UChars each.
- int32_t extraSpace = 2 * CIBufSize * sizeof(UChar);
- ut = utext_setup(ut, extraSpace, status);
- if (U_SUCCESS(*status)) {
- ut->pFuncs = &charIterFuncs;
- ut->context = ci;
- ut->providerProperties = 0;
- ut->a = ci->endIndex(); // Length of text
- ut->p = ut->pExtra; // First buffer
- ut->b = -1; // Native index of first buffer contents
- ut->q = (UChar*)ut->pExtra+CIBufSize; // Second buffer
- ut->c = -1; // Native index of second buffer contents
-
- // Initialize current chunk contents to be empty.
- // First access will fault something in.
- // Note: The initial nativeStart and chunkOffset must sum to zero
- // so that getNativeIndex() will correctly compute to zero
- // if no call to Access() has ever been made. They can't be both
- // zero without Access() thinking that the chunk is valid.
- ut->chunkContents = (UChar *)ut->p;
- ut->chunkNativeStart = -1;
- ut->chunkOffset = 1;
- ut->chunkNativeLimit = 0;
- ut->chunkLength = 0;
- ut->nativeIndexingLimit = ut->chunkOffset; // enables native indexing
- }
- return ut;
-}
diff --git a/contrib/libs/icu/common/utf_impl.cpp b/contrib/libs/icu/common/utf_impl.cpp
deleted file mode 100644
index 9dd241a12bf..00000000000
--- a/contrib/libs/icu/common/utf_impl.cpp
+++ /dev/null
@@ -1,329 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1999-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: utf_impl.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999sep13
-* created by: Markus W. Scherer
-*
-* This file provides implementation functions for macros in the utfXX.h
-* that would otherwise be too long as macros.
-*/
-
-/* set import/export definitions */
-#ifndef U_UTF8_IMPL
-# define U_UTF8_IMPL
-#endif
-
-#include "unicode/utypes.h"
-#include "unicode/utf.h"
-#include "unicode/utf8.h"
-#include "uassert.h"
-
-/*
- * Table of the number of utf8 trail bytes, indexed by the lead byte.
- * Used by the deprecated macro UTF8_COUNT_TRAIL_BYTES, defined in utf_old.h
- *
- * The current macro, U8_COUNT_TRAIL_BYTES, does _not_ use this table.
- *
- * Note that this table cannot be removed, even if UTF8_COUNT_TRAIL_BYTES were
- * changed to no longer use it. References to the table from expansions of UTF8_COUNT_TRAIL_BYTES
- * may exist in old client code that must continue to run with newer icu library versions.
- *
- * This table could be replaced on many machines by
- * a few lines of assembler code using an
- * "index of first 0-bit from msb" instruction and
- * one or two more integer instructions.
- *
- * For example, on an i386, do something like
- * - MOV AL, leadByte
- * - NOT AL (8-bit, leave b15..b8==0..0, reverse only b7..b0)
- * - MOV AH, 0
- * - BSR BX, AX (16-bit)
- * - MOV AX, 6 (result)
- * - JZ finish (ZF==1 if leadByte==0xff)
- * - SUB AX, BX (result)
- * -finish:
- * (BSR: Bit Scan Reverse, scans for a 1-bit, starting from the MSB)
- */
-extern "C" U_EXPORT const uint8_t
-utf8_countTrailBytes[256]={
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- // illegal C0 & C1
- // 2-byte lead bytes C2..DF
- 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
- // 3-byte lead bytes E0..EF
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- // 4-byte lead bytes F0..F4
- // illegal F5..FF
- 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-static const UChar32
-utf8_errorValue[6]={
- // Same values as UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_2, UTF_ERROR_VALUE,
- // but without relying on the obsolete unicode/utf_old.h.
- 0x15, 0x9f, 0xffff,
- 0x10ffff
-};
-
-static UChar32
-errorValue(int32_t count, int8_t strict) {
- if(strict>=0) {
- return utf8_errorValue[count];
- } else if(strict==-3) {
- return 0xfffd;
- } else {
- return U_SENTINEL;
- }
-}
-
-/*
- * Handle the non-inline part of the U8_NEXT() and U8_NEXT_FFFD() macros
- * and their obsolete sibling UTF8_NEXT_CHAR_SAFE().
- *
- * U8_NEXT() supports NUL-terminated strings indicated via length<0.
- *
- * The "strict" parameter controls the error behavior:
- * <0 "Safe" behavior of U8_NEXT():
- * -1: All illegal byte sequences yield U_SENTINEL=-1.
- * -2: Same as -1, except for lenient treatment of surrogate code points as legal.
- * Some implementations use this for roundtripping of
- * Unicode 16-bit strings that are not well-formed UTF-16, that is, they
- * contain unpaired surrogates.
- * -3: All illegal byte sequences yield U+FFFD.
- * 0 Obsolete "safe" behavior of UTF8_NEXT_CHAR_SAFE(..., FALSE):
- * All illegal byte sequences yield a positive code point such that this
- * result code point would be encoded with the same number of bytes as
- * the illegal sequence.
- * >0 Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., TRUE):
- * Same as the obsolete "safe" behavior, but non-characters are also treated
- * like illegal sequences.
- *
- * Note that a UBool is the same as an int8_t.
- */
-U_CAPI UChar32 U_EXPORT2
-utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) {
- // *pi is one after byte c.
- int32_t i=*pi;
- // length can be negative for NUL-terminated strings: Read and validate one byte at a time.
- if(i==length || c>0xf4) {
- // end of string, or not a lead byte
- } else if(c>=0xf0) {
- // Test for 4-byte sequences first because
- // U8_NEXT() handles shorter valid sequences inline.
- uint8_t t1=s[i], t2, t3;
- c&=7;
- if(U8_IS_VALID_LEAD4_AND_T1(c, t1) &&
- ++i!=length && (t2=s[i]-0x80)<=0x3f &&
- ++i!=length && (t3=s[i]-0x80)<=0x3f) {
- ++i;
- c=(c<<18)|((t1&0x3f)<<12)|(t2<<6)|t3;
- // strict: forbid non-characters like U+fffe
- if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
- *pi=i;
- return c;
- }
- }
- } else if(c>=0xe0) {
- c&=0xf;
- if(strict!=-2) {
- uint8_t t1=s[i], t2;
- if(U8_IS_VALID_LEAD3_AND_T1(c, t1) &&
- ++i!=length && (t2=s[i]-0x80)<=0x3f) {
- ++i;
- c=(c<<12)|((t1&0x3f)<<6)|t2;
- // strict: forbid non-characters like U+fffe
- if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
- *pi=i;
- return c;
- }
- }
- } else {
- // strict=-2 -> lenient: allow surrogates
- uint8_t t1=s[i]-0x80, t2;
- if(t1<=0x3f && (c>0 || t1>=0x20) &&
- ++i!=length && (t2=s[i]-0x80)<=0x3f) {
- *pi=i+1;
- return (c<<12)|(t1<<6)|t2;
- }
- }
- } else if(c>=0xc2) {
- uint8_t t1=s[i]-0x80;
- if(t1<=0x3f) {
- *pi=i+1;
- return ((c-0xc0)<<6)|t1;
- }
- } // else 0x80<=c<0xc2 is not a lead byte
-
- /* error handling */
- c=errorValue(i-*pi, strict);
- *pi=i;
- return c;
-}
-
-U_CAPI int32_t U_EXPORT2
-utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError) {
- if((uint32_t)(c)<=0x7ff) {
- if((i)+1<(length)) {
- (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0);
- (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80);
- return i;
- }
- } else if((uint32_t)(c)<=0xffff) {
- /* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8. */
- if((i)+2<(length) && !U_IS_SURROGATE(c)) {
- (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0);
- (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80);
- (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80);
- return i;
- }
- } else if((uint32_t)(c)<=0x10ffff) {
- if((i)+3<(length)) {
- (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0);
- (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80);
- (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80);
- (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80);
- return i;
- }
- }
- /* c>0x10ffff or not enough space, write an error value */
- if(pIsError!=NULL) {
- *pIsError=TRUE;
- } else {
- length-=i;
- if(length>0) {
- int32_t offset;
- if(length>3) {
- length=3;
- }
- s+=i;
- offset=0;
- c=utf8_errorValue[length-1];
- U8_APPEND_UNSAFE(s, offset, c);
- i=i+offset;
- }
- }
- return i;
-}
-
-U_CAPI UChar32 U_EXPORT2
-utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict) {
- // *pi is the index of byte c.
- int32_t i=*pi;
- if(U8_IS_TRAIL(c) && i>start) {
- uint8_t b1=s[--i];
- if(U8_IS_LEAD(b1)) {
- if(b1<0xe0) {
- *pi=i;
- return ((b1-0xc0)<<6)|(c&0x3f);
- } else if(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c)) {
- // Truncated 3- or 4-byte sequence.
- *pi=i;
- return errorValue(1, strict);
- }
- } else if(U8_IS_TRAIL(b1) && i>start) {
- // Extract the value bits from the last trail byte.
- c&=0x3f;
- uint8_t b2=s[--i];
- if(0xe0<=b2 && b2<=0xf4) {
- if(b2<0xf0) {
- b2&=0xf;
- if(strict!=-2) {
- if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
- *pi=i;
- c=(b2<<12)|((b1&0x3f)<<6)|c;
- if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
- return c;
- } else {
- // strict: forbid non-characters like U+fffe
- return errorValue(2, strict);
- }
- }
- } else {
- // strict=-2 -> lenient: allow surrogates
- b1-=0x80;
- if((b2>0 || b1>=0x20)) {
- *pi=i;
- return (b2<<12)|(b1<<6)|c;
- }
- }
- } else if(U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
- // Truncated 4-byte sequence.
- *pi=i;
- return errorValue(2, strict);
- }
- } else if(U8_IS_TRAIL(b2) && i>start) {
- uint8_t b3=s[--i];
- if(0xf0<=b3 && b3<=0xf4) {
- b3&=7;
- if(U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
- *pi=i;
- c=(b3<<18)|((b2&0x3f)<<12)|((b1&0x3f)<<6)|c;
- if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
- return c;
- } else {
- // strict: forbid non-characters like U+fffe
- return errorValue(3, strict);
- }
- }
- }
- }
- }
- }
- return errorValue(0, strict);
-}
-
-U_CAPI int32_t U_EXPORT2
-utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i) {
- // Same as utf8_prevCharSafeBody(..., strict=-1) minus assembling code points.
- int32_t orig_i=i;
- uint8_t c=s[i];
- if(U8_IS_TRAIL(c) && i>start) {
- uint8_t b1=s[--i];
- if(U8_IS_LEAD(b1)) {
- if(b1<0xe0 ||
- (b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
- return i;
- }
- } else if(U8_IS_TRAIL(b1) && i>start) {
- uint8_t b2=s[--i];
- if(0xe0<=b2 && b2<=0xf4) {
- if(b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b2, b1) : U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
- return i;
- }
- } else if(U8_IS_TRAIL(b2) && i>start) {
- uint8_t b3=s[--i];
- if(0xf0<=b3 && b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
- return i;
- }
- }
- }
- }
- return orig_i;
-}
diff --git a/contrib/libs/icu/common/util.cpp b/contrib/libs/icu/common/util.cpp
deleted file mode 100644
index 86e5c791bad..00000000000
--- a/contrib/libs/icu/common/util.cpp
+++ /dev/null
@@ -1,421 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2001-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Date Name Description
-* 11/19/2001 aliu Creation.
-**********************************************************************
-*/
-
-#include "unicode/unimatch.h"
-#include "unicode/utf16.h"
-#include "patternprops.h"
-#include "util.h"
-
-// Define UChar constants using hex for EBCDIC compatibility
-
-static const UChar BACKSLASH = 0x005C; /*\*/
-static const UChar UPPER_U = 0x0055; /*U*/
-static const UChar LOWER_U = 0x0075; /*u*/
-static const UChar APOSTROPHE = 0x0027; // '\''
-static const UChar SPACE = 0x0020; // ' '
-
-// "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-static const UChar DIGITS[] = {
- 48,49,50,51,52,53,54,55,56,57,
- 65,66,67,68,69,70,71,72,73,74,
- 75,76,77,78,79,80,81,82,83,84,
- 85,86,87,88,89,90
-};
-
-U_NAMESPACE_BEGIN
-
-UnicodeString& ICU_Utility::appendNumber(UnicodeString& result, int32_t n,
- int32_t radix, int32_t minDigits) {
- if (radix < 2 || radix > 36) {
- // Bogus radix
- return result.append((UChar)63/*?*/);
- }
- // Handle negatives
- if (n < 0) {
- n = -n;
- result.append((UChar)45/*-*/);
- }
- // First determine the number of digits
- int32_t nn = n;
- int32_t r = 1;
- while (nn >= radix) {
- nn /= radix;
- r *= radix;
- --minDigits;
- }
- // Now generate the digits
- while (--minDigits > 0) {
- result.append(DIGITS[0]);
- }
- while (r > 0) {
- int32_t digit = n / r;
- result.append(DIGITS[digit]);
- n -= digit * r;
- r /= radix;
- }
- return result;
-}
-
-/**
- * Return true if the character is NOT printable ASCII.
- */
-UBool ICU_Utility::isUnprintable(UChar32 c) {
- return !(c >= 0x20 && c <= 0x7E);
-}
-
-/**
- * Escape unprintable characters using \uxxxx notation for U+0000 to
- * U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is
- * printable ASCII, then do nothing and return FALSE. Otherwise,
- * append the escaped notation and return TRUE.
- */
-UBool ICU_Utility::escapeUnprintable(UnicodeString& result, UChar32 c) {
- if (isUnprintable(c)) {
- result.append(BACKSLASH);
- if (c & ~0xFFFF) {
- result.append(UPPER_U);
- result.append(DIGITS[0xF&(c>>28)]);
- result.append(DIGITS[0xF&(c>>24)]);
- result.append(DIGITS[0xF&(c>>20)]);
- result.append(DIGITS[0xF&(c>>16)]);
- } else {
- result.append(LOWER_U);
- }
- result.append(DIGITS[0xF&(c>>12)]);
- result.append(DIGITS[0xF&(c>>8)]);
- result.append(DIGITS[0xF&(c>>4)]);
- result.append(DIGITS[0xF&c]);
- return TRUE;
- }
- return FALSE;
-}
-
-/**
- * Returns the index of a character, ignoring quoted text.
- * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
- * found by a search for 'h'.
- */
-// FOR FUTURE USE. DISABLE FOR NOW for coverage reasons.
-/*
-int32_t ICU_Utility::quotedIndexOf(const UnicodeString& text,
- int32_t start, int32_t limit,
- UChar charToFind) {
- for (int32_t i=start; i<limit; ++i) {
- UChar c = text.charAt(i);
- if (c == BACKSLASH) {
- ++i;
- } else if (c == APOSTROPHE) {
- while (++i < limit
- && text.charAt(i) != APOSTROPHE) {}
- } else if (c == charToFind) {
- return i;
- }
- }
- return -1;
-}
-*/
-
-/**
- * Skip over a sequence of zero or more white space characters at pos.
- * @param advance if true, advance pos to the first non-white-space
- * character at or after pos, or str.length(), if there is none.
- * Otherwise leave pos unchanged.
- * @return the index of the first non-white-space character at or
- * after pos, or str.length(), if there is none.
- */
-int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos,
- UBool advance) {
- int32_t p = pos;
- const UChar* s = str.getBuffer();
- p = (int32_t)(PatternProps::skipWhiteSpace(s + p, str.length() - p) - s);
- if (advance) {
- pos = p;
- }
- return p;
-}
-
-/**
- * Skip over Pattern_White_Space in a Replaceable.
- * Skipping may be done in the forward or
- * reverse direction. In either case, the leftmost index will be
- * inclusive, and the rightmost index will be exclusive. That is,
- * given a range defined as [start, limit), the call
- * skipWhitespace(text, start, limit) will advance start past leading
- * whitespace, whereas the call skipWhitespace(text, limit, start),
- * will back up limit past trailing whitespace.
- * @param text the text to be analyzed
- * @param pos either the start or limit of a range of 'text', to skip
- * leading or trailing whitespace, respectively
- * @param stop either the limit or start of a range of 'text', to skip
- * leading or trailing whitespace, respectively
- * @return the new start or limit, depending on what was passed in to
- * 'pos'
- */
-//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons.
-//?int32_t ICU_Utility::skipWhitespace(const Replaceable& text,
-//? int32_t pos, int32_t stop) {
-//? UChar32 c;
-//? UBool isForward = (stop >= pos);
-//?
-//? if (!isForward) {
-//? --pos; // pos is a limit, so back up by one
-//? }
-//?
-//? while (pos != stop &&
-//? PatternProps::isWhiteSpace(c = text.char32At(pos))) {
-//? if (isForward) {
-//? pos += U16_LENGTH(c);
-//? } else {
-//? pos -= U16_LENGTH(c);
-//? }
-//? }
-//?
-//? if (!isForward) {
-//? ++pos; // make pos back into a limit
-//? }
-//?
-//? return pos;
-//?}
-
-/**
- * Parse a single non-whitespace character 'ch', optionally
- * preceded by whitespace.
- * @param id the string to be parsed
- * @param pos INPUT-OUTPUT parameter. On input, pos[0] is the
- * offset of the first character to be parsed. On output, pos[0]
- * is the index after the last parsed character. If the parse
- * fails, pos[0] will be unchanged.
- * @param ch the non-whitespace character to be parsed.
- * @return true if 'ch' is seen preceded by zero or more
- * whitespace characters.
- */
-UBool ICU_Utility::parseChar(const UnicodeString& id, int32_t& pos, UChar ch) {
- int32_t start = pos;
- skipWhitespace(id, pos, TRUE);
- if (pos == id.length() ||
- id.charAt(pos) != ch) {
- pos = start;
- return FALSE;
- }
- ++pos;
- return TRUE;
-}
-
-/**
- * Parse a pattern string within the given Replaceable and a parsing
- * pattern. Characters are matched literally and case-sensitively
- * except for the following special characters:
- *
- * ~ zero or more Pattern_White_Space chars
- *
- * If end of pattern is reached with all matches along the way,
- * pos is advanced to the first unparsed index and returned.
- * Otherwise -1 is returned.
- * @param pat pattern that controls parsing
- * @param text text to be parsed, starting at index
- * @param index offset to first character to parse
- * @param limit offset after last character to parse
- * @return index after last parsed character, or -1 on parse failure.
- */
-int32_t ICU_Utility::parsePattern(const UnicodeString& pat,
- const Replaceable& text,
- int32_t index,
- int32_t limit) {
- int32_t ipat = 0;
-
- // empty pattern matches immediately
- if (ipat == pat.length()) {
- return index;
- }
-
- UChar32 cpat = pat.char32At(ipat);
-
- while (index < limit) {
- UChar32 c = text.char32At(index);
-
- // parse \s*
- if (cpat == 126 /*~*/) {
- if (PatternProps::isWhiteSpace(c)) {
- index += U16_LENGTH(c);
- continue;
- } else {
- if (++ipat == pat.length()) {
- return index; // success; c unparsed
- }
- // fall thru; process c again with next cpat
- }
- }
-
- // parse literal
- else if (c == cpat) {
- index += U16_LENGTH(c);
- ipat += U16_LENGTH(cpat);
- if (ipat == pat.length()) {
- return index; // success; c parsed
- }
- // fall thru; get next cpat
- }
-
- // match failure of literal
- else {
- return -1;
- }
-
- cpat = pat.char32At(ipat);
- }
-
- return -1; // text ended before end of pat
-}
-
-int32_t ICU_Utility::parseAsciiInteger(const UnicodeString& str, int32_t& pos) {
- int32_t result = 0;
- UChar c;
- while (pos < str.length() && (c = str.charAt(pos)) >= u'0' && c <= u'9') {
- result = result * 10 + (c - u'0');
- pos++;
- }
- return result;
-}
-
-/**
- * Append a character to a rule that is being built up. To flush
- * the quoteBuf to rule, make one final call with isLiteral == TRUE.
- * If there is no final character, pass in (UChar32)-1 as c.
- * @param rule the string to append the character to
- * @param c the character to append, or (UChar32)-1 if none.
- * @param isLiteral if true, then the given character should not be
- * quoted or escaped. Usually this means it is a syntactic element
- * such as > or $
- * @param escapeUnprintable if true, then unprintable characters
- * should be escaped using \uxxxx or \Uxxxxxxxx. These escapes will
- * appear outside of quotes.
- * @param quoteBuf a buffer which is used to build up quoted
- * substrings. The caller should initially supply an empty buffer,
- * and thereafter should not modify the buffer. The buffer should be
- * cleared out by, at the end, calling this method with a literal
- * character.
- */
-void ICU_Utility::appendToRule(UnicodeString& rule,
- UChar32 c,
- UBool isLiteral,
- UBool escapeUnprintable,
- UnicodeString& quoteBuf) {
- // If we are escaping unprintables, then escape them outside
- // quotes. \u and \U are not recognized within quotes. The same
- // logic applies to literals, but literals are never escaped.
- if (isLiteral ||
- (escapeUnprintable && ICU_Utility::isUnprintable(c))) {
- if (quoteBuf.length() > 0) {
- // We prefer backslash APOSTROPHE to double APOSTROPHE
- // (more readable, less similar to ") so if there are
- // double APOSTROPHEs at the ends, we pull them outside
- // of the quote.
-
- // If the first thing in the quoteBuf is APOSTROPHE
- // (doubled) then pull it out.
- while (quoteBuf.length() >= 2 &&
- quoteBuf.charAt(0) == APOSTROPHE &&
- quoteBuf.charAt(1) == APOSTROPHE) {
- rule.append(BACKSLASH).append(APOSTROPHE);
- quoteBuf.remove(0, 2);
- }
- // If the last thing in the quoteBuf is APOSTROPHE
- // (doubled) then remove and count it and add it after.
- int32_t trailingCount = 0;
- while (quoteBuf.length() >= 2 &&
- quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE &&
- quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) {
- quoteBuf.truncate(quoteBuf.length()-2);
- ++trailingCount;
- }
- if (quoteBuf.length() > 0) {
- rule.append(APOSTROPHE);
- rule.append(quoteBuf);
- rule.append(APOSTROPHE);
- quoteBuf.truncate(0);
- }
- while (trailingCount-- > 0) {
- rule.append(BACKSLASH).append(APOSTROPHE);
- }
- }
- if (c != (UChar32)-1) {
- /* Since spaces are ignored during parsing, they are
- * emitted only for readability. We emit one here
- * only if there isn't already one at the end of the
- * rule.
- */
- if (c == SPACE) {
- int32_t len = rule.length();
- if (len > 0 && rule.charAt(len-1) != c) {
- rule.append(c);
- }
- } else if (!escapeUnprintable || !ICU_Utility::escapeUnprintable(rule, c)) {
- rule.append(c);
- }
- }
- }
-
- // Escape ' and '\' and don't begin a quote just for them
- else if (quoteBuf.length() == 0 &&
- (c == APOSTROPHE || c == BACKSLASH)) {
- rule.append(BACKSLASH);
- rule.append(c);
- }
-
- // Specials (printable ascii that isn't [0-9a-zA-Z]) and
- // whitespace need quoting. Also append stuff to quotes if we are
- // building up a quoted substring already.
- else if (quoteBuf.length() > 0 ||
- (c >= 0x0021 && c <= 0x007E &&
- !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
- (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
- (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
- PatternProps::isWhiteSpace(c)) {
- quoteBuf.append(c);
- // Double ' within a quote
- if (c == APOSTROPHE) {
- quoteBuf.append(c);
- }
- }
-
- // Otherwise just append
- else {
- rule.append(c);
- }
-}
-
-void ICU_Utility::appendToRule(UnicodeString& rule,
- const UnicodeString& text,
- UBool isLiteral,
- UBool escapeUnprintable,
- UnicodeString& quoteBuf) {
- for (int32_t i=0; i<text.length(); ++i) {
- appendToRule(rule, text[i], isLiteral, escapeUnprintable, quoteBuf);
- }
-}
-
-/**
- * Given a matcher reference, which may be null, append its
- * pattern as a literal to the given rule.
- */
-void ICU_Utility::appendToRule(UnicodeString& rule,
- const UnicodeMatcher* matcher,
- UBool escapeUnprintable,
- UnicodeString& quoteBuf) {
- if (matcher != NULL) {
- UnicodeString pat;
- appendToRule(rule, matcher->toPattern(pat, escapeUnprintable),
- TRUE, escapeUnprintable, quoteBuf);
- }
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/util.h b/contrib/libs/icu/common/util.h
deleted file mode 100644
index f3f71dce458..00000000000
--- a/contrib/libs/icu/common/util.h
+++ /dev/null
@@ -1,257 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- **********************************************************************
- * Copyright (c) 2001-2011, International Business Machines
- * Corporation and others. All Rights Reserved.
- **********************************************************************
- * Date Name Description
- * 11/19/2001 aliu Creation.
- **********************************************************************
- */
-
-#ifndef ICU_UTIL_H
-#define ICU_UTIL_H
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-
-//--------------------------------------------------------------------
-// class ICU_Utility
-// i18n utility functions, scoped into the class ICU_Utility.
-//--------------------------------------------------------------------
-
-U_NAMESPACE_BEGIN
-
-class UnicodeMatcher;
-
-class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
- public:
-
- /**
- * Append a number to the given UnicodeString in the given radix.
- * Standard digits '0'-'9' are used and letters 'A'-'Z' for
- * radices 11 through 36.
- * @param result the digits of the number are appended here
- * @param n the number to be converted to digits; may be negative.
- * If negative, a '-' is prepended to the digits.
- * @param radix a radix from 2 to 36 inclusive.
- * @param minDigits the minimum number of digits, not including
- * any '-', to produce. Values less than 2 have no effect. One
- * digit is always emitted regardless of this parameter.
- * @return a reference to result
- */
- static UnicodeString& appendNumber(UnicodeString& result, int32_t n,
- int32_t radix = 10,
- int32_t minDigits = 1);
-
- /** Returns a bogus UnicodeString by value. */
- static inline UnicodeString makeBogusString() {
- UnicodeString result;
- result.setToBogus();
- return result;
- }
-
- /**
- * Return true if the character is NOT printable ASCII.
- *
- * This method should really be in UnicodeString (or similar). For
- * now, we implement it here and share it with friend classes.
- */
- static UBool isUnprintable(UChar32 c);
-
- /**
- * Escape unprintable characters using \uxxxx notation for U+0000 to
- * U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is
- * printable ASCII, then do nothing and return FALSE. Otherwise,
- * append the escaped notation and return TRUE.
- */
- static UBool escapeUnprintable(UnicodeString& result, UChar32 c);
-
- /**
- * Returns the index of a character, ignoring quoted text.
- * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
- * found by a search for 'h'.
- * @param text text to be searched
- * @param start the beginning index, inclusive; <code>0 <= start
- * <= limit</code>.
- * @param limit the ending index, exclusive; <code>start <= limit
- * <= text.length()</code>.
- * @param c character to search for
- * @return Offset of the first instance of c, or -1 if not found.
- */
-//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons.
-// static int32_t quotedIndexOf(const UnicodeString& text,
-// int32_t start, int32_t limit,
-// UChar c);
-
- /**
- * Skip over a sequence of zero or more white space characters at pos.
- * @param advance if true, advance pos to the first non-white-space
- * character at or after pos, or str.length(), if there is none.
- * Otherwise leave pos unchanged.
- * @return the index of the first non-white-space character at or
- * after pos, or str.length(), if there is none.
- */
- static int32_t skipWhitespace(const UnicodeString& str, int32_t& pos,
- UBool advance = FALSE);
-
- /**
- * Skip over Pattern_White_Space in a Replaceable.
- * Skipping may be done in the forward or
- * reverse direction. In either case, the leftmost index will be
- * inclusive, and the rightmost index will be exclusive. That is,
- * given a range defined as [start, limit), the call
- * skipWhitespace(text, start, limit) will advance start past leading
- * whitespace, whereas the call skipWhitespace(text, limit, start),
- * will back up limit past trailing whitespace.
- * @param text the text to be analyzed
- * @param pos either the start or limit of a range of 'text', to skip
- * leading or trailing whitespace, respectively
- * @param stop either the limit or start of a range of 'text', to skip
- * leading or trailing whitespace, respectively
- * @return the new start or limit, depending on what was passed in to
- * 'pos'
- */
-//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons.
-//? static int32_t skipWhitespace(const Replaceable& text,
-//? int32_t pos, int32_t stop);
-
- /**
- * Parse a single non-whitespace character 'ch', optionally
- * preceded by whitespace.
- * @param id the string to be parsed
- * @param pos INPUT-OUTPUT parameter. On input, pos[0] is the
- * offset of the first character to be parsed. On output, pos[0]
- * is the index after the last parsed character. If the parse
- * fails, pos[0] will be unchanged.
- * @param ch the non-whitespace character to be parsed.
- * @return true if 'ch' is seen preceded by zero or more
- * whitespace characters.
- */
- static UBool parseChar(const UnicodeString& id, int32_t& pos, UChar ch);
-
- /**
- * Parse a pattern string starting at offset pos. Keywords are
- * matched case-insensitively. Spaces may be skipped and may be
- * optional or required. Integer values may be parsed, and if
- * they are, they will be returned in the given array. If
- * successful, the offset of the next non-space character is
- * returned. On failure, -1 is returned.
- * @param pattern must only contain lowercase characters, which
- * will match their uppercase equivalents as well. A space
- * character matches one or more required spaces. A '~' character
- * matches zero or more optional spaces. A '#' character matches
- * an integer and stores it in parsedInts, which the caller must
- * ensure has enough capacity.
- * @param parsedInts array to receive parsed integers. Caller
- * must ensure that parsedInts.length is >= the number of '#'
- * signs in 'pattern'.
- * @return the position after the last character parsed, or -1 if
- * the parse failed
- */
- static int32_t parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit,
- const UnicodeString& pattern, int32_t* parsedInts);
-
- /**
- * Parse a pattern string within the given Replaceable and a parsing
- * pattern. Characters are matched literally and case-sensitively
- * except for the following special characters:
- *
- * ~ zero or more Pattern_White_Space chars
- *
- * If end of pattern is reached with all matches along the way,
- * pos is advanced to the first unparsed index and returned.
- * Otherwise -1 is returned.
- * @param pat pattern that controls parsing
- * @param text text to be parsed, starting at index
- * @param index offset to first character to parse
- * @param limit offset after last character to parse
- * @return index after last parsed character, or -1 on parse failure.
- */
- static int32_t parsePattern(const UnicodeString& pat,
- const Replaceable& text,
- int32_t index,
- int32_t limit);
-
- /**
- * Parse an integer at pos, either of the form \d+ or of the form
- * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
- * or octal format.
- * @param pos INPUT-OUTPUT parameter. On input, the index of the first
- * character to parse. On output, the index of the character after the
- * last parsed character.
- */
- static int32_t parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit);
-
- /**
- * Parse an integer at pos using only ASCII digits.
- * Base 10 only.
- * @param pos INPUT-OUTPUT parameter. On input, the index of the first
- * character to parse. On output, the index of the character after the
- * last parsed character.
- */
- static int32_t parseAsciiInteger(const UnicodeString& str, int32_t& pos);
-
- /**
- * Parse a Unicode identifier from the given string at the given
- * position. Return the identifier, or an empty string if there
- * is no identifier.
- * @param str the string to parse
- * @param pos INPUT-OUPUT parameter. On INPUT, pos is the
- * first character to examine. It must be less than str.length(),
- * and it must not point to a whitespace character. That is, must
- * have pos < str.length() and
- * !UCharacter::isWhitespace(str.char32At(pos)). On
- * OUTPUT, the position after the last parsed character.
- * @return the Unicode identifier, or an empty string if there is
- * no valid identifier at pos.
- */
- static UnicodeString parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos);
-
- /**
- * Parse an unsigned 31-bit integer at the given offset. Use
- * UCharacter.digit() to parse individual characters into digits.
- * @param text the text to be parsed
- * @param pos INPUT-OUTPUT parameter. On entry, pos is the
- * offset within text at which to start parsing; it should point
- * to a valid digit. On exit, pos is the offset after the last
- * parsed character. If the parse failed, it will be unchanged on
- * exit. Must be >= 0 on entry.
- * @param radix the radix in which to parse; must be >= 2 and <=
- * 36.
- * @return a non-negative parsed number, or -1 upon parse failure.
- * Parse fails if there are no digits, that is, if pos does not
- * point to a valid digit on entry, or if the number to be parsed
- * does not fit into a 31-bit unsigned integer.
- */
- static int32_t parseNumber(const UnicodeString& text,
- int32_t& pos, int8_t radix);
-
- static void appendToRule(UnicodeString& rule,
- UChar32 c,
- UBool isLiteral,
- UBool escapeUnprintable,
- UnicodeString& quoteBuf);
-
- static void appendToRule(UnicodeString& rule,
- const UnicodeString& text,
- UBool isLiteral,
- UBool escapeUnprintable,
- UnicodeString& quoteBuf);
-
- static void appendToRule(UnicodeString& rule,
- const UnicodeMatcher* matcher,
- UBool escapeUnprintable,
- UnicodeString& quoteBuf);
-
-private:
- // do not instantiate
- ICU_Utility();
-};
-
-U_NAMESPACE_END
-
-#endif
-//eof
diff --git a/contrib/libs/icu/common/util_props.cpp b/contrib/libs/icu/common/util_props.cpp
deleted file mode 100644
index 95a112bc912..00000000000
--- a/contrib/libs/icu/common/util_props.cpp
+++ /dev/null
@@ -1,217 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (c) 2001-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Date Name Description
-* 11/19/2001 aliu Creation.
-**********************************************************************
-*/
-
-#include "unicode/uchar.h"
-#include "unicode/utf16.h"
-#include "patternprops.h"
-#include "util.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * Parse an integer at pos, either of the form \d+ or of the form
- * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
- * or octal format.
- * @param pos INPUT-OUTPUT parameter. On input, the first
- * character to parse. On output, the character after the last
- * parsed character.
- */
-int32_t ICU_Utility::parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit) {
- int32_t count = 0;
- int32_t value = 0;
- int32_t p = pos;
- int8_t radix = 10;
-
- if (p < limit && rule.charAt(p) == 48 /*0*/) {
- if (p+1 < limit && (rule.charAt(p+1) == 0x78 /*x*/ || rule.charAt(p+1) == 0x58 /*X*/)) {
- p += 2;
- radix = 16;
- }
- else {
- p++;
- count = 1;
- radix = 8;
- }
- }
-
- while (p < limit) {
- int32_t d = u_digit(rule.charAt(p++), radix);
- if (d < 0) {
- --p;
- break;
- }
- ++count;
- int32_t v = (value * radix) + d;
- if (v <= value) {
- // If there are too many input digits, at some point
- // the value will go negative, e.g., if we have seen
- // "0x8000000" already and there is another '0', when
- // we parse the next 0 the value will go negative.
- return 0;
- }
- value = v;
- }
- if (count > 0) {
- pos = p;
- }
- return value;
-}
-
-/**
- * Parse a pattern string starting at offset pos. Keywords are
- * matched case-insensitively. Spaces may be skipped and may be
- * optional or required. Integer values may be parsed, and if
- * they are, they will be returned in the given array. If
- * successful, the offset of the next non-space character is
- * returned. On failure, -1 is returned.
- * @param pattern must only contain lowercase characters, which
- * will match their uppercase equivalents as well. A space
- * character matches one or more required spaces. A '~' character
- * matches zero or more optional spaces. A '#' character matches
- * an integer and stores it in parsedInts, which the caller must
- * ensure has enough capacity.
- * @param parsedInts array to receive parsed integers. Caller
- * must ensure that parsedInts.length is >= the number of '#'
- * signs in 'pattern'.
- * @return the position after the last character parsed, or -1 if
- * the parse failed
- */
-int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit,
- const UnicodeString& pattern, int32_t* parsedInts) {
- // TODO Update this to handle surrogates
- int32_t p;
- int32_t intCount = 0; // number of integers parsed
- for (int32_t i=0; i<pattern.length(); ++i) {
- UChar cpat = pattern.charAt(i);
- UChar c;
- switch (cpat) {
- case 32 /*' '*/:
- if (pos >= limit) {
- return -1;
- }
- c = rule.charAt(pos++);
- if (!PatternProps::isWhiteSpace(c)) {
- return -1;
- }
- // FALL THROUGH to skipWhitespace
- U_FALLTHROUGH;
- case 126 /*'~'*/:
- pos = skipWhitespace(rule, pos);
- break;
- case 35 /*'#'*/:
- p = pos;
- parsedInts[intCount++] = parseInteger(rule, p, limit);
- if (p == pos) {
- // Syntax error; failed to parse integer
- return -1;
- }
- pos = p;
- break;
- default:
- if (pos >= limit) {
- return -1;
- }
- c = (UChar) u_tolower(rule.charAt(pos++));
- if (c != cpat) {
- return -1;
- }
- break;
- }
- }
- return pos;
-}
-
-/**
- * Parse a Unicode identifier from the given string at the given
- * position. Return the identifier, or an empty string if there
- * is no identifier.
- * @param str the string to parse
- * @param pos INPUT-OUPUT parameter. On INPUT, pos is the
- * first character to examine. It must be less than str.length(),
- * and it must not point to a whitespace character. That is, must
- * have pos < str.length(). On
- * OUTPUT, the position after the last parsed character.
- * @return the Unicode identifier, or an empty string if there is
- * no valid identifier at pos.
- */
-UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) {
- // assert(pos < str.length());
- UnicodeString buf;
- int p = pos;
- while (p < str.length()) {
- UChar32 ch = str.char32At(p);
- if (buf.length() == 0) {
- if (u_isIDStart(ch)) {
- buf.append(ch);
- } else {
- buf.truncate(0);
- return buf;
- }
- } else {
- if (u_isIDPart(ch)) {
- buf.append(ch);
- } else {
- break;
- }
- }
- p += U16_LENGTH(ch);
- }
- pos = p;
- return buf;
-}
-
-/**
- * Parse an unsigned 31-bit integer at the given offset. Use
- * UCharacter.digit() to parse individual characters into digits.
- * @param text the text to be parsed
- * @param pos INPUT-OUTPUT parameter. On entry, pos[0] is the
- * offset within text at which to start parsing; it should point
- * to a valid digit. On exit, pos[0] is the offset after the last
- * parsed character. If the parse failed, it will be unchanged on
- * exit. Must be >= 0 on entry.
- * @param radix the radix in which to parse; must be >= 2 and <=
- * 36.
- * @return a non-negative parsed number, or -1 upon parse failure.
- * Parse fails if there are no digits, that is, if pos[0] does not
- * point to a valid digit on entry, or if the number to be parsed
- * does not fit into a 31-bit unsigned integer.
- */
-int32_t ICU_Utility::parseNumber(const UnicodeString& text,
- int32_t& pos, int8_t radix) {
- // assert(pos[0] >= 0);
- // assert(radix >= 2);
- // assert(radix <= 36);
- int32_t n = 0;
- int32_t p = pos;
- while (p < text.length()) {
- UChar32 ch = text.char32At(p);
- int32_t d = u_digit(ch, radix);
- if (d < 0) {
- break;
- }
- n = radix*n + d;
- // ASSUME that when a 32-bit integer overflows it becomes
- // negative. E.g., 214748364 * 10 + 8 => negative value.
- if (n < 0) {
- return -1;
- }
- ++p;
- }
- if (p == pos) {
- return -1;
- }
- pos = p;
- return n;
-}
-
-U_NAMESPACE_END
-
diff --git a/contrib/libs/icu/common/utrace.cpp b/contrib/libs/icu/common/utrace.cpp
deleted file mode 100644
index c9815465947..00000000000
--- a/contrib/libs/icu/common/utrace.cpp
+++ /dev/null
@@ -1,504 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2003-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: utrace.c
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*/
-
-#include "unicode/utrace.h"
-#include "utracimp.h"
-#include "cstring.h"
-#include "uassert.h"
-#include "ucln_cmn.h"
-
-
-static UTraceEntry *pTraceEntryFunc = NULL;
-static UTraceExit *pTraceExitFunc = NULL;
-static UTraceData *pTraceDataFunc = NULL;
-static const void *gTraceContext = NULL;
-
-/**
- * \var utrace_level
- * Trace level variable. Negative for "off".
- */
-static int32_t
-utrace_level = UTRACE_ERROR;
-
-U_CAPI void U_EXPORT2
-utrace_entry(int32_t fnNumber) {
- if (pTraceEntryFunc != NULL) {
- (*pTraceEntryFunc)(gTraceContext, fnNumber);
- }
-}
-
-
-static const char gExitFmt[] = "Returns.";
-static const char gExitFmtValue[] = "Returns %d.";
-static const char gExitFmtStatus[] = "Returns. Status = %d.";
-static const char gExitFmtValueStatus[] = "Returns %d. Status = %d.";
-static const char gExitFmtPtrStatus[] = "Returns %d. Status = %p.";
-
-U_CAPI void U_EXPORT2
-utrace_exit(int32_t fnNumber, int32_t returnType, ...) {
- if (pTraceExitFunc != NULL) {
- va_list args;
- const char *fmt;
-
- switch (returnType) {
- case 0:
- fmt = gExitFmt;
- break;
- case UTRACE_EXITV_I32:
- fmt = gExitFmtValue;
- break;
- case UTRACE_EXITV_STATUS:
- fmt = gExitFmtStatus;
- break;
- case UTRACE_EXITV_I32 | UTRACE_EXITV_STATUS:
- fmt = gExitFmtValueStatus;
- break;
- case UTRACE_EXITV_PTR | UTRACE_EXITV_STATUS:
- fmt = gExitFmtPtrStatus;
- break;
- default:
- UPRV_UNREACHABLE;
- }
-
- va_start(args, returnType);
- (*pTraceExitFunc)(gTraceContext, fnNumber, fmt, args);
- va_end(args);
- }
-}
-
-
-
-U_CAPI void U_EXPORT2
-utrace_data(int32_t fnNumber, int32_t level, const char *fmt, ...) {
- if (pTraceDataFunc != NULL) {
- va_list args;
- va_start(args, fmt );
- (*pTraceDataFunc)(gTraceContext, fnNumber, level, fmt, args);
- va_end(args);
- }
-}
-
-
-static void outputChar(char c, char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) {
- int32_t i;
- /* Check whether a start of line indenting is needed. Three cases:
- * 1. At the start of the first line (output index == 0).
- * 2. At the start of subsequent lines (preceeding char in buffer == '\n')
- * 3. When preflighting buffer len (buffer capacity is exceeded), when
- * a \n is output. Ideally we wouldn't do the indent until the following char
- * is received, but that won't work because there's no place to remember that
- * the preceding char was \n. Meaning that we may overstimate the
- * buffer size needed. No harm done.
- */
- if (*outIx==0 || /* case 1. */
- (c!='\n' && c!=0 && *outIx < capacity && outBuf[(*outIx)-1]=='\n') || /* case 2. */
- (c=='\n' && *outIx>=capacity)) /* case 3 */
- {
- /* At the start of a line. Indent. */
- for(i=0; i<indent; i++) {
- if (*outIx < capacity) {
- outBuf[*outIx] = ' ';
- }
- (*outIx)++;
- }
- }
-
- if (*outIx < capacity) {
- outBuf[*outIx] = c;
- }
- if (c != 0) {
- /* Nulls only appear as end-of-string terminators. Move them to the output
- * buffer, but do not update the length of the buffer, so that any
- * following output will overwrite the null. */
- (*outIx)++;
- }
-}
-
-static void outputHexBytes(int64_t val, int32_t charsToOutput,
- char *outBuf, int32_t *outIx, int32_t capacity) {
- static const char gHexChars[] = "0123456789abcdef";
- int32_t shiftCount;
- for (shiftCount=(charsToOutput-1)*4; shiftCount >= 0; shiftCount-=4) {
- char c = gHexChars[(val >> shiftCount) & 0xf];
- outputChar(c, outBuf, outIx, capacity, 0);
- }
-}
-
-/* Output a pointer value in hex. Work with any size of pointer */
-static void outputPtrBytes(void *val, char *outBuf, int32_t *outIx, int32_t capacity) {
- uint32_t i;
- int32_t incVal = 1; /* +1 for big endian, -1 for little endian */
- char *p = (char *)&val; /* point to current byte to output in the ptr val */
-
-#if !U_IS_BIG_ENDIAN
- /* Little Endian. Move p to most significant end of the value */
- incVal = -1;
- p += sizeof(void *) - 1;
-#endif
-
- /* Loop through the bytes of the ptr as it sits in memory, from
- * most significant to least significant end */
- for (i=0; i<sizeof(void *); i++) {
- outputHexBytes(*p, 2, outBuf, outIx, capacity);
- p += incVal;
- }
-}
-
-static void outputString(const char *s, char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) {
- int32_t i = 0;
- char c;
- if (s==NULL) {
- s = "*NULL*";
- }
- do {
- c = s[i++];
- outputChar(c, outBuf, outIx, capacity, indent);
- } while (c != 0);
-}
-
-
-
-static void outputUString(const UChar *s, int32_t len,
- char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) {
- int32_t i = 0;
- UChar c;
- if (s==NULL) {
- outputString(NULL, outBuf, outIx, capacity, indent);
- return;
- }
-
- for (i=0; i<len || len==-1; i++) {
- c = s[i];
- outputHexBytes(c, 4, outBuf, outIx, capacity);
- outputChar(' ', outBuf, outIx, capacity, indent);
- if (len == -1 && c==0) {
- break;
- }
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-utrace_vformat(char *outBuf, int32_t capacity, int32_t indent, const char *fmt, va_list args) {
- int32_t outIx = 0;
- int32_t fmtIx = 0;
- char fmtC;
- char c;
- int32_t intArg;
- int64_t longArg = 0;
- char *ptrArg;
-
- /* Loop runs once for each character in the format string.
- */
- for (;;) {
- fmtC = fmt[fmtIx++];
- if (fmtC != '%') {
- /* Literal character, not part of a %sequence. Just copy it to the output. */
- outputChar(fmtC, outBuf, &outIx, capacity, indent);
- if (fmtC == 0) {
- /* We hit the null that terminates the format string.
- * This is the normal (and only) exit from the loop that
- * interprets the format
- */
- break;
- }
- continue;
- }
-
- /* We encountered a '%'. Pick up the following format char */
- fmtC = fmt[fmtIx++];
-
- switch (fmtC) {
- case 'c':
- /* single 8 bit char */
- c = (char)va_arg(args, int32_t);
- outputChar(c, outBuf, &outIx, capacity, indent);
- break;
-
- case 's':
- /* char * string, null terminated. */
- ptrArg = va_arg(args, char *);
- outputString((const char *)ptrArg, outBuf, &outIx, capacity, indent);
- break;
-
- case 'S':
- /* UChar * string, with length, len==-1 for null terminated. */
- ptrArg = va_arg(args, char *); /* Ptr */
- intArg =(int32_t)va_arg(args, int32_t); /* Length */
- outputUString((const UChar *)ptrArg, intArg, outBuf, &outIx, capacity, indent);
- break;
-
- case 'b':
- /* 8 bit int */
- intArg = va_arg(args, int);
- outputHexBytes(intArg, 2, outBuf, &outIx, capacity);
- break;
-
- case 'h':
- /* 16 bit int */
- intArg = va_arg(args, int);
- outputHexBytes(intArg, 4, outBuf, &outIx, capacity);
- break;
-
- case 'd':
- /* 32 bit int */
- intArg = va_arg(args, int);
- outputHexBytes(intArg, 8, outBuf, &outIx, capacity);
- break;
-
- case 'l':
- /* 64 bit long */
- longArg = va_arg(args, int64_t);
- outputHexBytes(longArg, 16, outBuf, &outIx, capacity);
- break;
-
- case 'p':
- /* Pointers. */
- ptrArg = va_arg(args, char *);
- outputPtrBytes(ptrArg, outBuf, &outIx, capacity);
- break;
-
- case 0:
- /* Single '%' at end of fmt string. Output as literal '%'.
- * Back up index into format string so that the terminating null will be
- * re-fetched in the outer loop, causing it to terminate.
- */
- outputChar('%', outBuf, &outIx, capacity, indent);
- fmtIx--;
- break;
-
- case 'v':
- {
- /* Vector of values, e.g. %vh */
- char vectorType;
- int32_t vectorLen;
- const char *i8Ptr;
- int16_t *i16Ptr;
- int32_t *i32Ptr;
- int64_t *i64Ptr;
- void **ptrPtr;
- int32_t charsToOutput = 0;
- int32_t i;
-
- vectorType = fmt[fmtIx]; /* b, h, d, l, p, etc. */
- if (vectorType != 0) {
- fmtIx++;
- }
- i8Ptr = (const char *)va_arg(args, void*);
- i16Ptr = (int16_t *)i8Ptr;
- i32Ptr = (int32_t *)i8Ptr;
- i64Ptr = (int64_t *)i8Ptr;
- ptrPtr = (void **)i8Ptr;
- vectorLen =(int32_t)va_arg(args, int32_t);
- if (ptrPtr == NULL) {
- outputString("*NULL* ", outBuf, &outIx, capacity, indent);
- } else {
- for (i=0; i<vectorLen || vectorLen==-1; i++) {
- switch (vectorType) {
- case 'b':
- charsToOutput = 2;
- longArg = *i8Ptr++;
- break;
- case 'h':
- charsToOutput = 4;
- longArg = *i16Ptr++;
- break;
- case 'd':
- charsToOutput = 8;
- longArg = *i32Ptr++;
- break;
- case 'l':
- charsToOutput = 16;
- longArg = *i64Ptr++;
- break;
- case 'p':
- charsToOutput = 0;
- outputPtrBytes(*ptrPtr, outBuf, &outIx, capacity);
- longArg = *ptrPtr==NULL? 0: 1; /* test for null terminated array. */
- ptrPtr++;
- break;
- case 'c':
- charsToOutput = 0;
- outputChar(*i8Ptr, outBuf, &outIx, capacity, indent);
- longArg = *i8Ptr; /* for test for null terminated array. */
- i8Ptr++;
- break;
- case 's':
- charsToOutput = 0;
- outputString((const char *)*ptrPtr, outBuf, &outIx, capacity, indent);
- outputChar('\n', outBuf, &outIx, capacity, indent);
- longArg = *ptrPtr==NULL? 0: 1; /* for test for null term. array. */
- ptrPtr++;
- break;
-
- case 'S':
- charsToOutput = 0;
- outputUString((const UChar *)*ptrPtr, -1, outBuf, &outIx, capacity, indent);
- outputChar('\n', outBuf, &outIx, capacity, indent);
- longArg = *ptrPtr==NULL? 0: 1; /* for test for null term. array. */
- ptrPtr++;
- break;
-
-
- }
- if (charsToOutput > 0) {
- outputHexBytes(longArg, charsToOutput, outBuf, &outIx, capacity);
- outputChar(' ', outBuf, &outIx, capacity, indent);
- }
- if (vectorLen == -1 && longArg == 0) {
- break;
- }
- }
- }
- outputChar('[', outBuf, &outIx, capacity, indent);
- outputHexBytes(vectorLen, 8, outBuf, &outIx, capacity);
- outputChar(']', outBuf, &outIx, capacity, indent);
- }
- break;
-
-
- default:
- /* %. in format string, where . is some character not in the set
- * of recognized format chars. Just output it as if % wasn't there.
- * (Covers "%%" outputing a single '%')
- */
- outputChar(fmtC, outBuf, &outIx, capacity, indent);
- }
- }
- outputChar(0, outBuf, &outIx, capacity, indent); /* Make sure that output is null terminated */
- return outIx + 1; /* outIx + 1 because outIx does not increment when outputing final null. */
-}
-
-
-
-
-U_CAPI int32_t U_EXPORT2
-utrace_format(char *outBuf, int32_t capacity,
- int32_t indent, const char *fmt, ...) {
- int32_t retVal;
- va_list args;
- va_start(args, fmt );
- retVal = utrace_vformat(outBuf, capacity, indent, fmt, args);
- va_end(args);
- return retVal;
-}
-
-
-U_CAPI void U_EXPORT2
-utrace_setFunctions(const void *context,
- UTraceEntry *e, UTraceExit *x, UTraceData *d) {
- pTraceEntryFunc = e;
- pTraceExitFunc = x;
- pTraceDataFunc = d;
- gTraceContext = context;
-}
-
-
-U_CAPI void U_EXPORT2
-utrace_getFunctions(const void **context,
- UTraceEntry **e, UTraceExit **x, UTraceData **d) {
- *e = pTraceEntryFunc;
- *x = pTraceExitFunc;
- *d = pTraceDataFunc;
- *context = gTraceContext;
-}
-
-U_CAPI void U_EXPORT2
-utrace_setLevel(int32_t level) {
- if (level < UTRACE_OFF) {
- level = UTRACE_OFF;
- }
- if (level > UTRACE_VERBOSE) {
- level = UTRACE_VERBOSE;
- }
- utrace_level = level;
-}
-
-U_CAPI int32_t U_EXPORT2
-utrace_getLevel() {
- return utrace_level;
-}
-
-
-U_CFUNC UBool
-utrace_cleanup() {
- pTraceEntryFunc = NULL;
- pTraceExitFunc = NULL;
- pTraceDataFunc = NULL;
- utrace_level = UTRACE_OFF;
- gTraceContext = NULL;
- return TRUE;
-}
-
-
-static const char * const
-trFnName[] = {
- "u_init",
- "u_cleanup",
- NULL
-};
-
-
-static const char * const
-trConvNames[] = {
- "ucnv_open",
- "ucnv_openPackage",
- "ucnv_openAlgorithmic",
- "ucnv_clone",
- "ucnv_close",
- "ucnv_flushCache",
- "ucnv_load",
- "ucnv_unload",
- NULL
-};
-
-
-static const char * const
-trCollNames[] = {
- "ucol_open",
- "ucol_close",
- "ucol_strcoll",
- "ucol_getSortKey",
- "ucol_getLocale",
- "ucol_nextSortKeyPart",
- "ucol_strcollIter",
- "ucol_openFromShortString",
- "ucol_strcollUTF8",
- NULL
-};
-
-
-static const char* const
-trResDataNames[] = {
- "resc",
- "bundle-open",
- "file-open",
- "res-open",
- NULL
-};
-
-
-U_CAPI const char * U_EXPORT2
-utrace_functionName(int32_t fnNumber) {
- if(UTRACE_FUNCTION_START <= fnNumber && fnNumber < UTRACE_FUNCTION_LIMIT) {
- return trFnName[fnNumber];
- } else if(UTRACE_CONVERSION_START <= fnNumber && fnNumber < UTRACE_CONVERSION_LIMIT) {
- return trConvNames[fnNumber - UTRACE_CONVERSION_START];
- } else if(UTRACE_COLLATION_START <= fnNumber && fnNumber < UTRACE_COLLATION_LIMIT){
- return trCollNames[fnNumber - UTRACE_COLLATION_START];
- } else if(UTRACE_UDATA_START <= fnNumber && fnNumber < UTRACE_RES_DATA_LIMIT){
- return trResDataNames[fnNumber - UTRACE_UDATA_START];
- } else {
- return "[BOGUS Trace Function Number]";
- }
-}
-
diff --git a/contrib/libs/icu/common/utracimp.h b/contrib/libs/icu/common/utracimp.h
deleted file mode 100644
index f32fe1db394..00000000000
--- a/contrib/libs/icu/common/utracimp.h
+++ /dev/null
@@ -1,391 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 2003-2009, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: utracimp.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2003aug06
-* created by: Markus W. Scherer
-*
-* Internal header for ICU tracing/logging.
-*
-*
-* Various notes:
-* - using a trace level variable to only call trace functions
-* when the level is sufficient
-* - using the same variable for tracing on/off to never make a function
-* call when off
-* - the function number is put into a local variable by the entry macro
-* and used implicitly to avoid copy&paste/typing mistakes by the developer
-* - the application must call utrace_setFunctions() and pass in
-* implementations for the trace functions
-* - ICU trace macros call ICU functions that route through the function
-* pointers if they have been set;
-* this avoids an indirection at the call site
-* (which would cost more code for another check and for the indirection)
-*
-* ### TODO Issues:
-* - Verify that va_list is portable among compilers for the same platform.
-* va_list should be portable because printf() would fail otherwise!
-* - Should enum values like UTraceLevel be passed into int32_t-type arguments,
-* or should enum types be used?
-*/
-
-#ifndef __UTRACIMP_H__
-#define __UTRACIMP_H__
-
-#include "unicode/utrace.h"
-#include <stdarg.h>
-
-U_CDECL_BEGIN
-
-/**
- * Traced Function Exit return types.
- * Flags indicating the number and types of varargs included in a call
- * to a UTraceExit function.
- * Bits 0-3: The function return type. First variable param.
- * Bit 4: Flag for presence of U_ErrorCode status param.
- * @internal
- */
-typedef enum UTraceExitVal {
- /** The traced function returns no value @internal */
- UTRACE_EXITV_NONE = 0,
- /** The traced function returns an int32_t, or compatible, type. @internal */
- UTRACE_EXITV_I32 = 1,
- /** The traced function returns a pointer @internal */
- UTRACE_EXITV_PTR = 2,
- /** The traced function returns a UBool @internal */
- UTRACE_EXITV_BOOL = 3,
- /** Mask to extract the return type values from a UTraceExitVal @internal */
- UTRACE_EXITV_MASK = 0xf,
- /** Bit indicating that the traced function includes a UErrorCode parameter @internal */
- UTRACE_EXITV_STATUS = 0x10
-} UTraceExitVal;
-
-/**
- * Trace function for the entry point of a function.
- * Do not use directly, use UTRACE_ENTRY instead.
- * @param fnNumber The UTraceFunctionNumber for the current function.
- * @internal
- */
-U_CAPI void U_EXPORT2
-utrace_entry(int32_t fnNumber);
-
-/**
- * Trace function for each exit point of a function.
- * Do not use directly, use UTRACE_EXIT* instead.
- * @param fnNumber The UTraceFunctionNumber for the current function.
- * @param returnType The type of the value returned by the function.
- * @param errorCode The UErrorCode value at function exit. See UTRACE_EXIT.
- * @internal
- */
-U_CAPI void U_EXPORT2
-utrace_exit(int32_t fnNumber, int32_t returnType, ...);
-
-
-/**
- * Trace function used inside functions that have a UTRACE_ENTRY() statement.
- * Do not use directly, use UTRACE_DATAX() macros instead.
- *
- * @param utraceFnNumber The number of the current function, from the local
- * variable of the same name.
- * @param level The trace level for this message.
- * @param fmt The trace format string.
- *
- * @internal
- */
-U_CAPI void U_EXPORT2
-utrace_data(int32_t utraceFnNumber, int32_t level, const char *fmt, ...);
-
-U_CDECL_END
-
-#if U_ENABLE_TRACING
-
-/**
- * Boolean expression to see if ICU tracing is turned on
- * to at least the specified level.
- * @internal
- */
-#define UTRACE_LEVEL(level) (utrace_getLevel()>=(level))
-
-/**
- * Flag bit in utraceFnNumber, the local variable added to each function
- * with tracing code to contains the function number.
- *
- * Set the flag if the function's entry is traced, which will cause the
- * function's exit to also be traced. utraceFnNumber is uncoditionally
- * set at entry, whether or not the entry is traced, so that it will
- * always be available for error trace output.
- * @internal
- */
-#define UTRACE_TRACED_ENTRY 0x80000000
-
-/**
- * Trace statement for the entry point of a function.
- * Stores the function number in a local variable.
- * In C code, must be placed immediately after the last variable declaration.
- * Must be matched with UTRACE_EXIT() at all function exit points.
- *
- * Tracing should start with UTRACE_ENTRY after checking for
- * U_FAILURE at function entry, so that if a function returns immediately
- * because of a pre-existing error condition, it does not show up in the trace,
- * consistent with ICU's error handling model.
- *
- * @param fnNumber The UTraceFunctionNumber for the current function.
- * @internal
- */
-#define UTRACE_ENTRY(fnNumber) \
- int32_t utraceFnNumber=(fnNumber); \
-UPRV_BLOCK_MACRO_BEGIN { \
- if(utrace_getLevel()>=UTRACE_INFO) { \
- utrace_entry(fnNumber); \
- utraceFnNumber |= UTRACE_TRACED_ENTRY; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-
-/**
- * Trace statement for the entry point of open and close functions.
- * Produces trace output at a less verbose setting than plain UTRACE_ENTRY
- * Stores the function number in a local variable.
- * In C code, must be placed immediately after the last variable declaration.
- * Must be matched with UTRACE_EXIT() at all function exit points.
- *
- * @param fnNumber The UTraceFunctionNumber for the current function.
- * @internal
- */
-#define UTRACE_ENTRY_OC(fnNumber) \
- int32_t utraceFnNumber=(fnNumber); \
-UPRV_BLOCK_MACRO_BEGIN { \
- if(utrace_getLevel()>=UTRACE_OPEN_CLOSE) { \
- utrace_entry(fnNumber); \
- utraceFnNumber |= UTRACE_TRACED_ENTRY; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Trace statement for each exit point of a function that has a UTRACE_ENTRY()
- * statement.
- *
- * @param errorCode The function's ICU UErrorCode value at function exit,
- * or U_ZERO_ERROR if the function does not use a UErrorCode.
- * 0==U_ZERO_ERROR indicates success,
- * positive values an error (see u_errorName()),
- * negative values an informational status.
- *
- * @internal
- */
-#define UTRACE_EXIT() UPRV_BLOCK_MACRO_BEGIN { \
- if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
- utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_NONE); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Trace statement for each exit point of a function that has a UTRACE_ENTRY()
- * statement, and that returns a value.
- *
- * @param val The function's return value, int32_t or comatible type.
- *
- * @internal
- */
-#define UTRACE_EXIT_VALUE(val) UPRV_BLOCK_MACRO_BEGIN { \
- if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
- utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_I32, val); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-#define UTRACE_EXIT_STATUS(status) UPRV_BLOCK_MACRO_BEGIN { \
- if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
- utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_STATUS, status); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-#define UTRACE_EXIT_VALUE_STATUS(val, status) UPRV_BLOCK_MACRO_BEGIN { \
- if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
- utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (UTRACE_EXITV_I32 | UTRACE_EXITV_STATUS), val, status); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-#define UTRACE_EXIT_PTR_STATUS(ptr, status) UPRV_BLOCK_MACRO_BEGIN { \
- if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
- utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (UTRACE_EXITV_PTR | UTRACE_EXITV_STATUS), ptr, status); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
- * Takes no data arguments.
- * The number of arguments for this macro must match the number of inserts
- * in the format string. Vector inserts count as two arguments.
- * Calls utrace_data() if the level is high enough.
- * @internal
- */
-#define UTRACE_DATA0(level, fmt) UPRV_BLOCK_MACRO_BEGIN { \
- if(UTRACE_LEVEL(level)) { \
- utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt)); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
- * Takes one data argument.
- * The number of arguments for this macro must match the number of inserts
- * in the format string. Vector inserts count as two arguments.
- * Calls utrace_data() if the level is high enough.
- * @internal
- */
-#define UTRACE_DATA1(level, fmt, a) UPRV_BLOCK_MACRO_BEGIN { \
- if(UTRACE_LEVEL(level)) { \
- utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY , (level), (fmt), (a)); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
- * Takes two data arguments.
- * The number of arguments for this macro must match the number of inserts
- * in the format string. Vector inserts count as two arguments.
- * Calls utrace_data() if the level is high enough.
- * @internal
- */
-#define UTRACE_DATA2(level, fmt, a, b) UPRV_BLOCK_MACRO_BEGIN { \
- if(UTRACE_LEVEL(level)) { \
- utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY , (level), (fmt), (a), (b)); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
- * Takes three data arguments.
- * The number of arguments for this macro must match the number of inserts
- * in the format string. Vector inserts count as two arguments.
- * Calls utrace_data() if the level is high enough.
- * @internal
- */
-#define UTRACE_DATA3(level, fmt, a, b, c) UPRV_BLOCK_MACRO_BEGIN { \
- if(UTRACE_LEVEL(level)) { \
- utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c)); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
- * Takes four data arguments.
- * The number of arguments for this macro must match the number of inserts
- * in the format string. Vector inserts count as two arguments.
- * Calls utrace_data() if the level is high enough.
- * @internal
- */
-#define UTRACE_DATA4(level, fmt, a, b, c, d) UPRV_BLOCK_MACRO_BEGIN { \
- if(UTRACE_LEVEL(level)) { \
- utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d)); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
- * Takes five data arguments.
- * The number of arguments for this macro must match the number of inserts
- * in the format string. Vector inserts count as two arguments.
- * Calls utrace_data() if the level is high enough.
- * @internal
- */
-#define UTRACE_DATA5(level, fmt, a, b, c, d, e) UPRV_BLOCK_MACRO_BEGIN { \
- if(UTRACE_LEVEL(level)) { \
- utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e)); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
- * Takes six data arguments.
- * The number of arguments for this macro must match the number of inserts
- * in the format string. Vector inserts count as two arguments.
- * Calls utrace_data() if the level is high enough.
- * @internal
- */
-#define UTRACE_DATA6(level, fmt, a, b, c, d, e, f) UPRV_BLOCK_MACRO_BEGIN { \
- if(UTRACE_LEVEL(level)) { \
- utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f)); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
- * Takes seven data arguments.
- * The number of arguments for this macro must match the number of inserts
- * in the format string. Vector inserts count as two arguments.
- * Calls utrace_data() if the level is high enough.
- * @internal
- */
-#define UTRACE_DATA7(level, fmt, a, b, c, d, e, f, g) UPRV_BLOCK_MACRO_BEGIN { \
- if(UTRACE_LEVEL(level)) { \
- utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g)); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
- * Takes eight data arguments.
- * The number of arguments for this macro must match the number of inserts
- * in the format string. Vector inserts count as two arguments.
- * Calls utrace_data() if the level is high enough.
- * @internal
- */
-#define UTRACE_DATA8(level, fmt, a, b, c, d, e, f, g, h) UPRV_BLOCK_MACRO_BEGIN { \
- if(UTRACE_LEVEL(level)) { \
- utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g), (h)); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
- * Takes nine data arguments.
- * The number of arguments for this macro must match the number of inserts
- * in the format string. Vector inserts count as two arguments.
- * Calls utrace_data() if the level is high enough.
- * @internal
- */
-#define UTRACE_DATA9(level, fmt, a, b, c, d, e, f, g, h, i) UPRV_BLOCK_MACRO_BEGIN { \
- if(UTRACE_LEVEL(level)) { \
- utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g), (h), (i)); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-#else
-
-/*
- * When tracing is disabled, the following macros become empty
- */
-
-#define UTRACE_LEVEL(level) 0
-#define UTRACE_ENTRY(fnNumber)
-#define UTRACE_ENTRY_OC(fnNumber)
-#define UTRACE_EXIT()
-#define UTRACE_EXIT_VALUE(val)
-#define UTRACE_EXIT_STATUS(status)
-#define UTRACE_EXIT_VALUE_STATUS(val, status)
-#define UTRACE_EXIT_PTR_STATUS(ptr, status)
-#define UTRACE_DATA0(level, fmt)
-#define UTRACE_DATA1(level, fmt, a)
-#define UTRACE_DATA2(level, fmt, a, b)
-#define UTRACE_DATA3(level, fmt, a, b, c)
-#define UTRACE_DATA4(level, fmt, a, b, c, d)
-#define UTRACE_DATA5(level, fmt, a, b, c, d, e)
-#define UTRACE_DATA6(level, fmt, a, b, c, d, e, f)
-#define UTRACE_DATA7(level, fmt, a, b, c, d, e, f, g)
-#define UTRACE_DATA8(level, fmt, a, b, c, d, e, f, g, h)
-#define UTRACE_DATA9(level, fmt, a, b, c, d, e, f, g, h, i)
-
-#endif
-
-#endif
diff --git a/contrib/libs/icu/common/utrie.cpp b/contrib/libs/icu/common/utrie.cpp
deleted file mode 100644
index ecf9b1cba72..00000000000
--- a/contrib/libs/icu/common/utrie.cpp
+++ /dev/null
@@ -1,1234 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2001-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: utrie.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2001oct20
-* created by: Markus W. Scherer
-*
-* This is a common implementation of a "folded" trie.
-* It is a kind of compressed, serializable table of 16- or 32-bit values associated with
-* Unicode code points (0..0x10ffff).
-*/
-
-#ifdef UTRIE_DEBUG
-# include <stdio.h>
-#endif
-
-#include "unicode/utypes.h"
-#include "cmemory.h"
-#include "utrie.h"
-
-/* miscellaneous ------------------------------------------------------------ */
-
-#undef ABS
-#define ABS(x) ((x)>=0 ? (x) : -(x))
-
-static inline UBool
-equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
- while(length>0 && *s==*t) {
- ++s;
- ++t;
- --length;
- }
- return (UBool)(length==0);
-}
-
-/* Building a trie ----------------------------------------------------------*/
-
-U_CAPI UNewTrie * U_EXPORT2
-utrie_open(UNewTrie *fillIn,
- uint32_t *aliasData, int32_t maxDataLength,
- uint32_t initialValue, uint32_t leadUnitValue,
- UBool latin1Linear) {
- UNewTrie *trie;
- int32_t i, j;
-
- if( maxDataLength<UTRIE_DATA_BLOCK_LENGTH ||
- (latin1Linear && maxDataLength<1024)
- ) {
- return NULL;
- }
-
- if(fillIn!=NULL) {
- trie=fillIn;
- } else {
- trie=(UNewTrie *)uprv_malloc(sizeof(UNewTrie));
- if(trie==NULL) {
- return NULL;
- }
- }
- uprv_memset(trie, 0, sizeof(UNewTrie));
- trie->isAllocated= (UBool)(fillIn==NULL);
-
- if(aliasData!=NULL) {
- trie->data=aliasData;
- trie->isDataAllocated=FALSE;
- } else {
- trie->data=(uint32_t *)uprv_malloc(maxDataLength*4);
- if(trie->data==NULL) {
- uprv_free(trie);
- return NULL;
- }
- trie->isDataAllocated=TRUE;
- }
-
- /* preallocate and reset the first data block (block index 0) */
- j=UTRIE_DATA_BLOCK_LENGTH;
-
- if(latin1Linear) {
- /* preallocate and reset the first block (number 0) and Latin-1 (U+0000..U+00ff) after that */
- /* made sure above that maxDataLength>=1024 */
-
- /* set indexes to point to consecutive data blocks */
- i=0;
- do {
- /* do this at least for trie->index[0] even if that block is only partly used for Latin-1 */
- trie->index[i++]=j;
- j+=UTRIE_DATA_BLOCK_LENGTH;
- } while(i<(256>>UTRIE_SHIFT));
- }
-
- /* reset the initially allocated blocks to the initial value */
- trie->dataLength=j;
- while(j>0) {
- trie->data[--j]=initialValue;
- }
-
- trie->leadUnitValue=leadUnitValue;
- trie->indexLength=UTRIE_MAX_INDEX_LENGTH;
- trie->dataCapacity=maxDataLength;
- trie->isLatin1Linear=latin1Linear;
- trie->isCompacted=FALSE;
- return trie;
-}
-
-U_CAPI UNewTrie * U_EXPORT2
-utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_t aliasDataCapacity) {
- UNewTrie *trie;
- UBool isDataAllocated;
-
- /* do not clone if other is not valid or already compacted */
- if(other==NULL || other->data==NULL || other->isCompacted) {
- return NULL;
- }
-
- /* clone data */
- if(aliasData!=NULL && aliasDataCapacity>=other->dataCapacity) {
- isDataAllocated=FALSE;
- } else {
- aliasDataCapacity=other->dataCapacity;
- aliasData=(uint32_t *)uprv_malloc(other->dataCapacity*4);
- if(aliasData==NULL) {
- return NULL;
- }
- isDataAllocated=TRUE;
- }
-
- trie=utrie_open(fillIn, aliasData, aliasDataCapacity,
- other->data[0], other->leadUnitValue,
- other->isLatin1Linear);
- if(trie==NULL) {
- uprv_free(aliasData);
- } else {
- uprv_memcpy(trie->index, other->index, sizeof(trie->index));
- uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4);
- trie->dataLength=other->dataLength;
- trie->isDataAllocated=isDataAllocated;
- }
-
- return trie;
-}
-
-U_CAPI void U_EXPORT2
-utrie_close(UNewTrie *trie) {
- if(trie!=NULL) {
- if(trie->isDataAllocated) {
- uprv_free(trie->data);
- trie->data=NULL;
- }
- if(trie->isAllocated) {
- uprv_free(trie);
- }
- }
-}
-
-U_CAPI uint32_t * U_EXPORT2
-utrie_getData(UNewTrie *trie, int32_t *pLength) {
- if(trie==NULL || pLength==NULL) {
- return NULL;
- }
-
- *pLength=trie->dataLength;
- return trie->data;
-}
-
-static int32_t
-utrie_allocDataBlock(UNewTrie *trie) {
- int32_t newBlock, newTop;
-
- newBlock=trie->dataLength;
- newTop=newBlock+UTRIE_DATA_BLOCK_LENGTH;
- if(newTop>trie->dataCapacity) {
- /* out of memory in the data array */
- return -1;
- }
- trie->dataLength=newTop;
- return newBlock;
-}
-
-/**
- * No error checking for illegal arguments.
- *
- * @return -1 if no new data block available (out of memory in data array)
- * @internal
- */
-static int32_t
-utrie_getDataBlock(UNewTrie *trie, UChar32 c) {
- int32_t indexValue, newBlock;
-
- c>>=UTRIE_SHIFT;
- indexValue=trie->index[c];
- if(indexValue>0) {
- return indexValue;
- }
-
- /* allocate a new data block */
- newBlock=utrie_allocDataBlock(trie);
- if(newBlock<0) {
- /* out of memory in the data array */
- return -1;
- }
- trie->index[c]=newBlock;
-
- /* copy-on-write for a block from a setRange() */
- uprv_memcpy(trie->data+newBlock, trie->data-indexValue, 4*UTRIE_DATA_BLOCK_LENGTH);
- return newBlock;
-}
-
-/**
- * @return TRUE if the value was successfully set
- */
-U_CAPI UBool U_EXPORT2
-utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value) {
- int32_t block;
-
- /* valid, uncompacted trie and valid c? */
- if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) {
- return FALSE;
- }
-
- block=utrie_getDataBlock(trie, c);
- if(block<0) {
- return FALSE;
- }
-
- trie->data[block+(c&UTRIE_MASK)]=value;
- return TRUE;
-}
-
-U_CAPI uint32_t U_EXPORT2
-utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero) {
- int32_t block;
-
- /* valid, uncompacted trie and valid c? */
- if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) {
- if(pInBlockZero!=NULL) {
- *pInBlockZero=TRUE;
- }
- return 0;
- }
-
- block=trie->index[c>>UTRIE_SHIFT];
- if(pInBlockZero!=NULL) {
- *pInBlockZero= (UBool)(block==0);
- }
-
- return trie->data[ABS(block)+(c&UTRIE_MASK)];
-}
-
-/**
- * @internal
- */
-static void
-utrie_fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
- uint32_t value, uint32_t initialValue, UBool overwrite) {
- uint32_t *pLimit;
-
- pLimit=block+limit;
- block+=start;
- if(overwrite) {
- while(block<pLimit) {
- *block++=value;
- }
- } else {
- while(block<pLimit) {
- if(*block==initialValue) {
- *block=value;
- }
- ++block;
- }
- }
-}
-
-U_CAPI UBool U_EXPORT2
-utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite) {
- /*
- * repeat value in [start..limit[
- * mark index values for repeat-data blocks by setting bit 31 of the index values
- * fill around existing values if any, if(overwrite)
- */
- uint32_t initialValue;
- int32_t block, rest, repeatBlock;
-
- /* valid, uncompacted trie and valid indexes? */
- if( trie==NULL || trie->isCompacted ||
- (uint32_t)start>0x10ffff || (uint32_t)limit>0x110000 || start>limit
- ) {
- return FALSE;
- }
- if(start==limit) {
- return TRUE; /* nothing to do */
- }
-
- initialValue=trie->data[0];
- if(start&UTRIE_MASK) {
- UChar32 nextStart;
-
- /* set partial block at [start..following block boundary[ */
- block=utrie_getDataBlock(trie, start);
- if(block<0) {
- return FALSE;
- }
-
- nextStart=(start+UTRIE_DATA_BLOCK_LENGTH)&~UTRIE_MASK;
- if(nextStart<=limit) {
- utrie_fillBlock(trie->data+block, start&UTRIE_MASK, UTRIE_DATA_BLOCK_LENGTH,
- value, initialValue, overwrite);
- start=nextStart;
- } else {
- utrie_fillBlock(trie->data+block, start&UTRIE_MASK, limit&UTRIE_MASK,
- value, initialValue, overwrite);
- return TRUE;
- }
- }
-
- /* number of positions in the last, partial block */
- rest=limit&UTRIE_MASK;
-
- /* round down limit to a block boundary */
- limit&=~UTRIE_MASK;
-
- /* iterate over all-value blocks */
- if(value==initialValue) {
- repeatBlock=0;
- } else {
- repeatBlock=-1;
- }
- while(start<limit) {
- /* get index value */
- block=trie->index[start>>UTRIE_SHIFT];
- if(block>0) {
- /* already allocated, fill in value */
- utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, overwrite);
- } else if(trie->data[-block]!=value && (block==0 || overwrite)) {
- /* set the repeatBlock instead of the current block 0 or range block */
- if(repeatBlock>=0) {
- trie->index[start>>UTRIE_SHIFT]=-repeatBlock;
- } else {
- /* create and set and fill the repeatBlock */
- repeatBlock=utrie_getDataBlock(trie, start);
- if(repeatBlock<0) {
- return FALSE;
- }
-
- /* set the negative block number to indicate that it is a repeat block */
- trie->index[start>>UTRIE_SHIFT]=-repeatBlock;
- utrie_fillBlock(trie->data+repeatBlock, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, TRUE);
- }
- }
-
- start+=UTRIE_DATA_BLOCK_LENGTH;
- }
-
- if(rest>0) {
- /* set partial block at [last block boundary..limit[ */
- block=utrie_getDataBlock(trie, start);
- if(block<0) {
- return FALSE;
- }
-
- utrie_fillBlock(trie->data+block, 0, rest, value, initialValue, overwrite);
- }
-
- return TRUE;
-}
-
-static int32_t
-_findSameIndexBlock(const int32_t *idx, int32_t indexLength,
- int32_t otherBlock) {
- int32_t block, i;
-
- for(block=UTRIE_BMP_INDEX_LENGTH; block<indexLength; block+=UTRIE_SURROGATE_BLOCK_COUNT) {
- for(i=0; i<UTRIE_SURROGATE_BLOCK_COUNT; ++i) {
- if(idx[block+i]!=idx[otherBlock+i]) {
- break;
- }
- }
- if(i==UTRIE_SURROGATE_BLOCK_COUNT) {
- return block;
- }
- }
- return indexLength;
-}
-
-/*
- * Fold the normalization data for supplementary code points into
- * a compact area on top of the BMP-part of the trie index,
- * with the lead surrogates indexing this compact area.
- *
- * Duplicate the index values for lead surrogates:
- * From inside the BMP area, where some may be overridden with folded values,
- * to just after the BMP area, where they can be retrieved for
- * code point lookups.
- */
-static void
-utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *pErrorCode) {
- int32_t leadIndexes[UTRIE_SURROGATE_BLOCK_COUNT];
- int32_t *idx;
- uint32_t value;
- UChar32 c;
- int32_t indexLength, block;
-#ifdef UTRIE_DEBUG
- int countLeadCUWithData=0;
-#endif
-
- idx=trie->index;
-
- /* copy the lead surrogate indexes into a temporary array */
- uprv_memcpy(leadIndexes, idx+(0xd800>>UTRIE_SHIFT), 4*UTRIE_SURROGATE_BLOCK_COUNT);
-
- /*
- * set all values for lead surrogate code *units* to leadUnitValue
- * so that, by default, runtime lookups will find no data for associated
- * supplementary code points, unless there is data for such code points
- * which will result in a non-zero folding value below that is set for
- * the respective lead units
- *
- * the above saved the indexes for surrogate code *points*
- * fill the indexes with simplified code from utrie_setRange32()
- */
- if(trie->leadUnitValue==trie->data[0]) {
- block=0; /* leadUnitValue==initialValue, use all-initial-value block */
- } else {
- /* create and fill the repeatBlock */
- block=utrie_allocDataBlock(trie);
- if(block<0) {
- /* data table overflow */
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, trie->leadUnitValue, trie->data[0], TRUE);
- block=-block; /* negative block number to indicate that it is a repeat block */
- }
- for(c=(0xd800>>UTRIE_SHIFT); c<(0xdc00>>UTRIE_SHIFT); ++c) {
- trie->index[c]=block;
- }
-
- /*
- * Fold significant index values into the area just after the BMP indexes.
- * In case the first lead surrogate has significant data,
- * its index block must be used first (in which case the folding is a no-op).
- * Later all folded index blocks are moved up one to insert the copied
- * lead surrogate indexes.
- */
- indexLength=UTRIE_BMP_INDEX_LENGTH;
-
- /* search for any index (stage 1) entries for supplementary code points */
- for(c=0x10000; c<0x110000;) {
- if(idx[c>>UTRIE_SHIFT]!=0) {
- /* there is data, treat the full block for a lead surrogate */
- c&=~0x3ff;
-
-#ifdef UTRIE_DEBUG
- ++countLeadCUWithData;
- /* printf("supplementary data for lead surrogate U+%04lx\n", (long)(0xd7c0+(c>>10))); */
-#endif
-
- /* is there an identical index block? */
- block=_findSameIndexBlock(idx, indexLength, c>>UTRIE_SHIFT);
-
- /*
- * get a folded value for [c..c+0x400[ and,
- * if different from the value for the lead surrogate code point,
- * set it for the lead surrogate code unit
- */
- value=getFoldedValue(trie, c, block+UTRIE_SURROGATE_BLOCK_COUNT);
- if(value!=utrie_get32(trie, U16_LEAD(c), NULL)) {
- if(!utrie_set32(trie, U16_LEAD(c), value)) {
- /* data table overflow */
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- /* if we did not find an identical index block... */
- if(block==indexLength) {
- /* move the actual index (stage 1) entries from the supplementary position to the new one */
- uprv_memmove(idx+indexLength,
- idx+(c>>UTRIE_SHIFT),
- 4*UTRIE_SURROGATE_BLOCK_COUNT);
- indexLength+=UTRIE_SURROGATE_BLOCK_COUNT;
- }
- }
- c+=0x400;
- } else {
- c+=UTRIE_DATA_BLOCK_LENGTH;
- }
- }
-#ifdef UTRIE_DEBUG
- if(countLeadCUWithData>0) {
- printf("supplementary data for %d lead surrogates\n", countLeadCUWithData);
- }
-#endif
-
- /*
- * index array overflow?
- * This is to guarantee that a folding offset is of the form
- * UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
- * If the index is too large, then n>=1024 and more than 10 bits are necessary.
- *
- * In fact, it can only ever become n==1024 with completely unfoldable data and
- * the additional block of duplicated values for lead surrogates.
- */
- if(indexLength>=UTRIE_MAX_INDEX_LENGTH) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return;
- }
-
- /*
- * make space for the lead surrogate index block and
- * insert it between the BMP indexes and the folded ones
- */
- uprv_memmove(idx+UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT,
- idx+UTRIE_BMP_INDEX_LENGTH,
- 4*(indexLength-UTRIE_BMP_INDEX_LENGTH));
- uprv_memcpy(idx+UTRIE_BMP_INDEX_LENGTH,
- leadIndexes,
- 4*UTRIE_SURROGATE_BLOCK_COUNT);
- indexLength+=UTRIE_SURROGATE_BLOCK_COUNT;
-
-#ifdef UTRIE_DEBUG
- printf("trie index count: BMP %ld all Unicode %ld folded %ld\n",
- UTRIE_BMP_INDEX_LENGTH, (long)UTRIE_MAX_INDEX_LENGTH, indexLength);
-#endif
-
- trie->indexLength=indexLength;
-}
-
-/*
- * Set a value in the trie index map to indicate which data block
- * is referenced and which one is not.
- * utrie_compact() will remove data blocks that are not used at all.
- * Set
- * - 0 if it is used
- * - -1 if it is not used
- */
-static void
-_findUnusedBlocks(UNewTrie *trie) {
- int32_t i;
-
- /* fill the entire map with "not used" */
- uprv_memset(trie->map, 0xff, (UTRIE_MAX_BUILD_TIME_DATA_LENGTH>>UTRIE_SHIFT)*4);
-
- /* mark each block that _is_ used with 0 */
- for(i=0; i<trie->indexLength; ++i) {
- trie->map[ABS(trie->index[i])>>UTRIE_SHIFT]=0;
- }
-
- /* never move the all-initial-value block 0 */
- trie->map[0]=0;
-}
-
-static int32_t
-_findSameDataBlock(const uint32_t *data, int32_t dataLength,
- int32_t otherBlock, int32_t step) {
- int32_t block;
-
- /* ensure that we do not even partially get past dataLength */
- dataLength-=UTRIE_DATA_BLOCK_LENGTH;
-
- for(block=0; block<=dataLength; block+=step) {
- if(equal_uint32(data+block, data+otherBlock, UTRIE_DATA_BLOCK_LENGTH)) {
- return block;
- }
- }
- return -1;
-}
-
-/*
- * Compact a folded build-time trie.
- *
- * The compaction
- * - removes blocks that are identical with earlier ones
- * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
- * - moves blocks in steps of the data granularity
- * - moves and overlaps blocks that overlap with multiple values in the overlap region
- *
- * It does not
- * - try to move and overlap blocks that are not already adjacent
- */
-static void
-utrie_compact(UNewTrie *trie, UBool overlap, UErrorCode *pErrorCode) {
- int32_t i, start, newStart, overlapStart;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- /* valid, uncompacted trie? */
- if(trie==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- if(trie->isCompacted) {
- return; /* nothing left to do */
- }
-
- /* compaction */
-
- /* initialize the index map with "block is used/unused" flags */
- _findUnusedBlocks(trie);
-
- /* if Latin-1 is preallocated and linear, then do not compact Latin-1 data */
- if(trie->isLatin1Linear && UTRIE_SHIFT<=8) {
- overlapStart=UTRIE_DATA_BLOCK_LENGTH+256;
- } else {
- overlapStart=UTRIE_DATA_BLOCK_LENGTH;
- }
-
- newStart=UTRIE_DATA_BLOCK_LENGTH;
- for(start=newStart; start<trie->dataLength;) {
- /*
- * start: index of first entry of current block
- * newStart: index where the current block is to be moved
- * (right after current end of already-compacted data)
- */
-
- /* skip blocks that are not used */
- if(trie->map[start>>UTRIE_SHIFT]<0) {
- /* advance start to the next block */
- start+=UTRIE_DATA_BLOCK_LENGTH;
-
- /* leave newStart with the previous block! */
- continue;
- }
-
- /* search for an identical block */
- if( start>=overlapStart &&
- (i=_findSameDataBlock(trie->data, newStart, start,
- overlap ? UTRIE_DATA_GRANULARITY : UTRIE_DATA_BLOCK_LENGTH))
- >=0
- ) {
- /* found an identical block, set the other block's index value for the current block */
- trie->map[start>>UTRIE_SHIFT]=i;
-
- /* advance start to the next block */
- start+=UTRIE_DATA_BLOCK_LENGTH;
-
- /* leave newStart with the previous block! */
- continue;
- }
-
- /* see if the beginning of this block can be overlapped with the end of the previous block */
- if(overlap && start>=overlapStart) {
- /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
- for(i=UTRIE_DATA_BLOCK_LENGTH-UTRIE_DATA_GRANULARITY;
- i>0 && !equal_uint32(trie->data+(newStart-i), trie->data+start, i);
- i-=UTRIE_DATA_GRANULARITY) {}
- } else {
- i=0;
- }
-
- if(i>0) {
- /* some overlap */
- trie->map[start>>UTRIE_SHIFT]=newStart-i;
-
- /* move the non-overlapping indexes to their new positions */
- start+=i;
- for(i=UTRIE_DATA_BLOCK_LENGTH-i; i>0; --i) {
- trie->data[newStart++]=trie->data[start++];
- }
- } else if(newStart<start) {
- /* no overlap, just move the indexes to their new positions */
- trie->map[start>>UTRIE_SHIFT]=newStart;
- for(i=UTRIE_DATA_BLOCK_LENGTH; i>0; --i) {
- trie->data[newStart++]=trie->data[start++];
- }
- } else /* no overlap && newStart==start */ {
- trie->map[start>>UTRIE_SHIFT]=start;
- newStart+=UTRIE_DATA_BLOCK_LENGTH;
- start=newStart;
- }
- }
-
- /* now adjust the index (stage 1) table */
- for(i=0; i<trie->indexLength; ++i) {
- trie->index[i]=trie->map[ABS(trie->index[i])>>UTRIE_SHIFT];
- }
-
-#ifdef UTRIE_DEBUG
- /* we saved some space */
- printf("compacting trie: count of 32-bit words %lu->%lu\n",
- (long)trie->dataLength, (long)newStart);
-#endif
-
- trie->dataLength=newStart;
-}
-
-/* serialization ------------------------------------------------------------ */
-
-/*
- * Default function for the folding value:
- * Just store the offset (16 bits) if there is any non-initial-value entry.
- *
- * The offset parameter is never 0.
- * Returning the offset itself is safe for UTRIE_SHIFT>=5 because
- * for UTRIE_SHIFT==5 the maximum index length is UTRIE_MAX_INDEX_LENGTH==0x8800
- * which fits into 16-bit trie values;
- * for higher UTRIE_SHIFT, UTRIE_MAX_INDEX_LENGTH decreases.
- *
- * Theoretically, it would be safer for all possible UTRIE_SHIFT including
- * those of 4 and lower to return offset>>UTRIE_SURROGATE_BLOCK_BITS
- * which would always result in a value of 0x40..0x43f
- * (start/end 1k blocks of supplementary Unicode code points).
- * However, this would be uglier, and would not work for some existing
- * binary data file formats.
- *
- * Also, we do not plan to change UTRIE_SHIFT because it would change binary
- * data file formats, and we would probably not make it smaller because of
- * the then even larger BMP index length even for empty tries.
- */
-static uint32_t U_CALLCONV
-defaultGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
- uint32_t value, initialValue;
- UChar32 limit;
- UBool inBlockZero;
-
- initialValue=trie->data[0];
- limit=start+0x400;
- while(start<limit) {
- value=utrie_get32(trie, start, &inBlockZero);
- if(inBlockZero) {
- start+=UTRIE_DATA_BLOCK_LENGTH;
- } else if(value!=initialValue) {
- return (uint32_t)offset;
- } else {
- ++start;
- }
- }
- return 0;
-}
-
-U_CAPI int32_t U_EXPORT2
-utrie_serialize(UNewTrie *trie, void *dt, int32_t capacity,
- UNewTrieGetFoldedValue *getFoldedValue,
- UBool reduceTo16Bits,
- UErrorCode *pErrorCode) {
- UTrieHeader *header;
- uint32_t *p;
- uint16_t *dest16;
- int32_t i, length;
- uint8_t* data = NULL;
-
- /* argument check */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- if(trie==NULL || capacity<0 || (capacity>0 && dt==NULL)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- if(getFoldedValue==NULL) {
- getFoldedValue=defaultGetFoldedValue;
- }
-
- data = (uint8_t*)dt;
- /* fold and compact if necessary, also checks that indexLength is within limits */
- if(!trie->isCompacted) {
- /* compact once without overlap to improve folding */
- utrie_compact(trie, FALSE, pErrorCode);
-
- /* fold the supplementary part of the index array */
- utrie_fold(trie, getFoldedValue, pErrorCode);
-
- /* compact again with overlap for minimum data array length */
- utrie_compact(trie, TRUE, pErrorCode);
-
- trie->isCompacted=TRUE;
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- }
-
- /* is dataLength within limits? */
- if( (reduceTo16Bits ? (trie->dataLength+trie->indexLength) : trie->dataLength) >= UTRIE_MAX_DATA_LENGTH) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- }
-
- length=sizeof(UTrieHeader)+2*trie->indexLength;
- if(reduceTo16Bits) {
- length+=2*trie->dataLength;
- } else {
- length+=4*trie->dataLength;
- }
-
- if(length>capacity) {
- return length; /* preflighting */
- }
-
-#ifdef UTRIE_DEBUG
- printf("**UTrieLengths(serialize)** index:%6ld data:%6ld serialized:%6ld\n",
- (long)trie->indexLength, (long)trie->dataLength, (long)length);
-#endif
-
- /* set the header fields */
- header=(UTrieHeader *)data;
- data+=sizeof(UTrieHeader);
-
- header->signature=0x54726965; /* "Trie" */
- header->options=UTRIE_SHIFT | (UTRIE_INDEX_SHIFT<<UTRIE_OPTIONS_INDEX_SHIFT);
-
- if(!reduceTo16Bits) {
- header->options|=UTRIE_OPTIONS_DATA_IS_32_BIT;
- }
- if(trie->isLatin1Linear) {
- header->options|=UTRIE_OPTIONS_LATIN1_IS_LINEAR;
- }
-
- header->indexLength=trie->indexLength;
- header->dataLength=trie->dataLength;
-
- /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */
- if(reduceTo16Bits) {
- /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */
- p=(uint32_t *)trie->index;
- dest16=(uint16_t *)data;
- for(i=trie->indexLength; i>0; --i) {
- *dest16++=(uint16_t)((*p++ + trie->indexLength)>>UTRIE_INDEX_SHIFT);
- }
-
- /* write 16-bit data values */
- p=trie->data;
- for(i=trie->dataLength; i>0; --i) {
- *dest16++=(uint16_t)*p++;
- }
- } else {
- /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */
- p=(uint32_t *)trie->index;
- dest16=(uint16_t *)data;
- for(i=trie->indexLength; i>0; --i) {
- *dest16++=(uint16_t)(*p++ >> UTRIE_INDEX_SHIFT);
- }
-
- /* write 32-bit data values */
- uprv_memcpy(dest16, trie->data, 4*(size_t)trie->dataLength);
- }
-
- return length;
-}
-
-/* inverse to defaultGetFoldedValue() */
-U_CAPI int32_t U_EXPORT2
-utrie_defaultGetFoldingOffset(uint32_t data) {
- return (int32_t)data;
-}
-
-U_CAPI int32_t U_EXPORT2
-utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode) {
- const UTrieHeader *header;
- const uint16_t *p16;
- uint32_t options;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return -1;
- }
-
- /* enough data for a trie header? */
- if(length<(int32_t)sizeof(UTrieHeader)) {
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return -1;
- }
-
- /* check the signature */
- header=(const UTrieHeader *)data;
- if(header->signature!=0x54726965) {
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return -1;
- }
-
- /* get the options and check the shift values */
- options=header->options;
- if( (options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
- ((options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT
- ) {
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return -1;
- }
- trie->isLatin1Linear= (UBool)((options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0);
-
- /* get the length values */
- trie->indexLength=header->indexLength;
- trie->dataLength=header->dataLength;
-
- length-=(int32_t)sizeof(UTrieHeader);
-
- /* enough data for the index? */
- if(length<2*trie->indexLength) {
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return -1;
- }
- p16=(const uint16_t *)(header+1);
- trie->index=p16;
- p16+=trie->indexLength;
- length-=2*trie->indexLength;
-
- /* get the data */
- if(options&UTRIE_OPTIONS_DATA_IS_32_BIT) {
- if(length<4*trie->dataLength) {
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return -1;
- }
- trie->data32=(const uint32_t *)p16;
- trie->initialValue=trie->data32[0];
- length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+4*trie->dataLength;
- } else {
- if(length<2*trie->dataLength) {
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return -1;
- }
-
- /* the "data16" data is used via the index pointer */
- trie->data32=NULL;
- trie->initialValue=trie->index[trie->indexLength];
- length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+2*trie->dataLength;
- }
-
- trie->getFoldingOffset=utrie_defaultGetFoldingOffset;
-
- return length;
-}
-
-U_CAPI int32_t U_EXPORT2
-utrie_unserializeDummy(UTrie *trie,
- void *data, int32_t length,
- uint32_t initialValue, uint32_t leadUnitValue,
- UBool make16BitTrie,
- UErrorCode *pErrorCode) {
- uint16_t *p16;
- int32_t actualLength, latin1Length, i, limit;
- uint16_t block;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return -1;
- }
-
- /* calculate the actual size of the dummy trie data */
-
- /* max(Latin-1, block 0) */
- latin1Length= 256; /*UTRIE_SHIFT<=8 ? 256 : UTRIE_DATA_BLOCK_LENGTH;*/
-
- trie->indexLength=UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT;
- trie->dataLength=latin1Length;
- if(leadUnitValue!=initialValue) {
- trie->dataLength+=UTRIE_DATA_BLOCK_LENGTH;
- }
-
- actualLength=trie->indexLength*2;
- if(make16BitTrie) {
- actualLength+=trie->dataLength*2;
- } else {
- actualLength+=trie->dataLength*4;
- }
-
- /* enough space for the dummy trie? */
- if(length<actualLength) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- return actualLength;
- }
-
- trie->isLatin1Linear=TRUE;
- trie->initialValue=initialValue;
-
- /* fill the index and data arrays */
- p16=(uint16_t *)data;
- trie->index=p16;
-
- if(make16BitTrie) {
- /* indexes to block 0 */
- block=(uint16_t)(trie->indexLength>>UTRIE_INDEX_SHIFT);
- limit=trie->indexLength;
- for(i=0; i<limit; ++i) {
- p16[i]=block;
- }
-
- if(leadUnitValue!=initialValue) {
- /* indexes for lead surrogate code units to the block after Latin-1 */
- block+=(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT);
- i=0xd800>>UTRIE_SHIFT;
- limit=0xdc00>>UTRIE_SHIFT;
- for(; i<limit; ++i) {
- p16[i]=block;
- }
- }
-
- trie->data32=NULL;
-
- /* Latin-1 data */
- p16+=trie->indexLength;
- for(i=0; i<latin1Length; ++i) {
- p16[i]=(uint16_t)initialValue;
- }
-
- /* data for lead surrogate code units */
- if(leadUnitValue!=initialValue) {
- limit=latin1Length+UTRIE_DATA_BLOCK_LENGTH;
- for(/* i=latin1Length */; i<limit; ++i) {
- p16[i]=(uint16_t)leadUnitValue;
- }
- }
- } else {
- uint32_t *p32;
-
- /* indexes to block 0 */
- uprv_memset(p16, 0, trie->indexLength*2);
-
- if(leadUnitValue!=initialValue) {
- /* indexes for lead surrogate code units to the block after Latin-1 */
- block=(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT);
- i=0xd800>>UTRIE_SHIFT;
- limit=0xdc00>>UTRIE_SHIFT;
- for(; i<limit; ++i) {
- p16[i]=block;
- }
- }
-
- trie->data32=p32=(uint32_t *)(p16+trie->indexLength);
-
- /* Latin-1 data */
- for(i=0; i<latin1Length; ++i) {
- p32[i]=initialValue;
- }
-
- /* data for lead surrogate code units */
- if(leadUnitValue!=initialValue) {
- limit=latin1Length+UTRIE_DATA_BLOCK_LENGTH;
- for(/* i=latin1Length */; i<limit; ++i) {
- p32[i]=leadUnitValue;
- }
- }
- }
-
- trie->getFoldingOffset=utrie_defaultGetFoldingOffset;
-
- return actualLength;
-}
-
-/* enumeration -------------------------------------------------------------- */
-
-/* default UTrieEnumValue() returns the input value itself */
-static uint32_t U_CALLCONV
-enumSameValue(const void * /*context*/, uint32_t value) {
- return value;
-}
-
-/**
- * Enumerate all ranges of code points with the same relevant values.
- * The values are transformed from the raw trie entries by the enumValue function.
- */
-U_CAPI void U_EXPORT2
-utrie_enum(const UTrie *trie,
- UTrieEnumValue *enumValue, UTrieEnumRange *enumRange, const void *context) {
- const uint32_t *data32;
- const uint16_t *idx;
-
- uint32_t value, prevValue, initialValue;
- UChar32 c, prev;
- int32_t l, i, j, block, prevBlock, nullBlock, offset;
-
- /* check arguments */
- if(trie==NULL || trie->index==NULL || enumRange==NULL) {
- return;
- }
- if(enumValue==NULL) {
- enumValue=enumSameValue;
- }
-
- idx=trie->index;
- data32=trie->data32;
-
- /* get the enumeration value that corresponds to an initial-value trie data entry */
- initialValue=enumValue(context, trie->initialValue);
-
- if(data32==NULL) {
- nullBlock=trie->indexLength;
- } else {
- nullBlock=0;
- }
-
- /* set variables for previous range */
- prevBlock=nullBlock;
- prev=0;
- prevValue=initialValue;
-
- /* enumerate BMP - the main loop enumerates data blocks */
- for(i=0, c=0; c<=0xffff; ++i) {
- if(c==0xd800) {
- /* skip lead surrogate code _units_, go to lead surr. code _points_ */
- i=UTRIE_BMP_INDEX_LENGTH;
- } else if(c==0xdc00) {
- /* go back to regular BMP code points */
- i=c>>UTRIE_SHIFT;
- }
-
- block=idx[i]<<UTRIE_INDEX_SHIFT;
- if(block==prevBlock) {
- /* the block is the same as the previous one, and filled with value */
- c+=UTRIE_DATA_BLOCK_LENGTH;
- } else if(block==nullBlock) {
- /* this is the all-initial-value block */
- if(prevValue!=initialValue) {
- if(prev<c) {
- if(!enumRange(context, prev, c, prevValue)) {
- return;
- }
- }
- prevBlock=nullBlock;
- prev=c;
- prevValue=initialValue;
- }
- c+=UTRIE_DATA_BLOCK_LENGTH;
- } else {
- prevBlock=block;
- for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) {
- value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
- if(value!=prevValue) {
- if(prev<c) {
- if(!enumRange(context, prev, c, prevValue)) {
- return;
- }
- }
- if(j>0) {
- /* the block is not filled with all the same value */
- prevBlock=-1;
- }
- prev=c;
- prevValue=value;
- }
- ++c;
- }
- }
- }
-
- /* enumerate supplementary code points */
- for(l=0xd800; l<0xdc00;) {
- /* lead surrogate access */
- offset=idx[l>>UTRIE_SHIFT]<<UTRIE_INDEX_SHIFT;
- if(offset==nullBlock) {
- /* no entries for a whole block of lead surrogates */
- if(prevValue!=initialValue) {
- if(prev<c) {
- if(!enumRange(context, prev, c, prevValue)) {
- return;
- }
- }
- prevBlock=nullBlock;
- prev=c;
- prevValue=initialValue;
- }
-
- l+=UTRIE_DATA_BLOCK_LENGTH;
- c+=UTRIE_DATA_BLOCK_LENGTH<<10;
- continue;
- }
-
- value= data32!=NULL ? data32[offset+(l&UTRIE_MASK)] : idx[offset+(l&UTRIE_MASK)];
-
- /* enumerate trail surrogates for this lead surrogate */
- offset=trie->getFoldingOffset(value);
- if(offset<=0) {
- /* no data for this lead surrogate */
- if(prevValue!=initialValue) {
- if(prev<c) {
- if(!enumRange(context, prev, c, prevValue)) {
- return;
- }
- }
- prevBlock=nullBlock;
- prev=c;
- prevValue=initialValue;
- }
-
- /* nothing else to do for the supplementary code points for this lead surrogate */
- c+=0x400;
- } else {
- /* enumerate code points for this lead surrogate */
- i=offset;
- offset+=UTRIE_SURROGATE_BLOCK_COUNT;
- do {
- /* copy of most of the body of the BMP loop */
- block=idx[i]<<UTRIE_INDEX_SHIFT;
- if(block==prevBlock) {
- /* the block is the same as the previous one, and filled with value */
- c+=UTRIE_DATA_BLOCK_LENGTH;
- } else if(block==nullBlock) {
- /* this is the all-initial-value block */
- if(prevValue!=initialValue) {
- if(prev<c) {
- if(!enumRange(context, prev, c, prevValue)) {
- return;
- }
- }
- prevBlock=nullBlock;
- prev=c;
- prevValue=initialValue;
- }
- c+=UTRIE_DATA_BLOCK_LENGTH;
- } else {
- prevBlock=block;
- for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) {
- value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
- if(value!=prevValue) {
- if(prev<c) {
- if(!enumRange(context, prev, c, prevValue)) {
- return;
- }
- }
- if(j>0) {
- /* the block is not filled with all the same value */
- prevBlock=-1;
- }
- prev=c;
- prevValue=value;
- }
- ++c;
- }
- }
- } while(++i<offset);
- }
-
- ++l;
- }
-
- /* deliver last range */
- enumRange(context, prev, c, prevValue);
-}
diff --git a/contrib/libs/icu/common/utrie.h b/contrib/libs/icu/common/utrie.h
deleted file mode 100644
index 532ba778eb6..00000000000
--- a/contrib/libs/icu/common/utrie.h
+++ /dev/null
@@ -1,793 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2001-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: utrie.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2001nov08
-* created by: Markus W. Scherer
-*/
-
-#ifndef __UTRIE_H__
-#define __UTRIE_H__
-
-#include "unicode/utypes.h"
-#include "unicode/utf16.h"
-
-U_CDECL_BEGIN
-
-/**
- * \file
- *
- * This is a common implementation of a "folded" trie.
- * It is a kind of compressed, serializable table of 16- or 32-bit values associated with
- * Unicode code points (0..0x10ffff).
- *
- * This implementation is optimized for getting values while walking forward
- * through a UTF-16 string.
- * Therefore, the simplest and fastest access macros are the
- * _FROM_LEAD() and _FROM_OFFSET_TRAIL() macros.
- *
- * The _FROM_BMP() macros are a little more complicated; they get values
- * even for lead surrogate code _points_, while the _FROM_LEAD() macros
- * get special "folded" values for lead surrogate code _units_ if
- * there is relevant data associated with them.
- * From such a folded value, an offset needs to be extracted to supply
- * to the _FROM_OFFSET_TRAIL() macros.
- *
- * Most of the more complex (and more convenient) functions/macros call a callback function
- * to get that offset from the folded value for a lead surrogate unit.
- */
-
-/**
- * Trie constants, defining shift widths, index array lengths, etc.
- */
-enum {
- /** Shift size for shifting right the input index. 1..9 */
- UTRIE_SHIFT=5,
-
- /** Number of data values in a stage 2 (data array) block. 2, 4, 8, .., 0x200 */
- UTRIE_DATA_BLOCK_LENGTH=1<<UTRIE_SHIFT,
-
- /** Mask for getting the lower bits from the input index. */
- UTRIE_MASK=UTRIE_DATA_BLOCK_LENGTH-1,
-
- /**
- * Lead surrogate code points' index displacement in the index array.
- * 0x10000-0xd800=0x2800
- */
- UTRIE_LEAD_INDEX_DISP=0x2800>>UTRIE_SHIFT,
-
- /**
- * Shift size for shifting left the index array values.
- * Increases possible data size with 16-bit index values at the cost
- * of compactability.
- * This requires blocks of stage 2 data to be aligned by UTRIE_DATA_GRANULARITY.
- * 0..UTRIE_SHIFT
- */
- UTRIE_INDEX_SHIFT=2,
-
- /** The alignment size of a stage 2 data block. Also the granularity for compaction. */
- UTRIE_DATA_GRANULARITY=1<<UTRIE_INDEX_SHIFT,
-
- /** Number of bits of a trail surrogate that are used in index table lookups. */
- UTRIE_SURROGATE_BLOCK_BITS=10-UTRIE_SHIFT,
-
- /**
- * Number of index (stage 1) entries per lead surrogate.
- * Same as number of index entries for 1024 trail surrogates,
- * ==0x400>>UTRIE_SHIFT
- */
- UTRIE_SURROGATE_BLOCK_COUNT=(1<<UTRIE_SURROGATE_BLOCK_BITS),
-
- /** Length of the BMP portion of the index (stage 1) array. */
- UTRIE_BMP_INDEX_LENGTH=0x10000>>UTRIE_SHIFT
-};
-
-/**
- * Length of the index (stage 1) array before folding.
- * Maximum number of Unicode code points (0x110000) shifted right by UTRIE_SHIFT.
- */
-#define UTRIE_MAX_INDEX_LENGTH (0x110000>>UTRIE_SHIFT)
-
-/**
- * Maximum length of the runtime data (stage 2) array.
- * Limited by 16-bit index values that are left-shifted by UTRIE_INDEX_SHIFT.
- */
-#define UTRIE_MAX_DATA_LENGTH (0x10000<<UTRIE_INDEX_SHIFT)
-
-/**
- * Maximum length of the build-time data (stage 2) array.
- * The maximum length is 0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400.
- * (Number of Unicode code points + one all-initial-value block +
- * possible duplicate entries for 1024 lead surrogates.)
- */
-#define UTRIE_MAX_BUILD_TIME_DATA_LENGTH (0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400)
-
-/**
- * Number of bytes for a dummy trie.
- * A dummy trie is an empty runtime trie, used when a real data trie cannot
- * be loaded.
- * The number of bytes works for Latin-1-linear tries with 32-bit data
- * (worst case).
- *
- * Calculation:
- * BMP index + 1 index block for lead surrogate code points +
- * Latin-1-linear array + 1 data block for lead surrogate code points
- *
- * Latin-1: if(UTRIE_SHIFT<=8) { 256 } else { included in first data block }
- *
- * @see utrie_unserializeDummy
- */
-#define UTRIE_DUMMY_SIZE ((UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT)*2+(UTRIE_SHIFT<=8?256:UTRIE_DATA_BLOCK_LENGTH)*4+UTRIE_DATA_BLOCK_LENGTH*4)
-
-/**
- * Runtime UTrie callback function.
- * Extract from a lead surrogate's data the
- * index array offset of the indexes for that lead surrogate.
- *
- * @param data data value for a surrogate from the trie, including the folding offset
- * @return offset>=UTRIE_BMP_INDEX_LENGTH, or 0 if there is no data for the lead surrogate
- */
-typedef int32_t U_CALLCONV
-UTrieGetFoldingOffset(uint32_t data);
-
-/**
- * Run-time Trie structure.
- *
- * Either the data table is 16 bits wide and accessed via the index
- * pointer, with each index item increased by indexLength;
- * in this case, data32==NULL.
- *
- * Or the data table is 32 bits wide and accessed via the data32 pointer.
- */
-struct UTrie {
- const uint16_t *index;
- const uint32_t *data32; /* NULL if 16b data is used via index */
-
- /**
- * This function is not used in _FROM_LEAD, _FROM_BMP, and _FROM_OFFSET_TRAIL macros.
- * If convenience macros like _GET16 or _NEXT32 are used, this function must be set.
- *
- * utrie_unserialize() sets a default function which simply returns
- * the lead surrogate's value itself - which is the inverse of the default
- * folding function used by utrie_serialize().
- *
- * @see UTrieGetFoldingOffset
- */
- UTrieGetFoldingOffset *getFoldingOffset;
-
- int32_t indexLength, dataLength;
- uint32_t initialValue;
- UBool isLatin1Linear;
-};
-
-#ifndef __UTRIE2_H__
-typedef struct UTrie UTrie;
-#endif
-
-/** Internal trie getter from an offset (0 if c16 is a BMP/lead units) and a 16-bit unit */
-#define _UTRIE_GET_RAW(trie, data, offset, c16) \
- (trie)->data[ \
- ((int32_t)((trie)->index[(offset)+((c16)>>UTRIE_SHIFT)])<<UTRIE_INDEX_SHIFT)+ \
- ((c16)&UTRIE_MASK) \
- ]
-
-/** Internal trie getter from a pair of surrogates */
-#define _UTRIE_GET_FROM_PAIR(trie, data, c, c2, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \
- int32_t __offset; \
-\
- /* get data for lead surrogate */ \
- (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
- __offset=(trie)->getFoldingOffset(result); \
-\
- /* get the real data from the folded lead/trail units */ \
- if(__offset>0) { \
- (result)=_UTRIE_GET_RAW((trie), data, __offset, (c2)&0x3ff); \
- } else { \
- (result)=(resultType)((trie)->initialValue); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/** Internal trie getter from a BMP code point, treating a lead surrogate as a normal code point */
-#define _UTRIE_GET_FROM_BMP(trie, data, c16) \
- _UTRIE_GET_RAW(trie, data, 0xd800<=(c16) && (c16)<=0xdbff ? UTRIE_LEAD_INDEX_DISP : 0, c16)
-
-/**
- * Internal trie getter from a code point.
- * Could be faster(?) but longer with
- * if((c32)<=0xd7ff) { (result)=_UTRIE_GET_RAW(trie, data, 0, c32); }
- */
-#define _UTRIE_GET(trie, data, c32, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \
- if((uint32_t)(c32)<=0xffff) { \
- /* BMP code points */ \
- (result)=_UTRIE_GET_FROM_BMP(trie, data, c32); \
- } else if((uint32_t)(c32)<=0x10ffff) { \
- /* supplementary code point */ \
- UChar __lead16=U16_LEAD(c32); \
- _UTRIE_GET_FROM_PAIR(trie, data, __lead16, c32, result, resultType); \
- } else { \
- /* out of range */ \
- (result)=(resultType)((trie)->initialValue); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/** Internal next-post-increment: get the next code point (c, c2) and its data */
-#define _UTRIE_NEXT(trie, data, src, limit, c, c2, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=*(src)++; \
- if(!U16_IS_LEAD(c)) { \
- (c2)=0; \
- (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
- } else if((src)!=(limit) && U16_IS_TRAIL((c2)=*(src))) { \
- ++(src); \
- _UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
- } else { \
- /* unpaired lead surrogate code point */ \
- (c2)=0; \
- (result)=_UTRIE_GET_RAW((trie), data, UTRIE_LEAD_INDEX_DISP, (c)); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/** Internal previous: get the previous code point (c, c2) and its data */
-#define _UTRIE_PREVIOUS(trie, data, start, src, c, c2, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=*--(src); \
- if(!U16_IS_SURROGATE(c)) { \
- (c2)=0; \
- (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
- } else if(!U16_IS_SURROGATE_LEAD(c)) { \
- /* trail surrogate */ \
- if((start)!=(src) && U16_IS_LEAD((c2)=*((src)-1))) { \
- --(src); \
- (result)=(c); (c)=(c2); (c2)=(UChar)(result); /* swap c, c2 */ \
- _UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
- } else { \
- /* unpaired trail surrogate code point */ \
- (c2)=0; \
- (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
- } \
- } else { \
- /* unpaired lead surrogate code point */ \
- (c2)=0; \
- (result)=_UTRIE_GET_RAW((trie), data, UTRIE_LEAD_INDEX_DISP, (c)); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/* Public UTrie API ---------------------------------------------------------*/
-
-/**
- * Get a pointer to the contiguous part of the data array
- * for the Latin-1 range (U+0000..U+00ff).
- * Must be used only if the Latin-1 range is in fact linear
- * (trie->isLatin1Linear).
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @return (const uint16_t *) pointer to values for Latin-1 code points
- */
-#define UTRIE_GET16_LATIN1(trie) ((trie)->index+(trie)->indexLength+UTRIE_DATA_BLOCK_LENGTH)
-
-/**
- * Get a pointer to the contiguous part of the data array
- * for the Latin-1 range (U+0000..U+00ff).
- * Must be used only if the Latin-1 range is in fact linear
- * (trie->isLatin1Linear).
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @return (const uint32_t *) pointer to values for Latin-1 code points
- */
-#define UTRIE_GET32_LATIN1(trie) ((trie)->data32+UTRIE_DATA_BLOCK_LENGTH)
-
-/**
- * Get a 16-bit trie value from a BMP code point (UChar, <=U+ffff).
- * c16 may be a lead surrogate, which may have a value including a folding offset.
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @param c16 (UChar, in) the input BMP code point
- * @return (uint16_t) trie lookup result
- */
-#define UTRIE_GET16_FROM_LEAD(trie, c16) _UTRIE_GET_RAW(trie, index, 0, c16)
-
-/**
- * Get a 32-bit trie value from a BMP code point (UChar, <=U+ffff).
- * c16 may be a lead surrogate, which may have a value including a folding offset.
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @param c16 (UChar, in) the input BMP code point
- * @return (uint32_t) trie lookup result
- */
-#define UTRIE_GET32_FROM_LEAD(trie, c16) _UTRIE_GET_RAW(trie, data32, 0, c16)
-
-/**
- * Get a 16-bit trie value from a BMP code point (UChar, <=U+ffff).
- * Even lead surrogate code points are treated as normal code points,
- * with unfolded values that may differ from _FROM_LEAD() macro results for them.
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @param c16 (UChar, in) the input BMP code point
- * @return (uint16_t) trie lookup result
- */
-#define UTRIE_GET16_FROM_BMP(trie, c16) _UTRIE_GET_FROM_BMP(trie, index, c16)
-
-/**
- * Get a 32-bit trie value from a BMP code point (UChar, <=U+ffff).
- * Even lead surrogate code points are treated as normal code points,
- * with unfolded values that may differ from _FROM_LEAD() macro results for them.
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @param c16 (UChar, in) the input BMP code point
- * @return (uint32_t) trie lookup result
- */
-#define UTRIE_GET32_FROM_BMP(trie, c16) _UTRIE_GET_FROM_BMP(trie, data32, c16)
-
-/**
- * Get a 16-bit trie value from a code point.
- * Even lead surrogate code points are treated as normal code points,
- * with unfolded values that may differ from _FROM_LEAD() macro results for them.
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @param c32 (UChar32, in) the input code point
- * @param result (uint16_t, out) uint16_t variable for the trie lookup result
- */
-#define UTRIE_GET16(trie, c32, result) _UTRIE_GET(trie, index, c32, result, uint16_t)
-
-/**
- * Get a 32-bit trie value from a code point.
- * Even lead surrogate code points are treated as normal code points,
- * with unfolded values that may differ from _FROM_LEAD() macro results for them.
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @param c32 (UChar32, in) the input code point
- * @param result (uint32_t, out) uint32_t variable for the trie lookup result
- */
-#define UTRIE_GET32(trie, c32, result) _UTRIE_GET(trie, data32, c32, result, uint32_t)
-
-/**
- * Get the next code point (c, c2), post-increment src,
- * and get a 16-bit value from the trie.
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @param src (const UChar *, in/out) the source text pointer
- * @param limit (const UChar *, in) the limit pointer for the text, or NULL
- * @param c (UChar, out) variable for the BMP or lead code unit
- * @param c2 (UChar, out) variable for 0 or the trail code unit
- * @param result (uint16_t, out) uint16_t variable for the trie lookup result
- */
-#define UTRIE_NEXT16(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, index, src, limit, c, c2, result, uint16_t)
-
-/**
- * Get the next code point (c, c2), post-increment src,
- * and get a 32-bit value from the trie.
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @param src (const UChar *, in/out) the source text pointer
- * @param limit (const UChar *, in) the limit pointer for the text, or NULL
- * @param c (UChar, out) variable for the BMP or lead code unit
- * @param c2 (UChar, out) variable for 0 or the trail code unit
- * @param result (uint32_t, out) uint32_t variable for the trie lookup result
- */
-#define UTRIE_NEXT32(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, data32, src, limit, c, c2, result, uint32_t)
-
-/**
- * Get the previous code point (c, c2), pre-decrement src,
- * and get a 16-bit value from the trie.
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @param start (const UChar *, in) the start pointer for the text, or NULL
- * @param src (const UChar *, in/out) the source text pointer
- * @param c (UChar, out) variable for the BMP or lead code unit
- * @param c2 (UChar, out) variable for 0 or the trail code unit
- * @param result (uint16_t, out) uint16_t variable for the trie lookup result
- */
-#define UTRIE_PREVIOUS16(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, index, start, src, c, c2, result, uint16_t)
-
-/**
- * Get the previous code point (c, c2), pre-decrement src,
- * and get a 32-bit value from the trie.
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @param start (const UChar *, in) the start pointer for the text, or NULL
- * @param src (const UChar *, in/out) the source text pointer
- * @param c (UChar, out) variable for the BMP or lead code unit
- * @param c2 (UChar, out) variable for 0 or the trail code unit
- * @param result (uint32_t, out) uint32_t variable for the trie lookup result
- */
-#define UTRIE_PREVIOUS32(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, data32, start, src, c, c2, result, uint32_t)
-
-/**
- * Get a 16-bit trie value from a pair of surrogates.
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @param c (UChar, in) a lead surrogate
- * @param c2 (UChar, in) a trail surrogate
- * @param result (uint16_t, out) uint16_t variable for the trie lookup result
- */
-#define UTRIE_GET16_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, index, c, c2, result, uint16_t)
-
-/**
- * Get a 32-bit trie value from a pair of surrogates.
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @param c (UChar, in) a lead surrogate
- * @param c2 (UChar, in) a trail surrogate
- * @param result (uint32_t, out) uint32_t variable for the trie lookup result
- */
-#define UTRIE_GET32_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, data32, c, c2, result, uint32_t)
-
-/**
- * Get a 16-bit trie value from a folding offset (from the value of a lead surrogate)
- * and a trail surrogate.
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @param offset (int32_t, in) the folding offset from the value of a lead surrogate
- * @param c2 (UChar, in) a trail surrogate (only the 10 low bits are significant)
- * @return (uint16_t) trie lookup result
- */
-#define UTRIE_GET16_FROM_OFFSET_TRAIL(trie, offset, c2) _UTRIE_GET_RAW(trie, index, offset, (c2)&0x3ff)
-
-/**
- * Get a 32-bit trie value from a folding offset (from the value of a lead surrogate)
- * and a trail surrogate.
- *
- * @param trie (const UTrie *, in) a pointer to the runtime trie structure
- * @param offset (int32_t, in) the folding offset from the value of a lead surrogate
- * @param c2 (UChar, in) a trail surrogate (only the 10 low bits are significant)
- * @return (uint32_t) trie lookup result
- */
-#define UTRIE_GET32_FROM_OFFSET_TRAIL(trie, offset, c2) _UTRIE_GET_RAW(trie, data32, offset, (c2)&0x3ff)
-
-/* enumeration callback types */
-
-/**
- * Callback from utrie_enum(), extracts a uint32_t value from a
- * trie value. This value will be passed on to the UTrieEnumRange function.
- *
- * @param context an opaque pointer, as passed into utrie_enum()
- * @param value a value from the trie
- * @return the value that is to be passed on to the UTrieEnumRange function
- */
-typedef uint32_t U_CALLCONV
-UTrieEnumValue(const void *context, uint32_t value);
-
-/**
- * Callback from utrie_enum(), is called for each contiguous range
- * of code points with the same value as retrieved from the trie and
- * transformed by the UTrieEnumValue function.
- *
- * The callback function can stop the enumeration by returning FALSE.
- *
- * @param context an opaque pointer, as passed into utrie_enum()
- * @param start the first code point in a contiguous range with value
- * @param limit one past the last code point in a contiguous range with value
- * @param value the value that is set for all code points in [start..limit[
- * @return FALSE to stop the enumeration
- */
-typedef UBool U_CALLCONV
-UTrieEnumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value);
-
-/**
- * Enumerate efficiently all values in a trie.
- * For each entry in the trie, the value to be delivered is passed through
- * the UTrieEnumValue function.
- * The value is unchanged if that function pointer is NULL.
- *
- * For each contiguous range of code points with a given value,
- * the UTrieEnumRange function is called.
- *
- * @param trie a pointer to the runtime trie structure
- * @param enumValue a pointer to a function that may transform the trie entry value,
- * or NULL if the values from the trie are to be used directly
- * @param enumRange a pointer to a function that is called for each contiguous range
- * of code points with the same value
- * @param context an opaque pointer that is passed on to the callback functions
- */
-U_CAPI void U_EXPORT2
-utrie_enum(const UTrie *trie,
- UTrieEnumValue *enumValue, UTrieEnumRange *enumRange, const void *context);
-
-/**
- * Unserialize a trie from 32-bit-aligned memory.
- * Inverse of utrie_serialize().
- * Fills the UTrie runtime trie structure with the settings for the trie data.
- *
- * @param trie a pointer to the runtime trie structure
- * @param data a pointer to 32-bit-aligned memory containing trie data
- * @param length the number of bytes available at data
- * @param pErrorCode an in/out ICU UErrorCode
- * @return the number of bytes at data taken up by the trie data
- */
-U_CAPI int32_t U_EXPORT2
-utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode);
-
-/**
- * "Unserialize" a dummy trie.
- * A dummy trie is an empty runtime trie, used when a real data trie cannot
- * be loaded.
- *
- * The input memory is filled so that the trie always returns the initialValue,
- * or the leadUnitValue for lead surrogate code points.
- * The Latin-1 part is always set up to be linear.
- *
- * @param trie a pointer to the runtime trie structure
- * @param data a pointer to 32-bit-aligned memory to be filled with the dummy trie data
- * @param length the number of bytes available at data (recommended to use UTRIE_DUMMY_SIZE)
- * @param initialValue the initial value that is set for all code points
- * @param leadUnitValue the value for lead surrogate code _units_ that do not
- * have associated supplementary data
- * @param pErrorCode an in/out ICU UErrorCode
- *
- * @see UTRIE_DUMMY_SIZE
- * @see utrie_open
- */
-U_CAPI int32_t U_EXPORT2
-utrie_unserializeDummy(UTrie *trie,
- void *data, int32_t length,
- uint32_t initialValue, uint32_t leadUnitValue,
- UBool make16BitTrie,
- UErrorCode *pErrorCode);
-
-/**
- * Default implementation for UTrie.getFoldingOffset, set automatically by
- * utrie_unserialize().
- * Simply returns the lead surrogate's value itself - which is the inverse
- * of the default folding function used by utrie_serialize().
- * Exported for static const UTrie structures.
- *
- * @see UTrieGetFoldingOffset
- */
-U_CAPI int32_t U_EXPORT2
-utrie_defaultGetFoldingOffset(uint32_t data);
-
-/* Building a trie ----------------------------------------------------------*/
-
-/**
- * Build-time trie structure.
- * Opaque definition, here only to make fillIn parameters possible
- * for utrie_open() and utrie_clone().
- */
-struct UNewTrie {
- /**
- * Index values at build-time are 32 bits wide for easier processing.
- * Bit 31 is set if the data block is used by multiple index values (from utrie_setRange()).
- */
- int32_t index[UTRIE_MAX_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT];
- uint32_t *data;
-
- uint32_t leadUnitValue;
- int32_t indexLength, dataCapacity, dataLength;
- UBool isAllocated, isDataAllocated;
- UBool isLatin1Linear, isCompacted;
-
- /**
- * Map of adjusted indexes, used in utrie_compact().
- * Maps from original indexes to new ones.
- */
- int32_t map[UTRIE_MAX_BUILD_TIME_DATA_LENGTH>>UTRIE_SHIFT];
-};
-
-typedef struct UNewTrie UNewTrie;
-
-/**
- * Build-time trie callback function, used with utrie_serialize().
- * This function calculates a lead surrogate's value including a folding offset
- * from the 1024 supplementary code points [start..start+1024[ .
- * It is U+10000 <= start <= U+10fc00 and (start&0x3ff)==0.
- *
- * The folding offset is provided by the caller.
- * It is offset=UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
- * Instead of the offset itself, n can be stored in 10 bits -
- * or fewer if it can be assumed that few lead surrogates have associated data.
- *
- * The returned value must be
- * - not zero if and only if there is relevant data
- * for the corresponding 1024 supplementary code points
- * - such that UTrie.getFoldingOffset(UNewTrieGetFoldedValue(..., offset))==offset
- *
- * @return a folded value, or 0 if there is no relevant data for the lead surrogate.
- */
-typedef uint32_t U_CALLCONV
-UNewTrieGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset);
-
-/**
- * Open a build-time trie structure.
- * The size of the build-time data array is specified to avoid allocating a large
- * array in all cases. The array itself can also be passed in.
- *
- * Although the trie is never fully expanded to a linear array, especially when
- * utrie_setRange32() is used, the data array could be large during build time.
- * The maximum length is
- * UTRIE_MAX_BUILD_TIME_DATA_LENGTH=0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400.
- * (Number of Unicode code points + one all-initial-value block +
- * possible duplicate entries for 1024 lead surrogates.)
- * (UTRIE_DATA_BLOCK_LENGTH<=0x200 in all cases.)
- *
- * @param fillIn a pointer to a UNewTrie structure to be initialized (will not be released), or
- * NULL if one is to be allocated
- * @param aliasData a pointer to a data array to be used (will not be released), or
- * NULL if one is to be allocated
- * @param maxDataLength the capacity of aliasData (if not NULL) or
- * the length of the data array to be allocated
- * @param initialValue the initial value that is set for all code points
- * @param leadUnitValue the value for lead surrogate code _units_ that do not
- * have associated supplementary data
- * @param latin1Linear a flag indicating whether the Latin-1 range is to be allocated and
- * kept in a linear, contiguous part of the data array
- * @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie
- */
-U_CAPI UNewTrie * U_EXPORT2
-utrie_open(UNewTrie *fillIn,
- uint32_t *aliasData, int32_t maxDataLength,
- uint32_t initialValue, uint32_t leadUnitValue,
- UBool latin1Linear);
-
-/**
- * Clone a build-time trie structure with all entries.
- *
- * @param fillIn like in utrie_open()
- * @param other the build-time trie structure to clone
- * @param aliasData like in utrie_open(),
- * used if aliasDataLength>=(capacity of other's data array)
- * @param aliasDataLength the length of aliasData
- * @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie
- */
-U_CAPI UNewTrie * U_EXPORT2
-utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_t aliasDataLength);
-
-/**
- * Close a build-time trie structure, and release memory
- * that was allocated by utrie_open() or utrie_clone().
- *
- * @param trie the build-time trie
- */
-U_CAPI void U_EXPORT2
-utrie_close(UNewTrie *trie);
-
-/**
- * Get the data array of a build-time trie.
- * The data may be modified, but entries that are equal before
- * must still be equal after modification.
- *
- * @param trie the build-time trie
- * @param pLength (out) a pointer to a variable that receives the number
- * of entries in the data array
- * @return the data array
- */
-U_CAPI uint32_t * U_EXPORT2
-utrie_getData(UNewTrie *trie, int32_t *pLength);
-
-/**
- * Set a value for a code point.
- *
- * @param trie the build-time trie
- * @param c the code point
- * @param value the value
- * @return FALSE if a failure occurred (illegal argument or data array overrun)
- */
-U_CAPI UBool U_EXPORT2
-utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value);
-
-/**
- * Get a value from a code point as stored in the build-time trie.
- *
- * @param trie the build-time trie
- * @param c the code point
- * @param pInBlockZero if not NULL, then *pInBlockZero is set to TRUE
- * iff the value is retrieved from block 0;
- * block 0 is the all-initial-value initial block
- * @return the value
- */
-U_CAPI uint32_t U_EXPORT2
-utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero);
-
-/**
- * Set a value in a range of code points [start..limit[.
- * All code points c with start<=c<limit will get the value if
- * overwrite is TRUE or if the old value is 0.
- *
- * @param trie the build-time trie
- * @param start the first code point to get the value
- * @param limit one past the last code point to get the value
- * @param value the value
- * @param overwrite flag for whether old non-initial values are to be overwritten
- * @return FALSE if a failure occurred (illegal argument or data array overrun)
- */
-U_CAPI UBool U_EXPORT2
-utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite);
-
-/**
- * Compact the build-time trie after all values are set, and then
- * serialize it into 32-bit aligned memory.
- *
- * After this, the trie can only be serizalized again and/or closed;
- * no further values can be added.
- *
- * @see utrie_unserialize()
- *
- * @param trie the build-time trie
- * @param data a pointer to 32-bit-aligned memory for the trie data
- * @param capacity the number of bytes available at data
- * @param getFoldedValue a callback function that calculates the value for
- * a lead surrogate from all of its supplementary code points
- * and the folding offset;
- * if NULL, then a default function is used which returns just
- * the input offset when there are any non-initial-value entries
- * @param reduceTo16Bits flag for whether the values are to be reduced to a
- * width of 16 bits for serialization and runtime
- * @param pErrorCode a UErrorCode argument; among other possible error codes:
- * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
- * - U_MEMORY_ALLOCATION_ERROR if the trie data array is too small
- * - U_INDEX_OUTOFBOUNDS_ERROR if the index or data arrays are too long after compaction for serialization
- *
- * @return the number of bytes written for the trie
- */
-U_CAPI int32_t U_EXPORT2
-utrie_serialize(UNewTrie *trie, void *data, int32_t capacity,
- UNewTrieGetFoldedValue *getFoldedValue,
- UBool reduceTo16Bits,
- UErrorCode *pErrorCode);
-
-/* serialization ------------------------------------------------------------ */
-
-// UTrie signature values, in platform endianness and opposite endianness.
-// The UTrie signature ASCII byte values spell "Trie".
-#define UTRIE_SIG 0x54726965
-#define UTRIE_OE_SIG 0x65697254
-
-/**
- * Trie data structure in serialized form:
- *
- * UTrieHeader header;
- * uint16_t index[header.indexLength];
- * uint16_t data[header.dataLength];
- * @internal
- */
-typedef struct UTrieHeader {
- /** "Trie" in big-endian US-ASCII (0x54726965) */
- uint32_t signature;
-
- /**
- * options bit field:
- * 9 1=Latin-1 data is stored linearly at data+UTRIE_DATA_BLOCK_LENGTH
- * 8 0=16-bit data, 1=32-bit data
- * 7..4 UTRIE_INDEX_SHIFT // 0..UTRIE_SHIFT
- * 3..0 UTRIE_SHIFT // 1..9
- */
- uint32_t options;
-
- /** indexLength is a multiple of UTRIE_SURROGATE_BLOCK_COUNT */
- int32_t indexLength;
-
- /** dataLength>=UTRIE_DATA_BLOCK_LENGTH */
- int32_t dataLength;
-} UTrieHeader;
-
-/**
- * Constants for use with UTrieHeader.options.
- * @internal
- */
-enum {
- /** Mask to get the UTRIE_SHIFT value from options. */
- UTRIE_OPTIONS_SHIFT_MASK=0xf,
-
- /** Shift options right this much to get the UTRIE_INDEX_SHIFT value. */
- UTRIE_OPTIONS_INDEX_SHIFT=4,
-
- /** If set, then the data (stage 2) array is 32 bits wide. */
- UTRIE_OPTIONS_DATA_IS_32_BIT=0x100,
-
- /**
- * If set, then Latin-1 data (for U+0000..U+00ff) is stored in the data (stage 2) array
- * as a simple, linear array at data+UTRIE_DATA_BLOCK_LENGTH.
- */
- UTRIE_OPTIONS_LATIN1_IS_LINEAR=0x200
-};
-
-U_CDECL_END
-
-#endif
diff --git a/contrib/libs/icu/common/utrie2.cpp b/contrib/libs/icu/common/utrie2.cpp
deleted file mode 100644
index 24ef5782c90..00000000000
--- a/contrib/libs/icu/common/utrie2.cpp
+++ /dev/null
@@ -1,663 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2001-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: utrie2.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2008aug16 (starting from a copy of utrie.c)
-* created by: Markus W. Scherer
-*
-* This is a common implementation of a Unicode trie.
-* It is a kind of compressed, serializable table of 16- or 32-bit values associated with
-* Unicode code points (0..0x10ffff).
-* This is the second common version of a Unicode trie (hence the name UTrie2).
-* See utrie2.h for a comparison.
-*
-* This file contains only the runtime and enumeration code, for read-only access.
-* See utrie2_builder.c for the builder code.
-*/
-#include "unicode/utypes.h"
-#ifdef UCPTRIE_DEBUG
-#include "unicode/umutablecptrie.h"
-#endif
-#include "unicode/utf.h"
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-#include "cmemory.h"
-#include "utrie2.h"
-#include "utrie2_impl.h"
-#include "uassert.h"
-
-/* Public UTrie2 API implementation ----------------------------------------- */
-
-static uint32_t
-get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) {
- int32_t i2, block;
-
- if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) {
- return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY];
- }
-
- if(U_IS_LEAD(c) && fromLSCP) {
- i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
- (c>>UTRIE2_SHIFT_2);
- } else {
- i2=trie->index1[c>>UTRIE2_SHIFT_1]+
- ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
- }
- block=trie->index2[i2];
- return trie->data[block+(c&UTRIE2_DATA_MASK)];
-}
-
-U_CAPI uint32_t U_EXPORT2
-utrie2_get32(const UTrie2 *trie, UChar32 c) {
- if(trie->data16!=NULL) {
- return UTRIE2_GET16(trie, c);
- } else if(trie->data32!=NULL) {
- return UTRIE2_GET32(trie, c);
- } else if((uint32_t)c>0x10ffff) {
- return trie->errorValue;
- } else {
- return get32(trie->newTrie, c, TRUE);
- }
-}
-
-U_CAPI uint32_t U_EXPORT2
-utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) {
- if(!U_IS_LEAD(c)) {
- return trie->errorValue;
- }
- if(trie->data16!=NULL) {
- return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c);
- } else if(trie->data32!=NULL) {
- return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c);
- } else {
- return get32(trie->newTrie, c, FALSE);
- }
-}
-
-static inline int32_t
-u8Index(const UTrie2 *trie, UChar32 c, int32_t i) {
- int32_t idx=
- _UTRIE2_INDEX_FROM_CP(
- trie,
- trie->data32==NULL ? trie->indexLength : 0,
- c);
- return (idx<<3)|i;
-}
-
-U_CAPI int32_t U_EXPORT2
-utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c,
- const uint8_t *src, const uint8_t *limit) {
- int32_t i, length;
- i=0;
- /* support 64-bit pointers by avoiding cast of arbitrary difference */
- if((limit-src)<=7) {
- length=(int32_t)(limit-src);
- } else {
- length=7;
- }
- c=utf8_nextCharSafeBody(src, &i, length, c, -1);
- return u8Index(trie, c, i);
-}
-
-U_CAPI int32_t U_EXPORT2
-utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
- const uint8_t *start, const uint8_t *src) {
- int32_t i, length;
- /* support 64-bit pointers by avoiding cast of arbitrary difference */
- if((src-start)<=7) {
- i=length=(int32_t)(src-start);
- } else {
- i=length=7;
- start=src-7;
- }
- c=utf8_prevCharSafeBody(start, 0, &i, c, -1);
- i=length-i; /* number of bytes read backward from src */
- return u8Index(trie, c, i);
-}
-
-U_CAPI UTrie2 * U_EXPORT2
-utrie2_openFromSerialized(UTrie2ValueBits valueBits,
- const void *data, int32_t length, int32_t *pActualLength,
- UErrorCode *pErrorCode) {
- const UTrie2Header *header;
- const uint16_t *p16;
- int32_t actualLength;
-
- UTrie2 tempTrie;
- UTrie2 *trie;
-
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) ||
- valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* enough data for a trie header? */
- if(length<(int32_t)sizeof(UTrie2Header)) {
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
-
- /* check the signature */
- header=(const UTrie2Header *)data;
- if(header->signature!=UTRIE2_SIG) {
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
-
- /* get the options */
- if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) {
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
-
- /* get the length values and offsets */
- uprv_memset(&tempTrie, 0, sizeof(tempTrie));
- tempTrie.indexLength=header->indexLength;
- tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT;
- tempTrie.index2NullOffset=header->index2NullOffset;
- tempTrie.dataNullOffset=header->dataNullOffset;
-
- tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1;
- tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY;
- if(valueBits==UTRIE2_16_VALUE_BITS) {
- tempTrie.highValueIndex+=tempTrie.indexLength;
- }
-
- /* calculate the actual length */
- actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2;
- if(valueBits==UTRIE2_16_VALUE_BITS) {
- actualLength+=tempTrie.dataLength*2;
- } else {
- actualLength+=tempTrie.dataLength*4;
- }
- if(length<actualLength) {
- *pErrorCode=U_INVALID_FORMAT_ERROR; /* not enough bytes */
- return 0;
- }
-
- /* allocate the trie */
- trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
- if(trie==NULL) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
- trie->memory=(uint32_t *)data;
- trie->length=actualLength;
- trie->isMemoryOwned=FALSE;
-#ifdef UTRIE2_DEBUG
- trie->name="fromSerialized";
-#endif
-
- /* set the pointers to its index and data arrays */
- p16=(const uint16_t *)(header+1);
- trie->index=p16;
- p16+=trie->indexLength;
-
- /* get the data */
- switch(valueBits) {
- case UTRIE2_16_VALUE_BITS:
- trie->data16=p16;
- trie->data32=NULL;
- trie->initialValue=trie->index[trie->dataNullOffset];
- trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET];
- break;
- case UTRIE2_32_VALUE_BITS:
- trie->data16=NULL;
- trie->data32=(const uint32_t *)p16;
- trie->initialValue=trie->data32[trie->dataNullOffset];
- trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET];
- break;
- default:
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
-
- if(pActualLength!=NULL) {
- *pActualLength=actualLength;
- }
- return trie;
-}
-
-U_CAPI UTrie2 * U_EXPORT2
-utrie2_openDummy(UTrie2ValueBits valueBits,
- uint32_t initialValue, uint32_t errorValue,
- UErrorCode *pErrorCode) {
- UTrie2 *trie;
- UTrie2Header *header;
- uint32_t *p;
- uint16_t *dest16;
- int32_t indexLength, dataLength, length, i;
- int32_t dataMove; /* >0 if the data is moved to the end of the index array */
-
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* calculate the total length of the dummy trie data */
- indexLength=UTRIE2_INDEX_1_OFFSET;
- dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY;
- length=(int32_t)sizeof(UTrie2Header)+indexLength*2;
- if(valueBits==UTRIE2_16_VALUE_BITS) {
- length+=dataLength*2;
- } else {
- length+=dataLength*4;
- }
-
- /* allocate the trie */
- trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
- if(trie==NULL) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- uprv_memset(trie, 0, sizeof(UTrie2));
- trie->memory=uprv_malloc(length);
- if(trie->memory==NULL) {
- uprv_free(trie);
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- trie->length=length;
- trie->isMemoryOwned=TRUE;
-
- /* set the UTrie2 fields */
- if(valueBits==UTRIE2_16_VALUE_BITS) {
- dataMove=indexLength;
- } else {
- dataMove=0;
- }
-
- trie->indexLength=indexLength;
- trie->dataLength=dataLength;
- trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET;
- trie->dataNullOffset=(uint16_t)dataMove;
- trie->initialValue=initialValue;
- trie->errorValue=errorValue;
- trie->highStart=0;
- trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET;
-#ifdef UTRIE2_DEBUG
- trie->name="dummy";
-#endif
-
- /* set the header fields */
- header=(UTrie2Header *)trie->memory;
-
- header->signature=UTRIE2_SIG; /* "Tri2" */
- header->options=(uint16_t)valueBits;
-
- header->indexLength=(uint16_t)indexLength;
- header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT);
- header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET;
- header->dataNullOffset=(uint16_t)dataMove;
- header->shiftedHighStart=0;
-
- /* fill the index and data arrays */
- dest16=(uint16_t *)(header+1);
- trie->index=dest16;
-
- /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */
- for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
- *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT); /* null data block */
- }
-
- /* write UTF-8 2-byte index-2 values, not right-shifted */
- for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */
- *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
- }
- for(; i<(0xe0-0xc0); ++i) { /* C2..DF */
- *dest16++=(uint16_t)dataMove;
- }
-
- /* write the 16/32-bit data array */
- switch(valueBits) {
- case UTRIE2_16_VALUE_BITS:
- /* write 16-bit data values */
- trie->data16=dest16;
- trie->data32=NULL;
- for(i=0; i<0x80; ++i) {
- *dest16++=(uint16_t)initialValue;
- }
- for(; i<0xc0; ++i) {
- *dest16++=(uint16_t)errorValue;
- }
- /* highValue and reserved values */
- for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
- *dest16++=(uint16_t)initialValue;
- }
- break;
- case UTRIE2_32_VALUE_BITS:
- /* write 32-bit data values */
- p=(uint32_t *)dest16;
- trie->data16=NULL;
- trie->data32=p;
- for(i=0; i<0x80; ++i) {
- *p++=initialValue;
- }
- for(; i<0xc0; ++i) {
- *p++=errorValue;
- }
- /* highValue and reserved values */
- for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
- *p++=initialValue;
- }
- break;
- default:
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- return trie;
-}
-
-U_CAPI void U_EXPORT2
-utrie2_close(UTrie2 *trie) {
- if(trie!=NULL) {
- if(trie->isMemoryOwned) {
- uprv_free(trie->memory);
- }
- if(trie->newTrie!=NULL) {
- uprv_free(trie->newTrie->data);
-#ifdef UCPTRIE_DEBUG
- umutablecptrie_close(trie->newTrie->t3);
-#endif
- uprv_free(trie->newTrie);
- }
- uprv_free(trie);
- }
-}
-
-U_CAPI UBool U_EXPORT2
-utrie2_isFrozen(const UTrie2 *trie) {
- return (UBool)(trie->newTrie==NULL);
-}
-
-U_CAPI int32_t U_EXPORT2
-utrie2_serialize(const UTrie2 *trie,
- void *data, int32_t capacity,
- UErrorCode *pErrorCode) {
- /* argument check */
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL ||
- capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)))
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- if(capacity>=trie->length) {
- uprv_memcpy(data, trie->memory, trie->length);
- } else {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- }
- return trie->length;
-}
-
-/* enumeration -------------------------------------------------------------- */
-
-#define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b))
-
-/* default UTrie2EnumValue() returns the input value itself */
-static uint32_t U_CALLCONV
-enumSameValue(const void * /*context*/, uint32_t value) {
- return value;
-}
-
-/**
- * Enumerate all ranges of code points with the same relevant values.
- * The values are transformed from the raw trie entries by the enumValue function.
- *
- * Currently requires start<limit and both start and limit must be multiples
- * of UTRIE2_DATA_BLOCK_LENGTH.
- *
- * Optimizations:
- * - Skip a whole block if we know that it is filled with a single value,
- * and it is the same as we visited just before.
- * - Handle the null block specially because we know a priori that it is filled
- * with a single value.
- */
-static void
-enumEitherTrie(const UTrie2 *trie,
- UChar32 start, UChar32 limit,
- UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
- const uint32_t *data32;
- const uint16_t *idx;
-
- uint32_t value, prevValue, initialValue;
- UChar32 c, prev, highStart;
- int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
-
- if(enumRange==NULL) {
- return;
- }
- if(enumValue==NULL) {
- enumValue=enumSameValue;
- }
-
- if(trie->newTrie==NULL) {
- /* frozen trie */
- idx=trie->index;
- U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */
- data32=trie->data32;
-
- index2NullOffset=trie->index2NullOffset;
- nullBlock=trie->dataNullOffset;
- } else {
- /* unfrozen, mutable trie */
- idx=NULL;
- data32=trie->newTrie->data;
- U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */
-
- index2NullOffset=trie->newTrie->index2NullOffset;
- nullBlock=trie->newTrie->dataNullOffset;
- }
-
- highStart=trie->highStart;
-
- /* get the enumeration value that corresponds to an initial-value trie data entry */
- initialValue=enumValue(context, trie->initialValue);
-
- /* set variables for previous range */
- prevI2Block=-1;
- prevBlock=-1;
- prev=start;
- prevValue=0;
-
- /* enumerate index-2 blocks */
- for(c=start; c<limit && c<highStart;) {
- /* Code point limit for iterating inside this i2Block. */
- UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY;
- if(limit<tempLimit) {
- tempLimit=limit;
- }
- if(c<=0xffff) {
- if(!U_IS_SURROGATE(c)) {
- i2Block=c>>UTRIE2_SHIFT_2;
- } else if(U_IS_SURROGATE_LEAD(c)) {
- /*
- * Enumerate values for lead surrogate code points, not code units:
- * This special block has half the normal length.
- */
- i2Block=UTRIE2_LSCP_INDEX_2_OFFSET;
- tempLimit=MIN_VALUE(0xdc00, limit);
- } else {
- /*
- * Switch back to the normal part of the index-2 table.
- * Enumerate the second half of the surrogates block.
- */
- i2Block=0xd800>>UTRIE2_SHIFT_2;
- tempLimit=MIN_VALUE(0xe000, limit);
- }
- } else {
- /* supplementary code points */
- if(idx!=NULL) {
- i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+
- (c>>UTRIE2_SHIFT_1)];
- } else {
- i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1];
- }
- if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) {
- /*
- * The index-2 block is the same as the previous one, and filled with prevValue.
- * Only possible for supplementary code points because the linear-BMP index-2
- * table creates unique i2Block values.
- */
- c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
- continue;
- }
- }
- prevI2Block=i2Block;
- if(i2Block==index2NullOffset) {
- /* this is the null index-2 block */
- if(prevValue!=initialValue) {
- if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
- return;
- }
- prevBlock=nullBlock;
- prev=c;
- prevValue=initialValue;
- }
- c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
- } else {
- /* enumerate data blocks for one index-2 block */
- int32_t i2, i2Limit;
- i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
- if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) {
- i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
- } else {
- i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH;
- }
- for(; i2<i2Limit; ++i2) {
- if(idx!=NULL) {
- block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT;
- } else {
- block=trie->newTrie->index2[i2Block+i2];
- }
- if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) {
- /* the block is the same as the previous one, and filled with prevValue */
- c+=UTRIE2_DATA_BLOCK_LENGTH;
- continue;
- }
- prevBlock=block;
- if(block==nullBlock) {
- /* this is the null data block */
- if(prevValue!=initialValue) {
- if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
- return;
- }
- prev=c;
- prevValue=initialValue;
- }
- c+=UTRIE2_DATA_BLOCK_LENGTH;
- } else {
- for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) {
- value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
- if(value!=prevValue) {
- if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
- return;
- }
- prev=c;
- prevValue=value;
- }
- ++c;
- }
- }
- }
- }
- }
-
- if(c>limit) {
- c=limit; /* could be higher if in the index2NullOffset */
- } else if(c<limit) {
- /* c==highStart<limit */
- uint32_t highValue;
- if(idx!=NULL) {
- highValue=
- data32!=NULL ?
- data32[trie->highValueIndex] :
- idx[trie->highValueIndex];
- } else {
- highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY];
- }
- value=enumValue(context, highValue);
- if(value!=prevValue) {
- if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
- return;
- }
- prev=c;
- prevValue=value;
- }
- c=limit;
- }
-
- /* deliver last range */
- enumRange(context, prev, c-1, prevValue);
-}
-
-U_CAPI void U_EXPORT2
-utrie2_enum(const UTrie2 *trie,
- UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
- enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context);
-}
-
-U_CAPI void U_EXPORT2
-utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
- UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange,
- const void *context) {
- if(!U16_IS_LEAD(lead)) {
- return;
- }
- lead=(lead-0xd7c0)<<10; /* start code point */
- enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context);
-}
-
-/* C++ convenience wrappers ------------------------------------------------- */
-
-U_NAMESPACE_BEGIN
-
-uint16_t BackwardUTrie2StringIterator::previous16() {
- codePointLimit=codePointStart;
- if(start>=codePointStart) {
- codePoint=U_SENTINEL;
- return static_cast<uint16_t>(trie->errorValue);
- }
- uint16_t result;
- UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result);
- return result;
-}
-
-uint16_t ForwardUTrie2StringIterator::next16() {
- codePointStart=codePointLimit;
- if(codePointLimit==limit) {
- codePoint=U_SENTINEL;
- return static_cast<uint16_t>(trie->errorValue);
- }
- uint16_t result;
- UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result);
- return result;
-}
-
-U_NAMESPACE_END
diff --git a/contrib/libs/icu/common/utrie2.h b/contrib/libs/icu/common/utrie2.h
deleted file mode 100644
index 671f44e16a6..00000000000
--- a/contrib/libs/icu/common/utrie2.h
+++ /dev/null
@@ -1,955 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2001-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: utrie2.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2008aug16 (starting from a copy of utrie.h)
-* created by: Markus W. Scherer
-*/
-
-#ifndef __UTRIE2_H__
-#define __UTRIE2_H__
-
-#include "unicode/utypes.h"
-#include "unicode/utf8.h"
-#include "putilimp.h"
-
-U_CDECL_BEGIN
-
-struct UTrie; /* forward declaration */
-#ifndef __UTRIE_H__
-typedef struct UTrie UTrie;
-#endif
-
-/**
- * \file
- *
- * This is a common implementation of a Unicode trie.
- * It is a kind of compressed, serializable table of 16- or 32-bit values associated with
- * Unicode code points (0..0x10ffff). (A map from code points to integers.)
- *
- * This is the second common version of a Unicode trie (hence the name UTrie2).
- * Compared with UTrie version 1:
- * - Still splitting BMP code points 11:5 bits for index and data table lookups.
- * - Still separate data for lead surrogate code _units_ vs. code _points_,
- * but the lead surrogate code unit values are not required any more
- * for data lookup for supplementary code points.
- * - The "folding" mechanism is removed. In UTrie version 1, this somewhat
- * hard-to-explain mechanism was meant to be used for optimized UTF-16
- * processing, with application-specific encoding of indexing bits
- * in the lead surrogate data for the associated supplementary code points.
- * - For the last single-value code point range (ending with U+10ffff),
- * the starting code point ("highStart") and the value are stored.
- * - For supplementary code points U+10000..highStart-1 a three-table lookup
- * (two index tables and one data table) is used. The first index
- * is truncated, omitting both the BMP portion and the high range.
- * - There is a special small index for 2-byte UTF-8, and the initial data
- * entries are designed for fast 1/2-byte UTF-8 lookup.
- * Starting with ICU 60, C0 and C1 are not recognized as UTF-8 lead bytes any more at all,
- * and the associated 2-byte indexes are unused.
- */
-
-/**
- * Trie structure.
- * Use only with public API macros and functions.
- */
-struct UTrie2;
-typedef struct UTrie2 UTrie2;
-
-/* Public UTrie2 API functions: read-only access ---------------------------- */
-
-/**
- * Selectors for the width of a UTrie2 data value.
- */
-enum UTrie2ValueBits {
- /** 16 bits per UTrie2 data value. */
- UTRIE2_16_VALUE_BITS,
- /** 32 bits per UTrie2 data value. */
- UTRIE2_32_VALUE_BITS,
- /** Number of selectors for the width of UTrie2 data values. */
- UTRIE2_COUNT_VALUE_BITS
-};
-typedef enum UTrie2ValueBits UTrie2ValueBits;
-
-/**
- * Open a frozen trie from its serialized from, stored in 32-bit-aligned memory.
- * Inverse of utrie2_serialize().
- * The memory must remain valid and unchanged as long as the trie is used.
- * You must utrie2_close() the trie once you are done using it.
- *
- * @param valueBits selects the data entry size; results in an
- * U_INVALID_FORMAT_ERROR if it does not match the serialized form
- * @param data a pointer to 32-bit-aligned memory containing the serialized form of a UTrie2
- * @param length the number of bytes available at data;
- * can be more than necessary
- * @param pActualLength receives the actual number of bytes at data taken up by the trie data;
- * can be NULL
- * @param pErrorCode an in/out ICU UErrorCode
- * @return the unserialized trie
- *
- * @see utrie2_open
- * @see utrie2_serialize
- */
-U_CAPI UTrie2 * U_EXPORT2
-utrie2_openFromSerialized(UTrie2ValueBits valueBits,
- const void *data, int32_t length, int32_t *pActualLength,
- UErrorCode *pErrorCode);
-
-/**
- * Open a frozen, empty "dummy" trie.
- * A dummy trie is an empty trie, used when a real data trie cannot
- * be loaded. Equivalent to calling utrie2_open() and utrie2_freeze(),
- * but without internally creating and compacting/serializing the
- * builder data structure.
- *
- * The trie always returns the initialValue,
- * or the errorValue for out-of-range code points and illegal UTF-8.
- *
- * You must utrie2_close() the trie once you are done using it.
- *
- * @param valueBits selects the data entry size
- * @param initialValue the initial value that is set for all code points
- * @param errorValue the value for out-of-range code points and illegal UTF-8
- * @param pErrorCode an in/out ICU UErrorCode
- * @return the dummy trie
- *
- * @see utrie2_openFromSerialized
- * @see utrie2_open
- */
-U_CAPI UTrie2 * U_EXPORT2
-utrie2_openDummy(UTrie2ValueBits valueBits,
- uint32_t initialValue, uint32_t errorValue,
- UErrorCode *pErrorCode);
-
-/**
- * Get a value from a code point as stored in the trie.
- * Easier to use than UTRIE2_GET16() and UTRIE2_GET32() but slower.
- * Easier to use because, unlike the macros, this function works on all UTrie2
- * objects, frozen or not, holding 16-bit or 32-bit data values.
- *
- * @param trie the trie
- * @param c the code point
- * @return the value
- */
-U_CAPI uint32_t U_EXPORT2
-utrie2_get32(const UTrie2 *trie, UChar32 c);
-
-/* enumeration callback types */
-
-/**
- * Callback from utrie2_enum(), extracts a uint32_t value from a
- * trie value. This value will be passed on to the UTrie2EnumRange function.
- *
- * @param context an opaque pointer, as passed into utrie2_enum()
- * @param value a value from the trie
- * @return the value that is to be passed on to the UTrie2EnumRange function
- */
-typedef uint32_t U_CALLCONV
-UTrie2EnumValue(const void *context, uint32_t value);
-
-/**
- * Callback from utrie2_enum(), is called for each contiguous range
- * of code points with the same value as retrieved from the trie and
- * transformed by the UTrie2EnumValue function.
- *
- * The callback function can stop the enumeration by returning FALSE.
- *
- * @param context an opaque pointer, as passed into utrie2_enum()
- * @param start the first code point in a contiguous range with value
- * @param end the last code point in a contiguous range with value (inclusive)
- * @param value the value that is set for all code points in [start..end]
- * @return FALSE to stop the enumeration
- */
-typedef UBool U_CALLCONV
-UTrie2EnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value);
-
-/**
- * Enumerate efficiently all values in a trie.
- * Do not modify the trie during the enumeration.
- *
- * For each entry in the trie, the value to be delivered is passed through
- * the UTrie2EnumValue function.
- * The value is unchanged if that function pointer is NULL.
- *
- * For each contiguous range of code points with a given (transformed) value,
- * the UTrie2EnumRange function is called.
- *
- * @param trie a pointer to the trie
- * @param enumValue a pointer to a function that may transform the trie entry value,
- * or NULL if the values from the trie are to be used directly
- * @param enumRange a pointer to a function that is called for each contiguous range
- * of code points with the same (transformed) value
- * @param context an opaque pointer that is passed on to the callback functions
- */
-U_CAPI void U_EXPORT2
-utrie2_enum(const UTrie2 *trie,
- UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context);
-
-/* Building a trie ---------------------------------------------------------- */
-
-/**
- * Open an empty, writable trie. At build time, 32-bit data values are used.
- * utrie2_freeze() takes a valueBits parameter
- * which determines the data value width in the serialized and frozen forms.
- * You must utrie2_close() the trie once you are done using it.
- *
- * @param initialValue the initial value that is set for all code points
- * @param errorValue the value for out-of-range code points and illegal UTF-8
- * @param pErrorCode an in/out ICU UErrorCode
- * @return a pointer to the allocated and initialized new trie
- */
-U_CAPI UTrie2 * U_EXPORT2
-utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode);
-
-/**
- * Clone a trie.
- * You must utrie2_close() the clone once you are done using it.
- *
- * @param other the trie to clone
- * @param pErrorCode an in/out ICU UErrorCode
- * @return a pointer to the new trie clone
- */
-U_CAPI UTrie2 * U_EXPORT2
-utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode);
-
-/**
- * Clone a trie. The clone will be mutable/writable even if the other trie
- * is frozen. (See utrie2_freeze().)
- * You must utrie2_close() the clone once you are done using it.
- *
- * @param other the trie to clone
- * @param pErrorCode an in/out ICU UErrorCode
- * @return a pointer to the new trie clone
- */
-U_CAPI UTrie2 * U_EXPORT2
-utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode);
-
-/**
- * Close a trie and release associated memory.
- *
- * @param trie the trie
- */
-U_CAPI void U_EXPORT2
-utrie2_close(UTrie2 *trie);
-
-/**
- * Set a value for a code point.
- *
- * @param trie the unfrozen trie
- * @param c the code point
- * @param value the value
- * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
- * - U_NO_WRITE_PERMISSION if the trie is frozen
- */
-U_CAPI void U_EXPORT2
-utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode);
-
-/**
- * Set a value in a range of code points [start..end].
- * All code points c with start<=c<=end will get the value if
- * overwrite is TRUE or if the old value is the initial value.
- *
- * @param trie the unfrozen trie
- * @param start the first code point to get the value
- * @param end the last code point to get the value (inclusive)
- * @param value the value
- * @param overwrite flag for whether old non-initial values are to be overwritten
- * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
- * - U_NO_WRITE_PERMISSION if the trie is frozen
- */
-U_CAPI void U_EXPORT2
-utrie2_setRange32(UTrie2 *trie,
- UChar32 start, UChar32 end,
- uint32_t value, UBool overwrite,
- UErrorCode *pErrorCode);
-
-/**
- * Freeze a trie. Make it immutable (read-only) and compact it,
- * ready for serialization and for use with fast macros.
- * Functions to set values will fail after serializing.
- *
- * A trie can be frozen only once. If this function is called again with different
- * valueBits then it will set a U_ILLEGAL_ARGUMENT_ERROR.
- *
- * @param trie the trie
- * @param valueBits selects the data entry size; if smaller than 32 bits, then
- * the values stored in the trie will be truncated
- * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
- * - U_INDEX_OUTOFBOUNDS_ERROR if the compacted index or data arrays are too long
- * for serialization
- * (the trie will be immutable and usable,
- * but not frozen and not usable with the fast macros)
- *
- * @see utrie2_cloneAsThawed
- */
-U_CAPI void U_EXPORT2
-utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode);
-
-/**
- * Test if the trie is frozen. (See utrie2_freeze().)
- *
- * @param trie the trie
- * @return TRUE if the trie is frozen, that is, immutable, ready for serialization
- * and for use with fast macros
- */
-U_CAPI UBool U_EXPORT2
-utrie2_isFrozen(const UTrie2 *trie);
-
-/**
- * Serialize a frozen trie into 32-bit aligned memory.
- * If the trie is not frozen, then the function returns with a U_ILLEGAL_ARGUMENT_ERROR.
- * A trie can be serialized multiple times.
- *
- * @param trie the frozen trie
- * @param data a pointer to 32-bit-aligned memory to be filled with the trie data,
- * can be NULL if capacity==0
- * @param capacity the number of bytes available at data,
- * or 0 for preflighting
- * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
- * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
- * - U_ILLEGAL_ARGUMENT_ERROR if the trie is not frozen or the data and capacity
- * parameters are bad
- * @return the number of bytes written or needed for the trie
- *
- * @see utrie2_openFromSerialized()
- */
-U_CAPI int32_t U_EXPORT2
-utrie2_serialize(const UTrie2 *trie,
- void *data, int32_t capacity,
- UErrorCode *pErrorCode);
-
-/* Public UTrie2 API: miscellaneous functions ------------------------------- */
-
-/**
- * Build a UTrie2 (version 2) from a UTrie (version 1).
- * Enumerates all values in the UTrie and builds a UTrie2 with the same values.
- * The resulting UTrie2 will be frozen.
- *
- * @param trie1 the runtime UTrie structure to be enumerated
- * @param errorValue the value for out-of-range code points and illegal UTF-8
- * @param pErrorCode an in/out ICU UErrorCode
- * @return The frozen UTrie2 with the same values as the UTrie.
- */
-U_CAPI UTrie2 * U_EXPORT2
-utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode);
-
-/* Public UTrie2 API macros ------------------------------------------------- */
-
-/*
- * These macros provide fast data lookup from a frozen trie.
- * They will crash when used on an unfrozen trie.
- */
-
-/**
- * Return a 16-bit trie value from a code point, with range checking.
- * Returns trie->errorValue if c is not in the range 0..U+10ffff.
- *
- * @param trie (const UTrie2 *, in) a frozen trie
- * @param c (UChar32, in) the input code point
- * @return (uint16_t) The code point's trie value.
- */
-#define UTRIE2_GET16(trie, c) _UTRIE2_GET((trie), index, (trie)->indexLength, (c))
-
-/**
- * Return a 32-bit trie value from a code point, with range checking.
- * Returns trie->errorValue if c is not in the range 0..U+10ffff.
- *
- * @param trie (const UTrie2 *, in) a frozen trie
- * @param c (UChar32, in) the input code point
- * @return (uint32_t) The code point's trie value.
- */
-#define UTRIE2_GET32(trie, c) _UTRIE2_GET((trie), data32, 0, (c))
-
-/**
- * UTF-16: Get the next code point (UChar32 c, out), post-increment src,
- * and get a 16-bit value from the trie.
- *
- * @param trie (const UTrie2 *, in) a frozen trie
- * @param src (const UChar *, in/out) the source text pointer
- * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated
- * @param c (UChar32, out) variable for the code point
- * @param result (uint16_t, out) uint16_t variable for the trie lookup result
- */
-#define UTRIE2_U16_NEXT16(trie, src, limit, c, result) _UTRIE2_U16_NEXT(trie, index, src, limit, c, result)
-
-/**
- * UTF-16: Get the next code point (UChar32 c, out), post-increment src,
- * and get a 32-bit value from the trie.
- *
- * @param trie (const UTrie2 *, in) a frozen trie
- * @param src (const UChar *, in/out) the source text pointer
- * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated
- * @param c (UChar32, out) variable for the code point
- * @param result (uint32_t, out) uint32_t variable for the trie lookup result
- */
-#define UTRIE2_U16_NEXT32(trie, src, limit, c, result) _UTRIE2_U16_NEXT(trie, data32, src, limit, c, result)
-
-/**
- * UTF-16: Get the previous code point (UChar32 c, out), pre-decrement src,
- * and get a 16-bit value from the trie.
- *
- * @param trie (const UTrie2 *, in) a frozen trie
- * @param start (const UChar *, in) the start pointer for the text
- * @param src (const UChar *, in/out) the source text pointer
- * @param c (UChar32, out) variable for the code point
- * @param result (uint16_t, out) uint16_t variable for the trie lookup result
- */
-#define UTRIE2_U16_PREV16(trie, start, src, c, result) _UTRIE2_U16_PREV(trie, index, start, src, c, result)
-
-/**
- * UTF-16: Get the previous code point (UChar32 c, out), pre-decrement src,
- * and get a 32-bit value from the trie.
- *
- * @param trie (const UTrie2 *, in) a frozen trie
- * @param start (const UChar *, in) the start pointer for the text
- * @param src (const UChar *, in/out) the source text pointer
- * @param c (UChar32, out) variable for the code point
- * @param result (uint32_t, out) uint32_t variable for the trie lookup result
- */
-#define UTRIE2_U16_PREV32(trie, start, src, c, result) _UTRIE2_U16_PREV(trie, data32, start, src, c, result)
-
-/**
- * UTF-8: Post-increment src and get a 16-bit value from the trie.
- *
- * @param trie (const UTrie2 *, in) a frozen trie
- * @param src (const char *, in/out) the source text pointer
- * @param limit (const char *, in) the limit pointer for the text (must not be NULL)
- * @param result (uint16_t, out) uint16_t variable for the trie lookup result
- */
-#define UTRIE2_U8_NEXT16(trie, src, limit, result)\
- _UTRIE2_U8_NEXT(trie, data16, index, src, limit, result)
-
-/**
- * UTF-8: Post-increment src and get a 32-bit value from the trie.
- *
- * @param trie (const UTrie2 *, in) a frozen trie
- * @param src (const char *, in/out) the source text pointer
- * @param limit (const char *, in) the limit pointer for the text (must not be NULL)
- * @param result (uint32_t, out) uint32_t variable for the trie lookup result
- */
-#define UTRIE2_U8_NEXT32(trie, src, limit, result) \
- _UTRIE2_U8_NEXT(trie, data32, data32, src, limit, result)
-
-/**
- * UTF-8: Pre-decrement src and get a 16-bit value from the trie.
- *
- * @param trie (const UTrie2 *, in) a frozen trie
- * @param start (const char *, in) the start pointer for the text
- * @param src (const char *, in/out) the source text pointer
- * @param result (uint16_t, out) uint16_t variable for the trie lookup result
- */
-#define UTRIE2_U8_PREV16(trie, start, src, result) \
- _UTRIE2_U8_PREV(trie, data16, index, start, src, result)
-
-/**
- * UTF-8: Pre-decrement src and get a 32-bit value from the trie.
- *
- * @param trie (const UTrie2 *, in) a frozen trie
- * @param start (const char *, in) the start pointer for the text
- * @param src (const char *, in/out) the source text pointer
- * @param result (uint32_t, out) uint32_t variable for the trie lookup result
- */
-#define UTRIE2_U8_PREV32(trie, start, src, result) \
- _UTRIE2_U8_PREV(trie, data32, data32, start, src, result)
-
-/* Public UTrie2 API: optimized UTF-16 access ------------------------------- */
-
-/*
- * The following functions and macros are used for highly optimized UTF-16
- * text processing. The UTRIE2_U16_NEXTxy() macros do not depend on these.
- *
- * A UTrie2 stores separate values for lead surrogate code _units_ vs. code _points_.
- * UTF-16 text processing can be optimized by detecting surrogate pairs and
- * assembling supplementary code points only when there is non-trivial data
- * available.
- *
- * At build-time, use utrie2_enumForLeadSurrogate() to see if there
- * is non-trivial (non-initialValue) data for any of the supplementary
- * code points associated with a lead surrogate.
- * If so, then set a special (application-specific) value for the
- * lead surrogate code _unit_, with utrie2_set32ForLeadSurrogateCodeUnit().
- *
- * At runtime, use UTRIE2_GET16_FROM_U16_SINGLE_LEAD() or
- * UTRIE2_GET32_FROM_U16_SINGLE_LEAD() per code unit. If there is non-trivial
- * data and the code unit is a lead surrogate, then check if a trail surrogate
- * follows. If so, assemble the supplementary code point with
- * U16_GET_SUPPLEMENTARY() and look up its value with UTRIE2_GET16_FROM_SUPP()
- * or UTRIE2_GET32_FROM_SUPP(); otherwise reset the lead
- * surrogate's value or do a code point lookup for it.
- *
- * If there is only trivial data for lead and trail surrogates, then processing
- * can often skip them. For example, in normalization or case mapping
- * all characters that do not have any mappings are simply copied as is.
- */
-
-/**
- * Get a value from a lead surrogate code unit as stored in the trie.
- *
- * @param trie the trie
- * @param c the code unit (U+D800..U+DBFF)
- * @return the value
- */
-U_CAPI uint32_t U_EXPORT2
-utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c);
-
-/**
- * Enumerate the trie values for the 1024=0x400 code points
- * corresponding to a given lead surrogate.
- * For example, for the lead surrogate U+D87E it will enumerate the values
- * for [U+2F800..U+2FC00[.
- * Used by data builder code that sets special lead surrogate code unit values
- * for optimized UTF-16 string processing.
- *
- * Do not modify the trie during the enumeration.
- *
- * Except for the limited code point range, this functions just like utrie2_enum():
- * For each entry in the trie, the value to be delivered is passed through
- * the UTrie2EnumValue function.
- * The value is unchanged if that function pointer is NULL.
- *
- * For each contiguous range of code points with a given (transformed) value,
- * the UTrie2EnumRange function is called.
- *
- * @param trie a pointer to the trie
- * @param enumValue a pointer to a function that may transform the trie entry value,
- * or NULL if the values from the trie are to be used directly
- * @param enumRange a pointer to a function that is called for each contiguous range
- * of code points with the same (transformed) value
- * @param context an opaque pointer that is passed on to the callback functions
- */
-U_CAPI void U_EXPORT2
-utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
- UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange,
- const void *context);
-
-/**
- * Set a value for a lead surrogate code unit.
- *
- * @param trie the unfrozen trie
- * @param lead the lead surrogate code unit (U+D800..U+DBFF)
- * @param value the value
- * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
- * - U_NO_WRITE_PERMISSION if the trie is frozen
- */
-U_CAPI void U_EXPORT2
-utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
- UChar32 lead, uint32_t value,
- UErrorCode *pErrorCode);
-
-/**
- * Return a 16-bit trie value from a UTF-16 single/lead code unit (<=U+ffff).
- * Same as UTRIE2_GET16() if c is a BMP code point except for lead surrogates,
- * but smaller and faster.
- *
- * @param trie (const UTrie2 *, in) a frozen trie
- * @param c (UChar32, in) the input code unit, must be 0<=c<=U+ffff
- * @return (uint16_t) The code unit's trie value.
- */
-#define UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c) _UTRIE2_GET_FROM_U16_SINGLE_LEAD((trie), index, c)
-
-/**
- * Return a 32-bit trie value from a UTF-16 single/lead code unit (<=U+ffff).
- * Same as UTRIE2_GET32() if c is a BMP code point except for lead surrogates,
- * but smaller and faster.
- *
- * @param trie (const UTrie2 *, in) a frozen trie
- * @param c (UChar32, in) the input code unit, must be 0<=c<=U+ffff
- * @return (uint32_t) The code unit's trie value.
- */
-#define UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c) _UTRIE2_GET_FROM_U16_SINGLE_LEAD((trie), data32, c)
-
-/**
- * Return a 16-bit trie value from a supplementary code point (U+10000..U+10ffff).
- *
- * @param trie (const UTrie2 *, in) a frozen trie
- * @param c (UChar32, in) the input code point, must be U+10000<=c<=U+10ffff
- * @return (uint16_t) The code point's trie value.
- */
-#define UTRIE2_GET16_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), index, c)
-
-/**
- * Return a 32-bit trie value from a supplementary code point (U+10000..U+10ffff).
- *
- * @param trie (const UTrie2 *, in) a frozen trie
- * @param c (UChar32, in) the input code point, must be U+10000<=c<=U+10ffff
- * @return (uint32_t) The code point's trie value.
- */
-#define UTRIE2_GET32_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), data32, c)
-
-U_CDECL_END
-
-/* C++ convenience wrappers ------------------------------------------------- */
-
-#ifdef __cplusplus
-
-#include "unicode/utf.h"
-#include "mutex.h"
-
-U_NAMESPACE_BEGIN
-
-// Use the Forward/Backward subclasses below.
-class UTrie2StringIterator : public UMemory {
-public:
- UTrie2StringIterator(const UTrie2 *t, const UChar *p) :
- trie(t), codePointStart(p), codePointLimit(p), codePoint(U_SENTINEL) {}
-
- const UTrie2 *trie;
- const UChar *codePointStart, *codePointLimit;
- UChar32 codePoint;
-};
-
-class BackwardUTrie2StringIterator : public UTrie2StringIterator {
-public:
- BackwardUTrie2StringIterator(const UTrie2 *t, const UChar *s, const UChar *p) :
- UTrie2StringIterator(t, p), start(s) {}
-
- uint16_t previous16();
-
- const UChar *start;
-};
-
-class ForwardUTrie2StringIterator : public UTrie2StringIterator {
-public:
- // Iteration limit l can be NULL.
- // In that case, the caller must detect c==0 and stop.
- ForwardUTrie2StringIterator(const UTrie2 *t, const UChar *p, const UChar *l) :
- UTrie2StringIterator(t, p), limit(l) {}
-
- uint16_t next16();
-
- const UChar *limit;
-};
-
-U_NAMESPACE_END
-
-#endif
-
-/* Internal definitions ----------------------------------------------------- */
-
-U_CDECL_BEGIN
-
-/** Build-time trie structure. */
-struct UNewTrie2;
-typedef struct UNewTrie2 UNewTrie2;
-
-/*
- * Trie structure definition.
- *
- * Either the data table is 16 bits wide and accessed via the index
- * pointer, with each index item increased by indexLength;
- * in this case, data32==NULL, and data16 is used for direct ASCII access.
- *
- * Or the data table is 32 bits wide and accessed via the data32 pointer.
- */
-struct UTrie2 {
- /* protected: used by macros and functions for reading values */
- const uint16_t *index;
- const uint16_t *data16; /* for fast UTF-8 ASCII access, if 16b data */
- const uint32_t *data32; /* NULL if 16b data is used via index */
-
- int32_t indexLength, dataLength;
- uint16_t index2NullOffset; /* 0xffff if there is no dedicated index-2 null block */
- uint16_t dataNullOffset;
- uint32_t initialValue;
- /** Value returned for out-of-range code points and illegal UTF-8. */
- uint32_t errorValue;
-
- /* Start of the last range which ends at U+10ffff, and its value. */
- UChar32 highStart;
- int32_t highValueIndex;
-
- /* private: used by builder and unserialization functions */
- void *memory; /* serialized bytes; NULL if not frozen yet */
- int32_t length; /* number of serialized bytes at memory; 0 if not frozen yet */
- UBool isMemoryOwned; /* TRUE if the trie owns the memory */
- UBool padding1;
- int16_t padding2;
- UNewTrie2 *newTrie; /* builder object; NULL when frozen */
-
-#ifdef UTRIE2_DEBUG
- const char *name;
-#endif
-};
-
-/**
- * Trie constants, defining shift widths, index array lengths, etc.
- *
- * These are needed for the runtime macros but users can treat these as
- * implementation details and skip to the actual public API further below.
- */
-enum {
- /** Shift size for getting the index-1 table offset. */
- UTRIE2_SHIFT_1=6+5,
-
- /** Shift size for getting the index-2 table offset. */
- UTRIE2_SHIFT_2=5,
-
- /**
- * Difference between the two shift sizes,
- * for getting an index-1 offset from an index-2 offset. 6=11-5
- */
- UTRIE2_SHIFT_1_2=UTRIE2_SHIFT_1-UTRIE2_SHIFT_2,
-
- /**
- * Number of index-1 entries for the BMP. 32=0x20
- * This part of the index-1 table is omitted from the serialized form.
- */
- UTRIE2_OMITTED_BMP_INDEX_1_LENGTH=0x10000>>UTRIE2_SHIFT_1,
-
- /** Number of code points per index-1 table entry. 2048=0x800 */
- UTRIE2_CP_PER_INDEX_1_ENTRY=1<<UTRIE2_SHIFT_1,
-
- /** Number of entries in an index-2 block. 64=0x40 */
- UTRIE2_INDEX_2_BLOCK_LENGTH=1<<UTRIE2_SHIFT_1_2,
-
- /** Mask for getting the lower bits for the in-index-2-block offset. */
- UTRIE2_INDEX_2_MASK=UTRIE2_INDEX_2_BLOCK_LENGTH-1,
-
- /** Number of entries in a data block. 32=0x20 */
- UTRIE2_DATA_BLOCK_LENGTH=1<<UTRIE2_SHIFT_2,
-
- /** Mask for getting the lower bits for the in-data-block offset. */
- UTRIE2_DATA_MASK=UTRIE2_DATA_BLOCK_LENGTH-1,
-
- /**
- * Shift size for shifting left the index array values.
- * Increases possible data size with 16-bit index values at the cost
- * of compactability.
- * This requires data blocks to be aligned by UTRIE2_DATA_GRANULARITY.
- */
- UTRIE2_INDEX_SHIFT=2,
-
- /** The alignment size of a data block. Also the granularity for compaction. */
- UTRIE2_DATA_GRANULARITY=1<<UTRIE2_INDEX_SHIFT,
-
- /* Fixed layout of the first part of the index array. ------------------- */
-
- /**
- * The BMP part of the index-2 table is fixed and linear and starts at offset 0.
- * Length=2048=0x800=0x10000>>UTRIE2_SHIFT_2.
- */
- UTRIE2_INDEX_2_OFFSET=0,
-
- /**
- * The part of the index-2 table for U+D800..U+DBFF stores values for
- * lead surrogate code _units_ not code _points_.
- * Values for lead surrogate code _points_ are indexed with this portion of the table.
- * Length=32=0x20=0x400>>UTRIE2_SHIFT_2. (There are 1024=0x400 lead surrogates.)
- */
- UTRIE2_LSCP_INDEX_2_OFFSET=0x10000>>UTRIE2_SHIFT_2,
- UTRIE2_LSCP_INDEX_2_LENGTH=0x400>>UTRIE2_SHIFT_2,
-
- /** Count the lengths of both BMP pieces. 2080=0x820 */
- UTRIE2_INDEX_2_BMP_LENGTH=UTRIE2_LSCP_INDEX_2_OFFSET+UTRIE2_LSCP_INDEX_2_LENGTH,
-
- /**
- * The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820.
- * Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2.
- */
- UTRIE2_UTF8_2B_INDEX_2_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH,
- UTRIE2_UTF8_2B_INDEX_2_LENGTH=0x800>>6, /* U+0800 is the first code point after 2-byte UTF-8 */
-
- /**
- * The index-1 table, only used for supplementary code points, at offset 2112=0x840.
- * Variable length, for code points up to highStart, where the last single-value range starts.
- * Maximum length 512=0x200=0x100000>>UTRIE2_SHIFT_1.
- * (For 0x100000 supplementary code points U+10000..U+10ffff.)
- *
- * The part of the index-2 table for supplementary code points starts
- * after this index-1 table.
- *
- * Both the index-1 table and the following part of the index-2 table
- * are omitted completely if there is only BMP data.
- */
- UTRIE2_INDEX_1_OFFSET=UTRIE2_UTF8_2B_INDEX_2_OFFSET+UTRIE2_UTF8_2B_INDEX_2_LENGTH,
- UTRIE2_MAX_INDEX_1_LENGTH=0x100000>>UTRIE2_SHIFT_1,
-
- /*
- * Fixed layout of the first part of the data array. -----------------------
- * Starts with 4 blocks (128=0x80 entries) for ASCII.
- */
-
- /**
- * The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80.
- * Used with linear access for single bytes 0..0xbf for simple error handling.
- * Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH.
- */
- UTRIE2_BAD_UTF8_DATA_OFFSET=0x80,
-
- /** The start of non-linear-ASCII data blocks, at offset 192=0xc0. */
- UTRIE2_DATA_START_OFFSET=0xc0
-};
-
-/* Internal functions and macros -------------------------------------------- */
-
-/**
- * Internal function for part of the UTRIE2_U8_NEXTxx() macro implementations.
- * Do not call directly.
- * @internal
- */
-U_INTERNAL int32_t U_EXPORT2
-utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c,
- const uint8_t *src, const uint8_t *limit);
-
-/**
- * Internal function for part of the UTRIE2_U8_PREVxx() macro implementations.
- * Do not call directly.
- * @internal
- */
-U_INTERNAL int32_t U_EXPORT2
-utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
- const uint8_t *start, const uint8_t *src);
-
-
-/** Internal low-level trie getter. Returns a data index. */
-#define _UTRIE2_INDEX_RAW(offset, trieIndex, c) \
- (((int32_t)((trieIndex)[(offset)+((c)>>UTRIE2_SHIFT_2)]) \
- <<UTRIE2_INDEX_SHIFT)+ \
- ((c)&UTRIE2_DATA_MASK))
-
-/** Internal trie getter from a UTF-16 single/lead code unit. Returns the data index. */
-#define _UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(trieIndex, c) _UTRIE2_INDEX_RAW(0, trieIndex, c)
-
-/** Internal trie getter from a lead surrogate code point (D800..DBFF). Returns the data index. */
-#define _UTRIE2_INDEX_FROM_LSCP(trieIndex, c) \
- _UTRIE2_INDEX_RAW(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2), trieIndex, c)
-
-/** Internal trie getter from a BMP code point. Returns the data index. */
-#define _UTRIE2_INDEX_FROM_BMP(trieIndex, c) \
- _UTRIE2_INDEX_RAW(U_IS_LEAD(c) ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \
- trieIndex, c)
-
-/** Internal trie getter from a supplementary code point below highStart. Returns the data index. */
-#define _UTRIE2_INDEX_FROM_SUPP(trieIndex, c) \
- (((int32_t)((trieIndex)[ \
- (trieIndex)[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ \
- ((c)>>UTRIE2_SHIFT_1)]+ \
- (((c)>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK)]) \
- <<UTRIE2_INDEX_SHIFT)+ \
- ((c)&UTRIE2_DATA_MASK))
-
-/**
- * Internal trie getter from a code point, with checking that c is in 0..10FFFF.
- * Returns the data index.
- */
-#define _UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c) \
- ((uint32_t)(c)<0xd800 ? \
- _UTRIE2_INDEX_RAW(0, (trie)->index, c) : \
- (uint32_t)(c)<=0xffff ? \
- _UTRIE2_INDEX_RAW( \
- (c)<=0xdbff ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \
- (trie)->index, c) : \
- (uint32_t)(c)>0x10ffff ? \
- (asciiOffset)+UTRIE2_BAD_UTF8_DATA_OFFSET : \
- (c)>=(trie)->highStart ? \
- (trie)->highValueIndex : \
- _UTRIE2_INDEX_FROM_SUPP((trie)->index, c))
-
-/** Internal trie getter from a UTF-16 single/lead code unit. Returns the data. */
-#define _UTRIE2_GET_FROM_U16_SINGLE_LEAD(trie, data, c) \
- (trie)->data[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD((trie)->index, c)]
-
-/** Internal trie getter from a supplementary code point. Returns the data. */
-#define _UTRIE2_GET_FROM_SUPP(trie, data, c) \
- (trie)->data[(c)>=(trie)->highStart ? (trie)->highValueIndex : \
- _UTRIE2_INDEX_FROM_SUPP((trie)->index, c)]
-
-/**
- * Internal trie getter from a code point, with checking that c is in 0..10FFFF.
- * Returns the data.
- */
-#define _UTRIE2_GET(trie, data, asciiOffset, c) \
- (trie)->data[_UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c)]
-
-/** Internal next-post-increment: get the next code point (c) and its data. */
-#define _UTRIE2_U16_NEXT(trie, data, src, limit, c, result) UPRV_BLOCK_MACRO_BEGIN { \
- { \
- uint16_t __c2; \
- (c)=*(src)++; \
- if(!U16_IS_LEAD(c)) { \
- (result)=_UTRIE2_GET_FROM_U16_SINGLE_LEAD(trie, data, c); \
- } else if((src)==(limit) || !U16_IS_TRAIL(__c2=*(src))) { \
- (result)=(trie)->data[_UTRIE2_INDEX_FROM_LSCP((trie)->index, c)]; \
- } else { \
- ++(src); \
- (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
- (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/** Internal pre-decrement-previous: get the previous code point (c) and its data */
-#define _UTRIE2_U16_PREV(trie, data, start, src, c, result) UPRV_BLOCK_MACRO_BEGIN { \
- { \
- uint16_t __c2; \
- (c)=*--(src); \
- if(!U16_IS_TRAIL(c) || (src)==(start) || !U16_IS_LEAD(__c2=*((src)-1))) { \
- (result)=(trie)->data[_UTRIE2_INDEX_FROM_BMP((trie)->index, c)]; \
- } else { \
- --(src); \
- (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
- (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/** Internal UTF-8 next-post-increment: get the next code point's data. */
-#define _UTRIE2_U8_NEXT(trie, ascii, data, src, limit, result) UPRV_BLOCK_MACRO_BEGIN { \
- uint8_t __lead=(uint8_t)*(src)++; \
- if(U8_IS_SINGLE(__lead)) { \
- (result)=(trie)->ascii[__lead]; \
- } else { \
- uint8_t __t1, __t2; \
- if( /* handle U+0800..U+FFFF inline */ \
- 0xe0<=__lead && __lead<0xf0 && ((src)+1)<(limit) && \
- U8_IS_VALID_LEAD3_AND_T1(__lead, __t1=(uint8_t)*(src)) && \
- (__t2=(uint8_t)(*((src)+1)-0x80))<= 0x3f \
- ) { \
- (src)+=2; \
- (result)=(trie)->data[ \
- ((int32_t)((trie)->index[((__lead-0xe0)<<(12-UTRIE2_SHIFT_2))+ \
- ((__t1&0x3f)<<(6-UTRIE2_SHIFT_2))+(__t2>>UTRIE2_SHIFT_2)]) \
- <<UTRIE2_INDEX_SHIFT)+ \
- (__t2&UTRIE2_DATA_MASK)]; \
- } else if( /* handle U+0080..U+07FF inline */ \
- __lead<0xe0 && __lead>=0xc2 && (src)<(limit) && \
- (__t1=(uint8_t)(*(src)-0x80))<=0x3f \
- ) { \
- ++(src); \
- (result)=(trie)->data[ \
- (trie)->index[(UTRIE2_UTF8_2B_INDEX_2_OFFSET-0xc0)+__lead]+ \
- __t1]; \
- } else { \
- int32_t __index=utrie2_internalU8NextIndex((trie), __lead, (const uint8_t *)(src), \
- (const uint8_t *)(limit)); \
- (src)+=__index&7; \
- (result)=(trie)->data[__index>>3]; \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/** Internal UTF-8 pre-decrement-previous: get the previous code point's data. */
-#define _UTRIE2_U8_PREV(trie, ascii, data, start, src, result) UPRV_BLOCK_MACRO_BEGIN { \
- uint8_t __b=(uint8_t)*--(src); \
- if(U8_IS_SINGLE(__b)) { \
- (result)=(trie)->ascii[__b]; \
- } else { \
- int32_t __index=utrie2_internalU8PrevIndex((trie), __b, (const uint8_t *)(start), \
- (const uint8_t *)(src)); \
- (src)-=__index&7; \
- (result)=(trie)->data[__index>>3]; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-U_CDECL_END
-
-#endif
diff --git a/contrib/libs/icu/common/utrie2_builder.cpp b/contrib/libs/icu/common/utrie2_builder.cpp
deleted file mode 100644
index 8de824cc3d4..00000000000
--- a/contrib/libs/icu/common/utrie2_builder.cpp
+++ /dev/null
@@ -1,1483 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2001-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: utrie2_builder.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2008sep26 (split off from utrie2.c)
-* created by: Markus W. Scherer
-*
-* This is a common implementation of a Unicode trie.
-* It is a kind of compressed, serializable table of 16- or 32-bit values associated with
-* Unicode code points (0..0x10ffff).
-* This is the second common version of a Unicode trie (hence the name UTrie2).
-* See utrie2.h for a comparison.
-*
-* This file contains only the builder code.
-* See utrie2.c for the runtime and enumeration code.
-*/
-// #define UTRIE2_DEBUG
-#ifdef UTRIE2_DEBUG
-# include <stdio.h>
-#endif
-// #define UCPTRIE_DEBUG
-
-#include "unicode/utypes.h"
-#ifdef UCPTRIE_DEBUG
-#include "unicode/ucptrie.h"
-#include "unicode/umutablecptrie.h"
-#include "ucptrie_impl.h"
-#endif
-#include "cmemory.h"
-#include "utrie2.h"
-#include "utrie2_impl.h"
-
-#include "utrie.h" // for utrie2_fromUTrie()
-
-/* Implementation notes ----------------------------------------------------- */
-
-/*
- * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values
- * have been chosen to minimize trie sizes overall.
- * Most of the code is flexible enough to work with a range of values,
- * within certain limits.
- *
- * Exception: Support for separate values for lead surrogate code _units_
- * vs. code _points_ was added after the constants were fixed,
- * and has not been tested nor particularly designed for different constant values.
- * (Especially the utrie2_enum() code that jumps to the special LSCP index-2
- * part and back.)
- *
- * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data
- * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH
- * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction
- * remapping) stops working.
- *
- * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate()
- * assumes that a single index-2 block is used for 0x400 code points
- * corresponding to one lead surrogate.
- *
- * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains
- * more than one Unicode plane, and the split of the index-2 table into a BMP
- * part and a supplementary part, with a gap in between, would not work.
- *
- * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because
- * there is data with more than 64k distinct values,
- * for example for Unihan collation with a separate collation weight per
- * Han character.
- */
-
-/* Building a trie ----------------------------------------------------------*/
-
-enum {
- /** The null index-2 block, following the gap in the index-2 table. */
- UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH,
-
- /** The start of allocated index-2 blocks. */
- UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH,
-
- /**
- * The null data block.
- * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
- * to work with 6-bit trail bytes from 2-byte UTF-8.
- */
- UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET,
-
- /** The start of allocated data blocks. */
- UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40,
-
- /**
- * The start of data blocks for U+0800 and above.
- * Below, compaction uses a block length of 64 for 2-byte UTF-8.
- * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
- * Data values for 0x780 code points beyond ASCII.
- */
- UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780
-};
-
-/* Start with allocation of 16k data entries. */
-#define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14)
-
-/* Grow about 8x each time. */
-#define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17)
-
-static int32_t
-allocIndex2Block(UNewTrie2 *trie);
-
-U_CAPI UTrie2 * U_EXPORT2
-utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
- UTrie2 *trie;
- UNewTrie2 *newTrie;
- uint32_t *data;
- int32_t i, j;
-
- if(U_FAILURE(*pErrorCode)) {
- return NULL;
- }
-
- trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
- newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
- data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4);
- if(trie==NULL || newTrie==NULL || data==NULL) {
- uprv_free(trie);
- uprv_free(newTrie);
- uprv_free(data);
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
-
- uprv_memset(trie, 0, sizeof(UTrie2));
- trie->initialValue=initialValue;
- trie->errorValue=errorValue;
- trie->highStart=0x110000;
- trie->newTrie=newTrie;
-#ifdef UTRIE2_DEBUG
- trie->name="open";
-#endif
-
- newTrie->data=data;
-#ifdef UCPTRIE_DEBUG
- newTrie->t3=umutablecptrie_open(initialValue, errorValue, pErrorCode);
-#endif
- newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH;
- newTrie->initialValue=initialValue;
- newTrie->errorValue=errorValue;
- newTrie->highStart=0x110000;
- newTrie->firstFreeBlock=0; /* no free block in the list */
- newTrie->isCompacted=FALSE;
-
- /*
- * preallocate and reset
- * - ASCII
- * - the bad-UTF-8-data block
- * - the null data block
- */
- for(i=0; i<0x80; ++i) {
- newTrie->data[i]=initialValue;
- }
- for(; i<0xc0; ++i) {
- newTrie->data[i]=errorValue;
- }
- for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) {
- newTrie->data[i]=initialValue;
- }
- newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET;
- newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET;
-
- /* set the index-2 indexes for the 4=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
- for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
- newTrie->index2[i]=j;
- newTrie->map[i]=1;
- }
- /* reference counts for the bad-UTF-8-data block */
- for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
- newTrie->map[i]=0;
- }
- /*
- * Reference counts for the null data block: all blocks except for the ASCII blocks.
- * Plus 1 so that we don't drop this block during compaction.
- * Plus as many as needed for lead surrogate code points.
- */
- /* i==newTrie->dataNullOffset */
- newTrie->map[i++]=
- (0x110000>>UTRIE2_SHIFT_2)-
- (0x80>>UTRIE2_SHIFT_2)+
- 1+
- UTRIE2_LSCP_INDEX_2_LENGTH;
- j+=UTRIE2_DATA_BLOCK_LENGTH;
- for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
- newTrie->map[i]=0;
- }
-
- /*
- * set the remaining indexes in the BMP index-2 block
- * to the null data block
- */
- for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
- newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET;
- }
-
- /*
- * Fill the index gap with impossible values so that compaction
- * does not overlap other index-2 blocks with the gap.
- */
- for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
- newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1;
- }
-
- /* set the indexes in the null index-2 block */
- for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
- newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET;
- }
- newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET;
- newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET;
-
- /* set the index-1 indexes for the linear index-2 block */
- for(i=0, j=0;
- i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
- ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH
- ) {
- newTrie->index1[i]=j;
- }
-
- /* set the remaining index-1 indexes to the null index-2 block */
- for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
- newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET;
- }
-
- /*
- * Preallocate and reset data for U+0080..U+07ff,
- * for 2-byte UTF-8 which will be compacted in 64-blocks
- * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
- */
- for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) {
- utrie2_set32(trie, i, initialValue, pErrorCode);
- }
-
- return trie;
-}
-
-static UNewTrie2 *
-cloneBuilder(const UNewTrie2 *other) {
- UNewTrie2 *trie;
-
- trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
- if(trie==NULL) {
- return NULL;
- }
-
- trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4);
- if(trie->data==NULL) {
- uprv_free(trie);
- return NULL;
- }
-#ifdef UCPTRIE_DEBUG
- if(other->t3==nullptr) {
- trie->t3=nullptr;
- } else {
- UErrorCode errorCode=U_ZERO_ERROR;
- trie->t3=umutablecptrie_clone(other->t3, &errorCode);
- }
-#endif
- trie->dataCapacity=other->dataCapacity;
-
- /* clone data */
- uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1));
- uprv_memcpy(trie->index2, other->index2, (size_t)other->index2Length*4);
- trie->index2NullOffset=other->index2NullOffset;
- trie->index2Length=other->index2Length;
-
- uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4);
- trie->dataNullOffset=other->dataNullOffset;
- trie->dataLength=other->dataLength;
-
- /* reference counters */
- if(other->isCompacted) {
- trie->firstFreeBlock=0;
- } else {
- uprv_memcpy(trie->map, other->map, ((size_t)other->dataLength>>UTRIE2_SHIFT_2)*4);
- trie->firstFreeBlock=other->firstFreeBlock;
- }
-
- trie->initialValue=other->initialValue;
- trie->errorValue=other->errorValue;
- trie->highStart=other->highStart;
- trie->isCompacted=other->isCompacted;
-
- return trie;
-}
-
-U_CAPI UTrie2 * U_EXPORT2
-utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) {
- UTrie2 *trie;
-
- if(U_FAILURE(*pErrorCode)) {
- return NULL;
- }
- if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
- if(trie==NULL) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- uprv_memcpy(trie, other, sizeof(UTrie2));
-
- if(other->memory!=NULL) {
- trie->memory=uprv_malloc(other->length);
- if(trie->memory!=NULL) {
- trie->isMemoryOwned=TRUE;
- uprv_memcpy(trie->memory, other->memory, other->length);
-
- /* make the clone's pointers point to its own memory */
- trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory);
- if(other->data16!=NULL) {
- trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory);
- }
- if(other->data32!=NULL) {
- trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory);
- }
- }
- } else /* other->newTrie!=NULL */ {
- trie->newTrie=cloneBuilder(other->newTrie);
- }
-
- if(trie->memory==NULL && trie->newTrie==NULL) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- uprv_free(trie);
- trie=NULL;
- }
- return trie;
-}
-
-typedef struct NewTrieAndStatus {
- UTrie2 *trie;
- UErrorCode errorCode;
- UBool exclusiveLimit; /* rather than inclusive range end */
-} NewTrieAndStatus;
-
-static UBool U_CALLCONV
-copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
- NewTrieAndStatus *nt=(NewTrieAndStatus *)context;
- if(value!=nt->trie->initialValue) {
- if(nt->exclusiveLimit) {
- --end;
- }
- if(start==end) {
- utrie2_set32(nt->trie, start, value, &nt->errorCode);
- } else {
- utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode);
- }
- return U_SUCCESS(nt->errorCode);
- } else {
- return TRUE;
- }
-}
-
-#ifdef UTRIE2_DEBUG
-static long countInitial(const UTrie2 *trie) {
- uint32_t initialValue=trie->initialValue;
- int32_t length=trie->dataLength;
- long count=0;
- if(trie->data16!=nullptr) {
- for(int32_t i=0; i<length; ++i) {
- if(trie->data16[i]==initialValue) { ++count; }
- }
- } else {
- for(int32_t i=0; i<length; ++i) {
- if(trie->data32[i]==initialValue) { ++count; }
- }
- }
- return count;
-}
-
-static void
-utrie_printLengths(const UTrie *trie) {
- long indexLength=trie->indexLength;
- long dataLength=(long)trie->dataLength;
- long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
- printf("**UTrieLengths** index:%6ld data:%6ld serialized:%6ld\n",
- indexLength, dataLength, totalLength);
-}
-
-static void
-utrie2_printLengths(const UTrie2 *trie, const char *which) {
- long indexLength=trie->indexLength;
- long dataLength=(long)trie->dataLength;
- long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
- printf("**UTrie2Lengths(%s %s)** index:%6ld data:%6ld countInitial:%6ld serialized:%6ld\n",
- which, trie->name, indexLength, dataLength, countInitial(trie), totalLength);
-}
-#endif
-
-U_CAPI UTrie2 * U_EXPORT2
-utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) {
- NewTrieAndStatus context;
- UChar lead;
-
- if(U_FAILURE(*pErrorCode)) {
- return NULL;
- }
- if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- if(other->newTrie!=NULL && !other->newTrie->isCompacted) {
- return utrie2_clone(other, pErrorCode); /* clone an unfrozen trie */
- }
-
- /* Clone the frozen trie by enumerating it and building a new one. */
- context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return NULL;
- }
- context.exclusiveLimit=FALSE;
- context.errorCode=*pErrorCode;
- utrie2_enum(other, NULL, copyEnumRange, &context);
- *pErrorCode=context.errorCode;
- for(lead=0xd800; lead<0xdc00; ++lead) {
- uint32_t value;
- if(other->data32==NULL) {
- value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead);
- } else {
- value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead);
- }
- if(value!=other->initialValue) {
- utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
- }
- }
- if(U_FAILURE(*pErrorCode)) {
- utrie2_close(context.trie);
- context.trie=NULL;
- }
- return context.trie;
-}
-
-/* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */
-U_CAPI UTrie2 * U_EXPORT2
-utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) {
- NewTrieAndStatus context;
- UChar lead;
-
- if(U_FAILURE(*pErrorCode)) {
- return NULL;
- }
- if(trie1==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return NULL;
- }
- context.exclusiveLimit=TRUE;
- context.errorCode=*pErrorCode;
- utrie_enum(trie1, NULL, copyEnumRange, &context);
- *pErrorCode=context.errorCode;
- for(lead=0xd800; lead<0xdc00; ++lead) {
- uint32_t value;
- if(trie1->data32==NULL) {
- value=UTRIE_GET16_FROM_LEAD(trie1, lead);
- } else {
- value=UTRIE_GET32_FROM_LEAD(trie1, lead);
- }
- if(value!=trie1->initialValue) {
- utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
- }
- }
- if(U_SUCCESS(*pErrorCode)) {
- utrie2_freeze(context.trie,
- trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,
- pErrorCode);
- }
-#ifdef UTRIE2_DEBUG
- if(U_SUCCESS(*pErrorCode)) {
- utrie_printLengths(trie1);
- utrie2_printLengths(context.trie, "fromUTrie");
- }
-#endif
- if(U_FAILURE(*pErrorCode)) {
- utrie2_close(context.trie);
- context.trie=NULL;
- }
- return context.trie;
-}
-
-static inline UBool
-isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
- int32_t i2, block;
-
- if(U_IS_LEAD(c) && forLSCP) {
- i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
- (c>>UTRIE2_SHIFT_2);
- } else {
- i2=trie->index1[c>>UTRIE2_SHIFT_1]+
- ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
- }
- block=trie->index2[i2];
- return (UBool)(block==trie->dataNullOffset);
-}
-
-static int32_t
-allocIndex2Block(UNewTrie2 *trie) {
- int32_t newBlock, newTop;
-
- newBlock=trie->index2Length;
- newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH;
- if(newTop>UPRV_LENGTHOF(trie->index2)) {
- /*
- * Should never occur.
- * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
- * or the code writes more values than should be possible.
- */
- return -1;
- }
- trie->index2Length=newTop;
- uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4);
- return newBlock;
-}
-
-static int32_t
-getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
- int32_t i1, i2;
-
- if(U_IS_LEAD(c) && forLSCP) {
- return UTRIE2_LSCP_INDEX_2_OFFSET;
- }
-
- i1=c>>UTRIE2_SHIFT_1;
- i2=trie->index1[i1];
- if(i2==trie->index2NullOffset) {
- i2=allocIndex2Block(trie);
- if(i2<0) {
- return -1; /* program error */
- }
- trie->index1[i1]=i2;
- }
- return i2;
-}
-
-static int32_t
-allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) {
- int32_t newBlock, newTop;
-
- if(trie->firstFreeBlock!=0) {
- /* get the first free block */
- newBlock=trie->firstFreeBlock;
- trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2];
- } else {
- /* get a new block from the high end */
- newBlock=trie->dataLength;
- newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH;
- if(newTop>trie->dataCapacity) {
- /* out of memory in the data array */
- int32_t capacity;
- uint32_t *data;
-
- if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) {
- capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH;
- } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) {
- capacity=UNEWTRIE2_MAX_DATA_LENGTH;
- } else {
- /*
- * Should never occur.
- * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
- * or the code writes more values than should be possible.
- */
- return -1;
- }
- data=(uint32_t *)uprv_malloc(capacity*4);
- if(data==NULL) {
- return -1;
- }
- uprv_memcpy(data, trie->data, (size_t)trie->dataLength*4);
- uprv_free(trie->data);
- trie->data=data;
- trie->dataCapacity=capacity;
- }
- trie->dataLength=newTop;
- }
- uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4);
- trie->map[newBlock>>UTRIE2_SHIFT_2]=0;
- return newBlock;
-}
-
-/* call when the block's reference counter reaches 0 */
-static void
-releaseDataBlock(UNewTrie2 *trie, int32_t block) {
- /* put this block at the front of the free-block chain */
- trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock;
- trie->firstFreeBlock=block;
-}
-
-static inline UBool
-isWritableBlock(UNewTrie2 *trie, int32_t block) {
- return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]);
-}
-
-static inline void
-setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) {
- int32_t oldBlock;
- ++trie->map[block>>UTRIE2_SHIFT_2]; /* increment first, in case block==oldBlock! */
- oldBlock=trie->index2[i2];
- if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) {
- releaseDataBlock(trie, oldBlock);
- }
- trie->index2[i2]=block;
-}
-
-/**
- * No error checking for illegal arguments.
- *
- * @return -1 if no new data block available (out of memory in data array)
- * @internal
- */
-static int32_t
-getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
- int32_t i2, oldBlock, newBlock;
-
- i2=getIndex2Block(trie, c, forLSCP);
- if(i2<0) {
- return -1; /* program error */
- }
-
- i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
- oldBlock=trie->index2[i2];
- if(isWritableBlock(trie, oldBlock)) {
- return oldBlock;
- }
-
- /* allocate a new data block */
- newBlock=allocDataBlock(trie, oldBlock);
- if(newBlock<0) {
- /* out of memory in the data array */
- return -1;
- }
- setIndex2Entry(trie, i2, newBlock);
- return newBlock;
-}
-
-/**
- * @return TRUE if the value was successfully set
- */
-static void
-set32(UNewTrie2 *trie,
- UChar32 c, UBool forLSCP, uint32_t value,
- UErrorCode *pErrorCode) {
- int32_t block;
-
- if(trie==NULL || trie->isCompacted) {
- *pErrorCode=U_NO_WRITE_PERMISSION;
- return;
- }
-#ifdef UCPTRIE_DEBUG
- umutablecptrie_set(trie->t3, c, value, pErrorCode);
-#endif
-
- block=getDataBlock(trie, c, forLSCP);
- if(block<0) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- trie->data[block+(c&UTRIE2_DATA_MASK)]=value;
-}
-
-U_CAPI void U_EXPORT2
-utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- if((uint32_t)c>0x10ffff) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- set32(trie->newTrie, c, TRUE, value, pErrorCode);
-}
-
-U_CAPI void U_EXPORT2
-utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
- UChar32 c, uint32_t value,
- UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- if(!U_IS_LEAD(c)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- set32(trie->newTrie, c, FALSE, value, pErrorCode);
-}
-
-static void
-writeBlock(uint32_t *block, uint32_t value) {
- uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH;
- while(block<limit) {
- *block++=value;
- }
-}
-
-/**
- * initialValue is ignored if overwrite=TRUE
- * @internal
- */
-static void
-fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
- uint32_t value, uint32_t initialValue, UBool overwrite) {
- uint32_t *pLimit;
-
- pLimit=block+limit;
- block+=start;
- if(overwrite) {
- while(block<pLimit) {
- *block++=value;
- }
- } else {
- while(block<pLimit) {
- if(*block==initialValue) {
- *block=value;
- }
- ++block;
- }
- }
-}
-
-U_CAPI void U_EXPORT2
-utrie2_setRange32(UTrie2 *trie,
- UChar32 start, UChar32 end,
- uint32_t value, UBool overwrite,
- UErrorCode *pErrorCode) {
- /*
- * repeat value in [start..end]
- * mark index values for repeat-data blocks by setting bit 31 of the index values
- * fill around existing values if any, if(overwrite)
- */
- UNewTrie2 *newTrie;
- int32_t block, rest, repeatBlock;
- UChar32 limit;
-
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- newTrie=trie->newTrie;
- if(newTrie==NULL || newTrie->isCompacted) {
- *pErrorCode=U_NO_WRITE_PERMISSION;
- return;
- }
-#ifdef UCPTRIE_DEBUG
- umutablecptrie_setRange(newTrie->t3, start, end, value, pErrorCode);
-#endif
- if(!overwrite && value==newTrie->initialValue) {
- return; /* nothing to do */
- }
-
- limit=end+1;
- if(start&UTRIE2_DATA_MASK) {
- UChar32 nextStart;
-
- /* set partial block at [start..following block boundary[ */
- block=getDataBlock(newTrie, start, TRUE);
- if(block<0) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- nextStart=(start+UTRIE2_DATA_MASK)&~UTRIE2_DATA_MASK;
- if(nextStart<=limit) {
- fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH,
- value, newTrie->initialValue, overwrite);
- start=nextStart;
- } else {
- fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK,
- value, newTrie->initialValue, overwrite);
- return;
- }
- }
-
- /* number of positions in the last, partial block */
- rest=limit&UTRIE2_DATA_MASK;
-
- /* round down limit to a block boundary */
- limit&=~UTRIE2_DATA_MASK;
-
- /* iterate over all-value blocks */
- if(value==newTrie->initialValue) {
- repeatBlock=newTrie->dataNullOffset;
- } else {
- repeatBlock=-1;
- }
-
- while(start<limit) {
- int32_t i2;
- UBool setRepeatBlock=FALSE;
-
- if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) {
- start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */
- continue;
- }
-
- /* get index value */
- i2=getIndex2Block(newTrie, start, TRUE);
- if(i2<0) {
- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
- return;
- }
- i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
- block=newTrie->index2[i2];
- if(isWritableBlock(newTrie, block)) {
- /* already allocated */
- if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) {
- /*
- * We overwrite all values, and it's not a
- * protected (ASCII-linear or 2-byte UTF-8) block:
- * replace with the repeatBlock.
- */
- setRepeatBlock=TRUE;
- } else {
- /* !overwrite, or protected block: just write the values into this block */
- fillBlock(newTrie->data+block,
- 0, UTRIE2_DATA_BLOCK_LENGTH,
- value, newTrie->initialValue, overwrite);
- }
- } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) {
- /*
- * Set the repeatBlock instead of the null block or previous repeat block:
- *
- * If !isWritableBlock() then all entries in the block have the same value
- * because it's the null block or a range block (the repeatBlock from a previous
- * call to utrie2_setRange32()).
- * No other blocks are used multiple times before compacting.
- *
- * The null block is the only non-writable block with the initialValue because
- * of the repeatBlock initialization above. (If value==initialValue, then
- * the repeatBlock will be the null data block.)
- *
- * We set our repeatBlock if the desired value differs from the block's value,
- * and if we overwrite any data or if the data is all initial values
- * (which is the same as the block being the null block, see above).
- */
- setRepeatBlock=TRUE;
- }
- if(setRepeatBlock) {
- if(repeatBlock>=0) {
- setIndex2Entry(newTrie, i2, repeatBlock);
- } else {
- /* create and set and fill the repeatBlock */
- repeatBlock=getDataBlock(newTrie, start, TRUE);
- if(repeatBlock<0) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- writeBlock(newTrie->data+repeatBlock, value);
- }
- }
-
- start+=UTRIE2_DATA_BLOCK_LENGTH;
- }
-
- if(rest>0) {
- /* set partial block at [last block boundary..limit[ */
- block=getDataBlock(newTrie, start, TRUE);
- if(block<0) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite);
- }
-
- return;
-}
-
-/* compaction --------------------------------------------------------------- */
-
-static inline UBool
-equal_int32(const int32_t *s, const int32_t *t, int32_t length) {
- while(length>0 && *s==*t) {
- ++s;
- ++t;
- --length;
- }
- return (UBool)(length==0);
-}
-
-static inline UBool
-equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
- while(length>0 && *s==*t) {
- ++s;
- ++t;
- --length;
- }
- return (UBool)(length==0);
-}
-
-static int32_t
-findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) {
- int32_t block;
-
- /* ensure that we do not even partially get past index2Length */
- index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH;
-
- for(block=0; block<=index2Length; ++block) {
- if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) {
- return block;
- }
- }
- return -1;
-}
-
-static int32_t
-findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) {
- int32_t block;
-
- /* ensure that we do not even partially get past dataLength */
- dataLength-=blockLength;
-
- for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) {
- if(equal_uint32(data+block, data+otherBlock, blockLength)) {
- return block;
- }
- }
- return -1;
-}
-
-/*
- * Find the start of the last range in the trie by enumerating backward.
- * Indexes for supplementary code points higher than this will be omitted.
- */
-static UChar32
-findHighStart(UNewTrie2 *trie, uint32_t highValue) {
- const uint32_t *data32;
-
- uint32_t value, initialValue;
- UChar32 c, prev;
- int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
-
- data32=trie->data;
- initialValue=trie->initialValue;
-
- index2NullOffset=trie->index2NullOffset;
- nullBlock=trie->dataNullOffset;
-
- /* set variables for previous range */
- if(highValue==initialValue) {
- prevI2Block=index2NullOffset;
- prevBlock=nullBlock;
- } else {
- prevI2Block=-1;
- prevBlock=-1;
- }
- prev=0x110000;
-
- /* enumerate index-2 blocks */
- i1=UNEWTRIE2_INDEX_1_LENGTH;
- c=prev;
- while(c>0) {
- i2Block=trie->index1[--i1];
- if(i2Block==prevI2Block) {
- /* the index-2 block is the same as the previous one, and filled with highValue */
- c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
- continue;
- }
- prevI2Block=i2Block;
- if(i2Block==index2NullOffset) {
- /* this is the null index-2 block */
- if(highValue!=initialValue) {
- return c;
- }
- c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
- } else {
- /* enumerate data blocks for one index-2 block */
- for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) {
- block=trie->index2[i2Block+ --i2];
- if(block==prevBlock) {
- /* the block is the same as the previous one, and filled with highValue */
- c-=UTRIE2_DATA_BLOCK_LENGTH;
- continue;
- }
- prevBlock=block;
- if(block==nullBlock) {
- /* this is the null data block */
- if(highValue!=initialValue) {
- return c;
- }
- c-=UTRIE2_DATA_BLOCK_LENGTH;
- } else {
- for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) {
- value=data32[block+ --j];
- if(value!=highValue) {
- return c;
- }
- --c;
- }
- }
- }
- }
- }
-
- /* deliver last range */
- return 0;
-}
-
-/*
- * Compact a build-time trie.
- *
- * The compaction
- * - removes blocks that are identical with earlier ones
- * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
- * - moves blocks in steps of the data granularity
- * - moves and overlaps blocks that overlap with multiple values in the overlap region
- *
- * It does not
- * - try to move and overlap blocks that are not already adjacent
- */
-static void
-compactData(UNewTrie2 *trie) {
-#ifdef UTRIE2_DEBUG
- int32_t countSame=0, sumOverlaps=0;
-#endif
-
- int32_t start, newStart, movedStart;
- int32_t blockLength, overlap;
- int32_t i, mapIndex, blockCount;
-
- /* do not compact linear-ASCII data */
- newStart=UTRIE2_DATA_START_OFFSET;
- for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) {
- trie->map[i]=start;
- }
-
- /*
- * Start with a block length of 64 for 2-byte UTF-8,
- * then switch to UTRIE2_DATA_BLOCK_LENGTH.
- */
- blockLength=64;
- blockCount=blockLength>>UTRIE2_SHIFT_2;
- for(start=newStart; start<trie->dataLength;) {
- /*
- * start: index of first entry of current block
- * newStart: index where the current block is to be moved
- * (right after current end of already-compacted data)
- */
- if(start==UNEWTRIE2_DATA_0800_OFFSET) {
- blockLength=UTRIE2_DATA_BLOCK_LENGTH;
- blockCount=1;
- }
-
- /* skip blocks that are not used */
- if(trie->map[start>>UTRIE2_SHIFT_2]<=0) {
- /* advance start to the next block */
- start+=blockLength;
-
- /* leave newStart with the previous block! */
- continue;
- }
-
- /* search for an identical block */
- if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength))
- >=0
- ) {
-#ifdef UTRIE2_DEBUG
- ++countSame;
-#endif
- /* found an identical block, set the other block's index value for the current block */
- for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
- trie->map[mapIndex++]=movedStart;
- movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
- }
-
- /* advance start to the next block */
- start+=blockLength;
-
- /* leave newStart with the previous block! */
- continue;
- }
-
- /* see if the beginning of this block can be overlapped with the end of the previous block */
- /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
- for(overlap=blockLength-UTRIE2_DATA_GRANULARITY;
- overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap);
- overlap-=UTRIE2_DATA_GRANULARITY) {}
-
-#ifdef UTRIE2_DEBUG
- sumOverlaps+=overlap;
-#endif
- if(overlap>0 || newStart<start) {
- /* some overlap, or just move the whole block */
- movedStart=newStart-overlap;
- for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
- trie->map[mapIndex++]=movedStart;
- movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
- }
-
- /* move the non-overlapping indexes to their new positions */
- start+=overlap;
- for(i=blockLength-overlap; i>0; --i) {
- trie->data[newStart++]=trie->data[start++];
- }
- } else /* no overlap && newStart==start */ {
- for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
- trie->map[mapIndex++]=start;
- start+=UTRIE2_DATA_BLOCK_LENGTH;
- }
- newStart=start;
- }
- }
-
- /* now adjust the index-2 table */
- for(i=0; i<trie->index2Length; ++i) {
- if(i==UNEWTRIE2_INDEX_GAP_OFFSET) {
- /* Gap indexes are invalid (-1). Skip over the gap. */
- i+=UNEWTRIE2_INDEX_GAP_LENGTH;
- }
- trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2];
- }
- trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2];
-
- /* ensure dataLength alignment */
- while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) {
- trie->data[newStart++]=trie->initialValue;
- }
-
-#ifdef UTRIE2_DEBUG
- /* we saved some space */
- printf("compacting UTrie2: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n",
- (long)trie->dataLength, (long)newStart, (long)countSame, (long)sumOverlaps);
-#endif
-
- trie->dataLength=newStart;
-}
-
-static void
-compactIndex2(UNewTrie2 *trie) {
- int32_t i, start, newStart, movedStart, overlap;
-
- /* do not compact linear-BMP index-2 blocks */
- newStart=UTRIE2_INDEX_2_BMP_LENGTH;
- for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
- trie->map[i]=start;
- }
-
- /* Reduce the index table gap to what will be needed at runtime. */
- newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1);
-
- for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) {
- /*
- * start: index of first entry of current block
- * newStart: index where the current block is to be moved
- * (right after current end of already-compacted data)
- */
-
- /* search for an identical block */
- if( (movedStart=findSameIndex2Block(trie->index2, newStart, start))
- >=0
- ) {
- /* found an identical block, set the other block's index value for the current block */
- trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart;
-
- /* advance start to the next block */
- start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
-
- /* leave newStart with the previous block! */
- continue;
- }
-
- /* see if the beginning of this block can be overlapped with the end of the previous block */
- /* look for maximum overlap with the previous, adjacent block */
- for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1;
- overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap);
- --overlap) {}
-
- if(overlap>0 || newStart<start) {
- /* some overlap, or just move the whole block */
- trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap;
-
- /* move the non-overlapping indexes to their new positions */
- start+=overlap;
- for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) {
- trie->index2[newStart++]=trie->index2[start++];
- }
- } else /* no overlap && newStart==start */ {
- trie->map[start>>UTRIE2_SHIFT_1_2]=start;
- start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
- newStart=start;
- }
- }
-
- /* now adjust the index-1 table */
- for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
- trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2];
- }
- trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2];
-
- /*
- * Ensure data table alignment:
- * Needs to be granularity-aligned for 16-bit trie
- * (so that dataMove will be down-shiftable),
- * and 2-aligned for uint32_t data.
- */
- while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) {
- /* Arbitrary value: 0x3fffc not possible for real data. */
- trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT;
- }
-
-#ifdef UTRIE2_DEBUG
- /* we saved some space */
- printf("compacting UTrie2: count of 16-bit index words %lu->%lu\n",
- (long)trie->index2Length, (long)newStart);
-#endif
-
- trie->index2Length=newStart;
-}
-
-static void
-compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) {
- UNewTrie2 *newTrie;
- UChar32 highStart, suppHighStart;
- uint32_t highValue;
-
- newTrie=trie->newTrie;
-
- /* find highStart and round it up */
- highValue=utrie2_get32(trie, 0x10ffff);
- highStart=findHighStart(newTrie, highValue);
- highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1);
- if(highStart==0x110000) {
- highValue=trie->errorValue;
- }
-
- /*
- * Set trie->highStart only after utrie2_get32(trie, highStart).
- * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
- */
- trie->highStart=newTrie->highStart=highStart;
-
-#ifdef UTRIE2_DEBUG
- printf("UTrie2: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n",
- (long)highStart, (long)highValue, (long)trie->initialValue);
-#endif
-
- if(highStart<0x110000) {
- /* Blank out [highStart..10ffff] to release associated data blocks. */
- suppHighStart= highStart<=0x10000 ? 0x10000 : highStart;
- utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- }
-
- compactData(newTrie);
- if(highStart>0x10000) {
- compactIndex2(newTrie);
-#ifdef UTRIE2_DEBUG
- } else {
- printf("UTrie2: highStart U+%04lx count of 16-bit index words %lu->%lu\n",
- (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET);
-#endif
- }
-
- /*
- * Store the highValue in the data array and round up the dataLength.
- * Must be done after compactData() because that assumes that dataLength
- * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
- */
- newTrie->data[newTrie->dataLength++]=highValue;
- while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) {
- newTrie->data[newTrie->dataLength++]=trie->initialValue;
- }
-
- newTrie->isCompacted=TRUE;
-}
-
-/* serialization ------------------------------------------------------------ */
-
-/**
- * Maximum length of the runtime index array.
- * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
- * (The actual maximum length is lower,
- * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
- */
-#define UTRIE2_MAX_INDEX_LENGTH 0xffff
-
-/**
- * Maximum length of the runtime data array.
- * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
- * and by uint16_t UTrie2Header.shiftedDataLength.
- */
-#define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT)
-
-/* Compact and internally serialize the trie. */
-U_CAPI void U_EXPORT2
-utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) {
- UNewTrie2 *newTrie;
- UTrie2Header *header;
- uint32_t *p;
- uint16_t *dest16;
- int32_t i, length;
- int32_t allIndexesLength;
- int32_t dataMove; /* >0 if the data is moved to the end of the index array */
- UChar32 highStart;
-
- /* argument check */
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- if( trie==NULL ||
- valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- newTrie=trie->newTrie;
- if(newTrie==NULL) {
- /* already frozen */
- UTrie2ValueBits frozenValueBits=
- trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS;
- if(valueBits!=frozenValueBits) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- }
- return;
- }
-
- /* compact if necessary */
- if(!newTrie->isCompacted) {
- compactTrie(trie, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- }
- highStart=trie->highStart;
-
- if(highStart<=0x10000) {
- allIndexesLength=UTRIE2_INDEX_1_OFFSET;
- } else {
- allIndexesLength=newTrie->index2Length;
- }
- if(valueBits==UTRIE2_16_VALUE_BITS) {
- dataMove=allIndexesLength;
- } else {
- dataMove=0;
- }
-
- /* are indexLength and dataLength within limits? */
- if( /* for unshifted indexLength */
- allIndexesLength>UTRIE2_MAX_INDEX_LENGTH ||
- /* for unshifted dataNullOffset */
- (dataMove+newTrie->dataNullOffset)>0xffff ||
- /* for unshifted 2-byte UTF-8 index-2 values */
- (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff ||
- /* for shiftedDataLength */
- (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH
- ) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return;
- }
-
- /* calculate the total serialized length */
- length=sizeof(UTrie2Header)+allIndexesLength*2;
- if(valueBits==UTRIE2_16_VALUE_BITS) {
- length+=newTrie->dataLength*2;
- } else {
- length+=newTrie->dataLength*4;
- }
-
- trie->memory=uprv_malloc(length);
- if(trie->memory==NULL) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- trie->length=length;
- trie->isMemoryOwned=TRUE;
-
- trie->indexLength=allIndexesLength;
- trie->dataLength=newTrie->dataLength;
- if(highStart<=0x10000) {
- trie->index2NullOffset=0xffff;
- } else {
- trie->index2NullOffset=static_cast<uint16_t>(UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset);
- }
- trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset);
- trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY;
-
- /* set the header fields */
- header=(UTrie2Header *)trie->memory;
-
- header->signature=UTRIE2_SIG; /* "Tri2" */
- header->options=(uint16_t)valueBits;
-
- header->indexLength=(uint16_t)trie->indexLength;
- header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT);
- header->index2NullOffset=trie->index2NullOffset;
- header->dataNullOffset=trie->dataNullOffset;
- header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1);
-
- /* fill the index and data arrays */
- dest16=(uint16_t *)(header+1);
- trie->index=dest16;
-
- /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
- p=(uint32_t *)newTrie->index2;
- for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) {
- *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
- }
-
- /* write UTF-8 2-byte index-2 values, not right-shifted */
- for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */
- *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
- }
- for(; i<(0xe0-0xc0); ++i) { /* C2..DF */
- *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]);
- }
-
- if(highStart>0x10000) {
- int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1;
- int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length;
-
- /* write 16-bit index-1 values for supplementary code points */
- p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
- for(i=index1Length; i>0; --i) {
- *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++);
- }
-
- /*
- * write the index-2 array values for supplementary code points,
- * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
- */
- p=(uint32_t *)newTrie->index2+index2Offset;
- for(i=newTrie->index2Length-index2Offset; i>0; --i) {
- *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
- }
- }
-
- /* write the 16/32-bit data array */
- switch(valueBits) {
- case UTRIE2_16_VALUE_BITS:
- /* write 16-bit data values */
- trie->data16=dest16;
- trie->data32=NULL;
- p=newTrie->data;
- for(i=newTrie->dataLength; i>0; --i) {
- *dest16++=(uint16_t)*p++;
- }
- break;
- case UTRIE2_32_VALUE_BITS:
- /* write 32-bit data values */
- trie->data16=NULL;
- trie->data32=(uint32_t *)dest16;
- uprv_memcpy(dest16, newTrie->data, (size_t)newTrie->dataLength*4);
- break;
- default:
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
-#ifdef UTRIE2_DEBUG
- utrie2_printLengths(trie, "");
-#endif
-
-#ifdef UCPTRIE_DEBUG
- umutablecptrie_setName(newTrie->t3, trie->name);
- ucptrie_close(
- umutablecptrie_buildImmutable(
- newTrie->t3, UCPTRIE_TYPE_FAST, (UCPTrieValueWidth)valueBits, pErrorCode));
-#endif
- /* Delete the UNewTrie2. */
- uprv_free(newTrie->data);
- uprv_free(newTrie);
- trie->newTrie=NULL;
-}
diff --git a/contrib/libs/icu/common/utrie2_impl.h b/contrib/libs/icu/common/utrie2_impl.h
deleted file mode 100644
index 2a14db3a6bd..00000000000
--- a/contrib/libs/icu/common/utrie2_impl.h
+++ /dev/null
@@ -1,175 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2001-2008, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: utrie2_impl.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2008sep26 (split off from utrie2.c)
-* created by: Markus W. Scherer
-*
-* Definitions needed for both runtime and builder code for UTrie2,
-* used by utrie2.c and utrie2_builder.c.
-*/
-
-#ifndef __UTRIE2_IMPL_H__
-#define __UTRIE2_IMPL_H__
-
-#ifdef UCPTRIE_DEBUG
-#include "unicode/umutablecptrie.h"
-#endif
-#include "utrie2.h"
-
-/* Public UTrie2 API implementation ----------------------------------------- */
-
-/*
- * These definitions are mostly needed by utrie2.cpp,
- * but also by utrie2_serialize() and utrie2_swap().
- */
-
-// UTrie2 signature values, in platform endianness and opposite endianness.
-// The UTrie2 signature ASCII byte values spell "Tri2".
-#define UTRIE2_SIG 0x54726932
-#define UTRIE2_OE_SIG 0x32697254
-
-/**
- * Trie data structure in serialized form:
- *
- * UTrie2Header header;
- * uint16_t index[header.index2Length];
- * uint16_t data[header.shiftedDataLength<<2]; -- or uint32_t data[...]
- * @internal
- */
-typedef struct UTrie2Header {
- /** "Tri2" in big-endian US-ASCII (0x54726932) */
- uint32_t signature;
-
- /**
- * options bit field:
- * 15.. 4 reserved (0)
- * 3.. 0 UTrie2ValueBits valueBits
- */
- uint16_t options;
-
- /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH */
- uint16_t indexLength;
-
- /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT */
- uint16_t shiftedDataLength;
-
- /** Null index and data blocks, not shifted. */
- uint16_t index2NullOffset, dataNullOffset;
-
- /**
- * First code point of the single-value range ending with U+10ffff,
- * rounded up and then shifted right by UTRIE2_SHIFT_1.
- */
- uint16_t shiftedHighStart;
-} UTrie2Header;
-
-/**
- * Constants for use with UTrie2Header.options.
- * @internal
- */
-enum {
- /** Mask to get the UTrie2ValueBits valueBits from options. */
- UTRIE2_OPTIONS_VALUE_BITS_MASK=0xf
-};
-
-/* Building a trie ---------------------------------------------------------- */
-
-/*
- * These definitions are mostly needed by utrie2_builder.c, but also by
- * utrie2_get32() and utrie2_enum().
- */
-
-enum {
- /**
- * At build time, leave a gap in the index-2 table,
- * at least as long as the maximum lengths of the 2-byte UTF-8 index-2 table
- * and the supplementary index-1 table.
- * Round up to UTRIE2_INDEX_2_BLOCK_LENGTH for proper compacting.
- */
- UNEWTRIE2_INDEX_GAP_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH,
- UNEWTRIE2_INDEX_GAP_LENGTH=
- ((UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH)+UTRIE2_INDEX_2_MASK)&
- ~UTRIE2_INDEX_2_MASK,
-
- /**
- * Maximum length of the build-time index-2 array.
- * Maximum number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2,
- * plus the part of the index-2 table for lead surrogate code points,
- * plus the build-time index gap,
- * plus the null index-2 block.
- */
- UNEWTRIE2_MAX_INDEX_2_LENGTH=
- (0x110000>>UTRIE2_SHIFT_2)+
- UTRIE2_LSCP_INDEX_2_LENGTH+
- UNEWTRIE2_INDEX_GAP_LENGTH+
- UTRIE2_INDEX_2_BLOCK_LENGTH,
-
- UNEWTRIE2_INDEX_1_LENGTH=0x110000>>UTRIE2_SHIFT_1
-};
-
-/**
- * Maximum length of the build-time data array.
- * One entry per 0x110000 code points, plus the illegal-UTF-8 block and the null block,
- * plus values for the 0x400 surrogate code units.
- */
-#define UNEWTRIE2_MAX_DATA_LENGTH (0x110000+0x40+0x40+0x400)
-
-/*
- * Build-time trie structure.
- *
- * Just using a boolean flag for "repeat use" could lead to data array overflow
- * because we would not be able to detect when a data block becomes unused.
- * It also leads to orphan data blocks that are kept through serialization.
- *
- * Need to use reference counting for data blocks,
- * and allocDataBlock() needs to look for a free block before increasing dataLength.
- *
- * This scheme seems like overkill for index-2 blocks since the whole index array is
- * preallocated anyway (unlike the growable data array).
- * Just allocating multiple index-2 blocks as needed.
- */
-struct UNewTrie2 {
- int32_t index1[UNEWTRIE2_INDEX_1_LENGTH];
- int32_t index2[UNEWTRIE2_MAX_INDEX_2_LENGTH];
- uint32_t *data;
-#ifdef UCPTRIE_DEBUG
- UMutableCPTrie *t3;
-#endif
-
- uint32_t initialValue, errorValue;
- int32_t index2Length, dataCapacity, dataLength;
- int32_t firstFreeBlock;
- int32_t index2NullOffset, dataNullOffset;
- UChar32 highStart;
- UBool isCompacted;
-
- /**
- * Multi-purpose per-data-block table.
- *
- * Before compacting:
- *
- * Per-data-block reference counters/free-block list.
- * 0: unused
- * >0: reference counter (number of index-2 entries pointing here)
- * <0: next free data block in free-block list
- *
- * While compacting:
- *
- * Map of adjusted indexes, used in compactData() and compactIndex2().
- * Maps from original indexes to new ones.
- */
- int32_t map[UNEWTRIE2_MAX_DATA_LENGTH>>UTRIE2_SHIFT_2];
-};
-
-#endif
diff --git a/contrib/libs/icu/common/utrie_swap.cpp b/contrib/libs/icu/common/utrie_swap.cpp
deleted file mode 100644
index 5abe7bd5d77..00000000000
--- a/contrib/libs/icu/common/utrie_swap.cpp
+++ /dev/null
@@ -1,344 +0,0 @@
-// © 2018 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-// utrie_swap.cpp
-// created: 2018aug08 Markus W. Scherer
-
-#include "unicode/utypes.h"
-#include "cmemory.h"
-#include "ucptrie_impl.h"
-#include "udataswp.h"
-#include "utrie.h"
-#include "utrie2_impl.h"
-
-// These functions for swapping different generations of ICU code point tries are here
-// so that their implementation files need not depend on swapper code,
-// need not depend on each other, and so that other swapper code
-// need not depend on other trie code.
-
-namespace {
-
-constexpr int32_t ASCII_LIMIT = 0x80;
-
-} // namespace
-
-U_CAPI int32_t U_EXPORT2
-utrie_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const UTrieHeader *inTrie;
- UTrieHeader trie;
- int32_t size;
- UBool dataIs32;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* setup and swapping */
- if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- inTrie=(const UTrieHeader *)inData;
- trie.signature=ds->readUInt32(inTrie->signature);
- trie.options=ds->readUInt32(inTrie->options);
- trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
- trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
-
- if( trie.signature!=0x54726965 ||
- (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
- ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
- trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
- (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
- trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
- (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
- ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
- ) {
- *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
- return 0;
- }
-
- dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
- size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
-
- if(length>=0) {
- UTrieHeader *outTrie;
-
- if(length<size) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- outTrie=(UTrieHeader *)outData;
-
- /* swap the header */
- ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
-
- /* swap the index and the data */
- if(dataIs32) {
- ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
- ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
- (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
- } else {
- ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
- }
- }
-
- return size;
-}
-
-U_CAPI int32_t U_EXPORT2
-utrie2_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const UTrie2Header *inTrie;
- UTrie2Header trie;
- int32_t dataLength, size;
- UTrie2ValueBits valueBits;
-
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* setup and swapping */
- if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- inTrie=(const UTrie2Header *)inData;
- trie.signature=ds->readUInt32(inTrie->signature);
- trie.options=ds->readUInt16(inTrie->options);
- trie.indexLength=ds->readUInt16(inTrie->indexLength);
- trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
-
- valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
- dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
-
- if( trie.signature!=UTRIE2_SIG ||
- valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
- trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
- dataLength<UTRIE2_DATA_START_OFFSET
- ) {
- *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
- return 0;
- }
-
- size=sizeof(UTrie2Header)+trie.indexLength*2;
- switch(valueBits) {
- case UTRIE2_16_VALUE_BITS:
- size+=dataLength*2;
- break;
- case UTRIE2_32_VALUE_BITS:
- size+=dataLength*4;
- break;
- default:
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
-
- if(length>=0) {
- UTrie2Header *outTrie;
-
- if(length<size) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- outTrie=(UTrie2Header *)outData;
-
- /* swap the header */
- ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
- ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
-
- /* swap the index and the data */
- switch(valueBits) {
- case UTRIE2_16_VALUE_BITS:
- ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
- break;
- case UTRIE2_32_VALUE_BITS:
- ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
- ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
- (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
- break;
- default:
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
- }
-
- return size;
-}
-
-U_CAPI int32_t U_EXPORT2
-ucptrie_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const UCPTrieHeader *inTrie;
- UCPTrieHeader trie;
- int32_t dataLength, size;
- UCPTrieValueWidth valueWidth;
-
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* setup and swapping */
- if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- inTrie=(const UCPTrieHeader *)inData;
- trie.signature=ds->readUInt32(inTrie->signature);
- trie.options=ds->readUInt16(inTrie->options);
- trie.indexLength=ds->readUInt16(inTrie->indexLength);
- trie.dataLength = ds->readUInt16(inTrie->dataLength);
-
- UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3);
- valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK);
- dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength;
-
- int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ?
- UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
- if( trie.signature!=UCPTRIE_SIG ||
- type > UCPTRIE_TYPE_SMALL ||
- (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 ||
- valueWidth > UCPTRIE_VALUE_BITS_8 ||
- trie.indexLength < minIndexLength ||
- dataLength < ASCII_LIMIT
- ) {
- *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */
- return 0;
- }
-
- size=sizeof(UCPTrieHeader)+trie.indexLength*2;
- switch(valueWidth) {
- case UCPTRIE_VALUE_BITS_16:
- size+=dataLength*2;
- break;
- case UCPTRIE_VALUE_BITS_32:
- size+=dataLength*4;
- break;
- case UCPTRIE_VALUE_BITS_8:
- size+=dataLength;
- break;
- default:
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
-
- if(length>=0) {
- UCPTrieHeader *outTrie;
-
- if(length<size) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- outTrie=(UCPTrieHeader *)outData;
-
- /* swap the header */
- ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
- ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
-
- /* swap the index and the data */
- switch(valueWidth) {
- case UCPTRIE_VALUE_BITS_16:
- ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
- break;
- case UCPTRIE_VALUE_BITS_32:
- ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
- ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
- (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
- break;
- case UCPTRIE_VALUE_BITS_8:
- ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
- if(inTrie!=outTrie) {
- uprv_memmove((outTrie+1)+trie.indexLength, (inTrie+1)+trie.indexLength, dataLength);
- }
- break;
- default:
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
- }
-
- return size;
-}
-
-namespace {
-
-/**
- * Gets the trie version from 32-bit-aligned memory containing the serialized form
- * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3).
- *
- * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie
- * @param length the number of bytes available at data;
- * can be more than necessary (see return value)
- * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized.
- * If TRUE, opposite-endian serialized forms are recognized as well.
- * @return the trie version of the serialized form, or 0 if it is not
- * recognized as a serialized trie
- */
-int32_t
-getVersion(const void *data, int32_t length, UBool anyEndianOk) {
- uint32_t signature;
- if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) {
- return 0;
- }
- signature=*(const uint32_t *)data;
- if(signature==UCPTRIE_SIG) {
- return 3;
- }
- if(anyEndianOk && signature==UCPTRIE_OE_SIG) {
- return 3;
- }
- if(signature==UTRIE2_SIG) {
- return 2;
- }
- if(anyEndianOk && signature==UTRIE2_OE_SIG) {
- return 2;
- }
- if(signature==UTRIE_SIG) {
- return 1;
- }
- if(anyEndianOk && signature==UTRIE_OE_SIG) {
- return 1;
- }
- return 0;
-}
-
-} // namespace
-
-U_CAPI int32_t U_EXPORT2
-utrie_swapAnyVersion(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) { return 0; }
- switch(getVersion(inData, length, TRUE)) {
- case 1:
- return utrie_swap(ds, inData, length, outData, pErrorCode);
- case 2:
- return utrie2_swap(ds, inData, length, outData, pErrorCode);
- case 3:
- return ucptrie_swap(ds, inData, length, outData, pErrorCode);
- default:
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
-}
diff --git a/contrib/libs/icu/common/uts46.cpp b/contrib/libs/icu/common/uts46.cpp
deleted file mode 100644
index b9e6cb023bb..00000000000
--- a/contrib/libs/icu/common/uts46.cpp
+++ /dev/null
@@ -1,1484 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2010-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: uts46.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2010mar09
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_IDNA
-
-#include "unicode/idna.h"
-#include "unicode/normalizer2.h"
-#include "unicode/uscript.h"
-#include "unicode/ustring.h"
-#include "unicode/utf16.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "punycode.h"
-#include "ubidi_props.h"
-#include "ustr_imp.h"
-
-// Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG:
-//
-// The domain name length limit is 255 octets in an internal DNS representation
-// where the last ("root") label is the empty label
-// represented by length byte 0 alone.
-// In a conventional string, this translates to 253 characters, or 254
-// if there is a trailing dot for the root label.
-
-U_NAMESPACE_BEGIN
-
-// Severe errors which usually result in a U+FFFD replacement character in the result string.
-const uint32_t severeErrors=
- UIDNA_ERROR_LEADING_COMBINING_MARK|
- UIDNA_ERROR_DISALLOWED|
- UIDNA_ERROR_PUNYCODE|
- UIDNA_ERROR_LABEL_HAS_DOT|
- UIDNA_ERROR_INVALID_ACE_LABEL;
-
-static inline UBool
-isASCIIString(const UnicodeString &dest) {
- const UChar *s=dest.getBuffer();
- const UChar *limit=s+dest.length();
- while(s<limit) {
- if(*s++>0x7f) {
- return FALSE;
- }
- }
- return TRUE;
-}
-
-static UBool
-isASCIIOkBiDi(const UChar *s, int32_t length);
-
-static UBool
-isASCIIOkBiDi(const char *s, int32_t length);
-
-// IDNA class default implementations -------------------------------------- ***
-
-IDNA::~IDNA() {}
-
-void
-IDNA::labelToASCII_UTF8(StringPiece label, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const {
- if(U_SUCCESS(errorCode)) {
- UnicodeString destString;
- labelToASCII(UnicodeString::fromUTF8(label), destString,
- info, errorCode).toUTF8(dest);
- }
-}
-
-void
-IDNA::labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const {
- if(U_SUCCESS(errorCode)) {
- UnicodeString destString;
- labelToUnicode(UnicodeString::fromUTF8(label), destString,
- info, errorCode).toUTF8(dest);
- }
-}
-
-void
-IDNA::nameToASCII_UTF8(StringPiece name, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const {
- if(U_SUCCESS(errorCode)) {
- UnicodeString destString;
- nameToASCII(UnicodeString::fromUTF8(name), destString,
- info, errorCode).toUTF8(dest);
- }
-}
-
-void
-IDNA::nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const {
- if(U_SUCCESS(errorCode)) {
- UnicodeString destString;
- nameToUnicode(UnicodeString::fromUTF8(name), destString,
- info, errorCode).toUTF8(dest);
- }
-}
-
-// UTS46 class declaration ------------------------------------------------- ***
-
-class UTS46 : public IDNA {
-public:
- UTS46(uint32_t options, UErrorCode &errorCode);
- virtual ~UTS46();
-
- virtual UnicodeString &
- labelToASCII(const UnicodeString &label, UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const;
-
- virtual UnicodeString &
- labelToUnicode(const UnicodeString &label, UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const;
-
- virtual UnicodeString &
- nameToASCII(const UnicodeString &name, UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const;
-
- virtual UnicodeString &
- nameToUnicode(const UnicodeString &name, UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const;
-
- virtual void
- labelToASCII_UTF8(StringPiece label, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const;
-
- virtual void
- labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const;
-
- virtual void
- nameToASCII_UTF8(StringPiece name, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const;
-
- virtual void
- nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const;
-
-private:
- UnicodeString &
- process(const UnicodeString &src,
- UBool isLabel, UBool toASCII,
- UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const;
-
- void
- processUTF8(StringPiece src,
- UBool isLabel, UBool toASCII,
- ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const;
-
- UnicodeString &
- processUnicode(const UnicodeString &src,
- int32_t labelStart, int32_t mappingStart,
- UBool isLabel, UBool toASCII,
- UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const;
-
- // returns the new dest.length()
- int32_t
- mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart,
- UErrorCode &errorCode) const;
-
- // returns the new label length
- int32_t
- processLabel(UnicodeString &dest,
- int32_t labelStart, int32_t labelLength,
- UBool toASCII,
- IDNAInfo &info, UErrorCode &errorCode) const;
- int32_t
- markBadACELabel(UnicodeString &dest,
- int32_t labelStart, int32_t labelLength,
- UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const;
-
- void
- checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const;
-
- UBool
- isLabelOkContextJ(const UChar *label, int32_t labelLength) const;
-
- void
- checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const;
-
- const Normalizer2 &uts46Norm2; // uts46.nrm
- uint32_t options;
-};
-
-IDNA *
-IDNA::createUTS46Instance(uint32_t options, UErrorCode &errorCode) {
- if(U_SUCCESS(errorCode)) {
- IDNA *idna=new UTS46(options, errorCode);
- if(idna==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- } else if(U_FAILURE(errorCode)) {
- delete idna;
- idna=NULL;
- }
- return idna;
- } else {
- return NULL;
- }
-}
-
-// UTS46 implementation ---------------------------------------------------- ***
-
-UTS46::UTS46(uint32_t opt, UErrorCode &errorCode)
- : uts46Norm2(*Normalizer2::getInstance(NULL, "uts46", UNORM2_COMPOSE, errorCode)),
- options(opt) {}
-
-UTS46::~UTS46() {}
-
-UnicodeString &
-UTS46::labelToASCII(const UnicodeString &label, UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const {
- return process(label, TRUE, TRUE, dest, info, errorCode);
-}
-
-UnicodeString &
-UTS46::labelToUnicode(const UnicodeString &label, UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const {
- return process(label, TRUE, FALSE, dest, info, errorCode);
-}
-
-UnicodeString &
-UTS46::nameToASCII(const UnicodeString &name, UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const {
- process(name, FALSE, TRUE, dest, info, errorCode);
- if( dest.length()>=254 && (info.errors&UIDNA_ERROR_DOMAIN_NAME_TOO_LONG)==0 &&
- isASCIIString(dest) &&
- (dest.length()>254 || dest[253]!=0x2e)
- ) {
- info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
- }
- return dest;
-}
-
-UnicodeString &
-UTS46::nameToUnicode(const UnicodeString &name, UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const {
- return process(name, FALSE, FALSE, dest, info, errorCode);
-}
-
-void
-UTS46::labelToASCII_UTF8(StringPiece label, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const {
- processUTF8(label, TRUE, TRUE, dest, info, errorCode);
-}
-
-void
-UTS46::labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const {
- processUTF8(label, TRUE, FALSE, dest, info, errorCode);
-}
-
-void
-UTS46::nameToASCII_UTF8(StringPiece name, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const {
- processUTF8(name, FALSE, TRUE, dest, info, errorCode);
-}
-
-void
-UTS46::nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const {
- processUTF8(name, FALSE, FALSE, dest, info, errorCode);
-}
-
-// UTS #46 data for ASCII characters.
-// The normalizer (using uts46.nrm) maps uppercase ASCII letters to lowercase
-// and passes through all other ASCII characters.
-// If UIDNA_USE_STD3_RULES is set, then non-LDH characters are disallowed
-// using this data.
-// The ASCII fastpath also uses this data.
-// Values: -1=disallowed 0==valid 1==mapped (lowercase)
-static const int8_t asciiData[128]={
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- // 002D..002E; valid # HYPHEN-MINUS..FULL STOP
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, -1,
- // 0030..0039; valid # DIGIT ZERO..DIGIT NINE
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1,
- // 0041..005A; mapped # LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
- -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
- // 0061..007A; valid # LATIN SMALL LETTER A..LATIN SMALL LETTER Z
- -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1
-};
-
-UnicodeString &
-UTS46::process(const UnicodeString &src,
- UBool isLabel, UBool toASCII,
- UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const {
- // uts46Norm2.normalize() would do all of this error checking and setup,
- // but with the ASCII fastpath we do not always call it, and do not
- // call it first.
- if(U_FAILURE(errorCode)) {
- dest.setToBogus();
- return dest;
- }
- const UChar *srcArray=src.getBuffer();
- if(&dest==&src || srcArray==NULL) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- dest.setToBogus();
- return dest;
- }
- // Arguments are fine, reset output values.
- dest.remove();
- info.reset();
- int32_t srcLength=src.length();
- if(srcLength==0) {
- info.errors|=UIDNA_ERROR_EMPTY_LABEL;
- return dest;
- }
- UChar *destArray=dest.getBuffer(srcLength);
- if(destArray==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return dest;
- }
- // ASCII fastpath
- UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
- int32_t labelStart=0;
- int32_t i;
- for(i=0;; ++i) {
- if(i==srcLength) {
- if(toASCII) {
- if((i-labelStart)>63) {
- info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
- }
- // There is a trailing dot if labelStart==i.
- if(!isLabel && i>=254 && (i>254 || labelStart<i)) {
- info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
- }
- }
- info.errors|=info.labelErrors;
- dest.releaseBuffer(i);
- return dest;
- }
- UChar c=srcArray[i];
- if(c>0x7f) {
- break;
- }
- int cData=asciiData[c];
- if(cData>0) {
- destArray[i]=c+0x20; // Lowercase an uppercase ASCII letter.
- } else if(cData<0 && disallowNonLDHDot) {
- break; // Replacing with U+FFFD can be complicated for toASCII.
- } else {
- destArray[i]=c;
- if(c==0x2d) { // hyphen
- if(i==(labelStart+3) && srcArray[i-1]==0x2d) {
- // "??--..." is Punycode or forbidden.
- ++i; // '-' was copied to dest already
- break;
- }
- if(i==labelStart) {
- // label starts with "-"
- info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN;
- }
- if((i+1)==srcLength || srcArray[i+1]==0x2e) {
- // label ends with "-"
- info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN;
- }
- } else if(c==0x2e) { // dot
- if(isLabel) {
- // Replacing with U+FFFD can be complicated for toASCII.
- ++i; // '.' was copied to dest already
- break;
- }
- if(i==labelStart) {
- info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
- }
- if(toASCII && (i-labelStart)>63) {
- info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
- }
- info.errors|=info.labelErrors;
- info.labelErrors=0;
- labelStart=i+1;
- }
- }
- }
- info.errors|=info.labelErrors;
- dest.releaseBuffer(i);
- processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode);
- if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 &&
- (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(dest.getBuffer(), labelStart)))
- ) {
- info.errors|=UIDNA_ERROR_BIDI;
- }
- return dest;
-}
-
-void
-UTS46::processUTF8(StringPiece src,
- UBool isLabel, UBool toASCII,
- ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- return;
- }
- const char *srcArray=src.data();
- int32_t srcLength=src.length();
- if(srcArray==NULL && srcLength!=0) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- // Arguments are fine, reset output values.
- info.reset();
- if(srcLength==0) {
- info.errors|=UIDNA_ERROR_EMPTY_LABEL;
- dest.Flush();
- return;
- }
- UnicodeString destString;
- int32_t labelStart=0;
- if(srcLength<=256) { // length of stackArray[]
- // ASCII fastpath
- char stackArray[256];
- int32_t destCapacity;
- char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20,
- stackArray, UPRV_LENGTHOF(stackArray), &destCapacity);
- UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
- int32_t i;
- for(i=0;; ++i) {
- if(i==srcLength) {
- if(toASCII) {
- if((i-labelStart)>63) {
- info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
- }
- // There is a trailing dot if labelStart==i.
- if(!isLabel && i>=254 && (i>254 || labelStart<i)) {
- info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
- }
- }
- info.errors|=info.labelErrors;
- dest.Append(destArray, i);
- dest.Flush();
- return;
- }
- char c=srcArray[i];
- if((int8_t)c<0) { // (uint8_t)c>0x7f
- break;
- }
- int cData=asciiData[(int)c]; // Cast: gcc warns about indexing with a char.
- if(cData>0) {
- destArray[i]=c+0x20; // Lowercase an uppercase ASCII letter.
- } else if(cData<0 && disallowNonLDHDot) {
- break; // Replacing with U+FFFD can be complicated for toASCII.
- } else {
- destArray[i]=c;
- if(c==0x2d) { // hyphen
- if(i==(labelStart+3) && srcArray[i-1]==0x2d) {
- // "??--..." is Punycode or forbidden.
- break;
- }
- if(i==labelStart) {
- // label starts with "-"
- info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN;
- }
- if((i+1)==srcLength || srcArray[i+1]==0x2e) {
- // label ends with "-"
- info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN;
- }
- } else if(c==0x2e) { // dot
- if(isLabel) {
- break; // Replacing with U+FFFD can be complicated for toASCII.
- }
- if(i==labelStart) {
- info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
- }
- if(toASCII && (i-labelStart)>63) {
- info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
- }
- info.errors|=info.labelErrors;
- info.labelErrors=0;
- labelStart=i+1;
- }
- }
- }
- info.errors|=info.labelErrors;
- // Convert the processed ASCII prefix of the current label to UTF-16.
- int32_t mappingStart=i-labelStart;
- destString=UnicodeString::fromUTF8(StringPiece(destArray+labelStart, mappingStart));
- // Output the previous ASCII labels and process the rest of src in UTF-16.
- dest.Append(destArray, labelStart);
- processUnicode(UnicodeString::fromUTF8(StringPiece(src, labelStart)), 0, mappingStart,
- isLabel, toASCII,
- destString, info, errorCode);
- } else {
- // src is too long for the ASCII fastpath implementation.
- processUnicode(UnicodeString::fromUTF8(src), 0, 0,
- isLabel, toASCII,
- destString, info, errorCode);
- }
- destString.toUTF8(dest); // calls dest.Flush()
- if(toASCII && !isLabel) {
- // length==labelStart==254 means that there is a trailing dot (ok) and
- // destString is empty (do not index at 253-labelStart).
- int32_t length=labelStart+destString.length();
- if( length>=254 && isASCIIString(destString) &&
- (length>254 ||
- (labelStart<254 && destString[253-labelStart]!=0x2e))
- ) {
- info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
- }
- }
- if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 &&
- (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(srcArray, labelStart)))
- ) {
- info.errors|=UIDNA_ERROR_BIDI;
- }
-}
-
-UnicodeString &
-UTS46::processUnicode(const UnicodeString &src,
- int32_t labelStart, int32_t mappingStart,
- UBool isLabel, UBool toASCII,
- UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const {
- if(mappingStart==0) {
- uts46Norm2.normalize(src, dest, errorCode);
- } else {
- uts46Norm2.normalizeSecondAndAppend(dest, src.tempSubString(mappingStart), errorCode);
- }
- if(U_FAILURE(errorCode)) {
- return dest;
- }
- UBool doMapDevChars=
- toASCII ? (options&UIDNA_NONTRANSITIONAL_TO_ASCII)==0 :
- (options&UIDNA_NONTRANSITIONAL_TO_UNICODE)==0;
- const UChar *destArray=dest.getBuffer();
- int32_t destLength=dest.length();
- int32_t labelLimit=labelStart;
- while(labelLimit<destLength) {
- UChar c=destArray[labelLimit];
- if(c==0x2e && !isLabel) {
- int32_t labelLength=labelLimit-labelStart;
- int32_t newLength=processLabel(dest, labelStart, labelLength,
- toASCII, info, errorCode);
- info.errors|=info.labelErrors;
- info.labelErrors=0;
- if(U_FAILURE(errorCode)) {
- return dest;
- }
- destArray=dest.getBuffer();
- destLength+=newLength-labelLength;
- labelLimit=labelStart+=newLength+1;
- continue;
- } else if(c<0xdf) {
- // pass
- } else if(c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) {
- info.isTransDiff=TRUE;
- if(doMapDevChars) {
- destLength=mapDevChars(dest, labelStart, labelLimit, errorCode);
- if(U_FAILURE(errorCode)) {
- return dest;
- }
- destArray=dest.getBuffer();
- // All deviation characters have been mapped, no need to check for them again.
- doMapDevChars=FALSE;
- // Do not increment labelLimit in case c was removed.
- continue;
- }
- } else if(U16_IS_SURROGATE(c)) {
- if(U16_IS_SURROGATE_LEAD(c) ?
- (labelLimit+1)==destLength || !U16_IS_TRAIL(destArray[labelLimit+1]) :
- labelLimit==labelStart || !U16_IS_LEAD(destArray[labelLimit-1])) {
- // Map an unpaired surrogate to U+FFFD before normalization so that when
- // that removes characters we do not turn two unpaired ones into a pair.
- info.labelErrors|=UIDNA_ERROR_DISALLOWED;
- dest.setCharAt(labelLimit, 0xfffd);
- destArray=dest.getBuffer();
- }
- }
- ++labelLimit;
- }
- // Permit an empty label at the end (0<labelStart==labelLimit==destLength is ok)
- // but not an empty label elsewhere nor a completely empty domain name.
- // processLabel() sets UIDNA_ERROR_EMPTY_LABEL when labelLength==0.
- if(0==labelStart || labelStart<labelLimit) {
- processLabel(dest, labelStart, labelLimit-labelStart,
- toASCII, info, errorCode);
- info.errors|=info.labelErrors;
- }
- return dest;
-}
-
-int32_t
-UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart,
- UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- int32_t length=dest.length();
- UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length);
- if(s==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return length;
- }
- int32_t capacity=dest.getCapacity();
- UBool didMapDevChars=FALSE;
- int32_t readIndex=mappingStart, writeIndex=mappingStart;
- do {
- UChar c=s[readIndex++];
- switch(c) {
- case 0xdf:
- // Map sharp s to ss.
- didMapDevChars=TRUE;
- s[writeIndex++]=0x73; // Replace sharp s with first s.
- // Insert second s and account for possible buffer reallocation.
- if(writeIndex==readIndex) {
- if(length==capacity) {
- dest.releaseBuffer(length);
- s=dest.getBuffer(length+1);
- if(s==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return length;
- }
- capacity=dest.getCapacity();
- }
- u_memmove(s+writeIndex+1, s+writeIndex, length-writeIndex);
- ++readIndex;
- }
- s[writeIndex++]=0x73;
- ++length;
- break;
- case 0x3c2: // Map final sigma to nonfinal sigma.
- didMapDevChars=TRUE;
- s[writeIndex++]=0x3c3;
- break;
- case 0x200c: // Ignore/remove ZWNJ.
- case 0x200d: // Ignore/remove ZWJ.
- didMapDevChars=TRUE;
- --length;
- break;
- default:
- // Only really necessary if writeIndex was different from readIndex.
- s[writeIndex++]=c;
- break;
- }
- } while(writeIndex<length);
- dest.releaseBuffer(length);
- if(didMapDevChars) {
- // Mapping deviation characters might have resulted in an un-NFC string.
- // We could use either the NFC or the UTS #46 normalizer.
- // By using the UTS #46 normalizer again, we avoid having to load a second .nrm data file.
- UnicodeString normalized;
- uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode);
- if(U_SUCCESS(errorCode)) {
- dest.replace(labelStart, 0x7fffffff, normalized);
- if(dest.isBogus()) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- }
- return dest.length();
- }
- }
- return length;
-}
-
-// Some non-ASCII characters are equivalent to sequences with
-// non-LDH ASCII characters. To find them:
-// grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt)
-static inline UBool
-isNonASCIIDisallowedSTD3Valid(UChar32 c) {
- return c==0x2260 || c==0x226E || c==0x226F;
-}
-
-// Replace the label in dest with the label string, if the label was modified.
-// If &label==&dest then the label was modified in-place and labelLength
-// is the new label length, different from label.length().
-// If &label!=&dest then labelLength==label.length().
-// Returns labelLength (= the new label length).
-static int32_t
-replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength,
- const UnicodeString &label, int32_t labelLength, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- if(&label!=&dest) {
- dest.replace(destLabelStart, destLabelLength, label);
- if(dest.isBogus()) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- }
- return labelLength;
-}
-
-int32_t
-UTS46::processLabel(UnicodeString &dest,
- int32_t labelStart, int32_t labelLength,
- UBool toASCII,
- IDNAInfo &info, UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- UnicodeString fromPunycode;
- UnicodeString *labelString;
- const UChar *label=dest.getBuffer()+labelStart;
- int32_t destLabelStart=labelStart;
- int32_t destLabelLength=labelLength;
- UBool wasPunycode;
- if(labelLength>=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && label[3]==0x2d) {
- // Label starts with "xn--", try to un-Punycode it.
- wasPunycode=TRUE;
- UChar *unicodeBuffer=fromPunycode.getBuffer(-1); // capacity==-1: most labels should fit
- if(unicodeBuffer==NULL) {
- // Should never occur if we used capacity==-1 which uses the internal buffer.
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return labelLength;
- }
- UErrorCode punycodeErrorCode=U_ZERO_ERROR;
- int32_t unicodeLength=u_strFromPunycode(label+4, labelLength-4,
- unicodeBuffer, fromPunycode.getCapacity(),
- NULL, &punycodeErrorCode);
- if(punycodeErrorCode==U_BUFFER_OVERFLOW_ERROR) {
- fromPunycode.releaseBuffer(0);
- unicodeBuffer=fromPunycode.getBuffer(unicodeLength);
- if(unicodeBuffer==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return labelLength;
- }
- punycodeErrorCode=U_ZERO_ERROR;
- unicodeLength=u_strFromPunycode(label+4, labelLength-4,
- unicodeBuffer, fromPunycode.getCapacity(),
- NULL, &punycodeErrorCode);
- }
- fromPunycode.releaseBuffer(unicodeLength);
- if(U_FAILURE(punycodeErrorCode)) {
- info.labelErrors|=UIDNA_ERROR_PUNYCODE;
- return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode);
- }
- // Check for NFC, and for characters that are not
- // valid or deviation characters according to the normalizer.
- // If there is something wrong, then the string will change.
- // Note that the normalizer passes through non-LDH ASCII and deviation characters.
- // Deviation characters are ok in Punycode even in transitional processing.
- // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES
- // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too.
- UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode);
- if(U_FAILURE(errorCode)) {
- return labelLength;
- }
- if(!isValid) {
- info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
- return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode);
- }
- labelString=&fromPunycode;
- label=fromPunycode.getBuffer();
- labelStart=0;
- labelLength=fromPunycode.length();
- } else {
- wasPunycode=FALSE;
- labelString=&dest;
- }
- // Validity check
- if(labelLength==0) {
- info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
- return replaceLabel(dest, destLabelStart, destLabelLength,
- *labelString, labelLength, errorCode);
- }
- // labelLength>0
- if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) {
- // label starts with "??--"
- info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4;
- }
- if(label[0]==0x2d) {
- // label starts with "-"
- info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN;
- }
- if(label[labelLength-1]==0x2d) {
- // label ends with "-"
- info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN;
- }
- // If the label was not a Punycode label, then it was the result of
- // mapping, normalization and label segmentation.
- // If the label was in Punycode, then we mapped it again above
- // and checked its validity.
- // Now we handle the STD3 restriction to LDH characters (if set)
- // and we look for U+FFFD which indicates disallowed characters
- // in a non-Punycode label or U+FFFD itself in a Punycode label.
- // We also check for dots which can come from the input to a single-label function.
- // Ok to cast away const because we own the UnicodeString.
- UChar *s=(UChar *)label;
- const UChar *limit=label+labelLength;
- UChar oredChars=0;
- // If we enforce STD3 rules, then ASCII characters other than LDH and dot are disallowed.
- UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
- do {
- UChar c=*s;
- if(c<=0x7f) {
- if(c==0x2e) {
- info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT;
- *s=0xfffd;
- } else if(disallowNonLDHDot && asciiData[c]<0) {
- info.labelErrors|=UIDNA_ERROR_DISALLOWED;
- *s=0xfffd;
- }
- } else {
- oredChars|=c;
- if(disallowNonLDHDot && isNonASCIIDisallowedSTD3Valid(c)) {
- info.labelErrors|=UIDNA_ERROR_DISALLOWED;
- *s=0xfffd;
- } else if(c==0xfffd) {
- info.labelErrors|=UIDNA_ERROR_DISALLOWED;
- }
- }
- ++s;
- } while(s<limit);
- // Check for a leading combining mark after other validity checks
- // so that we don't report UIDNA_ERROR_DISALLOWED for the U+FFFD from here.
- UChar32 c;
- int32_t cpLength=0;
- // "Unsafe" is ok because unpaired surrogates were mapped to U+FFFD.
- U16_NEXT_UNSAFE(label, cpLength, c);
- if((U_GET_GC_MASK(c)&U_GC_M_MASK)!=0) {
- info.labelErrors|=UIDNA_ERROR_LEADING_COMBINING_MARK;
- labelString->replace(labelStart, cpLength, (UChar)0xfffd);
- label=labelString->getBuffer()+labelStart;
- labelLength+=1-cpLength;
- if(labelString==&dest) {
- destLabelLength=labelLength;
- }
- }
- if((info.labelErrors&severeErrors)==0) {
- // Do contextual checks only if we do not have U+FFFD from a severe error
- // because U+FFFD can make these checks fail.
- if((options&UIDNA_CHECK_BIDI)!=0 && (!info.isBiDi || info.isOkBiDi)) {
- checkLabelBiDi(label, labelLength, info);
- }
- if( (options&UIDNA_CHECK_CONTEXTJ)!=0 && (oredChars&0x200c)==0x200c &&
- !isLabelOkContextJ(label, labelLength)
- ) {
- info.labelErrors|=UIDNA_ERROR_CONTEXTJ;
- }
- if((options&UIDNA_CHECK_CONTEXTO)!=0 && oredChars>=0xb7) {
- checkLabelContextO(label, labelLength, info);
- }
- if(toASCII) {
- if(wasPunycode) {
- // Leave a Punycode label unchanged if it has no severe errors.
- if(destLabelLength>63) {
- info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
- }
- return destLabelLength;
- } else if(oredChars>=0x80) {
- // Contains non-ASCII characters.
- UnicodeString punycode;
- UChar *buffer=punycode.getBuffer(63); // 63==maximum DNS label length
- if(buffer==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return destLabelLength;
- }
- buffer[0]=0x78; // Write "xn--".
- buffer[1]=0x6e;
- buffer[2]=0x2d;
- buffer[3]=0x2d;
- int32_t punycodeLength=u_strToPunycode(label, labelLength,
- buffer+4, punycode.getCapacity()-4,
- NULL, &errorCode);
- if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
- errorCode=U_ZERO_ERROR;
- punycode.releaseBuffer(4);
- buffer=punycode.getBuffer(4+punycodeLength);
- if(buffer==NULL) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return destLabelLength;
- }
- punycodeLength=u_strToPunycode(label, labelLength,
- buffer+4, punycode.getCapacity()-4,
- NULL, &errorCode);
- }
- punycodeLength+=4;
- punycode.releaseBuffer(punycodeLength);
- if(U_FAILURE(errorCode)) {
- return destLabelLength;
- }
- if(punycodeLength>63) {
- info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
- }
- return replaceLabel(dest, destLabelStart, destLabelLength,
- punycode, punycodeLength, errorCode);
- } else {
- // all-ASCII label
- if(labelLength>63) {
- info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
- }
- }
- }
- } else {
- // If a Punycode label has severe errors,
- // then leave it but make sure it does not look valid.
- if(wasPunycode) {
- info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
- return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info, errorCode);
- }
- }
- return replaceLabel(dest, destLabelStart, destLabelLength,
- *labelString, labelLength, errorCode);
-}
-
-// Make sure an ACE label does not look valid.
-// Append U+FFFD if the label has only LDH characters.
-// If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD.
-int32_t
-UTS46::markBadACELabel(UnicodeString &dest,
- int32_t labelStart, int32_t labelLength,
- UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
- UBool isASCII=TRUE;
- UBool onlyLDH=TRUE;
- const UChar *label=dest.getBuffer()+labelStart;
- // Ok to cast away const because we own the UnicodeString.
- UChar *s=(UChar *)label+4; // After the initial "xn--".
- const UChar *limit=label+labelLength;
- do {
- UChar c=*s;
- if(c<=0x7f) {
- if(c==0x2e) {
- info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT;
- *s=0xfffd;
- isASCII=onlyLDH=FALSE;
- } else if(asciiData[c]<0) {
- onlyLDH=FALSE;
- if(disallowNonLDHDot) {
- *s=0xfffd;
- isASCII=FALSE;
- }
- }
- } else {
- isASCII=onlyLDH=FALSE;
- }
- } while(++s<limit);
- if(onlyLDH) {
- dest.insert(labelStart+labelLength, (UChar)0xfffd);
- if(dest.isBogus()) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- ++labelLength;
- } else {
- if(toASCII && isASCII && labelLength>63) {
- info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
- }
- }
- return labelLength;
-}
-
-const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT);
-const uint32_t R_AL_MASK=U_MASK(U_RIGHT_TO_LEFT)|U_MASK(U_RIGHT_TO_LEFT_ARABIC);
-const uint32_t L_R_AL_MASK=L_MASK|R_AL_MASK;
-
-const uint32_t R_AL_AN_MASK=R_AL_MASK|U_MASK(U_ARABIC_NUMBER);
-
-const uint32_t EN_AN_MASK=U_MASK(U_EUROPEAN_NUMBER)|U_MASK(U_ARABIC_NUMBER);
-const uint32_t R_AL_EN_AN_MASK=R_AL_MASK|EN_AN_MASK;
-const uint32_t L_EN_MASK=L_MASK|U_MASK(U_EUROPEAN_NUMBER);
-
-const uint32_t ES_CS_ET_ON_BN_NSM_MASK=
- U_MASK(U_EUROPEAN_NUMBER_SEPARATOR)|
- U_MASK(U_COMMON_NUMBER_SEPARATOR)|
- U_MASK(U_EUROPEAN_NUMBER_TERMINATOR)|
- U_MASK(U_OTHER_NEUTRAL)|
- U_MASK(U_BOUNDARY_NEUTRAL)|
- U_MASK(U_DIR_NON_SPACING_MARK);
-const uint32_t L_EN_ES_CS_ET_ON_BN_NSM_MASK=L_EN_MASK|ES_CS_ET_ON_BN_NSM_MASK;
-const uint32_t R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK=R_AL_MASK|EN_AN_MASK|ES_CS_ET_ON_BN_NSM_MASK;
-
-// We scan the whole label and check both for whether it contains RTL characters
-// and whether it passes the BiDi Rule.
-// In a BiDi domain name, all labels must pass the BiDi Rule, but we might find
-// that a domain name is a BiDi domain name (has an RTL label) only after
-// processing several earlier labels.
-void
-UTS46::checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const {
- // IDNA2008 BiDi rule
- // Get the directionality of the first character.
- UChar32 c;
- int32_t i=0;
- U16_NEXT_UNSAFE(label, i, c);
- uint32_t firstMask=U_MASK(u_charDirection(c));
- // 1. The first character must be a character with BIDI property L, R
- // or AL. If it has the R or AL property, it is an RTL label; if it
- // has the L property, it is an LTR label.
- if((firstMask&~L_R_AL_MASK)!=0) {
- info.isOkBiDi=FALSE;
- }
- // Get the directionality of the last non-NSM character.
- uint32_t lastMask;
- for(;;) {
- if(i>=labelLength) {
- lastMask=firstMask;
- break;
- }
- U16_PREV_UNSAFE(label, labelLength, c);
- UCharDirection dir=u_charDirection(c);
- if(dir!=U_DIR_NON_SPACING_MARK) {
- lastMask=U_MASK(dir);
- break;
- }
- }
- // 3. In an RTL label, the end of the label must be a character with
- // BIDI property R, AL, EN or AN, followed by zero or more
- // characters with BIDI property NSM.
- // 6. In an LTR label, the end of the label must be a character with
- // BIDI property L or EN, followed by zero or more characters with
- // BIDI property NSM.
- if( (firstMask&L_MASK)!=0 ?
- (lastMask&~L_EN_MASK)!=0 :
- (lastMask&~R_AL_EN_AN_MASK)!=0
- ) {
- info.isOkBiDi=FALSE;
- }
- // Add the directionalities of the intervening characters.
- uint32_t mask=firstMask|lastMask;
- while(i<labelLength) {
- U16_NEXT_UNSAFE(label, i, c);
- mask|=U_MASK(u_charDirection(c));
- }
- if(firstMask&L_MASK) {
- // 5. In an LTR label, only characters with the BIDI properties L, EN,
- // ES, CS, ET, ON, BN and NSM are allowed.
- if((mask&~L_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) {
- info.isOkBiDi=FALSE;
- }
- } else {
- // 2. In an RTL label, only characters with the BIDI properties R, AL,
- // AN, EN, ES, CS, ET, ON, BN and NSM are allowed.
- if((mask&~R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) {
- info.isOkBiDi=FALSE;
- }
- // 4. In an RTL label, if an EN is present, no AN may be present, and
- // vice versa.
- if((mask&EN_AN_MASK)==EN_AN_MASK) {
- info.isOkBiDi=FALSE;
- }
- }
- // An RTL label is a label that contains at least one character of type
- // R, AL or AN. [...]
- // A "BIDI domain name" is a domain name that contains at least one RTL
- // label. [...]
- // The following rule, consisting of six conditions, applies to labels
- // in BIDI domain names.
- if((mask&R_AL_AN_MASK)!=0) {
- info.isBiDi=TRUE;
- }
-}
-
-// Special code for the ASCII prefix of a BiDi domain name.
-// The ASCII prefix is all-LTR.
-
-// IDNA2008 BiDi rule, parts relevant to ASCII labels:
-// 1. The first character must be a character with BIDI property L [...]
-// 5. In an LTR label, only characters with the BIDI properties L, EN,
-// ES, CS, ET, ON, BN and NSM are allowed.
-// 6. In an LTR label, the end of the label must be a character with
-// BIDI property L or EN [...]
-
-// UTF-16 version, called for mapped ASCII prefix.
-// Cannot contain uppercase A-Z.
-// s[length-1] must be the trailing dot.
-static UBool
-isASCIIOkBiDi(const UChar *s, int32_t length) {
- int32_t labelStart=0;
- for(int32_t i=0; i<length; ++i) {
- UChar c=s[i];
- if(c==0x2e) { // dot
- if(i>labelStart) {
- c=s[i-1];
- if(!(0x61<=c && c<=0x7a) && !(0x30<=c && c<=0x39)) {
- // Last character in the label is not an L or EN.
- return FALSE;
- }
- }
- labelStart=i+1;
- } else if(i==labelStart) {
- if(!(0x61<=c && c<=0x7a)) {
- // First character in the label is not an L.
- return FALSE;
- }
- } else {
- if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) {
- // Intermediate character in the label is a B, S or WS.
- return FALSE;
- }
- }
- }
- return TRUE;
-}
-
-// UTF-8 version, called for source ASCII prefix.
-// Can contain uppercase A-Z.
-// s[length-1] must be the trailing dot.
-static UBool
-isASCIIOkBiDi(const char *s, int32_t length) {
- int32_t labelStart=0;
- for(int32_t i=0; i<length; ++i) {
- char c=s[i];
- if(c==0x2e) { // dot
- if(i>labelStart) {
- c=s[i-1];
- if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a) && !(0x30<=c && c<=0x39)) {
- // Last character in the label is not an L or EN.
- return FALSE;
- }
- }
- labelStart=i+1;
- } else if(i==labelStart) {
- if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a)) {
- // First character in the label is not an L.
- return FALSE;
- }
- } else {
- if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) {
- // Intermediate character in the label is a B, S or WS.
- return FALSE;
- }
- }
- }
- return TRUE;
-}
-
-UBool
-UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
- // [IDNA2008-Tables]
- // 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
- for(int32_t i=0; i<labelLength; ++i) {
- if(label[i]==0x200c) {
- // Appendix A.1. ZERO WIDTH NON-JOINER
- // Rule Set:
- // False;
- // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
- // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C
- // (Joining_Type:T)*(Joining_Type:{R,D})) Then True;
- if(i==0) {
- return FALSE;
- }
- UChar32 c;
- int32_t j=i;
- U16_PREV_UNSAFE(label, j, c);
- if(uts46Norm2.getCombiningClass(c)==9) {
- continue;
- }
- // check precontext (Joining_Type:{L,D})(Joining_Type:T)*
- for(;;) {
- UJoiningType type=ubidi_getJoiningType(c);
- if(type==U_JT_TRANSPARENT) {
- if(j==0) {
- return FALSE;
- }
- U16_PREV_UNSAFE(label, j, c);
- } else if(type==U_JT_LEFT_JOINING || type==U_JT_DUAL_JOINING) {
- break; // precontext fulfilled
- } else {
- return FALSE;
- }
- }
- // check postcontext (Joining_Type:T)*(Joining_Type:{R,D})
- for(j=i+1;;) {
- if(j==labelLength) {
- return FALSE;
- }
- U16_NEXT_UNSAFE(label, j, c);
- UJoiningType type=ubidi_getJoiningType(c);
- if(type==U_JT_TRANSPARENT) {
- // just skip this character
- } else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) {
- break; // postcontext fulfilled
- } else {
- return FALSE;
- }
- }
- } else if(label[i]==0x200d) {
- // Appendix A.2. ZERO WIDTH JOINER (U+200D)
- // Rule Set:
- // False;
- // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
- if(i==0) {
- return FALSE;
- }
- UChar32 c;
- int32_t j=i;
- U16_PREV_UNSAFE(label, j, c);
- if(uts46Norm2.getCombiningClass(c)!=9) {
- return FALSE;
- }
- }
- }
- return TRUE;
-}
-
-void
-UTS46::checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const {
- int32_t labelEnd=labelLength-1; // inclusive
- int32_t arabicDigits=0; // -1 for 066x, +1 for 06Fx
- for(int32_t i=0; i<=labelEnd; ++i) {
- UChar32 c=label[i];
- if(c<0xb7) {
- // ASCII fastpath
- } else if(c<=0x6f9) {
- if(c==0xb7) {
- // Appendix A.3. MIDDLE DOT (U+00B7)
- // Rule Set:
- // False;
- // If Before(cp) .eq. U+006C And
- // After(cp) .eq. U+006C Then True;
- if(!(0<i && label[i-1]==0x6c &&
- i<labelEnd && label[i+1]==0x6c)) {
- info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION;
- }
- } else if(c==0x375) {
- // Appendix A.4. GREEK LOWER NUMERAL SIGN (KERAIA) (U+0375)
- // Rule Set:
- // False;
- // If Script(After(cp)) .eq. Greek Then True;
- UScriptCode script=USCRIPT_INVALID_CODE;
- if(i<labelEnd) {
- UErrorCode errorCode=U_ZERO_ERROR;
- int32_t j=i+1;
- U16_NEXT(label, j, labelLength, c);
- script=uscript_getScript(c, &errorCode);
- }
- if(script!=USCRIPT_GREEK) {
- info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION;
- }
- } else if(c==0x5f3 || c==0x5f4) {
- // Appendix A.5. HEBREW PUNCTUATION GERESH (U+05F3)
- // Rule Set:
- // False;
- // If Script(Before(cp)) .eq. Hebrew Then True;
- //
- // Appendix A.6. HEBREW PUNCTUATION GERSHAYIM (U+05F4)
- // Rule Set:
- // False;
- // If Script(Before(cp)) .eq. Hebrew Then True;
- UScriptCode script=USCRIPT_INVALID_CODE;
- if(0<i) {
- UErrorCode errorCode=U_ZERO_ERROR;
- int32_t j=i;
- U16_PREV(label, 0, j, c);
- script=uscript_getScript(c, &errorCode);
- }
- if(script!=USCRIPT_HEBREW) {
- info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION;
- }
- } else if(0x660<=c /* && c<=0x6f9 */) {
- // Appendix A.8. ARABIC-INDIC DIGITS (0660..0669)
- // Rule Set:
- // True;
- // For All Characters:
- // If cp .in. 06F0..06F9 Then False;
- // End For;
- //
- // Appendix A.9. EXTENDED ARABIC-INDIC DIGITS (06F0..06F9)
- // Rule Set:
- // True;
- // For All Characters:
- // If cp .in. 0660..0669 Then False;
- // End For;
- if(c<=0x669) {
- if(arabicDigits>0) {
- info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS;
- }
- arabicDigits=-1;
- } else if(0x6f0<=c) {
- if(arabicDigits<0) {
- info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS;
- }
- arabicDigits=1;
- }
- }
- } else if(c==0x30fb) {
- // Appendix A.7. KATAKANA MIDDLE DOT (U+30FB)
- // Rule Set:
- // False;
- // For All Characters:
- // If Script(cp) .in. {Hiragana, Katakana, Han} Then True;
- // End For;
- UErrorCode errorCode=U_ZERO_ERROR;
- for(int j=0;;) {
- if(j>labelEnd) {
- info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION;
- break;
- }
- U16_NEXT(label, j, labelLength, c);
- UScriptCode script=uscript_getScript(c, &errorCode);
- if(script==USCRIPT_HIRAGANA || script==USCRIPT_KATAKANA || script==USCRIPT_HAN) {
- break;
- }
- }
- }
- }
-}
-
-U_NAMESPACE_END
-
-// C API ------------------------------------------------------------------- ***
-
-U_NAMESPACE_USE
-
-U_CAPI UIDNA * U_EXPORT2
-uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode) {
- return reinterpret_cast<UIDNA *>(IDNA::createUTS46Instance(options, *pErrorCode));
-}
-
-U_CAPI void U_EXPORT2
-uidna_close(UIDNA *idna) {
- delete reinterpret_cast<IDNA *>(idna);
-}
-
-static UBool
-checkArgs(const void *label, int32_t length,
- void *dest, int32_t capacity,
- UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return FALSE;
- }
- // sizeof(UIDNAInfo)=16 in the first API version.
- if(pInfo==NULL || pInfo->size<16) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- if( (label==NULL ? length!=0 : length<-1) ||
- (dest==NULL ? capacity!=0 : capacity<0) ||
- (dest==label && label!=NULL)
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- // Set all *pInfo bytes to 0 except for the size field itself.
- uprv_memset(&pInfo->size+1, 0, pInfo->size-sizeof(pInfo->size));
- return TRUE;
-}
-
-static void
-idnaInfoToStruct(IDNAInfo &info, UIDNAInfo *pInfo) {
- pInfo->isTransitionalDifferent=info.isTransitionalDifferent();
- pInfo->errors=info.getErrors();
-}
-
-U_CAPI int32_t U_EXPORT2
-uidna_labelToASCII(const UIDNA *idna,
- const UChar *label, int32_t length,
- UChar *dest, int32_t capacity,
- UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
- if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) {
- return 0;
- }
- UnicodeString src((UBool)(length<0), label, length);
- UnicodeString destString(dest, 0, capacity);
- IDNAInfo info;
- reinterpret_cast<const IDNA *>(idna)->labelToASCII(src, destString, info, *pErrorCode);
- idnaInfoToStruct(info, pInfo);
- return destString.extract(dest, capacity, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-uidna_labelToUnicode(const UIDNA *idna,
- const UChar *label, int32_t length,
- UChar *dest, int32_t capacity,
- UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
- if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) {
- return 0;
- }
- UnicodeString src((UBool)(length<0), label, length);
- UnicodeString destString(dest, 0, capacity);
- IDNAInfo info;
- reinterpret_cast<const IDNA *>(idna)->labelToUnicode(src, destString, info, *pErrorCode);
- idnaInfoToStruct(info, pInfo);
- return destString.extract(dest, capacity, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-uidna_nameToASCII(const UIDNA *idna,
- const UChar *name, int32_t length,
- UChar *dest, int32_t capacity,
- UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
- if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) {
- return 0;
- }
- UnicodeString src((UBool)(length<0), name, length);
- UnicodeString destString(dest, 0, capacity);
- IDNAInfo info;
- reinterpret_cast<const IDNA *>(idna)->nameToASCII(src, destString, info, *pErrorCode);
- idnaInfoToStruct(info, pInfo);
- return destString.extract(dest, capacity, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-uidna_nameToUnicode(const UIDNA *idna,
- const UChar *name, int32_t length,
- UChar *dest, int32_t capacity,
- UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
- if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) {
- return 0;
- }
- UnicodeString src((UBool)(length<0), name, length);
- UnicodeString destString(dest, 0, capacity);
- IDNAInfo info;
- reinterpret_cast<const IDNA *>(idna)->nameToUnicode(src, destString, info, *pErrorCode);
- idnaInfoToStruct(info, pInfo);
- return destString.extract(dest, capacity, *pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-uidna_labelToASCII_UTF8(const UIDNA *idna,
- const char *label, int32_t length,
- char *dest, int32_t capacity,
- UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
- if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) {
- return 0;
- }
- StringPiece src(label, length<0 ? static_cast<int32_t>(uprv_strlen(label)) : length);
- CheckedArrayByteSink sink(dest, capacity);
- IDNAInfo info;
- reinterpret_cast<const IDNA *>(idna)->labelToASCII_UTF8(src, sink, info, *pErrorCode);
- idnaInfoToStruct(info, pInfo);
- return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-uidna_labelToUnicodeUTF8(const UIDNA *idna,
- const char *label, int32_t length,
- char *dest, int32_t capacity,
- UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
- if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) {
- return 0;
- }
- StringPiece src(label, length<0 ? static_cast<int32_t>(uprv_strlen(label)) : length);
- CheckedArrayByteSink sink(dest, capacity);
- IDNAInfo info;
- reinterpret_cast<const IDNA *>(idna)->labelToUnicodeUTF8(src, sink, info, *pErrorCode);
- idnaInfoToStruct(info, pInfo);
- return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-uidna_nameToASCII_UTF8(const UIDNA *idna,
- const char *name, int32_t length,
- char *dest, int32_t capacity,
- UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
- if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) {
- return 0;
- }
- StringPiece src(name, length<0 ? static_cast<int32_t>(uprv_strlen(name)) : length);
- CheckedArrayByteSink sink(dest, capacity);
- IDNAInfo info;
- reinterpret_cast<const IDNA *>(idna)->nameToASCII_UTF8(src, sink, info, *pErrorCode);
- idnaInfoToStruct(info, pInfo);
- return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-uidna_nameToUnicodeUTF8(const UIDNA *idna,
- const char *name, int32_t length,
- char *dest, int32_t capacity,
- UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
- if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) {
- return 0;
- }
- StringPiece src(name, length<0 ? static_cast<int32_t>(uprv_strlen(name)) : length);
- CheckedArrayByteSink sink(dest, capacity);
- IDNAInfo info;
- reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode);
- idnaInfoToStruct(info, pInfo);
- return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
-}
-
-#endif // UCONFIG_NO_IDNA
diff --git a/contrib/libs/icu/common/utypeinfo.h b/contrib/libs/icu/common/utypeinfo.h
deleted file mode 100644
index c6663734fc3..00000000000
--- a/contrib/libs/icu/common/utypeinfo.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2012-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*/
-
-#ifndef __UTYPEINFO_H__
-#define __UTYPEINFO_H__
-
-// Windows header <typeinfo> does not define 'exception' in 'std' namespace.
-// Therefore, a project using ICU cannot be compiled with _HAS_EXCEPTIONS
-// set to 0 on Windows with Visual Studio. To work around that, we have to
-// include <exception> explicitly and add using statement below.
-// Whenever 'typeid' is used, this header has to be included
-// instead of <typeinfo>.
-// Visual Studio 10 emits warning 4275 with this change. If you compile
-// with exception disabled, you have to suppress warning 4275.
-#if defined(_MSC_VER) && _HAS_EXCEPTIONS == 0
-#include <exception>
-using std::exception;
-#endif
-#if defined(__GLIBCXX__)
-namespace std { class type_info; } // WORKAROUND: http://llvm.org/bugs/show_bug.cgi?id=13364
-#endif
-#include <typeinfo> // for 'typeid' to work
-
-#endif
diff --git a/contrib/libs/icu/common/utypes.cpp b/contrib/libs/icu/common/utypes.cpp
deleted file mode 100644
index 7531e465683..00000000000
--- a/contrib/libs/icu/common/utypes.cpp
+++ /dev/null
@@ -1,226 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1997-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-*
-* FILE NAME : utypes.c (previously putil.c)
-*
-* Date Name Description
-* 10/07/2004 grhoten split from putil.c
-******************************************************************************
-*/
-
-#include "unicode/utypes.h"
-
-/* u_errorName() ------------------------------------------------------------ */
-
-static const char * const
-_uErrorInfoName[U_ERROR_WARNING_LIMIT-U_ERROR_WARNING_START]={
- "U_USING_FALLBACK_WARNING",
- "U_USING_DEFAULT_WARNING",
- "U_SAFECLONE_ALLOCATED_WARNING",
- "U_STATE_OLD_WARNING",
- "U_STRING_NOT_TERMINATED_WARNING",
- "U_SORT_KEY_TOO_SHORT_WARNING",
- "U_AMBIGUOUS_ALIAS_WARNING",
- "U_DIFFERENT_UCA_VERSION",
- "U_PLUGIN_CHANGED_LEVEL_WARNING",
-};
-
-static const char * const
-_uTransErrorName[U_PARSE_ERROR_LIMIT - U_PARSE_ERROR_START]={
- "U_BAD_VARIABLE_DEFINITION",
- "U_MALFORMED_RULE",
- "U_MALFORMED_SET",
- "U_MALFORMED_SYMBOL_REFERENCE",
- "U_MALFORMED_UNICODE_ESCAPE",
- "U_MALFORMED_VARIABLE_DEFINITION",
- "U_MALFORMED_VARIABLE_REFERENCE",
- "U_MISMATCHED_SEGMENT_DELIMITERS",
- "U_MISPLACED_ANCHOR_START",
- "U_MISPLACED_CURSOR_OFFSET",
- "U_MISPLACED_QUANTIFIER",
- "U_MISSING_OPERATOR",
- "U_MISSING_SEGMENT_CLOSE",
- "U_MULTIPLE_ANTE_CONTEXTS",
- "U_MULTIPLE_CURSORS",
- "U_MULTIPLE_POST_CONTEXTS",
- "U_TRAILING_BACKSLASH",
- "U_UNDEFINED_SEGMENT_REFERENCE",
- "U_UNDEFINED_VARIABLE",
- "U_UNQUOTED_SPECIAL",
- "U_UNTERMINATED_QUOTE",
- "U_RULE_MASK_ERROR",
- "U_MISPLACED_COMPOUND_FILTER",
- "U_MULTIPLE_COMPOUND_FILTERS",
- "U_INVALID_RBT_SYNTAX",
- "U_INVALID_PROPERTY_PATTERN",
- "U_MALFORMED_PRAGMA",
- "U_UNCLOSED_SEGMENT",
- "U_ILLEGAL_CHAR_IN_SEGMENT",
- "U_VARIABLE_RANGE_EXHAUSTED",
- "U_VARIABLE_RANGE_OVERLAP",
- "U_ILLEGAL_CHARACTER",
- "U_INTERNAL_TRANSLITERATOR_ERROR",
- "U_INVALID_ID",
- "U_INVALID_FUNCTION"
-};
-
-static const char * const
-_uErrorName[U_STANDARD_ERROR_LIMIT]={
- "U_ZERO_ERROR",
-
- "U_ILLEGAL_ARGUMENT_ERROR",
- "U_MISSING_RESOURCE_ERROR",
- "U_INVALID_FORMAT_ERROR",
- "U_FILE_ACCESS_ERROR",
- "U_INTERNAL_PROGRAM_ERROR",
- "U_MESSAGE_PARSE_ERROR",
- "U_MEMORY_ALLOCATION_ERROR",
- "U_INDEX_OUTOFBOUNDS_ERROR",
- "U_PARSE_ERROR",
- "U_INVALID_CHAR_FOUND",
- "U_TRUNCATED_CHAR_FOUND",
- "U_ILLEGAL_CHAR_FOUND",
- "U_INVALID_TABLE_FORMAT",
- "U_INVALID_TABLE_FILE",
- "U_BUFFER_OVERFLOW_ERROR",
- "U_UNSUPPORTED_ERROR",
- "U_RESOURCE_TYPE_MISMATCH",
- "U_ILLEGAL_ESCAPE_SEQUENCE",
- "U_UNSUPPORTED_ESCAPE_SEQUENCE",
- "U_NO_SPACE_AVAILABLE",
- "U_CE_NOT_FOUND_ERROR",
- "U_PRIMARY_TOO_LONG_ERROR",
- "U_STATE_TOO_OLD_ERROR",
- "U_TOO_MANY_ALIASES_ERROR",
- "U_ENUM_OUT_OF_SYNC_ERROR",
- "U_INVARIANT_CONVERSION_ERROR",
- "U_INVALID_STATE_ERROR",
- "U_COLLATOR_VERSION_MISMATCH",
- "U_USELESS_COLLATOR_ERROR",
- "U_NO_WRITE_PERMISSION"
-};
-static const char * const
-_uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
- "U_UNEXPECTED_TOKEN",
- "U_MULTIPLE_DECIMAL_SEPARATORS",
- "U_MULTIPLE_EXPONENTIAL_SYMBOLS",
- "U_MALFORMED_EXPONENTIAL_PATTERN",
- "U_MULTIPLE_PERCENT_SYMBOLS",
- "U_MULTIPLE_PERMILL_SYMBOLS",
- "U_MULTIPLE_PAD_SPECIFIERS",
- "U_PATTERN_SYNTAX_ERROR",
- "U_ILLEGAL_PAD_POSITION",
- "U_UNMATCHED_BRACES",
- "U_UNSUPPORTED_PROPERTY",
- "U_UNSUPPORTED_ATTRIBUTE",
- "U_ARGUMENT_TYPE_MISMATCH",
- "U_DUPLICATE_KEYWORD",
- "U_UNDEFINED_KEYWORD",
- "U_DEFAULT_KEYWORD_MISSING",
- "U_DECIMAL_NUMBER_SYNTAX_ERROR",
- "U_FORMAT_INEXACT_ERROR",
- "U_NUMBER_ARG_OUTOFBOUNDS_ERROR",
- "U_NUMBER_SKELETON_SYNTAX_ERROR",
-};
-
-static const char * const
-_uBrkErrorName[U_BRK_ERROR_LIMIT - U_BRK_ERROR_START] = {
- "U_BRK_INTERNAL_ERROR",
- "U_BRK_HEX_DIGITS_EXPECTED",
- "U_BRK_SEMICOLON_EXPECTED",
- "U_BRK_RULE_SYNTAX",
- "U_BRK_UNCLOSED_SET",
- "U_BRK_ASSIGN_ERROR",
- "U_BRK_VARIABLE_REDFINITION",
- "U_BRK_MISMATCHED_PAREN",
- "U_BRK_NEW_LINE_IN_QUOTED_STRING",
- "U_BRK_UNDEFINED_VARIABLE",
- "U_BRK_INIT_ERROR",
- "U_BRK_RULE_EMPTY_SET",
- "U_BRK_UNRECOGNIZED_OPTION",
- "U_BRK_MALFORMED_RULE_TAG"
-};
-
-static const char * const
-_uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = {
- "U_REGEX_INTERNAL_ERROR",
- "U_REGEX_RULE_SYNTAX",
- "U_REGEX_INVALID_STATE",
- "U_REGEX_BAD_ESCAPE_SEQUENCE",
- "U_REGEX_PROPERTY_SYNTAX",
- "U_REGEX_UNIMPLEMENTED",
- "U_REGEX_MISMATCHED_PAREN",
- "U_REGEX_NUMBER_TOO_BIG",
- "U_REGEX_BAD_INTERVAL",
- "U_REGEX_MAX_LT_MIN",
- "U_REGEX_INVALID_BACK_REF",
- "U_REGEX_INVALID_FLAG",
- "U_REGEX_LOOK_BEHIND_LIMIT",
- "U_REGEX_SET_CONTAINS_STRING",
- "U_REGEX_OCTAL_TOO_BIG",
- "U_REGEX_MISSING_CLOSE_BRACKET",
- "U_REGEX_INVALID_RANGE",
- "U_REGEX_STACK_OVERFLOW",
- "U_REGEX_TIME_OUT",
- "U_REGEX_STOPPED_BY_CALLER",
- "U_REGEX_PATTERN_TOO_BIG",
- "U_REGEX_INVALID_CAPTURE_GROUP_NAME"
-};
-
-static const char * const
-_uIDNAErrorName[U_IDNA_ERROR_LIMIT - U_IDNA_ERROR_START] = {
- "U_STRINGPREP_PROHIBITED_ERROR",
- "U_STRINGPREP_UNASSIGNED_ERROR",
- "U_STRINGPREP_CHECK_BIDI_ERROR",
- "U_IDNA_STD3_ASCII_RULES_ERROR",
- "U_IDNA_ACE_PREFIX_ERROR",
- "U_IDNA_VERIFICATION_ERROR",
- "U_IDNA_LABEL_TOO_LONG_ERROR",
- "U_IDNA_ZERO_LENGTH_LABEL_ERROR",
- "U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR"
-};
-
-static const char * const
-_uPluginErrorName[U_PLUGIN_ERROR_LIMIT - U_PLUGIN_ERROR_START] = {
- "U_PLUGIN_TOO_HIGH",
- "U_PLUGIN_DIDNT_SET_LEVEL",
-};
-
-U_CAPI const char * U_EXPORT2
-u_errorName(UErrorCode code) {
- if(U_ZERO_ERROR <= code && code < U_STANDARD_ERROR_LIMIT) {
- return _uErrorName[code];
- } else if(U_ERROR_WARNING_START <= code && code < U_ERROR_WARNING_LIMIT) {
- return _uErrorInfoName[code - U_ERROR_WARNING_START];
- } else if(U_PARSE_ERROR_START <= code && code < U_PARSE_ERROR_LIMIT){
- return _uTransErrorName[code - U_PARSE_ERROR_START];
- } else if(U_FMT_PARSE_ERROR_START <= code && code < U_FMT_PARSE_ERROR_LIMIT){
- return _uFmtErrorName[code - U_FMT_PARSE_ERROR_START];
- } else if (U_BRK_ERROR_START <= code && code < U_BRK_ERROR_LIMIT){
- return _uBrkErrorName[code - U_BRK_ERROR_START];
- } else if (U_REGEX_ERROR_START <= code && code < U_REGEX_ERROR_LIMIT) {
- return _uRegexErrorName[code - U_REGEX_ERROR_START];
- } else if(U_IDNA_ERROR_START <= code && code < U_IDNA_ERROR_LIMIT) {
- return _uIDNAErrorName[code - U_IDNA_ERROR_START];
- } else if(U_PLUGIN_ERROR_START <= code && code < U_PLUGIN_ERROR_LIMIT) {
- return _uPluginErrorName[code - U_PLUGIN_ERROR_START];
- } else {
- return "[BOGUS UErrorCode]";
- }
-}
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/contrib/libs/icu/common/uvector.cpp b/contrib/libs/icu/common/uvector.cpp
deleted file mode 100644
index cf19edf646f..00000000000
--- a/contrib/libs/icu/common/uvector.cpp
+++ /dev/null
@@ -1,567 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 1999-2013, International Business Machines Corporation and
-* others. All Rights Reserved.
-******************************************************************************
-* Date Name Description
-* 10/22/99 alan Creation.
-**********************************************************************
-*/
-
-#include "uvector.h"
-#include "cmemory.h"
-#include "uarrsort.h"
-#include "uelement.h"
-
-U_NAMESPACE_BEGIN
-
-#define DEFAULT_CAPACITY 8
-
-/*
- * Constants for hinting whether a key is an integer
- * or a pointer. If a hint bit is zero, then the associated
- * token is assumed to be an integer. This is needed for iSeries
- */
-#define HINT_KEY_POINTER (1)
-#define HINT_KEY_INTEGER (0)
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UVector)
-
-UVector::UVector(UErrorCode &status) :
- count(0),
- capacity(0),
- elements(0),
- deleter(0),
- comparer(0)
-{
- _init(DEFAULT_CAPACITY, status);
-}
-
-UVector::UVector(int32_t initialCapacity, UErrorCode &status) :
- count(0),
- capacity(0),
- elements(0),
- deleter(0),
- comparer(0)
-{
- _init(initialCapacity, status);
-}
-
-UVector::UVector(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status) :
- count(0),
- capacity(0),
- elements(0),
- deleter(d),
- comparer(c)
-{
- _init(DEFAULT_CAPACITY, status);
-}
-
-UVector::UVector(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status) :
- count(0),
- capacity(0),
- elements(0),
- deleter(d),
- comparer(c)
-{
- _init(initialCapacity, status);
-}
-
-void UVector::_init(int32_t initialCapacity, UErrorCode &status) {
- if (U_FAILURE(status)) {
- return;
- }
- // Fix bogus initialCapacity values; avoid malloc(0) and integer overflow
- if ((initialCapacity < 1) || (initialCapacity > (int32_t)(INT32_MAX / sizeof(UElement)))) {
- initialCapacity = DEFAULT_CAPACITY;
- }
- elements = (UElement *)uprv_malloc(sizeof(UElement)*initialCapacity);
- if (elements == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- } else {
- capacity = initialCapacity;
- }
-}
-
-UVector::~UVector() {
- removeAllElements();
- uprv_free(elements);
- elements = 0;
-}
-
-/**
- * Assign this object to another (make this a copy of 'other').
- * Use the 'assign' function to assign each element.
- */
-void UVector::assign(const UVector& other, UElementAssigner *assign, UErrorCode &ec) {
- if (ensureCapacity(other.count, ec)) {
- setSize(other.count, ec);
- if (U_SUCCESS(ec)) {
- for (int32_t i=0; i<other.count; ++i) {
- if (elements[i].pointer != 0 && deleter != 0) {
- (*deleter)(elements[i].pointer);
- }
- (*assign)(&elements[i], &other.elements[i]);
- }
- }
- }
-}
-
-// This only does something sensible if this object has a non-null comparer
-UBool UVector::operator==(const UVector& other) {
- int32_t i;
- if (count != other.count) return FALSE;
- if (comparer != NULL) {
- // Compare using this object's comparer
- for (i=0; i<count; ++i) {
- if (!(*comparer)(elements[i], other.elements[i])) {
- return FALSE;
- }
- }
- }
- return TRUE;
-}
-
-void UVector::addElement(void* obj, UErrorCode &status) {
- if (ensureCapacity(count + 1, status)) {
- elements[count++].pointer = obj;
- }
-}
-
-void UVector::addElement(int32_t elem, UErrorCode &status) {
- if (ensureCapacity(count + 1, status)) {
- elements[count].pointer = NULL; // Pointers may be bigger than ints.
- elements[count].integer = elem;
- count++;
- }
-}
-
-void UVector::setElementAt(void* obj, int32_t index) {
- if (0 <= index && index < count) {
- if (elements[index].pointer != 0 && deleter != 0) {
- (*deleter)(elements[index].pointer);
- }
- elements[index].pointer = obj;
- }
- /* else index out of range */
-}
-
-void UVector::setElementAt(int32_t elem, int32_t index) {
- if (0 <= index && index < count) {
- if (elements[index].pointer != 0 && deleter != 0) {
- // TODO: this should be an error. mixing up ints and pointers.
- (*deleter)(elements[index].pointer);
- }
- elements[index].pointer = NULL;
- elements[index].integer = elem;
- }
- /* else index out of range */
-}
-
-void UVector::insertElementAt(void* obj, int32_t index, UErrorCode &status) {
- // must have 0 <= index <= count
- if (0 <= index && index <= count && ensureCapacity(count + 1, status)) {
- for (int32_t i=count; i>index; --i) {
- elements[i] = elements[i-1];
- }
- elements[index].pointer = obj;
- ++count;
- }
- /* else index out of range */
-}
-
-void UVector::insertElementAt(int32_t elem, int32_t index, UErrorCode &status) {
- // must have 0 <= index <= count
- if (0 <= index && index <= count && ensureCapacity(count + 1, status)) {
- for (int32_t i=count; i>index; --i) {
- elements[i] = elements[i-1];
- }
- elements[index].pointer = NULL;
- elements[index].integer = elem;
- ++count;
- }
- /* else index out of range */
-}
-
-void* UVector::elementAt(int32_t index) const {
- return (0 <= index && index < count) ? elements[index].pointer : 0;
-}
-
-int32_t UVector::elementAti(int32_t index) const {
- return (0 <= index && index < count) ? elements[index].integer : 0;
-}
-
-UBool UVector::containsAll(const UVector& other) const {
- for (int32_t i=0; i<other.size(); ++i) {
- if (indexOf(other.elements[i]) < 0) {
- return FALSE;
- }
- }
- return TRUE;
-}
-
-UBool UVector::containsNone(const UVector& other) const {
- for (int32_t i=0; i<other.size(); ++i) {
- if (indexOf(other.elements[i]) >= 0) {
- return FALSE;
- }
- }
- return TRUE;
-}
-
-UBool UVector::removeAll(const UVector& other) {
- UBool changed = FALSE;
- for (int32_t i=0; i<other.size(); ++i) {
- int32_t j = indexOf(other.elements[i]);
- if (j >= 0) {
- removeElementAt(j);
- changed = TRUE;
- }
- }
- return changed;
-}
-
-UBool UVector::retainAll(const UVector& other) {
- UBool changed = FALSE;
- for (int32_t j=size()-1; j>=0; --j) {
- int32_t i = other.indexOf(elements[j]);
- if (i < 0) {
- removeElementAt(j);
- changed = TRUE;
- }
- }
- return changed;
-}
-
-void UVector::removeElementAt(int32_t index) {
- void* e = orphanElementAt(index);
- if (e != 0 && deleter != 0) {
- (*deleter)(e);
- }
-}
-
-UBool UVector::removeElement(void* obj) {
- int32_t i = indexOf(obj);
- if (i >= 0) {
- removeElementAt(i);
- return TRUE;
- }
- return FALSE;
-}
-
-void UVector::removeAllElements(void) {
- if (deleter != 0) {
- for (int32_t i=0; i<count; ++i) {
- if (elements[i].pointer != 0) {
- (*deleter)(elements[i].pointer);
- }
- }
- }
- count = 0;
-}
-
-UBool UVector::equals(const UVector &other) const {
- int i;
-
- if (this->count != other.count) {
- return FALSE;
- }
- if (comparer == 0) {
- for (i=0; i<count; i++) {
- if (elements[i].pointer != other.elements[i].pointer) {
- return FALSE;
- }
- }
- } else {
- UElement key;
- for (i=0; i<count; i++) {
- key.pointer = &other.elements[i];
- if (!(*comparer)(key, elements[i])) {
- return FALSE;
- }
- }
- }
- return TRUE;
-}
-
-
-
-int32_t UVector::indexOf(void* obj, int32_t startIndex) const {
- UElement key;
- key.pointer = obj;
- return indexOf(key, startIndex, HINT_KEY_POINTER);
-}
-
-int32_t UVector::indexOf(int32_t obj, int32_t startIndex) const {
- UElement key;
- key.integer = obj;
- return indexOf(key, startIndex, HINT_KEY_INTEGER);
-}
-
-// This only works if this object has a non-null comparer
-int32_t UVector::indexOf(UElement key, int32_t startIndex, int8_t hint) const {
- int32_t i;
- if (comparer != 0) {
- for (i=startIndex; i<count; ++i) {
- if ((*comparer)(key, elements[i])) {
- return i;
- }
- }
- } else {
- for (i=startIndex; i<count; ++i) {
- /* Pointers are not always the same size as ints so to perform
- * a valid comparison we need to know whether we are being
- * provided an int or a pointer. */
- if (hint & HINT_KEY_POINTER) {
- if (key.pointer == elements[i].pointer) {
- return i;
- }
- } else {
- if (key.integer == elements[i].integer) {
- return i;
- }
- }
- }
- }
- return -1;
-}
-
-UBool UVector::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
- if (minimumCapacity < 0) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- if (capacity < minimumCapacity) {
- if (capacity > (INT32_MAX - 1) / 2) { // integer overflow check
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- int32_t newCap = capacity * 2;
- if (newCap < minimumCapacity) {
- newCap = minimumCapacity;
- }
- if (newCap > (int32_t)(INT32_MAX / sizeof(UElement))) { // integer overflow check
- // We keep the original memory contents on bad minimumCapacity.
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- UElement* newElems = (UElement *)uprv_realloc(elements, sizeof(UElement)*newCap);
- if (newElems == NULL) {
- // We keep the original contents on the memory failure on realloc or bad minimumCapacity.
- status = U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
- }
- elements = newElems;
- capacity = newCap;
- }
- return TRUE;
-}
-
-/**
- * Change the size of this vector as follows: If newSize is smaller,
- * then truncate the array, possibly deleting held elements for i >=
- * newSize. If newSize is larger, grow the array, filling in new
- * slots with NULL.
- */
-void UVector::setSize(int32_t newSize, UErrorCode &status) {
- int32_t i;
- if (newSize < 0) {
- return;
- }
- if (newSize > count) {
- if (!ensureCapacity(newSize, status)) {
- return;
- }
- UElement empty;
- empty.pointer = NULL;
- empty.integer = 0;
- for (i=count; i<newSize; ++i) {
- elements[i] = empty;
- }
- } else {
- /* Most efficient to count down */
- for (i=count-1; i>=newSize; --i) {
- removeElementAt(i);
- }
- }
- count = newSize;
-}
-
-/**
- * Fill in the given array with all elements of this vector.
- */
-void** UVector::toArray(void** result) const {
- void** a = result;
- for (int i=0; i<count; ++i) {
- *a++ = elements[i].pointer;
- }
- return result;
-}
-
-UObjectDeleter *UVector::setDeleter(UObjectDeleter *d) {
- UObjectDeleter *old = deleter;
- deleter = d;
- return old;
-}
-
-UElementsAreEqual *UVector::setComparer(UElementsAreEqual *d) {
- UElementsAreEqual *old = comparer;
- comparer = d;
- return old;
-}
-
-/**
- * Removes the element at the given index from this vector and
- * transfer ownership of it to the caller. After this call, the
- * caller owns the result and must delete it and the vector entry
- * at 'index' is removed, shifting all subsequent entries back by
- * one index and shortening the size of the vector by one. If the
- * index is out of range or if there is no item at the given index
- * then 0 is returned and the vector is unchanged.
- */
-void* UVector::orphanElementAt(int32_t index) {
- void* e = 0;
- if (0 <= index && index < count) {
- e = elements[index].pointer;
- for (int32_t i=index; i<count-1; ++i) {
- elements[i] = elements[i+1];
- }
- --count;
- }
- /* else index out of range */
- return e;
-}
-
-/**
- * Insert the given object into this vector at its sorted position
- * as defined by 'compare'. The current elements are assumed to
- * be sorted already.
- */
-void UVector::sortedInsert(void* obj, UElementComparator *compare, UErrorCode& ec) {
- UElement e;
- e.pointer = obj;
- sortedInsert(e, compare, ec);
-}
-
-/**
- * Insert the given integer into this vector at its sorted position
- * as defined by 'compare'. The current elements are assumed to
- * be sorted already.
- */
-void UVector::sortedInsert(int32_t obj, UElementComparator *compare, UErrorCode& ec) {
- UElement e;
- e.integer = obj;
- sortedInsert(e, compare, ec);
-}
-
-// ASSUME elements[] IS CURRENTLY SORTED
-void UVector::sortedInsert(UElement e, UElementComparator *compare, UErrorCode& ec) {
- // Perform a binary search for the location to insert tok at. Tok
- // will be inserted between two elements a and b such that a <=
- // tok && tok < b, where there is a 'virtual' elements[-1] always
- // less than tok and a 'virtual' elements[count] always greater
- // than tok.
- int32_t min = 0, max = count;
- while (min != max) {
- int32_t probe = (min + max) / 2;
- int8_t c = (*compare)(elements[probe], e);
- if (c > 0) {
- max = probe;
- } else {
- // assert(c <= 0);
- min = probe + 1;
- }
- }
- if (ensureCapacity(count + 1, ec)) {
- for (int32_t i=count; i>min; --i) {
- elements[i] = elements[i-1];
- }
- elements[min] = e;
- ++count;
- }
-}
-
-/**
- * Array sort comparator function.
- * Used from UVector::sort()
- * Conforms to function signature required for uprv_sortArray().
- * This function is essentially just a wrapper, to make a
- * UVector style comparator function usable with uprv_sortArray().
- *
- * The context pointer to this function is a pointer back
- * (with some extra indirection) to the user supplied comparator.
- *
- */
-static int32_t U_CALLCONV
-sortComparator(const void *context, const void *left, const void *right) {
- UElementComparator *compare = *static_cast<UElementComparator * const *>(context);
- UElement e1 = *static_cast<const UElement *>(left);
- UElement e2 = *static_cast<const UElement *>(right);
- int32_t result = (*compare)(e1, e2);
- return result;
-}
-
-
-/**
- * Array sort comparison function for use from UVector::sorti()
- * Compares int32_t vector elements.
- */
-static int32_t U_CALLCONV
-sortiComparator(const void * /*context */, const void *left, const void *right) {
- const UElement *e1 = static_cast<const UElement *>(left);
- const UElement *e2 = static_cast<const UElement *>(right);
- int32_t result = e1->integer < e2->integer? -1 :
- e1->integer == e2->integer? 0 : 1;
- return result;
-}
-
-/**
- * Sort the vector, assuming it contains ints.
- * (A more general sort would take a comparison function, but it's
- * not clear whether UVector's UElementComparator or
- * UComparator from uprv_sortArray would be more appropriate.)
- */
-void UVector::sorti(UErrorCode &ec) {
- if (U_SUCCESS(ec)) {
- uprv_sortArray(elements, count, sizeof(UElement),
- sortiComparator, NULL, FALSE, &ec);
- }
-}
-
-
-/**
- * Sort with a user supplied comparator.
- *
- * The comparator function handling is confusing because the function type
- * for UVector (as defined for sortedInsert()) is different from the signature
- * required by uprv_sortArray(). This is handled by passing the
- * UVector sort function pointer via the context pointer to a
- * sortArray() comparator function, which can then call back to
- * the original user function.
- *
- * An additional twist is that it's not safe to pass a pointer-to-function
- * as a (void *) data pointer, so instead we pass a (data) pointer to a
- * pointer-to-function variable.
- */
-void UVector::sort(UElementComparator *compare, UErrorCode &ec) {
- if (U_SUCCESS(ec)) {
- uprv_sortArray(elements, count, sizeof(UElement),
- sortComparator, &compare, FALSE, &ec);
- }
-}
-
-
-/**
- * Stable sort with a user supplied comparator of type UComparator.
- */
-void UVector::sortWithUComparator(UComparator *compare, const void *context, UErrorCode &ec) {
- if (U_SUCCESS(ec)) {
- uprv_sortArray(elements, count, sizeof(UElement),
- compare, context, TRUE, &ec);
- }
-}
-
-U_NAMESPACE_END
-
diff --git a/contrib/libs/icu/common/uvector.h b/contrib/libs/icu/common/uvector.h
deleted file mode 100644
index 98318d14c60..00000000000
--- a/contrib/libs/icu/common/uvector.h
+++ /dev/null
@@ -1,415 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1999-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Date Name Description
-* 10/22/99 alan Creation. This is an internal header.
-* It should not be exported.
-**********************************************************************
-*/
-
-#ifndef UVECTOR_H
-#define UVECTOR_H
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-#include "cmemory.h"
-#include "uarrsort.h"
-#include "uelement.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * <p>Ultralightweight C++ implementation of a <tt>void*</tt> vector
- * that is (mostly) compatible with java.util.Vector.
- *
- * <p>This is a very simple implementation, written to satisfy an
- * immediate porting need. As such, it is not completely fleshed out,
- * and it aims for simplicity and conformity. Nonetheless, it serves
- * its purpose (porting code from java that uses java.util.Vector)
- * well, and it could be easily made into a more robust vector class.
- *
- * <p><b>Design notes</b>
- *
- * <p>There is index bounds checking, but little is done about it. If
- * indices are out of bounds, either nothing happens, or zero is
- * returned. We <em>do</em> avoid indexing off into the weeds.
- *
- * <p>There is detection of out of memory, but the handling is very
- * coarse-grained -- similar to UnicodeString's protocol, but even
- * coarser. The class contains <em>one static flag</em> that is set
- * when any call to <tt>new</tt> returns zero. This allows the caller
- * to use several vectors and make just one check at the end to see if
- * a memory failure occurred. This is more efficient than making a
- * check after each call on each vector when doing many operations on
- * multiple vectors. The single static flag works best when memory
- * failures are infrequent, and when recovery options are limited or
- * nonexistent.
- *
- * <p>Since we don't have garbage collection, UVector was given the
- * option to <em>own</em> its contents. To employ this, set a deleter
- * function. The deleter is called on a void* pointer when that
- * pointer is released by the vector, either when the vector itself is
- * destructed, or when a call to setElementAt() overwrites an element,
- * or when a call to remove() or one of its variants explicitly
- * removes an element. If no deleter is set, or the deleter is set to
- * zero, then it is assumed that the caller will delete elements as
- * needed.
- *
- * <p>In order to implement methods such as contains() and indexOf(),
- * UVector needs a way to compare objects for equality. To do so, it
- * uses a comparison function, or "comparer." If the comparer is not
- * set, or is set to zero, then all such methods will act as if the
- * vector contains no element. That is, indexOf() will always return
- * -1, contains() will always return FALSE, etc.
- *
- * <p><b>To do</b>
- *
- * <p>Improve the handling of index out of bounds errors.
- *
- * @author Alan Liu
- */
-class U_COMMON_API UVector : public UObject {
- // NOTE: UVector uses the UHashKey (union of void* and int32_t) as
- // its basic storage type. It uses UElementsAreEqual as its
- // comparison function. It uses UObjectDeleter as its deleter
- // function. These are named for hashtables, but used here as-is
- // rather than duplicating the type. This allows sharing of
- // support functions.
-
-private:
- int32_t count;
-
- int32_t capacity;
-
- UElement* elements;
-
- UObjectDeleter *deleter;
-
- UElementsAreEqual *comparer;
-
-public:
- UVector(UErrorCode &status);
-
- UVector(int32_t initialCapacity, UErrorCode &status);
-
- UVector(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status);
-
- UVector(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status);
-
- virtual ~UVector();
-
- /**
- * Assign this object to another (make this a copy of 'other').
- * Use the 'assign' function to assign each element.
- */
- void assign(const UVector& other, UElementAssigner *assign, UErrorCode &ec);
-
- /**
- * Compare this vector with another. They will be considered
- * equal if they are of the same size and all elements are equal,
- * as compared using this object's comparer.
- */
- UBool operator==(const UVector& other);
-
- /**
- * Equivalent to !operator==()
- */
- inline UBool operator!=(const UVector& other);
-
- //------------------------------------------------------------
- // java.util.Vector API
- //------------------------------------------------------------
-
- void addElement(void* obj, UErrorCode &status);
-
- void addElement(int32_t elem, UErrorCode &status);
-
- void setElementAt(void* obj, int32_t index);
-
- void setElementAt(int32_t elem, int32_t index);
-
- void insertElementAt(void* obj, int32_t index, UErrorCode &status);
-
- void insertElementAt(int32_t elem, int32_t index, UErrorCode &status);
-
- void* elementAt(int32_t index) const;
-
- int32_t elementAti(int32_t index) const;
-
- UBool equals(const UVector &other) const;
-
- inline void* firstElement(void) const;
-
- inline void* lastElement(void) const;
-
- inline int32_t lastElementi(void) const;
-
- int32_t indexOf(void* obj, int32_t startIndex = 0) const;
-
- int32_t indexOf(int32_t obj, int32_t startIndex = 0) const;
-
- inline UBool contains(void* obj) const;
-
- inline UBool contains(int32_t obj) const;
-
- UBool containsAll(const UVector& other) const;
-
- UBool removeAll(const UVector& other);
-
- UBool retainAll(const UVector& other);
-
- void removeElementAt(int32_t index);
-
- UBool removeElement(void* obj);
-
- void removeAllElements();
-
- inline int32_t size(void) const;
-
- inline UBool isEmpty(void) const;
-
- UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status);
-
- /**
- * Change the size of this vector as follows: If newSize is
- * smaller, then truncate the array, possibly deleting held
- * elements for i >= newSize. If newSize is larger, grow the
- * array, filling in new slots with NULL.
- */
- void setSize(int32_t newSize, UErrorCode &status);
-
- /**
- * Fill in the given array with all elements of this vector.
- */
- void** toArray(void** result) const;
-
- //------------------------------------------------------------
- // New API
- //------------------------------------------------------------
-
- UObjectDeleter *setDeleter(UObjectDeleter *d);
-
- UElementsAreEqual *setComparer(UElementsAreEqual *c);
-
- inline void* operator[](int32_t index) const;
-
- /**
- * Removes the element at the given index from this vector and
- * transfer ownership of it to the caller. After this call, the
- * caller owns the result and must delete it and the vector entry
- * at 'index' is removed, shifting all subsequent entries back by
- * one index and shortening the size of the vector by one. If the
- * index is out of range or if there is no item at the given index
- * then 0 is returned and the vector is unchanged.
- */
- void* orphanElementAt(int32_t index);
-
- /**
- * Returns true if this vector contains none of the elements
- * of the given vector.
- * @param other vector to be checked for containment
- * @return true if the test condition is met
- */
- UBool containsNone(const UVector& other) const;
-
- /**
- * Insert the given object into this vector at its sorted position
- * as defined by 'compare'. The current elements are assumed to
- * be sorted already.
- */
- void sortedInsert(void* obj, UElementComparator *compare, UErrorCode& ec);
-
- /**
- * Insert the given integer into this vector at its sorted position
- * as defined by 'compare'. The current elements are assumed to
- * be sorted already.
- */
- void sortedInsert(int32_t obj, UElementComparator *compare, UErrorCode& ec);
-
- /**
- * Sort the contents of the vector, assuming that the contents of the
- * vector are of type int32_t.
- */
- void sorti(UErrorCode &ec);
-
- /**
- * Sort the contents of this vector, using a caller-supplied function
- * to do the comparisons. (It's confusing that
- * UVector's UElementComparator function is different from the
- * UComparator function type defined in uarrsort.h)
- */
- void sort(UElementComparator *compare, UErrorCode &ec);
-
- /**
- * Stable sort the contents of this vector using a caller-supplied function
- * of type UComparator to do the comparison. Provides more flexibility
- * than UVector::sort() because an additional user parameter can be passed to
- * the comparison function.
- */
- void sortWithUComparator(UComparator *compare, const void *context, UErrorCode &ec);
-
- /**
- * ICU "poor man's RTTI", returns a UClassID for this class.
- */
- static UClassID U_EXPORT2 getStaticClassID();
-
- /**
- * ICU "poor man's RTTI", returns a UClassID for the actual class.
- */
- virtual UClassID getDynamicClassID() const;
-
-private:
- void _init(int32_t initialCapacity, UErrorCode &status);
-
- int32_t indexOf(UElement key, int32_t startIndex = 0, int8_t hint = 0) const;
-
- void sortedInsert(UElement e, UElementComparator *compare, UErrorCode& ec);
-
- // Disallow
- UVector(const UVector&);
-
- // Disallow
- UVector& operator=(const UVector&);
-
-};
-
-
-/**
- * <p>Ultralightweight C++ implementation of a <tt>void*</tt> stack
- * that is (mostly) compatible with java.util.Stack. As in java, this
- * is merely a paper thin layer around UVector. See the UVector
- * documentation for further information.
- *
- * <p><b>Design notes</b>
- *
- * <p>The element at index <tt>n-1</tt> is (of course) the top of the
- * stack.
- *
- * <p>The poorly named <tt>empty()</tt> method doesn't empty the
- * stack; it determines if the stack is empty.
- *
- * @author Alan Liu
- */
-class U_COMMON_API UStack : public UVector {
-public:
- UStack(UErrorCode &status);
-
- UStack(int32_t initialCapacity, UErrorCode &status);
-
- UStack(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status);
-
- UStack(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status);
-
- virtual ~UStack();
-
- // It's okay not to have a virtual destructor (in UVector)
- // because UStack has no special cleanup to do.
-
- inline UBool empty(void) const;
-
- inline void* peek(void) const;
-
- inline int32_t peeki(void) const;
-
- void* pop(void);
-
- int32_t popi(void);
-
- inline void* push(void* obj, UErrorCode &status);
-
- inline int32_t push(int32_t i, UErrorCode &status);
-
- /*
- If the object o occurs as an item in this stack,
- this method returns the 1-based distance from the top of the stack.
- */
- int32_t search(void* obj) const;
-
- /**
- * ICU "poor man's RTTI", returns a UClassID for this class.
- */
- static UClassID U_EXPORT2 getStaticClassID();
-
- /**
- * ICU "poor man's RTTI", returns a UClassID for the actual class.
- */
- virtual UClassID getDynamicClassID() const;
-
-private:
- // Disallow
- UStack(const UStack&);
-
- // Disallow
- UStack& operator=(const UStack&);
-};
-
-
-// UVector inlines
-
-inline int32_t UVector::size(void) const {
- return count;
-}
-
-inline UBool UVector::isEmpty(void) const {
- return count == 0;
-}
-
-inline UBool UVector::contains(void* obj) const {
- return indexOf(obj) >= 0;
-}
-
-inline UBool UVector::contains(int32_t obj) const {
- return indexOf(obj) >= 0;
-}
-
-inline void* UVector::firstElement(void) const {
- return elementAt(0);
-}
-
-inline void* UVector::lastElement(void) const {
- return elementAt(count-1);
-}
-
-inline int32_t UVector::lastElementi(void) const {
- return elementAti(count-1);
-}
-
-inline void* UVector::operator[](int32_t index) const {
- return elementAt(index);
-}
-
-inline UBool UVector::operator!=(const UVector& other) {
- return !operator==(other);
-}
-
-// UStack inlines
-
-inline UBool UStack::empty(void) const {
- return isEmpty();
-}
-
-inline void* UStack::peek(void) const {
- return lastElement();
-}
-
-inline int32_t UStack::peeki(void) const {
- return lastElementi();
-}
-
-inline void* UStack::push(void* obj, UErrorCode &status) {
- addElement(obj, status);
- return obj;
-}
-
-inline int32_t UStack::push(int32_t i, UErrorCode &status) {
- addElement(i, status);
- return i;
-}
-
-U_NAMESPACE_END
-
-#endif
diff --git a/contrib/libs/icu/common/uvectr32.cpp b/contrib/libs/icu/common/uvectr32.cpp
deleted file mode 100644
index d1ae6599585..00000000000
--- a/contrib/libs/icu/common/uvectr32.cpp
+++ /dev/null
@@ -1,335 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 1999-2015, International Business Machines Corporation and
-* others. All Rights Reserved.
-******************************************************************************
-* Date Name Description
-* 10/22/99 alan Creation.
-**********************************************************************
-*/
-
-#include "uvectr32.h"
-#include "cmemory.h"
-#include "putilimp.h"
-
-U_NAMESPACE_BEGIN
-
-#define DEFAULT_CAPACITY 8
-
-/*
- * Constants for hinting whether a key is an integer
- * or a pointer. If a hint bit is zero, then the associated
- * token is assumed to be an integer. This is needed for iSeries
- */
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UVector32)
-
-UVector32::UVector32(UErrorCode &status) :
- count(0),
- capacity(0),
- maxCapacity(0),
- elements(NULL)
-{
- _init(DEFAULT_CAPACITY, status);
-}
-
-UVector32::UVector32(int32_t initialCapacity, UErrorCode &status) :
- count(0),
- capacity(0),
- maxCapacity(0),
- elements(0)
-{
- _init(initialCapacity, status);
-}
-
-
-
-void UVector32::_init(int32_t initialCapacity, UErrorCode &status) {
- // Fix bogus initialCapacity values; avoid malloc(0)
- if (initialCapacity < 1) {
- initialCapacity = DEFAULT_CAPACITY;
- }
- if (maxCapacity>0 && maxCapacity<initialCapacity) {
- initialCapacity = maxCapacity;
- }
- if (initialCapacity > (int32_t)(INT32_MAX / sizeof(int32_t))) {
- initialCapacity = uprv_min(DEFAULT_CAPACITY, maxCapacity);
- }
- elements = (int32_t *)uprv_malloc(sizeof(int32_t)*initialCapacity);
- if (elements == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- } else {
- capacity = initialCapacity;
- }
-}
-
-UVector32::~UVector32() {
- uprv_free(elements);
- elements = 0;
-}
-
-/**
- * Assign this object to another (make this a copy of 'other').
- */
-void UVector32::assign(const UVector32& other, UErrorCode &ec) {
- if (ensureCapacity(other.count, ec)) {
- setSize(other.count);
- for (int32_t i=0; i<other.count; ++i) {
- elements[i] = other.elements[i];
- }
- }
-}
-
-
-UBool UVector32::operator==(const UVector32& other) {
- int32_t i;
- if (count != other.count) return FALSE;
- for (i=0; i<count; ++i) {
- if (elements[i] != other.elements[i]) {
- return FALSE;
- }
- }
- return TRUE;
-}
-
-
-void UVector32::setElementAt(int32_t elem, int32_t index) {
- if (0 <= index && index < count) {
- elements[index] = elem;
- }
- /* else index out of range */
-}
-
-void UVector32::insertElementAt(int32_t elem, int32_t index, UErrorCode &status) {
- // must have 0 <= index <= count
- if (0 <= index && index <= count && ensureCapacity(count + 1, status)) {
- for (int32_t i=count; i>index; --i) {
- elements[i] = elements[i-1];
- }
- elements[index] = elem;
- ++count;
- }
- /* else index out of range */
-}
-
-UBool UVector32::containsAll(const UVector32& other) const {
- for (int32_t i=0; i<other.size(); ++i) {
- if (indexOf(other.elements[i]) < 0) {
- return FALSE;
- }
- }
- return TRUE;
-}
-
-UBool UVector32::containsNone(const UVector32& other) const {
- for (int32_t i=0; i<other.size(); ++i) {
- if (indexOf(other.elements[i]) >= 0) {
- return FALSE;
- }
- }
- return TRUE;
-}
-
-UBool UVector32::removeAll(const UVector32& other) {
- UBool changed = FALSE;
- for (int32_t i=0; i<other.size(); ++i) {
- int32_t j = indexOf(other.elements[i]);
- if (j >= 0) {
- removeElementAt(j);
- changed = TRUE;
- }
- }
- return changed;
-}
-
-UBool UVector32::retainAll(const UVector32& other) {
- UBool changed = FALSE;
- for (int32_t j=size()-1; j>=0; --j) {
- int32_t i = other.indexOf(elements[j]);
- if (i < 0) {
- removeElementAt(j);
- changed = TRUE;
- }
- }
- return changed;
-}
-
-void UVector32::removeElementAt(int32_t index) {
- if (index >= 0) {
- for (int32_t i=index; i<count-1; ++i) {
- elements[i] = elements[i+1];
- }
- --count;
- }
-}
-
-void UVector32::removeAllElements(void) {
- count = 0;
-}
-
-UBool UVector32::equals(const UVector32 &other) const {
- int i;
-
- if (this->count != other.count) {
- return FALSE;
- }
- for (i=0; i<count; i++) {
- if (elements[i] != other.elements[i]) {
- return FALSE;
- }
- }
- return TRUE;
-}
-
-
-
-
-int32_t UVector32::indexOf(int32_t key, int32_t startIndex) const {
- int32_t i;
- for (i=startIndex; i<count; ++i) {
- if (key == elements[i]) {
- return i;
- }
- }
- return -1;
-}
-
-
-UBool UVector32::expandCapacity(int32_t minimumCapacity, UErrorCode &status) {
- if (U_FAILURE(status)) {
- return FALSE;
- }
- if (minimumCapacity < 0) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- if (capacity >= minimumCapacity) {
- return TRUE;
- }
- if (maxCapacity>0 && minimumCapacity>maxCapacity) {
- status = U_BUFFER_OVERFLOW_ERROR;
- return FALSE;
- }
- if (capacity > (INT32_MAX - 1) / 2) { // integer overflow check
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- int32_t newCap = capacity * 2;
- if (newCap < minimumCapacity) {
- newCap = minimumCapacity;
- }
- if (maxCapacity > 0 && newCap > maxCapacity) {
- newCap = maxCapacity;
- }
- if (newCap > (int32_t)(INT32_MAX / sizeof(int32_t))) { // integer overflow check
- // We keep the original memory contents on bad minimumCapacity/maxCapacity.
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- int32_t* newElems = (int32_t *)uprv_realloc(elements, sizeof(int32_t)*newCap);
- if (newElems == NULL) {
- // We keep the original contents on the memory failure on realloc.
- status = U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
- }
- elements = newElems;
- capacity = newCap;
- return TRUE;
-}
-
-void UVector32::setMaxCapacity(int32_t limit) {
- U_ASSERT(limit >= 0);
- if (limit < 0) {
- limit = 0;
- }
- if (limit > (int32_t)(INT32_MAX / sizeof(int32_t))) { // integer overflow check for realloc
- // Something is very wrong, don't realloc, leave capacity and maxCapacity unchanged
- return;
- }
- maxCapacity = limit;
- if (capacity <= maxCapacity || maxCapacity == 0) {
- // Current capacity is within the new limit.
- return;
- }
-
- // New maximum capacity is smaller than the current size.
- // Realloc the storage to the new, smaller size.
- int32_t* newElems = (int32_t *)uprv_realloc(elements, sizeof(int32_t)*maxCapacity);
- if (newElems == NULL) {
- // Realloc to smaller failed.
- // Just keep what we had. No need to call it a failure.
- return;
- }
- elements = newElems;
- capacity = maxCapacity;
- if (count > capacity) {
- count = capacity;
- }
-}
-
-/**
- * Change the size of this vector as follows: If newSize is smaller,
- * then truncate the array, possibly deleting held elements for i >=
- * newSize. If newSize is larger, grow the array, filling in new
- * slots with NULL.
- */
-void UVector32::setSize(int32_t newSize) {
- int32_t i;
- if (newSize < 0) {
- return;
- }
- if (newSize > count) {
- UErrorCode ec = U_ZERO_ERROR;
- if (!ensureCapacity(newSize, ec)) {
- return;
- }
- for (i=count; i<newSize; ++i) {
- elements[i] = 0;
- }
- }
- count = newSize;
-}
-
-
-
-
-/**
- * Insert the given integer into this vector at its sorted position
- * as defined by 'compare'. The current elements are assumed to
- * be sorted already.
- */
-void UVector32::sortedInsert(int32_t tok, UErrorCode& ec) {
- // Perform a binary search for the location to insert tok at. Tok
- // will be inserted between two elements a and b such that a <=
- // tok && tok < b, where there is a 'virtual' elements[-1] always
- // less than tok and a 'virtual' elements[count] always greater
- // than tok.
- int32_t min = 0, max = count;
- while (min != max) {
- int32_t probe = (min + max) / 2;
- //int8_t c = (*compare)(elements[probe], tok);
- //if (c > 0) {
- if (elements[probe] > tok) {
- max = probe;
- } else {
- // assert(c <= 0);
- min = probe + 1;
- }
- }
- if (ensureCapacity(count + 1, ec)) {
- for (int32_t i=count; i>min; --i) {
- elements[i] = elements[i-1];
- }
- elements[min] = tok;
- ++count;
- }
-}
-
-
-
-
-
-U_NAMESPACE_END
-
diff --git a/contrib/libs/icu/common/uvectr32.h b/contrib/libs/icu/common/uvectr32.h
deleted file mode 100644
index ba47daa75f1..00000000000
--- a/contrib/libs/icu/common/uvectr32.h
+++ /dev/null
@@ -1,306 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1999-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-//
-// UVector32 is a class implementing a vector of 32 bit integers.
-// It is similar to UVector, but holds int32_t values rather than pointers.
-// Most of the code is unchanged from UVector.
-//
-
-#ifndef UVECTOR32_H
-#define UVECTOR32_H
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-#include "uhash.h"
-#include "uassert.h"
-
-U_NAMESPACE_BEGIN
-
-
-
-/**
- * <p>Ultralightweight C++ implementation of a <tt>void*</tt> vector
- * that is (mostly) compatible with java.util.Vector.
- *
- * <p>This is a very simple implementation, written to satisfy an
- * immediate porting need. As such, it is not completely fleshed out,
- * and it aims for simplicity and conformity. Nonetheless, it serves
- * its purpose (porting code from java that uses java.util.Vector)
- * well, and it could be easily made into a more robust vector class.
- *
- * <p><b>Design notes</b>
- *
- * <p>There is index bounds checking, but little is done about it. If
- * indices are out of bounds, either nothing happens, or zero is
- * returned. We <em>do</em> avoid indexing off into the weeds.
- *
- * <p>There is detection of out of memory, but the handling is very
- * coarse-grained -- similar to UnicodeString's protocol, but even
- * coarser. The class contains <em>one static flag</em> that is set
- * when any call to <tt>new</tt> returns zero. This allows the caller
- * to use several vectors and make just one check at the end to see if
- * a memory failure occurred. This is more efficient than making a
- * check after each call on each vector when doing many operations on
- * multiple vectors. The single static flag works best when memory
- * failures are infrequent, and when recovery options are limited or
- * nonexistent.
- *
- * <p><b>To do</b>
- *
- * <p>Improve the handling of index out of bounds errors.
- *
- * @author Alan Liu
- */
-class U_COMMON_API UVector32 : public UObject {
-private:
- int32_t count;
-
- int32_t capacity;
-
- int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow.
-
- int32_t* elements;
-
-public:
- UVector32(UErrorCode &status);
-
- UVector32(int32_t initialCapacity, UErrorCode &status);
-
- virtual ~UVector32();
-
- /**
- * Assign this object to another (make this a copy of 'other').
- * Use the 'assign' function to assign each element.
- */
- void assign(const UVector32& other, UErrorCode &ec);
-
- /**
- * Compare this vector with another. They will be considered
- * equal if they are of the same size and all elements are equal,
- * as compared using this object's comparer.
- */
- UBool operator==(const UVector32& other);
-
- /**
- * Equivalent to !operator==()
- */
- inline UBool operator!=(const UVector32& other);
-
- //------------------------------------------------------------
- // java.util.Vector API
- //------------------------------------------------------------
-
- inline void addElement(int32_t elem, UErrorCode &status);
-
- void setElementAt(int32_t elem, int32_t index);
-
- void insertElementAt(int32_t elem, int32_t index, UErrorCode &status);
-
- inline int32_t elementAti(int32_t index) const;
-
- UBool equals(const UVector32 &other) const;
-
- inline int32_t lastElementi(void) const;
-
- int32_t indexOf(int32_t elem, int32_t startIndex = 0) const;
-
- inline UBool contains(int32_t elem) const;
-
- UBool containsAll(const UVector32& other) const;
-
- UBool removeAll(const UVector32& other);
-
- UBool retainAll(const UVector32& other);
-
- void removeElementAt(int32_t index);
-
- void removeAllElements();
-
- inline int32_t size(void) const;
-
- inline UBool isEmpty(void) const;
-
- // Inline. Use this one for speedy size check.
- inline UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status);
-
- // Out-of-line, handles actual growth. Called by ensureCapacity() when necessary.
- UBool expandCapacity(int32_t minimumCapacity, UErrorCode &status);
-
- /**
- * Change the size of this vector as follows: If newSize is
- * smaller, then truncate the array, possibly deleting held
- * elements for i >= newSize. If newSize is larger, grow the
- * array, filling in new slows with zero.
- */
- void setSize(int32_t newSize);
-
- //------------------------------------------------------------
- // New API
- //------------------------------------------------------------
-
- /**
- * Returns true if this vector contains none of the elements
- * of the given vector.
- * @param other vector to be checked for containment
- * @return true if the test condition is met
- */
- UBool containsNone(const UVector32& other) const;
-
-
- /**
- * Insert the given integer into this vector at its sorted position.
- * The current elements are assumed to be sorted already.
- */
- void sortedInsert(int32_t elem, UErrorCode& ec);
-
- /**
- * Returns a pointer to the internal array holding the vector.
- */
- inline int32_t *getBuffer() const;
-
- /**
- * Set the maximum allowed buffer capacity for this vector/stack.
- * Default with no limit set is unlimited, go until malloc() fails.
- * A Limit of zero means unlimited capacity.
- * Units are vector elements (32 bits each), not bytes.
- */
- void setMaxCapacity(int32_t limit);
-
- /**
- * ICU "poor man's RTTI", returns a UClassID for this class.
- */
- static UClassID U_EXPORT2 getStaticClassID();
-
- /**
- * ICU "poor man's RTTI", returns a UClassID for the actual class.
- */
- virtual UClassID getDynamicClassID() const;
-
-private:
- void _init(int32_t initialCapacity, UErrorCode &status);
-
- // Disallow
- UVector32(const UVector32&);
-
- // Disallow
- UVector32& operator=(const UVector32&);
-
-
- // API Functions for Stack operations.
- // In the original UVector, these were in a separate derived class, UStack.
- // Here in UVector32, they are all together.
-public:
- inline UBool empty(void) const; // TODO: redundant, same as empty(). Remove it?
-
- inline int32_t peeki(void) const;
-
- inline int32_t popi(void);
-
- inline int32_t push(int32_t i, UErrorCode &status);
-
- inline int32_t *reserveBlock(int32_t size, UErrorCode &status);
- inline int32_t *popFrame(int32_t size);
-};
-
-
-// UVector32 inlines
-
-inline UBool UVector32::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
- if ((minimumCapacity >= 0) && (capacity >= minimumCapacity)) {
- return TRUE;
- } else {
- return expandCapacity(minimumCapacity, status);
- }
-}
-
-inline int32_t UVector32::elementAti(int32_t index) const {
- return (index >= 0 && count > 0 && count - index > 0) ? elements[index] : 0;
-}
-
-
-inline void UVector32::addElement(int32_t elem, UErrorCode &status) {
- if (ensureCapacity(count + 1, status)) {
- elements[count] = elem;
- count++;
- }
-}
-
-inline int32_t *UVector32::reserveBlock(int32_t size, UErrorCode &status) {
- if (ensureCapacity(count+size, status) == FALSE) {
- return NULL;
- }
- int32_t *rp = elements+count;
- count += size;
- return rp;
-}
-
-inline int32_t *UVector32::popFrame(int32_t size) {
- U_ASSERT(count >= size);
- count -= size;
- if (count < 0) {
- count = 0;
- }
- return elements+count-size;
-}
-
-
-
-inline int32_t UVector32::size(void) const {
- return count;
-}
-
-inline UBool UVector32::isEmpty(void) const {
- return count == 0;
-}
-
-inline UBool UVector32::contains(int32_t obj) const {
- return indexOf(obj) >= 0;
-}
-
-inline int32_t UVector32::lastElementi(void) const {
- return elementAti(count-1);
-}
-
-inline UBool UVector32::operator!=(const UVector32& other) {
- return !operator==(other);
-}
-
-inline int32_t *UVector32::getBuffer() const {
- return elements;
-}
-
-
-// UStack inlines
-
-inline UBool UVector32::empty(void) const {
- return isEmpty();
-}
-
-inline int32_t UVector32::peeki(void) const {
- return lastElementi();
-}
-
-inline int32_t UVector32::push(int32_t i, UErrorCode &status) {
- addElement(i, status);
- return i;
-}
-
-inline int32_t UVector32::popi(void) {
- int32_t result = 0;
- if (count > 0) {
- count--;
- result = elements[count];
- }
- return result;
-}
-
-U_NAMESPACE_END
-
-#endif
diff --git a/contrib/libs/icu/common/uvectr64.cpp b/contrib/libs/icu/common/uvectr64.cpp
deleted file mode 100644
index 081565959cf..00000000000
--- a/contrib/libs/icu/common/uvectr64.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-* Copyright (C) 1999-2015, International Business Machines Corporation and
-* others. All Rights Reserved.
-******************************************************************************
-*/
-
-#include "uvectr64.h"
-#include "cmemory.h"
-#include "putilimp.h"
-
-U_NAMESPACE_BEGIN
-
-#define DEFAULT_CAPACITY 8
-
-/*
- * Constants for hinting whether a key is an integer
- * or a pointer. If a hint bit is zero, then the associated
- * token is assumed to be an integer. This is needed for iSeries
- */
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UVector64)
-
-UVector64::UVector64(UErrorCode &status) :
- count(0),
- capacity(0),
- maxCapacity(0),
- elements(NULL)
-{
- _init(DEFAULT_CAPACITY, status);
-}
-
-UVector64::UVector64(int32_t initialCapacity, UErrorCode &status) :
- count(0),
- capacity(0),
- maxCapacity(0),
- elements(0)
-{
- _init(initialCapacity, status);
-}
-
-
-
-void UVector64::_init(int32_t initialCapacity, UErrorCode &status) {
- // Fix bogus initialCapacity values; avoid malloc(0)
- if (initialCapacity < 1) {
- initialCapacity = DEFAULT_CAPACITY;
- }
- if (maxCapacity>0 && maxCapacity<initialCapacity) {
- initialCapacity = maxCapacity;
- }
- if (initialCapacity > (int32_t)(INT32_MAX / sizeof(int64_t))) {
- initialCapacity = uprv_min(DEFAULT_CAPACITY, maxCapacity);
- }
- elements = (int64_t *)uprv_malloc(sizeof(int64_t)*initialCapacity);
- if (elements == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- } else {
- capacity = initialCapacity;
- }
-}
-
-UVector64::~UVector64() {
- uprv_free(elements);
- elements = 0;
-}
-
-/**
- * Assign this object to another (make this a copy of 'other').
- */
-void UVector64::assign(const UVector64& other, UErrorCode &ec) {
- if (ensureCapacity(other.count, ec)) {
- setSize(other.count);
- for (int32_t i=0; i<other.count; ++i) {
- elements[i] = other.elements[i];
- }
- }
-}
-
-
-UBool UVector64::operator==(const UVector64& other) {
- int32_t i;
- if (count != other.count) return FALSE;
- for (i=0; i<count; ++i) {
- if (elements[i] != other.elements[i]) {
- return FALSE;
- }
- }
- return TRUE;
-}
-
-
-void UVector64::setElementAt(int64_t elem, int32_t index) {
- if (0 <= index && index < count) {
- elements[index] = elem;
- }
- /* else index out of range */
-}
-
-void UVector64::insertElementAt(int64_t elem, int32_t index, UErrorCode &status) {
- // must have 0 <= index <= count
- if (0 <= index && index <= count && ensureCapacity(count + 1, status)) {
- for (int32_t i=count; i>index; --i) {
- elements[i] = elements[i-1];
- }
- elements[index] = elem;
- ++count;
- }
- /* else index out of range */
-}
-
-void UVector64::removeAllElements(void) {
- count = 0;
-}
-
-UBool UVector64::expandCapacity(int32_t minimumCapacity, UErrorCode &status) {
- if (U_FAILURE(status)) {
- return FALSE;
- }
- if (minimumCapacity < 0) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- if (capacity >= minimumCapacity) {
- return TRUE;
- }
- if (maxCapacity>0 && minimumCapacity>maxCapacity) {
- status = U_BUFFER_OVERFLOW_ERROR;
- return FALSE;
- }
- if (capacity > (INT32_MAX - 1) / 2) { // integer overflow check
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- int32_t newCap = capacity * 2;
- if (newCap < minimumCapacity) {
- newCap = minimumCapacity;
- }
- if (maxCapacity > 0 && newCap > maxCapacity) {
- newCap = maxCapacity;
- }
- if (newCap > (int32_t)(INT32_MAX / sizeof(int64_t))) { // integer overflow check
- // We keep the original memory contents on bad minimumCapacity/maxCapacity.
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- int64_t* newElems = (int64_t *)uprv_realloc(elements, sizeof(int64_t)*newCap);
- if (newElems == NULL) {
- // We keep the original contents on the memory failure on realloc.
- status = U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
- }
- elements = newElems;
- capacity = newCap;
- return TRUE;
-}
-
-void UVector64::setMaxCapacity(int32_t limit) {
- U_ASSERT(limit >= 0);
- if (limit < 0) {
- limit = 0;
- }
- if (limit > (int32_t)(INT32_MAX / sizeof(int64_t))) { // integer overflow check for realloc
- // Something is very wrong, don't realloc, leave capacity and maxCapacity unchanged
- return;
- }
- maxCapacity = limit;
- if (capacity <= maxCapacity || maxCapacity == 0) {
- // Current capacity is within the new limit.
- return;
- }
-
- // New maximum capacity is smaller than the current size.
- // Realloc the storage to the new, smaller size.
- int64_t* newElems = (int64_t *)uprv_realloc(elements, sizeof(int64_t)*maxCapacity);
- if (newElems == NULL) {
- // Realloc to smaller failed.
- // Just keep what we had. No need to call it a failure.
- return;
- }
- elements = newElems;
- capacity = maxCapacity;
- if (count > capacity) {
- count = capacity;
- }
-}
-
-/**
- * Change the size of this vector as follows: If newSize is smaller,
- * then truncate the array, possibly deleting held elements for i >=
- * newSize. If newSize is larger, grow the array, filling in new
- * slots with NULL.
- */
-void UVector64::setSize(int32_t newSize) {
- int32_t i;
- if (newSize < 0) {
- return;
- }
- if (newSize > count) {
- UErrorCode ec = U_ZERO_ERROR;
- if (!ensureCapacity(newSize, ec)) {
- return;
- }
- for (i=count; i<newSize; ++i) {
- elements[i] = 0;
- }
- }
- count = newSize;
-}
-
-U_NAMESPACE_END
-
diff --git a/contrib/libs/icu/common/uvectr64.h b/contrib/libs/icu/common/uvectr64.h
deleted file mode 100644
index 1cc9a501917..00000000000
--- a/contrib/libs/icu/common/uvectr64.h
+++ /dev/null
@@ -1,279 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 1999-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-//
-// UVector64 is a class implementing a vector of 64 bit integers.
-// It is similar to UVector32, but holds int64_t values rather than int32_t.
-// Most of the code is unchanged from UVector.
-//
-
-#ifndef UVECTOR64_H
-#define UVECTOR64_H
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-#include "uhash.h"
-#include "uassert.h"
-
-U_NAMESPACE_BEGIN
-
-
-
-/**
- * <p>Ultralightweight C++ implementation of an <tt>int64_t</tt> vector
- * that has a subset of methods from UVector32
- *
- * <p>This is a very simple implementation, written to satisfy an
- * immediate porting need. As such, it is not completely fleshed out,
- * and it aims for simplicity and conformity. Nonetheless, it serves
- * its purpose (porting code from java that uses java.util.Vector)
- * well, and it could be easily made into a more robust vector class.
- *
- * <p><b>Design notes</b>
- *
- * <p>There is index bounds checking, but little is done about it. If
- * indices are out of bounds, either nothing happens, or zero is
- * returned. We <em>do</em> avoid indexing off into the weeds.
- *
- * <p>There is detection of out of memory, but the handling is very
- * coarse-grained -- similar to UnicodeString's protocol, but even
- * coarser. The class contains <em>one static flag</em> that is set
- * when any call to <tt>new</tt> returns zero. This allows the caller
- * to use several vectors and make just one check at the end to see if
- * a memory failure occurred. This is more efficient than making a
- * check after each call on each vector when doing many operations on
- * multiple vectors. The single static flag works best when memory
- * failures are infrequent, and when recovery options are limited or
- * nonexistent.
- *
- * <p><b>To do</b>
- *
- * <p>Improve the handling of index out of bounds errors.
- *
- */
-class U_COMMON_API UVector64 : public UObject {
-private:
- int32_t count;
-
- int32_t capacity;
-
- int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow.
-
- int64_t* elements;
-
-public:
- UVector64(UErrorCode &status);
-
- UVector64(int32_t initialCapacity, UErrorCode &status);
-
- virtual ~UVector64();
-
- /**
- * Assign this object to another (make this a copy of 'other').
- * Use the 'assign' function to assign each element.
- */
- void assign(const UVector64& other, UErrorCode &ec);
-
- /**
- * Compare this vector with another. They will be considered
- * equal if they are of the same size and all elements are equal,
- * as compared using this object's comparer.
- */
- UBool operator==(const UVector64& other);
-
- /**
- * Equivalent to !operator==()
- */
- inline UBool operator!=(const UVector64& other);
-
- //------------------------------------------------------------
- // subset of java.util.Vector API
- //------------------------------------------------------------
-
- inline void addElement(int64_t elem, UErrorCode &status);
-
- void setElementAt(int64_t elem, int32_t index);
-
- void insertElementAt(int64_t elem, int32_t index, UErrorCode &status);
-
- inline int64_t elementAti(int32_t index) const;
-
- //UBool equals(const UVector64 &other) const;
-
- inline int64_t lastElementi(void) const;
-
- //int32_t indexOf(int64_t elem, int32_t startIndex = 0) const;
-
- //UBool contains(int64_t elem) const;
-
- //UBool containsAll(const UVector64& other) const;
-
- //UBool removeAll(const UVector64& other);
-
- //UBool retainAll(const UVector64& other);
-
- //void removeElementAt(int32_t index);
-
- void removeAllElements();
-
- inline int32_t size(void) const;
-
- inline UBool isEmpty(void) const { return count == 0; }
-
- // Inline. Use this one for speedy size check.
- inline UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status);
-
- // Out-of-line, handles actual growth. Called by ensureCapacity() when necessary.
- UBool expandCapacity(int32_t minimumCapacity, UErrorCode &status);
-
- /**
- * Change the size of this vector as follows: If newSize is
- * smaller, then truncate the array, possibly deleting held
- * elements for i >= newSize. If newSize is larger, grow the
- * array, filling in new slows with zero.
- */
- void setSize(int32_t newSize);
-
- //------------------------------------------------------------
- // New API
- //------------------------------------------------------------
-
- //UBool containsNone(const UVector64& other) const;
-
-
- //void sortedInsert(int64_t elem, UErrorCode& ec);
-
- /**
- * Returns a pointer to the internal array holding the vector.
- */
- inline int64_t *getBuffer() const;
-
- /**
- * Set the maximum allowed buffer capacity for this vector/stack.
- * Default with no limit set is unlimited, go until malloc() fails.
- * A Limit of zero means unlimited capacity.
- * Units are vector elements (64 bits each), not bytes.
- */
- void setMaxCapacity(int32_t limit);
-
- /**
- * ICU "poor man's RTTI", returns a UClassID for this class.
- */
- static UClassID U_EXPORT2 getStaticClassID();
-
- /**
- * ICU "poor man's RTTI", returns a UClassID for the actual class.
- */
- virtual UClassID getDynamicClassID() const;
-
-private:
- void _init(int32_t initialCapacity, UErrorCode &status);
-
- // Disallow
- UVector64(const UVector64&);
-
- // Disallow
- UVector64& operator=(const UVector64&);
-
-
- // API Functions for Stack operations.
- // In the original UVector, these were in a separate derived class, UStack.
- // Here in UVector64, they are all together.
-public:
- //UBool empty(void) const; // TODO: redundant, same as empty(). Remove it?
-
- //int64_t peeki(void) const;
-
- inline int64_t popi(void);
-
- inline int64_t push(int64_t i, UErrorCode &status);
-
- inline int64_t *reserveBlock(int32_t size, UErrorCode &status);
- inline int64_t *popFrame(int32_t size);
-};
-
-
-// UVector64 inlines
-
-inline UBool UVector64::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
- if ((minimumCapacity >= 0) && (capacity >= minimumCapacity)) {
- return TRUE;
- } else {
- return expandCapacity(minimumCapacity, status);
- }
-}
-
-inline int64_t UVector64::elementAti(int32_t index) const {
- return (0 <= index && index < count) ? elements[index] : 0;
-}
-
-
-inline void UVector64::addElement(int64_t elem, UErrorCode &status) {
- if (ensureCapacity(count + 1, status)) {
- elements[count] = elem;
- count++;
- }
-}
-
-inline int64_t *UVector64::reserveBlock(int32_t size, UErrorCode &status) {
- if (ensureCapacity(count+size, status) == FALSE) {
- return NULL;
- }
- int64_t *rp = elements+count;
- count += size;
- return rp;
-}
-
-inline int64_t *UVector64::popFrame(int32_t size) {
- U_ASSERT(count >= size);
- count -= size;
- if (count < 0) {
- count = 0;
- }
- return elements+count-size;
-}
-
-
-
-inline int32_t UVector64::size(void) const {
- return count;
-}
-
-inline int64_t UVector64::lastElementi(void) const {
- return elementAti(count-1);
-}
-
-inline UBool UVector64::operator!=(const UVector64& other) {
- return !operator==(other);
-}
-
-inline int64_t *UVector64::getBuffer() const {
- return elements;
-}
-
-
-// UStack inlines
-
-inline int64_t UVector64::push(int64_t i, UErrorCode &status) {
- addElement(i, status);
- return i;
-}
-
-inline int64_t UVector64::popi(void) {
- int64_t result = 0;
- if (count > 0) {
- count--;
- result = elements[count];
- }
- return result;
-}
-
-U_NAMESPACE_END
-
-#endif
diff --git a/contrib/libs/icu/common/wintz.cpp b/contrib/libs/icu/common/wintz.cpp
deleted file mode 100644
index 115512e704c..00000000000
--- a/contrib/libs/icu/common/wintz.cpp
+++ /dev/null
@@ -1,124 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-********************************************************************************
-* Copyright (C) 2005-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-********************************************************************************
-*
-* File WINTZ.CPP
-*
-********************************************************************************
-*/
-
-#include "unicode/utypes.h"
-
-#if U_PLATFORM_USES_ONLY_WIN32_API
-
-#include "wintz.h"
-#include "cmemory.h"
-#include "cstring.h"
-
-#include "unicode/ures.h"
-#include "unicode/ustring.h"
-#include "uresimp.h"
-
-#ifndef WIN32_LEAN_AND_MEAN
-# define WIN32_LEAN_AND_MEAN
-#endif
-# define VC_EXTRALEAN
-# define NOUSER
-# define NOSERVICE
-# define NOIME
-# define NOMCX
-#include <windows.h>
-
-U_NAMESPACE_BEGIN
-
-// The max size of TimeZoneKeyName is 128, defined in DYNAMIC_TIME_ZONE_INFORMATION
-#define MAX_TIMEZONE_ID_LENGTH 128
-
-/**
-* Main Windows time zone detection function.
-* Returns the Windows time zone converted to an ICU time zone as a heap-allocated buffer, or nullptr upon failure.
-* Note: We use the Win32 API GetDynamicTimeZoneInformation to get the current time zone info.
-* This API returns a non-localized time zone name, which we can then map to an ICU time zone name.
-*/
-U_INTERNAL const char* U_EXPORT2
-uprv_detectWindowsTimeZone()
-{
- UErrorCode status = U_ZERO_ERROR;
- char* icuid = nullptr;
- char dynamicTZKeyName[MAX_TIMEZONE_ID_LENGTH];
- char tmpid[MAX_TIMEZONE_ID_LENGTH];
- int32_t len;
- int id = GEOID_NOT_AVAILABLE;
- int errorCode;
- wchar_t ISOcodeW[3] = {}; /* 2 letter ISO code in UTF-16 */
- char ISOcode[3] = {}; /* 2 letter ISO code in UTF-8 */
-
- DYNAMIC_TIME_ZONE_INFORMATION dynamicTZI;
- uprv_memset(&dynamicTZI, 0, sizeof(dynamicTZI));
- uprv_memset(dynamicTZKeyName, 0, sizeof(dynamicTZKeyName));
- uprv_memset(tmpid, 0, sizeof(tmpid));
-
- /* Obtain TIME_ZONE_INFORMATION from the API and get the non-localized time zone name. */
- if (TIME_ZONE_ID_INVALID == GetDynamicTimeZoneInformation(&dynamicTZI)) {
- return nullptr;
- }
-
- id = GetUserGeoID(GEOCLASS_NATION);
- errorCode = GetGeoInfoW(id, GEO_ISO2, ISOcodeW, 3, 0);
-
- // convert from wchar_t* (UTF-16 on Windows) to char* (UTF-8).
- u_strToUTF8(ISOcode, UPRV_LENGTHOF(ISOcode), nullptr,
- reinterpret_cast<const UChar*>(ISOcodeW), UPRV_LENGTHOF(ISOcodeW), &status);
-
- LocalUResourceBundlePointer bundle(ures_openDirect(nullptr, "windowsZones", &status));
- ures_getByKey(bundle.getAlias(), "mapTimezones", bundle.getAlias(), &status);
-
- // convert from wchar_t* (UTF-16 on Windows) to char* (UTF-8).
- u_strToUTF8(dynamicTZKeyName, UPRV_LENGTHOF(dynamicTZKeyName), nullptr,
- reinterpret_cast<const UChar*>(dynamicTZI.TimeZoneKeyName), -1, &status);
-
- if (U_FAILURE(status)) {
- return nullptr;
- }
-
- if (dynamicTZI.TimeZoneKeyName[0] != 0) {
- StackUResourceBundle winTZ;
- ures_getByKey(bundle.getAlias(), dynamicTZKeyName, winTZ.getAlias(), &status);
-
- if (U_SUCCESS(status)) {
- const UChar* icuTZ = nullptr;
- if (errorCode != 0) {
- icuTZ = ures_getStringByKey(winTZ.getAlias(), ISOcode, &len, &status);
- }
- if (errorCode == 0 || icuTZ == nullptr) {
- /* fallback to default "001" and reset status */
- status = U_ZERO_ERROR;
- icuTZ = ures_getStringByKey(winTZ.getAlias(), "001", &len, &status);
- }
-
- if (U_SUCCESS(status)) {
- int index = 0;
-
- while (!(*icuTZ == '\0' || *icuTZ == ' ')) {
- // time zone IDs only contain ASCII invariant characters.
- tmpid[index++] = (char)(*icuTZ++);
- }
- tmpid[index] = '\0';
- }
- }
- }
-
- // Copy the timezone ID to icuid to be returned.
- if (tmpid[0] != 0) {
- icuid = uprv_strdup(tmpid);
- }
-
- return icuid;
-}
-
-U_NAMESPACE_END
-#endif /* U_PLATFORM_USES_ONLY_WIN32_API */
diff --git a/contrib/libs/icu/common/wintz.h b/contrib/libs/icu/common/wintz.h
deleted file mode 100644
index cd8565eef1e..00000000000
--- a/contrib/libs/icu/common/wintz.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-********************************************************************************
-* Copyright (C) 2005-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-********************************************************************************
-*
-* File WINTZ.H
-*
-********************************************************************************
-*/
-
-#ifndef __WINTZ
-#define __WINTZ
-
-#include "unicode/utypes.h"
-
-#if U_PLATFORM_USES_ONLY_WIN32_API
-
-/**
- * \file
- * \brief C API: Utilities for dealing w/ Windows time zones.
- */
-
-U_CDECL_BEGIN
-/* Forward declarations for Windows types... */
-typedef struct _TIME_ZONE_INFORMATION TIME_ZONE_INFORMATION;
-U_CDECL_END
-
-U_INTERNAL const char* U_EXPORT2
-uprv_detectWindowsTimeZone();
-
-#endif /* U_PLATFORM_USES_ONLY_WIN32_API */
-
-#endif /* __WINTZ */