1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
|
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 1998-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File ustdio.c
*
* Modification History:
*
* Date Name Description
* 11/18/98 stephen Creation.
* 03/12/99 stephen Modified for new C API.
* 07/19/99 stephen Fixed read() and gets()
******************************************************************************
*/
#include "unicode/ustdio.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/putil.h"
#include "cmemory.h"
#include "cstring.h"
#include "ufile.h"
#include "ufmt_cmn.h"
#include "unicode/ucnv.h"
#include "unicode/ustring.h"
#include <string.h>
/*
 * Code units treated as line delimiters when reading lines.
 * IS_FIRST_STRING_DELIMITER() below accepts the contiguous range
 * DELIM_LF..DELIM_CR (so DELIM_VT and DELIM_FF as well) plus DELIM_NEL,
 * DELIM_LS and DELIM_PS.
 */
#define DELIM_LF 0x000A
#define DELIM_VT 0x000B
#define DELIM_FF 0x000C
#define DELIM_CR 0x000D
#define DELIM_NEL 0x0085
#define DELIM_LS 0x2028
#define DELIM_PS 0x2029
/*
 * DELIMITERS is the zero-terminated newline sequence that u_fputs() appends
 * after the string; DELIMITERS_LEN is its length without the terminator.
 * Builds that use only the Win32 API write CR LF, all others write LF.
 */
/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
#if U_PLATFORM_USES_ONLY_WIN32_API
static const char16_t DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
static const uint32_t DELIMITERS_LEN = 2;
/* TODO: Default newline writing should be detected based upon the converter being used. */
#else
static const char16_t DELIMITERS [] = { DELIM_LF, 0x0000 };
static const uint32_t DELIMITERS_LEN = 1;
#endif
/* True if c1 can start (or by itself be) a line delimiter. */
#define IS_FIRST_STRING_DELIMITER(c1) \
(UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \
|| (c1) == DELIM_NEL \
|| (c1) == DELIM_LS \
|| (c1) == DELIM_PS)
/* True if c1 may be followed by a second code unit of the same delimiter (only CR). */
#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)
/* True if c1 followed by c2 forms the two-unit delimiter CR LF. */
#define IS_COMBINED_STRING_DELIMITER(c1, c2) \
(UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
#if !UCONFIG_NO_TRANSLITERATION
/**
 * Installs, replaces or removes the output transliterator of a UFILE.
 *
 * @param file      The file to modify; must not be null.
 * @param direction Only write-side transliteration is implemented; any
 *                  value that includes U_READ is rejected with
 *                  U_UNSUPPORTED_ERROR.
 * @param adopt     The transliterator to install, or nullptr to remove the
 *                  current one together with its buffer.
 * @param status    In/out error code.
 * @return The previously installed transliterator (nullptr if there was
 *         none). On any error `adopt` itself is returned.
 */
U_CAPI UTransliterator* U_EXPORT2
u_fsettransliterator(UFILE *file, UFileDirection direction,
                     UTransliterator *adopt, UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return adopt;
    }
    if (file == nullptr) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return adopt;
    }
    if ((direction & U_READ) != 0) {
        /** TODO: implement */
        *status = U_UNSUPPORTED_ERROR;
        return adopt;
    }

    /* Case 1: the caller is clearing the transliterator. */
    if (adopt == nullptr) {
        if (file->fTranslit == nullptr) {
            return nullptr;
        }
        /* TODO: Check side */
        UTransliterator *previous = file->fTranslit->translit;
        uprv_free(file->fTranslit->buffer);
        file->fTranslit->buffer = nullptr;
        uprv_free(file->fTranslit);
        file->fTranslit = nullptr;
        return previous;
    }

    /* Case 2: replacing an existing transliterator; push pending text out first. */
    if (file->fTranslit != nullptr) {
        UTransliterator *previous = file->fTranslit->translit;
        ufile_flush_translit(file);
        file->fTranslit->translit = adopt;
        return previous;
    }

    /* Case 3: first transliterator for this file; create the empty buffer record. */
    file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
    if (file->fTranslit == nullptr) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return adopt;
    }
    file->fTranslit->capacity = 0;
    file->fTranslit->length = 0;
    file->fTranslit->pos = 0;
    file->fTranslit->buffer = nullptr;
    file->fTranslit->translit = adopt;
    return nullptr;
}
/**
 * Runs `src` through the file's transliterator, using the file's
 * transliteration buffer as working storage.
 *
 * @param f     The file; if it has no transliterator, `src` is returned
 *              unchanged and `*count` is not modified.
 * @param src   Code units to append to the pending text (may be nullptr
 *              when `*count` is 0).
 * @param count In: number of units in `src`. Out: number of units at the
 *              start of the returned buffer that are ready to be written.
 *              May be nullptr, in which case a count of 0 is assumed.
 * @param flush If false, transliterate incrementally and keep unfinished
 *              text buffered for the next call; otherwise transliterate
 *              everything and empty the buffer.
 * @return Pointer to the transliterated text (owned by the file), or
 *         nullptr with `*count` set to 0 if the buffer could not be grown.
 */
static const char16_t * u_file_translit(UFILE *f, const char16_t *src, int32_t *count, UBool flush)
{
    int32_t junkCount = 0;
    UErrorCode status = U_ZERO_ERROR;

    if (count == nullptr) {
        count = &junkCount;
    }

    if ((!f) || (!f->fTranslit) || (!f->fTranslit->translit)) {
        /* fast path */
        return src;
    }

    UFILETranslitBuffer *tb = f->fTranslit;

    /* First: slide the not-yet-consumed text to the front of the buffer. */
    if (tb->length > tb->pos) {
        memmove(tb->buffer, tb->buffer + tb->pos,
                (tb->length - tb->pos) * sizeof(char16_t));
    }
    tb->length -= tb->pos; /* always */
    tb->pos = 0;

    /* Reject sizes whose fourfold capacity would not fit in int32_t. */
    if (*count < 0 || *count > (INT32_MAX / 4) - tb->length) {
        *count = 0;
        return nullptr;
    }

    /* Calculate new buffer size needed (4x leaves room for expansion). */
    const int32_t newlen = (*count + tb->length) * 4;

    if (newlen > tb->capacity) {
        /*
         * Grow through a temporary so that a failed allocation neither leaks
         * the old buffer nor leaves `capacity` describing a null buffer.
         */
        void *grown = (tb->buffer == nullptr)
            ? uprv_malloc(newlen * sizeof(char16_t))
            : uprv_realloc(tb->buffer, newlen * sizeof(char16_t));
        if (grown == nullptr) {
            /* Tell the caller there is nothing to write. */
            *count = 0;
            return nullptr;
        }
        tb->buffer = static_cast<char16_t*>(grown);
        tb->capacity = newlen;
    }

    /*
     * Append the new text. A counted copy is used so that embedded NULs do
     * not cut the copy short while `length` still advances by *count.
     */
    u_memcpy(tb->buffer + tb->length, src, *count);
    tb->length += *count;

    /* Now, translit in place as much as we can */
    if (flush == false) {
        int32_t textLength = tb->length;
        UTransPosition pos;
        pos.contextStart = 0;
        pos.contextLimit = textLength;
        pos.start = 0;
        pos.limit = textLength;

        utrans_transIncrementalUChars(tb->translit,
                                      tb->buffer, /* because we shifted */
                                      &textLength,
                                      tb->capacity,
                                      &pos,
                                      &status);

        /* now: start/limit point to the transliterated text */
        /* Transliterated is [buffer..pos.start) */
        *count = pos.start;
        tb->pos = pos.start;
        tb->length = pos.limit;
        return tb->buffer;
    }

    int32_t textLength = tb->length;
    int32_t textLimit = tb->length;

    utrans_transUChars(tb->translit,
                       tb->buffer,
                       &textLength,
                       tb->capacity,
                       0,
                       &textLimit,
                       &status);

    /* out: converted len */
    *count = textLimit;

    /* Everything was handed out; the buffer is empty again. */
    tb->pos = 0;
    tb->length = 0;

    return tb->buffer;
}
#endif
/**
 * Pushes any text still held back by the transliterator through to the
 * output, without flushing the converter.
 */
void
ufile_flush_translit(UFILE *f)
{
#if !UCONFIG_NO_TRANSLITERATION
    /* Without a transliteration buffer there is nothing pending. */
    if (f == nullptr || f->fTranslit == nullptr) {
        return;
    }
#endif

    /* Empty write: flushIO = false, flushTranslit = true. */
    u_file_write_flush(nullptr, 0, f, false, true);
}
/**
 * Flushes the converter state of a file-backed UFILE by issuing an empty
 * write with flushIO set. Does nothing for a null UFILE or one that has no
 * underlying FILE.
 */
void
ufile_flush_io(UFILE *f)
{
    if (f != nullptr && f->fFile != nullptr) {
        /* Empty write: flushIO = true, flushTranslit = false. */
        u_file_write_flush(nullptr, 0, f, true, false);
    }
}
/**
 * Flushes pending transliterated text, then closes the transliterator and
 * releases the transliteration buffer attached to the file.
 */
void
ufile_close_translit(UFILE *f)
{
#if !UCONFIG_NO_TRANSLITERATION
    if (f == nullptr || f->fTranslit == nullptr) {
        return;
    }
#endif

    ufile_flush_translit(f);

#if !UCONFIG_NO_TRANSLITERATION
    UFILETranslitBuffer *state = f->fTranslit;

    if (state->translit != nullptr) {
        utrans_close(state->translit);
    }
    if (state->buffer != nullptr) {
        uprv_free(state->buffer);
    }
    uprv_free(state);

    f->fTranslit = nullptr;
#endif
}
/* Input/output */
/* Input/output */

/**
 * Writes the NUL-terminated string `s` followed by the platform newline
 * sequence (DELIMITERS).
 *
 * @return The sum of the counts reported by the two underlying writes.
 */
U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fputs(const char16_t *s,
        UFILE *f)
{
    const int32_t textUnits = u_file_write(s, u_strlen(s), f);
    const int32_t newlineUnits = u_file_write(DELIMITERS, DELIMITERS_LEN, f);

    return textUnits + newlineUnits;
}
/**
 * Writes one code point to the file.
 *
 * @param uc The code point to write.
 * @param f  The destination file.
 * @return `uc` on success; U_EOF if `uc` cannot be encoded as UTF-16 or if
 *         the write reported a different number of code units.
 */
U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fputc(UChar32 uc,
        UFILE *f)
{
    char16_t units[2];
    int32_t unitCount = 0;
    UBool encodeFailed = false;

    /* Encode as one or two UTF-16 code units. */
    U16_APPEND(units, unitCount, UPRV_LENGTHOF(units), uc, encodeFailed);

    if (!encodeFailed && u_file_write(units, unitCount, f) == unitCount) {
        return uc;
    }
    return U_EOF;
}
/**
 * Core write routine: optionally transliterates, then either copies the
 * text into a string-backed UFILE or converts it to bytes and writes them
 * to the underlying FILE.
 *
 * @param chars         Text to write; may be nullptr only when there is
 *                      nothing to write (pure flush).
 * @param count         Number of code units in `chars`, or negative to use
 *                      u_strlen(chars).
 * @param f             Destination file.
 * @param flushIO       Passed to the converter as its flush flag; for a
 *                      string-backed file, requests that one extra unit be
 *                      copied when space allows.
 * @param flushTranslit Passed to the transliteration step as its flush flag.
 * @return Number of code units accepted. Units of a chunk whose bytes could
 *         not be written completely are not counted, and writing stops at
 *         that point.
 */
U_CFUNC int32_t U_EXPORT2
u_file_write_flush(const char16_t *chars,
                   int32_t count,
                   UFILE *f,
                   UBool flushIO,
                   UBool flushTranslit)
{
    /* Set up conversion parameters */
    UErrorCode status = U_ZERO_ERROR;
    const char16_t *mySource = chars;
    const char16_t *mySourceBegin;
    const char16_t *mySourceEnd;
    char charBuffer[UFILE_CHARBUFFER_SIZE];
    char *myTarget = charBuffer;
    int32_t written = 0;
    int32_t numConverted = 0;

    if (count < 0) {
        /* A null string has no length to measure. */
        count = (chars != nullptr) ? u_strlen(chars) : 0;
    }

#if !UCONFIG_NO_TRANSLITERATION
    if ((f->fTranslit) && (f->fTranslit->translit)) {
        /* Do the transliteration */
        mySource = u_file_translit(f, chars, &count, flushTranslit);
    }
#endif

    /*
     * A null source is only acceptable for an empty write. Anything else
     * (for example a failed transliteration) must not be dereferenced.
     */
    if (mySource == nullptr && count > 0) {
        return 0;
    }

    /* Write to a string. */
    if (!f->fFile) {
        int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
        if (flushIO && charsLeft > count) {
            count++;
        }
        written = ufmt_min(count, charsLeft);
        u_strncpy(f->str.fPos, mySource, written);
        f->str.fPos += written;
        return written;
    }

    mySourceEnd = mySource + count;

    /* Perform the conversion in a loop */
    do {
        mySourceBegin = mySource; /* beginning location for this loop */
        status = U_ZERO_ERROR;
        if (f->fConverter != nullptr) { /* We have a valid converter */
            ucnv_fromUnicode(f->fConverter,
                             &myTarget,
                             charBuffer + UFILE_CHARBUFFER_SIZE,
                             &mySource,
                             mySourceEnd,
                             nullptr,
                             flushIO,
                             &status);
        } else { /*weiv: do the invariant conversion */
            int32_t convertChars = (int32_t) (mySourceEnd - mySource);
            if (convertChars > UFILE_CHARBUFFER_SIZE) {
                /* Convert one buffer's worth now and loop for the rest. */
                convertChars = UFILE_CHARBUFFER_SIZE;
                status = U_BUFFER_OVERFLOW_ERROR;
            }
            u_UCharsToChars(mySource, myTarget, convertChars);
            mySource += convertChars;
            myTarget += convertChars;
        }
        numConverted = (int32_t)(myTarget - charBuffer);

        if (numConverted > 0) {
            /* write the converted bytes */
            size_t bytesOut = fwrite(charBuffer,
                                     sizeof(char),
                                     (size_t)numConverted,
                                     f->fFile);
            if (bytesOut != (size_t)numConverted) {
                /* Short or failed write: do not report this chunk as written. */
                break;
            }

            written += (int32_t) (mySource - mySourceBegin);
        }
        myTarget = charBuffer;
    }
    while (status == U_BUFFER_OVERFLOW_ERROR);

    /* return # of chars written */
    return written;
}
/**
 * Writes `count` code units from `chars` to the file without forcing a
 * flush of either the converter or the transliterator.
 *
 * @return Whatever u_file_write_flush() reports.
 */
U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_file_write(const char16_t *chars,
             int32_t count,
             UFILE *f)
{
    const UBool flushIO = false;
    const UBool flushTranslit = false;

    return u_file_write_flush(chars, count, f, flushIO, flushTranslit);
}
/* private function used for buffering input */
/**
 * Private helper used for buffering input: moves unread code units to the
 * front of the file's char16_t buffer and appends newly read, converted
 * input after them. Does nothing for a string-backed UFILE.
 */
void
ufile_fill_uchar_buffer(UFILE *f)
{
    if (f->fFile == nullptr) {
        /* There is nothing to do. It's a string. */
        return;
    }

    u_localized_string *text = &f->str;
    const int32_t pending = static_cast<int32_t>(text->fLimit - text->fPos);
    const bool fromStdin = (f->fFileno == 0);

    if (fromStdin && pending > 0) {
        /* Don't read from stdin too many times. There is still some data. */
        return;
    }

    /* Keep whatever has not been consumed yet at the start of the buffer. */
    if (pending != 0) {
        u_memmove(f->fUCBuffer, text->fPos, pending); /* not accessing beyond memory */
    }

    /* Space left in the char16_t buffer. */
    const int32_t freeUnits = UFILE_UCHARBUFFER_SIZE - pending;

    /* Number of codepage bytes to request for that space. */
    /* weiv: if converter is nullptr, we use invariant converter with charwidth = 1 */
    int32_t divisor = 1;
    if (f->fConverter != nullptr) {
        divisor = 2 * ucnv_getMinCharSize(f->fConverter);
    }
    const int32_t byteBudget = freeUnits / divisor;

    /* Read in the data to convert */
    char bytes[UFILE_CHARBUFFER_SIZE];
    int32_t byteCount;
    if (fromStdin) {
        /* Special case. Read from stdin one line at a time. */
        char *line = fgets(bytes, ufmt_min(byteBudget, UFILE_CHARBUFFER_SIZE), f->fFile);
        byteCount = static_cast<int32_t>(line ? uprv_strlen(bytes) : 0);
    }
    else {
        /* A normal file */
        byteCount = static_cast<int32_t>(fread(bytes,
                                               sizeof(char),
                                               ufmt_min(byteBudget, UFILE_CHARBUFFER_SIZE),
                                               f->fFile));
    }

    /* Convert the bytes, appending after the retained data. */
    char16_t *out = f->fUCBuffer + pending;

    if (f->fConverter != nullptr) { /* We have a valid converter */
        UErrorCode status = U_ZERO_ERROR;
        const char *in = bytes;
        const char *inLimit = bytes + byteCount;

        ucnv_toUnicode(f->fConverter,
                       &out,
                       f->fUCBuffer + UFILE_UCHARBUFFER_SIZE,
                       &in,
                       inLimit,
                       nullptr,
                       static_cast<UBool>(feof(f->fFile) != 0),
                       &status);
    } else { /*weiv: do the invariant conversion */
        u_charsToUChars(bytes, out, byteCount);
        out += byteCount;
    }

    /* update the pointers into our array */
    text->fPos = text->fBuffer;
    text->fLimit = out;
}
/**
 * Reads one line into `s`, keeping the line delimiter, and NUL-terminates it.
 *
 * At most n-1 code units are stored before the terminator. Reading stops
 * after a delimiter accepted by IS_FIRST_STRING_DELIMITER(); a CR that is
 * immediately followed by LF is kept together with that LF, even when the
 * two units arrive in different buffer fills.
 *
 * @param s Destination buffer with room for `n` code units.
 * @param n Capacity of `s`, including the terminating NUL.
 * @param f The file to read from.
 * @return `s`, or nullptr if `n` <= 0 or no input was available.
 */
U_CAPI char16_t* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgets(char16_t *s,
int32_t n,
UFILE *f)
{
int32_t dataSize;
int32_t count;
char16_t *alias;
const char16_t *limit;
char16_t *sItr;
/* Delimiter state: 0 = none seen yet, DELIM_CR = a CR was copied and an LF
may still follow, 1 = the line is complete. */
char16_t currDelim = 0;
u_localized_string *str;
if (n <= 0) {
/* Invalid capacity: there is not even room for the null terminator. */
return nullptr;
}
/* fill the buffer if needed */
str = &f->str;
if (str->fPos >= str->fLimit) {
ufile_fill_uchar_buffer(f);
}
/* subtract 1 from n to compensate for the terminator */
--n;
/* determine the amount of data in the buffer */
dataSize = (int32_t)(str->fLimit - str->fPos);
/* if no characters were left, return nullptr */
if (dataSize == 0)
return nullptr;
/* otherwise, iteratively fill the buffer and copy */
count = 0;
sItr = s;
currDelim = 0;
while (dataSize > 0 && count < n) {
alias = str->fPos;
/* Find how much to copy: the buffered data or the space left in s, whichever is smaller */
if (dataSize < (n - count)) {
limit = str->fLimit;
}
else {
limit = alias + (n - count);
}
if (!currDelim) {
/* Copy UChars until we find the first occurrence of a delimiter character */
while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
count++;
*(sItr++) = *(alias++);
}
/* Preserve the newline */
if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
/* Remember the CR so that a following LF can be attached to it. */
currDelim = *alias;
}
else {
currDelim = 1; /* This isn't a newline, but it's used to say
that we should break later. We've checked all
possible newline combinations even across buffer
boundaries. */
}
count++;
*(sItr++) = *(alias++);
}
}
/* If we have a CRLF combination, preserve that too. */
if (alias < limit) {
if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
count++;
*(sItr++) = *(alias++);
}
currDelim = 1; /* This isn't a newline, but it's used to say
that we should break later. We've checked all
possible newline combinations even across buffer
boundaries. */
}
/* update the current buffer position */
str->fPos = alias;
/* if we found a delimiter */
if (currDelim == 1) {
/* break out */
break;
}
/* refill the buffer */
ufile_fill_uchar_buffer(f);
/* determine the amount of data in the buffer */
dataSize = (int32_t)(str->fLimit - str->fPos);
}
/* add the terminator and return s */
*sItr = 0x0000;
return s;
}
/**
 * Reads the next UTF-16 code unit from the file.
 *
 * @param f  The file to read from.
 * @param ch Receives the code unit, or U_EOF when none is available.
 * @return true if a code unit was stored in `*ch`, false otherwise.
 */
U_CFUNC UBool U_EXPORT2
ufile_getch(UFILE *f, char16_t *ch)
{
    u_localized_string *text = &f->str;

    /* Try to refill only when the buffer has been used up. */
    if (text->fPos >= text->fLimit) {
        ufile_fill_uchar_buffer(f);
    }

    /* Still nothing to read: report end of input. */
    if (text->fPos >= text->fLimit) {
        *ch = U_EOF;
        return false;
    }

    *ch = *(text->fPos)++;
    return true;
}
/**
 * Reads one UTF-16 code unit from the file.
 *
 * @return The code unit, or U_EOF if none was available.
 */
U_CAPI char16_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgetc(UFILE *f)
{
    char16_t unit;

    /* ufile_getch() stores U_EOF itself on failure, so its flag is not needed. */
    (void) ufile_getch(f, &unit);
    return unit;
}
/**
 * Reads the next code point from the file, combining a surrogate pair into
 * a single supplementary code point.
 *
 * A lead surrogate is combined only with a following trail surrogate. If
 * the following unit is not a trail surrogate, the unpaired lead is
 * returned as its own code point and the following unit is left unread.
 * If the input ends right after a lead surrogate, the lead is consumed and
 * U_EOF is reported.
 *
 * @param f   The file to read from.
 * @param c32 Receives the code point, or U_EOF when none is available.
 * @return true if a code point was stored in `*c32`, false otherwise.
 */
U_CFUNC UBool U_EXPORT2
ufile_getch32(UFILE *f, UChar32 *c32)
{
    u_localized_string *str = &f->str;

    *c32 = U_EOF;

    /* Refill when fewer than two units remain, so a pair can be read whole. */
    if (str->fPos + 1 >= str->fLimit) {
        ufile_fill_uchar_buffer(f);
    }

    if (str->fPos >= str->fLimit) {
        return false;
    }

    const char16_t first = *(str->fPos)++;
    if (!U_IS_LEAD(first)) {
        *c32 = first;
        return true;
    }

    if (str->fPos >= str->fLimit) {
        /* Input ended after the lead surrogate; *c32 stays U_EOF. */
        return false;
    }

    const char16_t second = *(str->fPos);
    if (!U16_IS_TRAIL(second)) {
        /* Unpaired lead: hand it back alone and keep the next unit unread. */
        *c32 = first;
        return true;
    }

    ++(str->fPos);
    *c32 = U16_GET_SUPPLEMENTARY(first, second);
    return true;
}
/**
 * Reads one code point from the file.
 *
 * @return The code point, or U_EOF if none was available.
 */
U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgetcx(UFILE *f)
{
    UChar32 codePoint;

    /* ufile_getch32() stores U_EOF itself on failure, so its flag is not needed. */
    (void) ufile_getch32(f, &codePoint);
    return codePoint;
}
/**
 * Pushes the most recently read character back so that it is read again.
 *
 * The character is not stored; the read position is only moved back when
 * the buffered unit(s) just before it match `ch`. A supplementary code
 * point must match both of its surrogate units. On failure the read
 * position is left unchanged.
 *
 * @param ch The code point to push back.
 * @param f  The file it was read from.
 * @return `ch` on success, U_EOF if the position is at the start of the
 *         buffer or the buffered text does not match `ch`.
 */
U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fungetc(UChar32 ch,
          UFILE *f)
{
    u_localized_string *str = &f->str;

    if (U_IS_SUPPLEMENTARY(ch)) {
        /* Two units are needed: the lead at fPos[-2] and the trail at fPos[-1]. */
        if (str->fPos - str->fBuffer < 2
            || str->fPos[-1] != U16_TRAIL(ch)
            || str->fPos[-2] != U16_LEAD(ch))
        {
            return U_EOF;
        }
        str->fPos -= 2;
        return ch;
    }

    /* if we're at the beginning of the buffer, sorry! */
    if (str->fPos == str->fBuffer || str->fPos[-1] != ch) {
        return U_EOF;
    }
    --(str->fPos);
    return ch;
}
/**
 * Reads up to `count` UTF-16 code units into `chars`, refilling the file's
 * buffer as needed.
 *
 * @param chars Destination array with room for `count` code units.
 * @param count Maximum number of code units to read.
 * @param f     The file to read from.
 * @return Number of code units stored. This is 0 when `chars` is null or
 *         `count` is not positive, and less than `count` when the input
 *         ran out.
 */
U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_file_read(char16_t *chars,
            int32_t count,
            UFILE *f)
{
    /* A non-positive count would otherwise turn into a negative copy size. */
    if (chars == nullptr || count <= 0) {
        return 0;
    }

    u_localized_string *str = &f->str;
    int32_t read = 0;

    while (read < count) {
        /* determine the amount of data in the buffer */
        int32_t dataSize = (int32_t)(str->fLimit - str->fPos);
        if (dataSize <= 0) {
            /* fill the buffer */
            ufile_fill_uchar_buffer(f);
            dataSize = (int32_t)(str->fLimit - str->fPos);
            if (dataSize <= 0) {
                /* No more input. */
                break;
            }
        }

        /* Make sure that we don't read too much */
        if (dataSize > (count - read)) {
            dataSize = count - read;
        }

        /* copy the current data in the buffer */
        memcpy(chars + read, str->fPos, dataSize * sizeof(char16_t));

        /* update number of items read and the current buffer position */
        read += dataSize;
        str->fPos += dataSize;
    }

    return read;
}
#endif
|