1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1998-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * File ustdio.c
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 11/18/98 stephen Creation.
15 * 03/12/99 stephen Modified for new C API.
16 * 07/19/99 stephen Fixed read() and gets()
17 ******************************************************************************
18 */
19
20 #include "unicode/ustdio.h"
21
22 #if !UCONFIG_NO_CONVERSION
23
24 #include "unicode/putil.h"
25 #include "cmemory.h"
26 #include "cstring.h"
27 #include "ufile.h"
28 #include "ufmt_cmn.h"
29 #include "unicode/ucnv.h"
30 #include "unicode/ustring.h"
31
32 #include <string.h>
33
/*
 * Code points that the line-reading code in this file treats as line
 * terminators. They are plain integer constants so they can be compared
 * against UChar values and used in static initializers.
 */
enum {
    DELIM_LF  = 0x000A,   /* U+000A LINE FEED */
    DELIM_VT  = 0x000B,   /* U+000B LINE TABULATION */
    DELIM_FF  = 0x000C,   /* U+000C FORM FEED */
    DELIM_CR  = 0x000D,   /* U+000D CARRIAGE RETURN */
    DELIM_NEL = 0x0085,   /* U+0085 NEXT LINE */
    DELIM_LS  = 0x2028,   /* U+2028 LINE SEPARATOR */
    DELIM_PS  = 0x2029    /* U+2029 PARAGRAPH SEPARATOR */
};
41
42 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
43 #if U_PLATFORM_USES_ONLY_WIN32_API
44 static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
45 static const uint32_t DELIMITERS_LEN = 2;
46 /* TODO: Default newline writing should be detected based upon the converter being used. */
47 #else
48 static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 };
49 static const uint32_t DELIMITERS_LEN = 1;
50 #endif
51
/*
 * TRUE when c1 can start a line terminator: anything in the contiguous
 * range LF..CR (LF, VT, FF, CR), or NEL, LS, PS.
 * Note: the argument is evaluated more than once.
 */
#define IS_FIRST_STRING_DELIMITER(c1) \
    (UBool)(((c1) >= DELIM_LF && (c1) <= DELIM_CR) \
            || (c1) == DELIM_NEL \
            || (c1) == DELIM_LS \
            || (c1) == DELIM_PS)

/* TRUE when c1 may be the first half of a two-unit terminator (only CR can). */
#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) \
    (UBool)((c1) == DELIM_CR)

/* TRUE when c1 followed by c2 forms the two-unit terminator CR LF. */
#define IS_COMBINED_STRING_DELIMITER(c1, c2) \
    (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
60
61
62 #if !UCONFIG_NO_TRANSLITERATION
63
64 U_CAPI UTransliterator* U_EXPORT2
u_fsettransliterator(UFILE * file,UFileDirection direction,UTransliterator * adopt,UErrorCode * status)65 u_fsettransliterator(UFILE *file, UFileDirection direction,
66 UTransliterator *adopt, UErrorCode *status)
67 {
68 UTransliterator *old = NULL;
69
70 if(U_FAILURE(*status))
71 {
72 return adopt;
73 }
74
75 if(!file)
76 {
77 *status = U_ILLEGAL_ARGUMENT_ERROR;
78 return adopt;
79 }
80
81 if(direction & U_READ)
82 {
83 /** TODO: implement */
84 *status = U_UNSUPPORTED_ERROR;
85 return adopt;
86 }
87
88 if(adopt == NULL) /* they are clearing it */
89 {
90 if(file->fTranslit != NULL)
91 {
92 /* TODO: Check side */
93 old = file->fTranslit->translit;
94 uprv_free(file->fTranslit->buffer);
95 file->fTranslit->buffer=NULL;
96 uprv_free(file->fTranslit);
97 file->fTranslit=NULL;
98 }
99 }
100 else
101 {
102 if(file->fTranslit == NULL)
103 {
104 file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
105 if(!file->fTranslit)
106 {
107 *status = U_MEMORY_ALLOCATION_ERROR;
108 return adopt;
109 }
110 file->fTranslit->capacity = 0;
111 file->fTranslit->length = 0;
112 file->fTranslit->pos = 0;
113 file->fTranslit->buffer = NULL;
114 }
115 else
116 {
117 old = file->fTranslit->translit;
118 ufile_flush_translit(file);
119 }
120
121 file->fTranslit->translit = adopt;
122 }
123
124 return old;
125 }
126
/*
 * Passes outgoing text through the transliterator attached to `f`.
 *
 * f     - file whose transliteration buffer is used. When f, its buffer
 *         structure or its transliterator is NULL, `src` is returned
 *         untouched and *count is left alone (fast path).
 * src   - *count UChars of new text (may be NULL when *count is 0).
 * count - in: number of UChars in src; out: number of UChars at the
 *         returned pointer that are ready to be written. May be NULL.
 * flush - FALSE: transliterate incrementally and keep the unfinished tail
 *         buffered for the next call; TRUE: transliterate everything and
 *         empty the buffer.
 *
 * Returns a pointer into the internal buffer (valid until the next call),
 * or NULL when the buffer could not be grown. On failure *count is set to 0
 * so that callers write nothing; text buffered by earlier calls is kept.
 */
static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush)
{
    int32_t newlen;
    int32_t junkCount = 0;
    int32_t textLength;
    int32_t textLimit;
    UTransPosition pos;
    UErrorCode status = U_ZERO_ERROR;
    UFILETranslitBuffer *tb;

    if(count == NULL)
    {
        count = &junkCount;
    }

    if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
    {
        /* fast path */
        return src;
    }
    tb = f->fTranslit;

    /* First: slide the not-yet-consumed tail to the front of the buffer */
    if(tb->length > tb->pos)
    {
        memmove(tb->buffer, tb->buffer + tb->pos,
            (tb->length - tb->pos)*sizeof(UChar));
    }
    tb->length -= tb->pos; /* always */
    tb->pos = 0;

    /*
     * Reject sizes whose fourfold expansion below would overflow int32_t;
     * an overflowed (negative) size would skip the reallocation and let the
     * copy run past the end of the buffer.
     */
    if (*count < 0 || *count > (INT32_MAX / 4) - tb->length)
    {
        *count = 0;
        return NULL;
    }

    /* Calculate new buffer size needed (room for the text to grow) */
    newlen = (*count + tb->length) * 4;

    if(newlen > tb->capacity)
    {
        UChar *grown;

        if(tb->buffer == NULL)
        {
            grown = (UChar*)uprv_malloc(newlen * sizeof(UChar));
        }
        else
        {
            grown = (UChar*)uprv_realloc(tb->buffer, newlen * sizeof(UChar));
        }
        /*
         * On failure keep the old buffer and capacity: overwriting the
         * pointer with NULL would leak the old block and leave a stale
         * capacity behind for the next call.
         */
        if (grown == NULL)
        {
            *count = 0;
            return NULL;
        }
        tb->buffer = grown;
        tb->capacity = newlen;
    }

    /*
     * Now, append the new data. A counted copy is used (not u_strncpy) so
     * that text containing U+0000 is copied completely.
     */
    if (*count > 0)
    {
        memcpy(tb->buffer + tb->length, src, *count * sizeof(UChar));
    }
    tb->length += *count;

    /* Now, translit in place as much as we can */
    if(flush == FALSE)
    {
        textLength = tb->length;
        pos.contextStart = 0;
        pos.contextLimit = textLength;
        pos.start        = 0;
        pos.limit        = textLength;

        utrans_transIncrementalUChars(tb->translit,
            tb->buffer, /* because we shifted */
            &textLength,
            tb->capacity,
            &pos,
            &status);

        /* now: start/limit point to the transliterated text */
        /* Transliterated is [buffer..pos.start) */
        *count     = pos.start;
        tb->pos    = pos.start;
        tb->length = pos.limit;

        return tb->buffer;
    }
    else
    {
        textLength = tb->length;
        textLimit  = tb->length;

        utrans_transUChars(tb->translit,
            tb->buffer,
            &textLength,
            tb->capacity,
            0,
            &textLimit,
            &status);

        /* out: converted len */
        *count = textLimit;

        /* Everything has been handed out: mark the buffer empty */
        tb->pos = 0;
        tb->length = 0;

        return tb->buffer;
    }
}
229
230 #endif
231
232 void
ufile_flush_translit(UFILE * f)233 ufile_flush_translit(UFILE *f)
234 {
235 #if !UCONFIG_NO_TRANSLITERATION
236 if((!f)||(!f->fTranslit))
237 return;
238 #endif
239
240 u_file_write_flush(NULL, 0, f, FALSE, TRUE);
241 }
242
243
244 void
ufile_flush_io(UFILE * f)245 ufile_flush_io(UFILE *f)
246 {
247 if((!f) || (!f->fFile)) {
248 return; /* skip if no file */
249 }
250
251 u_file_write_flush(NULL, 0, f, TRUE, FALSE);
252 }
253
254
255 void
ufile_close_translit(UFILE * f)256 ufile_close_translit(UFILE *f)
257 {
258 #if !UCONFIG_NO_TRANSLITERATION
259 if((!f)||(!f->fTranslit))
260 return;
261 #endif
262
263 ufile_flush_translit(f);
264
265 #if !UCONFIG_NO_TRANSLITERATION
266 if(f->fTranslit->translit)
267 utrans_close(f->fTranslit->translit);
268
269 if(f->fTranslit->buffer)
270 {
271 uprv_free(f->fTranslit->buffer);
272 }
273
274 uprv_free(f->fTranslit);
275 f->fTranslit = NULL;
276 #endif
277 }
278
279
280 /* Input/output */
281
282 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fputs(const UChar * s,UFILE * f)283 u_fputs(const UChar *s,
284 UFILE *f)
285 {
286 int32_t count = u_file_write(s, u_strlen(s), f);
287 count += u_file_write(DELIMITERS, DELIMITERS_LEN, f);
288 return count;
289 }
290
291 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fputc(UChar32 uc,UFILE * f)292 u_fputc(UChar32 uc,
293 UFILE *f)
294 {
295 UChar buf[2];
296 int32_t idx = 0;
297 UBool isError = FALSE;
298
299 U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError);
300 if (isError) {
301 return U_EOF;
302 }
303 return u_file_write(buf, idx, f) == idx ? uc : U_EOF;
304 }
305
306
307 U_CFUNC int32_t U_EXPORT2
u_file_write_flush(const UChar * chars,int32_t count,UFILE * f,UBool flushIO,UBool flushTranslit)308 u_file_write_flush(const UChar *chars,
309 int32_t count,
310 UFILE *f,
311 UBool flushIO,
312 UBool flushTranslit)
313 {
314 /* Set up conversion parameters */
315 UErrorCode status = U_ZERO_ERROR;
316 const UChar *mySource = chars;
317 const UChar *mySourceBegin;
318 const UChar *mySourceEnd;
319 char charBuffer[UFILE_CHARBUFFER_SIZE];
320 char *myTarget = charBuffer;
321 int32_t written = 0;
322 int32_t numConverted = 0;
323
324 if (count < 0) {
325 count = u_strlen(chars);
326 }
327
328 #if !UCONFIG_NO_TRANSLITERATION
329 if((f->fTranslit) && (f->fTranslit->translit))
330 {
331 /* Do the transliteration */
332 mySource = u_file_translit(f, chars, &count, flushTranslit);
333 }
334 #endif
335
336 /* Write to a string. */
337 if (!f->fFile) {
338 int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
339 if (flushIO && charsLeft > count) {
340 count++;
341 }
342 written = ufmt_min(count, charsLeft);
343 u_strncpy(f->str.fPos, mySource, written);
344 f->str.fPos += written;
345 return written;
346 }
347
348 mySourceEnd = mySource + count;
349
350 /* Perform the conversion in a loop */
351 do {
352 mySourceBegin = mySource; /* beginning location for this loop */
353 status = U_ZERO_ERROR;
354 if(f->fConverter != NULL) { /* We have a valid converter */
355 ucnv_fromUnicode(f->fConverter,
356 &myTarget,
357 charBuffer + UFILE_CHARBUFFER_SIZE,
358 &mySource,
359 mySourceEnd,
360 NULL,
361 flushIO,
362 &status);
363 } else { /*weiv: do the invariant conversion */
364 int32_t convertChars = (int32_t) (mySourceEnd - mySource);
365 if (convertChars > UFILE_CHARBUFFER_SIZE) {
366 convertChars = UFILE_CHARBUFFER_SIZE;
367 status = U_BUFFER_OVERFLOW_ERROR;
368 }
369 u_UCharsToChars(mySource, myTarget, convertChars);
370 mySource += convertChars;
371 myTarget += convertChars;
372 }
373 numConverted = (int32_t)(myTarget - charBuffer);
374
375 if (numConverted > 0) {
376 /* write the converted bytes */
377 fwrite(charBuffer,
378 sizeof(char),
379 numConverted,
380 f->fFile);
381
382 written += (int32_t) (mySource - mySourceBegin);
383 }
384 myTarget = charBuffer;
385 }
386 while(status == U_BUFFER_OVERFLOW_ERROR);
387
388 /* return # of chars written */
389 return written;
390 }
391
392 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_file_write(const UChar * chars,int32_t count,UFILE * f)393 u_file_write( const UChar *chars,
394 int32_t count,
395 UFILE *f)
396 {
397 return u_file_write_flush(chars,count,f,FALSE,FALSE);
398 }
399
400
401 /* private function used for buffering input */
402 void
ufile_fill_uchar_buffer(UFILE * f)403 ufile_fill_uchar_buffer(UFILE *f)
404 {
405 UErrorCode status;
406 const char *mySource;
407 const char *mySourceEnd;
408 UChar *myTarget;
409 int32_t bufferSize;
410 int32_t maxCPBytes;
411 int32_t bytesRead;
412 int32_t availLength;
413 int32_t dataSize;
414 char charBuffer[UFILE_CHARBUFFER_SIZE];
415 u_localized_string *str;
416
417 if (f->fFile == NULL) {
418 /* There is nothing to do. It's a string. */
419 return;
420 }
421
422 str = &f->str;
423 dataSize = (int32_t)(str->fLimit - str->fPos);
424 if (f->fFileno == 0 && dataSize > 0) {
425 /* Don't read from stdin too many times. There is still some data. */
426 return;
427 }
428
429 /* shift the buffer if it isn't empty */
430 if(dataSize != 0) {
431 uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar)); /* not accessing beyond memory */
432 }
433
434
435 /* record how much buffer space is available */
436 availLength = UFILE_UCHARBUFFER_SIZE - dataSize;
437
438 /* Determine the # of codepage bytes needed to fill our UChar buffer */
439 /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/
440 maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1);
441
442 /* Read in the data to convert */
443 if (f->fFileno == 0) {
444 /* Special case. Read from stdin one line at a time. */
445 char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile);
446 bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0);
447 }
448 else {
449 /* A normal file */
450 bytesRead = (int32_t)fread(charBuffer,
451 sizeof(char),
452 ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE),
453 f->fFile);
454 }
455
456 /* Set up conversion parameters */
457 status = U_ZERO_ERROR;
458 mySource = charBuffer;
459 mySourceEnd = charBuffer + bytesRead;
460 myTarget = f->fUCBuffer + dataSize;
461 bufferSize = UFILE_UCHARBUFFER_SIZE;
462
463 if(f->fConverter != NULL) { /* We have a valid converter */
464 /* Perform the conversion */
465 ucnv_toUnicode(f->fConverter,
466 &myTarget,
467 f->fUCBuffer + bufferSize,
468 &mySource,
469 mySourceEnd,
470 NULL,
471 (UBool)(feof(f->fFile) != 0),
472 &status);
473
474 } else { /*weiv: do the invariant conversion */
475 u_charsToUChars(mySource, myTarget, bytesRead);
476 myTarget += bytesRead;
477 }
478
479 /* update the pointers into our array */
480 str->fPos = str->fBuffer;
481 str->fLimit = myTarget;
482 }
483
484 U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgets(UChar * s,int32_t n,UFILE * f)485 u_fgets(UChar *s,
486 int32_t n,
487 UFILE *f)
488 {
489 int32_t dataSize;
490 int32_t count;
491 UChar *alias;
492 const UChar *limit;
493 UChar *sItr;
494 UChar currDelim = 0;
495 u_localized_string *str;
496
497 if (n <= 0) {
498 /* Caller screwed up. We need to write the null terminatior. */
499 return NULL;
500 }
501
502 /* fill the buffer if needed */
503 str = &f->str;
504 if (str->fPos >= str->fLimit) {
505 ufile_fill_uchar_buffer(f);
506 }
507
508 /* subtract 1 from n to compensate for the terminator */
509 --n;
510
511 /* determine the amount of data in the buffer */
512 dataSize = (int32_t)(str->fLimit - str->fPos);
513
514 /* if 0 characters were left, return 0 */
515 if (dataSize == 0)
516 return NULL;
517
518 /* otherwise, iteratively fill the buffer and copy */
519 count = 0;
520 sItr = s;
521 currDelim = 0;
522 while (dataSize > 0 && count < n) {
523 alias = str->fPos;
524
525 /* Find how much to copy */
526 if (dataSize < (n - count)) {
527 limit = str->fLimit;
528 }
529 else {
530 limit = alias + (n - count);
531 }
532
533 if (!currDelim) {
534 /* Copy UChars until we find the first occurrence of a delimiter character */
535 while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
536 count++;
537 *(sItr++) = *(alias++);
538 }
539 /* Preserve the newline */
540 if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
541 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
542 currDelim = *alias;
543 }
544 else {
545 currDelim = 1; /* This isn't a newline, but it's used to say
546 that we should break later. We've checked all
547 possible newline combinations even across buffer
548 boundaries. */
549 }
550 count++;
551 *(sItr++) = *(alias++);
552 }
553 }
554 /* If we have a CRLF combination, preserve that too. */
555 if (alias < limit) {
556 if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
557 count++;
558 *(sItr++) = *(alias++);
559 }
560 currDelim = 1; /* This isn't a newline, but it's used to say
561 that we should break later. We've checked all
562 possible newline combinations even across buffer
563 boundaries. */
564 }
565
566 /* update the current buffer position */
567 str->fPos = alias;
568
569 /* if we found a delimiter */
570 if (currDelim == 1) {
571 /* break out */
572 break;
573 }
574
575 /* refill the buffer */
576 ufile_fill_uchar_buffer(f);
577
578 /* determine the amount of data in the buffer */
579 dataSize = (int32_t)(str->fLimit - str->fPos);
580 }
581
582 /* add the terminator and return s */
583 *sItr = 0x0000;
584 return s;
585 }
586
587 U_CFUNC UBool U_EXPORT2
ufile_getch(UFILE * f,UChar * ch)588 ufile_getch(UFILE *f, UChar *ch)
589 {
590 UBool isValidChar = FALSE;
591
592 *ch = U_EOF;
593 /* if we have an available character in the buffer, return it */
594 if(f->str.fPos < f->str.fLimit){
595 *ch = *(f->str.fPos)++;
596 isValidChar = TRUE;
597 }
598 else {
599 /* otherwise, fill the buffer and return the next character */
600 if(f->str.fPos >= f->str.fLimit) {
601 ufile_fill_uchar_buffer(f);
602 }
603 if(f->str.fPos < f->str.fLimit) {
604 *ch = *(f->str.fPos)++;
605 isValidChar = TRUE;
606 }
607 }
608 return isValidChar;
609 }
610
611 U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgetc(UFILE * f)612 u_fgetc(UFILE *f)
613 {
614 UChar ch;
615 ufile_getch(f, &ch);
616 return ch;
617 }
618
619 U_CFUNC UBool U_EXPORT2
ufile_getch32(UFILE * f,UChar32 * c32)620 ufile_getch32(UFILE *f, UChar32 *c32)
621 {
622 UBool isValidChar = FALSE;
623 u_localized_string *str;
624
625 *c32 = U_EOF;
626
627 /* Fill the buffer if it is empty */
628 str = &f->str;
629 if (f && str->fPos + 1 >= str->fLimit) {
630 ufile_fill_uchar_buffer(f);
631 }
632
633 /* Get the next character in the buffer */
634 if (str->fPos < str->fLimit) {
635 *c32 = *(str->fPos)++;
636 if (U_IS_LEAD(*c32)) {
637 if (str->fPos < str->fLimit) {
638 UChar c16 = *(str->fPos)++;
639 *c32 = U16_GET_SUPPLEMENTARY(*c32, c16);
640 isValidChar = TRUE;
641 }
642 else {
643 *c32 = U_EOF;
644 }
645 }
646 else {
647 isValidChar = TRUE;
648 }
649 }
650
651 return isValidChar;
652 }
653
654 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgetcx(UFILE * f)655 u_fgetcx(UFILE *f)
656 {
657 UChar32 ch;
658 ufile_getch32(f, &ch);
659 return ch;
660 }
661
662 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fungetc(UChar32 ch,UFILE * f)663 u_fungetc(UChar32 ch,
664 UFILE *f)
665 {
666 u_localized_string *str;
667
668 str = &f->str;
669
670 /* if we're at the beginning of the buffer, sorry! */
671 if (str->fPos == str->fBuffer
672 || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer))
673 {
674 ch = U_EOF;
675 }
676 else {
677 /* otherwise, put the character back */
678 /* Remember, read them back on in the reverse order. */
679 if (U_IS_LEAD(ch)) {
680 if (*--(str->fPos) != U16_TRAIL(ch)
681 || *--(str->fPos) != U16_LEAD(ch))
682 {
683 ch = U_EOF;
684 }
685 }
686 else if (*--(str->fPos) != ch) {
687 ch = U_EOF;
688 }
689 }
690 return ch;
691 }
692
693 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_file_read(UChar * chars,int32_t count,UFILE * f)694 u_file_read( UChar *chars,
695 int32_t count,
696 UFILE *f)
697 {
698 int32_t dataSize;
699 int32_t read = 0;
700 u_localized_string *str = &f->str;
701
702 do {
703
704 /* determine the amount of data in the buffer */
705 dataSize = (int32_t)(str->fLimit - str->fPos);
706 if (dataSize <= 0) {
707 /* fill the buffer */
708 ufile_fill_uchar_buffer(f);
709 dataSize = (int32_t)(str->fLimit - str->fPos);
710 }
711
712 /* Make sure that we don't read too much */
713 if (dataSize > (count - read)) {
714 dataSize = count - read;
715 }
716
717 /* copy the current data in the buffer */
718 memcpy(chars + read, str->fPos, dataSize * sizeof(UChar));
719
720 /* update number of items read */
721 read += dataSize;
722
723 /* update the current buffer position */
724 str->fPos += dataSize;
725 }
726 while (dataSize != 0 && read < count);
727
728 return read;
729 }
730 #endif
731