1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  ******************************************************************************
5  *
6  *   Copyright (C) 1998-2016, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  ******************************************************************************
10  *
11  * File ustdio.c
12  *
13  * Modification History:
14  *
15  *   Date        Name        Description
16  *   11/18/98    stephen     Creation.
17  *   03/12/99    stephen     Modified for new C API.
18  *   07/19/99    stephen     Fixed read() and gets()
19  ******************************************************************************
20  */
21 
22 #include "unicode/ustdio.h"
23 
24 #if !UCONFIG_NO_CONVERSION
25 
26 #include "unicode/putil.h"
27 #include "cmemory.h"
28 #include "cstring.h"
29 #include "ufile.h"
30 #include "ufmt_cmn.h"
31 #include "unicode/ucnv.h"
32 #include "unicode/ustring.h"
33 
34 #include <string.h>
35 
36 #define DELIM_LF 0x000A
37 #define DELIM_VT 0x000B
38 #define DELIM_FF 0x000C
39 #define DELIM_CR 0x000D
40 #define DELIM_NEL 0x0085
41 #define DELIM_LS 0x2028
42 #define DELIM_PS 0x2029
43 
44 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
45 #if U_PLATFORM_USES_ONLY_WIN32_API
46 static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
47 static const uint32_t DELIMITERS_LEN = 2;
48 /* TODO: Default newline writing should be detected based upon the converter being used. */
49 #else
50 static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 };
51 static const uint32_t DELIMITERS_LEN = 1;
52 #endif
53 
54 #define IS_FIRST_STRING_DELIMITER(c1) \
55  (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \
56         || (c1) == DELIM_NEL \
57         || (c1) == DELIM_LS \
58         || (c1) == DELIM_PS)
59 #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)
60 #define IS_COMBINED_STRING_DELIMITER(c1, c2) \
61  (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
62 
63 
64 #if !UCONFIG_NO_TRANSLITERATION
65 
66 U_CAPI UTransliterator* U_EXPORT2
u_fsettransliterator(UFILE * file,UFileDirection direction,UTransliterator * adopt,UErrorCode * status)67 u_fsettransliterator(UFILE *file, UFileDirection direction,
68                      UTransliterator *adopt, UErrorCode *status)
69 {
70     UTransliterator *old = NULL;
71 
72     if(U_FAILURE(*status))
73     {
74         return adopt;
75     }
76 
77     if(!file)
78     {
79         *status = U_ILLEGAL_ARGUMENT_ERROR;
80         return adopt;
81     }
82 
83     if(direction & U_READ)
84     {
85         /** TODO: implement */
86         *status = U_UNSUPPORTED_ERROR;
87         return adopt;
88     }
89 
90     if(adopt == NULL) /* they are clearing it */
91     {
92         if(file->fTranslit != NULL)
93         {
94             /* TODO: Check side */
95             old = file->fTranslit->translit;
96             uprv_free(file->fTranslit->buffer);
97             file->fTranslit->buffer=NULL;
98             uprv_free(file->fTranslit);
99             file->fTranslit=NULL;
100         }
101     }
102     else
103     {
104         if(file->fTranslit == NULL)
105         {
106             file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
107             if(!file->fTranslit)
108             {
109                 *status = U_MEMORY_ALLOCATION_ERROR;
110                 return adopt;
111             }
112             file->fTranslit->capacity = 0;
113             file->fTranslit->length = 0;
114             file->fTranslit->pos = 0;
115             file->fTranslit->buffer = NULL;
116         }
117         else
118         {
119             old = file->fTranslit->translit;
120             ufile_flush_translit(file);
121         }
122 
123         file->fTranslit->translit = adopt;
124     }
125 
126     return old;
127 }
128 
/*
 * Feeds `*count` UChars from `src` through the transliterator attached to
 * `f` and returns a pointer to the text that is ready to be written.
 *
 * f     - target file; if it has no transliterator, `src` is returned as-is.
 * src   - text to append to the pending transliteration buffer.
 * count - in: number of UChars in `src`; out: number of UChars ready at the
 *         returned pointer. May be NULL (treated as zero, result discarded).
 * flush - FALSE: transliterate incrementally, keeping unfinished text
 *         buffered; TRUE: transliterate everything that is pending.
 *
 * Returns the internal buffer, or `src` on the fast path. If the buffer
 * cannot be grown (allocation failure or size overflow), `*count` is set to
 * 0 and NULL is returned; the previously buffered text is kept intact.
 */
static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush)
{
    int32_t newlen;
    int32_t junkCount = 0;
    int32_t textLength;
    int32_t textLimit;
    UTransPosition pos;
    UErrorCode status = U_ZERO_ERROR;

    if(count == NULL)
    {
        count = &junkCount;
    }

    if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
    {
        /* fast path */
        return src;
    }

    /* First: slide the not-yet-consumed text to the front of the buffer */
    if(f->fTranslit->length > f->fTranslit->pos)
    {
        memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos,
            (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar));
    }
    f->fTranslit->length -= f->fTranslit->pos; /* always */
    f->fTranslit->pos = 0;

    /* Reject sizes for which (count + length) * 4 would overflow int32_t. */
    if(*count < 0 || *count > (INT32_MAX / 4) - f->fTranslit->length)
    {
        *count = 0;
        return NULL;
    }

    /* Calculate new buffer size needed (4x leaves room for expansion) */
    newlen = (*count + f->fTranslit->length) * 4;

    if(newlen > f->fTranslit->capacity)
    {
        UChar *grown;

        if(f->fTranslit->buffer == NULL)
        {
            grown = (UChar*)uprv_malloc(newlen * sizeof(UChar));
        }
        else
        {
            grown = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar));
        }
        /* On failure keep the old buffer so length/capacity stay consistent. */
        if (grown == NULL) {
            *count = 0;
            return NULL;
        }
        f->fTranslit->buffer = grown;
        f->fTranslit->capacity = newlen;
    }

    /* Now, copy the new data over. A raw copy is used so that embedded
       U+0000 units do not truncate the text that `length` accounts for. */
    if(*count > 0)
    {
        memcpy(f->fTranslit->buffer + f->fTranslit->length,
            src,
            (size_t)*count * sizeof(UChar));
    }
    f->fTranslit->length += *count;

    /* Now, translit in place as much as we can  */
    if(flush == FALSE)
    {
        textLength = f->fTranslit->length;
        pos.contextStart = 0;
        pos.contextLimit = textLength;
        pos.start        = 0;
        pos.limit        = textLength;

        utrans_transIncrementalUChars(f->fTranslit->translit,
            f->fTranslit->buffer, /* because we shifted */
            &textLength,
            f->fTranslit->capacity,
            &pos,
            &status);

        /* now: start/limit point to the transliterated text */
        /* Transliterated is [buffer..pos.start) */
        *count            = pos.start;
        f->fTranslit->pos = pos.start;
        f->fTranslit->length = pos.limit;

        return f->fTranslit->buffer;
    }
    else
    {
        textLength = f->fTranslit->length;
        textLimit = f->fTranslit->length;

        utrans_transUChars(f->fTranslit->translit,
            f->fTranslit->buffer,
            &textLength,
            f->fTranslit->capacity,
            0,
            &textLimit,
            &status);

        /* out: converted len */
        *count = textLimit;

        /* Everything was handed out: the buffer is logically empty again */
        f->fTranslit->pos = 0;
        f->fTranslit->length = 0;

        return f->fTranslit->buffer;
    }
}
231 
232 #endif
233 
234 void
ufile_flush_translit(UFILE * f)235 ufile_flush_translit(UFILE *f)
236 {
237 #if !UCONFIG_NO_TRANSLITERATION
238     if((!f)||(!f->fTranslit))
239         return;
240 #endif
241 
242     u_file_write_flush(NULL, 0, f, FALSE, TRUE);
243 }
244 
245 
246 void
ufile_flush_io(UFILE * f)247 ufile_flush_io(UFILE *f)
248 {
249   if((!f) || (!f->fFile)) {
250     return; /* skip if no file */
251   }
252 
253   u_file_write_flush(NULL, 0, f, TRUE, FALSE);
254 }
255 
256 
257 void
ufile_close_translit(UFILE * f)258 ufile_close_translit(UFILE *f)
259 {
260 #if !UCONFIG_NO_TRANSLITERATION
261     if((!f)||(!f->fTranslit))
262         return;
263 #endif
264 
265     ufile_flush_translit(f);
266 
267 #if !UCONFIG_NO_TRANSLITERATION
268     if(f->fTranslit->translit)
269         utrans_close(f->fTranslit->translit);
270 
271     if(f->fTranslit->buffer)
272     {
273         uprv_free(f->fTranslit->buffer);
274     }
275 
276     uprv_free(f->fTranslit);
277     f->fTranslit = NULL;
278 #endif
279 }
280 
281 
282 /* Input/output */
283 
284 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fputs(const UChar * s,UFILE * f)285 u_fputs(const UChar    *s,
286         UFILE        *f)
287 {
288     int32_t count = u_file_write(s, u_strlen(s), f);
289     count += u_file_write(DELIMITERS, DELIMITERS_LEN, f);
290     return count;
291 }
292 
293 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fputc(UChar32 uc,UFILE * f)294 u_fputc(UChar32      uc,
295         UFILE        *f)
296 {
297     UChar buf[2];
298     int32_t idx = 0;
299     UBool isError = FALSE;
300 
301     U16_APPEND(buf, idx, UPRV_LENGTHOF(buf), uc, isError);
302     if (isError) {
303         return U_EOF;
304     }
305     return u_file_write(buf, idx, f) == idx ? uc : U_EOF;
306 }
307 
308 
309 U_CFUNC int32_t U_EXPORT2
u_file_write_flush(const UChar * chars,int32_t count,UFILE * f,UBool flushIO,UBool flushTranslit)310 u_file_write_flush(const UChar *chars,
311                    int32_t     count,
312                    UFILE       *f,
313                    UBool       flushIO,
314                    UBool       flushTranslit)
315 {
316     /* Set up conversion parameters */
317     UErrorCode  status       = U_ZERO_ERROR;
318     const UChar *mySource    = chars;
319     const UChar *mySourceBegin;
320     const UChar *mySourceEnd;
321     char        charBuffer[UFILE_CHARBUFFER_SIZE];
322     char        *myTarget   = charBuffer;
323     int32_t     written      = 0;
324     int32_t     numConverted = 0;
325 
326     if (count < 0) {
327         count = u_strlen(chars);
328     }
329 
330 #if !UCONFIG_NO_TRANSLITERATION
331     if((f->fTranslit) && (f->fTranslit->translit))
332     {
333         /* Do the transliteration */
334         mySource = u_file_translit(f, chars, &count, flushTranslit);
335     }
336 #endif
337 
338     /* Write to a string. */
339     if (!f->fFile) {
340         int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
341         if (flushIO && charsLeft > count) {
342             count++;
343         }
344         written = ufmt_min(count, charsLeft);
345         u_strncpy(f->str.fPos, mySource, written);
346         f->str.fPos += written;
347         return written;
348     }
349 
350     mySourceEnd = mySource + count;
351 
352     /* Perform the conversion in a loop */
353     do {
354         mySourceBegin = mySource; /* beginning location for this loop */
355         status     = U_ZERO_ERROR;
356         if(f->fConverter != NULL) { /* We have a valid converter */
357             ucnv_fromUnicode(f->fConverter,
358                 &myTarget,
359                 charBuffer + UFILE_CHARBUFFER_SIZE,
360                 &mySource,
361                 mySourceEnd,
362                 NULL,
363                 flushIO,
364                 &status);
365         } else { /*weiv: do the invariant conversion */
366             int32_t convertChars = (int32_t) (mySourceEnd - mySource);
367             if (convertChars > UFILE_CHARBUFFER_SIZE) {
368                 convertChars = UFILE_CHARBUFFER_SIZE;
369                 status = U_BUFFER_OVERFLOW_ERROR;
370             }
371             u_UCharsToChars(mySource, myTarget, convertChars);
372             mySource += convertChars;
373             myTarget += convertChars;
374         }
375         numConverted = (int32_t)(myTarget - charBuffer);
376 
377         if (numConverted > 0) {
378             /* write the converted bytes */
379             fwrite(charBuffer,
380                 sizeof(char),
381                 numConverted,
382                 f->fFile);
383 
384             written     += (int32_t) (mySource - mySourceBegin);
385         }
386         myTarget     = charBuffer;
387     }
388     while(status == U_BUFFER_OVERFLOW_ERROR);
389 
390     /* return # of chars written */
391     return written;
392 }
393 
394 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_file_write(const UChar * chars,int32_t count,UFILE * f)395 u_file_write(    const UChar     *chars,
396              int32_t        count,
397              UFILE         *f)
398 {
399     return u_file_write_flush(chars,count,f,FALSE,FALSE);
400 }
401 
402 
403 /* private function used for buffering input */
404 void
ufile_fill_uchar_buffer(UFILE * f)405 ufile_fill_uchar_buffer(UFILE *f)
406 {
407     UErrorCode  status;
408     const char  *mySource;
409     const char  *mySourceEnd;
410     UChar       *myTarget;
411     int32_t     bufferSize;
412     int32_t     maxCPBytes;
413     int32_t     bytesRead;
414     int32_t     availLength;
415     int32_t     dataSize;
416     char        charBuffer[UFILE_CHARBUFFER_SIZE];
417     u_localized_string *str;
418 
419     if (f->fFile == NULL) {
420         /* There is nothing to do. It's a string. */
421         return;
422     }
423 
424     str = &f->str;
425     dataSize = (int32_t)(str->fLimit - str->fPos);
426     if (f->fFileno == 0 && dataSize > 0) {
427         /* Don't read from stdin too many times. There is still some data. */
428         return;
429     }
430 
431     /* shift the buffer if it isn't empty */
432     if(dataSize != 0) {
433         u_memmove(f->fUCBuffer, str->fPos, dataSize); /* not accessing beyond memory */
434     }
435 
436 
437     /* record how much buffer space is available */
438     availLength = UFILE_UCHARBUFFER_SIZE - dataSize;
439 
440     /* Determine the # of codepage bytes needed to fill our UChar buffer */
441     /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/
442     maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1);
443 
444     /* Read in the data to convert */
445     if (f->fFileno == 0) {
446         /* Special case. Read from stdin one line at a time. */
447         char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile);
448         bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0);
449     }
450     else {
451         /* A normal file */
452         bytesRead = (int32_t)fread(charBuffer,
453             sizeof(char),
454             ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE),
455             f->fFile);
456     }
457 
458     /* Set up conversion parameters */
459     status      = U_ZERO_ERROR;
460     mySource    = charBuffer;
461     mySourceEnd = charBuffer + bytesRead;
462     myTarget    = f->fUCBuffer + dataSize;
463     bufferSize  = UFILE_UCHARBUFFER_SIZE;
464 
465     if(f->fConverter != NULL) { /* We have a valid converter */
466         /* Perform the conversion */
467         ucnv_toUnicode(f->fConverter,
468             &myTarget,
469             f->fUCBuffer + bufferSize,
470             &mySource,
471             mySourceEnd,
472             NULL,
473             (UBool)(feof(f->fFile) != 0),
474             &status);
475 
476     } else { /*weiv: do the invariant conversion */
477         u_charsToUChars(mySource, myTarget, bytesRead);
478         myTarget += bytesRead;
479     }
480 
481     /* update the pointers into our array */
482     str->fPos    = str->fBuffer;
483     str->fLimit  = myTarget;
484 }
485 
486 U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgets(UChar * s,int32_t n,UFILE * f)487 u_fgets(UChar        *s,
488         int32_t       n,
489         UFILE        *f)
490 {
491     int32_t dataSize;
492     int32_t count;
493     UChar *alias;
494     const UChar *limit;
495     UChar *sItr;
496     UChar currDelim = 0;
497     u_localized_string *str;
498 
499     if (n <= 0) {
500         /* Caller screwed up. We need to write the null terminatior. */
501         return NULL;
502     }
503 
504     /* fill the buffer if needed */
505     str = &f->str;
506     if (str->fPos >= str->fLimit) {
507         ufile_fill_uchar_buffer(f);
508     }
509 
510     /* subtract 1 from n to compensate for the terminator */
511     --n;
512 
513     /* determine the amount of data in the buffer */
514     dataSize = (int32_t)(str->fLimit - str->fPos);
515 
516     /* if 0 characters were left, return 0 */
517     if (dataSize == 0)
518         return NULL;
519 
520     /* otherwise, iteratively fill the buffer and copy */
521     count = 0;
522     sItr = s;
523     currDelim = 0;
524     while (dataSize > 0 && count < n) {
525         alias = str->fPos;
526 
527         /* Find how much to copy */
528         if (dataSize < (n - count)) {
529             limit = str->fLimit;
530         }
531         else {
532             limit = alias + (n - count);
533         }
534 
535         if (!currDelim) {
536             /* Copy UChars until we find the first occurrence of a delimiter character */
537             while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
538                 count++;
539                 *(sItr++) = *(alias++);
540             }
541             /* Preserve the newline */
542             if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
543                 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
544                     currDelim = *alias;
545                 }
546                 else {
547                     currDelim = 1;  /* This isn't a newline, but it's used to say
548                                     that we should break later. We've checked all
549                                     possible newline combinations even across buffer
550                                     boundaries. */
551                 }
552                 count++;
553                 *(sItr++) = *(alias++);
554             }
555         }
556         /* If we have a CRLF combination, preserve that too. */
557         if (alias < limit) {
558             if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
559                 count++;
560                 *(sItr++) = *(alias++);
561             }
562             currDelim = 1;  /* This isn't a newline, but it's used to say
563                             that we should break later. We've checked all
564                             possible newline combinations even across buffer
565                             boundaries. */
566         }
567 
568         /* update the current buffer position */
569         str->fPos = alias;
570 
571         /* if we found a delimiter */
572         if (currDelim == 1) {
573             /* break out */
574             break;
575         }
576 
577         /* refill the buffer */
578         ufile_fill_uchar_buffer(f);
579 
580         /* determine the amount of data in the buffer */
581         dataSize = (int32_t)(str->fLimit - str->fPos);
582     }
583 
584     /* add the terminator and return s */
585     *sItr = 0x0000;
586     return s;
587 }
588 
589 U_CFUNC UBool U_EXPORT2
ufile_getch(UFILE * f,UChar * ch)590 ufile_getch(UFILE *f, UChar *ch)
591 {
592     UBool isValidChar = FALSE;
593 
594     *ch = U_EOF;
595     /* if we have an available character in the buffer, return it */
596     if(f->str.fPos < f->str.fLimit){
597         *ch = *(f->str.fPos)++;
598         isValidChar = TRUE;
599     }
600     else {
601         /* otherwise, fill the buffer and return the next character */
602         if(f->str.fPos >= f->str.fLimit) {
603             ufile_fill_uchar_buffer(f);
604         }
605         if(f->str.fPos < f->str.fLimit) {
606             *ch = *(f->str.fPos)++;
607             isValidChar = TRUE;
608         }
609     }
610     return isValidChar;
611 }
612 
613 U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgetc(UFILE * f)614 u_fgetc(UFILE        *f)
615 {
616     UChar ch;
617     ufile_getch(f, &ch);
618     return ch;
619 }
620 
621 U_CFUNC UBool U_EXPORT2
ufile_getch32(UFILE * f,UChar32 * c32)622 ufile_getch32(UFILE *f, UChar32 *c32)
623 {
624     UBool isValidChar = FALSE;
625     u_localized_string *str;
626 
627     *c32 = U_EOF;
628 
629     /* Fill the buffer if it is empty */
630     str = &f->str;
631     if (f && str->fPos + 1 >= str->fLimit) {
632         ufile_fill_uchar_buffer(f);
633     }
634 
635     /* Get the next character in the buffer */
636     if (str->fPos < str->fLimit) {
637         *c32 = *(str->fPos)++;
638         if (U_IS_LEAD(*c32)) {
639             if (str->fPos < str->fLimit) {
640                 UChar c16 = *(str->fPos)++;
641                 *c32 = U16_GET_SUPPLEMENTARY(*c32, c16);
642                 isValidChar = TRUE;
643             }
644             else {
645                 *c32 = U_EOF;
646             }
647         }
648         else {
649             isValidChar = TRUE;
650         }
651     }
652 
653     return isValidChar;
654 }
655 
656 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgetcx(UFILE * f)657 u_fgetcx(UFILE        *f)
658 {
659     UChar32 ch;
660     ufile_getch32(f, &ch);
661     return ch;
662 }
663 
664 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fungetc(UChar32 ch,UFILE * f)665 u_fungetc(UChar32        ch,
666     UFILE        *f)
667 {
668     u_localized_string *str;
669 
670     str = &f->str;
671 
672     /* if we're at the beginning of the buffer, sorry! */
673     if (str->fPos == str->fBuffer
674         || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer))
675     {
676         ch = U_EOF;
677     }
678     else {
679         /* otherwise, put the character back */
680         /* Remember, read them back on in the reverse order. */
681         if (U_IS_LEAD(ch)) {
682             if (*--(str->fPos) != U16_TRAIL(ch)
683                 || *--(str->fPos) != U16_LEAD(ch))
684             {
685                 ch = U_EOF;
686             }
687         }
688         else if (*--(str->fPos) != ch) {
689             ch = U_EOF;
690         }
691     }
692     return ch;
693 }
694 
695 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_file_read(UChar * chars,int32_t count,UFILE * f)696 u_file_read(    UChar        *chars,
697     int32_t        count,
698     UFILE         *f)
699 {
700     int32_t dataSize;
701     int32_t read = 0;
702     u_localized_string *str = &f->str;
703 
704     do {
705 
706         /* determine the amount of data in the buffer */
707         dataSize = (int32_t)(str->fLimit - str->fPos);
708         if (dataSize <= 0) {
709             /* fill the buffer */
710             ufile_fill_uchar_buffer(f);
711             dataSize = (int32_t)(str->fLimit - str->fPos);
712         }
713 
714         /* Make sure that we don't read too much */
715         if (dataSize > (count - read)) {
716             dataSize = count - read;
717         }
718 
719         /* copy the current data in the buffer */
720         memcpy(chars + read, str->fPos, dataSize * sizeof(UChar));
721 
722         /* update number of items read */
723         read += dataSize;
724 
725         /* update the current buffer position */
726         str->fPos += dataSize;
727     }
728     while (dataSize != 0 && read < count);
729 
730     return read;
731 }
732 #endif
733