1 /*
2  ******************************************************************************
3  *
4  *   Copyright (C) 1998-2014, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  ******************************************************************************
8  *
9  * File ustdio.c
10  *
11  * Modification History:
12  *
13  *   Date        Name        Description
14  *   11/18/98    stephen     Creation.
15  *   03/12/99    stephen     Modified for new C API.
16  *   07/19/99    stephen     Fixed read() and gets()
17  ******************************************************************************
18  */
19 
20 #include "unicode/ustdio.h"
21 
22 #if !UCONFIG_NO_CONVERSION
23 
24 #include "unicode/putil.h"
25 #include "cmemory.h"
26 #include "cstring.h"
27 #include "ufile.h"
28 #include "ufmt_cmn.h"
29 #include "unicode/ucnv.h"
30 #include "unicode/ustring.h"
31 
32 #include <string.h>
33 
34 #define DELIM_LF 0x000A
35 #define DELIM_VT 0x000B
36 #define DELIM_FF 0x000C
37 #define DELIM_CR 0x000D
38 #define DELIM_NEL 0x0085
39 #define DELIM_LS 0x2028
40 #define DELIM_PS 0x2029
41 
42 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
43 #if U_PLATFORM_USES_ONLY_WIN32_API
44 static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
45 static const uint32_t DELIMITERS_LEN = 2;
46 /* TODO: Default newline writing should be detected based upon the converter being used. */
47 #else
48 static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 };
49 static const uint32_t DELIMITERS_LEN = 1;
50 #endif
51 
52 #define IS_FIRST_STRING_DELIMITER(c1) \
53  (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \
54         || (c1) == DELIM_NEL \
55         || (c1) == DELIM_LS \
56         || (c1) == DELIM_PS)
57 #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)
58 #define IS_COMBINED_STRING_DELIMITER(c1, c2) \
59  (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
60 
61 
62 #if !UCONFIG_NO_TRANSLITERATION
63 
64 U_CAPI UTransliterator* U_EXPORT2
u_fsettransliterator(UFILE * file,UFileDirection direction,UTransliterator * adopt,UErrorCode * status)65 u_fsettransliterator(UFILE *file, UFileDirection direction,
66                      UTransliterator *adopt, UErrorCode *status)
67 {
68     UTransliterator *old = NULL;
69 
70     if(U_FAILURE(*status))
71     {
72         return adopt;
73     }
74 
75     if(!file)
76     {
77         *status = U_ILLEGAL_ARGUMENT_ERROR;
78         return adopt;
79     }
80 
81     if(direction & U_READ)
82     {
83         /** TODO: implement */
84         *status = U_UNSUPPORTED_ERROR;
85         return adopt;
86     }
87 
88     if(adopt == NULL) /* they are clearing it */
89     {
90         if(file->fTranslit != NULL)
91         {
92             /* TODO: Check side */
93             old = file->fTranslit->translit;
94             uprv_free(file->fTranslit->buffer);
95             file->fTranslit->buffer=NULL;
96             uprv_free(file->fTranslit);
97             file->fTranslit=NULL;
98         }
99     }
100     else
101     {
102         if(file->fTranslit == NULL)
103         {
104             file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
105             if(!file->fTranslit)
106             {
107                 *status = U_MEMORY_ALLOCATION_ERROR;
108                 return adopt;
109             }
110             file->fTranslit->capacity = 0;
111             file->fTranslit->length = 0;
112             file->fTranslit->pos = 0;
113             file->fTranslit->buffer = NULL;
114         }
115         else
116         {
117             old = file->fTranslit->translit;
118             ufile_flush_translit(file);
119         }
120 
121         file->fTranslit->translit = adopt;
122     }
123 
124     return old;
125 }
126 
/**
 * Run `src` through the file's transliterator, using the file's private
 * transliteration buffer as working storage.
 *
 * @param f     File whose transliterator is used. If f, its translit state,
 *              or the transliterator itself is NULL, `src` is returned as-is.
 * @param src   Text to append to the pending (not yet transliterated) text.
 * @param count In: number of UChars in `src`. Out: number of UChars at the
 *              start of the returned buffer that are ready to be written.
 *              May be NULL, in which case a local dummy is used.
 * @param flush FALSE: transliterate incrementally and keep unfinished text
 *              buffered for the next call. Otherwise transliterate everything
 *              and reset the buffer bookkeeping.
 * @return Pointer to the text to write (either `src` or the internal buffer),
 *         or NULL if the internal buffer could not be grown. On that failure
 *         the previously buffered text and capacity are left intact.
 */
static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush)
{
    int32_t newlen;
    int32_t junkCount = 0;
    int32_t textLength;
    int32_t textLimit;
    UTransPosition pos;
    UErrorCode status = U_ZERO_ERROR;

    if(count == NULL)
    {
        count = &junkCount;
    }

    if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
    {
        /* fast path */
        return src;
    }

    /* First: slide the still-pending text down to the start of the buffer */
    if(f->fTranslit->length > f->fTranslit->pos)
    {
        memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos,
            (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar));
    }
    f->fTranslit->length -= f->fTranslit->pos; /* always */
    f->fTranslit->pos = 0;

    /* Calculate new buffer size needed (4x headroom for the in-place result) */
    newlen = (*count + f->fTranslit->length) * 4;

    if(newlen > f->fTranslit->capacity)
    {
        UChar *grown;

        if(f->fTranslit->buffer == NULL)
        {
            grown = (UChar*)uprv_malloc(newlen * sizeof(UChar));
        }
        else
        {
            grown = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar));
        }
        /*
         * Only commit the new pointer on success. Assigning the realloc
         * result directly would leak the old block on failure and leave
         * buffer == NULL with a stale non-zero capacity/length.
         */
        if (grown == NULL) {
            return NULL;
        }
        f->fTranslit->buffer = grown;
        f->fTranslit->capacity = newlen;
    }

    /* Now, append the new data after the pending text */
    u_strncpy(f->fTranslit->buffer + f->fTranslit->length,
        src,
        *count);
    f->fTranslit->length += *count;

    /* Now, translit in place as much as we can  */
    if(flush == FALSE)
    {
        textLength = f->fTranslit->length;
        pos.contextStart = 0;
        pos.contextLimit = textLength;
        pos.start        = 0;
        pos.limit        = textLength;

        utrans_transIncrementalUChars(f->fTranslit->translit,
            f->fTranslit->buffer, /* because we shifted */
            &textLength,
            f->fTranslit->capacity,
            &pos,
            &status);

        /* now: start/limit point to the transliterated text */
        /* Transliterated is [buffer..pos.start) */
        *count            = pos.start;
        f->fTranslit->pos = pos.start;
        f->fTranslit->length = pos.limit;

        return f->fTranslit->buffer;
    }
    else
    {
        textLength = f->fTranslit->length;
        textLimit = f->fTranslit->length;

        utrans_transUChars(f->fTranslit->translit,
            f->fTranslit->buffer,
            &textLength,
            f->fTranslit->capacity,
            0,
            &textLimit,
            &status);

        /* out: converted len */
        *count = textLimit;

        /* Everything has been handed to the caller: nothing stays pending */
        f->fTranslit->pos = 0;
        f->fTranslit->length = 0;

        return f->fTranslit->buffer;
    }
}
229 
230 #endif
231 
232 void
ufile_flush_translit(UFILE * f)233 ufile_flush_translit(UFILE *f)
234 {
235 #if !UCONFIG_NO_TRANSLITERATION
236     if((!f)||(!f->fTranslit))
237         return;
238 #endif
239 
240     u_file_write_flush(NULL, 0, f, FALSE, TRUE);
241 }
242 
243 
244 void
ufile_flush_io(UFILE * f)245 ufile_flush_io(UFILE *f)
246 {
247   if((!f) || (!f->fFile)) {
248     return; /* skip if no file */
249   }
250 
251   u_file_write_flush(NULL, 0, f, TRUE, FALSE);
252 }
253 
254 
255 void
ufile_close_translit(UFILE * f)256 ufile_close_translit(UFILE *f)
257 {
258 #if !UCONFIG_NO_TRANSLITERATION
259     if((!f)||(!f->fTranslit))
260         return;
261 #endif
262 
263     ufile_flush_translit(f);
264 
265 #if !UCONFIG_NO_TRANSLITERATION
266     if(f->fTranslit->translit)
267         utrans_close(f->fTranslit->translit);
268 
269     if(f->fTranslit->buffer)
270     {
271         uprv_free(f->fTranslit->buffer);
272     }
273 
274     uprv_free(f->fTranslit);
275     f->fTranslit = NULL;
276 #endif
277 }
278 
279 
280 /* Input/output */
281 
282 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fputs(const UChar * s,UFILE * f)283 u_fputs(const UChar    *s,
284         UFILE        *f)
285 {
286     int32_t count = u_file_write(s, u_strlen(s), f);
287     count += u_file_write(DELIMITERS, DELIMITERS_LEN, f);
288     return count;
289 }
290 
291 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fputc(UChar32 uc,UFILE * f)292 u_fputc(UChar32      uc,
293         UFILE        *f)
294 {
295     UChar buf[2];
296     int32_t idx = 0;
297     UBool isError = FALSE;
298 
299     U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError);
300     if (isError) {
301         return U_EOF;
302     }
303     return u_file_write(buf, idx, f) == idx ? uc : U_EOF;
304 }
305 
306 
307 U_CFUNC int32_t U_EXPORT2
u_file_write_flush(const UChar * chars,int32_t count,UFILE * f,UBool flushIO,UBool flushTranslit)308 u_file_write_flush(const UChar *chars,
309                    int32_t     count,
310                    UFILE       *f,
311                    UBool       flushIO,
312                    UBool       flushTranslit)
313 {
314     /* Set up conversion parameters */
315     UErrorCode  status       = U_ZERO_ERROR;
316     const UChar *mySource    = chars;
317     const UChar *mySourceBegin;
318     const UChar *mySourceEnd;
319     char        charBuffer[UFILE_CHARBUFFER_SIZE];
320     char        *myTarget   = charBuffer;
321     int32_t     written      = 0;
322     int32_t     numConverted = 0;
323 
324     if (count < 0) {
325         count = u_strlen(chars);
326     }
327 
328 #if !UCONFIG_NO_TRANSLITERATION
329     if((f->fTranslit) && (f->fTranslit->translit))
330     {
331         /* Do the transliteration */
332         mySource = u_file_translit(f, chars, &count, flushTranslit);
333     }
334 #endif
335 
336     /* Write to a string. */
337     if (!f->fFile) {
338         int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
339         if (flushIO && charsLeft > count) {
340             count++;
341         }
342         written = ufmt_min(count, charsLeft);
343         u_strncpy(f->str.fPos, mySource, written);
344         f->str.fPos += written;
345         return written;
346     }
347 
348     mySourceEnd = mySource + count;
349 
350     /* Perform the conversion in a loop */
351     do {
352         mySourceBegin = mySource; /* beginning location for this loop */
353         status     = U_ZERO_ERROR;
354         if(f->fConverter != NULL) { /* We have a valid converter */
355             ucnv_fromUnicode(f->fConverter,
356                 &myTarget,
357                 charBuffer + UFILE_CHARBUFFER_SIZE,
358                 &mySource,
359                 mySourceEnd,
360                 NULL,
361                 flushIO,
362                 &status);
363         } else { /*weiv: do the invariant conversion */
364             int32_t convertChars = (int32_t) (mySourceEnd - mySource);
365             if (convertChars > UFILE_CHARBUFFER_SIZE) {
366                 convertChars = UFILE_CHARBUFFER_SIZE;
367                 status = U_BUFFER_OVERFLOW_ERROR;
368             }
369             u_UCharsToChars(mySource, myTarget, convertChars);
370             mySource += convertChars;
371             myTarget += convertChars;
372         }
373         numConverted = (int32_t)(myTarget - charBuffer);
374 
375         if (numConverted > 0) {
376             /* write the converted bytes */
377             fwrite(charBuffer,
378                 sizeof(char),
379                 numConverted,
380                 f->fFile);
381 
382             written     += (int32_t) (mySource - mySourceBegin);
383         }
384         myTarget     = charBuffer;
385     }
386     while(status == U_BUFFER_OVERFLOW_ERROR);
387 
388     /* return # of chars written */
389     return written;
390 }
391 
392 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_file_write(const UChar * chars,int32_t count,UFILE * f)393 u_file_write(    const UChar     *chars,
394              int32_t        count,
395              UFILE         *f)
396 {
397     return u_file_write_flush(chars,count,f,FALSE,FALSE);
398 }
399 
400 
401 /* private function used for buffering input */
402 void
ufile_fill_uchar_buffer(UFILE * f)403 ufile_fill_uchar_buffer(UFILE *f)
404 {
405     UErrorCode  status;
406     const char  *mySource;
407     const char  *mySourceEnd;
408     UChar       *myTarget;
409     int32_t     bufferSize;
410     int32_t     maxCPBytes;
411     int32_t     bytesRead;
412     int32_t     availLength;
413     int32_t     dataSize;
414     char        charBuffer[UFILE_CHARBUFFER_SIZE];
415     u_localized_string *str;
416 
417     if (f->fFile == NULL) {
418         /* There is nothing to do. It's a string. */
419         return;
420     }
421 
422     str = &f->str;
423     dataSize = (int32_t)(str->fLimit - str->fPos);
424     if (f->fFileno == 0 && dataSize > 0) {
425         /* Don't read from stdin too many times. There is still some data. */
426         return;
427     }
428 
429     /* shift the buffer if it isn't empty */
430     if(dataSize != 0) {
431         uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar)); /* not accessing beyond memory */
432     }
433 
434 
435     /* record how much buffer space is available */
436     availLength = UFILE_UCHARBUFFER_SIZE - dataSize;
437 
438     /* Determine the # of codepage bytes needed to fill our UChar buffer */
439     /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/
440     maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1);
441 
442     /* Read in the data to convert */
443     if (f->fFileno == 0) {
444         /* Special case. Read from stdin one line at a time. */
445         char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile);
446         bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0);
447     }
448     else {
449         /* A normal file */
450         bytesRead = (int32_t)fread(charBuffer,
451             sizeof(char),
452             ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE),
453             f->fFile);
454     }
455 
456     /* Set up conversion parameters */
457     status      = U_ZERO_ERROR;
458     mySource    = charBuffer;
459     mySourceEnd = charBuffer + bytesRead;
460     myTarget    = f->fUCBuffer + dataSize;
461     bufferSize  = UFILE_UCHARBUFFER_SIZE;
462 
463     if(f->fConverter != NULL) { /* We have a valid converter */
464         /* Perform the conversion */
465         ucnv_toUnicode(f->fConverter,
466             &myTarget,
467             f->fUCBuffer + bufferSize,
468             &mySource,
469             mySourceEnd,
470             NULL,
471             (UBool)(feof(f->fFile) != 0),
472             &status);
473 
474     } else { /*weiv: do the invariant conversion */
475         u_charsToUChars(mySource, myTarget, bytesRead);
476         myTarget += bytesRead;
477     }
478 
479     /* update the pointers into our array */
480     str->fPos    = str->fBuffer;
481     str->fLimit  = myTarget;
482 }
483 
484 U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgets(UChar * s,int32_t n,UFILE * f)485 u_fgets(UChar        *s,
486         int32_t       n,
487         UFILE        *f)
488 {
489     int32_t dataSize;
490     int32_t count;
491     UChar *alias;
492     const UChar *limit;
493     UChar *sItr;
494     UChar currDelim = 0;
495     u_localized_string *str;
496 
497     if (n <= 0) {
498         /* Caller screwed up. We need to write the null terminatior. */
499         return NULL;
500     }
501 
502     /* fill the buffer if needed */
503     str = &f->str;
504     if (str->fPos >= str->fLimit) {
505         ufile_fill_uchar_buffer(f);
506     }
507 
508     /* subtract 1 from n to compensate for the terminator */
509     --n;
510 
511     /* determine the amount of data in the buffer */
512     dataSize = (int32_t)(str->fLimit - str->fPos);
513 
514     /* if 0 characters were left, return 0 */
515     if (dataSize == 0)
516         return NULL;
517 
518     /* otherwise, iteratively fill the buffer and copy */
519     count = 0;
520     sItr = s;
521     currDelim = 0;
522     while (dataSize > 0 && count < n) {
523         alias = str->fPos;
524 
525         /* Find how much to copy */
526         if (dataSize < (n - count)) {
527             limit = str->fLimit;
528         }
529         else {
530             limit = alias + (n - count);
531         }
532 
533         if (!currDelim) {
534             /* Copy UChars until we find the first occurrence of a delimiter character */
535             while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
536                 count++;
537                 *(sItr++) = *(alias++);
538             }
539             /* Preserve the newline */
540             if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
541                 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
542                     currDelim = *alias;
543                 }
544                 else {
545                     currDelim = 1;  /* This isn't a newline, but it's used to say
546                                     that we should break later. We've checked all
547                                     possible newline combinations even across buffer
548                                     boundaries. */
549                 }
550                 count++;
551                 *(sItr++) = *(alias++);
552             }
553         }
554         /* If we have a CRLF combination, preserve that too. */
555         if (alias < limit) {
556             if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
557                 count++;
558                 *(sItr++) = *(alias++);
559             }
560             currDelim = 1;  /* This isn't a newline, but it's used to say
561                             that we should break later. We've checked all
562                             possible newline combinations even across buffer
563                             boundaries. */
564         }
565 
566         /* update the current buffer position */
567         str->fPos = alias;
568 
569         /* if we found a delimiter */
570         if (currDelim == 1) {
571             /* break out */
572             break;
573         }
574 
575         /* refill the buffer */
576         ufile_fill_uchar_buffer(f);
577 
578         /* determine the amount of data in the buffer */
579         dataSize = (int32_t)(str->fLimit - str->fPos);
580     }
581 
582     /* add the terminator and return s */
583     *sItr = 0x0000;
584     return s;
585 }
586 
587 U_CFUNC UBool U_EXPORT2
ufile_getch(UFILE * f,UChar * ch)588 ufile_getch(UFILE *f, UChar *ch)
589 {
590     UBool isValidChar = FALSE;
591 
592     *ch = U_EOF;
593     /* if we have an available character in the buffer, return it */
594     if(f->str.fPos < f->str.fLimit){
595         *ch = *(f->str.fPos)++;
596         isValidChar = TRUE;
597     }
598     else {
599         /* otherwise, fill the buffer and return the next character */
600         if(f->str.fPos >= f->str.fLimit) {
601             ufile_fill_uchar_buffer(f);
602         }
603         if(f->str.fPos < f->str.fLimit) {
604             *ch = *(f->str.fPos)++;
605             isValidChar = TRUE;
606         }
607     }
608     return isValidChar;
609 }
610 
611 U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgetc(UFILE * f)612 u_fgetc(UFILE        *f)
613 {
614     UChar ch;
615     ufile_getch(f, &ch);
616     return ch;
617 }
618 
619 U_CFUNC UBool U_EXPORT2
ufile_getch32(UFILE * f,UChar32 * c32)620 ufile_getch32(UFILE *f, UChar32 *c32)
621 {
622     UBool isValidChar = FALSE;
623     u_localized_string *str;
624 
625     *c32 = U_EOF;
626 
627     /* Fill the buffer if it is empty */
628     str = &f->str;
629     if (f && str->fPos + 1 >= str->fLimit) {
630         ufile_fill_uchar_buffer(f);
631     }
632 
633     /* Get the next character in the buffer */
634     if (str->fPos < str->fLimit) {
635         *c32 = *(str->fPos)++;
636         if (U_IS_LEAD(*c32)) {
637             if (str->fPos < str->fLimit) {
638                 UChar c16 = *(str->fPos)++;
639                 *c32 = U16_GET_SUPPLEMENTARY(*c32, c16);
640                 isValidChar = TRUE;
641             }
642             else {
643                 *c32 = U_EOF;
644             }
645         }
646         else {
647             isValidChar = TRUE;
648         }
649     }
650 
651     return isValidChar;
652 }
653 
654 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgetcx(UFILE * f)655 u_fgetcx(UFILE        *f)
656 {
657     UChar32 ch;
658     ufile_getch32(f, &ch);
659     return ch;
660 }
661 
662 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fungetc(UChar32 ch,UFILE * f)663 u_fungetc(UChar32        ch,
664     UFILE        *f)
665 {
666     u_localized_string *str;
667 
668     str = &f->str;
669 
670     /* if we're at the beginning of the buffer, sorry! */
671     if (str->fPos == str->fBuffer
672         || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer))
673     {
674         ch = U_EOF;
675     }
676     else {
677         /* otherwise, put the character back */
678         /* Remember, read them back on in the reverse order. */
679         if (U_IS_LEAD(ch)) {
680             if (*--(str->fPos) != U16_TRAIL(ch)
681                 || *--(str->fPos) != U16_LEAD(ch))
682             {
683                 ch = U_EOF;
684             }
685         }
686         else if (*--(str->fPos) != ch) {
687             ch = U_EOF;
688         }
689     }
690     return ch;
691 }
692 
693 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_file_read(UChar * chars,int32_t count,UFILE * f)694 u_file_read(    UChar        *chars,
695     int32_t        count,
696     UFILE         *f)
697 {
698     int32_t dataSize;
699     int32_t read = 0;
700     u_localized_string *str = &f->str;
701 
702     do {
703 
704         /* determine the amount of data in the buffer */
705         dataSize = (int32_t)(str->fLimit - str->fPos);
706         if (dataSize <= 0) {
707             /* fill the buffer */
708             ufile_fill_uchar_buffer(f);
709             dataSize = (int32_t)(str->fLimit - str->fPos);
710         }
711 
712         /* Make sure that we don't read too much */
713         if (dataSize > (count - read)) {
714             dataSize = count - read;
715         }
716 
717         /* copy the current data in the buffer */
718         memcpy(chars + read, str->fPos, dataSize * sizeof(UChar));
719 
720         /* update number of items read */
721         read += dataSize;
722 
723         /* update the current buffer position */
724         str->fPos += dataSize;
725     }
726     while (dataSize != 0 && read < count);
727 
728     return read;
729 }
730 #endif
731