1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2000-2010, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: uparse.h 11 * encoding: US-ASCII 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2000apr18 16 * created by: Markus W. Scherer 17 * 18 * This file provides a parser for files that are delimited by one single 19 * character like ';' or TAB. Example: the Unicode Character Properties files 20 * like UnicodeData.txt are semicolon-delimited. 21 */ 22 23 #ifndef __UPARSE_H__ 24 #define __UPARSE_H__ 25 26 #include "unicode/utypes.h" 27 28 /** 29 * Is c an invariant-character whitespace? 30 * @param c invariant character 31 */ 32 #define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n') 33 34 U_CDECL_BEGIN 35 36 /** 37 * Skip space ' ' and TAB '\t' characters. 38 * 39 * @param s Pointer to characters. 40 * @return Pointer to first character at or after s that is not a space or TAB. 41 */ 42 U_CAPI const char * U_EXPORT2 43 u_skipWhitespace(const char *s); 44 45 /** 46 * Trim whitespace (including line endings) from the end of the string. 47 * 48 * @param s Pointer to the string. 49 * @return Pointer to the new end of the string. 50 */ 51 U_CAPI char * U_EXPORT2 52 u_rtrim(char *s); 53 54 /** Function type for u_parseDelimitedFile(). */ 55 typedef void U_CALLCONV 56 UParseLineFn(void *context, 57 char *fields[][2], 58 int32_t fieldCount, 59 UErrorCode *pErrorCode); 60 61 /** 62 * Parser for files that are similar to UnicodeData.txt: 63 * This function opens the file and reads it line by line. It skips empty lines 64 * and comment lines that start with a '#'. 65 * All other lines are separated into fields with one delimiter character 66 * (semicolon for Unicode Properties files) between two fields. The last field in 67 * a line does not need to be terminated with a delimiter. 68 * 69 * For each line, after segmenting it, a line function is called. 70 * It gets passed the array of field start and limit pointers that is 71 * passed into this parser and filled by it for each line. 72 * For each field i of the line, the start pointer in fields[i][0] 73 * points to the beginning of the field, while the limit pointer in fields[i][1] 74 * points behind the field, i.e., to the delimiter or the line end. 75 * 76 * The context parameter of the line function is 77 * the same as the one for the parse function. 78 * 79 * The line function may modify the contents of the fields including the 80 * limit characters. 81 * 82 * If the file cannot be opened, or there is a parsing error or a field function 83 * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code. 84 */ 85 U_CAPI void U_EXPORT2 86 u_parseDelimitedFile(const char *filename, char delimiter, 87 char *fields[][2], int32_t fieldCount, 88 UParseLineFn *lineFn, void *context, 89 UErrorCode *pErrorCode); 90 91 /** 92 * Parse a string of code points like 0061 0308 0300. 93 * s must end with either ';' or NUL. 94 * 95 * @return Number of code points. 96 */ 97 U_CAPI int32_t U_EXPORT2 98 u_parseCodePoints(const char *s, 99 uint32_t *dest, int32_t destCapacity, 100 UErrorCode *pErrorCode); 101 102 /** 103 * Parse a list of code points like 0061 0308 0300 104 * into a UChar * string. 105 * s must end with either ';' or NUL. 106 * 107 * Set the first code point in *pFirst. 108 * 109 * @param s Input char * string. 110 * @param dest Output string buffer. 111 * @param destCapacity Capacity of dest in numbers of UChars. 112 * @param pFirst If pFirst!=NULL the *pFirst will be set to the first 113 * code point in the string. 114 * @param pErrorCode ICU error code. 115 * @return The length of the string in numbers of UChars. 116 */ 117 U_CAPI int32_t U_EXPORT2 118 u_parseString(const char *s, 119 UChar *dest, int32_t destCapacity, 120 uint32_t *pFirst, 121 UErrorCode *pErrorCode); 122 123 /** 124 * Parse a code point range like 125 * 0085 or 126 * 4E00..9FA5. 127 * 128 * s must contain such a range and end with either ';' or NUL. 129 * 130 * @return Length of code point range, end-start+1 131 */ 132 U_CAPI int32_t U_EXPORT2 133 u_parseCodePointRange(const char *s, 134 uint32_t *pStart, uint32_t *pEnd, 135 UErrorCode *pErrorCode); 136 137 /** 138 * Same as u_parseCodePointRange() but the range may be terminated by 139 * any character. The position of the terminating character is returned via 140 * the *terminator output parameter. 141 */ 142 U_CAPI int32_t U_EXPORT2 143 u_parseCodePointRangeAnyTerminator(const char *s, 144 uint32_t *pStart, uint32_t *pEnd, 145 const char **terminator, 146 UErrorCode *pErrorCode); 147 148 U_CAPI int32_t U_EXPORT2 149 u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status); 150 151 U_CDECL_END 152 153 #endif 154