1 /*
2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /**
17  * @file picobase.h
18  *
19  * base functionality
20  *
21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22  * All rights reserved.
23  *
24  * History:
25  * - 2009-04-20 -- initial version
26  *
27  */
28 
29 #ifndef PICOBASE_H_
30 #define PICOBASE_H_
31 
32 #include "picoos.h"
33 
34 #ifdef __cplusplus
35 extern "C" {   /* give the declarations of this header C linkage when included from C++ */
36 #endif
37 #if 0          /* never compiled; presumably only balances the brace above for editors/indenters */
38 }
39 #endif
40 
41 /* maximum number of bytes of a single UTF8 character (not counting a terminating zero) */
42 #define PICOBASE_UTF8_MAXLEN    4
43 
44 typedef picoos_uint8  picobase_utf8char[PICOBASE_UTF8_MAXLEN+1];  /* buffer for one UTF8 character: up to PICOBASE_UTF8_MAXLEN bytes plus '\0'; always zero terminated */
45 typedef picoos_uint8  picobase_utf8;   /* presumably one UTF8 code unit (byte) -- confirm against users */
46 typedef picoos_uint16 picobase_utf16;  /* presumably one UTF16 code unit -- confirm against users */
47 typedef picoos_uint32 picobase_utf32;  /* presumably one UTF32 code unit -- confirm against users */
48 
49 /* ***************************************************************/
50 /* Unicode UTF8 functions */
51 /* ***************************************************************/
52 
53 /**
54  * Determines the number of UTF8 characters contained in the UTF8 string
55  *            'utf8str', looking at no more than 'maxlen' bytes of it
56  * @param    utf8str : a string encoded in UTF8
57  * @param    maxlen  : max length (in bytes) accessible in utf8str
58  * @return   >=0 : length of the UTF8 string in number of UTF8 characters,
59  *                     counted up to the first '\0' or up to maxlen bytes
60  * @return   <0 : invalid UTF8 encountered (NOTE(review): exact value and trigger are not visible in this header)
61  * @remarks  strict implementation, not allowing invalid utf8
62 */
63 picoos_int32 picobase_utf8_length(const picoos_uint8 *utf8str,
64                                   const picoos_uint16 maxlen);
65 
66 
/**
 * Determines the number of bytes a UTF8 character uses, based
 *            on the first byte of the UTF8 character
 * @param    x : the first (and maybe only) byte of a UTF8 character; it is
 *            converted to picoos_uint8 before being classified, so a byte
 *            read through a (possibly signed) char is handled correctly
 * @return   positive value in 1..4 : number of bytes of the UTF8 character
 * @return   0 : if not a valid UTF8 character start (continuation byte
 *            0x80..0xBF, or a byte >= 0xF8)
 * @remarks  function-like macro standing in for
 *            picoos_uint8 picobase_det_utf8_length(const picoos_uint8 firstchar);
 *            'x' is evaluated several times, so it must not have side effects
 * @remarks  only the lead-byte bit pattern is checked: 0xC0, 0xC1 and
 *            0xF5..0xF7 are reported as 2-byte resp. 4-byte starts
 */
#define picobase_det_utf8_length(x) \
    ( ((picoos_uint8)(x) <  0x80u) ? 1 : \
     (((picoos_uint8)(x) >= 0xF8u) ? 0 : \
     (((picoos_uint8)(x) >= 0xF0u) ? 4 : \
     (((picoos_uint8)(x) >= 0xE0u) ? 3 : \
     (((picoos_uint8)(x) >= 0xC0u) ? 2 : 0)))) )
78 
79 /**
80  * Converts the content of 'utf8str' to lowercase and stores it in 'lowercase'
81  *            ('lowercaseMaxLen' bytes are available for the result)
82  * @param    utf8str : utf8 string (input)
83  * @param    lowercase : string converted to lowercase (output)
84  * @param    lowercaseMaxLen : maximal number of bytes available in 'lowercase'
85  * @param    done : flag to report success/failure of the operation (output)
86  * @return  TRUE if successful, FALSE otherwise (NOTE(review): declared as picoos_int32; confirm the returned value against the implementation)
87 */
88 picoos_int32 picobase_lowercase_utf8_str (picoos_uchar utf8str[], picoos_char lowercase[], picoos_int32 lowercaseMaxLen, picoos_uint8 * done);
89 
90 /**
91  * Converts the content of 'utf8str' to uppercase and stores it in 'uppercase'
92  * @param    utf8str : utf8 string (input)
93  * @param    uppercase : string converted to uppercase (output)
94  * @param    uppercaseMaxLen : maximal number of bytes available in 'uppercase' (declared as plain int, whereas picobase_lowercase_utf8_str takes picoos_int32)
95  * @param    done : flag to report success/failure of the operation (output)
96  * @return  TRUE if successful, FALSE otherwise (NOTE(review): declared as picoos_int32; confirm the returned value against the implementation)
97 */
98 picoos_int32 picobase_uppercase_utf8_str (picoos_uchar utf8str[], picoos_char uppercase[], int uppercaseMaxLen, picoos_uint8 * done);
99 
100 /**
101  * Gets the next UTF8 character 'utf8char' from the UTF8 string
102  *            'utf8s', starting at position 'pos'
103  * @param    utf8s : UTF8 string
104  * @param    utf8slenmax : max length accessible in utf8s
105  * @param    pos : position from where the UTF8 character is checked and copied
106  *            (also set as output to the position directly following the UTF8 char)
107  * @param    utf8char : zero terminated UTF8 character containing 1 to 4 bytes (output)
108  * @return  TRUE if okay
109  * @return  FALSE if there is no valid UTF8 char or no more UTF8 char available within utf8slenmax
110 */
111 picoos_uint8 picobase_get_next_utf8char(const picoos_uint8 *utf8s,
112                                         const picoos_uint32 utf8slenmax,
113                                         picoos_uint32 *pos,
114                                         picobase_utf8char utf8char);
115 
116 /**
117  * Same as picobase_get_next_utf8char, but only updates 'pos' and does
118  *            not copy the UTF8 character to an output buffer
119 */
120 picoos_uint8 picobase_get_next_utf8charpos(const picoos_uint8 *utf8s,
121                                            const picoos_uint32 utf8slenmax,
122                                            picoos_uint32 *pos);
123 
124 /**
125  * Gets the previous UTF8 character 'utf8char' from the UTF8 string
126  *             'utf8s', starting the backward search at position 'pos-1'
127  * @param    utf8s : UTF8 string
128  * @param    utf8slenmin : min length accessible in utf8s (presumably the lower limit of the backward search -- confirm against the implementation)
129  * @param    pos : the search for the prev UTF8 char starts at [pos-1]
130  *            (also set as output to the start position of the prev UTF8 character)
131  * @param    utf8char : zero terminated UTF8 character containing 1 to 4 bytes (output)
132  * @return  TRUE if okay
133  * @return  FALSE if there is no valid UTF8 char preceding pos or no more UTF8 char available within the limit given by utf8slenmin
134 */
135 picoos_uint8 picobase_get_prev_utf8char(const picoos_uint8 *utf8s,
136                                         const picoos_uint32 utf8slenmin,
137                                         picoos_uint32 *pos,
138                                         picobase_utf8char utf8char);
139 
140 /**
141  * Same as picobase_get_prev_utf8char, but only updates 'pos' and does
142  *            not copy the UTF8 character to an output buffer
143 */
144 picoos_uint8 picobase_get_prev_utf8charpos(const picoos_uint8 *utf8s,
145                                            const picoos_uint32 utf8slenmin,
146                                            picoos_uint32 *pos);
147 
148 
149 /**
150  * Checks whether the input string is UTF8 and uppercase
151  * @param    str : UTF8 string
152  * @param    strmaxlen : max length for the input string (presumably in bytes -- confirm against the implementation)
153  * @return  TRUE if string is UTF8 and uppercase
154  * @return  FALSE otherwise
155 */
156 extern picoos_bool picobase_is_utf8_uppercase (picoos_uchar str[], picoos_int32 strmaxlen);
157 
158 /**
159  * Checks whether the input string is UTF8 and lowercase
160  * @param    str : UTF8 string
161  * @param    strmaxlen : max length for the input string (presumably in bytes -- confirm against the implementation)
162  * @return  TRUE if string is UTF8 and lowercase
163  * @return  FALSE otherwise
164 */
165 extern picoos_bool picobase_is_utf8_lowercase (picoos_uchar str[], picoos_int32 strmaxlen);
166 
167 #ifdef __cplusplus
168 }   /* closes the extern "C" block opened near the top of this header */
169 #endif
170 
171 #endif /*PICOBASE_H_*/
172