1 /*
2 * wchar_t helpers
3 */
4
/* Fixed-width code-unit types used by the helpers in this file:
   cffi_char16_t holds one 16-bit code unit, cffi_char32_t one 32-bit
   code unit.  Both are unsigned. */
typedef uint16_t cffi_char16_t;
typedef uint32_t cffi_char32_t;
7
8
9 #if Py_UNICODE_SIZE == 2
10
/* Before Python 2.7, PyUnicode_FromWideChar is not able to convert
   wchar_t values greater than 65535 into surrogate pairs (two 16-bit
   unicode characters).  But even the Python 2.7 version doesn't detect
   wchar_t values that are out of range (>= 1114112, i.e. above
   0x10FFFF), and just returns nonsense.

   From cffi 1.11 we can't use it anyway, because we need a version
   with char32_t input types.
*/
19 static PyObject *
_my_PyUnicode_FromChar32(const cffi_char32_t * w,Py_ssize_t size)20 _my_PyUnicode_FromChar32(const cffi_char32_t *w, Py_ssize_t size)
21 {
22 PyObject *unicode;
23 register Py_ssize_t i;
24 Py_ssize_t alloc;
25 const cffi_char32_t *orig_w;
26
27 alloc = size;
28 orig_w = w;
29 for (i = size; i > 0; i--) {
30 if (*w > 0xFFFF)
31 alloc++;
32 w++;
33 }
34 w = orig_w;
35 unicode = PyUnicode_FromUnicode(NULL, alloc);
36 if (!unicode)
37 return NULL;
38
39 /* Copy the wchar_t data into the new object */
40 {
41 register Py_UNICODE *u;
42 u = PyUnicode_AS_UNICODE(unicode);
43 for (i = size; i > 0; i--) {
44 if (*w > 0xFFFF) {
45 cffi_char32_t ordinal;
46 if (*w > 0x10FFFF) {
47 PyErr_Format(PyExc_ValueError,
48 "char32_t out of range for "
49 "conversion to unicode: 0x%x", (int)*w);
50 Py_DECREF(unicode);
51 return NULL;
52 }
53 ordinal = *w++;
54 ordinal -= 0x10000;
55 *u++ = 0xD800 | (ordinal >> 10);
56 *u++ = 0xDC00 | (ordinal & 0x3FF);
57 }
58 else
59 *u++ = *w++;
60 }
61 }
62 return unicode;
63 }
64
65 static PyObject *
_my_PyUnicode_FromChar16(const cffi_char16_t * w,Py_ssize_t size)66 _my_PyUnicode_FromChar16(const cffi_char16_t *w, Py_ssize_t size)
67 {
68 return PyUnicode_FromUnicode((const Py_UNICODE *)w, size);
69 }
70
71 #else /* Py_UNICODE_SIZE == 4 */
72
73 static PyObject *
_my_PyUnicode_FromChar32(const cffi_char32_t * w,Py_ssize_t size)74 _my_PyUnicode_FromChar32(const cffi_char32_t *w, Py_ssize_t size)
75 {
76 return PyUnicode_FromUnicode((const Py_UNICODE *)w, size);
77 }
78
79 static PyObject *
_my_PyUnicode_FromChar16(const cffi_char16_t * w,Py_ssize_t size)80 _my_PyUnicode_FromChar16(const cffi_char16_t *w, Py_ssize_t size)
81 {
82 /* 'size' is the length of the 'w' array */
83 PyObject *result = PyUnicode_FromUnicode(NULL, size);
84
85 if (result != NULL) {
86 Py_UNICODE *u_base = PyUnicode_AS_UNICODE(result);
87 Py_UNICODE *u = u_base;
88
89 if (size == 1) { /* performance only */
90 *u = (cffi_char32_t)*w;
91 }
92 else {
93 while (size > 0) {
94 cffi_char32_t ch = *w++;
95 size--;
96 if (0xD800 <= ch && ch <= 0xDBFF && size > 0) {
97 cffi_char32_t ch2 = *w;
98 if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
99 ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
100 w++;
101 size--;
102 }
103 }
104 *u++ = ch;
105 }
106 if (PyUnicode_Resize(&result, u - u_base) < 0) {
107 Py_DECREF(result);
108 return NULL;
109 }
110 }
111 }
112 return result;
113 }
114
115 #endif
116
117
/* IS_SURROGATE(u): true if u[0] is a high surrogate and u[1] is a low
   surrogate, i.e. the two units form a surrogate pair.  u[1] is only
   read when u[0] is a high surrogate.
   AS_SURROGATE(u): the code point encoded by the pair u[0], u[1];
   only meaningful when IS_SURROGATE(u) is true.
   Both macros evaluate 'u' more than once. */
#define IS_SURROGATE(u)   ((u)[0] >= 0xD800 && (u)[0] <= 0xDBFF &&   \
                           (u)[1] >= 0xDC00 && (u)[1] <= 0xDFFF)
#define AS_SURROGATE(u)   (0x10000 +                                 \
                           (((u)[0] - 0xD800) << 10) +               \
                           ((u)[1] - 0xDC00))
122
123 static int
_my_PyUnicode_AsSingleChar16(PyObject * unicode,cffi_char16_t * result,char * err_got)124 _my_PyUnicode_AsSingleChar16(PyObject *unicode, cffi_char16_t *result,
125 char *err_got)
126 {
127 Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
128 if (PyUnicode_GET_SIZE(unicode) != 1) {
129 sprintf(err_got, "unicode string of length %zd",
130 PyUnicode_GET_SIZE(unicode));
131 return -1;
132 }
133 #if Py_UNICODE_SIZE == 4
134 if (((unsigned int)u[0]) > 0xFFFF)
135 {
136 sprintf(err_got, "larger-than-0xFFFF character");
137 return -1;
138 }
139 #endif
140 *result = (cffi_char16_t)u[0];
141 return 0;
142 }
143
144 static int
_my_PyUnicode_AsSingleChar32(PyObject * unicode,cffi_char32_t * result,char * err_got)145 _my_PyUnicode_AsSingleChar32(PyObject *unicode, cffi_char32_t *result,
146 char *err_got)
147 {
148 Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
149 if (PyUnicode_GET_SIZE(unicode) == 1) {
150 *result = (cffi_char32_t)u[0];
151 return 0;
152 }
153 #if Py_UNICODE_SIZE == 2
154 if (PyUnicode_GET_SIZE(unicode) == 2 && IS_SURROGATE(u)) {
155 *result = AS_SURROGATE(u);
156 return 0;
157 }
158 #endif
159 sprintf(err_got, "unicode string of length %zd",
160 PyUnicode_GET_SIZE(unicode));
161 return -1;
162 }
163
_my_PyUnicode_SizeAsChar16(PyObject * unicode)164 static Py_ssize_t _my_PyUnicode_SizeAsChar16(PyObject *unicode)
165 {
166 Py_ssize_t length = PyUnicode_GET_SIZE(unicode);
167 Py_ssize_t result = length;
168
169 #if Py_UNICODE_SIZE == 4
170 Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
171 Py_ssize_t i;
172
173 for (i=0; i<length; i++) {
174 if (u[i] > 0xFFFF)
175 result++;
176 }
177 #endif
178 return result;
179 }
180
_my_PyUnicode_SizeAsChar32(PyObject * unicode)181 static Py_ssize_t _my_PyUnicode_SizeAsChar32(PyObject *unicode)
182 {
183 Py_ssize_t length = PyUnicode_GET_SIZE(unicode);
184 Py_ssize_t result = length;
185
186 #if Py_UNICODE_SIZE == 2
187 Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
188 Py_ssize_t i;
189
190 for (i=0; i<length-1; i++) {
191 if (IS_SURROGATE(u+i))
192 result--;
193 }
194 #endif
195 return result;
196 }
197
/* Write the content of 'unicode' into 'result' as char16_t units.

   'resultlen' is taken to be the number of items available in
   'result' (the same meaning it has in _my_PyUnicode_AsChar32).
   When Py_UNICODE is 32 bits wide, characters above 0xFFFF are
   written as a surrogate pair and therefore use two items.  No
   terminating zero is written.

   Returns 0 on success.  Returns -1 with ValueError set if a
   character is above 0x10FFFF or if the units would not fit in
   'resultlen' items; in both cases 'result' may already have been
   partially written.
*/
static int _my_PyUnicode_AsChar16(PyObject *unicode,
                                  cffi_char16_t *result,
                                  Py_ssize_t resultlen)
{
    Py_ssize_t len = PyUnicode_GET_SIZE(unicode);
    Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
    Py_ssize_t room = resultlen;    /* items still free in 'result' */
    Py_ssize_t i;

    for (i = 0; i < len; i++) {
#if Py_UNICODE_SIZE == 2
        cffi_char16_t ordinal = u[i];
#else
        cffi_char32_t ordinal = u[i];
        if (ordinal > 0xFFFF) {
            if (ordinal > 0x10FFFF) {
                PyErr_Format(PyExc_ValueError,
                             "unicode character out of range for "
                             "conversion to char16_t: 0x%x", (int)ordinal);
                return -1;
            }
            /* a surrogate pair needs two free items */
            if (room < 2)
                goto too_long;
            room -= 2;
            ordinal -= 0x10000;
            *result++ = 0xD800 | (ordinal >> 10);
            *result++ = 0xDC00 | (ordinal & 0x3FF);
            continue;
        }
#endif
        if (room < 1)
            goto too_long;
        room--;
        *result++ = ordinal;
    }
    return 0;

 too_long:
    /* never write past the end of the caller's buffer */
    PyErr_Format(PyExc_ValueError,
                 "unicode string is too long for a char16_t buffer "
                 "of %zd items", resultlen);
    return -1;
}
227
/* Write 'resultlen' char32_t units into 'result', taken from the
   content of 'unicode'.

   When Py_UNICODE is 16 bits wide, a surrogate pair in the source is
   decoded into one output unit and consumes two source units.  The
   loop is driven by 'resultlen' alone; the length of 'unicode' is not
   consulted, so the caller must pass a count that matches the string.
   Always returns 0.
*/
static int _my_PyUnicode_AsChar32(PyObject *unicode,
                                  cffi_char32_t *result,
                                  Py_ssize_t resultlen)
{
    const Py_UNICODE *src = PyUnicode_AS_UNICODE(unicode);
    cffi_char32_t *dst = result;
    Py_ssize_t remaining;

    for (remaining = resultlen; remaining > 0; remaining--) {
        Py_ssize_t consumed = 1;
        cffi_char32_t ordinal = src[0];
#if Py_UNICODE_SIZE == 2
        if (IS_SURROGATE(src)) {
            ordinal = AS_SURROGATE(src);
            consumed = 2;
        }
#endif
        *dst++ = ordinal;
        src += consumed;
    }
    return 0;
}
247