Lines Matching full:unicode
8 Unicode implementation based on original code by Fredrik Lundh,
10 Unicode Integration Proposal. (See
11 http://www.egenix.com/files/python/unicode-proposal.txt).
19 * Yet another Unicode string type for Python. This type supports the
31 * This Unicode String Type is
62 /* --- Internal Unicode Format -------------------------------------------- */
64 /* Python 3.x requires unicode */
74 Otherwise, Unicode strings are stored as UCS-2 (with limited support
86 /* Py_UNICODE was the native Unicode storage format (code unit) used by
87 Python and represents a single Unicode element in the Unicode type.
111 unicode representations. */
116 /* --- Internal Unicode Operations ---------------------------------------- */
189 /* --- Unicode Type ------------------------------------------------------- */
198 /* There are 4 forms of Unicode strings:
312 /* Compact is with respect to the allocation scheme. Compact unicode
355 } data; /* Canonical, smallest-form Unicode buffer */
461 /* Return a void pointer to the raw unicode buffer. */
519 #define PyUnicode_READ_CHAR(unicode, index) \ argument
520 (assert(PyUnicode_Check(unicode)), \
521 assert(PyUnicode_IS_READY(unicode)), \
523 (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
524 ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
525 (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
526 ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
527 ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
531 /* Returns the length of the unicode string. The caller has to make sure that
571 /* This Unicode character will be used as replacement character during
573 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
574 Unicode 3.0. */
582 /* With PEP 393, this is the recommended way to allocate a new unicode object.
593 wstr/Py_UNICODE representation. This function is used to convert Unicode
601 PyObject *unicode /* Unicode object */
605 /* Get a copy of a Unicode string. */
608 PyObject *unicode
612 /* Copy character from one unicode object into another, this function performs
653 unicode[start:start+length].
661 PyObject *unicode,
670 PyObject *unicode,
677 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
682 that modifying the Unicode object contents after construction is
689 const Py_UNICODE *u, /* Unicode buffer */
729 /* Compute the maximum character of the substring unicode[start:end].
732 PyObject *unicode,
744 PyObject *unicode,
752 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
756 /* Return a read-only pointer to the Unicode object's internal
762 PyObject *unicode /* Unicode object */
768 PyObject *unicode /* Unicode object */
771 /* Return a read-only pointer to the Unicode object's internal
777 PyObject *unicode, /* Unicode object */
783 /* Get the length of the Unicode object. */
786 PyObject *unicode
794 PyObject *unicode /* Unicode object */
801 PyObject *unicode,
811 PyObject *unicode,
818 /* Get the maximum ordinal for a Unicode character. */
822 /* Resize a Unicode object. The length is the number of characters, except
826 *unicode is modified to point to the new (resized) object and 0
833 is returned and *unicode left untouched.
839 PyObject **unicode, /* Pointer to the Unicode object */
843 /* Decode obj to a Unicode object.
849 All other objects (including Unicode objects) raise an exception.
862 /* Copy an instance of a Unicode subtype to a new true Unicode object if
863 necessary. If obj is already a true Unicode object (not a subtype), return
907 /* Initialize a Unicode writer.
950 /* Append a Unicode character.
957 /* Append a Unicode string.
961 PyObject *str /* Unicode string */
964 /* Append a substring of a Unicode string.
968 PyObject *str, /* Unicode string */
989 /* Get the value of the writer as a Unicode string. Clear the
1028 /* Create a Unicode Object from the wchar_t buffer w of the given
1038 /* Copies the Unicode Object contents into the wchar_t buffer w. At
1051 PyObject *unicode, /* Unicode object */
1056 /* Convert the Unicode object to a wide character string. The output string
1065 PyObject *unicode, /* Unicode object */
1075 /* --- Unicode ordinals --------------------------------------------------- */
1077 /* Create a Unicode Object from the given Unicode code point ordinal.
1088 /* Clear the free list used by the Unicode implementation.
1118 Unicode object unicode and the size of the encoded representation
1133 *** If you need to access the Unicode object as UTF-8 bytes string,
1139 PyObject *unicode,
1145 Unicode object unicode.
1159 *** If you need to access the Unicode object as UTF-8 bytes string,
1165 PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
1175 /* Create a Unicode object by decoding the encoded string s of the
1185 /* Decode a Unicode object unicode and return the result as Python
1193 PyObject *unicode, /* Unicode object */
1198 /* Decode a Unicode object unicode and return the result as Unicode
1206 PyObject *unicode, /* Unicode object */
1216 const Py_UNICODE *s, /* Unicode char buffer */
1223 /* Encodes a Unicode object and returns the result as Python
1232 PyObject *unicode, /* Unicode object */
1237 /* Encodes a Unicode object and returns the result as Python string
1241 PyObject *unicode, /* Unicode object */
1246 /* Encodes a Unicode object and returns the result as Unicode
1254 PyObject *unicode, /* Unicode object */
1282 const Py_UNICODE *data, /* Unicode char buffer */
1289 PyObject *unicode, /* Unicode object */
1312 PyObject *unicode /* Unicode object */
1317 PyObject *unicode,
1321 const Py_UNICODE *data, /* Unicode char buffer */
1330 the corresponding Unicode object.
1375 PyObject *unicode /* Unicode object */
1379 the Unicode data.
1389 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
1396 const Py_UNICODE *data, /* Unicode char buffer */
1402 PyObject *object, /* Unicode object */
1411 the corresponding Unicode object.
1456 PyObject *unicode /* Unicode object */
1460 the Unicode data.
1470 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
1481 const Py_UNICODE *data, /* Unicode char buffer */
1487 PyObject* unicode, /* Unicode object */
1493 /* --- Unicode-Escape Codecs ---------------------------------------------- */
1496 const char *string, /* Unicode-Escape encoded string */
1505 const char *string, /* Unicode-Escape encoded string */
1515 PyObject *unicode /* Unicode object */
1520 const Py_UNICODE *data, /* Unicode char buffer */
1525 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
1528 const char *string, /* Raw-Unicode-Escape encoded string */
1534 PyObject *unicode /* Unicode object */
1539 const Py_UNICODE *data, /* Unicode char buffer */
1544 /* --- Unicode Internal Codec ---------------------------------------------
1558 Note: Latin-1 corresponds to the first 256 Unicode ordinals.
1569 PyObject *unicode /* Unicode object */
1574 PyObject* unicode,
1578 const Py_UNICODE *data, /* Unicode char buffer */
1597 PyObject *unicode /* Unicode object */
1602 PyObject* unicode,
1606 const Py_UNICODE *data, /* Unicode char buffer */
1617 255) to Unicode strings, integers (which are then interpreted as Unicode
1622 Encoding mappings must map Unicode ordinal integers to bytes objects,
1637 PyObject *unicode, /* Unicode object */
1643 const Py_UNICODE *data, /* Unicode char buffer */
1649 PyObject *unicode, /* Unicode object */
1656 character mapping table to it and return the resulting Unicode
1659 The mapping table must map Unicode ordinal integers to Unicode strings,
1660 Unicode ordinal integers or None (causing deletion of the character).
1670 const Py_UNICODE *data, /* Unicode char buffer */
1705 PyObject *unicode /* Unicode object */
1710 const Py_UNICODE *data, /* Unicode char buffer */
1719 PyObject *unicode, /* Unicode object */
1729 /* Takes a Unicode string holding a decimal value and writes it into
1737 \0 as-is. Characters outside this range (Unicode ordinals 1-256)
1752 Py_UNICODE *s, /* Unicode buffer */
1761 Returns a new Unicode string on success, NULL on failure.
1765 Py_UNICODE *s, /* Unicode buffer */
1769 /* Converts a Unicode object holding a decimal value to an ASCII string
1777 PyObject *unicode /* Unicode object */
1804 /* Encode a Unicode object to the current locale encoding. The encoder is
1810 PyObject *unicode,
1822 /* ParseTuple converter: decode bytes objects to unicode using
1852 /* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
1860 PyObject *unicode
1865 These are capable of handling Unicode objects and strings on input
1867 Unicode objects or integers as appropriate. */
1869 /* Concat two strings giving a new Unicode string. */
1892 /* Split a string giving a list of Unicode strings.
1934 /* Split a string giving a list of Unicode strings.
1954 return the resulting Unicode object.
1956 The mapping table must map Unicode ordinal integers to Unicode strings,
1957 Unicode ordinal integers or None (causing deletion of the character).
1972 the resulting Unicode string. */
2031 and return the resulting Unicode object. */
2051 /* Test whether a Unicode object is equal to an ASCII identifier. Return 1 if true,
2061 /* Compare a Unicode object with C string and return -1, 0, 1 for less than,
2073 /* Test whether a Unicode object is equal to an ASCII string. Return 1 if true,
2102 the resulting Unicode string. */
2112 element has to coerce to a one element Unicode string. -1 is
2125 /* Externally visible for str.strip(unicode) */
2163 Py_UCS4 ch /* Unicode character */
2167 Py_UCS4 ch /* Unicode character */
2171 Py_UCS4 ch /* Unicode character */
2175 Py_UCS4 ch /* Unicode character */
2179 Py_UCS4 ch /* Unicode character */
2183 const Py_UCS4 ch /* Unicode character */
2187 const Py_UCS4 ch /* Unicode character */
2191 Py_UCS4 ch /* Unicode character */
2195 Py_UCS4 ch /* Unicode character */
2199 Py_UCS4 ch /* Unicode character */
2203 Py_UCS4 ch, /* Unicode character */
2208 Py_UCS4 ch, /* Unicode character */
2213 Py_UCS4 ch, /* Unicode character */
2218 Py_UCS4 ch, /* Unicode character */
2223 Py_UCS4 ch /* Unicode character */
2227 Py_UCS4 ch /* Unicode character */
2231 Py_UCS4 ch /* Unicode character */
2235 Py_UCS4 ch /* Unicode character */
2239 Py_UCS4 ch /* Unicode character */
2243 Py_UCS4 ch /* Unicode character */
2247 Py_UCS4 ch /* Unicode character */
2251 Py_UCS4 ch /* Unicode character */
2255 Py_UCS4 ch /* Unicode character */
2259 Py_UCS4 ch /* Unicode character */
2301 /* Create a copy of a unicode string ending with a nul character. Return NULL
2306 PyObject *unicode
2321 /* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/
2326 /* Fast equality check when the inputs are known to be exact unicode types