1 /*
2 ******************************************************************************
3 *
4 * © 2016 and later: Unicode, Inc. and others.
5 * License & terms of use: http://www.unicode.org/copyright.html
6 *
7 ******************************************************************************
8 *   file name:  ubiditransform.h
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2016jul24
14 *   created by: Lina Kemmel
15 *
16 */
17 
18 #ifndef UBIDITRANSFORM_H
19 #define UBIDITRANSFORM_H
20 
21 #include "unicode/utypes.h"
22 #include "unicode/ubidi.h"
23 #include "unicode/uchar.h"
24 #include "unicode/localpointer.h"
25 
26 /**
27  * \file
28  * \brief Bidi Transformations
29  *
30  * <code>UBiDiOrder</code> indicates the order of text.<p>
31  * This bidi transformation engine supports all possible combinations (4 in
32  * total) of input and output text order:
33  * <ul>
34  * <li><logical input, visual output>: unless the output direction is RTL, this
35  * corresponds to a normal operation of the Bidi algorithm as described in the
36  * Unicode Technical Report and implemented by <code>UBiDi</code> when the
37  * reordering mode is set to <code>UBIDI_REORDER_DEFAULT</code>. Visual RTL
38  * mode is not supported by <code>UBiDi</code> and is accomplished through
39  * reversing a visual LTR string,</li>
40  * <li><visual input, logical output>: unless the input direction is RTL, this
41  * corresponds to an "inverse bidi algorithm" in <code>UBiDi</code> with the
42  * reordering mode set to <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>.
43  * Visual RTL mode is not supported by <code>UBiDi</code> and is
44  * accomplished through reversing a visual LTR string,</li>
45  * <li><logical input, logical output>: if the input and output base directions
46  * mismatch, this corresponds to the <code>UBiDi</code> implementation with the
47  * reordering mode set to <code>UBIDI_REORDER_RUNS_ONLY</code>; and if the
48  * input and output base directions are identical, the transformation engine
49  * will only handle character mirroring and Arabic shaping operations without
50  * reordering,</li>
51  * <li><visual input, visual output>: this reordering mode is not supported by
52  * the <code>UBiDi</code> engine; it implies character mirroring, Arabic
53  * shaping, and - if the input/output base directions mismatch -  string
54  * reverse operations.</li>
55  * </ul>
56  * @see ubidi_setInverse
57  * @see ubidi_setReorderingMode
58  * @see UBIDI_REORDER_DEFAULT
59  * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
60  * @see UBIDI_REORDER_RUNS_ONLY
61  * @stable ICU 58
62  */
63 typedef enum {
64     /** 0: Constant indicating a logical order.
65       * This is the default for input text.
66       * @stable ICU 58
67       */
68     UBIDI_LOGICAL = 0,
69     /** 1: Constant indicating a visual order.
70       * This is the default for output text.
71       * @stable ICU 58
72       */
73     UBIDI_VISUAL
74 } UBiDiOrder;
75 
76 /**
77  * <code>UBiDiMirroring</code> indicates whether or not characters with the
78  * "mirrored" property in RTL runs should be replaced with their mirror-image
79  * counterparts.
80  * @see UBIDI_DO_MIRRORING
81  * @see ubidi_setReorderingOptions
82  * @see ubidi_writeReordered
83  * @see ubidi_writeReverse
84  * @stable ICU 58
85  */
86 typedef enum {
87     /** 0: Constant indicating that character mirroring should not be
88       * performed, i.e. no character is replaced by its mirror image.
89       * This is the default.
90       * @stable ICU 58
91       */
92     UBIDI_MIRRORING_OFF = 0,
93     /** 1: Constant indicating that character mirroring should be performed.
94       * This corresponds to calling <code>ubidi_writeReordered</code> or
95       * <code>ubidi_writeReverse</code> with the
96       * <code>UBIDI_DO_MIRRORING</code> option bit set.
97       * @stable ICU 58
98       */
99     UBIDI_MIRRORING_ON
100 } UBiDiMirroring;
101 
102 /**
103  * Opaque <code>UBiDiTransform</code> structure (forward declaration only) that stores
104  * information used by the layout transformation engine; see <code>ubiditransform_open()</code>.
105  * @stable ICU 58
106  */
107 typedef struct UBiDiTransform UBiDiTransform;
108 
109 /**
110  * Performs transformation of text from the bidi layout defined by the input
111  * ordering scheme to the bidi layout defined by the output ordering scheme,
112  * and applies character mirroring and Arabic shaping operations.<p>
113  * In terms of <code>UBiDi</code>, such a transformation implies:
114  * <ul>
115  * <li>calling <code>ubidi_setReorderingMode</code> as needed (when the
116  * reordering mode is other than normal),</li>
117  * <li>calling <code>ubidi_setInverse</code> as needed (when text should be
118  * transformed from a visual to a logical form),</li>
119  * <li>resolving embedding levels of each character in the input text by
120  * calling <code>ubidi_setPara</code>,</li>
121  * <li>reordering the characters based on the computed embedding levels, also
122  * performing character mirroring as needed, and streaming the result to the
123  * output, by calling <code>ubidi_writeReordered</code>,</li>
124  * <li>performing Arabic digit and letter shaping on the output text by calling
125  * <code>u_shapeArabic</code>.</li>
126  * </ul>
127  * An "ordering scheme" encompasses the base direction and the order of text,
128  * and these characteristics must be defined by the caller for both input and
129  * output explicitly.<p>
130  * There are 36 possible combinations of <input, output> ordering schemes,
131  * which are partially supported by <code>UBiDi</code> already. Examples of the
132  * currently supported combinations:
133  * <ul>
134  * <li><Logical LTR, Visual LTR>: this is equivalent to calling
135  * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
136  * <li><Logical RTL, Visual LTR>: this is equivalent to calling
137  * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>,</li>
138  * <li><Logical Default ("Auto") LTR, Visual LTR>: this is equivalent to
139  * calling <code>ubidi_setPara</code> with
140  * <code>paraLevel == UBIDI_DEFAULT_LTR</code>,</li>
141  * <li><Logical Default ("Auto") RTL, Visual LTR>: this is equivalent to
142  * calling <code>ubidi_setPara</code> with
143  * <code>paraLevel == UBIDI_DEFAULT_RTL</code>,</li>
144  * <li><Visual LTR, Logical LTR>: this is equivalent to
145  * calling <code>ubidi_setInverse(UBiDi*, TRUE)</code> and then
146  * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
147  * <li><Visual LTR, Logical RTL>: this is equivalent to
148  * calling <code>ubidi_setInverse(UBiDi*, TRUE)</code> and then
149  * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>.</li>
150  * </ul>
151  * All combinations that involve the Visual RTL scheme are unsupported by
152  * <code>UBiDi</code>, for instance:
153  * <ul>
154  * <li><Logical LTR, Visual RTL>,</li>
155  * <li><Visual RTL, Logical RTL>.</li>
156  * </ul>
157  * <p>Example of usage of the transformation engine:<br>
158  * <pre>
159  * \code
160  * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0};
161  * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0};
162  * UErrorCode errorCode = U_ZERO_ERROR;
163  * // Run a transformation.
164  * ubiditransform_transform(pBiDiTransform,
165  *          text1, -1, text2, -1,
166  *          UBIDI_LTR, UBIDI_VISUAL,
167  *          UBIDI_RTL, UBIDI_LOGICAL,
168  *          UBIDI_MIRRORING_OFF,
169  *          U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
170  *          &errorCode);
171  * // Do something with text2.
172  * text2[4] = '2';
173  * // Run a reverse transformation.
174  * ubiditransform_transform(pBiDiTransform,
175  *          text2, -1, text1, -1,
176  *          UBIDI_RTL, UBIDI_LOGICAL,
177  *          UBIDI_LTR, UBIDI_VISUAL,
178  *          UBIDI_MIRRORING_OFF,
179  *          U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
180  *          &errorCode);
181  * \endcode
182  * </pre>
183  * </p>
184  *
185  * @param pBiDiTransform A pointer to a <code>UBiDiTransform</code> object
186  *        allocated with <code>ubiditransform_open()</code> or
187  *        <code>NULL</code>.<p>
188  *        This object serves for one-time setup to amortize initialization
189  *        overheads. Use of this object is not thread-safe. All other threads
190  *        should allocate a new <code>UBiDiTransform</code> object by calling
191  *        <code>ubiditransform_open()</code> before using it. Alternatively,
192  *        a caller can set this parameter to <code>NULL</code>, in which case
193  *        the object will be allocated by the engine on the fly.</p>
194  * @param src A pointer to the text that the Bidi layout transformations will
195  *        be performed on.
196  *        <p><strong>Note:</strong> the text must be (at least)
197  *        <code>srcLength</code> long.</p>
198  * @param srcLength The length of the text, in number of UChars. If
199  *        <code>srcLength == -1</code> then the text must be zero-terminated.
200  * @param dest A pointer to where the processed text is to be copied.
201  * @param destSize The size of the <code>dest</code> buffer, in number of
202  *        UChars. If the <code>U_SHAPE_LETTERS_UNSHAPE</code> option is set,
203  *        then the destination length could be as large as
204  *        <code>srcLength * 2</code>. Otherwise, the destination length will
205  *        not exceed <code>srcLength</code>. If the caller reserves the last
206  *        position for zero-termination, it should be excluded from
207  *        <code>destSize</code>.
208  *        <p><code>destSize == -1</code> is allowed and makes sense when
209  *        <code>dest</code> holds some meaningful value, e.g. that of
210  *        <code>src</code>. In this case <code>dest</code> must be
211  *        zero-terminated.</p>
212  * @param inParaLevel A base embedding level of the input as defined in
213  *        <code>ubidi_setPara</code> documentation for the
214  *        <code>paraLevel</code> parameter.
215  * @param inOrder An order of the input, which can be one of the
216  *        <code>UBiDiOrder</code> values.
217  * @param outParaLevel A base embedding level of the output as defined in
218  *        <code>ubidi_setPara</code> documentation for the
219  *        <code>paraLevel</code> parameter.
220  * @param outOrder An order of the output, which can be one of the
221  *        <code>UBiDiOrder</code> values.
222  * @param doMirroring Indicates whether or not to perform character mirroring,
223  *        and can accept one of the <code>UBiDiMirroring</code> values.
224  * @param shapingOptions Arabic digit and letter shaping options defined in the
225  *        ushape.h documentation.
226  *        <p><strong>Note:</strong> Direction indicator options are computed by
227  *        the transformation engine based on the effective ordering schemes, so
228  *        user-defined direction indicators will be ignored.</p>
229  * @param pErrorCode A pointer to an error code value.
230  *
231  * @return The destination length, i.e. the number of UChars written to
232  *         <code>dest</code>. If the transformation fails, the return value
233  *         will be 0 (and the error code will be written to
234  *         <code>pErrorCode</code>).
235  *
236  * @see UBiDiLevel
237  * @see UBiDiOrder
238  * @see UBiDiMirroring
239  * @see ubidi_setPara
240  * @see u_shapeArabic
241  * @stable ICU 58
242  */
243 U_STABLE uint32_t U_EXPORT2
244 ubiditransform_transform(UBiDiTransform *pBiDiTransform,
245             const UChar *src, int32_t srcLength,
246             UChar *dest, int32_t destSize,
247             UBiDiLevel inParaLevel, UBiDiOrder inOrder,
248             UBiDiLevel outParaLevel, UBiDiOrder outOrder,
249             UBiDiMirroring doMirroring, uint32_t shapingOptions,
250             UErrorCode *pErrorCode);
251 
252 /**
253  * Allocates a <code>UBiDiTransform</code> object. This object can be reused,
254  * e.g. with different ordering schemes, mirroring or shaping options.<p>
255  * <strong>Note:</strong> The object can only be reused in the same thread.
256  * All other threads should allocate a new <code>UBiDiTransform</code> object
257  * before using it.<p>
258  * Example of usage:<p>
259  * <pre>
260  * \code
261  * UErrorCode errorCode = U_ZERO_ERROR;
262  * // Open a new UBiDiTransform.
263  * UBiDiTransform* transform = ubiditransform_open(&errorCode);
264  * // Run a transformation.
265  * ubiditransform_transform(transform,
266  *          text1, -1, text2, -1,
267  *          UBIDI_RTL, UBIDI_LOGICAL,
268  *          UBIDI_LTR, UBIDI_VISUAL,
269  *          UBIDI_MIRRORING_ON,
270  *          U_SHAPE_DIGITS_EN2AN,
271  *          &errorCode);
272  * // Do something with the output text and invoke another transformation using
273  * //   that text as input.
274  * ubiditransform_transform(transform,
275  *          text2, -1, text3, -1,
276  *          UBIDI_LTR, UBIDI_VISUAL,
277  *          UBIDI_RTL, UBIDI_VISUAL,
278  *          UBIDI_MIRRORING_ON,
279  *          0, &errorCode);
280  * \endcode
281  * </pre>
282  * <p>
283  * The <code>UBiDiTransform</code> object must be deallocated by calling
284  * <code>ubiditransform_close()</code>.
285  * @param pErrorCode A pointer to an error code value.
286  * @return An empty <code>UBiDiTransform</code> object.
287  * @stable ICU 58
288  */
289 U_STABLE UBiDiTransform* U_EXPORT2
290 ubiditransform_open(UErrorCode *pErrorCode);
291 
292 /**
293  * Deallocates the <code>UBiDiTransform</code> object passed in <code>pBidiTransform</code>.
294  * @stable ICU 58
295  */
296 U_STABLE void U_EXPORT2
297 ubiditransform_close(UBiDiTransform *pBidiTransform);
298 
299 #if U_SHOW_CPLUSPLUS_API
300 
301 U_NAMESPACE_BEGIN
302 
303 /**
304  * \class LocalUBiDiTransformPointer
305  * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close().
306  * For most methods see the LocalPointerBase base class.
307  * Declared only when <code>U_SHOW_CPLUSPLUS_API</code> is nonzero.
308  * @see LocalPointerBase
309  * @see LocalPointer
310  * @stable ICU 58
311  */
312 U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);
313 
314 U_NAMESPACE_END
315 
316 #endif
317 
318 #endif
319