1 /*
2  * Copyright © 2013 Ran Benita
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #ifndef _XKBCOMMON_COMPOSE_H
25 #define _XKBCOMMON_COMPOSE_H
26 
27 #include <xkbcommon/xkbcommon.h>
28 
29 #ifdef __cplusplus
30 extern "C" {
31 #endif
32 
33 /**
34  * @file
35  * libxkbcommon Compose API - support for Compose and dead-keys.
36  */
37 
38 /**
39  * @defgroup compose Compose and dead-keys support
40  * Support for Compose and dead-keys.
41  * @since 0.5.0
42  *
43  * @{
44  */
45 
46 /**
47  * @page compose-overview Overview
48  * @parblock
49  *
50  * Compose and dead-keys are a common feature of many keyboard input
51  * systems.  They extend the range of the keysyms that can be produced
52  * directly from a keyboard by using a sequence of key strokes, instead
53  * of just one.
54  *
55  * Here are some example sequences, in the libX11 Compose file format:
56  *
57  *     <dead_acute> <a>         : "á"   aacute # LATIN SMALL LETTER A WITH ACUTE
58  *     <Multi_key> <A> <T>      : "@"   at     # COMMERCIAL AT
59  *
60  * When the user presses a key which produces the `<dead_acute>` keysym,
61  * nothing initially happens (thus the key is dubbed a "dead-key").  But
62  * when the user enters `<a>`, "á" is "composed", in place of "a".  If
63  * instead the user had entered a keysym which does not follow
64  * `<dead_acute>` in any compose sequence, the sequence is said to be
65  * "cancelled".
66  *
67  * Compose files define many such sequences.  For a description of the
68  * common file format for Compose files, see the Compose(5) man page.
69  *
70  * A successfully-composed sequence has two results: a keysym and a UTF-8
71  * string.  At least one of the two is defined for each sequence.  If only
72  * a keysym is given, the keysym's string representation is used for the
73  * result string (using xkb_keysym_to_utf8()).
74  *
75  * This library provides low-level support for Compose file parsing and
76  * processing.  Higher-level APIs (such as libX11's `Xutf8LookupString`(3))
77  * may be built upon it, or it can be used directly.
78  *
79  * @endparblock
80  */
81 
82 /**
83  * @page compose-conflicting Conflicting Sequences
84  * @parblock
85  *
86  * To avoid ambiguity, a sequence is not allowed to be a prefix of another.
87  * In such a case, the conflict is resolved thus:
88  *
89  * 1. A longer sequence overrides a shorter one.
90  * 2. An equal sequence overrides an existing one.
91  * 3. A shorter sequence does not override a longer one.
92  *
93  * Sequences of length 1 are allowed.
94  *
95  * @endparblock
96  */
97 
98 /**
99  * @page compose-cancellation Cancellation Behavior
100  * @parblock
101  *
102  * What should happen when a sequence is cancelled?  For example, suppose
103  * the only defined sequences are the examples in @ref compose-overview,
104  * and the input keysyms are `<dead_acute> <b>`.  There are a few approaches:
105  *
106  * 1. Swallow the cancelling keysym; that is, no keysym is produced.
107  *    This is the approach taken by libX11.
108  * 2. Let the cancelling keysym through; that is, `<b>` is produced.
109  * 3. Replay the entire sequence; that is, `<dead_acute> <b>` is produced.
110  *    This is the approach taken by Microsoft Windows (approximately;
111  *    instead of `<dead_acute>`, the underlying key is used.  This is
112  *    difficult to simulate with XKB keymaps).
113  *
114  * You can program whichever approach best fits users' expectations.
115  *
116  * @endparblock
117  */
118 
119 /**
120  * @struct xkb_compose_table
121  * Opaque, reference-counted Compose table object.
122  *
123  * The compose table holds the definitions of the Compose sequences, as
124  * gathered from Compose files.  It is immutable.
125  */
126 struct xkb_compose_table;
127 
128 /**
129  * @struct xkb_compose_state
130  * Opaque, reference-counted Compose state object.
131  *
132  * The compose state maintains state for compose sequence matching, such
133  * as which possible sequences are being matched, and the position within
134  * these sequences.  It acts as a simple state machine wherein keysyms are
135  * the input, and composed keysyms and strings are the output.
136  *
137  * The compose state is usually associated with a keyboard device.
138  */
139 struct xkb_compose_state;
140 
141 /** Flags affecting Compose file compilation (none are defined yet). */
142 enum xkb_compose_compile_flags {
143     /** Do not apply any flags. */
144     XKB_COMPOSE_COMPILE_NO_FLAGS = 0
145 };
146 
147 /** The recognized Compose file formats (currently a single text format). */
148 enum xkb_compose_format {
149     /** The classic libX11 Compose text format, described in Compose(5). */
150     XKB_COMPOSE_FORMAT_TEXT_V1 = 1
151 };
152 
153 /**
154  * @page compose-locale Compose Locale
155  * @parblock
156  *
157  * Compose files are locale dependent:
158  * - Compose files are written for a locale, and the locale is used when
159  *   searching for the appropriate file to use.
160  * - Compose files may reference the locale internally, with directives
161  *   such as \%L.
162  *
163  * As such, functions like xkb_compose_table_new_from_locale() require
164  * a `locale` parameter.  This will usually be the current locale (see
165  * locale(7) for more details).  You may also want to allow the user to
166  * explicitly configure it, so that they can use the Compose file of a
167  * given locale, but not use that locale for other things.
168  *
169  * You may query the current locale as follows:
170  * @code
171  *     const char *locale;
172  *     locale = setlocale(LC_CTYPE, NULL);
173  * @endcode
174  *
175  * This will only give useful results if the program had previously set
176  * the current locale using setlocale(3), with `LC_CTYPE` or `LC_ALL`
177  * and a non-NULL argument.
178  *
179  * If you prefer not to use the locale system of the C runtime library,
180  * you may nevertheless obtain the user's locale directly using
181  * environment variables, as described in locale(7).  For example,
182  * @code
183  *     const char *locale;
184  *     locale = getenv("LC_ALL");
185  *     if (!locale || !*locale)
186  *         locale = getenv("LC_CTYPE");
187  *     if (!locale || !*locale)
188  *         locale = getenv("LANG");
189  *     if (!locale || !*locale)
190  *         locale = "C";
191  * @endcode
192  *
193  * Note that some locales supported by the C standard library may not
194  * have a Compose file assigned.
195  *
196  * @endparblock
197  */
198 
199 /**
200  * Create a compose table for a given locale.
201  *
202  * The locale is used for searching the file-system for an appropriate
203  * Compose file.  The search order is described in Compose(5).  It is
204  * affected by the following environment variables:
205  *
206  * 1. `XCOMPOSEFILE` - see Compose(5).
207  * 2. `XDG_CONFIG_HOME` - before `$HOME/.XCompose` is checked,
208  *    `$XDG_CONFIG_HOME/XCompose` is checked (with a fallback to
209  *    `$HOME/.config/XCompose` if `XDG_CONFIG_HOME` is not defined).
210  *    This is a libxkbcommon extension to the search procedure in
211  *    Compose(5) (since libxkbcommon 1.0.0). Note that other
212  *    implementations, such as libX11, might not find a Compose file in
213  *    this path.
214  * 3. `HOME` - see Compose(5).
215  * 4. `XLOCALEDIR` - if set, used as the base directory for the system's
216  *    X locale files, e.g. `/usr/share/X11/locale`, instead of the
217  *    preconfigured directory.
218  *
219  * @param context
220  *     The library context in which to create the compose table.
221  * @param locale
222  *     The current locale.  See @ref compose-locale.
223  *     \n
224  *     The value is copied, so it is safe to pass the result of getenv(3)
225  *     (or similar) without fear of it being invalidated by a subsequent
226  *     setenv(3) (or similar).
227  * @param flags
228  *     Compilation flags, or XKB_COMPOSE_COMPILE_NO_FLAGS (0) for none.
229  *
230  * @returns A compose table for the given locale, or NULL if the
231  * compilation failed or a Compose file was not found.
232  *
233  * @memberof xkb_compose_table
234  */
235 struct xkb_compose_table *
236 xkb_compose_table_new_from_locale(struct xkb_context *context,
237                                   const char *locale,
238                                   enum xkb_compose_compile_flags flags);
239 
240 /**
241  * Create a new compose table from a Compose file.
242  *
243  * @param context
244  *     The library context in which to create the compose table.
245  * @param file
246  *     The Compose file to compile.
247  * @param locale
248  *     The current locale.  See @ref compose-locale.
249  * @param format
250  *     The text format of the Compose file, e.g. XKB_COMPOSE_FORMAT_TEXT_V1.
251  * @param flags
252  *     Compilation flags, or XKB_COMPOSE_COMPILE_NO_FLAGS (0) for none.
253  *
254  * @returns A compose table compiled from the given file, or NULL if
255  * the compilation failed.
256  *
257  * @memberof xkb_compose_table
258  */
259 struct xkb_compose_table *
260 xkb_compose_table_new_from_file(struct xkb_context *context,
261                                 FILE *file,
262                                 const char *locale,
263                                 enum xkb_compose_format format,
264                                 enum xkb_compose_compile_flags flags);
265 
266 /**
267  * Create a new compose table from a memory buffer.
268  *
269  * This is just like xkb_compose_table_new_from_file(), but instead of
270  * a file, gets the table as one enormous string (`buffer` and `length`).
271  *
272  * @see xkb_compose_table_new_from_file()
273  * @memberof xkb_compose_table
274  */
275 struct xkb_compose_table *
276 xkb_compose_table_new_from_buffer(struct xkb_context *context,
277                                   const char *buffer, size_t length,
278                                   const char *locale,
279                                   enum xkb_compose_format format,
280                                   enum xkb_compose_compile_flags flags);
281 
282 /**
283  * Take a new reference on a compose table.
284  *
285  * @param table The compose table to take a reference on.
286  * @returns The passed in object.
287  * @memberof xkb_compose_table
288  */
289 struct xkb_compose_table *
290 xkb_compose_table_ref(struct xkb_compose_table *table);
291 
292 /**
293  * Release a reference on a compose table, and possibly free it.
294  *
295  * @param table The compose table.  If it is NULL, this function does nothing.
296  *
297  * @memberof xkb_compose_table
298  */
299 void
300 xkb_compose_table_unref(struct xkb_compose_table *table);
301 
302 /** Flags for compose state creation (none are defined yet). */
303 enum xkb_compose_state_flags {
304     /** Do not apply any flags. */
305     XKB_COMPOSE_STATE_NO_FLAGS = 0
306 };
307 
308 /**
309  * Create a new compose state object.
310  *
311  * @param table
312  *     The compose table the state will use.
313  * @param flags
314  *     Creation flags, or XKB_COMPOSE_STATE_NO_FLAGS (0) for none.
315  *
316  * @returns A new compose state, or NULL on failure.
317  *
318  * @memberof xkb_compose_state
319  */
320 struct xkb_compose_state *
321 xkb_compose_state_new(struct xkb_compose_table *table,
322                       enum xkb_compose_state_flags flags);
323 
324 /**
325  * Take a new reference on a compose state object.
326  *
327  * @param state The compose state to take a reference on.
328  * @returns The passed in object.
329  * @memberof xkb_compose_state
330  */
331 struct xkb_compose_state *
332 xkb_compose_state_ref(struct xkb_compose_state *state);
333 
334 /**
335  * Release a reference on a compose state object, and possibly free it.
336  *
337  * @param state The compose state.  If it is NULL, this function does nothing.
338  *
339  * @memberof xkb_compose_state
340  */
341 void
342 xkb_compose_state_unref(struct xkb_compose_state *state);
343 
344 /**
345  * Get the compose table which a compose state object is using.
346  *
347  * This function does not take a new reference on the compose table; you
348  * must explicitly reference it yourself if you plan to use it beyond the
349  * lifetime of the state.
350  *
351  * @returns The compose table which was passed to xkb_compose_state_new()
352  * when creating this state object.
353  *
354  * @memberof xkb_compose_state
355  */
356 struct xkb_compose_table *
357 xkb_compose_state_get_compose_table(struct xkb_compose_state *state);
358 
359 /** Compose state machine status, see xkb_compose_state_get_status(). */
360 enum xkb_compose_status {
361     /** The initial state; no sequence has started yet. */
362     XKB_COMPOSE_NOTHING,
363     /** In the middle of a sequence. */
364     XKB_COMPOSE_COMPOSING,
365     /** A complete sequence has been matched. */
366     XKB_COMPOSE_COMPOSED,
367     /** The last sequence was cancelled due to an unmatched keysym. */
368     XKB_COMPOSE_CANCELLED
369 };
370 
371 /** The effect of a keysym fed to xkb_compose_state_feed(). */
372 enum xkb_compose_feed_result {
373     /** The keysym had no effect - it did not affect the status. */
374     XKB_COMPOSE_FEED_IGNORED,
375     /** The keysym started, advanced, completed or cancelled a sequence. */
376     XKB_COMPOSE_FEED_ACCEPTED
377 };
378 
379 /**
380  * Feed one keysym to the Compose sequence state machine.
381  *
382  * This function can advance into a compose sequence, cancel a sequence,
383  * start a new sequence, or do nothing in particular.  The resulting
384  * status may be observed with xkb_compose_state_get_status().
385  *
386  * Some keysyms, such as keysyms for modifier keys, are ignored - they
387  * have no effect on the status or otherwise.
388  *
389  * The following is a description of the possible status transitions, in
390  * the format CURRENT STATUS => NEXT STATUS, given a non-ignored input
391  * keysym `keysym`:
392  *
393    @verbatim
394    NOTHING or CANCELLED or COMPOSED =>
395       NOTHING   if keysym does not start a sequence.
396       COMPOSING if keysym starts a sequence.
397       COMPOSED  if keysym starts and terminates a single-keysym sequence.
398 
399    COMPOSING =>
400       COMPOSING if keysym advances any of the currently possible
401                 sequences but does not terminate any of them.
402       COMPOSED  if keysym terminates one of the currently possible
403                 sequences.
404       CANCELLED if keysym does not advance any of the currently
405                 possible sequences.
406    @endverbatim
407  *
408  * The current Compose formats do not support multiple-keysyms.
409  * Therefore, if you are using a function such as xkb_state_key_get_syms()
410  * and it returns more than one keysym, consider feeding XKB_KEY_NoSymbol
411  * instead.
412  *
413  * @param state
414  *     The compose state object.
415  * @param keysym
416  *     A keysym, usually obtained after a key-press event, with a
417  *     function such as xkb_state_key_get_one_sym().
418  *
419  * @returns XKB_COMPOSE_FEED_IGNORED if the keysym was ignored, otherwise
420  * XKB_COMPOSE_FEED_ACCEPTED.  This is useful, for example, if you want to
421  * keep a record of the sequence matched thus far.
422  * @memberof xkb_compose_state
423  */
424 enum xkb_compose_feed_result
425 xkb_compose_state_feed(struct xkb_compose_state *state,
426                        xkb_keysym_t keysym);
427 
428 /**
429  * Reset the Compose sequence state machine.
430  *
431  * The status is set to XKB_COMPOSE_NOTHING, and the current sequence
432  * is discarded.
433  * @param state The compose state object to reset.
434  * @memberof xkb_compose_state
435  */
436 void
437 xkb_compose_state_reset(struct xkb_compose_state *state);
438 
439 /**
440  * Get the current status of the compose state machine.
441  * @returns The current status, as one of the xkb_compose_status values.
442  * @see xkb_compose_status
443  * @memberof xkb_compose_state
444  */
445 enum xkb_compose_status
446 xkb_compose_state_get_status(struct xkb_compose_state *state);
447 
448 /**
449  * Get the result Unicode/UTF-8 string for a composed sequence.
450  *
451  * See @ref compose-overview for more details.  This function is only
452  * useful when the status is XKB_COMPOSE_COMPOSED.
453  *
454  * @param[in] state
455  *     The compose state.
456  * @param[out] buffer
457  *     A buffer to write the string into.
458  * @param[in] size
459  *     Size of the buffer.
460  *
461  * @warning If the buffer passed is too small, the string is truncated
462  * (though still NUL-terminated).
463  *
464  * @returns
465  *   The number of bytes required for the string, excluding the NUL byte.
466  *   If the sequence is not complete, or does not have a viable result
467  *   string, returns 0, and sets `buffer` to the empty string (if possible).
468  * @returns
469  *   Truncation has occurred if the return value is greater than or equal
470  *   to `size`, similarly to the `snprintf`(3) function.
471  *   You may safely pass NULL and 0 to `buffer` and `size` to find the
472  *   required size (without the NUL-byte).
473  *
474  * @memberof xkb_compose_state
475  */
476 int
477 xkb_compose_state_get_utf8(struct xkb_compose_state *state,
478                            char *buffer, size_t size);
479 
480 /**
481  * Get the result keysym for a composed sequence.
482  *
483  * See @ref compose-overview for more details.  This function is only
484  * useful when the status is XKB_COMPOSE_COMPOSED.
485  *
486  * @param state The compose state.
487  * @returns The result keysym.  If the sequence is not complete, or does
488  * not specify a result keysym, returns XKB_KEY_NoSymbol.
489  * @memberof xkb_compose_state
490  */
491 xkb_keysym_t
492 xkb_compose_state_get_one_sym(struct xkb_compose_state *state);
493 
494 /** @} */
495 
496 #ifdef __cplusplus
497 } /* extern "C" */
498 #endif
499 
500 #endif /* _XKBCOMMON_COMPOSE_H */
501