1 // Copyright 2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Unicode character groups.
6 
7 // The codes get split into ranges of 16-bit codes
8 // and ranges of 32-bit codes.  It would be simpler
9 // to use only 32-bit ranges, but these tables are large
10 // enough to warrant extra care.
11 //
12 // Using just 32-bit ranges gives 27 kB of data.
13 // Adding 16-bit ranges gives 18 kB of data.
14 // Adding an extra table of 16-bit singletons would reduce
15 // to 16.5 kB of data but make the data harder to use;
16 // we don't bother.
17 
18 #ifndef RE2_UNICODE_GROUPS_H__
19 #define RE2_UNICODE_GROUPS_H__
20 
21 #include "util/util.h"
22 
23 namespace re2 {
24 
25 struct URange16
26 {
27   uint16 lo;
28   uint16 hi;
29 };
30 
31 struct URange32
32 {
33   uint32 lo;
34   uint32 hi;
35 };
36 
37 struct UGroup
38 {
39   const char *name;
40   int sign;  // +1 for [abc], -1 for [^abc]
41   URange16 *r16;
42   int nr16;
43   URange32 *r32;
44   int nr32;
45 };
46 
47 // Named by property or script name (e.g., "Nd", "N", "Han").
48 // Negated groups are not included.
49 extern UGroup unicode_groups[];
50 extern int num_unicode_groups;
51 
52 // Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]").
53 // Negated groups are included.
54 extern UGroup posix_groups[];
55 extern int num_posix_groups;
56 
57 // Named by Perl name (e.g., "\\d", "\\D").
58 // Negated groups are included.
59 extern UGroup perl_groups[];
60 extern int num_perl_groups;
61 
62 }  // namespace re2
63 
64 #endif  // RE2_UNICODE_GROUPS_H__
65