1 /*
2  * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 package test.java.util.regex;
25 
26 import java.util.HashMap;
27 import java.util.Locale;
28 
/**
 * Static predicates for the POSIX-style character classes, each defined in
 * terms of Unicode properties as noted on the individual methods.
 *
 * <p>The property expressions in the method comments use the {@code \p{...}}
 * notation; they appear to follow the compatibility-property definitions of
 * Unicode Technical Standard #18 (assumption, not stated in this file).
 *
 * <p>All methods take a code point as an {@code int}. The class holds no
 * state and is not meant to be instantiated.
 */
public final class POSIX_Unicode {

    /** Bit set (indexed by {@link Character#getType(int)}) of the separator categories Zs, Zl, Zp. */
    private static final int SEPARATOR_TYPES =
        (1 << Character.SPACE_SEPARATOR) |
        (1 << Character.LINE_SEPARATOR) |
        (1 << Character.PARAGRAPH_SEPARATOR);

    /** Bit set of the punctuation categories Pc, Pd, Ps, Pe, Po, Pi, Pf. */
    private static final int PUNCTUATION_TYPES =
        (1 << Character.CONNECTOR_PUNCTUATION) |
        (1 << Character.DASH_PUNCTUATION) |
        (1 << Character.START_PUNCTUATION) |
        (1 << Character.END_PUNCTUATION) |
        (1 << Character.OTHER_PUNCTUATION) |
        (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
        (1 << Character.FINAL_QUOTE_PUNCTUATION);

    /** Bit set of the mark categories Mn, Me, Mc plus connector punctuation Pc. */
    private static final int WORD_EXTRA_TYPES =
        (1 << Character.NON_SPACING_MARK) |
        (1 << Character.ENCLOSING_MARK) |
        (1 << Character.COMBINING_SPACING_MARK) |
        (1 << Character.CONNECTOR_PUNCTUATION);

    /** Not instantiable: the class only exposes static predicates. */
    private POSIX_Unicode() {
    }

    /**
     * Tests whether the general category of {@code ch} is a member of the
     * given bit set.
     *
     * @param typeMask bit set in which bit {@code n} stands for the general
     *                 category constant with value {@code n}
     * @param ch       the code point to classify
     * @return {@code true} if the bit for {@code Character.getType(ch)} is set
     */
    private static boolean hasTypeIn(int typeMask, int ch) {
        return ((typeMask >> Character.getType(ch)) & 1) != 0;
    }

    /**
     * \p{Alphabetic}
     *
     * @param ch the code point to test
     * @return the result of {@link Character#isAlphabetic(int)}
     */
    public static boolean isAlpha(int ch) {
        return Character.isAlphabetic(ch);
    }

    /**
     * @param ch the code point to test
     * @return the result of {@link Character#isLowerCase(int)}
     */
    public static boolean isLower(int ch) {
        return Character.isLowerCase(ch);
    }

    /**
     * @param ch the code point to test
     * @return the result of {@link Character#isUpperCase(int)}
     */
    public static boolean isUpper(int ch) {
        return Character.isUpperCase(ch);
    }

    /**
     * \p{Whitespace}
     *
     * <p>True for any space, line or paragraph separator, for the controls
     * U+0009..U+000D, and for U+0085.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is white space in the above sense
     */
    public static boolean isSpace(int ch) {
        return hasTypeIn(SEPARATOR_TYPES, ch) ||
               (ch >= 0x9 && ch <= 0xd) ||
               (ch == 0x85);
    }

    /**
     * \p{gc=Control}
     *
     * @param ch the code point to test
     * @return {@code true} if the general category of {@code ch} is
     *         {@link Character#CONTROL}
     */
    public static boolean isCntrl(int ch) {
        return Character.getType(ch) == Character.CONTROL;
    }

    /**
     * \p{gc=Punctuation}
     *
     * @param ch the code point to test
     * @return {@code true} if the general category of {@code ch} is one of
     *         the seven punctuation categories
     */
    public static boolean isPunct(int ch) {
        return hasTypeIn(PUNCTUATION_TYPES, ch);
    }

    /**
     * \p{gc=Decimal_Number}
     * \p{Hex_Digit}    -> PropList.txt: Hex_Digit
     *
     * <p>The digit ranges of Hex_Digit (U+0030..U+0039, U+FF10..U+FF19) are
     * already accepted by {@link Character#isDigit(int)}, so only the letter
     * ranges are listed explicitly.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is a decimal digit or one of the
     *         ASCII or fullwidth letters A-F / a-f
     */
    public static boolean isHexDigit(int ch) {
        return Character.isDigit(ch) ||
               (ch >= 0x0041 && ch <= 0x0046) ||   // A-F
               (ch >= 0x0061 && ch <= 0x0066) ||   // a-f
               (ch >= 0xFF21 && ch <= 0xFF26) ||   // fullwidth A-F
               (ch >= 0xFF41 && ch <= 0xFF46);     // fullwidth a-f
    }

    /**
     * \p{gc=Decimal_Number}
     *
     * @param ch the code point to test
     * @return the result of {@link Character#isDigit(int)}
     */
    public static boolean isDigit(int ch) {
        return Character.isDigit(ch);
    }

    /**
     * \p{alpha}
     * \p{digit}
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is alphabetic or a decimal digit
     */
    public static boolean isAlnum(int ch) {
        return Character.isAlphabetic(ch) || Character.isDigit(ch);
    }

    /**
     * \p{Whitespace} --
     * [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
     *  \p{gc=Line_Separator}
     *  \p{gc=Paragraph_Separator}]
     *
     * <p>That is, white space that does not break a line: what remains of
     * {@link #isSpace(int)} is U+0009 and the space separators.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is white space but none of the
     *         excluded line-breaking characters
     */
    public static boolean isBlank(int ch) {
        if (!isSpace(ch)) {
            return false;
        }
        // LF, VT, FF, CR and NEL are white space but not blank.
        if ((ch >= 0xa && ch <= 0xd) || ch == 0x85) {
            return false;
        }
        int type = Character.getType(ch);
        return type != Character.LINE_SEPARATOR &&
               type != Character.PARAGRAPH_SEPARATOR;
    }

    /**
     * [^
     *  \p{space}
     *  \p{gc=Control}
     *  \p{gc=Surrogate}
     *  \p{gc=Unassigned}]
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is neither white space nor of
     *         category control, surrogate or unassigned
     */
    public static boolean isGraph(int ch) {
        if (isSpace(ch)) {
            return false;
        }
        int type = Character.getType(ch);
        return type != Character.CONTROL &&
               type != Character.SURROGATE &&
               type != Character.UNASSIGNED;
    }

    /**
     * \p{graph}
     * \p{blank}
     * -- \p{cntrl}
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is graph or blank, and not a control
     */
    public static boolean isPrint(int ch) {
        return (isGraph(ch) || isBlank(ch)) && !isCntrl(ch);
    }

    /**
     * PropList.txt:Noncharacter_Code_Point
     *
     * <p>Matches U+FDD0..U+FDEF and the last two code points of every plane
     * (low 16 bits equal to FFFE or FFFF). Values that are not valid code
     * points are rejected, consistent with the other predicates of this class.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is a valid code point designated as
     *         a noncharacter
     */
    public static boolean isNoncharacterCodePoint(int ch) {
        if (!Character.isValidCodePoint(ch)) {
            // Without this guard the low-16-bit test below would accept
            // e.g. -1 or 0x11FFFE, which lie outside the codespace.
            return false;
        }
        return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
    }

    /**
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is U+200C or U+200D
     */
    public static boolean isJoinControl(int ch) {
        return (ch == 0x200C || ch == 0x200D);
    }

    /**
     *  \p{alpha}
     *  \p{gc=Mark}
     *  \p{digit}
     *  \p{gc=Connector_Punctuation}
     *
     * <p>In addition to the sets listed above, the join controls accepted by
     * {@link #isJoinControl(int)} are treated as word characters.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is a word character in the above sense
     */
    public static boolean isWord(int ch) {
        return isAlpha(ch) ||
               hasTypeIn(WORD_EXTRA_TYPES, ch) ||
               isDigit(ch) ||
               isJoinControl(ch);
    }
}
149