1 /* 2 * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 package test.java.util.regex; 25 26 import java.util.HashMap; 27 import java.util.Locale; 28 29 public final class POSIX_Unicode { 30 isAlpha(int ch)31 public static boolean isAlpha(int ch) { 32 return Character.isAlphabetic(ch); 33 } 34 isLower(int ch)35 public static boolean isLower(int ch) { 36 return Character.isLowerCase(ch); 37 } 38 isUpper(int ch)39 public static boolean isUpper(int ch) { 40 return Character.isUpperCase(ch); 41 } 42 43 // \p{Whitespace} isSpace(int ch)44 public static boolean isSpace(int ch) { 45 return ((((1 << Character.SPACE_SEPARATOR) | 46 (1 << Character.LINE_SEPARATOR) | 47 (1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1) 48 != 0 || 49 (ch >= 0x9 && ch <= 0xd) || 50 (ch == 0x85); 51 } 52 53 // \p{gc=Control} isCntrl(int ch)54 public static boolean isCntrl(int ch) { 55 return Character.getType(ch) == Character.CONTROL; 56 } 57 58 // \p{gc=Punctuation} isPunct(int ch)59 public static boolean isPunct(int ch) { 60 return ((((1 << Character.CONNECTOR_PUNCTUATION) | 61 (1 << Character.DASH_PUNCTUATION) | 62 (1 << Character.START_PUNCTUATION) | 63 (1 << Character.END_PUNCTUATION) | 64 (1 << Character.OTHER_PUNCTUATION) | 65 (1 << Character.INITIAL_QUOTE_PUNCTUATION) | 66 (1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1) 67 != 0; 68 } 69 70 // \p{gc=Decimal_Number} 71 // \p{Hex_Digit} -> PropList.txt: Hex_Digit isHexDigit(int ch)72 public static boolean isHexDigit(int ch) { 73 return Character.isDigit(ch) || 74 (ch >= 0x0030 && ch <= 0x0039) || 75 (ch >= 0x0041 && ch <= 0x0046) || 76 (ch >= 0x0061 && ch <= 0x0066) || 77 (ch >= 0xFF10 && ch <= 0xFF19) || 78 (ch >= 0xFF21 && ch <= 0xFF26) || 79 (ch >= 0xFF41 && ch <= 0xFF46); 80 } 81 82 // \p{gc=Decimal_Number} isDigit(int ch)83 public static boolean isDigit(int ch) { 84 return Character.isDigit(ch); 85 }; 86 87 // \p{alpha} 88 // \p{digit} isAlnum(int ch)89 public static boolean isAlnum(int ch) { 90 return Character.isAlphabetic(ch) || Character.isDigit(ch); 91 } 92 93 // \p{Whitespace} -- 94 // [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL} -> 0xa, 0xb, 0xc, 0xd, 0x85 95 // \p{gc=Line_Separator} 96 // \p{gc=Paragraph_Separator}] isBlank(int ch)97 public static boolean isBlank(int ch) { 98 int type = Character.getType(ch); 99 return isSpace(ch) && 100 ch != 0xa & ch != 0xb && ch !=0xc && ch != 0xd && ch != 0x85 && 101 type != Character.LINE_SEPARATOR && 102 type != Character.PARAGRAPH_SEPARATOR; 103 } 104 105 // [^ 106 // \p{space} 107 // \p{gc=Control} 108 // \p{gc=Surrogate} 109 // \p{gc=Unassigned}] isGraph(int ch)110 public static boolean isGraph(int ch) { 111 int type = Character.getType(ch); 112 return !(isSpace(ch) || 113 Character.CONTROL == type || 114 Character.SURROGATE == type || 115 Character.UNASSIGNED == type); 116 } 117 118 // \p{graph} 119 // \p{blank} 120 // -- \p{cntrl} isPrint(int ch)121 public static boolean isPrint(int ch) { 122 return (isGraph(ch) || isBlank(ch)) && !isCntrl(ch); 123 } 124 125 // PropList.txt:Noncharacter_Code_Point isNoncharacterCodePoint(int ch)126 public static boolean isNoncharacterCodePoint(int ch) { 127 return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef); 128 } 129 isJoinControl(int ch)130 public static boolean isJoinControl(int ch) { 131 return (ch == 0x200C || ch == 0x200D); 132 } 133 134 // \p{alpha} 135 // \p{gc=Mark} 136 // \p{digit} 137 // \p{gc=Connector_Punctuation} isWord(int ch)138 public static boolean isWord(int ch) { 139 return isAlpha(ch) || 140 ((((1 << Character.NON_SPACING_MARK) | 141 (1 << Character.ENCLOSING_MARK) | 142 (1 << Character.COMBINING_SPACING_MARK) | 143 (1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1) 144 != 0 || 145 isDigit(ch) || 146 isJoinControl(ch); 147 } 148 } 149