#
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Copyright (c) 2016, International Business Machines Corporation and others. All Rights Reserved.

# file: grapheme.txt
#
# Reference Grapheme Break rules for intltest rbbi/RBBIMonkeyTest
#
#
# Note: Rule syntax and the monkey test itself are still a work in progress.
#       They are expected to change with review and the addition of support for rule tailoring.
#
# Layout of this file:
#   1. Header settings (break type and locale).
#   2. Named character sets, one per Grapheme_Cluster_Break property value used below.
#   3. The break rules themselves, labelled with GB-style rule names.
#
# NOTE(review): as read here, a '÷' inside a rule marks a position where a break
#       is placed, a rule without '÷' keeps the matched sequence together, and '.'
#       matches any single character. Rules appear to be order-sensitive (GB11 is
#       deliberately listed ahead of GB9), so do not reorder them. Confirm both
#       points against the RBBIMonkeyTest rule parser before relying on them.

type = grapheme;      # one of grapheme | word | line | sentence
locale = en;

#
# Character class definitions
#
CR                 = [\p{Grapheme_Cluster_Break = CR}];
LF                 = [\p{Grapheme_Cluster_Break = LF}];

Control            = [[\p{Grapheme_Cluster_Break = Control}]];
Extend             = [[\p{Grapheme_Cluster_Break = Extend}]];
ZWJ                = [\p{Grapheme_Cluster_Break = ZWJ}];
Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
Prepend            = [\p{Grapheme_Cluster_Break = Prepend}];
SpacingMark        = [\p{Grapheme_Cluster_Break = SpacingMark}];

#
# Korean Syllable Definitions
#
L                  = [\p{Grapheme_Cluster_Break = L}];
V                  = [\p{Grapheme_Cluster_Break = V}];
T                  = [\p{Grapheme_Cluster_Break = T}];
LV                 = [\p{Grapheme_Cluster_Break = LV}];
LVT                = [\p{Grapheme_Cluster_Break = LVT}];

# Emoji definitions

Extended_Pict      = [:ExtPict:];

#
# Break rules
#

# CR LF stays together; otherwise a break follows and precedes any
# Control, CR or LF.
GB3:    CR LF;
GB4:    (Control | CR | LF) ÷;
GB5:    . ÷ (Control | CR | LF);

# Hangul syllable sequences (leading consonant, vowel, trailing consonant).
GB6:    L (L | V | LV | LVT);
GB7:    (LV | V) (V | T);
GB8:    (LVT | T) T;

# Emoji ZWJ sequence: pictograph, optional Extend characters, ZWJ, pictograph.
# Listed before GB9 - presumably so that the whole sequence is matched here
# before GB9 can attach the Extend / ZWJ on its own (TODO confirm).
GB11:   Extended_Pict Extend* ZWJ Extended_Pict;
GB9:    . (Extend | ZWJ);

# Any character followed by a SpacingMark; a Prepend followed by any character.
GB9a:   . SpacingMark;
GB9b:   Prepend .;

# Regional Indicators, split into pairs.
#      Note that a pair of RIs that is not followed by a third RI will fall into
#      the normal rules for Extend, etc.
#
GB12:   Regional_Indicator Regional_Indicator ÷ Regional_Indicator;
GB13:   Regional_Indicator Regional_Indicator;

# Fallback: break after any character not handled by an earlier rule.
GB999:  . ÷;