# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
#
# File: ThaiLogical_Latin.txt
# Generated from CLDR
#

# Thai-Latin
# This set of rules follows ISO 11940
# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
# except that it does not mention an implicit vowel, so we use o\u0323
#
# The transcription is fairly ugly, so we ought to also do the UNGEGN version
# see: http://www.eki.ee/wgrs/rom1_th.pdf
# and probably make that the main variant.
#
# Note: this is an internal file. The NFD/NFC is handled externally, in the index
# The insertion of spaces between words, the reversal of the vowels
# and the conversion of space to semicolon are done *outside* of these rules.
# So as far as these rules are concerned, the vowels are in logical order!

# insert implicit vowel (and remove it going the other way)
# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
#$consonant = [ก-ฮ];
#$vowel = [ะ-\u0E3Aเ-ไ\u0E47];
#{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
#\uE000 → o\u0323 ;
# ← o\u0323 ;

# Characters whose canonical combining class is neither 0 nor "above".
# The reverse (Latin-to-Thai) rules use ($notAbove*) to step over such marks
# when they sit between a base letter and its combining mark above.
$notAbove = [^\p{ccc=0}\p{ccc=above}] ;
# Characters whose canonical combining class is neither 0 nor "below".
$notBelow = [^\p{ccc=0}\p{ccc=below}] ;

# Consonants
# Warning: the 'h's need to be handled carefully!
# What we really want to say is the following, but we can't
# $notHAccent = !($notAbove* \u0304 | $notBelow* \u0323) ;
# Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
$freeStandingBelow = [\u0325 ];
$hAccent = [ \u0304 \u0323];
# One following character that is neither a free-standing below accent nor an h accent.
$notHAccent0 = [^$freeStandingBelow$hAccent];
# A free-standing below accent followed by a character that is not an h accent.
$notHAccent1 = $freeStandingBelow [^$hAccent];

# Reverse rules of the form "X | $1 ← base ($notAbove*) mark" put the cursor (|)
# after X, so the captured intervening marks ($1) are transliterated again.
ห → h\u0304 ; # THAI CHARACTER HO HIP
ห | $1 ← h ($notAbove*) \u0304; # backward case, account for reordering
ฮ ↔ h\u0323 ; # THAI CHARACTER HO NOKHUK

# k-series: the accented forms come before plain "kh", and "kh" before "k".
ข ↔ k\u0304h ; # THAI CHARACTER KHO KHAI
ฃ ↔ k\u0323\u0304h ; # THAI CHARACTER KHO KHUAT
ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON
ฆ ↔ k\u0323h ; # THAI CHARACTER KHO RAKHANG
# In the reverse direction "kh" is only taken as one letter when the text after
# the h is not an h accent (otherwise the h belongs to HO HIP / HO NOKHUK).
ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
ก ↔ k ; # THAI CHARACTER KO KAI

# p-series (same lookahead guard on "ph")
ภ ↔ p\u0323h ; # THAI CHARACTER PHO SAMPHAO
ผ ↔ p\u0304h ; # THAI CHARACTER PHO PHUNG
พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
ป ↔ p ; # THAI CHARACTER PO PLA

# c-series (same lookahead guard on "ch")
ฉ ↔ c\u0304h ; # THAI CHARACTER CHO CHING
ฌ ↔ c\u0323h ; # THAI CHARACTER CHO CHOE
ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
จ ↔ c ; # THAI CHARACTER CHO CHAN

# t-series (same lookahead guard on "th")
ฐ ↔ t\u0323\u0304h ; # THAI CHARACTER THO THAN
ฑ ↔ t\u0331h ; # THAI CHARACTER THO NANGMONTHO
ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO
ถ ↔ t\u0304h ; # THAI CHARACTER THO THUNG
ธ ↔ t\u0323h ; # THAI CHARACTER THO THONG
ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick.
ฏ ↔ t\u0329 ; # THAI CHARACTER TO PATAK
ต ↔ t ; # THAI CHARACTER TO TAO
# since there is no singleton g (generated), don't worry about that.
ง ↔ ng ; # THAI CHARACTER NGO NGU
ณ ↔ n\u0323 ; # THAI CHARACTER NO NEN
น ↔ n ; # THAI CHARACTER NO NU
ญ ↔ y\u0323 ; # THAI CHARACTER YO YING
ฎ ↔ d\u0323 ; # THAI CHARACTER DO CHADA
ด ↔ d ; # THAI CHARACTER DO DEK
บ ↔ b ; # THAI CHARACTER BO BAIMAI
# Letters whose Latin form ends in a mark above use a forward-only rule (→)
# plus a separate reverse rule. The reverse rule tolerates other marks between
# the base letter and the mark above; with no such marks it matches the plain
# sequence as well. The cursor (|) is placed before the captured marks ($1)
# so that they are transliterated again.
ฝ → f\u0304 ; # THAI CHARACTER FO FA
ฝ | $1 ← f ($notAbove*) \u0304; # backward case, account for reordering
ม ↔ m ; # THAI CHARACTER MO MA
ย ↔ y ; # THAI CHARACTER YO YAK
ร ↔ r ; # THAI CHARACTER RO RUA
ฤ ↔ v ; # THAI CHARACTER RU
ฦ ↔ ł ; # THAI CHARACTER LU
ว ↔ w ; # THAI CHARACTER WO WAEN
ศ → s\u0323\u0304 ; # THAI CHARACTER SO SALA***
ศ | $1 ← s \u0323 ($notAbove*) \u0304; # backward case, account for reordering
ษ ↔ s\u0304ʹ ; # THAI CHARACTER SO RUSI
ส → s\u0304 ; # THAI CHARACTER SO SUA***
ส | $1 ← s ($notAbove*) \u0304; # backward case, account for reordering
ฬ ↔ l\u0323 ; # THAI CHARACTER LO CHULA
ล ↔ l ; # THAI CHARACTER LO LING
ฟ ↔ f ; # THAI CHARACTER FO FAN
อ ↔ x ; # THAI CHARACTER O ANG
ซ ↔ s ; # THAI CHARACTER SO SO

# vowels
\u0E31 ↔ a\u0323 ; # THAI CHARACTER MAI HAN-AKAT
า → a\u0304 ; # THAI CHARACTER SARA AA
า | $1 ← a ($notAbove*) \u0304; # backward case, account for reordering
# We deviate from ISO for SARA AM for disambiguation
ำ → a \u0309; # THAI CHARACTER SARA AM
ำ | $1 ← a ($notAbove*) \u0309 ; # backward case, account for reordering
ะ ↔ a ; # THAI CHARACTER SARA A
\u0E35 → i\u0304 ; # THAI CHARACTER SARA II
\u0E35 | $1 ← i ($notAbove*) \u0304 ; # backward case, account for reordering
\u0E37 → u\u0323\u0304 ; # THAI CHARACTER SARA UEE
\u0E37 | $1 ← u \u0323 ($notAbove*) \u0304 ; # backward case, account for reordering
\u0E36 ↔ u\u0323 ; # THAI CHARACTER SARA UE
\u0E39 → u\u0304 ; # THAI CHARACTER SARA UU
\u0E39 | $1 ← u ($notAbove*) \u0304 ; # backward case, account for reordering
\u0E38 ↔ u ; # THAI CHARACTER SARA U
ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI
# ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT
เ ↔ e ; # THAI CHARACTER SARA E
แ ↔ æ ; # THAI CHARACTER SARA AE
โ ↔ o ; # THAI CHARACTER SARA O
ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN
ไ ↔ i\u0323 ; # THAI CHARACTER SARA AI MAIMALAI
ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO

# tone marks and other signs
\u0E47 ↔ \u0306 ; # THAI CHARACTER MAITAIKHU
\u0E48 ↔ \u0300 ; # THAI CHARACTER MAI EK
\u0E49 ↔ \u0302 ; # THAI CHARACTER MAI THO
\u0E4A ↔ \u0301 ; # THAI CHARACTER MAI TRI
\u0E4B ↔ \u030C ; # THAI CHARACTER MAI CHATTAWA
\u0E4C ↔ \u0312 ; # THAI CHARACTER THANTHAKHAT
\u0E4E ↔ '~' ; # THAI CHARACTER YAMAKKAN
# We deviate from ISO for disambiguation
\u0E4D ↔ \u030A ; # THAI CHARACTER NIKHAHIT
๏ ↔ '§' ; # THAI CHARACTER FONGMAN

# digits
๐ ↔ 0 ; # THAI DIGIT ZERO
๑ ↔ 1 ; # THAI DIGIT ONE
๒ ↔ 2 ; # THAI DIGIT TWO
๓ ↔ 3 ; # THAI DIGIT THREE
๔ ↔ 4 ; # THAI DIGIT FOUR
๕ ↔ 5 ; # THAI DIGIT FIVE
๖ ↔ 6 ; # THAI DIGIT SIX
๗ ↔ 7 ; # THAI DIGIT SEVEN
๘ ↔ 8 ; # THAI DIGIT EIGHT
๙ ↔ 9 ; # THAI DIGIT NINE
๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU
๛ ↔ » ; # THAI CHARACTER KHOMUT
ๆ ↔ « ; # THAI CHARACTER MAIYAMOK

# moved down to make shorter first
# NOTE(review): presumably this means the rules below were moved after the
# longer sequences above (e.g. the bare "i" after i\u0304 and i\u0323), so the
# longer ones are tried first - confirm.
#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
\u0E3A ↔ ˌ ; # THAI CHARACTER PHINTHU
\u0E34 ↔ i ; # THAI CHARACTER SARA I

# fallbacks
# Reverse-only rules: each Latin letter on the right is replaced by the letter
# on the left with the cursor (|) placed before it, so the replacement is then
# transliterated by the rules above.
| k ← g ;
| k ← h ;
| c ← j ;
| k ← q ;
| s ← z ;

# Applies to the reverse direction only: lowercase the text.
:: (lower);