# convert to the consonant.
# The two rules below rewrite U+1040 (digit zero) as U+101D (letter wa) and
# U+1044 (digit four) as U+104E, but only when the digit directly follows a
# sign in U+102B..U+103F and is followed by $nondigits.
([\u102b-\u103f]) \u1040 ($nondigits) → $1 \u101d $2;
([\u102b-\u103f]) \u1044 ($nondigits) → $1 \u104E $2;

####
#### STAGE 1.1: Strip spaces immediately before combining characters.
####   Move e-vowel after consonants and medials
####   Now every codepoint is Unicode. This starts conversion
####   from semi-visual order to logical order.
####
# NOTE: each rule and each comment must stay on its own line. A '#' comment
# runs to the end of the line, so joining lines silently disables every rule
# that follows the first '#'.
::Null;

# Don't remove spaces before E vowel or medial Ra at this stage
# (U+1031 and U+103C are deliberately absent from the class below).
# A dot below (U+1037) after whitespace is first moved in front of it.
($wspace) \u1037 > \u1037 $1;
($wspace+) ([\u102b-\u1030\u1032-\u103b\u103d\u103e]) → $2;

# Remove a duplicate early
\u1037+ → \u1037;

# Move e-vowel after medials and consonants.
# A run of U+1031 is collapsed to a single U+1031 by each of these rules.
\u1031+ $ukinzi ($consonant) > $ukinzi $1 \u1031;
\u1031+ \u1037+ ($consonant) > $1 \u1031 \u1037 ;
\u1031+ \u103c ($consonant) > $1 \u103c \u1031;

# Move medials other than 103c before the 1031. Leave 103c for
# the next consonant.
\u1031+ ($consonant) ([\u103b\u103d\u103e]+) > $1 $2 \u1031;
\u1031+ ($vowelsAndConsonants) > $1 \u1031;

####
#### STAGE 2: POST REORDERING RULES FOR UNICODE RENDERING
####
::Null;

# Put asat (U+103A) in front of medial ya (U+103B).
\u103b \u103a > \u103a \u103b;

# Simpler replacements for Zawgyi 1025
# (U+1025 followed by U+102E becomes the single letter U+1026).
\u1025 \u102E → \u1026;

# Asat and dot below reordering, to Unicode NFC.
\u103A\u1037 → \u1037\u103A;

# Reorder some vowel signs
# U+1036 is moved behind any medials and vowel signs that follow it;
# then U+102D/U+102E/U+1032 are placed before U+102B/U+102C/U+102F/U+1030.
\u1036 ($umedial*) ($vowelsign+) → $1 $2 \u1036 ;
([\u102B\u102C\u102F\u1030]) ([\u102D\u102E\u1032]) → $2 $1;

# Move ra medial which precedes consonant, but not other medials.
\u103C ($consonant) → $1 \u103C;

####
#### Stage 3
####   Move \u1036, and \u103C after consonants.
::Null;

# A medial standing before U+1039 + consonant is moved behind that pair.
($umedial) \u1039 ($consonant) > \u1039 $2 $1;
\u103C \u103A \u1039 ($consonant) → \u103A \u1039 $1 \u103C;
\u1036 ($umedial+) → $1 \u1036;

####
#### Stage 4
####   Reordering medials, dot below, contractions, E sign, and asat.
# NOTE: each rule and each comment must stay on its own line. A '#' comment
# runs to the end of the line, so joining lines silently disables every rule
# that follows the first '#'.
::Null;

# Reorder the medials
# Target order is U+103B, U+103C, U+103D, U+103E.
([\u103C\u103D\u103E]+) \u103B → \u103B $1;
([\u103D\u103E]+) \u103C → \u103C $1;
\u103E\u103D → \u103D\u103E ;

# Contractions with vowel signs
# U+1039 + consonant is pulled in front of the E vowel / vowel signs
# that precede it.
([\u1031]+) ($vowelsign*) \u1039 ($consonant) → \u1039 $3 $1 $2;
($vowelsign+) \u1039 ($consonant) → \u1039 $2 $1;

# Move vowel sign E \u1031 after medials, but not across consonants
($umedial*) ([\u1031]+) ($umedial*) → $1 $3 $2;

# Reorder dot below after medials and vowel diacritics
\u1037 ([\u102D-\u1030\u1032\u1036\u103b-\u103e]+) → $1 \u1037;

# Move vowel signs after medials
($vowelsign+) ($umedial+) → $2 $1;

# Reorder modifiers and asat
# Between two consonants, asat (U+103A) is moved in front of the single
# sign that preceded it.
($consonant) ([\u102B-\u1032\u1036\u103B-\u103E]) \u103A ($consonant) → $1 \u103A $2 $3;

####
#### Stage 5.  More reorderings
####   Vowel signs after medials, sort medials,
####
::Null;

# Replace CA + YA with JHA after moving other things beyond the medials.
\u1005 \u103b → \u1008;

# More moving vowel signs after medials
([\u102b-\u1032]) ($umedial) → $2 $1;

# Sort the medials
# Same ordering as in Stage 4, applied one sign at a time.
([\u103C\u103D\u103E]) \u103B → \u103B $1;
([\u103D\u103E]) \u103C → \u103C $1;
\u103E\u103D → \u103D\u103E ;

# Move visarga after other signs
\u1038 ($vowelmedial) → $1 \u1038;

# Reorder
# (U+102F goes before U+1036.)
\u1036 \u102f → \u102f \u1036;

###
### Stage 6
###   Finish conflicting and extra diacritics. Remove some white space
###
::Null;

# Fix duplicate combiners
# Each rule collapses a run of two or more identical signs to one.
\u102D \u102D+ → \u102D;
\u102E \u102E+ → \u102E;
\u102F \u102F+ → \u102F;
\u1030 \u1030+ → \u1030;
\u1032 \u1032+ → \u1032;
\u1036 \u1036+ → \u1036;
\u1037 \u1037+ → \u1037;
\u1039 \u1039+ → \u1039;
\u103a \u103a+ → \u103a;
\u103b \u103b+ → \u103b;
\u103c \u103c+ → \u103c;
\u103d \u103d+ → \u103d;
\u103e \u103e+ → \u103e;

# http://unicode.org/cldr/trac/ticket/10386
# Fix overlapping signs
\u102F [\u1030\u103a] → \u102F;
\u102D \u102E → \u102E;

# Remove space directly before diacritics.
($wspace)+ ([\u102b-\u1032\u1036-\u103e]) → $2;

# Remove ZWSP at start and end
^ \u200b+ → ;
\u200b+ $ → ;

# Fix multiple spaces around ZWSP to single ZWSP.
$wspace* \u200b $wspace* → \u200b;
]]>