#Converting Mixtec orthographics to phonetic script
#Version 39b, 2018-02-14
#
#Layout: symbol classes first, then one named rule cascade per process,
#then each cascade is pushed onto the stack with "regex" in application
#order and the stack is saved to a binary file.

#Define classes
#Symbols allowed in the bracketed process annotations stripped by StripProcesses
define AlphaNum [a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|ã|ẽ|ĩ|õ|ũ|w̃|ỹ|ñ|1|2|3|4];
#Tone digits
define Tone [1|2|3|4];
#Tone pattern: optional "{" followed by one or more tone digits or "}"
define TP (%{) [Tone|%}]+;
#Deletable tone: a tone digit in parentheses
define DelTone %( Tone %) ;
#Deletable tone written with braces inside the parentheses
define DelToneP %( %{ (Tone) %} %) ;
#Tone group: a deletable tone, or a tone pattern optionally followed by one
define TG [ DelTone | DelToneP | TP (DelTone|DelToneP) ] ;
#Oral vowels, nasal vowels, and their union
define OVwl [a|e|i|o|u];
define NVwl [ã|ẽ|ĩ|õ|ũ];
define Vwl [OVwl|NVwl];
#Placeholder vowel symbols written by VwlHarm1 and resolved by VwlHarm2
define Xvwl [Xa|Xe|Xi|Xo|Xu|Xã|Xẽ|Xĩ|Xõ|Xũ];
#A toned vowel, or two identical toned vowels with an optional "'" after the first
define SuperVwl [Vwl TP |a (%') TP a TP|e (%') TP e TP|i (%') TP i TP|o (%') TP o TP|u (%') TP u TP|ã (%') TP ã TP|ẽ (%') TP ẽ TP|ĩ (%') TP ĩ TP|õ (%') TP õ TP|ũ (%') TP ũ TP];
define Punc [%.|%,|%¡|%!|%¿|%?|%"];
#Word boundary: punctuation, square brackets, string edge, or a space
define WdBnd [ Punc | %[ | %] | .#. | { } ];
#Morpheme boundary: any word boundary plus "-", "=", "+"
define MorphBnd [WdBnd | %- | %= | %+];
#Any string containing no vowel, boundary, tone group or "'"
define Cons ~$[Vwl | MorphBnd | TG | %' ];
define InitSyll MorphBnd Cons* SuperVwl Cons*;
define NCon [n|m|ñ];
define Glide [w|y|W|Y|w̃|ỹ];

#Delete "AlphaNum* >" sequences that stand directly after "{" and are
#followed by "AlphaNum* }"
define StripProcesses [AlphaNum* %>]+ -> 0 || %{ _ AlphaNum* %};

#Remove punctuation and material within brackets
#Disabled alternative rules, kept for reference:
#define StripPunc Punc+ @-> 0 || .#. _ ? .o.
#                 Punc+ @-> 0 || ? _ .#. .o.
#                 %[ %. %. %. %] -> UNINT .o.
#                 %[ %] -> 0 || ? _ .o.
#                 %[ %] -> 0 || _ ? .o.
#                 %[ %] -> %[ %. %. %. %] .o.
#Active version: "[...]" is protected as UNINT, bracket contents are deleted,
#empty brackets are removed, then UNINT is restored to "[...]".
define StripPunc %[ %. %. %. %] -> UNINT .o.
                 ?+ @> 0 || %[ _ %] .o.
                 %[ %] -> 0 .o.
                 UNINT -> %[ %. %. %. %] ;

#Lexical exceptions
#Pipe in last rule is to prevent nasalization of e
define LexEx {sa4ka(2)=e} -> {se4ke} || WdBnd _ [2|3|4] WdBnd .o.
             {sa4ka(2)=en4} -> {se4kẽ4} || WdBnd _ WdBnd .o.
             {li3mu42=e4} -> {li3mwe424} || WdBnd _ WdBnd .o.
             {ma14a(3)=e4} -> {ma14|e4} || WdBnd _ WdBnd;

#Globally replace some letters
#And protect "n" in loanwords
#(the inserted "|" is removed again by Detokenize)
define Replace Ñ -> ñ .o.
               á -> a .o.
               í -> i .o.
               ó -> o .o.
               [..] -> %| || Vwl _ n Vwl;

#Palatalization
define Pal i -> y || _ Vwl .o.
           u -> w || _ Vwl ;

#Portmanteaux and deletion of =2
#Every portmanteau rule rewrites "=V(n)T=V" as two boundaries followed by the
#surviving clitic vowel, so the third rule emits "%= %=" like its siblings.
define Port %= [a|e|o|u] n 4 %= e -> %= %= e n 4 || _ (%{) [2|3] (%}) [WdBnd | %=] .o.
            %= [o|u] n 4 %= a -> %= %= a n 4 || _ (%{) [2|3] (%}) [WdBnd | %=] .o.
            %= [o|u] n %( 4 %) %= a -> %= %= a n || _ (%{) [2|3] (%}) [WdBnd | %=] .o.
            %= [e|o] 4 %= e -> %= %= e 4 || _ (%{) [2|3] (%}) [WdBnd | %=] .o.
            %= (%() 2 (%)) @-> %= || _ %= Vwl (n) 2 ;

#Denasalization before e4/o4 clitics
define Denasal n -> n d || _ Vwl ((%') TP Vwl) TG %=+ [e|o] (%{) 4 (%}) .o.
               m -> m b || _ Vwl ((%') TP Vwl) TG %=+ [e|o] (%{) 4 (%}) .o.
               ñ -> n y || _ Vwl ((%') TP Vwl) TG %=+ [e|o] (%{) 4 (%});

#Nasalize vowels after nasal consonants
#also nasalize oral clitics except e4/o4
define Nasalize [..] -> n || NCon (Vwl) Vwl _ .o.
                [..] -> n || NCon Vwl n (%') TP Vwl _ (%') TG .o.
                [..] -> n || n TG %=+ i _ (%{) [1|2|3|4] (%}) ~Tone .o.
                [..] -> n || n TG %=+ [a|e] _ (%{) [1|2|3] (%}) ~Tone ;

#Spread stem nasality across long/glottalized vowels
#clitics =e4 and =o4 block this spreading
define NasBlock n -> 0 || _ TG %= [e|o] (%{) 4 (%}) ~Tone .o.
                n -> 0 || _ (%') TP Vwl TG %= [e|o] (%{) 4 (%}) ~Tone ;
define StemNasSpread [..] -> n || Vwl _ (%') TP Vwl n ;

#Spread clitic nasality rightward to 2nd clitic
define Nas2ndCl [..] -> n || %= Vwl n TG %= Vwl _ TG;

#Convert vowel/glide + "n" into nasalized segments ãẽĩõũw̃ỹ
define NasVowels a n -> ã ,, e n -> ẽ ,, i n -> ĩ ,, o n -> õ ,, u n -> ũ ;

#Block all clitics but one
#BlockCl turns every "=" that has a later "=" into "+";
#BlockCl2 does the same for "=" (or "==") followed only by non-boundary
#symbols up to the end of the string.
define BlockCl %= -> %+ || _ [~0]+ %=;
define BlockCl2 %= -> %+ || _ [? - [%= | %+]]+ .#. .o.
                %= %= -> %+ %+ || _ [? - [%= | %+]]+ .#.;

#Harmonization of Stem vowel with clitic vowel, part 1:
#Stem vowels before a clitic become X-placeholders, optionally preceded by an
#inserted glide Y or W.
#NOTE(review): the rules with context "( TP Xo)" / "( TP Xa)" lack the optional
#"%'" that the sibling rules have -- confirm whether this is intentional.
define VwlHarm1 [e|ẽ] -> Xe || [e|ẽ] (%') TP _ TG %=+ Vwl .o.
                [e|ẽ] -> Y Xe || InitSyll _ ((%') TP Xe) TG %=+ Vwl .o.
                [e|ẽ] -> Y Xe || _ ((%') TP Xe) TG %=+ [ã|ẽ|e|o] (%{) 4 (%}) ~Tone .o.
                [e|ẽ] -> Y Xe || _ ((%') TP Xe) TG %=+ [a|ã|e|ẽ] (%{) [1|2|3] (%}) ~Tone .o.
                [e|ẽ] -> Xe || _ TG %=+ Vwl .o.
                [i|ĩ] -> Xi || [i|ĩ] (%') TP _ TG %=+ Vwl .o.
                [i|ĩ] -> Y Xi || _ ((%') TP Xi) TG %=+ Vwl .o.
                [u|ũ] -> Xu || [u|ũ] (%') TP _ TG %=+ Vwl .o.
                [u|ũ] -> W Xu || _ ((%') TP Xu) TG %=+ Vwl .o.
                [o|õ] -> Xo || [o|õ] (%') TP _ TG %=+ Vwl .o.
                [o|õ] -> W Xo || _ ((%') TP Xo) TG %=+ [ã|ẽ|e|o] (%{) 4 (%}) ~Tone .o.
                [o|õ] -> W Xo || _ ((%') TP Xo) TG %=+ [a|ã|e|ẽ] (%{) [1|2|3] (%}) ~Tone .o.
                [o|õ] -> W Xo || _ ( TP Xo) TG %=+ Vwl .o.
                [a|ã] -> Xa || [a|ã] (%') TP _ TG %=+ Vwl .o.
                [a|ã] -> Xa || _ ((%') TP Xa) TG %=+ [ã|ẽ|e|o] (%{) 4 (%}) ~Tone .o.
                [a|ã] -> Xa || _ ((%') TP Xa) TG %=+ [a|ã|e|ẽ] (%{) [1|2|3] (%}) ~Tone .o.
                [a|ã] -> Xa || _ ( TP Xa) TG %=+ Vwl;

#Vowel Harmonization Part 2:
#Each placeholder is replaced by a copy of the clitic vowel that follows it.
define VwlHarm2 Xvwl -> a || _ ((%') TP Xvwl) TG %=+ a .o.
                Xvwl -> e || _ ((%') TP Xvwl) TG %=+ e .o.
                Xvwl -> i || _ ((%') TP Xvwl) TG %=+ i .o.
                Xvwl -> o || _ ((%') TP Xvwl) TG %=+ o .o.
                Xvwl -> u || _ ((%') TP Xvwl) TG %=+ u .o.
                Xvwl -> ã || _ ((%') TP Xvwl) TG %=+ ã .o.
                Xvwl -> ẽ || _ ((%') TP Xvwl) TG %=+ ẽ .o.
                Xvwl -> ĩ || _ ((%') TP Xvwl) TG %=+ ĩ .o.
                Xvwl -> õ || _ ((%') TP Xvwl) TG %=+ õ .o.
                Xvwl -> ũ || _ ((%') TP Xvwl) TG %=+ ũ;

#Delete glide if it matches the clitic vowel in frontness/backness
#but block in case of en42/en43/e42/e43
#(the temporary XXX marker breaks the glide-deletion context and is removed last)
define DelGlide [..] -> XXX || _ %=+ [e|ẽ] (%{) 4 (%}) [2|3] ~Tone .o.
                Y @-> 0 || _ Vwl ((%') TP Vwl) TG %=+ [i|e|ẽ|ĩ] .o.
                W @-> 0 || _ Vwl ((%') TP Vwl) TG %=+ [u|o|õ|ũ] .o.
                XXX -> 0;

#Spread clitic nasality to stem vowels and inserted glides, if any
define ClNasSpread [..] -> n || OVwl _ ((%') TP Vwl (n)) TG %=+ NVwl ;

#One step of moving the clitic boundary leftward across a stem vowel when the
#clitic vowel is marked with D; CliticDel applies this step four times.
define CliticMove [..] -> %= || _ Vwl (TG) %=+ D Vwl ,,
                  %= >@ 0 || Vwl (TG) %=* _ D Vwl;

#Clitic Vowel Deletion -- note that any clitic tone will remain.
#Any deleteable Stem Tone also deleted.
define CliticDel DelTone -> 0 || _ %=+ [Vwl|Tone] .o.
                 DelToneP -> %{ %} || _ %=+ [Vwl|Tone] .o.
                 [..] @-> D || %=+ _ [\ %=] .o.
                 CliticMove .o.
                 CliticMove .o.
                 CliticMove .o.
                 CliticMove .o.
                 D (Vwl) @-> 0 ;

#Clitic Ruleset
define CliticSet VwlHarm1 .o.
                 VwlHarm2 .o.
                 DelGlide .o.
                 ClNasSpread .o.
                 NasVowels .o.
                 CliticDel ;

#Unblock 2nd clitics
define Unblock %+ -> %= ;

#Any remaining Deletable Tones are undeleted?
#(the parentheses are removed, so the tone digit itself stays)
define KeepTone %( -> 0 .o.
                %) -> 0;

#Copy the tone that follows an inserted glide W/Y to the position before the
#word-final tone group, then delete the tone after the glide.
define ToneShift [..] @-> 1 || [W|Y] 1 %=+ [~Tone]+ _ TG WdBnd .o.
                 [..] -> 2 || [W|Y] 2 %=+ [~Tone]+ _ TG WdBnd .o.
                 [..] -> 3 || [W|Y] 3 %=+ [~Tone]+ _ TG WdBnd .o.
                 [..] -> 4 || [W|Y] 4 %=+ [~Tone]+ _ TG WdBnd .o.
                 [..] -> %{ 1 %} || [W|Y] %{ 1 %} %=+ [~Tone]+ _ TG WdBnd .o.
                 [..] -> %{ 2 %} || [W|Y] %{ 2 %} %=+ [~Tone]+ _ TG WdBnd .o.
                 [..] -> %{ 3 %} || [W|Y] %{ 3 %} %=+ [~Tone]+ _ TG WdBnd .o.
                 [..] -> %{ 4 %} || [W|Y] %{ 4 %} %=+ [~Tone]+ _ TG WdBnd .o.
                 Tone -> 0 || [W|Y] (%{) _ (%}) ;

#Denasalization of ãẽĩõũ after nd
define NDRules ã -> a || n d (Glide) _ (%') TP %=* NVwl .o.
               ẽ -> e || n d (Glide) _ (%') TP %=* NVwl .o.
               ĩ -> i || n d (Glide) _ (%') TP %=* NVwl .o.
               õ -> o || n d (Glide) _ (%') TP %=* NVwl .o.
               ũ -> u || n d (Glide) _ (%') TP %=* NVwl ;

#Consolidate adjacent identical tones
define SimpleTones 1 1+ @-> 1,,
                   2 2+ @-> 2,,
                   3 3+ @-> 3,,
                   4 4+ @-> 4,,
                   %{ 1 %} 1+ @-> %{ %} 1,,
                   %{ 2 %} 2+ @-> %{ %} 2,,
                   %{ 3 %} 3+ @-> %{ %} 3,,
                   %{ 4 %} 4+ @-> %{ %} 4;

#Nasalize glides before a nasal vowel, lower-case the inserted glides W/Y,
#and drop y/ỹ after "ny" and between "ch" and w/w̃.
define NasGlides w @-> w̃ || _ [Glide | %=]* NVwl .o.
                 w̃ -> w || k _ .o.
                 W @-> w̃ || _ [Glide | %=]* NVwl .o.
                 [Y|y] @-> ỹ || _ [Glide | %=]* NVwl .o.
                 W -> w .o.
                 Y -> y .o.
                 [y|ỹ] -> 0 || n y _ .o.
                 [y|ỹ] -> 0 || c h _ [w|w̃];

#Remove the "|" protection marker
define Detokenize %| -> 0 ;

#Move "}" and "{}" from after the clitic vowel to before the clitic boundary
define CleanUpProcesses [..] -> %} || _ %= Vwl %} .o.
                        %} -> 0 || %} %= Vwl _ .o.
                        [..] -> %{ %} || _ %=+ Vwl %{ %} .o.
                        %{ %} -> 0 || %{ %} %=+ (Vwl) _;

#Push every stage onto the stack in application order.
#CliticSet runs twice: once with all but the last clitic blocked (BlockCl),
#and once more after Unblock/BlockCl2 re-partition the clitics.
regex StripPunc;
regex StripProcesses;
regex LexEx;
regex Replace;
regex Pal;
regex Port;
regex Denasal;
regex Nasalize;
regex NasBlock;
regex StemNasSpread;
regex Nas2ndCl;
regex NasVowels;
regex BlockCl;
regex CliticSet;
regex Unblock;
regex BlockCl2;
regex CliticSet;
regex Unblock;
regex KeepTone;
regex ToneShift;
regex NDRules;
regex SimpleTones;
regex NasGlides;
regex Detokenize;
regex CleanUpProcesses;

save stack wt39b_saves-punct-segs.bin
#Version 39b, 2018-02-14