# Converting Mixtec orthography to phonetic script
# Version 39b, 2018-02-14
#
# Layout note: "#" starts a comment that runs to the end of the line, so
# every define/regex statement below starts on its own line and no comment
# shares a line with a statement.
#
# Overall flow: the "define" statements name individual rewrite-rule
# transducers; the "regex" statements at the bottom push them onto the
# stack one by one, and "save stack" writes the whole stack to a binary.

# ---------------------------------------------------------------------
# Define classes
# ---------------------------------------------------------------------

# A single tone digit.
define Tone [1|2|3|4];
# A tone pattern: one or more tone digits.
define TP Tone+;
# A deletable tone: one tone digit written in parentheses, e.g. (2).
define DelTone %( Tone %) ;
# A tone group: a deletable tone alone, or a tone pattern optionally
# followed by a deletable tone.
define TG [ DelTone | TP (DelTone) ] ;
# Oral vowels, nasal vowels, and their union.
define OVwl [a|e|i|o|u];
define NVwl [ã|ẽ|ĩ|õ|ũ];
define Vwl [OVwl|NVwl];
# Placeholder symbols used during vowel harmonization (VwlHarm1/VwlHarm2).
define Xvwl [Xa|Xe|Xi|Xo|Xu|Xã|Xẽ|Xĩ|Xõ|Xũ];
# A vowel with its tones, or two identical vowels each carrying tones,
# optionally with an apostrophe after the first vowel.
define SuperVwl [Vwl TP |a (%') TP a TP|e (%') TP e TP|i (%') TP i TP|o (%') TP o TP|u (%') TP u TP|ã (%') TP ã TP|ẽ (%') TP ẽ TP|ĩ (%') TP ĩ TP|õ (%') TP õ TP|ũ (%') TP ũ TP];
define Punc [%.|%,|%¡|%!|%¿|%?|%"];
# Word boundary: punctuation, square brackets, string edge, or a space.
define WdBnd [ Punc | %[ | %] | .#. | { } ];
# Morpheme boundary: any word boundary, or one of - = +
define MorphBnd [WdBnd | %- | %= | %+];
# Any string (possibly empty) that contains no vowel, no morpheme
# boundary, no tone group and no apostrophe.
define Cons ~$[Vwl | MorphBnd | TG | %' ];
# Material from a morpheme boundary through the first SuperVwl and any
# following Cons.
define InitSyll MorphBnd Cons* SuperVwl Cons*;
define NCon [n|m|ñ];
define Glide [w|y|W|Y|w̃|ỹ];

# ---------------------------------------------------------------------
# Remove punctuation and material within brackets
# ---------------------------------------------------------------------
# Active rules, in order:
#   1. replace a literal "[...]" with the placeholder UNINT;
#   2. delete whatever stands between "[" and "]" (shortest match);
#   3. delete the now-empty "[]" pair;
#   4. restore UNINT to "[...]".
# Despite the heading, no active rule deletes Punc symbols; the
# punctuation-stripping rules are among the disabled ones listed here.
#
# Disabled rules kept for reference. The first three stood before rule 1:
#   define StripPunc Punc+ @-> 0 || .#. _ ? .o.
#                    Punc+ @-> 0 || ? _ .#. .o.
#                    %[ %. %. %. %] -> UNINT .o.
# and these three stood between rules 2 and 3:
#                    %[ %] -> 0 || ? _ .o.
#                    %[ %] -> 0 || _ ? .o.
#                    %[ %] -> %[ %. %. %. %] .o.
define StripPunc %[ %. %. %. %] -> UNINT .o.
                 ?+ @> 0 || %[ _ %] .o.
                 %[ %] -> 0 .o.
                 UNINT -> %[ %. %. %. %] ;

# ---------------------------------------------------------------------
# Lexical exceptions
# ---------------------------------------------------------------------
# Whole-form replacements that apply only between word boundaries.
# Pipe in last rule is to prevent nasalization of e
# (the pipe is removed again by Tokenize at the end of the cascade).
define LexEx {sa4ka(2)=e} -> {se4ke} || WdBnd _ [2|3|4] WdBnd .o.
             {sa4ka(2)=en4} -> {se4kẽ4} || WdBnd _ WdBnd .o.
             {li3mu42=e4} -> {li3mwe424} || WdBnd _ WdBnd .o.
             {ma14a(3)=e4} -> {ma14|e4} || WdBnd _ WdBnd;

# ---------------------------------------------------------------------
# Globally replace some letters
# And protect "n" in loanwords
# ---------------------------------------------------------------------
# The last rule inserts "|" between a vowel and an "n" that is itself
# followed by a vowel, so that later rules do not see "vowel n" as
# adjacent symbols.
# NOTE(review): accents are stripped from á, í and ó only; there is no
# rule for é or ú -- confirm whether those occur in the input.
define Replace Ñ -> ñ .o.
               á -> a .o.
               í -> i .o.
               ó -> o .o.
               [..] -> %| || Vwl _ n Vwl;

# ---------------------------------------------------------------------
# Palatalization
# ---------------------------------------------------------------------
# i and u become the glides y and w when immediately followed by a vowel.
define Pal i -> y || _ Vwl .o.
           u -> w || _ Vwl ;

# ---------------------------------------------------------------------
# Portmanteaux and deletion of =2
# ---------------------------------------------------------------------
# Rules 1-4 merge a sequence of two clitics into one when the sequence is
# followed by tone 2 or 3 and then a word boundary or another "=".
# Rule 5 deletes a "=2" or "=(2)" clitic that is followed by
# "=" Vwl (n) 2.
define Port %= [a|e|o|u] n 4 %= e -> %= e n 4 || _ [2|3] [WdBnd | %=] .o.
            %= [o|u] n 4 %= a -> %= a n 4 || _ [2|3] [WdBnd | %=] .o.
            %= [o|u] n %( 4 %) %= a -> %= a n || _ [2|3] [WdBnd | %=] .o.
            %= [e|o] 4 %= e -> %= e 4 || _ [2|3] [WdBnd | %=] .o.
            %= (%() 2 (%)) @-> 0 || _ %= Vwl (n) 2 ;

# ---------------------------------------------------------------------
# Denasalization before e4/o4 clitics
# ---------------------------------------------------------------------
# n -> nd, m -> mb, ñ -> ny when the consonant starts the final vowel
# (single, or doubled with optional apostrophe) before "=e4" or "=o4".
define Denasal n -> n d || _ Vwl ((%') TP Vwl) TG %= [e|o] 4 .o.
               m -> m b || _ Vwl ((%') TP Vwl) TG %= [e|o] 4 .o.
               ñ -> n y || _ Vwl ((%') TP Vwl) TG %= [e|o] 4;

# ---------------------------------------------------------------------
# Nasalize vowels after nasal consonants
# also nasalize oral clitics except e4/o4
# ---------------------------------------------------------------------
# Nasality is written as an inserted "n" after the vowel; NasVowels later
# turns "vowel n" into a single nasal-vowel symbol.
# NOTE(review): "~Tone" is language complement (every string other than a
# single tone symbol) and therefore includes the empty string; as the
# last element of a right context it appears to impose no restriction.
# If the intent is "no further tone follows", something like
# [\Tone | .#.] may be what is wanted. This applies to every "~Tone" in
# this file (Nasalize, NasBlock, VwlHarm1, DelGlide). Behavior is left
# unchanged here -- confirm against test data before altering.
define Nasalize [..] -> n || NCon (Vwl) Vwl _ .o.
                [..] -> n || NCon Vwl n (%') TP Vwl _ (%') TG .o.
                [..] -> n || n TG %= i _ [1|2|3|4] ~Tone .o.
                [..] -> n || n TG %= [a|e] _ [1|2|3] ~Tone ;

# ---------------------------------------------------------------------
# Spread stem nasality across long/glottalized vowels
# clitics =e4 and =o4 block this spreading
# ---------------------------------------------------------------------
# NasBlock removes the nasal marker "n" before =e4/=o4; StemNasSpread
# then copies a nasal marker from the second vowel of a doubled vowel
# back onto the first.
define NasBlock n -> 0 || _ TG %= [e|o] 4 ~Tone .o.
                n -> 0 || _ (%') TP Vwl TG %= [e|o] 4 ~Tone ;
define StemNasSpread [..] -> n || Vwl _ (%') TP Vwl n ;

# ---------------------------------------------------------------------
# Spread clitic nasality rightward to 2nd clitic
# ---------------------------------------------------------------------
define Nas2ndCl [..] -> n || %= Vwl n TG %= Vwl _ TG;

# ---------------------------------------------------------------------
# Convert vowel + "n" into nasalized segments ãẽĩõũ
# ---------------------------------------------------------------------
# NOTE(review): the original heading also mentioned glides (w̃ỹ), but the
# rules here cover vowels only; nasal glides are produced in NasGlides.
define NasVowels a n -> ã ,,
                 e n -> ẽ ,,
                 i n -> ĩ ,,
                 o n -> õ ,,
                 u n -> ũ ;

# ---------------------------------------------------------------------
# Block all clitics but one
# ---------------------------------------------------------------------
# Every "=" that has another "=" anywhere to its right becomes "+", so
# only the last "=" in the input stays active for the first CliticSet
# pass. Unblock turns the "+" back into "=" for the second pass.
# NOTE(review): "?*" spans the whole input string, not just the current
# word -- confirm that input is processed one word at a time.
define BlockCl %= -> %+ || _ ?* %=;

# ---------------------------------------------------------------------
# Harmonization of Stem vowel with clitic vowel, part 1:
# ---------------------------------------------------------------------
# Stem-final vowels before a clitic are replaced by placeholders
# (Xe, Xi, Xu, Xo, Xa), in some contexts preceded by an inserted glide
# marker Y (for e/i) or W (for u/o). For each vowel quality, the first
# rule handles the second half of a doubled vowel and the remaining
# rules handle the first (or only) vowel.
# NOTE(review): the last "o" rule and the last "a" rule use "( TP X..)"
# where every other rule uses "((%') TP X..)"; confirm that leaving out
# the optional apostrophe there is intended.
define VwlHarm1 [e|ẽ] -> Xe || [e|ẽ] (%') TP _ TG %= Vwl .o.
                [e|ẽ] -> Y Xe || InitSyll _ ((%') TP Xe) TG %= Vwl .o.
                [e|ẽ] -> Y Xe || _ ((%') TP Xe) TG %= [ã|ẽ|e|o] 4 ~Tone .o.
                [e|ẽ] -> Y Xe || _ ((%') TP Xe) TG %= [a|ã|e|ẽ] [1|2|3] ~Tone .o.
                [e|ẽ] -> Xe || _ TG %= Vwl .o.
                [i|ĩ] -> Xi || [i|ĩ] (%') TP _ TG %= Vwl .o.
                [i|ĩ] -> Y Xi || _ ((%') TP Xi) TG %= Vwl .o.
                [u|ũ] -> Xu || [u|ũ] (%') TP _ TG %= Vwl .o.
                [u|ũ] -> W Xu || _ ((%') TP Xu) TG %= Vwl .o.
                [o|õ] -> Xo || [o|õ] (%') TP _ TG %= Vwl .o.
                [o|õ] -> W Xo || _ ((%') TP Xo) TG %= [ã|ẽ|e|o] 4 ~Tone .o.
                [o|õ] -> W Xo || _ ((%') TP Xo) TG %= [a|ã|e|ẽ] [1|2|3] ~Tone .o.
                [o|õ] -> W Xo || _ ( TP Xo) TG %= Vwl .o.
                [a|ã] -> Xa || [a|ã] (%') TP _ TG %= Vwl .o.
                [a|ã] -> Xa || _ ((%') TP Xa) TG %= [ã|ẽ|e|o] 4 ~Tone .o.
                [a|ã] -> Xa || _ ((%') TP Xa) TG %= [a|ã|e|ẽ] [1|2|3] ~Tone .o.
                [a|ã] -> Xa || _ ( TP Xa) TG %= Vwl;

# ---------------------------------------------------------------------
# Vowel Harmonization Part 2:
# ---------------------------------------------------------------------
# Each placeholder left by part 1 is replaced by a copy of the clitic
# vowel that follows it.
define VwlHarm2 Xvwl -> a || _ ((%') TP Xvwl) TG %= a .o.
                Xvwl -> e || _ ((%') TP Xvwl) TG %= e .o.
                Xvwl -> i || _ ((%') TP Xvwl) TG %= i .o.
                Xvwl -> o || _ ((%') TP Xvwl) TG %= o .o.
                Xvwl -> u || _ ((%') TP Xvwl) TG %= u .o.
                Xvwl -> ã || _ ((%') TP Xvwl) TG %= ã .o.
                Xvwl -> ẽ || _ ((%') TP Xvwl) TG %= ẽ .o.
                Xvwl -> ĩ || _ ((%') TP Xvwl) TG %= ĩ .o.
                Xvwl -> õ || _ ((%') TP Xvwl) TG %= õ .o.
                Xvwl -> ũ || _ ((%') TP Xvwl) TG %= ũ;

# ---------------------------------------------------------------------
# Delete glide if it matches the clitic vowel in frontness/backness
# but block in case of en42/en43/e42/e43
# ---------------------------------------------------------------------
# The blocking works by temporarily inserting XXX in front of the "=" of
# such clitics, which stops the deletion contexts ("... TG %= ...") from
# matching; XXX is removed again by the last rule.
define DelGlide [..] -> XXX || _ %= [e|ẽ] 4 [2|3] ~Tone .o.
                Y @-> 0 || _ Vwl ((%') TP Vwl) TG %= [i|e|ẽ|ĩ] .o.
                W @-> 0 || _ Vwl ((%') TP Vwl) TG %= [u|o|õ|ũ] .o.
                XXX -> 0;

# ---------------------------------------------------------------------
# Spread clitic nasality to stem vowels and inserted glides, if any
# ---------------------------------------------------------------------
# Inserts the nasal marker "n" after an oral stem vowel when the clitic
# vowel is nasal; NasVowels (run next inside CliticSet) fuses it.
define ClNasSpread [..] -> n || OVwl _ ((%') TP Vwl (n)) TG %= NVwl ;

# ---------------------------------------------------------------------
# Clitic Vowel Deletion -- note that any clitic tone will remain.
# Any deletable Stem Tone also deleted.
# ---------------------------------------------------------------------
define CliticDel DelTone -> 0 || _ %= [Vwl|Tone] .o.
                 %= (Vwl) @-> 0 ;

# ---------------------------------------------------------------------
# Clitic Ruleset
# ---------------------------------------------------------------------
# The full treatment of one clitic; pushed twice below, once before and
# once after Unblock.
define CliticSet VwlHarm1 .o.
                 VwlHarm2 .o.
                 DelGlide .o.
                 ClNasSpread .o.
                 NasVowels .o.
                 CliticDel ;

# ---------------------------------------------------------------------
# Unblock 2nd clitics
# ---------------------------------------------------------------------
define Unblock %+ -> %= ;

# ---------------------------------------------------------------------
# Any remaining Deletable Tones are undeleted?
# ---------------------------------------------------------------------
# Removes the parentheses, which leaves the enclosed tone digit in place.
define KeepTone %( -> 0 .o.
                %) -> 0;

# ---------------------------------------------------------------------
# Denasalization of ãẽĩõũ after nd
# ---------------------------------------------------------------------
# Applies to the first vowel of a doubled vowel whose second half is a
# nasal vowel.
define NDRules ã -> a || n d (Glide) _ (%') TP NVwl .o.
               ẽ -> e || n d (Glide) _ (%') TP NVwl .o.
               ĩ -> i || n d (Glide) _ (%') TP NVwl .o.
               õ -> o || n d (Glide) _ (%') TP NVwl .o.
               ũ -> u || n d (Glide) _ (%') TP NVwl ;

# ---------------------------------------------------------------------
# Consolidate adjacent identical tones
# ---------------------------------------------------------------------
define SimpleTones 1 1+ @-> 1,,
                   2 2+ @-> 2,,
                   3 3+ @-> 3,,
                   4 4+ @-> 4;

# ---------------------------------------------------------------------
# Nasalize glides before nasal vowels and normalize glide symbols
# ---------------------------------------------------------------------
# In order: w, W, Y and y become nasal glides before (other glides plus)
# a nasal vowel, except that w̃ reverts to w after k; the remaining
# inserted markers W and Y become plain w and y; finally y/ỹ is deleted
# after "ny", and after "ch" when a w/w̃ follows.
define NasGlides w @-> w̃ || _ Glide* NVwl .o.
                 w̃ -> w || k _ .o.
                 W @-> w̃ || _ Glide* NVwl .o.
                 [Y|y] @-> ỹ || _ Glide* NVwl .o.
                 W -> w .o.
                 Y -> y .o.
                 [y|ỹ] -> 0 || n y _ .o.
                 [y|ỹ] -> 0 || c h _ [w|w̃];

# ---------------------------------------------------------------------
# Remove hyphens and the protective pipe symbol
# ---------------------------------------------------------------------
define Tokenize [%- | %|] -> 0 ;

# ---------------------------------------------------------------------
# Build the stack and save it
# ---------------------------------------------------------------------
# Each rule set is pushed as a separate network rather than composed
# into one. CliticSet is pushed twice on purpose: once for the clitic
# left active by BlockCl, and once more after Unblock.
# NOTE(review): presumably the consumer of the binary applies the
# networks as a cascade in this order -- confirm against the caller.
regex StripPunc;
regex LexEx;
regex Replace;
regex Pal;
regex Port;
regex Denasal;
regex Nasalize;
regex NasBlock;
regex StemNasSpread;
regex Nas2ndCl;
regex NasVowels;
regex BlockCl;
regex CliticSet;
regex Unblock;
regex CliticSet;
regex KeepTone;
regex NDRules;
regex SimpleTones;
regex NasGlides;
regex Tokenize;
save stack wt39b_saves-punct.bin
# Version 39b, 2018-02-14