c SHA_LNG.CCT - Convert Shoebox 2.0 settings to Shw language encoding(s).
c
c Input:  SHA.SET (from your DOS Shoebox working directory)
c Output: DEFAULT.LNG (to your user settings folder)
c
c 1995-08-01 Mark R. Pedrotti (ICS Software Development)
c 1995-09-01 MRP Add + to begin markers and separate var name from chars
c 1995-10-07 MRP Revised output markers
c 1995-10-10 MRP Added \case, \CASE, and \desc fields
c 1996-02-03 MRP Corrected conversion for multiline \case field inputs
c 1-Mar-96   KB  Modified for different structure of sort character set
c ...            modifying also the Variables, case, etc
c 21-Mar-96  KB  removing \CASE -- outputting all case stuff together
c                modifying to deal correctly with \( --
c                parenthesis as a character to be sorted -- and \) \{ \}
c  7-Apr-96  KB  fixed a problem at group exit, added variable D (digits)
c 26-July-96 KB  added endfile > do(dEndfile) to all groups --
c                having changed approach it ends in different places
c                than it used to
c 1996-07-17 MRP Correctly convert unanticipated uses of chevrons
c  Aug 1996  KB  Modifying to serve with SH2_CONV.EXE


begin >
    c Every implicit upper/lower pair Aa-Zz starts out enabled.  Group
    c gSortCase clears a switch to cancel that letter's pair, and
    c define(dStandardCase) emits the pairs whose switch is still set.
    set(A,B,C,D,E,F,G,H,I,J,K,L,M)
    set(N,O,P,Q,R,S,T,U,V,W,X,Y,Z)

    c Character classes referenced by the fol() and wd() tests
    store(cBackslash) '\' endstore
    store(whitespace) ' ' 0d9 nl endstore

    c Send input to the throw-away store until a recognised marker
    c turns storing off or redirects it, then start in the main group.
    store(omit)
    use(gMain)

define(dEndfile) >
    do(dStandardCase)
    do(dEndPrevSortOrder)
    do(dEndPrevVar)

    endstore

c    '\+LanguageEncoding Updated' nl
c    '\desc ALL sort orders from SHA.SET.' nl
    nl

    c Case associations	
    '\case ' out(case) nl
    
    c Sort orders
    '\+srtset' nl
    out(srtset)
    '\-srtset' nl nl
    
    c User variables (including punctuation)
    '\+varset' nl
    out(varset)
    '\+var L' nl '\chars '
        'a b c d e f g h i j k l m n o p q r s t u v w x y z'
        nl '\-var' nl
    '\+var U' nl '\chars '
        'A B C D E F G H I J K L M N O P Q R S T U V W X Y Z'
        nl '\-var' nl
    '\+var @' nl '\chars '
        'A a B b C c D d E e F f G g H h I i J j K k L l M m '
        'N n O o P p Q q R r S s T t U u V v W w X x Y y Z z'
        nl '\-var' nl
    '\+var D' nl '\chars 0 1 2 3 4 5 6 7 8 9'
        nl '\-var' nl
    '\-varset' nl nl
    
    '\-LanguageEncoding' nl
    endfile

define(dStandardCase) >
        c All of SHW's \case associations must be explicit, unlike SH2.
        c Add Aa-Zz unless they have been explicitly disallowed.
        append(case)
        if(A) begin 'A a' nl end
        if(B) begin 'B b' nl end
        if(C) begin 'C c' nl end
        if(D) begin 'D d' nl end
        if(E) begin 'E e' nl end
        if(F) begin 'F f' nl end
        if(G) begin 'G g' nl end
        if(H) begin 'H h' nl end
        if(I) begin 'I i' nl end
        if(J) begin 'J j' nl end
        if(K) begin 'K k' nl end
        if(L) begin 'L l' nl end
        if(M) begin 'M m' nl end
        if(N) begin 'N n' nl end
        if(O) begin 'O o' nl end
        if(P) begin 'P p' nl end
        if(Q) begin 'Q q' nl end
        if(R) begin 'R r' nl end
        if(S) begin 'S s' nl end
        if(T) begin 'T t' nl end
        if(U) begin 'U u' nl end
        if(V) begin 'V v' nl end
        if(W) begin 'W w' nl end
        if(X) begin 'X x' nl end
        if(Y) begin 'Y y' nl end
        if(Z) begin 'Z z' nl end
        nl



define(dEndPrevSortOrder) >
    c Finish the sort order currently being collected (if any) and reset
    c the per-sort-order stores and switches for the next one.
    c Called before each new \name in group(gSort) and from dEndfile.
    if(PrevSrt) begin   c first time this is called, skip case stuff
                        c switch PrevSrt is a 1st-time switch, set by
                        c encountering a sort order in group(gSort)
        append(srtset)
        
        c outs() is used so that storing into srtset stays active
        '\SecPreceding ' outs(secPrecede) nl
        '\SecFollowing ' outs(secFollow) nl
        '\ignore ' outs(ignore) nl
        c SecAfterBase is set by the '\sep after' field in group(gSort)
        if(SecAfterBase) begin '\SecAfterBase' nl end
        '\-srt' nl nl
    end
    c Empty the three collecting stores; the endstore also leaves
    c storing switched off, so callers must re-select a store.
    store(secPrecede,secFollow,ignore) endstore
    clear(follow,SecAfterBase)

define(dEndPrevVar) >
    if(PrevVar)
        append(varset)
        '\-var' nl nl
    endif

c ---------------------------------------------------------------
group(gMain)

'\+LanguageEncoding ' > endstore dup

'\srt ' > use(gSort)

'\var ' > use(gVariable)

'\pun ' >
    append(varset)
    '\+var !' nl  c ! is the SH2 punctuation variable metacharacter
    '\chars '
    set(PrevVar) 
    use(VariableCharacters, MultigraphsAndSpacing)
    
c We will convert the \mbk morpheme break character field
c once we decide its place in the revised interlinear processing

'\' >
    store(omit)
    use(gRestOfField)

endfile > do(dEndfile)

c ---------------------------------------------------------------
group(gSort)
c Entered from gMain after a '\srt ' marker has been consumed.
c Dispatches on the sub-marker that follows it.

c Start of a new sort order: close the previous one, then begin a
c \+srt entry in store srtset.  gSortName copies the name itself.
'\name ' >
    do(dEndPrevSortOrder)
    append(srtset)
    '\+srt '
    set(PrevSrt)
    use(gSortName)

c Primary ordering: written to srtset as a \primary field
'\order ' >
    append(srtset)
    '\primary '
    use(gSortOrder,MultigraphsAndSpacing)

c Case field: collect the raw text in store case1 behind a leading
c '*'.  group(RerunCase) later uses that '*' to detect when it has
c pushed all of the collected text back into the input.
'\case ' >
    store(case1) '*'
c    use(gSortCase,MultigraphsAndSpacing)
    use(gSortCase)

'\sep after' nl >
    set(SecAfterBase)

c Any other marker: output the backslash, move it back into the
c input, and let gMain look at the marker again.
'\' > dup back(1) use(gMain)        c something odd here

endfile > do(dEndfile)

c ---------------------------------------------------------------
group(gSortName)
c Active while the sort order name is being copied into store srtset
c (selected by the '\name ' rule in gSort).  Unmatched characters,
c i.e. the name itself, pass through unchanged.

c End of the name line (a newline with a backslash next): keep the
c newline, add an empty \desc field, stop storing, return to gMain.
nl fol(cBackslash) > dup '\desc ' nl endstore use(gMain)

endfile > do(dEndfile)

c ---------------------------------------------------------------
group(gSortOrder)
c Handles the structural characters of a sort order field.  It is
c always used together with MultigraphsAndSpacing, which does the
c spacing of ordinary characters and the general exit on '\'.

c Ignore characters are listed in separate fields in Shw
'(' > append(ignore)
      use(SortIgnore, MultigraphsAndSpacing)

c Secondary characters are split into two fields, those preceding
c the unmarked case, and those following the unmarked case.
c Switch follow (set in MultigraphsAndSpacing) selects which store.
'{' > append(secPrecede)
      if(follow) append(secFollow) endif
      use(SortSecondary, MultigraphsAndSpacing)

c This was space, in chevrons.  It no longer occurs in the sort order.
c 0d174 "space" 0d175 any(whitespace) > ""       c (MRP)
0d174 "space" 0d175 > ""

'\srt '  > ''                                    c more of the same
'\srt \' > '\' back(1) clear(follow) use(gSort)  c new srt marker

c general group exit is done from MultigraphsAndSpacing
c '\'      > '\' back(1) clear(follow) use(gMain)  c new marker

endfile > do(dEndfile)

c ---------------------------------------------------------------
group(MultigraphsAndSpacing)
c this group deals with the spacing change from SH2 to SH3
c characters which were adjacent are separated by spaces
c characters which were separated by spaces are on separate lines
c multigraphs which were enclosed in chevrons are kept adjacent
c   but separated from other characters by space or nl appropriately
c
c Output goes to whichever store the calling rule selected.
c Do not reorder the nl and ' ' rules: "next" runs the replacement
c of the rule that immediately follows it.

' ' nl > nl back(1)     c eat trailing spaces
'  ' > ' ' back(1)      c eat multiple spaces

nl   > next             c this divides primary groupings
' '  > nl               c   in the sort order

c in Variables and Case, there is no "primary" and "secondary"
c and the switch "follow" is irrelevant 
c but these things don't do any harm
c The empty match below is the default rule for ordinary characters.
''   > fwd(1)               c character in a primary group
       ifn(Multigraph) ' '  c don't space between characters of
       endif                c   a multigraph
       set(follow)          c from here on, secondaries follow

c 0d174 and 0d175 are the opening and closing chevron characters
0d174 > set(Multigraph)         c opening chevron, start of multigraph
0d175 > ' ' clear(Multigraph)   c closing chevron, end of multigraph

c These are literal parentheses and curly brackets.  They can be
c included in the sort order, and in the ignore and secondaries as well.
c I'm not sure whether people would put them into the variables or not
c in this form, or if they would just use them directly.
'\(' > '( '
'\)' > ') '
'\{' > '{ '
'\}' > '} '

c The following are rare but have occurred, and must be matched specially
c to avoid confusion with the beginning and end of ignores or secondaries.
0d174 '(' 0d175 > '( '
0d174 ')' 0d175 > ') '
0d174 '{' 0d175 > '{ '
0d174 '}' 0d175 > '} '

c General exit: output the backslash, move it back into the input,
c and let gMain identify the marker.
'\' > dup back(1) clear(follow) use(gMain)  c new marker

endfile > do(dEndfile)

c ---------------------------------------------------------------
group(SortIgnore)
c Basically, we're waiting for the closing parenthesis.  Since
c curly brackets could be included, it was easier to wait in a group.
c While this group is active, output is appended to store ignore
c (selected by the '(' rule in gSortOrder).

c Closing parenthesis: resume collecting the sort order in srtset
')' > append(srtset)
      use(gSortOrder,MultigraphsAndSpacing)

endfile > do(dEndfile)

c ---------------------------------------------------------------
group(SortSecondary)
c Basically, we're waiting for the closing curly bracket.  Since
c parentheses could be included, it was easier to wait in a group.
c While this group is active, output is appended to store secPrecede
c or secFollow (selected by the '{' rule in gSortOrder).

c Closing curly bracket: resume collecting the sort order in srtset
'}' > append(srtset)
      use(gSortOrder,MultigraphsAndSpacing)

endfile > do(dEndfile)

c ---------------------------------------------------------------
group(gSortCase)
c Active while the raw \case text is being collected in store case1
c (see the '\case ' rule in gSort).  Text that matches no rule here
c is copied into case1 unchanged.

c A single letter in the \case field cancels its implicit association.
c
c Each letter has three rules.  The first two use "next", so all
c three end up running the replacement of the third: clear the
c letter's switch (define(dStandardCase) then leaves its pair out)
c and omit(1).  Nothing is output for the matched text.
c
c NOTE(review): because "next" chains, an explicit pair such as 'Aa'
c also clears switch A and is dropped, which reads as more than the
c "single letter" case described above -- confirm this is intended.

'Aa' wd(whitespace) > next
'A' wd(whitespace) > next
'a' wd(whitespace) > clear(A) omit(1)

'Bb' wd(whitespace) > next
'B' wd(whitespace) > next
'b' wd(whitespace) > clear(B) omit(1)

'Cc' wd(whitespace) > next
'C' wd(whitespace) > next
'c' wd(whitespace) > clear(C) omit(1)

'Dd' wd(whitespace) > next
'D' wd(whitespace) > next
'd' wd(whitespace) > clear(D) omit(1)

'Ee' wd(whitespace) > next
'E' wd(whitespace) > next
'e' wd(whitespace) > clear(E) omit(1)

'Ff' wd(whitespace) > next
'F' wd(whitespace) > next
'f' wd(whitespace) > clear(F) omit(1)

'Gg' wd(whitespace) > next
'G' wd(whitespace) > next
'g' wd(whitespace) > clear(G) omit(1)

'Hh' wd(whitespace) > next
'H' wd(whitespace) > next
'h' wd(whitespace) > clear(H) omit(1)

'Ii' wd(whitespace) > next
'I' wd(whitespace) > next
'i' wd(whitespace) > clear(I) omit(1)

'Jj' wd(whitespace) > next
'J' wd(whitespace) > next
'j' wd(whitespace) > clear(J) omit(1)

'Kk' wd(whitespace) > next
'K' wd(whitespace) > next
'k' wd(whitespace) > clear(K) omit(1)

'Ll' wd(whitespace) > next
'L' wd(whitespace) > next
'l' wd(whitespace) > clear(L) omit(1)

'Mm' wd(whitespace) > next
'M' wd(whitespace) > next
'm' wd(whitespace) > clear(M) omit(1)

'Nn' wd(whitespace) > next
'N' wd(whitespace) > next
'n' wd(whitespace) > clear(N) omit(1)

'Oo' wd(whitespace) > next
'O' wd(whitespace) > next
'o' wd(whitespace) > clear(O) omit(1)

'Pp' wd(whitespace) > next
'P' wd(whitespace) > next
'p' wd(whitespace) > clear(P) omit(1)

'Qq' wd(whitespace) > next
'Q' wd(whitespace) > next
'q' wd(whitespace) > clear(Q) omit(1)

'Rr' wd(whitespace) > next
'R' wd(whitespace) > next
'r' wd(whitespace) > clear(R) omit(1)

'Ss' wd(whitespace) > next
'S' wd(whitespace) > next
's' wd(whitespace) > clear(S) omit(1)

'Tt' wd(whitespace) > next
'T' wd(whitespace) > next
't' wd(whitespace) > clear(T) omit(1)

'Uu' wd(whitespace) > next
'U' wd(whitespace) > next
'u' wd(whitespace) > clear(U) omit(1)

'Vv' wd(whitespace) > next
'V' wd(whitespace) > next
'v' wd(whitespace) > clear(V) omit(1)

'Ww' wd(whitespace) > next
'W' wd(whitespace) > next
'w' wd(whitespace) > clear(W) omit(1)

'Xx' wd(whitespace) > next
'X' wd(whitespace) > next
'x' wd(whitespace) > clear(X) omit(1)

'Yy' wd(whitespace) > next
'Y' wd(whitespace) > next
'y' wd(whitespace) > clear(Y) omit(1)

'Zz' wd(whitespace) > next
'Z' wd(whitespace) > next
'z' wd(whitespace) > clear(Z) omit(1)

c A '\srt ' that is not followed by another marker is removed from
c the collected case text
'\srt '  > ""

c A new '\srt \...' field ends the case text: copy the six matched
c characters out, move them back into the input, and let RerunCase
c reprocess what was collected.
'\srt \' > dup back(6)
           use(RerunCase)

c Any other marker ends the case text in the same way
'\'      > dup back(1)
           use(RerunCase)

endfile > do(dEndfile)

c ---------------------------------------------------------------

group(RerunCase)
c Feeds the case text collected in store case1 through
c MultigraphsAndSpacing so that it ends up, respaced, in store case.
c
c The single rule matches without consuming any input.  While case1
c holds anything other than the lone '*' written by the '\case '
c rule in gSort, it re-selects case1, moves one character back into
c the input with back(1), and repeats.  Once only the '*' is left,
c output is directed to store case and MultigraphsAndSpacing takes
c over on the text that was pushed back.
'' > begin 
     ifneq(case1) '*'
        append(case1)
        back(1)
        endstore
        repeat
     endif
    append(case)
    use(MultigraphsAndSpacing)    
    end
              

c ---------------------------------------------------------------
group(gVariable)
c Entered from gMain after the first '\var ' has been consumed.
c Writes the \+var line (marker plus variable name) into store
c varset, then hands the character list to VariableCharacters.

'\var ' >           c this is second \var of nl \var \var (weird!)
    do(dEndPrevVar) c initialize stuff, terminate previous
    append(varset)  c collect everything, but name is first
    '\+var '        c first marker
    set(PrevVar)    c 1st var now seen -- for define(dEndPrevVar)

c this is space following name
c (runs of spaces are first reduced to a single space; the name
c itself matches no rule and is copied through)
'  ' > ' ' back(1)
' '  > nl '\chars ' 
       use(VariableCharacters, MultigraphsAndSpacing)

endfile > do(dEndfile)

c ---------------------------------------------------------------
group(VariableCharacters)
c collect the set of characters which constitute the variables
c essentially all the work is done in MultigraphsAndSpacing

c Another variable follows directly: output a backslash and move it
c back into the input so that gVariable sees '\var ' again.
'\var \' > '\' back(1) use(gVariable)   c new Variable marker

c general group exit is done from MultigraphsAndSpacing
c '\' > dup back(1) use(gMain)

endfile > do(dEndfile)

c ---------------------------------------------------------------
group(gRestOfField)
c Skips a field that is not converted.  The '\' rule in gMain
c selects store(omit) before using this group, so everything up to
c the next marker goes into that throw-away store.

c A newline followed by a backslash starts the next field: finish
c the omitted text, stop storing, then output a backslash and move
c it back into the input so gMain can match the marker.
nl '\' > 
    nl
    endstore
    '\' back(1)
    use(gMain)

endfile > do(dEndfile)