[sword-cvs] swordtools/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse Books.java, NONE, 1.1 Chapters.java, NONE, 1.1 Markers.java, NONE, 1.1 Parser.java, NONE, 1.1 Tanach.java, NONE, 1.1 Tokenizer.java, NONE, 1.1 Verses.java, NONE, 1.1 Words.java, NONE, 1.1 package.html, NONE, 1.1

Fri Jun 4 02:14:54 MST 2004

Committed by: mgruner

Update of /cvs/core/swordtools/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse
In directory www:/tmp/cvs-serv15120/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse

Added Files:
	Books.java Chapters.java Markers.java Parser.java Tanach.java 
	Tokenizer.java Verses.java Words.java package.html 
Log Message:
initial import of source files for the new BHS-replacement WLC (Westminster Leningrad Codex).
Not functional yet at all.

--- NEW FILE: Books.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;
import WLC2OSIS.Translate.* ;
import WLC2OSIS.Utilities.* ;
//==============================================================================
/**
 *  <b>Processes books.</b>
 *
 *  Special to WLC.
 */
//==============================================================================
public class Books {

    /** Application object; owns the output directory and the current XML writer. */
    WLC2OSIS A;
    /** Parser whose position and count fields are updated as books start and end. */
    Parser P;
    /** Book table obtained from BookName.setBookNames() at construction. */
    public BookName[] BookNames;
    /** Entry of the book currently being written. */
    BookName BookObject;
    /** Output file name (without extension) of the current book. */
    String Filename;

    //--------------------------------------------------------------------------
    /**
     * Creates the book handler and loads the book-name table.
     *
     * @param A the application object
     * @param P the parser holding the shared state
     */
    public Books(WLC2OSIS A, Parser P) {
        this.A = A;
        this.P = P;
        BookNames = BookName.setBookNames();
    }

    //--------------------------------------------------------------------------
    /**
     * Starts a book: looks up its entry, resets the per-book parser state,
     * opens a fresh XML writer on the book's file, writes the header and
     * then the opening tags together with the book's names.
     *
     * @param BookCode the WLC book code taken from the line identifier
     */
    public void start(String BookCode) {
        final BookName book = BookName.getBookName(BookNames, BookCode);
        BookObject = book;

        // Per-book bookkeeping in the parser.
        P.Book = book.name;
        P.TanachBookCount++;
        P.BookVerseCount = 0;
        P.BookChapterCount = 0;

        // Each book goes to its own file.
        Filename = book.filename;
        final XMLWriter out = new XMLWriter(A.OutputDirectory, Filename, "Tanach",
            "Tanach", "");
        A.w = out;
        Header.writeHeader(A, out);

        // Position counters restart with every book.
        P.ChapterNumber = 0;
        P.VerseNumber = 0;
        P.WordNumber = 0;

        out.openTag("tanach", 0);
        out.openTag("book", 1);
        out.openTag("names", 2);
        out.writeString("name", 3, book.name);
        out.writeString("abbrev", 3, book.abbrev);
        out.writeInt("number", 3, book.number);
        out.writeString("filename", 3, book.filename);
        out.writeString("hebrewname", 3, book.hebrewname);
        out.closeTag("names", 2);
    }

    //--------------------------------------------------------------------------
    /**
     * Ends the current book: records the chapter totals, writes the verse
     * and chapter counts, closes the open tags, appends the transcription
     * notes and closes the file.  Does nothing when no book has been started
     * (P.Book is still null).
     */
    public void end() {
        if (P.Book == null) {
            return;
        }

        P.BookChapterCount = P.ChapterNumber;
        P.TanachChapterCount += P.ChapterNumber;

        A.w.writeInt("vs", 2, P.BookVerseCount);
        A.w.writeInt("cs", 2, P.BookChapterCount);
        A.w.closeTag("book", 1);
        A.w.closeTag("tanach", 0);

        // The transcription notes follow the closed tanach element.
        Note.writeNotes(A.w);

        A.w.close();
        System.out.println(Filename + ".xml has been written.");
    }
    //--------------------------------------------------------------------------
}
//==============================================================================
//==============================================================================

--- NEW FILE: Chapters.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;
//==============================================================================
/**
 *  <b>Processes chapters.</b>
 */
//==============================================================================
public class Chapters {

    /** Application object; supplies the current XML writer. */
    WLC2OSIS A;
    /** Parser whose chapter/verse counters are maintained here. */
    Parser P;

    //--------------------------------------------------------------------------
    /**
     * @param A the application object
     * @param P the parser holding the shared state
     */
    public Chapters(WLC2OSIS A, Parser P) {
        this.A = A;
        this.P = P;
    }

    //--------------------------------------------------------------------------
    /**
     * Starts a chapter: advances the chapter number, resets the verse and
     * word counters and opens a "c" tag carrying the chapter number in "n".
     */
    public void start() {
        P.ChapterVerseCount = 0;
        P.ChapterNumber++;
        P.VerseNumber = 0;
        P.WordNumber = 0;
        A.w.openAttributedTag("c", 2, "n", Integer.toString(P.ChapterNumber));
    }

    //--------------------------------------------------------------------------
    /**
     * Ends the current chapter: adds its verses to the book and Tanach
     * totals, writes the verse count and closes the "c" tag.  Does nothing
     * while no chapter is open (chapter number 0).
     */
    public void end() {
        if (P.ChapterNumber == 0) {
            return;
        }
        final int verses = P.VerseNumber;
        P.ChapterVerseCount = verses;
        P.BookVerseCount += verses;
        P.TanachVerseCount += verses;
        A.w.writeInt("vs", 3, verses);
        A.w.closeTag("c", 2);
    }
    //--------------------------------------------------------------------------
}
//==============================================================================
//==============================================================================

--- NEW FILE: Markers.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;

import java.io.* ;
//==============================================================================
/**
 *  <b>Processes markers.</b> 
 *
 *  Before any marker is written, the TrailingMaqaf flag must be
 *  tested.  If a trailing maqaf word precedes the mark, it must be
 *  written before the marker.
 */
//==============================================================================
public class Markers {

    /** Application object; supplies the current XML writer. */
    WLC2OSIS A;
    /** Parser holding the MarkerWritten flag and the Words instance. */
    Parser P;

    //--------------------------------------------------------------------------
    /**
     * @param A the application object
     * @param P the parser holding the shared state
     */
    public Markers(WLC2OSIS A, Parser P) {
        this.A = A;
        this.P = P;
    }

    //--------------------------------------------------------------------------
    /** Writes a samekh marker. */
    public void samek() {
        emit("samekh");
    }

    //--------------------------------------------------------------------------
    /** Writes a pe marker. */
    public void pe() {
        emit("pe");
    }

    //--------------------------------------------------------------------------
    /** Reports an end-of-line marker; nothing is written to the output. */
    public void line() {
        System.out.println("Markers: End-of-line encountered!");
    }

    //--------------------------------------------------------------------------
    /**
     *  Tests for a preceding trailing maqaf word and writes it.
     *
     *  Before any marker is written, the TrailingMaqaf flag must be
     *  tested.  If a trailing maqaf word precedes the mark, it must be
     *  written before the marker.
     *
     *  Apparently ONLY EOLs cause this test to be activated.
     */
    void testMaqafWord() {
        if (!P.w.TrailingMaqaf) {
            return;
        }
        P.w.writeWord(P.w.MaqafWord, P.w.MaqafWordType);
        P.w.TrailingMaqaf = false;
    }

    //--------------------------------------------------------------------------
    /**
     * Flushes any pending trailing-maqaf word, writes the named marker at
     * level 4 and records that a marker has been written.
     */
    private void emit(String markerName) {
        testMaqafWord();
        A.w.writeMarker(markerName, 4);
        P.MarkerWritten = true;
    }
}
//==============================================================================
//==============================================================================

--- NEW FILE: Parser.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;
import WLC2OSIS.Translate.* ;
//==============================================================================
/**
 *  <b>Parser dispatches tokens to  Books, Chapters, Markers, Tanach,
 *     Verses, and Words start/end methods, special to WLC. </b>
 *
 *  Extensively modified for WLC.
 */
//==============================================================================
public class Parser{

WLC2OSIS A ;
Tokenizer t ;

//  Working classes

Tanach tanach ;
public Books b ;
Chapters c ;
Verses v ;
public Words w ;
Translate T ;
Markers m ;
public WKQ wkq ;

// Current state

public boolean MarkerWritten ;  // Indicates a marker has been written
                                // between two words.

String BookName ;

String Book ;
int Chapter ;
int Verse ;

// Book code, chapter and verse of the most recently seen line identifier.
String LastBookCode = "" ;
int LastChapter = -1 ;
int LastVerse = -1 ;

// Assorted counts
public int ChapterVerseCount ;
public int BookVerseCount ;
public int BookChapterCount ;

public int TanachVerseCount ;
public int TanachChapterCount ;
public int TanachBookCount ;

int ChapterNumber ;
int VerseNumber ;
int WordNumber ;

//-----------------------------------------------------------------------------
/**
 *  Builds the translator, the tokenizer and the working classes.
 *
 *  @param A the application object; its InputChars are tokenized.
 */
public Parser(WLC2OSIS A) {
    this.A = A ;

    T = new Translate(A, this) ;

// Set up the tokenizer
    t = new Tokenizer(A) ;

// Set up the Tanach, Books, Chapters, Verses, Words, and Markers classes.
    tanach = new Tanach(A, this) ;
    b = new Books(A, this) ;
    c = new Chapters(A, this) ;
    v = new Verses(A, this) ;
    w = new Words(A, this) ;
    m = new Markers(A, this) ;
    wkq = new WKQ(this) ;
    new MC() ;
    }
//------------------------------------------------------------------------------
/**
 *  Reads tokens until EOF and dispatches them.
 *
 *  The first token of every line is a line identifier (book code, chapter,
 *  ':' and verse); it opens and closes books, chapters and verses as the
 *  position changes.  Every other token up to the next EOL is a marker or a
 *  word.  Parsing stops early at a malformed line identifier; whatever is
 *  open at that point is still closed.
 */
public void parse(){
    boolean PreviousEOL = true ;
    System.out.println("\n") ;
    tanach.start() ;

    // The token count can never exceed the number of input characters.
    for (int k = 0; k < A.InputChars.length ; k++){

        String s = t.nextToken() ;

        if (s.equals(t.EOF)){
            break ;
            }

        if (PreviousEOL){
            if (!processLineIdentifier(s)){
                break ;
                }
            PreviousEOL = false ;
            }
        else if (s.equals(t.EOL)){
            PreviousEOL = true ;
            }
        else{
            processToken(s) ;
            }
        }

    v.end() ;
    c.end() ;
    b.end() ;
    tanach.end() ;
    }
//------------------------------------------------------------------------------
/**
 *  Handles a line identifier: a two-character book code followed by the
 *  chapter number, a colon and the verse number.
 *
 *  @return false if the identifier is malformed (already reported), in which
 *          case no state has been changed.
 */
private boolean processLineIdentifier(String s){
    int ColonIndex = s.indexOf(':') ;

    // The colon must leave room for the book code and at least one chapter
    // digit; otherwise the substring calls below would be out of range.
    if (ColonIndex < 3){
        reportBadIdentifier(s) ;
        return false ;
        }

    String BookCode = s.substring(0,2) ;
    int chapter ;
    int verse ;
    try{
        chapter = Integer.parseInt( s.substring(2, ColonIndex) ) ;
        verse = Integer.parseInt( s.substring(ColonIndex+1) ) ;
        }
    catch (NumberFormatException e){
        reportBadIdentifier(s) ;
        return false ;
        }

// Change in Book, start a book.
    if (!BookCode.equals(LastBookCode)){
        v.end() ;
        c.end() ;
        b.end() ;
        b.start(BookCode) ;
        c.start() ;
        v.start() ;
        LastBookCode = BookCode ;
        LastChapter = 1 ;
        LastVerse = 1 ;
        }

// Change in Chapter, start a Chapter.
    if (chapter != LastChapter){
        v.end() ;
        c.end() ;
        c.start() ;
        v.start() ;
        LastChapter = chapter ;
        LastVerse = 1 ;
        }

// Change in Verse, start a Verse.
    if (verse != LastVerse){
        v.end() ;
        v.start() ;
        LastVerse = verse ;
        }
    return true ;
    }
//------------------------------------------------------------------------------

// Reports a malformed line identifier.

private void reportBadIdentifier(String s){
    System.out.println("Parser: Incorrect line identifier: " + s + " !") ;
    }
//------------------------------------------------------------------------------

// Dispatches a token inside a line: the one-letter tokens P, S and ? are
// markers, everything else is a word.

private void processToken(String s){
    if (s.equals("P")){
        m.pe() ;
        }
    else if (s.equals("S")){
        m.samek() ;
        }
    else if (s.equals("?")){
        m.line() ;
        }
    else{
        w.process(s) ;
        }
    }
//----------------------------------------------------------------------------
/**
 *  Counts the number of occurrences of a character in a String.
 *
 *  @param W the string to search
 *  @param c the character to count
 *  @return how many times c occurs in W
 */
public int countChar(String W, char c){
   int Count = 0 ;
   for(int k=0; k < W.length(); k++){
       if(W.charAt(k)==c){
           Count++ ;
           }
       }
   return Count ;
   }
//----------------------------------------------------------------------------

// Prints the current position as " at Book chapter:verse.word".

public void printPosition(){
    System.out.print(" at " + Book + " " + A.F.i(ChapterNumber,3) + ":"
        + A.F.i(VerseNumber,2)
            + "." + A.F.i(WordNumber,2) + "\n") ;
    }
//------------------------------------------------------------------------------
}
//==============================================================================
//==============================================================================

--- NEW FILE: Tanach.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;
import WLC2OSIS.Translate.* ;
import WLC2OSIS.Utilities.* ;

//==============================================================================
/**
 *  <b>Processes Tanach.</b>
 */
//==============================================================================
public class Tanach {

    /** Application object; supplies the number formatter A.F. */
    WLC2OSIS A;
    /** Parser whose counters are reset here. */
    Parser P;

    //--------------------------------------------------------------------------
    /**
     * @param A the application object
     * @param P the parser holding the shared state
     */
    public Tanach(WLC2OSIS A, Parser P) {
        this.A = A;
        this.P = P;
    }

    //--------------------------------------------------------------------------
    /** Starts the Tanach: zeroes every chapter, book and Tanach level count. */
    public void start() {
        P.ChapterVerseCount = 0;

        P.BookVerseCount = 0;
        P.BookChapterCount = 0;

        P.TanachVerseCount = 0;
        P.TanachChapterCount = 0;
        P.TanachBookCount = 0;
    }

    //--------------------------------------------------------------------------
    /**
     * Ends the Tanach: prints the number of books processed.  Prints nothing
     * when no book was ever started (P.Book is still null).
     */
    public void end() {
        if (P.Book == null) {
            return;
        }
        System.out.println("\nTanach ends with "
            + A.F.i(P.TanachBookCount,2) + " books." ) ;
    }
    //--------------------------------------------------------------------------
}
//==============================================================================
//==============================================================================

--- NEW FILE: Tokenizer.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;

import java.io.* ;
import javax.swing.* ;
import java.awt.* ;
//==============================================================================
/**
 *  <b>Tokenizer based on standard Java tokenizer, special to WLC.</b>
 *
 *  Special to WLC.
 */
//==============================================================================
public class Tokenizer{

/** Returned by nextToken() at the end of the input. */
public final String EOF = "***EOF***" ;
/** Returned by nextToken() at the end of a line. */
public final String EOL = "***EOL***" ;
WLC2OSIS A ;
CharArrayReader car ;
StreamTokenizer st ;

//-----------------------------------------------------------------------------
/**
 *  Sets up a StreamTokenizer over A.InputChars.
 *
 *  Characters 33 to 126 form words, except '?', which is returned as a
 *  token of its own.  Every other character is ordinary and is skipped by
 *  nextToken().
 *
 *  @param A the application object holding the input characters.
 */
public Tokenizer(WLC2OSIS A ) {
    this.A = A ;

// Set up the tokenizer

    car = new CharArrayReader(A.InputChars) ;
    st = new StreamTokenizer( car) ;
    st.resetSyntax() ;
    st.wordChars(33, 126 ) ; // All printables are word characters
    st.ordinaryChar('?') ;   // ? is a special symbol, the EOL marker.
    st.eolIsSignificant(true) ;
    }
//------------------------------------------------------------------------------
/**
 *  Gets the next token as a String, skipping anything that is not a word,
 *  an end of line, the '?' marker or the end of the input.
 *
 *  An IOException from the underlying tokenizer is reported and then
 *  treated as end of input, so the caller always terminates.
 *
 *  @return the word, "?" for the EOL marker, EOL at an end of line, or
 *          EOF at the end of the input.
 */
public String nextToken(){
        while (true){
            int TokenType ;
            try{
                TokenType = st.nextToken() ;
                }
            catch (IOException e) {
                System.out.println("Tokenizer: IOException in tokenization: " + e) ;
                // Retrying would loop forever on a persistent failure.
                return EOF ;
                }

            switch (TokenType){
                case StreamTokenizer.TT_WORD:
                    return st.sval ;
                case StreamTokenizer.TT_EOF:
                    return EOF ;
                case StreamTokenizer.TT_EOL:
                    return EOL ;
                case '?':               // EOL marker
                    return "?" ;
                default:                // Any other ordinary character: skip it.
                    break ;
                }
            }
        }
//------------------------------------------------------------------------------

// Does a pushBack on the StreamTokenizer, st.

public void pushBack(){
        st.pushBack() ;
        }
//-----------------------------------------------------------------------------
}
//==============================================================================
//==============================================================================

--- NEW FILE: Verses.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;
//==============================================================================
/**
 *  <b>Processes verses.</b>
 */
//==============================================================================
public class Verses {

    /** Application object; supplies the current XML writer. */
    WLC2OSIS A;
    /** Parser whose verse and word counters are maintained here. */
    Parser P;

    //--------------------------------------------------------------------------
    /**
     * @param A the application object
     * @param P the parser holding the shared state
     */
    public Verses(WLC2OSIS A, Parser P) {
        this.A = A;
        this.P = P;
    }

    //--------------------------------------------------------------------------
    /**
     * Starts a verse: advances the verse number, resets the word counter and
     * opens a "v" tag carrying the verse number in "n".
     */
    public void start() {
        P.VerseNumber++;
        P.WordNumber = 0;
        A.w.openAttributedTag("v", 3, "n", Integer.toString(P.VerseNumber));
    }

    //--------------------------------------------------------------------------
    /**
     * Ends the current verse by closing its "v" tag.  Does nothing while no
     * verse is open (verse number 0).
     */
    public void end() {
        if (P.VerseNumber == 0) {
            return;
        }
        A.w.closeTag("v", 3);
    }
    //--------------------------------------------------------------------------
}
//==============================================================================
//==============================================================================

--- NEW FILE: Words.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;
import WLC2OSIS.Translate.H ;
import WLC2OSIS.Utilities.* ;
//==============================================================================
/**
 *  <b>Processes words, sending them to the Translate class
 *  after their word, qere, ketiv properties have been determined.</b>
 */
//==============================================================================
public class Words{

WLC2OSIS A ;
Parser P ;

// A word ending in maqaf is held back here instead of being written at once.
boolean TrailingMaqaf ;
String MaqafWord ;
String MaqafWordType ;

//-----------------------------------------------------------------------------
/**
 *  @param A the application object; supplies the current XML writer.
 *  @param P the parser holding the shared state.
 */
public Words(WLC2OSIS A, Parser P ) {
    this.A = A ;
    this.P = P ;
    }
//------------------------------------------------------------------------------
/**
 *  Processes a word by handing it to the parser's WKQ object.
 *
 *  @param W the word token as read from the input.
 */
public void process(String W){
    P.wkq.process(W) ;
    }
//----------------------------------------------------------------------------------
/**
 *  Translates and writes a word (simple, ketib, qere) with exception markers.
 *
 *  A word ending in maqaf is not written but held back.  It is prepended to
 *  the next word when no marker came in between and both types start with
 *  the same character; otherwise it is written on its own first.
 *
 *  All returns leave P.MarkerWritten = false.
 *
 *  @param W    the untranslated word.
 *  @param Type the tag under which the word is written.
 */
public void write(String W, String Type) {

   String Word = P.T.translate(W) ;

// Check for any KQ markers which should NOT be here!

   if (P.countChar(W, '*') > 0){
       System.out.print("Words.write: Unexpected KQ character * ") ;
       P.printPosition() ;
       System.out.println("Word: " + W) ;
       }

// Look for a case in which there's been trailing maqaf
// without an intervening marker.

   if (TrailingMaqaf && !P.MarkerWritten){
       if (MaqafWordType.charAt(0) == Type.charAt(0)){
           Word = MaqafWord + Word ;  // Combine them.
           }
       else{
           // Mismatched types: the held word goes out by itself.
           writeWord(MaqafWord, MaqafWordType) ;
           }
       }

// Check for a trailing maqaf.
// Don't write the word here.

   TrailingMaqaf = false ;
   // An empty word has no last character and so no trailing maqaf.
   if (Word.length() > 0 && Word.charAt(Word.length()-1) == H.maqaf){
       TrailingMaqaf = true ;
       MaqafWord = Word ;
       MaqafWordType = Type ;
       P.MarkerWritten = false ;
       return ;
       }

   writeWord(Word, Type) ;
   }
//----------------------------------------------------------------------------------
/**
 *  Writes a word, turning each exception marker "]x" into "&lt;x&gt;x&lt;/x&gt;",
 *  and clears P.MarkerWritten.
 *
 *  A ']' that is the last character of the word has no exception value and
 *  is written unchanged.
 *
 *  @param Word the translated word.
 *  @param Type the tag under which the word is written.
 */
public void writeWord(String Word, String Type) {

   int Length = Word.length() ;
   StringBuilder Out = new StringBuilder(Length) ;
   for (int k = 0 ; k < Length ; k++){
       char c = Word.charAt(k) ;
       if (c == ']' && k + 1 < Length){
           k++ ;   // The character after ] is the exception value.
           Out.append("<x>").append(Word.charAt(k)).append("</x>") ;
           }
       else{
           Out.append(c) ;
           }
       }
    A.w.writeString(Type, 4, Out.toString()) ;
    P.MarkerWritten = false ;
    }
}
//==============================================================================
//==============================================================================

--- NEW FILE: package.html ---
<HTML>
<BODY>
<b>Parsing classes</b>.
<p/>
<p align="right"> (<b><tt>BHS2XML/Parse/package.html</tt></b>)
</p>
Only the Parser and Tokenizer classes are specific to the WLC.
</BODY>
</HTML>