[sword-cvs] swordtools/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse Books.java, NONE, 1.1 Chapters.java, NONE, 1.1 Markers.java, NONE, 1.1 Parser.java, NONE, 1.1 Tanach.java, NONE, 1.1 Tokenizer.java, NONE, 1.1 Verses.java, NONE, 1.1 Words.java, NONE, 1.1 package.html, NONE, 1.1

sword at www.crosswire.org sword at www.crosswire.org
Fri Jun 4 02:14:54 MST 2004


Committed by: mgruner

Update of /cvs/core/swordtools/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse
In directory www:/tmp/cvs-serv15120/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse

Added Files:
	Books.java Chapters.java Markers.java Parser.java Tanach.java 
	Tokenizer.java Verses.java Words.java package.html 
Log Message:
initial import of source files for the new BHS-replacement WLC (Westminster Leningrad Codex).
Not functional yet at all.

--- NEW FILE: Books.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;
import WLC2OSIS.Translate.* ;
import WLC2OSIS.Utilities.* ;
//==============================================================================
/**
 *  <b>Handles the start and end of each book.</b>
 *
 *  Special to WLC.  Starting a book creates a new XMLWriter for that book
 *  and writes the book's identifying names; ending a book writes its verse
 *  and chapter totals, appends the transcription notes and closes the writer.
 */
//==============================================================================
public class Books{

WLC2OSIS A ;                    // Owning application; holds the current writer A.w.
Parser P ;                      // Parser whose counters are updated here.
public BookName[] BookNames ;   // Table of known books, loaded once.
BookName BookObject ;           // Entry for the book currently being written.
String Filename ;               // Output file name of the current book.
//-----------------------------------------------------------------------------

/**
 *  Loads the book-name table and keeps the application and parser references.
 */
public Books(WLC2OSIS A, Parser P ) {
    BookNames = BookName.setBookNames() ;
    this.P = P ;
    this.A = A ;
    }
//------------------------------------------------------------------------------

/**
 *  Starts a book.
 *
 *  Looks up the book for the given WLC book code, resets the per-book
 *  counters in the parser, opens a writer for the book's file and writes
 *  the header and the <code>names</code> element.
 *
 *  @param BookCode  the WLC book code taken from a line identifier.
 */
public void start(String BookCode){

// Select the book and reset the per-book counts.

    BookObject = BookName.getBookName(BookNames, BookCode) ;
    P.Book = BookObject.name ;
    P.TanachBookCount++ ;
    P.BookVerseCount = 0;
    P.BookChapterCount = 0 ;

// Open the output for this book and write its header.

    Filename = BookObject.filename ;
    A.w = new XMLWriter(A.OutputDirectory, Filename, "Tanach", "Tanach", "" ) ;
    Header.writeHeader(A, A.w) ;

// Nothing has been read for this book yet.

    P.WordNumber = 0 ;
    P.VerseNumber = 0 ;
    P.ChapterNumber = 0 ;

// Opening tags and the block of names identifying the book.

    A.w.openTag("tanach", 0) ;
    A.w.openTag("book", 1) ;

    A.w.openTag("names", 2) ;
    A.w.writeString("name", 3, BookObject.name ) ;
    A.w.writeString("abbrev", 3, BookObject.abbrev) ;
    A.w.writeInt("number", 3, BookObject.number) ;
    A.w.writeString("filename", 3, BookObject.filename) ;
    A.w.writeString("hebrewname", 3, BookObject.hebrewname) ;
    A.w.closeTag("names", 2) ;
    }
//------------------------------------------------------------------------------

/**
 *  Ends a book.
 *
 *  Does nothing if no book has been started yet (P.Book is still null).
 */
public void end(){
    if (P.Book == null){
        return ;
        }

// Fold this book's chapters into the running totals.

    P.BookChapterCount = P.ChapterNumber ;
    P.TanachChapterCount += P.ChapterNumber ;

// Totals, then the closing tags.

    A.w.writeInt("vs", 2, P.BookVerseCount) ;
    A.w.writeInt("cs", 2, P.BookChapterCount) ;
    A.w.closeTag("book", 1) ;
    A.w.closeTag("tanach", 0) ;

// The transcription notes follow the closed document element.

    Note.writeNotes(A.w) ;

    A.w.close() ;
    System.out.println( Filename + ".xml has been written." ) ;
    }
//----------------------------------------------------------------------------
}
//==============================================================================
//==============================================================================

--- NEW FILE: Chapters.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;
//==============================================================================
/**
 *  <b>Processes chapters.</b>
 *
 *  Opens and closes the <code>c</code> element of the current book and
 *  maintains the chapter-related counters held by the Parser.
 */
//==============================================================================
public class Chapters{

WLC2OSIS A ;    // Owning application; A.w is the current book's writer.
Parser P ;      // Parser whose counters are updated here.
//-----------------------------------------------------------------------------

/**
 *  Keeps the application and parser references.
 */
public Chapters(WLC2OSIS A, Parser P ) {
    this.A = A ;
    this.P = P ;
    }    
//------------------------------------------------------------------------------

/**
 *  Starts a chapter.
 *
 *  Advances the chapter number, resets the verse and word numbers, and
 *  opens a <code>c</code> tag whose <code>n</code> attribute is the new
 *  chapter number.
 */
public void start(){
    P.ChapterVerseCount = 0 ;
    P.ChapterNumber = P.ChapterNumber + 1 ;
    P.VerseNumber = 0 ;
    P.WordNumber = 0 ;
// Integer.toString avoids allocating a wrapper object just to format the number.
    A.w.openAttributedTag("c", 2, "n", Integer.toString(P.ChapterNumber) ) ;
//    System.out.println("\nChapter " + A.F.i(P.ChapterNumber,2) + " start." ) ;
    }
//------------------------------------------------------------------------------

/**
 *  Ends a chapter.
 *
 *  Does nothing if no chapter has been started in the current book
 *  (the chapter number is still 0).  Otherwise adds the chapter's verses
 *  to the book and Tanach totals, writes the verse count and closes the
 *  <code>c</code> tag.
 */
public void end(){
    if (P.ChapterNumber != 0){
        P.ChapterVerseCount = P.VerseNumber ;
        P.BookVerseCount = P.BookVerseCount + P.VerseNumber ;
        P.TanachVerseCount = P.TanachVerseCount + P.VerseNumber ;
        A.w.writeInt("vs", 3, P.VerseNumber) ;
        A.w.closeTag("c", 2) ;
//        System.out.println("Chapter " + A.F.i(P.ChapterNumber,2) + " ends with "
//            + A.F.i(P.VerseNumber,2) + " verses." ) ;
        }
    }
//----------------------------------------------------------------------------
}
//==============================================================================
//==============================================================================

--- NEW FILE: Markers.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;

import java.io.* ;
//==============================================================================
/**
 *  <b>Processes markers.</b>
 *
 *  A word ending in a maqaf is held back by the Words class instead of
 *  being written at once.  Every marker therefore first flushes such a
 *  pending word, so that the word appears in the output before the marker.
 */
//==============================================================================
public class Markers{

WLC2OSIS A ;    // Owning application; A.w is the current book's writer.
Parser P ;      // Parser giving access to the Words state and marker flag.

//-----------------------------------------------------------------------------

/**
 *  Keeps the application and parser references.
 */
public Markers(WLC2OSIS A, Parser P ) {
    this.A = A ;
    this.P = P ;
    }
//------------------------------------------------------------------------------

/**
 *  Writes a samekh marker.
 */
public void samek(){
    emit("samekh") ;
    }
//------------------------------------------------------------------------------

/**
 *  Writes a pe marker.
 */
public void pe(){
    emit("pe") ;
    }
//------------------------------------------------------------------------------

/**
 *  Handles the end-of-line symbol; it is only reported, nothing is written.
 */
public void line(){
    System.out.println("Markers: End-of-line encountered!") ;
    }
//----------------------------------------------------------------------------

/**
 *  Flushes any pending maqaf word, writes the named marker and records
 *  that a marker now separates the previous word from the next one.
 */
private void emit(String MarkerName){
    testMaqafWord() ;
    A.w.writeMarker(MarkerName, 4) ;
    P.MarkerWritten = true ;
    }
//----------------------------------------------------------------------------
/**
 *  Writes a pending trailing-maqaf word, if there is one.
 *
 *  Must run before any marker is written so that the held-back word
 *  precedes the marker in the output.
 *
 *  Original author's observation: apparently ONLY EOLs cause this test
 *  to be activated.
 */
void testMaqafWord(){
   if (!P.w.TrailingMaqaf){
       return ;
       }
//   System.out.print("Markers: Marker follows trailing maqaf at ") ;
//       P.printPosition() ;
   P.w.writeWord(P.w.MaqafWord, P.w.MaqafWordType) ;
   P.w.TrailingMaqaf = false ;
   }
}
//==============================================================================
//==============================================================================

--- NEW FILE: Parser.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;
import WLC2OSIS.Translate.* ;
//==============================================================================
/**
 *  <b>Parser dispatches tokens to  Books, Chapters, Markers, Tanach,
 *     Verses, and Words start/end methods, special to WLC. </b>
 *
 *  Extensively modified for WLC.
 *
 *  Each input line starts with an identifier made of a two-character book
 *  code, a chapter number, a colon and a verse number.  The remaining
 *  tokens on the line are words or the one-character markers
 *  <code>P</code>, <code>S</code> and <code>?</code>.
 */
//==============================================================================
public class Parser{

WLC2OSIS A ;
Tokenizer t ;

//  Working classes

Tanach tanach ;
public Books b ;
Chapters c ;
Verses v ;
public Words w ;
Translate T ;
Markers m ;
public WKQ wkq ;

// Current state

public boolean MarkerWritten ;  // Indicates a marker has been written
                                // between two words.

String BookName ;

String Book ;                   // Name of the current book; null until one starts.
int Chapter ;
int Verse ;

// Values taken from the previous line identifier.
String LastBookCode = "" ;
int LastChapter = -1 ;
int LastVerse = -1 ;


// Assorted counts
public int ChapterVerseCount ;
public int BookVerseCount ;
public int BookChapterCount ;

public int TanachVerseCount ;
public int TanachChapterCount ;
public int TanachBookCount ;

// Running numbers, advanced by Chapters.start, Verses.start and reset by
// the enclosing level.
int ChapterNumber ;
int VerseNumber ;
int WordNumber ;

//-----------------------------------------------------------------------------
/**
 *  Creates the tokenizer and all working classes for one run.
 */
public Parser(WLC2OSIS A) {
    this.A = A ;

    T = new Translate(A, this) ;
    
// Set up the tokenizer
    t = new Tokenizer(A) ;
    
// Set up the Tanach, Books, Chapters, Verses, Words, and Markers classes.
    tanach = new Tanach(A, this) ;
    b = new Books(A, this) ;
    c = new Chapters(A, this) ;
    v = new Verses(A, this) ;
    w = new Words(A, this) ;
    m = new Markers(A, this) ;
    wkq = new WKQ(this) ;
// NOTE(review): the instance is discarded; presumably MC is constructed
// only for its side effects -- confirm against MC.
    new MC() ;
    }    
//------------------------------------------------------------------------------

/**
 *  Reads all tokens and drives the book, chapter, verse, word and marker
 *  handlers.
 *
 *  Parsing stops at the EOF token or at the first malformed line
 *  identifier.  In either case the open verse, chapter and book are ended
 *  so that the output written so far is closed properly.
 */
public void parse(){
    String s ;
    boolean PreviousEOL = true ;    // The next token starts a new line.
    System.out.println("\n") ;
    tanach.start() ;

// There can never be more tokens than input characters, so this bound
// only guards against a runaway loop.
    for (int k = 0; k < A.InputChars.length ; k++){
    
        s = t.nextToken() ;

        if (s.equals(t.EOF)){
            break ;
            }

        if (PreviousEOL){
            // First token of a line: the line identifier.
            if (!processIdentifier(s)){
                break ;
                }
            PreviousEOL = false ;
            }
        else if (s.equals(t.EOL)){
            PreviousEOL = true ;
            }
        else{
            processToken(s) ;
            }
        }
    v.end() ;
    c.end() ;
    b.end() ;   
    tanach.end() ;
    return ;
    }
//-----------------------------------------------------------------------------

/**
 *  Processes a line identifier, starting a new book, chapter and/or verse
 *  when the identifier differs from the previous one.
 *
 *  A malformed identifier (no colon, fewer than three characters before
 *  the colon, or a non-numeric chapter or verse) is reported instead of
 *  raising an exception, so that the caller can still close the output.
 *
 *  @param Identifier  the first token of an input line.
 *  @return true if the identifier was well formed, false if parsing
 *          should stop.
 */
private boolean processIdentifier(String Identifier){

// Two characters of book code plus at least one chapter digit must
// precede the colon.
    int ColonIndex = Identifier.indexOf(':') ;
    if (ColonIndex < 3){
        reportBadIdentifier(Identifier) ;
        return false ;
        }

// Parse the identifier
    String BookCode = Identifier.substring(0,2) ;
    int NewChapter ;
    int NewVerse ;
    try{
        NewChapter = Integer.parseInt( Identifier.substring(2, ColonIndex) ) ;
        NewVerse = Integer.parseInt( Identifier.substring(ColonIndex+1) ) ;
        }
    catch (NumberFormatException e){
        reportBadIdentifier(Identifier) ;
        return false ;
        }

// Change in Book, start a book.
    if (!BookCode.equals(LastBookCode)){
        v.end() ;
        c.end() ;
        b.end() ;
        b.start(BookCode) ;
        c.start() ;
        v.start() ;
        LastBookCode = BookCode ;
        LastChapter = 1 ;
        LastVerse = 1 ;
        }
                
// Change in Chapter, start a Chapter.
    if (NewChapter != LastChapter){
        v.end() ;
        c.end() ;
        c.start() ;
        v.start() ;
        LastChapter = NewChapter ;
        LastVerse = 1 ;
        }
                
// Change in Verse, start a Verse.
    if (NewVerse != LastVerse){
        v.end() ;
        v.start() ;
        LastVerse = NewVerse ;
        }
    return true ;
    }
//-----------------------------------------------------------------------------

/**
 *  Reports a malformed line identifier on standard output.
 */
private void reportBadIdentifier(String Identifier){
    System.out.println("Parser: Incorrect line identifier: " + Identifier + " !") ;
    }
//-----------------------------------------------------------------------------

/**
 *  Dispatches a token that is neither a line identifier nor an EOL.
 *
 *  The single characters P, S and ? are markers; everything else,
 *  including any other one-letter token, is a word.
 */
private void processToken(String s){
    if (s.equals("P")){
        m.pe();
        }
    else if (s.equals("S")){
        m.samek() ;
        }
    else if (s.equals("?")){
        m.line() ;       
        }
    else{  // It's a word
        w.process(s) ;
        }
    }
//----------------------------------------------------------------------------

/**
 *  Counts the number of occurrences of a character in a String.
 *
 *  @param W  the string to search.
 *  @param c  the character to count.
 *  @return the number of positions in W holding c.
 */
public int countChar(String W, char c){
   int Count = 0 ;
   for(int k=0; k < W.length(); k++){
       if(W.charAt(k)==c){
           Count++ ;
           }
       }
   return Count ;
   }
//----------------------------------------------------------------------------

/**
 *  Prints a verse identification: the current book followed by the
 *  chapter, verse and word numbers as formatted by A.F.i.
 */
public void printPosition(){
    System.out.print(" at " + Book + " " + A.F.i(ChapterNumber,3) + ":" 
        + A.F.i(VerseNumber,2) 
            + "." + A.F.i(WordNumber,2) + "\n") ;
    }
//------------------------------------------------------------------------------
}
//==============================================================================
//==============================================================================

--- NEW FILE: Tanach.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;
import WLC2OSIS.Translate.* ;
import WLC2OSIS.Utilities.* ;

//==============================================================================
/**
 *  <b>Processes Tanach.</b>
 *
 *  Brackets a complete run: clears every counter before the first book
 *  and reports the number of books afterwards.
 */
//==============================================================================
public class Tanach{

WLC2OSIS A ;    // Owning application; supplies the number formatter A.F.
Parser P ;      // Parser holding the counters.

//-----------------------------------------------------------------------------

/**
 *  Keeps the application and parser references.
 */
public Tanach(WLC2OSIS A, Parser P ) {
    this.P = P ;
    this.A = A ;
    }
//------------------------------------------------------------------------------

/**
 *  Starts the Tanach: zeroes the chapter, book and Tanach level counts.
 */
public void start(){
    P.TanachBookCount = P.TanachChapterCount = P.TanachVerseCount = 0 ;
    P.BookChapterCount = P.BookVerseCount = 0 ;
    P.ChapterVerseCount = 0 ;
    }
//------------------------------------------------------------------------------

/**
 *  Ends the Tanach: prints the number of books processed.
 *
 *  Prints nothing when no book was ever started (P.Book is still null).
 */
public void end(){
    if (P.Book == null){
        return ;
        }
    System.out.println("\nTanach ends with "
        + A.F.i(P.TanachBookCount,2) + " books." ) ;
    }
//----------------------------------------------------------------------------
}
//==============================================================================
//==============================================================================

--- NEW FILE: Tokenizer.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;

import java.io.* ;
import javax.swing.* ;
import java.awt.* ;
//==============================================================================
/**
 *  <b>Tokenizer based on standard Java tokenizer, special to WLC.</b>
 *
 *  Special to WLC.
 *
 *  Splits the input characters into words (runs of printable characters),
 *  end-of-line tokens and the special symbol <code>?</code>.
 */
//==============================================================================
public class Tokenizer{

public final String EOF = "***EOF***" ;     // Returned at end of input.
public final String EOL = "***EOL***" ;     // Returned at each end of line.
WLC2OSIS A ;
CharArrayReader car ;
StreamTokenizer st ;

//-----------------------------------------------------------------------------

/**
 *  Sets up a StreamTokenizer over the application's input characters.
 */
public Tokenizer(WLC2OSIS A ) {
    this.A = A ;

// Set up the tokenizer

    car = new CharArrayReader(A.InputChars) ;    
    st = new StreamTokenizer( car) ;
    st.resetSyntax() ;
// StreamTokenizer only recognises line ends among whitespace characters.
// Declaring blanks and control characters as whitespace makes CR, LF and
// CR-LF each produce exactly one EOL, instead of relying on an ordinary
// '\n' happening to have the same value as TT_EOL.
    st.whitespaceChars(0, 32) ;
    st.wordChars(33, 126 ) ; // All printables are word characters
    st.ordinaryChar(63) ; // ? is a special symbol, the EOL marker.
    st.eolIsSignificant(true) ;
    }    
//------------------------------------------------------------------------------

/**
 *  Gets the next token as a String.
 *
 *  Tokens that are neither a word, an end of line, the end of input nor
 *  the <code>?</code> symbol are skipped.
 *
 *  @return the word text, "?" for the special symbol, EOL at an end of
 *          line, or EOF at the end of input.  EOF is also returned after
 *          an IOException has been reported, because retrying a failed
 *          read could loop forever.
 */
public String nextToken(){
        while (true){
            int TokenType ;
            try{
                TokenType = st.nextToken() ;
                }
            catch (IOException e) {
                System.out.println("Tokenizer: IOException in tokenization: " + e) ;
                return EOF ;
                }

            if (TokenType==StreamTokenizer.TT_WORD){
                return st.sval ;
                }
            if (TokenType==StreamTokenizer.TT_EOF){
                return EOF ;
                }
            if (TokenType==StreamTokenizer.TT_EOL){
                return EOL ;
                }
            if (TokenType==63){ // EOL marker
                return "?" ;
                }
            // Any other single character carries no meaning; read on.
            }
        }
//------------------------------------------------------------------------------

/**
 *  Does a pushBack on the StreamTokenizer, st.
 */
public void pushBack(){
        st.pushBack() ;
        }
//-----------------------------------------------------------------------------
}
//==============================================================================
//==============================================================================

--- NEW FILE: Verses.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;
//==============================================================================
/**
 *  <b>Processes verses.</b>
 *
 *  Opens and closes the <code>v</code> element of the current chapter and
 *  maintains the verse number held by the Parser.
 */
//==============================================================================
public class Verses{

WLC2OSIS A ;    // Owning application; A.w is the current book's writer.
Parser P ;      // Parser whose counters are updated here.
//-----------------------------------------------------------------------------

/**
 *  Keeps the application and parser references.
 */
public Verses(WLC2OSIS A, Parser P ) {
    this.A = A ;
    this.P = P ;
    }    
//------------------------------------------------------------------------------

/**
 *  Starts a verse.
 *
 *  Advances the verse number, resets the word number, and opens a
 *  <code>v</code> tag whose <code>n</code> attribute is the new verse
 *  number.
 */
public void start(){
    P.VerseNumber = P.VerseNumber + 1 ;
    P.WordNumber = 0 ;
// Integer.toString avoids allocating a wrapper object just to format the number.
    A.w.openAttributedTag("v", 3, "n", Integer.toString(P.VerseNumber) ) ;
//    System.out.println("\nVerse " + A.F.i(P.VerseNumber,2) + " start." ) ;
    }
//------------------------------------------------------------------------------

/**
 *  Ends a verse.
 *
 *  Does nothing if no verse has been started in the current chapter
 *  (the verse number is still 0); otherwise closes the <code>v</code> tag.
 */
public void end(){
    if( P.VerseNumber != 0){
        A.w.closeTag("v", 3) ;
//        System.out.println("Verse " + A.F.i(P.VerseNumber,2) + " ends with " 
//            + A.F.i(P.WordNumber,2) + " words." ) ;
        }
    }
//----------------------------------------------------------------------------
}
//==============================================================================
//==============================================================================

--- NEW FILE: Words.java ---
package WLC2OSIS.Parse ;

import WLC2OSIS.* ;
import WLC2OSIS.Translate.H ;
import WLC2OSIS.Utilities.* ;
//==============================================================================
/**
 *  <b>Processes words, sending them to the Translate class
 *  after their word, qere, ketiv properties have been determined.</b>
 *
 *  A word that ends in a maqaf is not written immediately.  It is held in
 *  MaqafWord until the next word arrives (and is joined to it when the
 *  types agree) or until a marker forces it out via Markers.testMaqafWord.
 */
//==============================================================================
public class Words{

WLC2OSIS A ;
Parser P ;

boolean TrailingMaqaf ;     // True while a maqaf word is being held back.
String MaqafWord ;          // The held-back, already translated word.
String MaqafWordType ;      // The type under which MaqafWord is to be written.

//-----------------------------------------------------------------------------

/**
 *  Keeps the application and parser references.
 */
public Words(WLC2OSIS A, Parser P ) {
    this.A = A ;
    this.P = P ;
    }    
//------------------------------------------------------------------------------

/**
 *  Processes a word by handing it to the parser's WKQ object.
 *
 *  @param W  the word token as read from the input.
 */
public void process(String W){

// Process WKQ 
        P.wkq.process(W) ;
    
//    System.out.println(P.Book + " " + A.F.i(P.ChapterNumber,2) + ":" 
//        + A.F.i(P.VerseNumber,2) 
//        + "." + A.F.i(P.WordNumber,2) + "                " + W) ;
    }
//----------------------------------------------------------------------------------

/**
 *  Translates and writes a word (simple, ketib, qere) with exception markers.
 *
 *  All returns leave P.MarkerWritten = false.
 *
 *  @param W     the untranslated word.
 *  @param Type  the tag name under which the word is written.
 */
public void write(String W, String Type) {

   String Word = P.T.translate(W) ;
   
// Check for any KQ markers which should NOT be here!

    int asteriskcount = P.countChar(W, '*') ;

    if (asteriskcount > 0){
        System.out.print("Words.write: Unexpected KQ character * ") ;
        P.printPosition() ;
        System.out.println("Word: " + W) ;
        }

// Look for a case in which there's been trailing maqaf
// without an intervening marker.

   if(TrailingMaqaf && !P.MarkerWritten ){
       if(MaqafWordType.charAt(0)== Type.charAt(0) ){
           Word = MaqafWord+Word ;  // Combine them.
           }
       else{
//           System.out.print("Words: Mismatched types for combining "
//               + MaqafWordType + ", " + Type + " at " ) ;
//           P.printPosition() ;
           // Types differ, so the held word is written on its own.
           writeWord(MaqafWord, MaqafWordType) ;
           }
       }
   
// Check for a trailing maqaf.
// Don't write the word here.
// An empty translation cannot end in a maqaf; the length test keeps
// charAt from failing on it.

   TrailingMaqaf = false ;
   if( Word.length() > 0 && Word.charAt(Word.length()-1) == H.maqaf){
       TrailingMaqaf = true ;
       MaqafWord = Word ;
       MaqafWordType = Type ;
       P.MarkerWritten = false ;
       return ;
       }

   writeWord(Word, Type) ;
   } 
//----------------------------------------------------------------------------------

/**
 *  Writes a translated word, turning each exception marker
 *  <code>]x</code> into <code>&lt;x&gt;x&lt;/x&gt;</code>.
 *
 *  A <code>]</code> that is the last character has no exception value.
 *  It is reported and written unchanged rather than reading past the end
 *  of the word.
 *
 *  @param Word  the translated word.
 *  @param Type  the tag name under which the word is written.
 */
public void writeWord(String Word, String Type) {
        
// Check for any exception markers ]x 

// StringBuffer keeps the class compilable on pre-Java-5 compilers.
   StringBuffer Out = new StringBuffer(Word.length()) ;
   int Last = Word.length() - 1 ;
   for (int k=0 ; k <= Last ; k++){
       char c = Word.charAt(k) ;
       if(c == ']' && k < Last){
           k++ ;    // Consume the exception value as well.
           Out.append("<x>").append(Word.charAt(k)).append("</x>") ;
           }
       else{
           if(c == ']'){
               System.out.print("Words.writeWord: Exception marker ] without a value ") ;
               P.printPosition() ;
               }
           Out.append(c) ;
           }
       }
    A.w.writeString(Type, 4, Out.toString()) ;
    P.MarkerWritten = false ;
    }
}
//==============================================================================
//==============================================================================

--- NEW FILE: package.html ---
<HTML>
<BODY>
<b>Parsing classes</b>.
<p/>
<p align="right"> (<b><tt>WLC2OSIS/Parse/package.html</tt></b>)
</p>
Only the Parser and Tokenizer classes are specific to the WLC.
</BODY>
</HTML>



More information about the sword-cvs mailing list