[sword-svn] r147 - trunk/modules/perlconverters

chrislit at www.crosswire.org chrislit at www.crosswire.org
Wed Nov 12 22:30:38 MST 2008


Author: chrislit
Date: 2008-11-12 22:30:37 -0700 (Wed, 12 Nov 2008)
New Revision: 147

Modified:
   trunk/modules/perlconverters/usfm2osis.pl
Log:
updating to Daniel Owen's v. 1.4 edits

Modified: trunk/modules/perlconverters/usfm2osis.pl
===================================================================
--- trunk/modules/perlconverters/usfm2osis.pl	2008-10-22 19:32:03 UTC (rev 146)
+++ trunk/modules/perlconverters/usfm2osis.pl	2008-11-13 05:30:37 UTC (rev 147)
@@ -1,721 +1,765 @@
-#!/usr/bin/perl
-
-## USFM to OSIS (2.1.1) converter
-
-## Licensed under the standard BSD license:
-
-# Copyright (c) 2002-2008 CrossWire Bible Society <http://www.crosswire.org/>
-# All rights reserved.
-# 
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-# 
-#     * Redistributions of source code must retain the above copyright
-#        notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in
-#       the documentation and/or other materials provided with the
-#       distribution.
-#     * Neither the name of the CrossWire Bible Society nor the names of
-#       its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written
-#       permission.
-# 
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-## For general inquiries, comments, suggestions, bug reports, etc. email:
-## sword-support at crosswire.org
-
-#########################################################################
-
-$version = "1.3";
-$date = "2008-06-12";
-$osisVersion = "2.1.1";
-
-%OSISbook = (
-# Theoretically, these are laid out according to <BooksPresent>, but I can really only guess without a spec
-"" => "", "GEN" => "Gen", "EXO" => "Exod", "LEV" => "Lev", "NUM" => "Num",
- "DEU" => "Deut", "JOS" => "Josh", "JDG" => "Judg", "RUT" => "Ruth",
- "1SA" => "1Sam", "2SA" => "2Sam", "1KI" => "1Kgs", "2KI" => "2Kgs",
- "1CH" => "1Chr", "2CH" => "2Chr", "EZR" => "Ezra", "NEH" => "Neh",
- "EST" => "Esth", "JOB" => "Job", "PSA" => "Ps", "PRO" => "Prov",
- "ECC" => "Eccl", "SNG" => "Song", "ISA" => "Isa", "JER" => "Jer",
- "LAM" => "Lam", "EZK" => "Ezek", "DAN" => "Dan", "HOS" => "Hos",
- "JOL" => "Joel", "AMO" => "Amos", "OBA" => "Obad", "JON" => "Jonah",
- "MIC" => "Mic", "NAM" => "Nah", "HAB" => "Hab", "ZEP" => "Zeph",
- "HAG" => "Hag", "ZEC" => "Zech", "MAL" => "Mal", "MAT" => "Matt",
- "MRK" => "Mark", "LUK" => "Luke", "JHN" => "John", "ACT" => "Acts",
- "ROM" => "Rom", "1CO" => "1Cor", "2CO" => "2Cor", "GAL" => "Gal",
- "EPH" => "Eph", "PHP" => "Phil", "COL" => "Col", "1TH" => "1Thess",
- "2TH" => "2Thess", "1TI" => "1Tim", "2TI" => "2Tim", "TIT" => "Titus",
- "PHM" => "Phlm", "HEB" => "Heb", "JAS" => "Jas", "1PE" => "1Pet",
- "2PE" => "2Pet", "1JN" => "1John", "2JN" => "2John", "3JN" => "3John",
- "JUD" => "Jude", "REV" => "Rev", "TOB" => "Tob", "JDT" => "Jdt",
- "ESG" => "Esth", "WIS" => "Wis", "SIR" => "Sir", "BAR" => "Bar",
- "LJE" => "EpJer", "S3Y" => "PrAzar", "SUS" => "Sus", "BEL" => "Bel",
- "1MA" => "1Macc", "2MA" => "2Macc", "3MA" => "3Macc", "4MA" => "4Macc",
- "1ES" => "1Esd", "2ES" => "2Esd", "MAN" => "PrMan",
-# Following this is just an uneducated guess
- "PS2" => "Ps151", "ODA" => "Odes", "PSS" => "PssSol", "JSA" => "Josh",
- "JSB" => "Josh", "TBS" => "Tob", "SST" => "Sus", "DNT" => "Dan",
- "BLT" => "Bel", "ADE" => "AddEsth"
-);
-
-use Encode;
-@encodingList = Encode->encodings(":all");
-foreach $enc (@encodingList) {
-    $encodings .= "$enc, ";
-}
-$encodings =~ s/\, $//;
-
-
-if (scalar(@ARGV) < 2) {
-    print "usfm2osis.pl -- USFM to OSIS $osisVersion converter version $version ($date)\nSyntax: usfm2osis.pl <osisWork> [-o OSIS-file] [-e USFM encoding] <USFM filenames|wildcard>\n\n";
-    print "Supported encodings include:\n\t$encodings\n\n";
-    print "If the encoding is omitted, utf8 is the default value.\n";
-    exit (-1);
-}
-
-$osisWork = $ARGV[0];
-
-$nextarg = 1;
-
-if ($ARGV[$nextarg] eq "-o") {
-    $outputFilename = "$ARGV[$nextarg+1]";
-    $nextarg += 2;
-}
-else {
-    $outputFilename = "$osisWork.osis.xml";
-}
-open (OUTF, , ">:utf8", "$outputFilename") or die "Could not open file $ARGV[2] for writing.";
-
-if ($ARGV[$nextarg] eq "-e") {
-    $inputEncoding = "$ARGV[$nextarg+1]";
-    $nextarg += 2;
-}
-else {
-    $inputEncoding = "utf8";
-}
-$encFound = 0;
-foreach $enc (@encodingList) {
-    if ($enc eq $inputEncoding) {
-	$encFound = 1;
-    }
-}
-if ($encFound == 0) {
-    die "Encoding $inputEncoding not supported.\nSupported encodings include:\n\t$encodings\n";
-}
-else {
-    print "Encoding \"$inputEncoding\" is supported.\n"
-}
-
-for (; $nextarg < scalar(@ARGV); $nextarg++) {
-    push(@files, $ARGV[$nextarg]);
-}
-
-push (@outdata, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.$osisVersion.xsd\">\n<osisText osisRefWork=\"Bible\" xml:lang=\"en\" osisIDWork=\"$osisWork\">\n<header>\n<work osisWork=\"$osisWork\"\/>\n<\/header>\n");
-
-$tagStack = "<\/osisText><\/osis>";
-$chapClose = "";
-$versClose = "";
-
-sub closeTag {
-    $tag = @_[0];
-
-    if ($tagStack =~ /$tag/) {
-	$tagStack =~ s/^(.*?$tag)//;
-	$taglist = $1;
-	$taglist =~ s/>/>\n/g;
-	$taglist =~ s/(<\/\w+)\s+[^>]+>/$1>/g;
-	return $taglist;
-    }
-    else {
-	return:
-    }
-}
-
-sub openTag {
-    $tag = @_[0];
-    $tagStack = $tag . $tagStack;
-    return;
-}
-
-foreach $file (@files) {
-    print "Processing $file.\n";
-    open (SFM, "$file");
-    my @filedata = "";
-    while (<SFM>) {
-	my $sfline;
-	$sfline = decode($inputEncoding, $_);
-	push (@filedata, $sfline);
-    }
-    close (SFM);
-
-    $ollevel = 0;
-    $vers = 0;
-    $chap = 0;
-    $book = "";
-
-    #encoding stuff
-    for ($i = 0; $i < scalar(@filedata); $i++) {
-	$line = @filedata[$i];
-	$line =~ s/[\r\n]//g;
-
-	### Basic XML entity encoding
-
-	$line =~ s/&(?![a-zA-Z0-9])/&amp;/g;
-	$line =~ s/<< ?/\@/g;
-	$line =~ s/>>/\#/g;
-	$line =~ s/</\$/g;
-	$line =~ s/>/\%/g;
-
-	$line =~ s/(\w)\'(\w)/$1ʼ$2/g;
-	$line =~ s/\\fr 1\/2 \\fr\*/½/g;
-
-	@filedata[$i] = $line;
-    }
-
-    for ($i = 0; $i < scalar(@filedata); $i++) {
-	$line = @filedata[$i];
-
-	$line =~ s/LORD/<divineName>Lord<\/divineName>/g;
-
-	### File Identification
-
-	$line =~ s/\\v\b\s+(\d+)(\-\d+|\s*\\v\b\s+\d+)\s*\\v\b\s+(\d+)/\\v $1\-$3/;
-	$line =~ s/\\v\b\s+(\d+)\s*\\v\b\s+(\d+\-)?(\d+)/\\v $1\-$3/;
-	$line =~ s/^\\(p[is]|mi)\b/\\p/;
-	$line =~ s/^\\li\b/\\p/; #\li isn't part of USFM, so we'll make it \p
-
-	# \id (book marker)
-	if ($line =~ /^\\id\b\s*([^ ]*)/) {
-	    $book = $OSISbook{$1};
-	    $chap = 0;
-	    if ($versClose =~ /<verse/) {
-		push (@outdata, $versClose); # close verse
-		$versClose = "";
-	    }
-#	    push (@outdata, closeTag("<\/div[^>]*?>")); # close section
-	    if ($chapClose =~ /<chapter/) {
-		push (@outdata, $chapClose); # close chapter
-		$chapClose = "";
-	    }
-
-	    push (@outdata, closeTag("<\/div type=\"book\">")); #close book
-	    if ($book eq "") {
-		$book = "UnknownUSFMBook";
-	    }
-	    push (@outdata, "<div type=\"book\" osisID=\"$book\">\n"); # open current book
-	    openTag("<\/div type=\"book\">");
-	    $line = "";
-	}
-	
-	# \h (running header--discard)
-	if ($line =~ /^\\h\b/) {
-	    $line = "";
-	}
-
-	# \cc (concistent changes script--discard)
-	if ($line =~ /^\\cc\b/) {
-	    $line = "";
-	}
-
-
-	### Introduction
-
-	# \it title
-	if ($line =~ /^\\it\b\s*(.*)/) {
-	    $line = "<div type=\"introduction\">\n<title>$1<\/title>";
-	    openTag("<\/div>");
-	}
-
-	# \imt major title
-	if ($line =~ /^\\imt\b\s*(.+)/) {
-	    $line = "<title>$1<\/title>";
-	}
-
-	# \is introduction section title
-	if ($line =~ /^\\is(\d*)\b\s*(.*)/) {
-	    $level = $1;
-	    if ($level eq "") {
-		$level = "1";
-	    }
-	    $line = "<div type=\"section\"><title>$2<\/title>";
-	    openTag("<\/div>");
-	}
-	
-	# \iot introduction outline title
-	if ($line =~ /^\\iot\b\s*(.*)/) {
-	    $line = "<div type=\"outline\">\n<title>$1<\/title>";
-	}
-	
-	# \io\d+ introduction outline item
-	if ($line =~ /^\\io(\d+)\b\s*(.*)/) {
-	    if ($ollevel == $1) {
-		$line = "<item>$2<\/item>";
-	    }
-	    elsif ($ollevel > $1) {
-		$line = "";
-		while ($ollevel > $1) {
-		    $line .= "<\/list><\/item>\n";
-		    $ollevel--;
-		}
-		$line .= "<item>$2<\/item>";
-	    }
-	    elsif ($ollevel < $1) {
-		$line = "";
-		if ($ollevel != 0) {
-		    $line .= "<item>";
-		}
-		while ($ollevel < $1) {
-		    $line .= "<list>\n";
-		    $ollevel++;
-		}
-		$line .= "<item>$2<\/item>\n";
-	    }
-	
-	    if (@filedata[$i+1] !~ /^\\io/) {
-		while ($ollevel > 0) {
-		    $line .= "\n<\/list>";
-		    if ($ollevel > 1) {$line .= "<\/item>";}
-		    $ollevel--;
-		}
-		if ($ollevel == 0) {
-		    $line .= "\n<\/div>";
-		}
-	    }
-	}
-
-	# \ip introduction paragraph
-	if ($line =~ /^\\ip\b\s*(.*)/) {
-	    $line = "<p>$1<\/p>";
-	}
-
-
-	### Chapters and Verses
-
-	# \c chapter
-	if ($line =~ /^\\c\b\s*([^ ]*)/) {
-	    if ($1 ne "") {
-		$chap = $1;
-	    }
-	    else {
-		$chap++;
-	    }
-
-	    push (@outdata, $versClose);
-	    $versClose = "";
-	    push (@outdata, closeTag("<\/p>"));
-	    if ($chapClose =~ /<chapter/) {
-		push (@outdata, $chapClose); # close previous chapter
-		$chapClose = "";
-	    } else {
-		push (@outdata, closeTag("<\/div>")); # close introduction div
-	    }
-
-	    push (@outdata, "<chapter sID=\"$book.$chap\" osisID=\"$book.$chap\"\/>\n");
-	    $chapClose = "<chapter eID=\"$book.$chap\"\/>\n";
-	    $line =~ s/\\c\b\s*([^ ]*)//;
-	}
-
-	# \d \ms majorSection
-	if ($line =~ /^\\(ms|d)\b\s*(.+)/) {
-	    push (@outdata, closeTag("<\/p>"));
-	    push (@outdata, closeTag("<\/div type=\"majorSection\">"));
-	    push (@outdata, "<div type=\"majorSection\">\n");
-	    openTag("<\/div type=\"majorSection\">");
-	    $line =~ s/\\(ms|d)\b\s*(.+)/<title>$1<\/title>/;
-	}
-
-	# \s section
-	if ($line =~ /^\\s\b\s*(.+)/) {
-	    push (@outdata, closeTag("<\/p>"));
-	    push (@outdata, closeTag("<\/div type=\"section\">"));
-	    push (@outdata, "<div type=\"section\">\n");
-	    openTag("<\/div type=\"section\">");
-	    $line =~ s/\\s\b\s*(.+)/<title>$1<\/title>/;
-	    if ($line =~ /HEBREW TITLE/) {
-		$line =~ s/<title>/<title type=\"psalm\">/;
-	    }
-	}
-
-	# \ss \s2 subSection
-	if ($line =~ /^\\s[s2]\b\s*(.+)/) {
-	    $line =~ s/\\s[s2]\b\s*(.+)/<title>$1<\/title>/;
-	}
-
-	# \sss \s3 x-subsubSection
-	if ($line =~ /^\\s(ss|3)\b\s*(.+)/) {
-	    push (@outdata, closeTag("<\/p>"));
-	    push (@outdata, closeTag("<\/div type=\"x=subSubSection\">"));
-	    push (@outdata, "<div type=\"x-subSubSection\">\n");
-	    openTag("<\/div type=\"x-subSubSection\">");
-	    $line =~ s/\\s(ss|3)\b\s*(.+)/<title>$2<\/title>/;
-	}
-
-	# \p paragraph
-	if ($line =~ /^\\p\b\s*/) {
-	    push (@outdata, closeTag("<\/p>"));
-	    push (@outdata, "<p>\n");
-	    openTag("<\/p>");
-	    $line =~ s/\\p\b\s*//;
-	}
-
-	# \v verse
-	if ($line =~ /^\\v\b\s*(\d[^\\ ]*)?/) {
-	    if ($1 ne "") {
-		$vers = $1;
-	    }
-	    else {
-		$vers++;
-	    }
-
-	    push (@outdata, $versClose);
-	    $versClose = "";
-
-	    if ($vers =~ /(\d+[^\\\- ]*)\-(\d+[^\\ ]*)/) {
-		$vF = $1;
-		$vT = $2;
-		$vF =~ /^(\d+)/;
-		$vFn = scalar($1);
-		$vT =~ /^(\d+)/;
-		$vTn = scalar($1);
-		$osisID = "$book.$chap.$vF";
-		if ($vTn > $vFn && $vFn > 0) {
-		    for ($j = $vFn + 1; $j < $vTn; $j++) {
-			$osisID .=" $book.$chap.$j";
-		    }
-		}
-		$osisID .= " $book.$chap.$vT";
-	    }
-	    else {
-		$osisID = "$book.$chap.$vers";
-	    }
-	    push (@outdata, "<verse sID=\"$osisID\" osisID=\"$osisID\"\/>\n");
-	    $versClose = "<verse eID=\"$osisID\"\/>\n";
-	    $line =~ s/\\v\b\s*(\d[^\\ ]*)? *//;
-	}
-
-	## Notes
-
-	# \f note
-	for ($j = 2; $j > 0; $j--) {
-	    if ($line =~ /\\f\b\s*([^\s]+)\s*\\rf\s*([^\\]+)\\rf\*\s*/) {
-		$nVal = $1;
-		$scopeVal = $2;
-		
-		$scopeVal =~ s/://g;
-		$scopeVal = "$book.$scopeVal";
-		$scopeVal =~ s/(\d+)\.(\d[^\,]+)\,\s*(\d.+)/$1.$2 $book.$1.$3/;
-		$scopeVal =~ s/(\d+)\.(\d[^\-]+)\-+\s*(\d.+)/$1.$2\-$book.$1.$3/;
-		
-		$line =~ s/\\f\b\s*([^\s]+)\s*\\rf\s*([^\\]+)\\rf\*\s*/<note n=\"$nVal\" annotateRef=\"$scopeVal\">/;
-		$line =~ s/(<note [^>]+>)([A-Z][^a-z:]*?):/$1<catchWord>$2<\/catchWord>/g;
-	    }
-	}
-	# \f hebrew title note
-	if ($line =~ /\\f\b\s*([^\s]+)\s*HEBREW TITLE:\s*/) {
-	    $nVal = $1;
-	    $line =~ s/\\f\b\s*([^\s]+)\s*HEBREW TITLE:\s*/<note n=\"$nVal\">/;
-	}
-	# \f spare notes
-	if ($line =~ /\\f\b\s*([^\s]+)\s*\*\s*/) {
-	    $nVal = $1;
-	    $line =~ s/\\f\b\s*([^\s]+)\s*\*\s*/<note n=\"$nVal\">/;
-	    $line =~ s/(<note [^>]+>)([A-Z][^a-z:]*?):/$1<catchWord>$2<\/catchWord>/g;
-	}
-	
-	# \f if we STILL have notes, just change them to <note>
-	if ($line =~ /\\f\b\s*/) {
-	    $line =~ s/\\f\b\s*/<note>/;
-	}
-	
-	
-	# \x crossReference
-	for ($j = 2; $j > 0; $j--) {
-	    if ($line =~ /\\x\b\s*\\rf\s*([^\\]+)\\rf\*\s*/) {
-		$scopeVal = $1;
-		
-		$scopeVal =~ s/://g;
-		$scopeVal = "$book.$scopeVal";
-		$scopeVal =~ s/(\d+)\.(\d[^\,]*)\,\s*(\d.*)/$1.$2 $book.$1.$3/;
-		$scopeVal =~ s/(\d+)\.(\d[^\-]*)\-+\s*(\d.*)/$1.$2\-$book.$1.$3/;
-		
-		$line =~ s/\\x\b\s*\\rf\s*([^\\]+)\\rf\*\s*/<note type=\"crossReference\" annotateRef=\"$scopeVal\">/;
-		$line =~ s/\[2\]\s*([^\[]+?)(\s*\[1\]|\\x\*)/<seg type=\"x-dc\">$1<\/seg>$2/g;
-		$line =~ s/\[2\]\s*([^\[]+?)$/<seg type=\"x-dc\">$1<\/seg>/g;
-		$line =~ s/\s*\[1\]//g;
-		if ($line =~ /<note type=\"crossReference\" annotateRef=\"[^\"]+?\">\\bw/) {
-		    $line =~ s/(<note type=\"crossReference\" annotateRef=\"[^\"]+?\")>\\bw (.+?) \\bw\* /$1 n=\"$2\">/;
-		    $pa = $1;
-		    $line =~ s/\\bw (.+?) \\bw\* /<\/note>\n$pa n=\"$1\">/g;
-		}
-		if ($line =~ /<\/seg> \\bw/) {
-		    $line =~ /(<note type=\"crossReference\" annotateRef=\"[^\"]+?\")>/;
-		    $pa = $1;
-		    $line =~ s/\s*\\bw (.+?) \\bw\* /<\/note>\n$pa n=\"$1\">/g;
-		}
-	    }
-	    
-	}
-	# \x hebrew title crossReference
-	if ($line =~ /\\x\b\s*Title:\s*/) {
-	    $nVal = $1;
-	    $line =~ s/\\x\b\s*Title:\s*/<note type=\"crossReference\">/;
-	}
-	
-
-	# \[fx]* note/crossReference closers
-	if ($line =~ /\\[fx]\*/) {
-	    $line =~ s/\\[fx]\*/<\/note>/g;
-	}
-
-	
-	## Poetry
-
-	# \q line
-	if ($line =~ /^\\q/) {
-	    if ($l != 1) {
-		push (@outdata, "<lg>\n");
-		$l = 1;
-	    }
-	    if ($line =~ /\\q(c|\d*)$/) {
-		if ($1 eq "") {
-		    $line = "<l>\n";
-		}
-		elsif ($1 eq "c") {
-		    $line = "<l type=\"x-centered\">";
-		}
-		else {
-		    $line = "<l level=\"$1\">\n";
-		}
-		@filedata[$i+1] .= "<\/l>";
-		if (@filedata[$i+2] !~ /\\q/) {
-		    @filedata[$i+1] .= "\n<\/lg>";
-		    $l = 0;
-		}
-	    }
-	    else {
-		$line =~ s/\\q\b\s*(.+)/<l>$1<\/l>/;
-		$line =~ s/\\q(\d+)\b\s*(.+)/<l level=\"$1\">$2<\/l>/;
-		$line =~ s/\\qc\b\s*(.+)/<l type=\"x-centered\">$1<\/l>/;
-		if (@filedata[$i+1] !~ /\\q/) {
-		    $line .= "\n<\/lg>";
-		    $l = 0;
-		}
-	    }
-	}
-	
-
-	## Tables
-
-	# \th table heading
-	if ($line =~ /^\\t/) {
-	    if ($line =~ /^\\tr\b\s*(\\th.*)/) {
-		$line = "$1";
-		if ($table != 1) {
-		    push (@outdata, "<table>\n");
-		    $table = 1;
-		}
-		$line =~ s/\\th\d?\b\s*(.+?)\s*(?=(\\th|$))/<cell role=\"label\">$1<\/cell>/g;
-		$line = "<row>$line<\/row>";
-	    }	
-
-	    if ($line =~ /^\\tr\b\s*(\\tc.*)/) {
-		$line = $1;
-		if ($table != 1) {
-		    push (@outdata, "<table>\n");
-		    $table = 1;
-		}
-		$line =~ s/\\tcr?\d?\b\s*(.+?)\s*(?=(\\tc|$))/<cell>$1<\/cell>/g;
-		$line = "<row>$line<\/row>";
-		if (@filedata[$i+1] !~ /\\tr/) {
-		    $line .= "<\/table>\n";
-		    $table = 0;
-		}
-	    }
-
-	    if ($line =~ /^\\th1\b\s*(.*)/) {
-		if ($table != 1) {
-		    push (@outdata, "<table>\n");
-		    $table = 1;
-		}
-		$line = "<row><cell role=\"label\">$1<\/cell>\n";
-	    }	
-	    elsif ($line =~ /^\\th\d+\b\s*(.*)/) {
-		$line = "<cell role=\"label\">$1<\/cell>\n";
-	    }
-
-	    if ($line =~ /^\\tb1\b\s*(.*)/) {
-		if ($table != 1) {
-		    push (@outdata, "<table>\n");
-		    $table = 1;
-		}
-		else {
-		    push (@outdata, "<\/row>");
-		}
-		$line = "<row><cell>$1<\/cell>\n";
-		if (@filedata[$i+1] !~ /\\tb/) {
-		    $line .= "<\/row><\/table>\n";
-		    $table = 0;
-		}
-	    }	
-	    elsif ($line =~ /^\\tb\d+\b\s*(.*)/) {
-		$line = "<cell>$1<\/cell>\n";
-		if (@filedata[$i+1] !~ /\\tb/) {
-		    $line .= "<\/row><\/table>\n";
-		    $table = 0;
-		}
-	    }	
-	}
-
-	## Other
-
-	# \ls list
-	if ($line =~ /^\\ls\b\s*(.+)/) {
-	    if ($ls != 1) {
-		push (@outdata, "<list>\n");
-		$ls = 1;
-	    }
-	    $line = "<item>$1<\/item>\n";
-	    if (@filedata[$i+1] !~ /\\ls/) {
-		$line .= "<\/list>";
-		$ls = 0;
-	    }
-	}
-
-	# \mt\mt1 title
-	if ($line =~ /^\\mt[1]?\b\s*(.+)/) {
-	    $line = "<title type=\"main\">$1<\/title>";
-	}
-
-	# \mt2 title
-	if ($line =~ /^\\mt2\b\s*(.+)/) {
-	    $line = "<title type=\"continued\">$1<\/title>";
-	}
-
-	# \st,\st2 title
-	if ($line =~ /^\\st2?\b\s*(.+)/) {
-	    $line = "<title type=\"continued\">$1<\/title>";
-	}
-
-	# \st3 title
-	if ($line =~ /^\\st3\b\s*(.+)/) {
-	    $line = "<title type=\"sub\">$1<\/title>";
-	}
-
-	# \r sub title
-	if ($line =~ /^\\mr\b\s*(.+)/) {
-	    $line = "<title type=\"sub\">$1<\/title>";
-	}
-
-	# \r parallel title
-	if ($line =~ /^\\r\b\s*(.+)/) {
-	    $line = "<title type=\"parallel\">$1<\/title>";
-	}
-
-	# \sp speaker
-	if ($line =~ /^\\sp\b\s*(.+)/) {
-	    $line = "<speaker>$1<\/speaker>";
-	}
-
-	# \itw italic word
-	$line =~ s/\\itw\b\s*(.*?)\\itw\*/<hi type=\"italic\">$1<\/hi>/g;
-
-	# \n superscripted verse number
-	$line =~ s/\\n\b\s*(.*?)\\n\*\s*/<seg type=\"x-versenum\">$1<\/seg>/g;
-
-	# remove unnecessary tags
-	$line =~ s/\\b\b//;
-	$line =~ s/\\m\b//;
-	$line =~ s/\\restore\b//;
-
-
-	$line =~ s/\\bq\*/<\/p><\/q>/g;
-	$line =~ s/\\bq\b\s*/<q type=\"block\"><p>/g;
-	$line =~ s/\\pp/<\/p><p>/g;
-	$line =~ s/\\in\*/<\/p><\/inscription>/g;
-	$line =~ s/\\in\b\s*/<inscription><p>/g;
-	
-	
-	if ($line !~ /^\s*$/) {
-	    push (@outdata, "$line\n");
-	}
-    }
-}
-
-push (@outdata, closeTag("<\/osis>"));
-
-for ($i = 0; $i < scalar(@outdata); $i++) {
-    @outdata[$i] =~ s/---/―/g;
-    @outdata[$i] =~ s/--/—/g;
-    @outdata[$i] =~ s/([es]ID=\"[^\" ]+) [^\"]*\"/$1\"/;
-}
-
-for ($i = 0; $i < scalar(@outdata); $i++) {
-    if (@outdata[$i] !~ /^\s*$/) {
-	@outdata[$i] =~ s/[\r\n]+/\n/g;
-	@outdata[$i] =~ s/\n?$/\n/;
-	print OUTF @outdata[$i];
-    }
-}
-close (OUTF);
-
-print "Doing some cleanup.\n";
-
-open (INF, "$outputFilename");
-@filedata = <INF>;
-close (INF);
-open (OUTF, ">$outputFilename");
-
-#bubble chapter down
-for ($i = 0; $i < scalar(@filedata); $i++) {
-    if (@filedata[$i] =~ /^<\// && @filedata[$i-1] =~ /^<chapter.+\/>/) {
-	$temp = @filedata[$i];
-	@filedata[$i] = @filedata[$i-1];
-	@filedata[$i-1] = $temp;
-	$i -= 2;
-    }
-}
-for ($i = 0; $i < scalar(@filedata); $i++) {
-    $fullfile .= @filedata[$i];
-}
-$fullfile =~ s/<\/div>\n(<chapter eID[^>]+>)/$1\n<\/div>/mg; #swap the chapter back up one before the book closer
-
-print "Tagging quotations.\n";
-
-$q = 1;
-
-$fullfile =~ s/\$([^\%]+?)\%/"<q level=\"2\" sID=\"q2." . $q . "\"\/>" . $1 . "<q level=\"2\" eID=\"q2." . $q++ . "\"\/>"/eg;
-
-$fullfile =~ s/\$/"<milestone type=\"cQuote\" subType=\"x-level-2\"\/>"/eg;
-
-$q = 1;
-
-while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/) {
-    $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
-}
-while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/) {
-    $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
-}
-
-$fullfile =~ s/\@([^\#]+?)\#/"<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $1 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>"/eg;
-$fullfile =~ s/\@/"<milestone type=\"cQuote\" subType=\"x-level-1\"\/>"/eg;
-
-$fullfile =~ s/\^/"<q level=\"1\" eID=\"q1." . $q++ . ".false\"\/>"/eg;
-
-print OUTF $fullfile;
-close (OUTF);
-
-print "All done! OSIS file: $outputFilename\n";
+#!/usr/bin/perl
+
+## USFM to OSIS (2.1.1) converter
+
+## Licensed under the standard BSD license:
+
+# Copyright (c) 2002-2008 CrossWire Bible Society <http://www.crosswire.org/>
+# All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+# 
+#     * Redistributions of source code must retain the above copyright
+#        notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of the CrossWire Bible Society nor the names of
+#       its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written
+#       permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+## For general inquiries, comments, suggestions, bug reports, etc. email:
+## sword-support at crosswire.org
+
+#########################################################################
+
+# Script version and release date; both are shown in the usage message.
+$version = "1.4";
+$date = "2008-07-04";
+# OSIS schema version; interpolated into the schemaLocation URL of the generated header and shown in the usage message.
+$osisVersion = "2.1.1";
+# USFM version this converter targets; shown in the usage message.
+$usfmVersion = "2.1"; # The USFM reference document can be found at http://confluence.ubs-icap.org/display/USFM/Home;jsessionid=97071C5C1E562036A1CAF4FF77147565 (as of 2008-07-07)
+
+# Maps USFM book abbreviations to OSIS book abbreviations; it is consulted when an \id line is read. TODO: allow an external file to supply this mapping, so that abbreviated book names in other languages can be converted to OSIS as well (especially useful for cross-references); not yet worked out.
+%OSISbook = (
+# Theoretically, these are laid out according to <BooksPresent>, but without a spec the order is only a guess. TODO: check.
+"" => "", "GEN" => "Gen", "EXO" => "Exod", "LEV" => "Lev", "NUM" => "Num",
+ "DEU" => "Deut", "JOS" => "Josh", "JDG" => "Judg", "RUT" => "Ruth",
+ "1SA" => "1Sam", "2SA" => "2Sam", "1KI" => "1Kgs", "2KI" => "2Kgs",
+ "1CH" => "1Chr", "2CH" => "2Chr", "EZR" => "Ezra", "NEH" => "Neh",
+ "EST" => "Esth", "JOB" => "Job", "PSA" => "Ps", "PRO" => "Prov",
+ "ECC" => "Eccl", "SNG" => "Song", "ISA" => "Isa", "JER" => "Jer",
+ "LAM" => "Lam", "EZK" => "Ezek", "DAN" => "Dan", "HOS" => "Hos",
+ "JOL" => "Joel", "AMO" => "Amos", "OBA" => "Obad", "JON" => "Jonah",
+ "MIC" => "Mic", "NAM" => "Nah", "HAB" => "Hab", "ZEP" => "Zeph",
+ "HAG" => "Hag", "ZEC" => "Zech", "MAL" => "Mal", "MAT" => "Matt",
+ "MRK" => "Mark", "LUK" => "Luke", "JHN" => "John", "ACT" => "Acts",
+ "ROM" => "Rom", "1CO" => "1Cor", "2CO" => "2Cor", "GAL" => "Gal",
+ "EPH" => "Eph", "PHP" => "Phil", "COL" => "Col", "1TH" => "1Thess",
+ "2TH" => "2Thess", "1TI" => "1Tim", "2TI" => "2Tim", "TIT" => "Titus",
+ "PHM" => "Phlm", "HEB" => "Heb", "JAS" => "Jas", "1PE" => "1Pet",
+ "2PE" => "2Pet", "1JN" => "1John", "2JN" => "2John", "3JN" => "3John",
+ "JUD" => "Jude", "REV" => "Rev", "TOB" => "Tob", "JDT" => "Jdt",
+ "ESG" => "Esth", "WIS" => "Wis", "SIR" => "Sir", "BAR" => "Bar",
+ "LJE" => "EpJer", "S3Y" => "PrAzar", "SUS" => "Sus", "BEL" => "Bel",
+ "1MA" => "1Macc", "2MA" => "2Macc", "3MA" => "3Macc", "4MA" => "4Macc",
+ "1ES" => "1Esd", "2ES" => "2Esd", "MAN" => "PrMan",
+# The entries from here on are just an uneducated guess.
+ "PS2" => "Ps151", "ODA" => "Odes", "PSS" => "PssSol", "JSA" => "Josh",
+ "JSB" => "Josh", "TBS" => "Tob", "SST" => "Sus", "DNT" => "Dan",
+ "BLT" => "Bel", "ADE" => "AddEsth"
+ );
+
+# Generates a list of available encodings ($encodings is the printable, comma-separated form used in the usage and error messages).
+use Encode;
+@encodingList = Encode->encodings(":all");
+foreach $enc (@encodingList) {
+    $encodings .= "$enc, ";
+}
+$encodings =~ s/\, $//;    # drop the trailing ", " left by the loop
+
+# Syntax instructions: with fewer than two arguments (the osisWork plus at least one more), print the usage text and exit with status -1.
+if (scalar(@ARGV) < 2) {
+    print "\nusfm2osis.pl -- USFM $usfmVersion to OSIS $osisVersion converter version $version ($date)\n\nSyntax: usfm2osis.pl <osisWork> [-o OSIS-file] [-e USFM encoding] <USFM filenames|wildcard>\n";
+    print "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n";
+    print "- The osisWork is a short name with no spaces which will identify your module.\n";
+    print "- If no -o option is specified for the output filename, the default output file is: \n\tosisWork.osis.xml.\n";
+    print "- Supported encodings include:\n\t$encodings\n";
+    print "- If the encoding is omitted, utf8 is the default value.\n";
+    print "- USFM filenames with the SFM extension can be accessed using a wildcard: \n\t*.SFM\n";
+    print "As an example, if you want to generate the osisWork <bible> and your USFM files are encoded in utf8, located in the /Bible folder relative to this script with the file extension SFM, enter:\n\tperl usfm2osis.pl bible Bible/*.SFM\n\n";
+    exit (-1);
+}
+
+# The first argument names the OSIS work; $nextarg is the index of the next unread argument.
+$osisWork = $ARGV[0];
+$nextarg = 1;
+# An optional "-o <file>" overrides the default output name, <osisWork>.osis.xml.
+if ($ARGV[$nextarg] eq "-o") {
+    $outputFilename = "$ARGV[$nextarg+1]";
+    $nextarg += 2;
+}
+else {
+    $outputFilename = "$osisWork.osis.xml";
+}
+open (OUTF, ">:utf8", "$outputFilename") or die "Could not open file $outputFilename for writing: $!";
+
+# Pick up the optional "-e <encoding>" switch; without it the input is
+# assumed to be utf8.
+$inputEncoding = "utf8";
+if ($ARGV[$nextarg] eq "-e") {
+    $inputEncoding = "$ARGV[$nextarg+1]";
+    $nextarg += 2;
+}
+
+# The requested encoding must appear verbatim in the list reported by
+# Encode; $encFound ends up 1 when it does and 0 when it does not.
+$encFound = (grep { $_ eq $inputEncoding } @encodingList) ? 1 : 0;
+
+# Stop right away on an unknown encoding, listing what is available.
+if (!$encFound) {
+    die "Encoding $inputEncoding not supported.\nSupported encodings include:\n\t$encodings\n";
+}
+else {
+    print "Encoding \"$inputEncoding\" is supported.\n";
+}
+
+# Everything still unread on the command line is a USFM input file.
+push(@files, @ARGV[$nextarg .. $#ARGV]);
+$nextarg = scalar(@ARGV) if ($nextarg < scalar(@ARGV));
+
+push (@outdata, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.$osisVersion.xsd\">\n<osisText osisRefWork=\"Bible\" xml:lang=\"en\" osisIDWork=\"$osisWork\">\n<header>\n<work osisWork=\"$osisWork\"\/>\n<\/header>\n");    # XML declaration, <osis>/<osisText> openers and the <header> for this work
+# Conversion state shared by closeTag/openTag and the per-file loop below.
+$tagStack = "<\/osisText><\/osis>";    # closing tags still owed, innermost first
+$chapClose = "";    # pending chapter-closing text; empty while no chapter is open
+$versClose = "";    # pending verse-closing text; empty while no verse is open
+
+sub closeTag {    # closeTag(PATTERN): remove $tagStack up to and including the first match of PATTERN and return those closing tags
+    $tag = $_[0];    # NB: interpolated into the matches below as a regular expression, not as a literal string
+
+    if ($tagStack =~ /$tag/) {
+	$tagStack =~ s/^(.*?$tag)//;    # cut everything up to and including the match off the front of the stack
+	$taglist = $1;
+	$taglist =~ s/>/>\n/g;    # one closing tag per output line
+	$taglist =~ s/(<\/\w+)\s+[^>]+>/$1>/g;    # drop attribute text carried on stack entries, e.g. <\/div type="book"> becomes <\/div>
+	return $taglist;
+    }
+    else {
+	return;    # no matching entry on the stack: return nothing (was "return:", which Perl parses as a label)
+    }
+}
+
+sub openTag {    # openTag(TAG): record closing-tag text TAG at the front of $tagStack; returns nothing
+    $tag = $_[0];    # first argument as a scalar element (was the one-element slice @_[0])
+    $tagStack = $tag . $tagStack;    # prepended, so closeTag finds the most recently opened entry first
+    return;
+}
+
+foreach $file (@files) {
+    print "Processing $file.\n";
+    open (SFM, "$file");
+    my @filedata = "";
+    while (<SFM>) {
+	my $sfline;
+	$sfline = decode($inputEncoding, $_);
+	push (@filedata, $sfline);
+    }
+    close (SFM);
+
+    $ollevel = 0;
+    $vers = 0;
+    $chap = 0;
+    $book = "";
+    # Sets the initial value for the attribute "n" in footnotes.
+    $nFN = 0;
+	# Creates array for the attribute "n" in cross-references
+    @nCR = (a .. z);
+    # Sets the initial value for the attribute "n" in cross-references.
+    $nCR = @nCR [0]; 
+    
+    #encoding stuff
+    for ($i = 0; $i < scalar(@filedata); $i++) {
+	$line = @filedata[$i];
+	$line =~ s/[\r\n]//g;
+
+	### Basic XML entity encoding
+	$line =~ s/&(?![a-zA-Z0-9])/&amp;/g;
+	$line =~ s/<< ?/\@/g;
+	$line =~ s/>>/\#/g;
+	$line =~ s/</\$/g;
+	$line =~ s/>/\%/g;
+
+	$line =~ s/(\w)\'(\w)/$1ʼ$2/g;
+	$line =~ s/\\fr 1\/2 \\fr\*/½/g;
+
+	@filedata[$i] = $line;
+    }
+
+    for ($i = 0; $i < scalar(@filedata); $i++) {
+	$line = @filedata[$i];
+
+	### File Identification--Markers Supported: \id, \h, \ide, \sts, \rem, \toc1, \toc2, \toc3
+
+	$line =~ s/\\v\b\s+(\d+)(\-\d+|\s*\\v\b\s+\d+)\s*\\v\b\s+(\d+)/\\v $1\-$3/;
+	$line =~ s/\\v\b\s+(\d+)\s*\\v\b\s+(\d+\-)?(\d+)/\\v $1\-$3/;
+	$line =~ s/^\\(p[is]|mi)\b/\\p/;
+	$line =~ s/^\\li\b/\\p/; #\li isn't part of USFM, so we'll make it \p
+
+	# \id (book marker)
+	if ($line =~ /^\\id\b\s*([^ ]*)/) {
+	    $book = $OSISbook{$1};
+	    $chap = 0;
+	    if ($versClose =~ /<verse/) {
+		push (@outdata, $versClose); # close verse
+		$versClose = "";
+	    }
+#	    push (@outdata, closeTag("<\/div[^>]*?>")); # close section
+	    if ($chapClose =~ /<chapter/) {
+		push (@outdata, $chapClose); # close chapter
+		$chapClose = "";
+	    }
+
+	    push (@outdata, closeTag("<\/div type=\"book\">")); #close book
+	    if ($book eq "") {
+		$book = "UnknownUSFMBook";
+	    }
+	    push (@outdata, "<div type=\"book\" osisID=\"$book\">\n"); # open current book
+	    openTag("<\/div type=\"book\">");
+	    $line = "";
+	}
+	
+	# \h (running header--discard)
+	if ($line =~ /^\\h\b/) {
+	    $line = "";
+	}
+
+	# \ide Encoding (discard)
+	if ($line =~ /^\\ide\b/) {
+	    $line = "";
+	}
+
+	# \sts Status (discard)
+	if ($line =~ /^\\sts\b/) {
+	    $line = "";
+	}
+
+	# \rem Comments from translator (discard)
+	if ($line =~ /^\\rem\b/) {
+	    $line = "";
+	}
+
+	# \toc1 Table of Contents (discard)
+	if ($line =~ /^\\toc\d\b/) {
+	    $line = "";
+	}
+	
+	### Introduction--Markers Supported: \imt#, \is#, \iot, \io#, \ip
+	#### Markers Not Yet Supported: \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili, \ior...\ior*, \iex, \imte, \ie
+
+	# \it title (DCO: Commented out because \it is for italics not introduction titles in USFM 2.1)
+#	if ($line =~ /^\\it\b\s*(.*)/) {
+#	    $line = "<div type=\"introduction\">\n<title>$1<\/title>";
+#	    openTag("<\/div>");
+#	}
+
+	# \imt major title: the title opens a new introduction div
+	if ($line =~ /^\\imt\b\s*(.+)/) {
+	    my $introTitle = $1;
+	    $line = "<div type=\"introduction\">\n<title>$introTitle<\/title>";
+	    openTag("<\/div>");
+	}
+
+
+	# \is introduction section title: opens a section div carrying the title.
+	# A missing level digit counts as level 1 ($level is recorded here but does
+	# not appear in the markup written by this handler).
+	if ($line =~ /^\\is(\d*)\b\s*(.*)/) {
+	    my $sectionTitle = $2;
+	    $level = ($1 eq "") ? "1" : $1;
+	    $line = "<div type=\"section\"><title>$sectionTitle<\/title>";
+	    openTag("<\/div>");
+	}
+	
+	# \iot introduction outline title: opens the outline div (no openTag call
+	# here; the \io handler writes the matching closing tag itself)
+	if ($line =~ /^\\iot\b\s*(.*)/) {
+	    my $outlineTitle = $1;
+	    $line = "<div type=\"outline\">\n<title>$outlineTitle<\/title>";
+	}
+	
+	# \io\d+ introduction outline item
+	# $ollevel holds the nesting depth of the <list> elements currently open
+	# (0 = no list open); $1 is the depth requested by this \io# marker.
+	if ($line =~ /^\\io(\d+)\b\s*(.*)/) {
+	    # same depth: just another item in the current list
+	    if ($ollevel == $1) {
+		$line = "<item>$2<\/item>";
+	    }
+	    # shallower: close one list (and the item containing it) per level
+	    # until the requested depth is reached, then add the item
+	    elsif ($ollevel > $1) {
+		$line = "";
+		while ($ollevel > $1) {
+		    $line .= "<\/list><\/item>\n";
+		    $ollevel--;
+		}
+		$line .= "<item>$2<\/item>";
+	    }
+	    # deeper: open one list per level; when a list is already open the new
+	    # list is wrapped in an <item> of its parent
+	    elsif ($ollevel < $1) {
+		$line = "";
+		if ($ollevel != 0) {
+		    $line .= "<item>";
+		}
+		while ($ollevel < $1) {
+		    $line .= "<list>\n";
+		    $ollevel++;
+		}
+		$line .= "<item>$2<\/item>\n";
+	    }
+	
+	    # Look ahead: when the next input line does not start with \io the
+	    # outline is finished, so close every list that is still open.
+	    if (@filedata[$i+1] !~ /^\\io/) {
+		while ($ollevel > 0) {
+		    $line .= "\n<\/list>";
+		    if ($ollevel > 1) {$line .= "<\/item>";}
+		    $ollevel--;
+		}
+		# always true once the loop above has run; this emits the closing
+		# tag of the <div type="outline"> opened by the \iot handler
+		if ($ollevel == 0) {
+		    $line .= "\n<\/div>";
+		}
+	    }
+	}
+
+	# \ip introduction paragraph: the whole line becomes one <p> element
+	if ($line =~ /^\\ip\b\s*(.*)/) {
+	    $line = "<p>$1<\/p>";
+	}
+
+	### Titles, Headings, and Labels (elsewhere?)--Markers Supported: \d, \ms#, \s#, \mt#, \r, \sp
+	#### Markers Not Yet Supported: \mte#, \mr, \sr, \rq...\rq* 
+	
+	# \d \ms majorSection
+	# Closes any open paragraph and the previous majorSection div, opens a new
+	# majorSection div and turns the marker text into its <title>.
+	if ($line =~ /^\\(ms|d)\b\s*(.+)/) {
+	    push (@outdata, closeTag("<\/p>"));
+	    push (@outdata, closeTag("<\/div type=\"majorSection\">"));
+	    push (@outdata, "<div type=\"majorSection\">\n");
+	    openTag("<\/div type=\"majorSection\">");
+	    $line =~ s/\\(ms|d)\b\s*(.+)/<title>$2<\/title>/;
+	}
+
+	# \s section (From Chapters and Verses)
+	# Same pattern as above for section divs; a heading containing the text
+	# "HEBREW TITLE" is marked as a psalm title.
+	if ($line =~ /^\\s\b\s*(.+)/) {
+	    push (@outdata, closeTag("<\/p>"));
+	    push (@outdata, closeTag("<\/div type=\"section\">"));
+	    push (@outdata, "<div type=\"section\">\n");
+	    openTag("<\/div type=\"section\">");
+	    $line =~ s/\\s\b\s*(.+)/<title>$1<\/title>/;
+	    if ($line =~ /HEBREW TITLE/) {
+		$line =~ s/<title>/<title type=\"psalm\">/;
+	    }
+	}
+
+	# \ss \s2 subSection (From Chapters and Verses)
+	# Only the title is written; no div is opened for this level.
+	if ($line =~ /^\\s[s2]\b\s*(.+)/) {
+	    $line =~ s/\\s[s2]\b\s*(.+)/<title>$1<\/title>/;
+	}
+
+	# \sss \s3 x-subsubSection (From Chapters and Verses)
+	if ($line =~ /^\\s(ss|3)\b\s*(.+)/) {
+	    push (@outdata, closeTag("<\/p>"));
+	    # The string passed to closeTag has to be the same one registered with
+	    # openTag below, as in the other section handlers. It used to be
+	    # spelled "x=subSubSection", so (presumably -- closeTag is defined
+	    # elsewhere in the file) the previous div was never found and closed.
+	    push (@outdata, closeTag("<\/div type=\"x-subSubSection\">"));
+	    push (@outdata, "<div type=\"x-subSubSection\">\n");
+	    openTag("<\/div type=\"x-subSubSection\">");
+	    $line =~ s/\\s(ss|3)\b\s*(.+)/<title>$2<\/title>/;
+	}
+	# \mt2 title
+	# Tested before the generic \mt# rule: that rule used to accept \mt2 as
+	# well, rewrote the line first and left this branch unreachable.
+	if ($line =~ /^\\mt2\b\s*(.+)/) {
+	    $line = "<title type=\"continued\">$1<\/title>";
+	}
+
+	# \mt, \mt1, \mt3, \mt4 title
+	if ($line =~ /^\\mt[134]?\b\s*(.+)/) {
+	    $line = "<title type=\"main\">$1<\/title>";
+	}
+
+	# \st,\st2 title
+	if ($line =~ /^\\st2?\b\s*(.+)/) {
+	    $line = "<title type=\"continued\">$1<\/title>";
+	}
+
+	# \st3 title
+	if ($line =~ /^\\st3\b\s*(.+)/) {
+	    $line = "<title type=\"sub\">$1<\/title>";
+	}
+
+	# \mr sub title
+	if ($line =~ /^\\mr\b\s*(.+)/) {
+	    $line = "<title type=\"sub\">$1<\/title>";
+	}
+
+	# \r parallel title
+	if ($line =~ /^\\r\b\s*(.+)/) {
+	    $line = "<title type=\"parallel\">$1<\/title>";
+	}
+
+	# \sp speaker
+	if ($line =~ /^\\sp\b\s*(.+)/) {
+	    $line = "<speaker>$1<\/speaker>";
+	}
+
+
+	### Chapters and Verses--Markers Supported: \c, \v
+	#### Markers Not Yet Supported: \ca...\ca*, \cl, \cp, \cd, \va...\va*, \vp...\vp*
+
+	# \c chapter
+	# Emits a <chapter sID=.../> start milestone and remembers the matching
+	# eID milestone in $chapClose until the next chapter or book begins.
+	if ($line =~ /^\\c\b\s*([^ ]*)/) {
+	    # use the number given after \c, otherwise count on from the last one
+	    if ($1 ne "") {
+		$chap = $1;
+	    }
+	    else {
+		$chap++;
+	    }
+
+	    # finish whatever is still open from the previous chapter
+	    push (@outdata, $versClose);
+	    $versClose = "";
+	    push (@outdata, closeTag("<\/p>"));
+	    if ($chapClose =~ /<chapter/) {
+		push (@outdata, $chapClose); # close previous chapter
+		$chapClose = "";
+	    } else {
+		push (@outdata, closeTag("<\/div>")); # close introduction div
+	    }
+
+	    push (@outdata, "<chapter sID=\"$book.$chap\" osisID=\"$book.$chap\"\/>\n");
+	    $chapClose = "<chapter eID=\"$book.$chap\"\/>\n";
+	    # drop the marker; anything after it stays in $line
+	    $line =~ s/\\c\b\s*([^ ]*)//;
+	}
+
+	# \v verse
+	# Emits a <verse sID=.../> start milestone and keeps the eID milestone in
+	# $versClose; the verse text itself stays in $line.
+	if ($line =~ /^\\v\b\s*(\d[^\\ ]*)?/) {
+	    # use the number (or range) given after \v, otherwise count on
+	    if ($1 ne "") {
+		$vers = $1;
+	    }
+	    else {
+		$vers++;
+	    }
+
+	    # close the previous verse first
+	    push (@outdata, $versClose);
+	    $versClose = "";
+
+	    # Verse range "from-to": build a space-separated osisID list that
+	    # names the first verse, every verse in between and the last verse.
+	    if ($vers =~ /(\d+[^\\\- ]*)\-(\d+[^\\ ]*)/) {
+		$vF = $1;
+		$vT = $2;
+		# $vFn/$vTn are the leading digits of each end of the range
+		$vF =~ /^(\d+)/;
+		$vFn = scalar($1);
+		$vT =~ /^(\d+)/;
+		$vTn = scalar($1);
+		$osisID = "$book.$chap.$vF";
+		# intermediate verses are only listed for an ascending range
+		if ($vTn > $vFn && $vFn > 0) {
+		    for ($j = $vFn + 1; $j < $vTn; $j++) {
+			$osisID .=" $book.$chap.$j";
+		    }
+		}
+		$osisID .= " $book.$chap.$vT";
+	    }
+	    else {
+		$osisID = "$book.$chap.$vers";
+	    }
+	    push (@outdata, "<verse sID=\"$osisID\" osisID=\"$osisID\"\/>\n");
+	    $versClose = "<verse eID=\"$osisID\"\/>\n";
+	    # drop the marker and verse number, leaving only the verse text
+	    $line =~ s/\\v\b\s*(\d[^\\ ]*)? *//;
+	}
+
+	### Paragraphs--Markers Supported: \p, \b, \m
+	#### Markers Not Yet Supported: \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \nb, \cls, \li#, \pc, \pr, \ph#, \b
+
+	# Hack to solve an issue in a module whose usfm files carried explicit linebreak markup--may be commented out (not USFM 2.1)
+	# NOTE(review): the original remark names <R> as that markup, but the marker actually converted here is \lb* -- confirm which is meant
+	$line =~ s/\\lb\*/<lb \/>/g; 
+
+	# \p paragraph (From Chapters and Verses)
+	# Closes the paragraph that is still open (if any), opens a new <p> and
+	# removes the marker so only the paragraph text remains in $line.
+	if ($line =~ /^\\p\b\s*/) {
+	    push (@outdata, closeTag("<\/p>"));
+	    push (@outdata, "<p>\n");
+	    openTag("<\/p>");
+	    $line =~ s/\\p\b\s*//;
+	}
+
+	# \b (the first occurrence in the line is simply removed)
+	$line =~ s/\\b\b//; 
+	# \m (the first occurrence in the line is simply removed)
+	$line =~ s/\\m\b//; 
+
+	### Poetry--Markers Supported: \q#, \qs...\qs*
+	#### Markers Not Yet Supported: \qr, \qc, \qa, \qac...\qac*, \qm#, \b
+
+	# \q line
+	# $l is 1 while a <lg> (line group) is open; the group is opened by the
+	# first \q line and closed once the following input is no longer \q.
+	if ($line =~ /^\\q/) {
+	    if ($l != 1) {
+		push (@outdata, "<lg>\n");
+		$l = 1;
+	    }
+	    # The marker ends the line (\q, \q# or \qc with no text after it), so
+	    # the text of this poetic line is expected on the next input line.
+	    if ($line =~ /\\q(c|\d*)$/) {
+		if ($1 eq "") {
+		    $line = "<l>\n";
+		}
+		elsif ($1 eq "c") {
+		    $line = "<l type=\"x-centered\">";
+		}
+		else {
+		    $line = "<l level=\"$1\">\n";
+		}
+		# append the closing tag to the NEXT input line, which holds the text
+		@filedata[$i+1] .= "<\/l>";
+		# and close the line group there too when the line after that is
+		# not poetry any more
+		if (@filedata[$i+2] !~ /\\q/) {
+		    @filedata[$i+1] .= "\n<\/lg>";
+		    $l = 0;
+		}
+	    }
+	    # Marker and text share the line: wrap the text in <l>, carrying the
+	    # level (\q#) or the centered type (\qc) over into an attribute.
+	    else {
+		$line =~ s/\\q\b\s*(.+)/<l>$1<\/l>/;
+		$line =~ s/\\q(\d+)\b\s*(.+)/<l level=\"$1\">$2<\/l>/;
+		$line =~ s/\\qc\b\s*(.+)/<l type=\"x-centered\">$1<\/l>/;
+		# close the line group when the next input line is not poetry
+		if (@filedata[$i+1] !~ /\\q/) {
+		    $line .= "\n<\/lg>";
+		    $l = 0;
+		}
+	    }
+	}
+	
+	# \qs...\qs*, Selah (only the first pair in the line is converted)
+	$line =~ s/\\qs\b\s*([^\\]+)\\qs\*/<l type="selah"> $1<\/l>/;
+
+	### Tables--Markers Supported: \tr, \th#, \tc#, \tcr#
+	####Markers Not Yet Supported: \thr#
+
+	# Tables: every marker starting with \t is handled in here.
+	# $table is 1 while a <table> is open. Besides \tr rows with \th/\tc/\tcr
+	# cells, stand-alone \th# lines and \tb# lines are accepted as well.
+	if ($line =~ /^\\t/) {
+	    # \tr row made of \th heading cells: becomes a row of label cells
+	    if ($line =~ /^\\tr\b\s*(\\th.*)/) {
+		$line = "$1";
+		if ($table != 1) {
+		    push (@outdata, "<table>\n");
+		    $table = 1;
+		}
+		$line =~ s/\\th\d?\b\s*(.+?)\s*(?=(\\th|$))/<cell role=\"label\">$1<\/cell>/g;
+		$line = "<row>$line<\/row>";
+	    }	
+
+	    # \tr row made of \tc / \tcr cells: becomes a row of plain cells; the
+	    # table is closed when the next input line is not another \tr row
+	    if ($line =~ /^\\tr\b\s*(\\tc.*)/) {
+		$line = $1;
+		if ($table != 1) {
+		    push (@outdata, "<table>\n");
+		    $table = 1;
+		}
+		$line =~ s/\\tcr?\d?\b\s*(.+?)\s*(?=(\\tc|$))/<cell>$1<\/cell>/g;
+		$line = "<row>$line<\/row>";
+		if (@filedata[$i+1] !~ /\\tr/) {
+		    $line .= "<\/table>\n";
+		    $table = 0;
+		}
+	    }
+
+	    # stand-alone \th1 starts a heading row (and the table if needed);
+	    # any other \th# just adds a label cell to it
+	    if ($line =~ /^\\th1\b\s*(.*)/) {
+		if ($table != 1) {
+		    push (@outdata, "<table>\n");
+		    $table = 1;
+		}
+		$line = "<row><cell role=\"label\">$1<\/cell>\n";
+	    }	
+	    elsif ($line =~ /^\\th\d+\b\s*(.*)/) {
+		$line = "<cell role=\"label\">$1<\/cell>\n";
+	    }
+
+	    # \tb1 starts a new body row, closing the previous row when a table
+	    # is already open; any other \tb# adds a cell. Row and table are
+	    # closed as soon as the next input line holds no \tb marker.
+	    if ($line =~ /^\\tb1\b\s*(.*)/) {
+		if ($table != 1) {
+		    push (@outdata, "<table>\n");
+		    $table = 1;
+		}
+		else {
+		    push (@outdata, "<\/row>");
+		}
+		$line = "<row><cell>$1<\/cell>\n";
+		if (@filedata[$i+1] !~ /\\tb/) {
+		    $line .= "<\/row><\/table>\n";
+		    $table = 0;
+		}
+	    }	
+	    elsif ($line =~ /^\\tb\d+\b\s*(.*)/) {
+		$line = "<cell>$1<\/cell>\n";
+		if (@filedata[$i+1] !~ /\\tb/) {
+		    $line .= "<\/row><\/table>\n";
+		    $table = 0;
+		}
+	    }	
+	}
+	
+	### Footnotes--Markers Supported: \fk, \fq, \f...\f*, \fv
+	####Markers Not Yet Supported: \fe...\fe*, \fr, \fqa, \fl, \fp, \ft, \fdc...\fdc*, \fm...\fm* 
+
+	#\fk Catch Words (must precede \f)
+	$line =~ s/\\fk\s/\<catchWord\>/g;
+	$line =~ s/\\fk\*/\<\/catchWord\>/g;
+	#\fq Quotations in Footnotes (must precede \f)
+	$line =~ s/\\fq\s/\<q\>/g;
+	$line =~ s/\\fq\*/\<\/q\>/g;
+	#\fv Verse numbers in Footnotes (must precede \f)
+	$line =~ s/\\fv\s*(\d+)\b/<seg type="verseNumber">$1<\/seg>/g;
+
+	# \f note DCO--Made changes to match this: \f + \fr 3:20 \ft \fk catchWord\fk* plain text \fq text in quotes\fq*\f* (This works.)
+	# Notes are converted one at a time so that each gets its own number and
+	# its own origin reference. (A single /g substitution used to stamp the
+	# first note's reference and number onto every note in the line.)
+	# The same compiled pattern is used for matching and for replacing, so a
+	# note that is recognised is always the note that gets rewritten.
+	my $footnoteRe = qr/\\f\b\s*(.)\s\\fr\s*([^\s]+)\s*\\ft\s*([^\\]+)\\f\*\s*/;
+	while ($line =~ $footnoteRe) {
+	    $nVal = $1;
+	    $sourceVal = $2;
+	    $noteText = $3;
+
+	    $nFN ++;
+	    # chapter:verse -> Book.chapter.verse, expanding a verse range so
+	    # that both ends are complete references
+	    $sourceVal =~ s/:/\./g;
+	    $sourceVal = "$book.$sourceVal";
+	    $sourceVal =~ s/(\d+)\.(\d[^\,]+)\-(\d+)/$1.$2-$book.$1.$3/;
+	    $sourceVal =~ s/(\d+)\.(\d[^\-]+)\-+\s*(\d.+)/$1.$2\-$book.$1.$3/;
+
+	    # the rewritten note no longer matches, so the loop moves on to the
+	    # next note; the guard stops the loop should nothing be replaced
+	    last unless ($line =~ s/$footnoteRe/<note osisRef="$sourceVal" osisID="$sourceVal\!footnote.$nFN" n="$nFN">$noteText<\/note>/);
+	}
+	
+	# \f if we STILL have notes, just change them to <note>
+	if ($line =~ /\\f\b\s*/) {
+	    $line =~ s/\\f\b\s*/<note>/;
+	}
+	# \f* Footnote closers
+	if ($line =~ /\\f\*/) {
+	    $line =~ s/\\f\*/<\/note>/g;
+	}
+
+	### Crossreferences--Markers Supported: \x + \xo...\x*
+	#### Markers Not Yet Supported: \xk, \xq, \xt, \xdc...\xdc* 
+	
+	# \x crossReference (note element with source attribute only) \x + \xo...\x*
+	# Cross-references are converted one at a time so that each keeps its own
+	# origin reference and gets its own letter. (A single /g substitution used
+	# to copy the first one's values onto every cross-reference in the line.)
+	my $xrefRe = qr/\\x\s(.)\s\\xo\s([^\s]+)\s*\\xt\s*([^\\]+)\\x\*\s*/;
+	while ($line =~ $xrefRe) {
+		$nVal = $1;
+		$sourceVal = $2;
+		$noteText = $3;
+
+		# chapter:verse -> Book.chapter.verse, expanding a verse range so
+		# that both ends are complete references
+		$sourceVal =~ s/:/\./g;
+		$sourceVal = "$book.$sourceVal";
+		$sourceVal =~ s/(\d+)\.(\d+)-(\d+)/$1.$2-$book.$1.$3/;
+		$sourceVal =~ s/(\d+)\.(\d[^\-]+)-+\s*(\d+)/$1.$2\-$book.$1.$3/;
+
+		# the targets stay behind a \xt marker for the step further down;
+		# "\xt" is not matched by the pattern above, so the loop advances
+		last unless ($line =~ s/$xrefRe/<note type="crossReference" n="$nCR" osisID="$sourceVal\!crossReference.$nCR" osisRef="$sourceVal">\\xt $noteText<\/note>/);
+
+		#osisID="Gen.6.5-Gen.6.8!crossReference.
+		# next letter for the following cross-reference, wrapping z -> a
+		# (a plain ++ would turn "z" into "aa")
+		if ($nCR eq 'z') {
+		    $nCR = 'a';
+		}
+		else {
+		    $nCR++;
+		}
+		}
+	$line =~ s/osisID="([^\!\-"]+)\-([^\!"]+)\!crossReference./osisID="$1!crossReference./g; # Corrects the osisID of cross-references when the source reference has multiple verses; leaves the osisRef as-is
+	
+	# \xt crossReference target
+	# Each "\xt ...</note>" is split into one <reference> per target, with the
+	# osisRef left empty for the normalization that follows. Handled one note
+	# at a time so every note keeps its own target list.
+	while ($line =~ /\\xt\s*([^<]+)<\/note>/) {
+		$crText = $1;
+
+		$crText =~ s/\.//g;
+		$crText =~ s/;\s/<\/reference>; <reference osisRef="">/g;
+		$crText =~ s/\,\s*/<\/reference>\, <reference osisRef="">/g;		
+		
+		last unless ($line =~ s/\\xt\s*([^<]+)<\/note>/<reference osisRef="">$crText<\/reference><\/note>/);
+		}
+	
+	# crossReference osisRef=""
+	# Copy the visible text of every reference into its empty osisRef, then
+	# rewrite that copy step by step into Book.chapter.verse form.
+	$line =~ s/<reference osisRef="">([^<]+)<\/reference>/<reference osisRef="$1">$1<\/reference>/g;
+	# Strip whitespace at the start of an osisRef value. (The replacement used
+	# to contain \s, which in a replacement string is just the letter "s", so
+	# the leading blank was turned into an "s" instead of being removed.)
+	$line =~ s/osisRef="\s+/osisRef="/g;
+	$line =~ s/\s">/">/g;
+	$line =~ s/<reference osisRef="([^\s\"]+)\s/<reference osisRef="$1\./g; # Changes space after book name to a period 
+	
+	$line =~ s/<reference osisRef="([^\"]+):([^\"]+)"/<reference osisRef="$1\.$2"/g; # Gen 1:1
+	$line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.(\d+)-(\d+)"/<reference osisRef="$1\.$2\.$3-$1\.$2\.$4"/g; # Gen 1:1-2
+	$line =~ s/<reference osisRef="([^\.\"]+).(\d+):(\d+)-(\d+).(\d+)"/<reference osisRef="$1\.$2\.$3-$1\.$4\.$5"/g; # Gen 1:1-2:3
+	$line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.([^\"]+)">([^<]+)<\/reference>; <reference osisRef="(\d+)\.(\d+)"/<reference osisRef="$1\.$2\.$3">$4<\/reference>; <reference osisRef="$1\.$5\.$6"/g; # Gen. 1:1, 2:3
+	$line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.([^\"]+)">([^<]+)<\/reference>, <reference osisRef="(\d+)"/<reference osisRef="$1\.$2\.$3">$4<\/reference>, <reference osisRef="$1\.$2\.$5"/g; # Gen. 1:1, 3
+	$line =~ s/<reference osisRef="([^\"\.]+)\.(\d+)"/<reference osisRef="$1\.1\.$2"/g; # Jude 1
+
+	### Special Text and Character Styles--Markers Supported: \it...\it*, \nd...\nd*, \pn...\pn*, \tl...\tl* 
+	#### Markers Not Yet Supported: Special Text: \add...\add*, \bk...\bk*, \dc...\dc*, \k...\k*, \lit, \ord...\ord*, \qt...\qt*, \sig...\sig*, \sls...\sls*, \wj...\wj*; Character Styling: \em...\em*, \bd...\bd*, \bdit...\bdit*, \no...\no*, \sc...\sc*; Spacing and Breaks: !$, //, \pb; Special Features: \fig...\fig*, \ndx...\ndx*, \pro...\pro*, \w...\w*, \wg...\wg*, \wh...\wh*
+
+	# Character styles: every \marker ...\marker* pair in the line is replaced
+	# by an inline element, in this order:
+	#   \it...\it*  italic text
+	#   \nd...\nd*  Divine Name
+	#   \pn...\pn*  Proper name
+	#   \tl...\tl*  Foreign Language (treated here merely as transliterated text)
+	foreach my $charStyle (
+	    [ 'it', '<hi type="italic">', '</hi>' ],
+	    [ 'nd', '<divineName>',       '</divineName>' ],
+	    [ 'pn', '<name>',             '</name>' ],
+	    [ 'tl', '<hi type="italic">', '</hi>' ],
+	) {
+	    my ($marker, $openTag, $closeTag) = @{$charStyle};
+	    $line =~ s/\\${marker}\b\s*(.*?)\\${marker}\*/$openTag$1$closeTag/g;
+	}
+
+	# underscores stand for spaces
+	$line =~ tr/_/ /;
+
+
+### End USFM 2.1 Items
+
+	if ($line !~ /^\s*$/) {
+	    push (@outdata, "$line\n");
+	}
+    }
+}
+
+push (@outdata, closeTag("<\/osis>"));
+
+# Write the collected output. For each entry: trim the first sID/eID style
+# attribute that holds a space-separated list back to its first item, skip
+# entries that are blank, squeeze runs of line ends into one newline, make
+# sure the entry ends with exactly one newline, and print it.
+foreach my $outLine (@outdata) {
+    #$outLine =~ s/---/―/g; # m-dash
+    #$outLine =~ s/--/—/g; # n-dash
+    $outLine =~ s/([es]ID=\"[^\" ]+) [^\"]*\"/$1\"/;
+
+    next if ($outLine =~ /^\s*$/);
+
+    $outLine =~ s/[\r\n]+/\n/g;
+    $outLine =~ s/\n?$/\n/;
+    print OUTF $outLine;
+}
+close (OUTF);
+
+print "Doing some cleanup.\n";
+
+# Read back the file that was just written so the clean-up passes below can
+# work on the complete document, then reopen it for writing.
+# Both opens are checked: if the read failed silently, the second open would
+# still truncate the file and the converted text would be lost.
+open (INF, "<", $outputFilename) or die "Cannot read $outputFilename: $!\n";
+@filedata = <INF>;
+close (INF);
+open (OUTF, ">", $outputFilename) or die "Cannot write $outputFilename: $!\n";
+
+#bubble chapter down
+# A chapter milestone directly followed by a closing tag is swapped with that
+# tag, repeatedly, so the milestone ends up after the run of closing tags.
+for ($i = 0; $i < scalar(@filedata); $i++) {
+    # $i > 0: the first line has no predecessor; without the guard $i-1 would
+    # be -1, which in Perl addresses the LAST line of the file.
+    if ($i > 0 && $filedata[$i] =~ /^<\// && $filedata[$i-1] =~ /^<chapter.+\/>/) {
+	$temp = $filedata[$i];
+	$filedata[$i] = $filedata[$i-1];
+	$filedata[$i-1] = $temp;
+	# step back so the line before the swapped pair is looked at again
+	$i -= 2;
+    }
+}
+# from here on the document is processed as one string
+$fullfile .= join("", @filedata);
+$fullfile =~ s/<\/div>\n(<chapter eID[^>]+>)/$1\n<\/div>/mg; #swap the chapter back up one before the book closer
+
+print "Tagging quotations.\n";
+
+# The text still carries single-character placeholders for quotation marks
+# (presumably put there by an earlier step that is not part of this section):
+#   $ ... %   start / end of a level 2 quotation
+#   @ ... #   start / end of a level 1 quotation
+#   ^         an end of a level 1 quotation marked ".false"
+# $q numbers the quotations of each level so that sID and eID pair up.
+$q = 1;
+
+# every $...% pair becomes a pair of level 2 <q> milestones
+$fullfile =~ s/\$([^\%]+?)\%/"<q level=\"2\" sID=\"q2." . $q . "\"\/>" . $1 . "<q level=\"2\" eID=\"q2." . $q++ . "\"\/>"/eg;
+
+# a $ that found no closing % becomes a continued-quote milestone
+$fullfile =~ s/\$/"<milestone type=\"cQuote\" subType=\"x-level-2\"\/>"/eg;
+
+$q = 1;
+
+# Level 1: when a second @ follows before the first one has been closed, the
+# inner @...# pair is tagged first; the outer @ and its # stay in place for
+# the simple substitution below. The first loop covers the case where more
+# @...# pairs follow before the outer #, the second loop the plain case.
+while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/) {
+    $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
+}
+while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/) {
+    $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
+}
+
+# every remaining @...# pair becomes a pair of level 1 <q> milestones
+$fullfile =~ s/\@([^\#]+?)\#/"<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $1 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>"/eg;
+# a @ that found no closing # becomes a continued-quote milestone
+$fullfile =~ s/\@/"<milestone type=\"cQuote\" subType=\"x-level-1\"\/>"/eg;
+
+# each ^ becomes a level 1 end milestone whose eID carries the suffix ".false"
+$fullfile =~ s/\^/"<q level=\"1\" eID=\"q1." . $q++ . ".false\"\/>"/eg;
+
+# write the finished document back to the output file
+print OUTF $fullfile;
+close (OUTF);
+
+print "All done! OSIS file: $outputFilename\n";




More information about the sword-cvs mailing list