#!/usr/bin/perl

# vs2osis(REFERENCE)
#
# Hands a verse reference to the external `vs2osisref` command and returns
# whatever that command prints, with one trailing newline removed.
#
#   REFERENCE  reference string, e.g. a book name followed by a space and
#              chapter/verse numbers.
#
# The command is invoked as:  vs2osisref "<reference>" <context> de
#   <reference>  REFERENCE with its first comma turned into a colon
#   <context>    REFERENCE with everything from the first "<book> " match
#                onwards reduced to just the book token (optional digit 1-5,
#                one capital letter, one or more lower-case letters)
#
# NOTE(review): the command line is assembled by plain string interpolation
# and run through the shell; callers must pass trusted input only.
sub vs2osis {
    my ($verse_ref) = @_;

    # Book token used as lookup context for the external tool.
    (my $book = $verse_ref) =~ s/([1-5]?[A-Z][a-z]+)\ .*/$1/;

    # Only the first "," is rewritten to ":" (no /g).
    (my $query = $verse_ref) =~ s/,/:/;

    my $osis = `vs2osisref "$query" $book de`;
    $osis =~ s/\n$//;

    return $osis;
}
                                                                              

# einleitung(TEXT)
#
# Hook applied to the text of an introduction paragraph (the part following
# "\ip"). At present it returns TEXT unchanged: the only transformation, which
# would wrap reference-like runs ("<Book> <chapter>[,<verse>[-<verse>]]") in
# \rq ... \rq*, is commented out below and kept for reference.
#
# The working copy is a lexical so that calling this sub no longer overwrites
# a package-global $return.
sub einleitung {
        my $return = $_[0];
#        $return =~ s/(([12345]?[A-Z][a-z]+)(;?\s[0-9]+(,[0-9]+(\-[0-9]+)?)?)+)(\s|\)|;)/\\rq\ $1\ \\rq*/g;
        return $return;
}


# footnote(TEXT)
#
# Rewrites the body of one footnote (the text between "\f +" and "\f*") and
# returns the result. TEXT itself is not modified.
#
# Steps, in order:
#   1. remove left-over footnote_anchor attribute text;
#   2. turn "[" / "]" into " \add " / " \add* ";
#   3. turn a leading "(<chapter>,<verse>)" into " \fr <chapter>,<verse> \ft ";
#   4. insert "\fqa " after the abbreviations "w. ", "od. ", "bed. ", "eig. "
#      (the pattern is not anchored to a word start, so it also fires at the
#      end of longer words);
#   5. after "bersetzung: ", "bersetzen: " or "wiedergegeben: " replace the
#      colon by a period and insert "\fqa ";
#   6. after "= " insert ". \fqa ";
#   7. for each pair of "\fqa" markers, replace the second one by a space.
#
# The working copy is a lexical so that calling this sub no longer overwrites
# a package-global $return.
sub footnote {
        my $return = $_[0];

        $return =~ s/type=\"footnote_anchor\"\ style=\"\"\ xid=\".*?\"\ >//g;

        $return =~ s/\[/\ \\add\ /g;
        $return =~ s/\]/\ \\add\*\ /g;

        # Only a reference at the very start of the note is recognised (no /g).
        $return =~ s/^\s*\(([0-9]+),([0-9]+)\)\s+/\ \\fr\ $1,$2\ \\ft\ /;

        $return =~ s/(w|od|bed|eig)\.\ /$1\.\ \\fqa\ /g;
        $return =~ s/(bersetzung|bersetzen|wiedergegeben)\:\ /$1\.\ \\fqa\ /g;

        # NOTE(review): this yields "= . \fqa " -- the extra period looks
        # carried over from the rule above; confirm it is wanted.
        $return =~ s/(=\ )/$1\.\ \\fqa\ /g;

        # Matches are non-overlapping: "\fqa a \fqa b \fqa c" keeps the first
        # and the third marker.
        $return =~ s/(\\fqa\ .*?)\\fqa/$1\ /g;

        return $return;
}

# ---------------------------------------------------------------------------
# Main script: convert every *.abw file in the current directory to USFM.
#
# For each input file <f> the script
#   * appends its output to "<f>.usfm",
#   * reads optional side files "<f>.keywords" and "<f>.verse", each holding
#     "key|value" lines,
#   * rewrites the styled markup line by line into USFM markers,
#   * re-splits the text on the newlines introduced by that rewrite,
#   * merges the side-file data, cleans up whitespace, and finally reorders
#     some heading / reference / chapter lines.
#
# The file "books" supplies the \id value: the numeric prefix of the input
# file name (e.g. "01.abw" -> 1) selects the 1-based entry.
# ---------------------------------------------------------------------------
my @files = `ls -1 *.abw`;
my @ident = `cat books`;        # entries keep their trailing newline

foreach my $file (@files) {
    chomp $file;

    my %vs;     # "<chapter>:<verse>"            => text appended to that verse line
    my %kw;     # "<chapter>:<verse>:<first 4>"  => replacement for a \x \xk ... \x* span

    # Append mode: running the script twice adds to an existing .usfm file.
    open my $usfm, '>>', "$file.usfm"
        or die "Cannot open $file.usfm for appending: $!";

    chomp(my @lines = `cat $file`);

    # The second input line is replaced by the \id line. The file name is
    # used in numeric context on purpose, so only its leading digits count.
    my $book_id = $ident[ $file - 1 ];
    $lines[1] = "\\id $book_id ";

    # Both side files are optional; a missing file simply leaves the
    # corresponding table empty.
    if (open my $kw_fh, '<', "$file.keywords") {
        while (my $entry = <$kw_fh>) {
            my ($key, $value) = split /\|/, $entry;
            chomp $value if defined $value;
            $kw{$key} = $value;
        }
        close $kw_fh;
    }

    if (open my $vs_fh, '<', "$file.verse") {
        while (my $entry = <$vs_fh>) {
            # Values are stored as read (including the line terminator); the
            # whitespace clean-up further down folds it into a single space.
            my ($key, $value) = split /\|/, $entry;
            $vs{$key} = $value;
        }
        close $vs_fh;
    }

    # --- Pass 1: styled markup -> USFM markers ---------------------------
    # Order matters: the more specific patterns run before the generic ones
    # and the final catch-all removes every remaining tag.
    foreach (@lines) {
        s/<m\ .*?\/m>//;
        s/props\=\".*?\"//g;
        s/<p\ style=\"Kapitel\".*?><c.*?>(.*?)<\/c><\/p>/\n\\c\ $1\ \n\\v\ 1\ \ /g;
        s/<p\ style=\"Psalm Nr\".*?><c.*?>Psalm\ (.*?)<\/c>(<field.*?|)<\/p>$/\n\\c\ $1\n\\s1\ Psalm\ $1\ $2\n\\p/g;
        s/<p\ style=\"Textspalte links\".*?>/\n\\q\ /g;
        s/\\q\s+<c\ style=\"Verszahl\".*?>(.*?)<\/c>/\n\\v\ $1\ \n\\q /g;
        s/<c\ style=\"Verszahl\".*?>(.*?)<\/c>/\n\\v\ $1\ \ /g;
        s/<p\ style=\"Buchtitel\".*?>/\n\\mt\ /g;
        s/<p\ style=\"Bucheinleitung\".*?>/\n\\imt\ Einleitung\n\\ip\ /g;
        s/<p\ style=\"Spaltentitel\".*?><c.*?>(.*?)<\/c>(<field.*?|)<\/p>/\n\\s2\ $1\ $2\n\\p/g;
        s/<p\ style=\"Spaltenparallelen\".*?>(.*?)<\/p>/\n\\r\ $1/g;
        s/<p style=\"Footnote Text\".*><field\ footnote-id=\".*?\"/\ \\f\ +\ /g;
        s/<\/foot>/\\f\*\ /g;
        s/type=\"footnote_anchor\"\ style=\"\"\ xid=\".*?\"\ >//g;

        # Square brackets become \add markers here unless a footnote on this
        # line contains "[" -- in that case footnote() converts them in pass 2.
        unless (/\\f.*?\[.*?\\f\*/) {
            s/\[/\ \\add\ /g;
            s/\]/\ \\add\*\ /g;
        }

        s/<p\ style=\"Kopfzeile\".*?p>//;

        # Empty keyword spans vanish; non-empty ones keep their text and get
        # a \x \xk placeholder that pass 2 resolves through %kw.
        s/<c\ style=\"Verweiswort\".*?><\/c>//g;
        s/<c\ style=\"Verweiswort\".*?>(.*?)<\/c>/$1\ \\x\ \\xk\ $1\\x\*\ /g;
        s/<br\/>/\n\\q\ /g;
        s/xid\=\".*?\"//g;
        s/<.*?>//g;
    }

    # Pass 1 inserted newlines in front of the markers; re-split so that each
    # marker starts its own line.
    @lines = split("\n", join("", @lines));

    # --- Pass 2: merge side data, expand footnotes, tidy whitespace --------
    my $chapter;
    my $verse   = 1;
    my $c_found = 0;    # to sniff out single chapter books

    foreach (@lines) {
        if (/\\c\s+([0-9]+)\s/) {
            $chapter = $1;
            $c_found = 1;
        }
        if (/\\v\s+([0-9]+)\s/) {
            $verse = $1;
            s/$/$vs{$chapter.":".$verse}/e;
        }

        s/\\x\s+\\xk\s+(.+?)\s*\\x\*/$kw{$chapter.":".$verse.":".substr($1,0,4)}/eg;
        s/\\f\ \+(.*?)\\f\*/"\\f +".footnote($1)." \\f*"/eg;

        s/^\\r\s+(Kapitel|\(Psalm)/\\mr $1/g;
        s/^\\ip(.*?)$/"\\ip".einleitung($1)/eg;

        s/^\s*\\s2...$//;
        s/^\\q\s*$//g;
        s/^\s*$//g;
        s/\s+([\,\.\;\:])/$1\ /g;
        s/\s+/\ /g;
        s/$/\n/;
    }

    # Books without any \c marker get "\c 1" in front of verse 1.
    unless ($c_found) {
        foreach (@lines) {
            s/\\v\ 1\s/\\c\ 1\n\\v\ 1\ /;
        }
    }

    # --- Pass 3: line reordering -------------------------------------------
    # Every loop starts / stops where all lines it touches really exist, so
    # no index runs past the end or wraps around to the end of the array.

    # A \q line directly after a \s2 heading becomes \mt2.
    for my $i (0 .. $#lines - 1) {
        if ($lines[$i] =~ /^\\s2/ && $lines[$i + 1] =~ /^\\q/) {
            $lines[$i + 1] =~ s/\\q/\\mt2/;
        }
    }

    # \s2, X, \mr  ->  \s, \mr, X
    for my $i (2 .. $#lines) {
        if ($lines[$i] =~ /^\\mr/ && $lines[$i - 2] =~ /^\\s2/) {
            @lines[$i - 1, $i] = @lines[$i, $i - 1];
            $lines[$i - 2] =~ s/\\s2/\\s/;
        }
    }

    # A \c line three lines after a \s2 heading trades places with it.
    for my $i (3 .. $#lines) {
        if ($lines[$i] =~ /^\\c/ && $lines[$i - 3] =~ /^\\s2/) {
            @lines[$i - 3, $i] = @lines[$i, $i - 3];
        }
    }

    # A \s2 heading directly after a \r line trades places with the line
    # in front of that \r line.
    for my $i (2 .. $#lines) {
        if ($lines[$i] =~ /^\\s2/ && $lines[$i - 1] =~ /^\\r/) {
            @lines[$i - 2, $i] = @lines[$i, $i - 2];
        }
    }

    # \s2, \p, \r  ->  \s2, \r, \p
    for my $i (2 .. $#lines) {
        if (   $lines[$i]     =~ /^\\r/
            && $lines[$i - 1] =~ /^\\p/
            && $lines[$i - 2] =~ /^\\s2/)
        {
            @lines[$i - 1, $i] = @lines[$i, $i - 1];
        }
    }

    print {$usfm} @lines;
    close $usfm or die "Cannot close $file.usfm: $!";
}
