[Tynstep-svn] r26 - in trunk/StepTools: . lib src

ChrisBurrell at crosswire.org ChrisBurrell at crosswire.org
Tue Nov 24 15:52:26 MST 2009


Author: ChrisBurrell
Date: 2009-11-24 15:52:26 -0700 (Tue, 24 Nov 2009)
New Revision: 26

Added:
   trunk/StepTools/lib/
   trunk/StepTools/lib/aopalliance.jar
   trunk/StepTools/lib/commons-codec-1.3.jar
   trunk/StepTools/lib/commons-httpclient-3.1.jar
   trunk/StepTools/lib/commons-io-1.4.jar
   trunk/StepTools/lib/commons-lang-2.4.jar
   trunk/StepTools/lib/commons-logging-1.1.1.jar
   trunk/StepTools/lib/javatar-2.5.jar
   trunk/StepTools/lib/jdom-1.0.jar
   trunk/StepTools/lib/jsword-1.6.jar
   trunk/StepTools/lib/jsword-common-1.6.jar
   trunk/StepTools/lib/log4j.jar
   trunk/StepTools/src/
   trunk/StepTools/src/BibleFileGenerator.java
   trunk/StepTools/src/BibleStatsAnalyser.java
   trunk/StepTools/src/ScriptureReference.java
   trunk/StepTools/src/log4j.properties
Log:


Added: trunk/StepTools/lib/aopalliance.jar
===================================================================
(Binary files differ)


Property changes on: trunk/StepTools/lib/aopalliance.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/StepTools/lib/commons-codec-1.3.jar
===================================================================
(Binary files differ)


Property changes on: trunk/StepTools/lib/commons-codec-1.3.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/StepTools/lib/commons-httpclient-3.1.jar
===================================================================
(Binary files differ)


Property changes on: trunk/StepTools/lib/commons-httpclient-3.1.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/StepTools/lib/commons-io-1.4.jar
===================================================================
(Binary files differ)


Property changes on: trunk/StepTools/lib/commons-io-1.4.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/StepTools/lib/commons-lang-2.4.jar
===================================================================
(Binary files differ)


Property changes on: trunk/StepTools/lib/commons-lang-2.4.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/StepTools/lib/commons-logging-1.1.1.jar
===================================================================
(Binary files differ)


Property changes on: trunk/StepTools/lib/commons-logging-1.1.1.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/StepTools/lib/javatar-2.5.jar
===================================================================
(Binary files differ)


Property changes on: trunk/StepTools/lib/javatar-2.5.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/StepTools/lib/jdom-1.0.jar
===================================================================
(Binary files differ)


Property changes on: trunk/StepTools/lib/jdom-1.0.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/StepTools/lib/jsword-1.6.jar
===================================================================
(Binary files differ)


Property changes on: trunk/StepTools/lib/jsword-1.6.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/StepTools/lib/jsword-common-1.6.jar
===================================================================
(Binary files differ)


Property changes on: trunk/StepTools/lib/jsword-common-1.6.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/StepTools/lib/log4j.jar
===================================================================
(Binary files differ)


Property changes on: trunk/StepTools/lib/log4j.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/StepTools/src/BibleFileGenerator.java
===================================================================
--- trunk/StepTools/src/BibleFileGenerator.java	                        (rev 0)
+++ trunk/StepTools/src/BibleFileGenerator.java	2009-11-24 22:52:26 UTC (rev 26)
@@ -0,0 +1,253 @@
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang.StringUtils;
+import org.crosswire.common.util.Language;
+import org.crosswire.jsword.book.Book;
+import org.crosswire.jsword.book.BookCategory;
+import org.crosswire.jsword.book.BookData;
+import org.crosswire.jsword.book.BookException;
+import org.crosswire.jsword.book.Books;
+import org.crosswire.jsword.book.OSISUtil;
+import org.crosswire.jsword.book.install.InstallException;
+import org.crosswire.jsword.book.install.sword.HttpSwordInstaller;
+import org.crosswire.jsword.passage.Key;
+import org.crosswire.jsword.passage.PassageKeyFactory;
+
+/**
+ * Dumps the text of an installed JSword Bible into a plain text file: one
+ * verse per line, with "@" marker lines for the version, books and chapters.
+ * Each instance handles a single version and does its work in {@link #run()}.
+ */
+public class BibleFileGenerator extends Thread {
+	/** Names of the system properties that configure an optional proxy. */
+	private final static String proxyHostProperty = "step.proxy.host";
+	private final static String proxyPortProperty = "step.proxy.port";
+
+	private final static String installerHost = "www.crosswire.org";
+	private final static String lineSeparator = System.getProperty("line.separator");
+
+	/** Initials of the version this instance writes out, e.g. "ESV". */
+	private final String initials;
+
+	/**
+	 * Creates a generator that is not tied to any version.
+	 */
+	public BibleFileGenerator() {
+		this("");
+	}
+
+	/**
+	 * Creates a generator for one version.
+	 * 
+	 * @param initials the initials of the version to write out
+	 */
+	public BibleFileGenerator(String initials) {
+		this.initials = initials;
+	}
+
+	/**
+	 * Renders a whole installed version as text.
+	 * 
+	 * @param version the initials of the installed version
+	 * @return the version marker line followed by every book and chapter
+	 *         marker and every verse
+	 * @throws Exception if no version is given, the version is not
+	 *             installed, or a passage cannot be read from the book
+	 */
+	private String writeBible(String version) throws Exception {
+		// check information has been passed in, before it is used
+		if (StringUtils.isEmpty(version)) {
+			throw new Exception("Version was not provided");
+		}
+
+		Book currentBook = Books.installed().getBook(version);
+		if (currentBook == null) {
+			// a null book means the version is not installed
+			throw new Exception("Version is not installed: " + version);
+		}
+
+		StringBuilder sb = new StringBuilder();
+		sb.append('@').append(version).append(lineSeparator);
+
+		Key global = currentBook.getGlobalKeyList();
+		int numVersesInVersion = global.getCardinality();
+
+		String previousBook = "none";
+		String previousChapter = "none";
+		String key = null;
+		for (int ii = 0; ii < numVersesInVersion; ii++) {
+			try {
+				Key verseKey = global.get(ii);
+				key = verseKey.getOsisID();
+
+				// assumes the key is a single verse: book.chapter.verse
+				String[] split = key.split("\\.");
+				String formatBook = split[0];
+				String formatChapter = split[1];
+				String formatVerse = split[2];
+
+				boolean bookChanged = !formatBook.equals(previousBook);
+				if (bookChanged) {
+					previousBook = formatBook;
+					sb.append('@').append(formatBook).append(lineSeparator);
+				}
+
+				// a new book always gets a chapter marker as well
+				if (bookChanged || !formatChapter.equals(previousChapter)) {
+					previousChapter = formatChapter;
+					sb.append("@Chapter ").append(formatChapter).append(lineSeparator);
+				}
+
+				BookData data = new BookData(currentBook, verseKey);
+				String text = OSISUtil.getCanonicalText(data.getOsis());
+
+				// flatten line breaks so that each verse stays on one line
+				sb.append(formatVerse).append(". ");
+				sb.append(text.replace('\n', ' ').replace('\r', ' '));
+				sb.append(lineSeparator);
+			} catch (BookException e) {
+				System.out.println(initials
+						+ ":: A book exception has occurred while looking up the passage: " + key);
+				e.printStackTrace();
+				throw new Exception(e);
+			} catch (Exception ex) {
+				// anything else is reported and the verse is skipped
+				System.err.println(initials + ":: Could not parse key: " + key);
+				ex.printStackTrace();
+			}
+		}
+		return sb.toString();
+	}
+
+	//TODO: here and elsewhere, ensure that the downloaders 
+	//are not hardcoded - there are at least two more sites to get 
+	//bible versions from.
+	/**
+	 * Builds an installer for the hardcoded CrossWire host, routed through
+	 * the proxy named by the step.proxy.host and step.proxy.port system
+	 * properties when those are set.
+	 * 
+	 * @return the configured installer
+	 * @throws NumberFormatException if step.proxy.port is not an integer
+	 */
+	private static HttpSwordInstaller getNewCustomInstaller() {
+		System.out.println("Creating new installer for JSword");
+		HttpSwordInstaller resourceInstaller = new HttpSwordInstaller();
+
+		System.out.println("Currently hardcoded installer host to:" + installerHost);
+		System.out.println("Currently hardcoded property names for step");
+		String proxyHost = System.getProperty(proxyHostProperty);
+		String proxyPort = System.getProperty(proxyPortProperty);
+		System.out.println(String.format("Setting to (%1$s via %2$s:%3$s)",
+				installerHost, proxyHost, proxyPort));
+
+		resourceInstaller.setHost(installerHost);
+		if (proxyHost != null) {
+			resourceInstaller.setProxyHost(proxyHost);
+		}
+		if (proxyPort != null) {
+			resourceInstaller.setProxyPort(Integer.parseInt(proxyPort));
+		}
+
+		System.out.println("Setting package and catalog directories");
+		resourceInstaller.setPackageDirectory("/ftpmirror/pub/sword/packages/rawzip");
+		resourceInstaller.setCatalogDirectory("/ftpmirror/pub/sword/raw");
+		return resourceInstaller;
+	}
+
+	/**
+	 * Installs every English Bible offered by the installer that is not
+	 * installed yet.
+	 * 
+	 * @throws InstallException if reloading the book list or installing a
+	 *             book fails
+	 */
+	private void downloadAllBibles() throws InstallException {
+		HttpSwordInstaller installer = getNewCustomInstaller();
+
+		//TODO: ensure the comment in the comment of the function is in a warning
+		//somewhere...
+		installer.reloadBookList();
+
+		// the language to match is the same for every book
+		Language en = new Language("en");
+		List availableBooks = installer.getBooks();
+
+		for (int ii = 0; ii < availableBooks.size(); ii++) {
+			Book b = (Book) availableBooks.get(ii);
+
+			//check book is a biblical text...
+			if (!BookCategory.BIBLE.equals(b.getBookCategory())
+					|| !en.equals(b.getBookMetaData().getLanguage())) {
+				continue;
+			}
+
+			// only announce a download when one actually happens
+			String versionKey = b.getInitials();
+			if (Books.installed().getBook(versionKey) == null) {
+				System.out.println("Downloading " + b.getName() + "...");
+				installer.install(installer.getBook(versionKey));
+			}
+		}
+	}
+
+	/**
+	 * Writes the text of this instance's version to output/&lt;initials&gt;.txt.
+	 * Failures are reported on the console because run() cannot rethrow them.
+	 */
+	@Override
+	public void run() {
+		try {
+			System.out.println("Starting thread for " + initials);
+			FileUtils.writeStringToFile(new File("output/" + initials + ".txt"), writeBible(initials));
+		} catch (Exception e) {
+			// covers IOException from the write as well as writeBible failures
+			System.err.println(initials + ":: Could not generate the text file");
+			e.printStackTrace();
+		}
+	}
+
+	//TODO: could profile the application to figure out if we can speed things
+	//up with memory settings etc, more/less threads...
+	//could also investigate rewriting it so that disk IO happens
+	//during processing to allow other threads to execute.
+	//would be interesting to profile to find out where bottlenecks are.
+	/**
+	 * Lists the installed and the available books, then generates the text
+	 * file for the ESV on a separate thread.
+	 * 
+	 * @param args not used
+	 * @throws Exception if setting up the installer or listing the books fails
+	 */
+	public static void main(String args[]) throws Exception {
+		//TODO: kick off all the downloads (see downloadAllBibles) and wait
+		//for them to complete for each version, before continuing on to the
+		//bible processing...
+		List<Book> books = Books.installed().getBooks();
+
+		//of the installed books
+		for (Book b : books) {
+			System.out.println(String.format("%s %s", b.getInitials(), b.getName()));
+		}
+
+		System.out.println("of available:\n\n");
+
+		List<Book> availableBooks = getNewCustomInstaller().getBooks();
+		for (Book b : availableBooks) {
+			System.out.println(String.format("%s %s", b.getInitials(), b.getName()));
+		}
+
+		new BibleFileGenerator("ESV").start();
+
+		//TODO: generate a file for every installed book whose category is
+		//BookCategory.BIBLE, handing one BibleFileGenerator per version to a
+		//small ThreadPoolExecutor instead of starting a single thread here
+	}
+}

Added: trunk/StepTools/src/BibleStatsAnalyser.java
===================================================================
--- trunk/StepTools/src/BibleStatsAnalyser.java	                        (rev 0)
+++ trunk/StepTools/src/BibleStatsAnalyser.java	2009-11-24 22:52:26 UTC (rev 26)
@@ -0,0 +1,34 @@
+import java.io.File;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Hashtable;
+
+import org.apache.commons.io.FileUtils;
+
+
+/** Prints how often each word occurs in the generated ESV text file. */
+public class BibleStatsAnalyser {
+	/**
+	 * Reads output/ESV.txt and prints one "word,count" line per distinct
+	 * word, in order of first appearance.
+	 * @param args not used
+	 * @throws IOException if the file cannot be read
+	 */
+	public static void main(String args[]) throws IOException {
+		String esv = FileUtils.readFileToString(new File("output/ESV.txt"));
+		// split on runs of separators so line breaks never end up inside a word
+		String[] allWords = esv.split("[\\s,.!;']+");
+		Hashtable<String, Integer> count = new Hashtable<String, Integer>();
+		for (String s : allWords) {
+			Integer seen = count.get(s);
+			count.put(s, seen == null ? 1 : seen + 1);
+		}
+		// print each word once; a leading separator yields one empty token to skip
+		HashSet<String> printed = new HashSet<String>();
+		for (String s : allWords) {
+			if (s.length() > 0 && printed.add(s)) {
+				System.out.println(String.format("%s,%d", s, count.get(s).intValue()));
+			}
+		}
+	}
+}

Added: trunk/StepTools/src/ScriptureReference.java
===================================================================
--- trunk/StepTools/src/ScriptureReference.java	                        (rev 0)
+++ trunk/StepTools/src/ScriptureReference.java	2009-11-24 22:52:26 UTC (rev 26)
@@ -0,0 +1,60 @@
+/** A scripture reference split into its book, chapter and verse parts. */
+public class ScriptureReference {
+	private final String book;
+	private final String chapter;
+	private final String verse;
+
+	/**
+	 * Parses a key such as "1 Samuel 2:13" (book, chapter, verse) or
+	 * "Philemon 1" (book and verse only, so the chapter stays null). A key
+	 * with neither a space nor a colon is taken to be a bare book name.
+	 * 
+	 * @param key the reference to parse, must not be null
+	 * @throws NullPointerException if key is null
+	 */
+	public ScriptureReference(String key) {
+		if (key == null) {
+			throw new NullPointerException("key must not be null");
+		}
+		int colon = key.lastIndexOf(':');
+		// the book name ends at the last space in front of the numbers
+		int space = (colon == -1) ? key.lastIndexOf(' ') : key.lastIndexOf(' ', colon);
+		if (colon != -1) {
+			// "book chapter:verse", or "chapter:verse" with no book at all
+			book = (space == -1) ? null : key.substring(0, space);
+			chapter = key.substring(space + 1, colon);
+			verse = key.substring(colon + 1);
+		} else if (space != -1) {
+			// "book verse", e.g. Philemon 1
+			book = key.substring(0, space);
+			chapter = null;
+			verse = key.substring(space + 1);
+		} else {
+			// nothing to split on, so there are no numbers to extract
+			book = key;
+			chapter = null;
+			verse = null;
+		}
+	}
+
+	/**
+	 * @return the book, or null if the key named none
+	 */
+	public String getBook() {
+		return book;
+	}
+
+	/**
+	 * @return the chapter, or null if the key carried no chapter
+	 */
+	public String getChapter() {
+		return chapter;
+	}
+
+	/**
+	 * @return the verse, or null if the key was a bare book name
+	 */
+	public String getVerse() {
+		return verse;
+	}
+}

Added: trunk/StepTools/src/log4j.properties
===================================================================
--- trunk/StepTools/src/log4j.properties	                        (rev 0)
+++ trunk/StepTools/src/log4j.properties	2009-11-24 22:52:26 UTC (rev 26)
@@ -0,0 +1,9 @@
+# Set root logger level to WARN and its only appender to A1.
+log4j.rootLogger=WARN, A1
+
+# A1 is set to be a ConsoleAppender.
+log4j.appender.A1=org.apache.log4j.ConsoleAppender
+
+# A1 uses PatternLayout.
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout
+log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n




More information about the Tynstep-svn mailing list