[Tynstep-svn] r108 - in trunk: . step-tools step-tools/.settings step-tools/src step-tools/src/main step-tools/src/main/java step-tools/src/main/resources

ChrisBurrell at crosswire.org ChrisBurrell at crosswire.org
Wed Apr 14 14:28:21 MST 2010


Author: ChrisBurrell
Date: 2010-04-14 14:28:21 -0700 (Wed, 14 Apr 2010)
New Revision: 108

Added:
   trunk/step-tools/
   trunk/step-tools/.classpath
   trunk/step-tools/.project
   trunk/step-tools/.settings/
   trunk/step-tools/.settings/org.eclipse.jdt.core.prefs
   trunk/step-tools/.settings/org.maven.ide.eclipse.prefs
   trunk/step-tools/pom.xml
   trunk/step-tools/src/
   trunk/step-tools/src/main/
   trunk/step-tools/src/main/java/
   trunk/step-tools/src/main/java/BibleFileGenerator.java
   trunk/step-tools/src/main/java/BibleStatsAnalyser.java
   trunk/step-tools/src/main/java/ScriptureReference.java
   trunk/step-tools/src/main/resources/
   trunk/step-tools/src/main/resources/log4j.properties
Log:
committing tools

Added: trunk/step-tools/.classpath
===================================================================
--- trunk/step-tools/.classpath	                        (rev 0)
+++ trunk/step-tools/.classpath	2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry kind="src" output="target/classes" path="src/main/java"/>
+	<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources"/>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
+	<classpathentry kind="con" path="org.maven.ide.eclipse.MAVEN2_CLASSPATH_CONTAINER"/>
+	<classpathentry kind="output" path="target/classes"/>
+</classpath>

Added: trunk/step-tools/.project
===================================================================
--- trunk/step-tools/.project	                        (rev 0)
+++ trunk/step-tools/.project	2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>step-tools</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.maven.ide.eclipse.maven2Builder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.maven.ide.eclipse.maven2Nature</nature>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+	</natures>
+</projectDescription>

Added: trunk/step-tools/.settings/org.eclipse.jdt.core.prefs
===================================================================
--- trunk/step-tools/.settings/org.eclipse.jdt.core.prefs	                        (rev 0)
+++ trunk/step-tools/.settings/org.eclipse.jdt.core.prefs	2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,6 @@
+#Mon Apr 05 13:57:25 BST 2010
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.5

Added: trunk/step-tools/.settings/org.maven.ide.eclipse.prefs
===================================================================
--- trunk/step-tools/.settings/org.maven.ide.eclipse.prefs	                        (rev 0)
+++ trunk/step-tools/.settings/org.maven.ide.eclipse.prefs	2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,9 @@
+#Mon Apr 05 13:57:25 BST 2010
+activeProfiles=
+eclipse.preferences.version=1
+fullBuildGoals=process-test-resources
+includeModules=false
+resolveWorkspaceProjects=true
+resourceFilterGoals=process-resources resources\:testResources
+skipCompilerPlugin=true
+version=1

Added: trunk/step-tools/pom.xml
===================================================================
--- trunk/step-tools/pom.xml	                        (rev 0)
+++ trunk/step-tools/pom.xml	2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,31 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+	<parent>
+		<groupId>com.tyndalehouse</groupId>
+		<artifactId>step-parent</artifactId>
+		<version>1.0-SNAPSHOT</version>
+		<relativePath>..</relativePath>
+	</parent>
+
+	<modelVersion>4.0.0</modelVersion>
+	<groupId>com.tyndalehouse</groupId>
+	<artifactId>step-tools</artifactId>
+	<packaging>jar</packaging>
+	<name>step-tools</name>
+
+	<dependencies>
+				<dependency>
+			<groupId>org.crosswire</groupId>
+			<artifactId>jsword</artifactId>
+		</dependency>
+				<dependency>
+			<groupId>commons-io</groupId>
+			<artifactId>commons-io</artifactId>
+		</dependency>
+		
+		<dependency>
+			<groupId>commons-lang</groupId>
+			<artifactId>commons-lang</artifactId>
+		</dependency>
+	</dependencies>
+</project>

Added: trunk/step-tools/src/main/java/BibleFileGenerator.java
===================================================================
--- trunk/step-tools/src/main/java/BibleFileGenerator.java	                        (rev 0)
+++ trunk/step-tools/src/main/java/BibleFileGenerator.java	2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,253 @@
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang.StringUtils;
+import org.crosswire.common.util.Language;
+import org.crosswire.jsword.book.Book;
+import org.crosswire.jsword.book.BookCategory;
+import org.crosswire.jsword.book.BookData;
+import org.crosswire.jsword.book.BookException;
+import org.crosswire.jsword.book.Books;
+import org.crosswire.jsword.book.OSISUtil;
+import org.crosswire.jsword.book.install.InstallException;
+import org.crosswire.jsword.book.install.sword.HttpSwordInstaller;
+import org.crosswire.jsword.passage.Key;
+import org.crosswire.jsword.passage.PassageKeyFactory;
+
+public class BibleFileGenerator extends Thread {
+	private final static String proxyHostProperty = "step.proxy.host"; // system property naming the proxy host
+	private final static String proxyPortProperty = "step.proxy.port"; // system property naming the proxy port
+	private String initials; // initials of the book to export, e.g. "ESV"; also used as the output file name
+
+	/** Creates a generator with empty initials. */
+	public BibleFileGenerator() {
+		this.initials = "";
+	}
+
+	/** Creates a generator for the book with the given initials. */
+	public BibleFileGenerator(String initials) {
+		this.initials = initials;
+	}
+	/** Renders every verse of the installed book named by version into one string. */
+	private String writeBible(String version) throws Exception {
+		// output layout: "@version", "@Book" and "@Chapter n" headings, then one "verse. text" line per verse
+		StringBuffer sb = new StringBuffer(String.format("@%s%s", version, System.getProperty("line.separator")));
+		// check a version was passed in (NOTE(review): it has already been used to build sb above)
+		if (StringUtils.isEmpty(version)) {
+			throw new Exception("Version was not provided");
+		}
+
+		Book currentBook = Books.installed().getBook(version);
+
+		Key global = currentBook.getGlobalKeyList();
+		int numVersesInVersion = global.getCardinality();
+
+		String formatBook = "";
+		String formatChapter = "";
+		String formatVerse = "";
+		
+		String previousBook = "none";
+		String previousChapter = "none";
+		String previousVerse = "none" ; // never read in this method
+		
+		String key = null;
+		boolean newBook = true; // only referenced by the commented-out code below
+		for (int ii = 0; ii < numVersesInVersion; ii++) {
+			try {
+				key = global.get(ii).getOsisID();
+				
+				//assumes the OSIS id has the form Book.Chapter.Verse (a single verse) - TODO confirm
+				String[] split = key.split("\\.");
+				formatBook = split[0];
+				formatChapter = split[1];
+				formatVerse = split[2];
+				
+				if(!formatBook.equals(previousBook)) {
+					previousBook = formatBook;
+					
+					sb.append('@');
+					sb.append(formatBook);
+					sb.append(System.getProperty("line.separator"));
+					
+					//a new book always gets a fresh chapter heading as well
+					previousChapter = formatChapter;
+					sb.append("@Chapter ");
+					sb.append(formatChapter);
+					sb.append(System.getProperty("line.separator"));
+				} else if(!formatChapter.equals(previousChapter)) {
+					previousChapter = formatChapter;
+					sb.append("@Chapter ");
+					sb.append(formatChapter);
+					sb.append(System.getProperty("line.separator"));				
+				}
+				
+				BookData data = new BookData(currentBook, global.get(ii));
+				// in the verse text '\n' becomes ' ' first and then '\r' becomes '\n' (NOTE(review): confirm intent)
+				sb.append(formatVerse);
+				sb.append(". ");
+				sb.append(OSISUtil.getCanonicalText(data.getOsis()).replace('\n', ' ').replace('\r', '\n'));
+				sb.append(System.getProperty("line.separator"));
+								
+				//ScriptureReference sr = new ScriptureReference(key);
+				
+//				if (!sr.getBook().equals(formatBook)) {
+//					sb.append(String.format("@%s%s", sr.getBook().toUpperCase(), System.getProperty("line.separator")));
+//					formatBook = sr.getBook();
+//					newBook = true;
+//				}
+//
+//				if (sr.getChapter() != null && !sr.getChapter().equals(formatChapter)) {
+//					sb.append(String.format("@Chapter %s%s", sr.getChapter(), System.getProperty("line.separator")));
+//					newBook = false;
+//					formatChapter = sr.getChapter();
+//				} else if(newBook) {
+//					sb.append(String.format("@Chapter 1%s", System.getProperty("line.separator")));
+//					newBook = false;
+//				}
+
+			} catch (BookException e) { // rethrown wrapped below, which aborts the whole export
+				System.out
+						.println(initials + ":: A book exception has occurred whilte looking up the passage: " + key);
+				e.printStackTrace();
+				throw new Exception(e);
+			} catch (Exception ex) { // any other failure is reported and the loop moves on to the next verse
+				System.err.println(initials + ":: Could not parse key: " + key);
+				ex.printStackTrace();
+			} finally {
+				;
+			}
+		}
+		return sb.toString();
+
+	}
+	// Creates an HttpSwordInstaller for www.crosswire.org, applying the step.proxy.* system properties when set.
+	//TODO: here and elsewhere, ensure that the downloaders 
+	//are not hardcoded - there are at least two more sites to get 
+	//bible versions from.
+	private static HttpSwordInstaller getNewCustomInstaller() {
+		System.out.println("Creating new installer for JSword");
+		HttpSwordInstaller resourceInstaller = new HttpSwordInstaller();
+
+		System.out.println("Currently hardcoded installer host to:"
+				+ "www.crosswire.org");
+		System.out.println("Currently hardcoded property names for step");
+		String host = "www.crosswire.org";
+		String proxyHost = System.getProperty(proxyHostProperty);
+		String proxyPort = System.getProperty(proxyPortProperty);
+		System.out.println(String.format("Setting to (%1$s via %2$s:%3$s)",
+				"www.crosswire.org", proxyHost, proxyPort));
+
+		resourceInstaller.setHost(host);
+		if (proxyHost != null) {
+			resourceInstaller.setProxyHost(proxyHost);
+		}
+		if (proxyPort != null) {
+			resourceInstaller.setProxyPort(Integer.parseInt(proxyPort));
+		}
+
+		System.out.println("Setting package and catalog directories");
+		resourceInstaller
+				.setPackageDirectory("/ftpmirror/pub/sword/packages/rawzip");
+		resourceInstaller.setCatalogDirectory("/ftpmirror/pub/sword/raw");
+		return resourceInstaller;
+	}
+	/** Reloads the installer's book list and installs every English Bible that is not installed yet. */
+	private void downloadAllBibles() throws InstallException {
+		HttpSwordInstaller installer = getNewCustomInstaller();
+
+		//TODO: ensure the comment in the comment of the function is in a warning
+		//somewhere...
+		installer.reloadBookList();
+		
+		List availableBooks = installer.getBooks();
+		
+		for(int ii = 0; ii < availableBooks.size(); ii++) {
+			Book b = (Book) availableBooks.get(ii);
+			Language en = new Language("en");
+			
+			//check book is an English-language biblical text...
+			if(b.getBookCategory() == BookCategory.BIBLE &&
+					b.getBookMetaData().getLanguage().equals(en)
+					) {
+				String versionKey = b.getInitials();
+				// NOTE(review): the message below is printed even when the book is already installed
+				System.out.println("Downloading " + ((Book) availableBooks.get(ii)).getName() + "...");
+				if(Books.installed().getBook(versionKey) == null) {
+					installer.install(installer.getBook(versionKey));
+				}
+			}
+		}		
+	}
+
+	/** Writes the export of this generator's book to "output/" + initials + ".txt". */
+	@Override
+	public void run() {
+		try {
+			System.out.println("Starting thread for " + initials);
+			FileUtils.writeStringToFile(new File("output/" + initials + ".txt"), writeBible(initials));
+		} catch (IOException e) {
+			// best effort: the failure is only printed and the thread then ends
+			e.printStackTrace();
+		} catch (Exception e) {
+			// best effort: the failure is only printed and the thread then ends
+			e.printStackTrace();
+		}
+		
+	}
+
+	
+	// Entry point: lists the installed and the available books, then starts one generator thread for "ESV".
+	//TODO: could profile the application to figure out if we can speed things
+	//up with memory settings etc, more/less threads...
+	//could also investigate rewriting it so that disk IO happens
+	//during processing to allow other threads to execute.
+	//would be interesting to profile to find out where bottlenecks are.
+	public static void main(String args[]) throws Exception {
+		BibleFileGenerator bfg = new BibleFileGenerator();
+		int numThreads = 2;
+			
+		
+		// NOTE(review): bfg, queue and tpe are only referenced by commented-out code further down
+		BlockingQueue<Runnable> queue = new LinkedBlockingQueue<Runnable>();
+		ThreadPoolExecutor tpe = new ThreadPoolExecutor(numThreads, numThreads,1, TimeUnit.MINUTES, queue);
+		
+		//TODO: add a processing step that kicks off all the downloads and then
+		//waits for the download of each version to complete before it
+		//continues on to the bible processing.
+		//The download call below is currently disabled:
+		//bfg.downloadAllBibles();
+		List<Book> books = Books.installed().getBooks();
+		
+		//print initials and name of each installed book
+		for(Book b : books) {
+			System.out.println(String.format("%s %s", b.getInitials(), b.getName()));
+		}
+		
+		System.out.println("of available:\n\n");
+
+		
+		// print initials and name of each book the installer reports (reloadBookList() is not called here)
+		List<Book> availableBooks = getNewCustomInstaller().getBooks();
+		for(Book b : availableBooks) {
+			System.out.println(String.format("%s %s", b.getInitials(), b.getName()));
+		}
+
+		new BibleFileGenerator("ESV").start();
+		
+//		for(int ii = 0; ii < books.size(); ii++) {
+//			if(books.get(ii).getBookCategory().equals(BookCategory.BIBLE)) {
+//				
+//				String initials = books.get(ii).getInitials();
+//				Thread r = new BibleFileGenerator(initials);
+//				tpe.execute(r);
+//				System.out.println("Size of runnable queue: " + queue.size());
+//			}
+//		}
+	}
+}

Added: trunk/step-tools/src/main/java/BibleStatsAnalyser.java
===================================================================
--- trunk/step-tools/src/main/java/BibleStatsAnalyser.java	                        (rev 0)
+++ trunk/step-tools/src/main/java/BibleStatsAnalyser.java	2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,34 @@
+import java.io.File;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Hashtable;
+
+import org.apache.commons.io.FileUtils;
+
+
+public class BibleStatsAnalyser {
+	public BibleStatsAnalyser() {
+		// nothing to initialise; all the work is done in main
+	}
+	
+	/** Reads output/ESV.txt, counts how often each token occurs and prints "token,count" lines. */
+	public static void main(String args[]) throws IOException {
+		String esv = FileUtils.readFileToString(new File("output/ESV.txt"));
+		String[] allWords = esv.split("[ ,\\.!;\\']"); // separators: space , . ! ; ' (line breaks are not in the set)
+		Hashtable<String, Integer> count = new Hashtable<String, Integer>();
+		// tally the occurrences of every token
+		for(String s : allWords) {
+			if(count.containsKey(s)) {
+				Integer i = count.get(s);
+				i++;
+				count.put(s, i);
+			} else {
+				count.put(s, 1);
+			}
+		}
+		// NOTE(review): this loops over allWords, not the distinct keys, so a token is printed once per occurrence
+		for(String s : allWords) {
+			System.out.println(String.format("%s,%d", s, count.get(s).intValue()));
+		}
+	}
+}

Added: trunk/step-tools/src/main/java/ScriptureReference.java
===================================================================
--- trunk/step-tools/src/main/java/ScriptureReference.java	                        (rev 0)
+++ trunk/step-tools/src/main/java/ScriptureReference.java	2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,60 @@
+public class ScriptureReference {
+	private String book = null;
+	private String chapter = null;
+	private String verse = null;
+
+	/**
+	 * Splits a key such as "1 Samuel 2:13" or "Philemon 1" at its last space and last colon
+	 * and stores the pieces as book, chapter and verse; chapter stays null without a colon.
+	 * Exceptions raised while taking the substrings are printed to System.err, not rethrown.
+	 * @param key the reference text to split
+	 */
+	public ScriptureReference(String key) {
+		int lastColon = key.lastIndexOf(":");
+		int lastSpace = key.lastIndexOf(' ');
+
+		// there are two types of formats:
+		// 1 Samuel 2:13 and Philemon 1 where 1 in this case is the verse
+
+		try {
+			// a colon is present, ie the reference is not like Philemon 1
+			if (lastColon != -1) {
+				verse = key.substring(lastColon + 1);
+				chapter = key.substring(lastSpace + 1, lastColon);
+			} else {
+				verse = key.substring(lastSpace + 1);
+				chapter = null;
+			}
+
+			book = key.substring(0, lastSpace);
+		} catch (Exception ex) {
+			System.err.println("Key: " + key);
+			System.err.println("lastColon: " + lastColon);
+			System.err.println("lastSpace: " + lastSpace);
+			System.err.println("verse: " + verse);
+			System.err.println("chapter: " + chapter);
+			// NOTE(review): ex itself is neither printed nor rethrown; fields not yet assigned stay null
+		}
+	}
+
+	/**
+	 * @return the book, or null if it was never assigned because parsing failed
+	 */
+	public String getBook() {
+		return book;
+	}
+
+	/**
+	 * @return the chapter, or null when the key contained no colon
+	 */
+	public String getChapter() {
+		return chapter;
+	}
+
+	/**
+	 * @return the verse
+	 */
+	public String getVerse() {
+		return verse;
+	}
+}

Added: trunk/step-tools/src/main/resources/log4j.properties
===================================================================
--- trunk/step-tools/src/main/resources/log4j.properties	                        (rev 0)
+++ trunk/step-tools/src/main/resources/log4j.properties	2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,9 @@
+# Set root logger level to WARN and its only appender to A1.
+log4j.rootLogger=WARN, A1
+
+# A1 is set to be a ConsoleAppender.
+log4j.appender.A1=org.apache.log4j.ConsoleAppender
+
+# A1 uses PatternLayout.
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout
+log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n




More information about the Tynstep-svn mailing list