[sword-svn] r3424 - trunk/utilities

refdoc at crosswire.org refdoc at crosswire.org
Sun Jun 26 22:19:44 MST 2016


Author: refdoc
Date: 2016-06-26 22:19:44 -0700 (Sun, 26 Jun 2016)
New Revision: 3424

Added:
   trunk/utilities/stripaccents.cpp
Modified:
   trunk/utilities/Makefile.am
Log:
added small utility to strip Arab, Greek and Hebrew diacritics from source texts. Useful for some
module making work



Modified: trunk/utilities/Makefile.am
===================================================================
--- trunk/utilities/Makefile.am	2016-06-25 13:21:48 UTC (rev 3423)
+++ trunk/utilities/Makefile.am	2016-06-27 05:19:44 UTC (rev 3424)
@@ -6,11 +6,12 @@
 endif
 LDADD = $(top_builddir)/lib/libsword.la
 noinst_PROGRAMS = cipherraw lexdump \
-	stepdump step2vpl gbfidx modwrite addvs emptyvss \
-	addgb genbookutil treeidxutil addld
+	stepdump step2vpl gbfidx modwrite addvs  \
+	addgb genbookutil treeidxutil addld  
 
 bin_PROGRAMS = mod2imp mod2osis osis2mod tei2mod vs2osisref vs2osisreftxt \
-	mod2vpl mkfastmod vpl2mod imp2vs installmgr xml2gbs imp2gbs imp2ld
+	mod2vpl mkfastmod vpl2mod imp2vs installmgr xml2gbs imp2gbs imp2ld \
+	stripaccents emptyvss
 
 
 if HAVE_LIBZ
@@ -48,6 +49,7 @@
 treeidxutil_SOURCES = treeidxutil.cpp
 addld_SOURCES = addld.cpp
 imp2ld_SOURCES = imp2ld.cpp
+stripaccents_SOURCES = stripaccents.cpp
 
 installmgr_SOURCES = installmgr.cpp
 

Added: trunk/utilities/stripaccents.cpp
===================================================================
--- trunk/utilities/stripaccents.cpp	                        (rev 0)
+++ trunk/utilities/stripaccents.cpp	2016-06-27 05:19:44 UTC (rev 3424)
@@ -0,0 +1,79 @@
+/******************************************************************************
+ *
+ *  stripaccents.cpp -	This simple utility strips Greek accents and breathing
+ *			marks, Hebrew vowel points and cantillation marks, and
+ *			Arabic diacritics from a given text.
+ *
+ *				stripaccents <n=1-15> <text-to-be-stripped>
+ *
+ * $Id$
+ *
+ * Copyright 1997-2013 CrossWire Bible Society (http://www.crosswire.org)
+ *	CrossWire Bible Society
+ *	P. O. Box 2528
+ *	Tempe, AZ  85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <cstdlib>
+#include <swmgr.h>
+#include <markupfiltmgr.h>
+#include <iostream>
+
+#ifndef NO_SWORD_NAMESPACE
+using namespace sword;
+#endif
+
+/* Print the command-line synopsis to stderr and terminate; never returns. */
+int usage()
+{
+	// Only reached on a bad command line, so exit non-zero for calling scripts.
+	fputs("\nusage: stripaccents <n=1-15> <text-to-be-stripped> \n"
+	      "\n n=1 \t strip Greek accents\n n=2 \t strip Arabic vowel points"
+	      "\n n=4 \t strip Hebrew vowel points\n n=8 \t strip Hebrew Cantillation marks\n"
+	      "\n several of the above filters can be engaged by adding the values\n", stderr);
+	exit(-1);
+}
+
+/* stripaccents <n=1-15> <text-to-be-stripped>: n is a bit mask of the filters
+ * listed by usage(); the filtered text is written to stdout, then returns 0. */
+int main(int argc, char **argv)
+{
+	if (argc != 3) {
+		usage();
+		exit(-1);
+	}
+
+	const int stripFilters = atoi(argv[1]);	// non-numeric input yields 0 and is rejected
+	if ((stripFilters < 1) || (stripFilters > 15)) {
+		usage();
+		exit(-1);
+	}
+
+	// Built only after validation so a bad command line fails fast.
+	SWMgr manager;
+	SWBuf stripTerm = argv[2];
+	// NOTE(review): presumably each filter strips only while its option is "Off" - confirm.
+	manager.setGlobalOption("Greek Accents", "Off");
+	manager.setGlobalOption("Arabic Vowel Points", "Off");
+	manager.setGlobalOption("Hebrew Vowel Points", "Off");
+	manager.setGlobalOption("Hebrew Cantillation", "Off");
+
+	// Bit values mirror usage(): 1 Greek, 2 Arabic, 4 Hebrew vowels, 8 cantillation.
+	if (stripFilters & (1 << 0)) manager.filterText("Greek Accents", stripTerm);
+	if (stripFilters & (1 << 1)) manager.filterText("Arabic Vowel Points", stripTerm);
+	if (stripFilters & (1 << 2)) manager.filterText("Hebrew Vowel Points", stripTerm);
+	if (stripFilters & (1 << 3)) manager.filterText("Hebrew Cantillation", stripTerm);
+	fprintf(stdout,"%s\n",stripTerm.c_str());
+	return 0;
+}




More information about the sword-cvs mailing list