[sword-svn] r563 - trunk/migratetags/matchers

scribe at crosswire.org scribe at crosswire.org
Thu May 25 04:17:14 EDT 2023


Author: scribe
Date: 2023-05-25 04:17:14 -0400 (Thu, 25 May 2023)
New Revision: 563

Modified:
   trunk/migratetags/matchers/gntmatcher.h
Log:
added regularization and sigma normalization


Modified: trunk/migratetags/matchers/gntmatcher.h
===================================================================
--- trunk/migratetags/matchers/gntmatcher.h	2023-04-27 17:45:35 UTC (rev 562)
+++ trunk/migratetags/matchers/gntmatcher.h	2023-05-25 08:17:14 UTC (rev 563)
@@ -1,15 +1,21 @@
 #include "matcher.h"
 #include <utf8greekaccents.h>
+#include <map>
 
 #ifndef gntmatcher_h
 #define gntmatcher_h
 
+using std::map;
+
 class GNTMatcher : public Matcher {
 	UTF8GreekAccents sanitizeGreekAccentFilter;
+	map<SWBuf, SWBuf> globalRegs;
 public:
 
 	GNTMatcher() : sanitizeGreekAccentFilter() {
 		sanitizeGreekAccentFilter.setOptionValue("off");
+		globalRegs["ΘΣ"] = "ΘΕΟΣ";
+		globalRegs["ΚΥ"] = "ΚΥΡΙΟΥ";
 	}
 
 // Compares 2 words and tries to give a percentage assurance of a match
@@ -109,7 +115,6 @@
 	SWBuf t1 = word;
 	// remove greek accents
 	sanitizeGreekAccentFilter.processText(t1);
-	t1.toUpper();
 
 	// remove ignoreSeries characters
 	SWBuf o = t1;
@@ -122,8 +127,14 @@
 		SWBuf checkChar;
 		getUTF8FromUniChar(ch, &checkChar);
 		if (checkChar != " " && strstr(ignoreSeries, checkChar.c_str())) continue;
+		if (checkChar == "ϲ") checkChar = "σ";
+		if (checkChar == "ς") checkChar = "σ";
 		t1.append(checkChar);
 	}
+	t1.toUpper();
+	if (globalRegs.find(t1) != globalRegs.end()) {
+		t1 = globalRegs[t1];
+	}
 	return t1;
 }
 



More information about the sword-cvs mailing list