[sword-svn] r3905 - trunk/utilities

dmsmith at crosswire.org dmsmith at crosswire.org
Wed Jun 25 09:16:46 EDT 2025


Author: dmsmith
Date: 2025-06-25 09:16:46 -0400 (Wed, 25 Jun 2025)
New Revision: 3905

Modified:
   trunk/utilities/osis2mod.cpp
Log:
MODTOOLS-76: Enhance -v argument handling in osis2mod with case-insensitive and prefix matching, and improved error reporting.

- Outputs the name of the versification system being used at program start
- Adds layered resolution of versifications:
  1) Case-sensitive exact match
  2) Case-insensitive exact match
  3) Case-insensitive prefix match
- Shows detailed error messages when input is invalid or ambiguous:
  * Lists all matches in case of ambiguity
  * Lists input and valid options if no match is found


Modified: trunk/utilities/osis2mod.cpp
===================================================================
--- trunk/utilities/osis2mod.cpp	2025-06-19 20:31:16 UTC (rev 3904)
+++ trunk/utilities/osis2mod.cpp	2025-06-25 13:16:46 UTC (rev 3905)
@@ -33,6 +33,9 @@
 #include <vector>
 #include <iostream>
 #include <fstream>
+#include <string>
+#include <algorithm>
+#include <cctype>
 
 #include <utilstr.h>
 #include <swmgr.h>
@@ -42,6 +45,7 @@
 #include <utilxml.h>
 #include <listkey.h>
 #include <versekey.h>
+#include <versificationmgr.h>
 #include <swversion.h>
 
 #include <ztext.h>
@@ -117,6 +121,83 @@
 static bool inCanonicalOSISBook = true; // osisID is for a book that is not in Sword's canon
 static bool normalize           = true; // Whether to normalize UTF-8 to NFC
 
+// Safe case-insensitive comparison for SWBuf
+static bool ci_equals(const SWBuf &a, const SWBuf &b) {
+	if (a.length() != b.length()) return false;
+	for (size_t i = 0; i < a.length(); ++i) {
+		if (std::tolower(static_cast<unsigned char>(a[i])) !=
+		    std::tolower(static_cast<unsigned char>(b[i]))) {
+			return false;
+		}
+	}
+	return true;
+}
+
+// Safe case-insensitive prefix comparison for SWBuf
+static bool ci_starts_with(const SWBuf &full, const SWBuf &prefix) {
+	if (prefix.length() > full.length()) return false;
+	for (size_t i = 0; i < prefix.length(); ++i) {
+		if (tolower(static_cast<unsigned char>(full[i])) != tolower(static_cast<unsigned char>(prefix[i]))) {
+			return false;
+		}
+	}
+	return true;
+}
+
+/**
+ * Resolves an abbreviation or partial name against a list of candidate strings.
+ *
+ * The matching strategy is:
+ *   1. Case-sensitive exact match: returns immediately if a single exact match is found.
+ *   2. Case-insensitive exact match: uses UTF-8 safe toUpper() and returns immediately on match.
+ *   3. Case-insensitive prefix match: returns all matching candidates that begin with the input.
+ *
+ * This function does not assume anything about the semantic meaning of the entries —
+ * it can be used for versification systems, module names, etc.
+ *
+ * @param input The user-provided input string (abbreviation or full name).
+ * @param candidates The list of valid full names to resolve against.
+ * @return A StringList of matching entries (0 = no match, 1 = exact match, >1 = ambiguous).
+ */
+static StringList resolve_abbreviation(const SWBuf &input, const StringList &candidates) {
+	StringList matches;
+
+	// 1. Case-sensitive exact match
+	for (const SWBuf &candidate : candidates) {
+		if (input == candidate) {
+			matches.push_back(candidate);
+			return matches;
+		}
+	}
+
+	// Convert input to uppercase for case-insensitive comparisons
+	SWBuf inputUpper = input;
+	inputUpper.toUpper();
+
+	// 2. Case-insensitive exact match
+	for (const SWBuf &candidate : candidates) {
+		SWBuf candidateUpper = candidate;
+		candidateUpper.toUpper();
+
+		if (inputUpper == candidateUpper) {
+			matches.push_back(candidate);
+			return matches;
+		}
+	}
+
+	// 3. Case-insensitive prefix match
+	for (const SWBuf &candidate : candidates) {
+		SWBuf candidateUpper = candidate;
+		candidateUpper.toUpper();
+
+		if (candidateUpper.startsWith(inputUpper)) {
+			matches.push_back(candidate);
+		}
+	}
+
+	return matches;
+}
+
 bool isOSISAbbrev(const char *buf) {
 	VersificationMgr *vmgr = VersificationMgr::getSystemVersificationMgr();
 	const VersificationMgr::System *av11n = vmgr->getVersificationSystem(currentVerse.getVersificationSystem());
@@ -1386,6 +1467,7 @@
 		fprintf(stderr, "\t\t\t\t (2 bytes to store size equal 65535 characters)\n");
 	}
 	fprintf(stderr, "  -v <v11n>\t\t specify a versification scheme to use (default is KJV)\n");
+	fprintf(stderr, "\t\t\t\t Note: This is case insensitive and allows unique prefixes, e.g. cal for Calvin\n");
 	fprintf(stderr, "\t\t\t\t Note: The following are valid values for v11n:");
 
 	VersificationMgr *vmgr = VersificationMgr::getSystemVersificationMgr();
@@ -1933,8 +2015,41 @@
 			else usage(*argv, "-c requires <cipher_key>");
 		}
 		else if (!strcmp(argv[i], "-v")) {
-			if (i+1 < argc) v11n = argv[++i];
-			else usage(*argv, "-v requires <v11n>");
+			if (i + 1 >= argc) {
+				usage(*argv, "-v requires <v11n>");
+			}
+
+			const char *arg = argv[++i];
+			SWBuf v11nInput = arg;
+
+			VersificationMgr *vmgr = VersificationMgr::getSystemVersificationMgr();
+			const StringList &av11ns = vmgr->getVersificationSystems();
+			StringList matches = resolve_abbreviation(v11nInput, av11ns);
+
+			if (matches.empty()) {
+				SWBuf error = "-v ";
+				error += v11nInput;
+				error += " is unknown";
+				usage(*argv, error);
+			}
+
+			if (matches.size() > 1) {
+				SWBuf error = "-v ";
+				error += v11nInput;
+				error += " is ambiguous, matching ";
+				bool first = true;
+				for (const auto &v : matches) {
+					if (!first) {
+						error += ", ";
+					}
+					error += v;
+					first = false;
+				}
+				usage(*argv, error);
+			}
+
+			v11n = matches.front();  // single unambiguous match
+			cout << "INFO(V11N): Using the " << v11n << " versification." << endl;
 		}
 		else if (!strcmp(argv[i], "-s")) {
 			if (i+1 < argc) {



More information about the sword-cvs mailing list