#!/usr/bin/env python # coding: utf-8 # # This does a very roughshod attempt to compare the osisIDs found in an # XML file with each of the versifications that SWORD knows about to help # a user find the one which is most akin to the one they are using. It is # limited in its need for your file to be at least segregated into OT/NT # in the proper order, although within each testament, it requires nothing # special as for ordering. # # Invoke simply by calling the program and the file name. import argparse import io import logging # in normal state level should be debug.WARNING, debug.INFO and debug.DEBUG # give additional information. logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) log = logging.getLogger('versification') import re import sys try: import lxml.etree as ET except ImportError: import xml.etree.ElementTree as ET OSIS_NS = 'http://www.bibletechnologies.net/2003/OSIS/namespace' VERSEID_RE = re.compile(r'^(.+\.\d+\.\d+).*$') # Inform the user that we need the SWORD extension try: import Sword except ImportError: log.exception( "You do not have the SWORD library installed. Please install it.") sys.exit(1) arg_parser = argparse.ArgumentParser( description='Compare OSIS file with available v11ns.') arg_parser.add_argument('--verbose', '-v', action='count') arg_parser.add_argument('filename', nargs=1) args = arg_parser.parse_args() if args.verbose: log.setLevel = logging.DEBUG log.debug('args = %s', args) # Open the file log.debug('Opening %s' % args.filename[0]) tree = ET.parse(io.open(args.filename[0], encoding='utf8')).getroot() # Get the list of versifications log.debug('Fetching a list of v11ns') vmgr = Sword.VersificationMgr.getSystemVersificationMgr() av11ns = vmgr.getVersificationSystems() log.debug('av11ns = %s', av11ns) # Get the list of all osisIDs log.debug('Fetching a list of OSIS IDs') ids = set() for item in tree.iter('{%s}verse' % OSIS_NS): if 'osisID' in item.attrib: ids.add(item.attrib['osisID'].split('!')[0]) log.debug('ids = len(%d)', len(ids)) # Iterate each versification scheme for v11n in av11ns: v11n_name = v11n.c_str() print('\nChecking %s:\n%s' % (v11n_name, (len(v11n_name) + 10) * '-')) # Construct a list of the IDs in this versification key = Sword.VerseKey() key.setVersificationSystem(v11n.c_str()) # Anything left in this afterwards is missing from the OSIS ot otkeyList = [] # Anything left in this afterwards is missing from the OSIS nt ntkeyList = [] # Anything that gets placed in here is extraneous OT material (we think) otextraKeys = [] # Anything that gets placed in here is extraneous NT material (we think) ntextraKeys = [] inNT = False while key.popError() == '\x00': skey = key.getOSISRef() # Assume we enter the NT when we hit Matthew if not inNT and skey.startswith('Matt'): inNT = True if inNT: ntkeyList.append(skey) else: otkeyList.append(skey) key.increment() ntkeyList = set(ntkeyList) # The 'in' operator only works on a set otkeyList = set(otkeyList) inNT = False # Now iterate the ones we have in this file for osisid in ids: # log.debug('Checking key %s', osisid) if osisid in otkeyList: otkeyList.remove(osisid) elif osisid in ntkeyList: ntkeyList.remove(osisid) inNT = True else: verse_match = VERSEID_RE.match(osisid) if verse_match and inNT: ntextraKeys.append(verse_match.group(1)) elif verse_match and not inNT: otextraKeys.append(verse_match.group(1)) # Ignore it if not VERSEID_RE.match() # Now let's see what is left over # Sets in Python cannot be ordered keyList = list(otkeyList.union(ntkeyList)) keyList.sort() if len(keyList) > 0: if len(keyList) < 100: log.info('\tThe following IDs don’t appear in your file:\n%s', str(", ".join(keyList))) print ('\tThere are %d OT IDs and %d NT IDs ' + 'in v11n which aren’t in your file.') \ % (len(otkeyList), len(ntkeyList)) else: print '\tYour file has all the references in this v11n' # Now let's see if you had extra if len(otextraKeys + ntextraKeys) > 0: # It doesn't make sense to print out lists longer than 100 # they cannot be read anyway if len(keyList) < 100: log.info( '\tThe following IDs don’t appear in v11n:\n%s', str(", ".join(keyList))) print ('\tThere are %d OT IDs and %d NT IDs ' + 'in your file which don’t appear in v11n.') \ % (len(otextraKeys), len(ntextraKeys)) else: print '\tYour file has no extra references'