#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# conflint.py - Validate a conf file.
#
# Copyright (C) 2021 CrossWire Bible Society
# Author: domcox
# This file is part of Sword Modules

# Sword Modules is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# Sword Modules is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with Sword Modules.  If not, see <https://www.gnu.org/licenses/>.

# Created: 2021-01-22

# Requirements
import argparse
import os.path
import re
import sys
from collections import Counter
from datetime import date
from pathlib import Path

# Variables

# Version
version = '1.0'

# Module Name (set by parseconf() from the "[Name]" header line)
modname = ''

# Allowed value types for every known element
elemtype = {
    "Abbreviation"           : ['text'],
    "Description"            : ['text'],
    "DataPath"               : ['text'],
    "ModDrv"                 : ['keyword'],
    "SourceType"             : ['keyword'],
    "Encoding"               : ['keyword'],
    "CompressType"           : ['keyword'],
    "BlockType"              : ['keyword'],
    "BlockCount"             : ['integer'],
    "Versification"          : ['keyword'],
    "CipherKey"              : ['text'],
    "KeyType"                : ['keyword'],
    "CaseSensitiveKeys"      : ['boolean'],
    "GlobalOptionFilter"     : ['keyword'],
    "Direction"              : ['keyword'],
    "DisplayLevel"           : ['integer'],
    "Font"                   : ['text'],
    "OSISqToTick"            : ['boolean'],
    "Feature"                : ['keyword'],
    "GlossaryFrom"           : ['text'],
    "GlossaryTo"             : ['text'],
    "PreferredCSSXHTML"      : ['text'],
    "AndBibleCSS"            : ['text'],
    "CaseInsensitiveKeys"    : ['boolean'],
    "LangSortOrder"          : ['text'],
    "StrongsPadding"         : ['boolean'],
    "LocalStripFilter"       : ['text'],
    "About"                  : ['text','rtf'],
    "SwordVersionDate"       : ['isodate'],
    "Version"                : ['text'],
    "History"                : ['text','html'],
    "MinimumVersion"         : ['text'],
    "Category"               : ['keyword'],
    "LCSH"                   : ['text'],
    "Lang"                   : ['text'],
    "Obsoletes"              : ['text'],
    "OSISVersion"            : ['text'],
    "Companion"              : ['text'],
    "DistributionLicense"    : ['keyword'],
    "DistributionNotes"      : ['text'],
    "Copyright"              : ['text'],
    "CopyrightHolder"        : ['text'],
    "CopyrightDate"          : ['year'],
    "CopyrightNotes"         : ['text'],
    "CopyrightContactName"   : ['text'],
    "CopyrightContactNotes"  : ['text'],
    "CopyrightContactAddress": ['text'],
    "CopyrightContactEmail"  : ['text'],
    "ShortPromo"             : ['text','html'],
    "ShortCopyright"         : ['text'],
    "TextSource"             : ['text'],
    "UnlockInfo"             : ['text','html','rtf+html'],
    "InstallSize"            : ['integer'],
    "Notes"                  : ['text'],
    "ReferenceBible"         : ['text'],
    "Scope"                  : ['text'],
    "SearchOption"           : ['keyword'],
    "Siglum1"                : ['text'],
    "Siglum2"                : ['text']
}

# Accepted values for every element of type 'keyword'
keywords = {
    "ModDrv"        : ['RawText','RawText4','zText','zText4','RawCom',
                       'RawCom4','zCom','zCom4','HREFCom','RawFiles',
                       'RawLD','RawLD4','zLD','RawGenBook'],
    "SourceType"    : ['OSIS','TEI','GBF','ThML'],
    "Encoding"      : ['UTF-8','UTF-16','SCSU'],
    "CompressType"  : ['ZIP','LZSS','BZIP2','XZ'],
    "BlockType"     : ['BOOK','CHAPTER','VERSE'],
    "Versification" : ['Calvin','Catholic','Catholic2','DarbyFr','German',
                       'KJV','KJVA','LXX','Leningrad','Luther','MT','NRSV',
                       'NRSVA','Orthodox','Segond','Synodal','SynodalProt',
                       'Vulg'],
    "KeyType"       : ['TreeKey','VerseKey'],
    "GlobalOptionFilter" : ['UTF8Cantillation','UTF8GreekAccents',
                       'UTF8HebrewPoints','UTF8ArabicPoints','OSISLemma',
                       'OSISMorphSegmentation','OSISStrongs','OSISFootnotes',
                       'OSISScripref','OSISMorph','OSISHeadings',
                       'OSISVariants','OSISRedLetterWords','OSISGlosses',
                       'OSISXlit','OSISEnum','OSISReferenceLinks','OSISRuby',
                       'GBFStrongs','GBFFootnotes','GBFMorph','GBFHeadings',
                       'GBFRedLetterWords','ThMLStrongs','ThMLFootnotes',
                       'ThMLScripref','ThMLMorph','ThMLHeadings',
                       'ThMLVariants','ThMLLemma'],
    "Direction"     : ['LtoR','RtoL','BiDi'],
    "Feature"       : ['StrongsNumbers','GreekDef','HebrewDef','GreekParse',
                       'HebrewParse','DailyDevotion','Glossary','Images',
                       'NoParagraphs'],
    "Category"      : ['Biblical Texts','Commentaries',
                       'Lexicons / Dictionaries','Glossaries',
                       'Daily Devotional','Generic Books','Maps','Images',
                       'Cults / Unorthodox / Questionable Material','Essays'],
    "DistributionLicense" : ['Public Domain','Copyrighted',
                       'Copyrighted; Permission to distribute granted to CrossWire',
                       'Copyrighted; Permission granted to distribute non-commercially in SWORD format',
                       'Copyrighted; Free non-commercial distribution',
                       'Copyrighted; Freely distributable','GFDL','GPL',
                       'Creative Commons: BY-NC-ND 4.0',
                       'Creative Commons: BY-NC-SA 4.0',
                       'Creative Commons: BY-NC 4.0',
                       'Creative Commons: BY-ND 4.0',
                       'Creative Commons: BY-SA 4.0',
                       'Creative Commons: BY 4.0',
                       'Creative Commons: CC0'],
    "SearchOption"  : ['IncludeKeyInSearch']
}

# Elements that may appear more than once
multi = ['GlobalOptionFilter','Feature','Obsoletes']

# Elements whose value may span several lines (backslash continuation)
continuation = ['About','Copyright','CopyrightNotes','CopyrightContactName',
                'CopyrightContactNotes','CopyrightContactAddress','DistributionNotes',
                'TextSource','UnlockInfo','Notes']

# Elements that accept a "_<locale>" suffix
localization = ['Abbreviation','Description','About','History','Copyright',
                'CopyrightHolder','CopyrightNotes','CopyrightContactName',
                'CopyrightContactNotes','CopyrightContactAddress',
                'CopyrightContactEmail','ShortPromo','ShortCopyright',
                'DistributionNotes','TextSource','UnlockInfo']

# Elements every .conf file must define
required = ['Description','DataPath','ModDrv','About', 'SwordVersionDate',
            'DistributionLicense','TextSource','Version']

# Elements accepted but reported as undocumented
unidentified = ['Notes','ReferenceBible','SearchOption','Siglum1','Siglum2']

# Elements accepted but reported as deprecated
deprecated = ['OSISqToTick']

# Existing elemtype in the .conf file
existing = []

# Number of errors
errors = 0

# Number of warnings
warnings = 0

# Pattern accepted for SwordVersionDate: YYYY-MM-DD, hyphens optional
_ISODATE_RE = re.compile(
    r'([0-9]{4})-?(1[0-2]|0[1-9])-?(3[01]|0[1-9]|[12][0-9])')

# Pattern accepted for one year of CopyrightDate
_YEAR_RE = re.compile(r'[0-9]{4}')

# Oldest SwordVersionDate accepted by chk_type()
_SWORD_EPOCH = date(1992, 1, 1)

# Oldest CopyrightDate year accepted by chk_type()
_FIRST_YEAR = 1583

# NOTE(review): the two patterns below are reconstructions. The original
# definitions were lost from this file (the text between the start of the
# ishtml() regex and the body of chk_repeats() was stripped); confirm them
# against the upstream script.
_HTML_RE = re.compile(r'</?[A-Za-z][^<>]*>')
_RTF_RE = re.compile(r'\\(?:pard|par|qc)\b|\\u-?[0-9]+\?')


def die(msg):
    '''
    Show an error message then exit on error

    Writes the message to stderr, 'Parsing failed' to stdout and terminates
    the process with exit status 1.
    '''
    print(' ERROR! ' + msg, file=sys.stderr)
    print('Parsing failed\n')
    sys.exit(1)


def error(msg):
    '''
    Show an error message, increment errors number

    Returns the updated value of the global error counter.
    '''
    global errors
    print(' ERROR! ' + msg, file=sys.stderr)
    errors += 1
    return errors


def warning(msg):
    '''
    Show a warning message, increment warnings number

    Returns the updated value of the global warning counter.
    '''
    global warnings
    print(' WARNING! ' + msg, file=sys.stderr)
    warnings += 1
    return warnings


def _base_name(elem):
    ''' Return elem without its locale suffix (eg: About_de -> About) '''
    return elem.split('_', 1)[0]


def get_parameters():
    '''
    Parse command-line options.

    Returns string containing .conf filename; exits through die() when the
    given path is not an existing file.
    '''
    description = '''
    Validate a SWORD .conf file contents.
    '''
    # Parse command-line
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('conf', help='config file')
    args = vars(parser.parse_args())

    # Checking conf file in input
    fileconf = args['conf']
    if not Path(fileconf).is_file():
        die(f"File '{fileconf}' does not exist.")
    return fileconf


# Discard: doesn't work with continuation
def readconf2(file):
    '''
    Print the key/value pairs of a conf file read with configparser.

    Unused by main(); kept for reference. Exits through die() unless the
    file holds exactly one section.
    '''
    # Imported here because nothing else in this file needs configparser
    import configparser

    config = configparser.RawConfigParser(strict=False)
    # Keep option names case-sensitive
    config.optionxform = lambda option: option
    config.read(file)
    if len(config.sections()) != 1:
        die('Invalid File Format')
    for sect in config.sections():
        for k, v in config.items(sect):
            print(' {} = {}'.format(k, v))
        print()


def readconf(file):
    '''
    Read conf file in input

    Lines are stripped; a line ending with a backslash is joined (backslash
    and a newline kept) with the lines that follow it. Blank lines and lines
    starting with '#' are dropped.

    Returns list of raw entries ("[Name]" or "key=value" strings).
    '''
    # List of entries
    config = []
    # Pending text of an entry continued over several lines
    pending = ''

    with open(file, 'r', encoding='utf-8', newline='\n') as f:
        for line in f:
            line = line.strip()
            if line.endswith('\\'):
                # Line continuation
                pending += line + '\n'
            elif line and line[0] != '#':
                # Simple line or end of continuation
                config.append(pending + line)
                pending = ''
    return config


def parseconf(config):
    '''
    Parse config list of entries

    Sets the global modname from the "[Name]" header, reports entries
    without '=' and unknown elements through error(), and reports
    undocumented elements through warning(). Exits through die() when the
    module name is not a valid identifier.

    Return list of tuples (element, value); element keeps its locale suffix.
    '''
    global modname

    parsed_config = []

    for entry in config:
        # Strip surrounding whitespaces
        entry = entry.strip()

        # Search for Module identifier
        header = re.search(r'^\[(.+?)\]$', entry)
        if header:
            modname = header.group(1)
            if not modname.isidentifier():
                die(f"{modname}: Invalid Unique Identifier.")
            continue

        # Check the '=' separator exists
        if '=' not in entry:
            error(f"{entry}: Parsing error, unexpected item")
            continue

        # Extract key from entry; the locale suffix is ignored for lookup
        element, value = entry.split('=', 1)
        key = _base_name(element)

        # Check if key exists
        if key not in elemtype:
            error(f"{key}: Unknown element")
            continue

        parsed_config.append((element, value))
        if key in unidentified:
            warning(f"{key}: Element is not documented")

    return parsed_config


def _chk_isodate(elem, value):
    ''' Report a SwordVersionDate that is malformed or out of range '''
    match = _ISODATE_RE.fullmatch(value)
    if not match:
        error(f"{elem}={value}: Incorrect format")
        return
    try:
        # Built from the matched groups so that the compact YYYYMMDD form
        # works on every Python version and impossible dates (eg: Feb 30)
        # are reported instead of raising.
        swdate = date(*(int(part) for part in match.groups()))
    except ValueError:
        error(f"{elem}={value}: Incorrect format")
        return
    if swdate > date.today():
        error(f"{elem}={swdate}: Future dates are not allowed")
    if swdate < _SWORD_EPOCH:
        error(f"{elem}={swdate}: Older dates than Sword's are not allowed")


def _chk_years(elem, value):
    ''' Report CopyrightDate years that are malformed or out of range '''
    # Ranges (1990-1995) and lists (1990,1995) are both split into years
    for year in value.replace('-', ',').split(','):
        if not _YEAR_RE.fullmatch(year):
            error(f"{elem}={year}: Incorrect format")
            continue
        # Compared as integers: year 0000 cannot be stored in a date object
        if int(year) > date.today().year:
            error(f"{elem}={year}: Future years are not allowed")
        if int(year) < _FIRST_YEAR:
            error(f"{elem}={year}: Years prior to 1583 are not allowed")


def chk_type(config):
    '''
    Check element type

    Every value is checked against the types listed in elemtype for its
    element; mismatches are reported through error(). Returns None.
    '''
    for elem, value in config:
        subelem = _base_name(elem)
        allowed = elemtype[subelem]

        # Type=keyword
        if 'keyword' in allowed:
            # Remove parameters
            # (eg: GlobalOptionFilter=OSISReferenceLinks|Reference..|..)
            # Only keyword values carry parameters; free text is left whole
            # so that the markup check below sees all of it.
            value = value.split('|', 1)[0]
            if value not in keywords[subelem]:
                error(f"{elem}={value}: Not matching predefined value")
        # Type=isodate
        elif 'isodate' in allowed:
            _chk_isodate(elem, value)
        # Type=year
        elif 'year' in allowed:
            _chk_years(elem, value)
        # Type=boolean, integer, text, html or rtf
        else:
            detected = typevalue(value)
            # A value made of digits or true/false is still valid plain
            # text (eg: Version=2)
            if detected in ('boolean', 'integer') and 'text' in allowed:
                detected = 'text'
            if detected not in allowed:
                error(f"{elem}: '{detected}' formatting is not allowed")


def typevalue(str):
    '''
    Return type of str (boolean, integer, text, rtf, html, rtf+html)

    The parameter name shadows the builtin; it is kept unchanged for
    callers passing it by keyword.
    '''
    if str.capitalize() in ['True','False']:
        return 'boolean'
    if str.isnumeric():
        return 'integer'
    html = ishtml(str)
    rtf = isrtf(str)
    if html and rtf:
        return 'rtf+html'
    if html:
        return 'html'
    if rtf:
        return 'rtf'
    return 'text'


def ishtml(str):
    '''
    Return True if str contains html codes

    NOTE(review): reconstructed; the original pattern was lost. Any
    opening or closing tag such as <b>, </i> or <a href="..."> matches.
    '''
    return bool(_HTML_RE.search(str))


def isrtf(str):
    '''
    Return True if str contains rtf codes

    NOTE(review): reconstructed; the original function was lost. The
    control words \\par, \\pard, \\qc and \\uNNNN? are recognised; confirm
    the list against the upstream script.
    '''
    return bool(_RTF_RE.search(str))


def listconf(config):
    '''
    Record the elements present in config in the global list 'existing'

    NOTE(review): reconstructed; the original function was lost. Only the
    population of 'existing' (read by chk_required) is restored; whether
    the original also printed the elements is unknown.
    '''
    existing.clear()
    existing.extend(elem for elem, _ in config)


def chk_repeats(config):
    '''
    Check repeated elements

    An element present more than once and not listed in 'multi' is
    reported through error(), once per element.

    NOTE(review): the start of this function was lost; the occurrence
    count is rebuilt here from config.
    '''
    count = Counter(elem for elem, _ in config)
    printed = []
    for elem, _ in config:
        if count[elem] > 1 and elem not in multi:
            if elem not in printed:
                error(f"{elem}: Repeating this element is not allowed.")
                printed.append(elem)


def chk_continuation(config):
    '''
    Check element continuation

    Reports through error() every multi-line value whose element is not
    listed in 'continuation'.
    '''
    for elem, value in config:
        # readconf() joins continued lines with a backslash and a newline
        if '\\\n' in value and _base_name(elem) not in continuation:
            error(f"{elem}: Continuation not allowed on that element.")


def chk_localization(config):
    '''
    Check element localization

    Reports through error() every element carrying a locale suffix whose
    base name is not listed in 'localization'.
    '''
    for elem, _ in config:
        if '_' in elem and _base_name(elem) not in localization:
            error(f"{elem}: Localization is not allowed")


def chk_required(config):
    '''
    Check required elements

    Reads the global list 'existing', so listconf() must have run first.
    '''
    for elem in required:
        if elem not in existing:
            error(f"{elem}: Missing required element")


def chk_datapath(config):
    '''
    Check DataPath

    Builds the recommended DataPath from ModDrv, the module name and, for
    lexicon drivers, the DailyDevotion/Glossary Feature, then reports a
    difference through warning(). Nothing is checked for a ModDrv that is
    not in the table below.
    '''
    path = {
        'RawText'   : './modules/texts/rawtext/',
        'RawText4'  : './modules/texts/rawtext4/',
        'zText'     : './modules/texts/ztext/',
        'zText4'    : './modules/texts/ztext4/',
        'zCom'      : './modules/comments/zcom/',
        'zCom4'     : './modules/comments/zcom4/',
        # Spelled like the ModDrv keyword so that the lookup can succeed
        'HREFCom'   : './modules/comments/hrefcom/',
        'RawCom'    : './modules/comments/rawcom/',
        'RawCom4'   : './modules/comments/rawcom4/',
        'RawFiles'  : './modules/comments/rawfiles/',
        'zLD'       : './modules/lexdict/zld/',
        'RawLD'     : './modules/lexdict/rawld/',
        'RawLD4'    : './modules/lexdict/rawld4/',
        'RawGenBook': './modules/genbook/rawgenbook/'
    }

    # Module
    module = modname.lower()

    # Read needed values
    datapath = ''
    moddrv = ''
    features = set()
    for elem, value in config:
        if elem == 'DataPath':
            datapath = value
        elif elem == 'ModDrv':
            moddrv = value
        elif elem == 'Feature':
            features.add(value)

    # Build feature specific sub-dir.
    # DailyDevotion and Glossary are Feature values, not Category values.
    if 'DailyDevotion' in features:
        subdir = 'devotionals/'
    elif 'Glossary' in features:
        subdir = 'glossaries/'
    else:
        subdir = ''

    if moddrv not in path:
        return

    # Build the recommended DataPath
    if moddrv in ['zLD','RawLD','RawLD4']:
        suitedpath = path[moddrv] + subdir + module + '/dict'
    else:
        suitedpath = path[moddrv] + module + '/'

    # Compare DataPath values
    if datapath != suitedpath:
        warning('DataPath differs from the recommended convention,')
        if 'devotionals' in datapath and 'DailyDevotion' not in features:
            print(' or Feature=DailyDevotion is missing,')
        if 'glossaries' in datapath and 'Glossary' not in features:
            print(' or Feature=Glossary is missing,')
        print(f" DataPath={datapath}\n Rec.Conv={suitedpath}")


def chk_length(config):
    '''
    Check element length

    Warns when Description, ShortPromo or ShortCopyright (localized or
    not) is longer than 80 characters.
    '''
    for elem, value in config:
        # Select short elements
        if _base_name(elem) in ['Description','ShortPromo','ShortCopyright']:
            # Check max length
            if len(value) > 80:
                warning(f"{elem}: Element length is longer than expected")


def chk_obsolete(config):
    '''
    Check deprecated
    '''
    for elem, _ in config:
        if _base_name(elem) in deprecated:
            warning(f"{elem}: This attribute is deprecated")


def chk_https(config):
    '''
    Check http:// use
    '''
    for elem, value in config:
        if _base_name(elem) in ['TextSource']:
            if re.search(r'http\:', value):
                error(f"{elem}: URL is not secure, please use https://")


def main():
    '''
    Main function

    Validates the .conf file named on the command line. Returns True when
    no error was found, otherwise exits with status 1.
    '''
    # Get filename
    filename = get_parameters()
    basename = os.path.basename(filename)
    print(f"Validating {basename}:", file=sys.stdout)

    # Parse file
    config = readconf(filename)
    parsed = parseconf(config)
    listconf(parsed)

    # Checks
    chk_type(parsed)
    chk_repeats(parsed)
    chk_continuation(parsed)
    chk_localization(parsed)
    chk_required(parsed)
    chk_datapath(parsed)
    chk_length(parsed)
    chk_obsolete(parsed)
    chk_https(parsed)

    # Final report
    printerrors = '1 error' if errors == 1 else f'{errors} errors'
    if errors:
        print(f'{basename} fails to validate with {printerrors}',
              file=sys.stdout)
        sys.exit(1)
    print(f'{basename} validates', file=sys.stdout)
    return True


# Run only as a script, so that importing the module has no side effect
if __name__ == '__main__':
    main()