#!/usr/bin/env python3

# -*- coding: utf-8 -*-


# conflint.py - Validate a conf file.
#

# Copyright (C) 2021 CrossWire Bible Society

# Author: domcox <domcox@crosswire.org>

# This file is part of Sword Modules

# Sword Modules is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# Sword Modules is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with Sword Modules.  If not, see <https://www.gnu.org/licenses/>.

# Created:  2021-01-22


# Requirements

import argparse
import os.path
import re
import sys
from pathlib import Path
from datetime import date


# Module-level settings and state
# Version of this script
version = '1.0'

# Module name (unique identifier); filled in by parseconf() from the
# "[...]" header line of the conf file and read by chk_datapath()
modname = ''

# Known .conf elements mapped to the value type(s) each one accepts
# ('text', 'keyword', 'integer', 'boolean', 'isodate', 'year', 'rtf',
# 'html', 'rtf+html'); consulted by parseconf() and chk_type().
elemtype = {
    "Abbreviation": ["text"],
    "Description": ["text"],
    "DataPath": ["text"],
    "ModDrv": ["keyword"],
    "SourceType": ["keyword"],
    "Encoding": ["keyword"],
    "CompressType": ["keyword"],
    "BlockType": ["keyword"],
    "BlockCount": ["integer"],
    "Versification": ["keyword"],
    "CipherKey": ["text"],
    "KeyType": ["keyword"],
    "CaseSensitiveKeys": ["boolean"],
    "GlobalOptionFilter": ["keyword"],
    "Direction": ["keyword"],
    "DisplayLevel": ["integer"],
    "Font": ["text"],
    "OSISqToTick": ["boolean"],
    "Feature": ["keyword"],
    "GlossaryFrom": ["text"],
    "GlossaryTo": ["text"],
    "PreferredCSSXHTML": ["text"],
    "AndBibleCSS": ["text"],
    "CaseInsensitiveKeys": ["boolean"],
    "LangSortOrder": ["text"],
    "StrongsPadding": ["boolean"],
    "LocalStripFilter": ["text"],
    "About": ["text", "rtf"],
    "SwordVersionDate": ["isodate"],
    "Version": ["text"],
    "History": ["text", "html"],
    "MinimumVersion": ["text"],
    "Category": ["keyword"],
    "LCSH": ["text"],
    "Lang": ["text"],
    "Obsoletes": ["text"],
    "OSISVersion": ["text"],
    "Companion": ["text"],
    "DistributionLicense": ["keyword"],
    "DistributionNotes": ["text"],
    "Copyright": ["text"],
    "CopyrightHolder": ["text"],
    "CopyrightDate": ["year"],
    "CopyrightNotes": ["text"],
    "CopyrightContactName": ["text"],
    "CopyrightContactNotes": ["text"],
    "CopyrightContactAddress": ["text"],
    "CopyrightContactEmail": ["text"],
    "ShortPromo": ["text", "html"],
    "ShortCopyright": ["text"],
    "TextSource": ["text"],
    "UnlockInfo": ["text", "html", "rtf+html"],
    "InstallSize": ["integer"],
    "Notes": ["text"],
    "ReferenceBible": ["text"],
    "Scope": ["text"],
    "SearchOption": ["keyword"],
    "Siglum1": ["text"],
    "Siglum2": ["text"],
}

# Values accepted by each element whose type is 'keyword';
# chk_type() reports any other value as an error.
keywords = {
    "ModDrv": [
        "RawText", "RawText4", "zText", "zText4", "RawCom", "RawCom4",
        "zCom", "zCom4", "HREFCom", "RawFiles", "RawLD", "RawLD4", "zLD",
        "RawGenBook",
    ],
    "SourceType": ["OSIS", "TEI", "GBF", "ThML"],
    "Encoding": ["UTF-8", "UTF-16", "SCSU"],
    "CompressType": ["ZIP", "LZSS", "BZIP2", "XZ"],
    "BlockType": ["BOOK", "CHAPTER", "VERSE"],
    "Versification": [
        "Calvin", "Catholic", "Catholic2", "DarbyFr", "German", "KJV",
        "KJVA", "LXX", "Leningrad", "Luther", "MT", "NRSV", "NRSVA",
        "Orthodox", "Segond", "Synodal", "SynodalProt", "Vulg",
    ],
    "KeyType": ["TreeKey", "VerseKey"],
    "GlobalOptionFilter": [
        "UTF8Cantillation", "UTF8GreekAccents", "UTF8HebrewPoints",
        "UTF8ArabicPoints", "OSISLemma", "OSISMorphSegmentation",
        "OSISStrongs", "OSISFootnotes", "OSISScripref", "OSISMorph",
        "OSISHeadings", "OSISVariants", "OSISRedLetterWords", "OSISGlosses",
        "OSISXlit", "OSISEnum", "OSISReferenceLinks", "OSISRuby",
        "GBFStrongs", "GBFFootnotes", "GBFMorph", "GBFHeadings",
        "GBFRedLetterWords", "ThMLStrongs", "ThMLFootnotes", "ThMLScripref",
        "ThMLMorph", "ThMLHeadings", "ThMLVariants", "ThMLLemma",
    ],
    "Direction": ["LtoR", "RtoL", "BiDi"],
    "Feature": [
        "StrongsNumbers", "GreekDef", "HebrewDef", "GreekParse",
        "HebrewParse", "DailyDevotion", "Glossary", "Images", "NoParagraphs",
    ],
    "Category": [
        "Biblical Texts",
        "Commentaries",
        "Lexicons / Dictionaries",
        "Glossaries",
        "Daily Devotional",
        "Generic Books",
        "Maps",
        "Images",
        "Cults / Unorthodox / Questionable Material",
        "Essays",
    ],
    "DistributionLicense": [
        "Public Domain",
        "Copyrighted",
        "Copyrighted; Permission to distribute granted to CrossWire",
        "Copyrighted; Permission granted to distribute non-commercially in SWORD format",
        "Copyrighted; Free non-commercial distribution",
        "Copyrighted; Freely distributable",
        "GFDL",
        "GPL",
        "Creative Commons: BY-NC-ND 4.0",
        "Creative Commons: BY-NC-SA 4.0",
        "Creative Commons: BY-NC 4.0",
        "Creative Commons: BY-ND 4.0",
        "Creative Commons: BY-SA 4.0",
        "Creative Commons: BY 4.0",
        "Creative Commons: CC0",
    ],
    "SearchOption": ["IncludeKeyInSearch"],
}

# Elements that may appear more than once (see chk_repeats)
multi = ["GlobalOptionFilter", "Feature", "Obsoletes"]

# Elements whose value may span several lines (see chk_continuation)
continuation = [
    "About",
    "Copyright",
    "CopyrightNotes",
    "CopyrightContactName",
    "CopyrightContactNotes",
    "CopyrightContactAddress",
    "DistributionNotes",
    "TextSource",
    "UnlockInfo",
    "Notes",
]

# Elements that accept a locale suffix such as _de (see chk_localization)
localization = [
    "Abbreviation",
    "Description",
    "About",
    "History",
    "Copyright",
    "CopyrightHolder",
    "CopyrightNotes",
    "CopyrightContactName",
    "CopyrightContactNotes",
    "CopyrightContactAddress",
    "CopyrightContactEmail",
    "ShortPromo",
    "ShortCopyright",
    "DistributionNotes",
    "TextSource",
    "UnlockInfo",
]

# Elements every .conf file must contain (see chk_required)
required = [
    "Description",
    "DataPath",
    "ModDrv",
    "About",
    "SwordVersionDate",
    "DistributionLicense",
    "TextSource",
    "Version",
]

# Elements accepted with a "not documented" warning (see parseconf)
unidentified = ["Notes", "ReferenceBible", "SearchOption", "Siglum1", "Siglum2"]

# Elements reported as deprecated (see chk_obsolete)
deprecated = ["OSISqToTick"]

# Names of the accepted elements found in the .conf file, in file order
# and including any locale suffix; filled in by listconf()
existing = []

# Number of errors reported so far; incremented by error()
errors = 0

# Number of warnings reported so far; incremented by warning()
warnings = 0


def die(msg):
    '''
    Report a fatal error and stop the program.

    The message goes to stderr, a "Parsing failed" notice goes to stdout,
    then the interpreter exits with status 1.
    '''
    fatal = ' ERROR! ' + msg
    print(fatal, file=sys.stderr)
    print('Parsing failed\n')
    raise SystemExit(1)


def error(msg):
    '''
    Report a non-fatal error on stderr and count it.

    Returns the updated value of the module-level errors counter.
    '''
    global errors
    report = ' ERROR! ' + msg
    print(report, file=sys.stderr)
    errors = errors + 1
    return errors


def warning(msg):
    '''
    Report a warning on stderr and count it.

    Returns the updated value of the module-level warnings counter.
    '''
    global warnings
    report = ' WARNING! ' + msg
    print(report, file=sys.stderr)
    warnings = warnings + 1
    return warnings


def get_parameters():
    '''
    Parse the command line.

    Returns the .conf filename given as the single positional argument.
    Stops through die() when that path is not an existing regular file.
    '''
    description = '''
    Validate a SWORD .conf file contents.
    '''
    # Parse command-line
    cli = argparse.ArgumentParser(description=description)
    cli.add_argument('conf', help='config file')
    fileconf = cli.parse_args().conf

    # The conf file given in input must exist
    if Path(fileconf).is_file():
        return fileconf
    die(f"File '{fileconf}' does not exist.")
    return fileconf


# Discard: doesn't work with continuation
def readconf2(file):
    '''
    Alternative reader based on configparser (kept for reference only).

    Reads file as UTF-8, stops through die() unless it holds exactly one
    section, then prints every "key = value" pair of that section.
    Returns nothing.
    '''
    # Imported here because the file's import block does not provide it
    # and only this discarded reader needs it.
    import configparser

    config = configparser.RawConfigParser(strict=False)
    # Keep option names as written (the default lower-cases them)
    config.optionxform = lambda option: option
    # Same encoding as readconf()
    config.read(file, encoding='utf-8')
    if len(config.sections()) != 1:
        die('Invalid File Format')

    for sect in config.sections():
        for k, v in config.items(sect):
            print(' {} = {}'.format(k, v))
        print()


def readconf(file):
    '''
    Read a conf file and return its entries as a list of strings.

    Every line is stripped. Lines ending with a backslash are joined,
    each followed by a newline, to the line that ends the continuation.
    Blank lines and lines starting with '#' produce no entry and discard
    any continuation collected so far.
    '''
    entries = []
    # Stripped continuation lines waiting for their final line
    pending = []
    with open(file, 'r', encoding='utf-8', newline='\n') as handle:
        for raw in handle:
            text = raw.strip()
            if text.endswith('\\'):
                pending.append(text + '\n')
                continue
            # Simple line or end of continuation
            if text and not text.startswith('#'):
                entries.append(''.join(pending) + text)
            pending = []
    return entries


def parseconf(config):
    '''
    Split the raw entries returned by readconf() into (element, value).

    A "[Name]" entry sets the module-level modname; the program stops
    through die() when Name is not a valid identifier. Entries without
    '=' and entries whose key (locale suffix removed) is not in elemtype
    are reported through error() and left out. Keys listed in
    unidentified also trigger a warning().
    Returns the list of (element, value) tuples, element keeping its
    locale suffix.
    '''
    global modname
    accepted = []

    for raw in config:
        item = raw.strip()
        # Module identifier line
        header = re.search(r'^\[(.+?)\]$', item)
        if header:
            modname = header.group(1)
            if not modname.isidentifier():
                die(f"{modname}: Invalid Unique Identifier.")
            continue
        # Anything else must be key=value
        if '=' not in item:
            error(f"{item}: Parsing error, unexpected item")
            continue
        element, value = item.split('=', 1)
        # Drop the locale suffix, if any, to get the bare key
        key = element.partition('_')[0]
        if key in elemtype:
            accepted.append((element, value))
        else:
            error(f"{key}: Unknown element")
        if key in unidentified:
            warning(f"{key}: Element is not documented")
    return accepted


def chk_type(config):
    '''
    Check every value against the type(s) declared for its element.

    config is a list of (element, value) tuples whose element, once its
    locale suffix is removed, is a key of elemtype. Anything after the
    first '|' in a value is ignored. Problems are reported through
    error(); nothing is returned.

    Dates and years that match the expected pattern but do not exist
    (for instance 2021-02-30 or year 0000) are reported as errors.
    '''
    isoregex = '^([0-9]{4})-?(1[0-2]|0[1-9])-?(3[01]|0[1-9]|[12][0-9])$'
    yregex = '^([0-9]{4})$'
    today = date.today()

    for elem, value in config:
        # Remove extension (eg: About_de, remove _de)
        subelem = elem.split('_', 1)[0]
        # Remove parameters (eg: GlobalOptionFilter=OSISReferenceLinks|Reference..|..)
        value = value.split('|', 1)[0]
        allowed = elemtype[subelem]

        # Type=keyword
        if 'keyword' in allowed:
            if value not in keywords[subelem]:
                error(f"{elem}={value}: Not matching predefined value")
        # Type=isodate
        elif 'isodate' in allowed:
            found = re.match(isoregex, value)
            if not found:
                error(f"{elem}={value}: Incorrect format")
                continue
            try:
                # Build the date from the captured fields rather than
                # date.fromisoformat(): the compact YYYYMMDD form is only
                # understood by newer Python versions, and an impossible
                # day must be reported, not crash the linter.
                SWdate = date(*(int(field) for field in found.groups()))
            except ValueError:
                error(f"{elem}={value}: Not a valid calendar date")
                continue
            if SWdate > today:
                error(f"{elem}={SWdate}: Future dates are not allowed")
            if SWdate < date(1992, 1, 1):
                error(f"{elem}={SWdate}: Older dates than Sword's are not allowed")
        # Type=year (single years, lists "a,b" and ranges "a-b")
        elif 'year' in allowed:
            for year in value.replace('-', ',').split(','):
                if not re.match(yregex, year):
                    error(f"{elem}={year}: Incorrect format")
                    continue
                # Compare as integers: building a date would fail on 0000
                number = int(year)
                if number > today.year:
                    error(f"{elem}={year}: Future years are not allowed")
                if number < 1583:
                    error(f"{elem}={year}: Years prior to 1583 are not allowed")
        # Type=text, html or rtf
        else:
            detected = typevalue(value)
            if detected not in allowed:
                error(f"{elem}: '{detected}' formatting is not allowed")


def typevalue(str):
    '''
    Classify str as 'boolean', 'integer', 'rtf+html', 'html', 'rtf'
    or 'text'.

    'boolean' is any capitalisation of True/False, 'integer' is a string
    for which isnumeric() holds; markup is detected with ishtml() and
    isrtf().
    '''
    if str.capitalize() in ('True', 'False'):
        return 'boolean'
    if str.isnumeric():
        return 'integer'
    has_html = bool(ishtml(str))
    has_rtf = bool(isrtf(str))
    by_markup = {
        (True, True): 'rtf+html',
        (True, False): 'html',
        (False, True): 'rtf',
        (False, False): 'text',
    }
    return by_markup[(has_html, has_rtf)]


def ishtml(str):
    '''
    Look for HTML codes in str.

    Returns the match object of the first '<a', '/>' or '</' found
    (truthy), or None when there is none.
    '''
    markup = re.compile(r'<a|/>|</')
    return markup.search(str)


def isrtf(str):
    '''
    Look for RTF codes in str.

    Returns the match object of the first '\\par' or '\\qc' control word
    found (truthy), or None when there is none.
    '''
    control = re.compile(r'\\par|\\qc')
    return control.search(str)


def listconf(config):
    '''
    Record the key of every (key, value) tuple of config, in order,
    in the module-level list existing. Returns nothing.
    '''
    existing.extend(key for key, _ in config)


def chk_repeats(config):
    '''
    Report elements that occur more than once in existing although
    they are not listed in multi. Each offender is reported once.
    '''
    reported = set()
    for elem, _ in config:
        # Count repeats
        if existing.count(elem) <= 1 or elem in multi:
            continue
        if elem in reported:
            continue
        error(f"{elem}: Repeating this element is not allowed.")
        reported.add(elem)


def chk_continuation(config):
    '''
    Report values holding a line continuation (a backslash followed by
    a newline) for elements that are not listed in continuation.
    '''
    for elem, value in config:
        if '\\\n' not in value:
            continue
        # Locale suffix removed before looking the element up
        base = elem.partition('_')[0]
        if base not in continuation:
            error(f"{elem}: Continuation not allowed on that element.")


def chk_localization(config):
    '''
    Report elements carrying a locale suffix (text after the first '_')
    although they are not listed in localization.
    '''
    for elem, _ in config:
        base, sep, _locale = elem.partition('_')
        if sep and base not in localization:
            error(f"{elem}: Localization is not allowed")


def chk_required(config):
    '''
    Report every element of required that is absent from existing.
    The config argument is not used.
    '''
    missing = [name for name in required if name not in existing]
    for name in missing:
        error(f"{name}: Missing required element")


def chk_datapath(config):
    '''
    Warn when DataPath differs from the path recommended for the driver.

    The recommended path is built from the ModDrv value, the lower-cased
    module name (module-level modname) and, for dictionary drivers, a
    'devotionals/' or 'glossaries/' sub-directory selected by
    Feature=DailyDevotion or Feature=Glossary. Nothing is checked when
    ModDrv is missing or unknown. Returns nothing.
    '''
    # Keys must match the ModDrv keywords exactly
    path = {
        'RawText'   : './modules/texts/rawtext/',
        'RawText4'  : './modules/texts/rawtext4/',
        'zText'     : './modules/texts/ztext/',
        'zText4'    : './modules/texts/ztext4/',
        'zCom'      : './modules/comments/zcom/',
        'zCom4'     : './modules/comments/zcom4/',
        'HREFCom'   : './modules/comments/hrefcom/',
        'RawCom'    : './modules/comments/rawcom/',
        'RawCom4'   : './modules/comments/rawcom4/',
        'RawFiles'  : './modules/comments/rawfiles/',
        'zLD'       : './modules/lexdict/zld/',
        'RawLD'     : './modules/lexdict/rawld/',
        'RawLD4'    : './modules/lexdict/rawld4/',
        'RawGenBook': './modules/genbook/rawgenbook/'
        }
    # Module
    module = modname.lower()
    # Read needed values (Feature may be repeated, so collect them all)
    datapath = ''
    moddrv = ''
    features = []
    for elem, value in config:
        if elem == 'DataPath':
            datapath = value
        elif elem == 'ModDrv':
            moddrv = value
        elif elem == 'Feature':
            features.append(value)
    # Build feature specific sub-dir
    if 'DailyDevotion' in features:
        subdir = 'devotionals/'
    elif 'Glossary' in features:
        subdir = 'glossaries/'
    else:
        subdir = ''
    if moddrv not in path:
        return
    # Build the recommended DataPath
    if moddrv in ('zLD', 'RawLD', 'RawLD4'):
        suitedpath = path[moddrv] + subdir + module + '/dict'
    else:
        suitedpath = path[moddrv] + module + '/'
    # Compare DataPath values
    if datapath != suitedpath:
        warning('DataPath differs from the recommended convention,')
        # Only suggest a missing Feature when it really is missing
        if 'devotionals' in datapath and 'DailyDevotion' not in features:
            print(' or Feature=DailyDevotion is missing,')
        if 'glossaries' in datapath and 'Glossary' not in features:
            print(' or Feature=Glossary is missing,')
        print(f"  DataPath={datapath}\n  Rec.Conv={suitedpath}")


def chk_length(config):
    '''
    Warn when a Description, ShortPromo or ShortCopyright value
    (localized or not) is longer than 80 characters.
    '''
    short_elements = ('Description', 'ShortPromo', 'ShortCopyright')
    for elem, value in config:
        # Remove locale
        base = elem.partition('_')[0]
        if base not in short_elements:
            continue
        # Check max length
        if len(value) > 80:
            warning(f"{elem}: Element length is longer than expected")


def chk_obsolete(config):
    '''
    Warn about elements (locale suffix ignored) listed in deprecated.
    '''
    for elem, _ in config:
        base = elem.partition('_')[0]
        if base in deprecated:
            warning(f"{elem}: This attribute is deprecated")


def chk_https(config):
    '''
    Report TextSource values (localized or not) that contain 'http:'.
    '''
    for elem, value in config:
        base = elem.partition('_')[0]
        if base != 'TextSource':
            continue
        if 'http:' in value:
            error(f"{elem}: URL is not secure, please use https://")

def main():
    '''
    Validate the .conf file named on the command line.

    Reads and parses the file, runs every check, then prints the final
    report on stdout. Exits with status 1 when at least one error was
    reported; returns True otherwise.
    '''
    # Get filename
    filename = get_parameters()
    basename = os.path.basename(filename)
    print(f"Validating {basename}:", file=sys.stdout)
    # Parse file
    config = readconf(filename)
    parsed = parseconf(config)
    listconf(parsed)
    # Checks
    chk_type(parsed)
    chk_repeats(parsed)
    chk_continuation(parsed)
    chk_localization(parsed)
    chk_required(parsed)
    chk_datapath(parsed)
    chk_length(parsed)
    chk_obsolete(parsed)
    chk_https(parsed)
    # Final report
    if errors:
        printerrors = '1 error' if errors == 1 else f'{errors} errors'
        print(f'{basename} fails to validate with {printerrors}', file=sys.stdout)
        sys.exit(1)
    print(f'{basename} validates', file=sys.stdout)
    return True


# Run only when executed as a script, so that importing this file does
# not parse the command line or exit the interpreter.
if __name__ == '__main__':
    main()