[sword-cvs] icu-sword/source/tools/genidna .cvsignore,NONE,1.1 Makefile.in,NONE,1.1 filterRFC3454.pl,NONE,1.1 genidna.8.in,NONE,1.1 genidna.c,NONE,1.1 genidna.dsp,NONE,1.1 genidna.h,NONE,1.1 genidna.vcproj,NONE,1.1 store.c,NONE,1.1

sword@www.crosswire.org sword@www.crosswire.org
Tue, 9 Sep 2003 19:42:58 -0700


Update of /usr/local/cvsroot/icu-sword/source/tools/genidna
In directory www:/tmp/cvs-serv19862/source/tools/genidna

Added Files:
	.cvsignore Makefile.in filterRFC3454.pl genidna.8.in genidna.c 
	genidna.dsp genidna.h genidna.vcproj store.c 
Log Message:
ICU 2.6 commit

--- NEW FILE: .cvsignore ---
*.d
*.pdb
Debug
Makefile
Release
genidna
genidna.8

--- NEW FILE: Makefile.in ---
## Makefile.in for ICU - tools/genidna
## Copyright (c) 2001-2003, International Business Machines Corporation and
## others. All Rights Reserved.
## Steven R. Loomis/Markus W. Scherer

## Source directory information
srcdir = @srcdir@
top_srcdir = @top_srcdir@

top_builddir = ../..

include $(top_builddir)/icudefs.mk

##

## Manual page section; used as the man page file suffix and as the
## man$(SECTION) install directory name.
SECTION = 8

## Man page file name: $(TARGET) with $(EXEEXT) stripped, plus ".$(SECTION)".
## TARGET is assigned further down; that is fine because "=" defers expansion
## until the variable is used.
MAN_FILES = $(TARGET:$(EXEEXT)=).$(SECTION)

## Build directory information
## subdir is this directory relative to $(top_builddir); it is passed to
## config.status as part of CONFIG_FILES in the regeneration rules below.
subdir = tools/genidna

## NOTE(review): ICUDATADIR and UNICODEDATADIR are not referenced anywhere else
## in this file - confirm whether an included makefile still needs them.
ICUDATADIR=$(top_builddir)/data
UNICODEDATADIR=$(top_srcdir)/../data/unidata

## Extra files to remove for 'make clean'
## NOTE(review): RES_FILES and TEST_FILES are not assigned in this file;
## presumably empty or provided by icudefs.mk - confirm.
CLEANFILES = *~ $(DEPS) $(RES_FILES) $(TEST_FILES) $(MAN_FILES)

## Target information
TARGET = genidna$(EXEEXT)

## Header search path: common (build and source trees) and toolutil.
CPPFLAGS += -I$(top_builddir)/common -I$(top_srcdir)/common -I$(srcdir)/../toolutil
LIBS = $(LIBICUTOOLUTIL) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)

OBJECTS = genidna.o store.o

## One dependency file per object file; included at the end of this makefile.
DEPS = $(OBJECTS:.o=.d)

## List of phony targets
## (names that are commands rather than files, so they are always remade)
.PHONY : all all-local install install-local clean clean-local		\
distclean distclean-local dist dist-local check	\
check-local build-data install-man

## Clear suffix list
## (disables the built-in suffix rules)
.SUFFIXES :

## List of standard targets
## Each standard target simply delegates to its "-local" counterpart.
all: all-local
install: install-local
clean: clean-local
distclean : distclean-local
dist: dist-local
check: all check-local

## Default build: the tool itself, the build-data hook and the man page.
## NOTE(review): build-data is declared phony above but has no rule in this
## file; presumably a placeholder or defined by an included makefile - confirm.
all-local: $(TARGET) build-data $(MAN_FILES)

## Install the tool into $(DESTDIR)$(sbindir), creating the directory first.
## The man page is installed through the install-man prerequisite.
install-local: all-local install-man
	$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
	$(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)/$(TARGET)

# man page
# Installs the first prerequisite ($<) into the man$(SECTION) directory;
# MAN_FILES holds a single file name here.
install-man: $(MAN_FILES)
	$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
	$(INSTALL_DATA) $< $(DESTDIR)$(mandir)/man$(SECTION)

# Generate the man page from its .in template by re-running config.status in
# the top build directory with CONFIG_FILES restricted to this one file.
%.$(SECTION): $(srcdir)/%.$(SECTION).in
	cd $(top_builddir) \
	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status

# build postscript and pdf formats
#$(TARGET).ps: $(TARGET).$(SECTION)
#	groff -man < $< > $@

#$(TARGET).pdf: $(TARGET).ps
#	ps2pdf $< $@

## Nothing to do for 'dist' in this directory.
dist-local:

## Remove the CLEANFILES list (only if it is non-empty), then the tool binary
## and its object files.
clean-local:
	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
	$(RMV) $(TARGET) $(OBJECTS)

## distclean additionally removes the generated Makefile.
distclean-local: clean-local
	$(RMV) Makefile

## No tests are run here; 'check' only makes sure everything is built.
check-local: all-local

## Regenerate this Makefile when Makefile.in or config.status changes, by
## re-running config.status for this single file.
Makefile: $(srcdir)/Makefile.in  $(top_builddir)/config.status
	cd $(top_builddir) \
	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status

## Link the tool from its object files.
## NOTE(review): LINK.cc and OUTOPT are not defined in this file; presumably
## they come from the included icudefs.mk - confirm.
$(TARGET) : $(OBJECTS)
	$(LINK.cc) $(OUTOPT)$@ $^ $(LIBS) 

## Pull in the generated dependency files, except when every goal named on the
## command line ends in "clean" (patsubst strips such words; anything left over
## means a real build goal was requested). With no goals at all, include them.
## The leading "-" keeps make quiet when the .d files do not exist yet.
ifeq (,$(MAKECMDGOALS))
-include $(DEPS)
else
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
-include $(DEPS)
endif
endif


--- NEW FILE: filterRFC3454.pl ---
#!/usr/bin/perl
# Copyright (c) 2001-2003 International Business Machines
# Corporation and others. All Rights Reserved.

####################################################################################
# filterRFC3454.pl:
# This tool filters the RFC-3454 txt file for String prep tables
# Author: Ram Viswanadha
#        
####################################################################################

use File::Find;
use File::Basename;
use IO::File;
use Cwd;
use File::Copy;
use Getopt::Long;
use File::Path;
use File::Copy;

# Banner placed in the header comment of every table file this script writes.
$warning = "###################\n# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT \n#################\n\n";
#run the program
main();

#---------------------------------------------------------------------
# The main program

# Entry point: parses the command line, opens the RFC text and hands each
# "Start Table X" section to the routine that writes the matching output file.
#
# Required options (usage() is called if any is missing):
#   --sourcedir  directory containing the RFC text
#   --destdir    directory that receives the generated table files
#   --filename   name of the RFC text file inside --sourcedir
sub main(){
  GetOptions(
           "--sourcedir=s" => \$sourceDir,
           "--destdir=s" => \$destDir,
           "--filename=s" => \$fileName,
           );
  usage() unless defined $sourceDir;
  usage() unless defined $destDir;
  usage() unless defined $fileName;

  $infile = $sourceDir."/".$fileName;
  $inFH = IO::File->new($infile,"r")
            or die  "could not open the file $infile for reading: $! \n";

  while(defined ($line=<$inFH>)){
      next unless $line=~ /Start\sTable/;
      # Exactly one handler runs per start line. The handlers consume the input
      # up to the matching "End Table" line, so the chain must not re-test
      # $line after a handler has returned. The dots are escaped so that only
      # the literal table names match.
      if($line =~ /\bA\.1\b/){
            createUnassignedTable($inFH,$destDir);
      }
      elsif($line =~ /\bB\.1\b/){
            createCaseMapNoNorm($inFH,$destDir);
      }
      elsif($line =~ /\bB\.2\b/){
            createCaseMap($inFH,$destDir);
      }
      elsif($line =~ /\bC\.\d/ ){
            # every C.* table is appended to the same prohibited-code-point file
            createProhibitedTable($inFH,$destDir,$line);
      }
  }
  close($inFH);
}

#-----------------------------------------------------------------------
# Copies one RFC table from $inFH to $outFH.
#
# Parameters:
#   $inFH    - input handle positioned just after a "Start Table" line
#   $outFH   - output handle for the generated table file
#   $comment - header text written before the table data
#   $print   - optional; when 1, every data line is also echoed to STDOUT
#
# Page headings, form feeds and blank lines are skipped. Leading white space is
# removed, a code point range "XXXX-YYYY" at the start of the line is rewritten
# as "XXXX..YYYY", and a ';' is appended to lines that have none. Reading stops
# at the "End Table" line, where the total number of code points is written as
# a trailing comment. If the input ends first, no total is written.
sub readPrint{
    my ($inFH, $outFH, $comment, $print) = @_;
    my $count = 0;
    print $outFH $comment."\n";
    while(defined (my $line = <$inFH>)){
        next if $line =~ /Hoffman\s\&\sBlanchet/;  # ignore heading
        next if $line =~ /RFC\s3454/; # ignore heading
        next if $line =~ /\f/;  # ignore form feed
        next if $line eq "\n";  # ignore blank lines
        # break if "End Table" is found
        if( $line =~ /End\sTable/){
            print $outFH "\n# Total code points $count\n\n";
            return;
        }
        if(defined $print && $print==1){
            print $line;
        }
        $line =~ s/^\s+//;
        # Only rewrite a hyphen that separates two code points at the start of
        # the line. Hyphens inside character names (e.g. LEFT-TO-RIGHT MARK)
        # must be left alone and must not make the line look like a range.
        $line =~ s/^([0-9A-Fa-f]+)-([0-9A-Fa-f]+)/$1..$2/;
        if($line !~ /\;/){
            $line =~ s/$/;/;
        }
        if($line =~ /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)/){
            # inclusive range; an inverted range contributes nothing
            my ($start, $end) = (hex($1), hex($2));
            $count += $end - $start + 1 if $end >= $start;
        }else{
            $count++;
        }
        print $outFH $line;
    }
}
#-----------------------------------------------------------------------
# Converts a string of hexadecimal digits (e.g. "024F") to its numeric value.
# Surrounding white space is ignored. Returns undef for an undefined or empty
# argument, which is what the digit loop this replaces yielded for no input.
# The old loop added each character numerically, so the digits A-F counted
# as 0; hex() handles them correctly.
sub atoi {
    my $str = shift;
    return undef unless defined $str;
    $str =~ s/^\s+//;
    $str =~ s/\s+$//;
    return undef if $str eq '';
    return hex($str);
}
#-----------------------------------------------------------------------
# Writes Table A.1 to <destDir>/rfc3454_A_1.txt.
#
# Parameters:
#   $inFH    - input handle positioned just after the "Start Table A.1" line
#   $destDir - directory that receives the output file
#
# Dies if the output file cannot be opened or closed. Lexical variables are
# used so that the caller's package globals are not overwritten.
sub createUnassignedTable{
    my ($inFH,$destDir) = @_;
    my $outfile = $destDir."/"."rfc3454_A_1.txt";
    my $outFH = IO::File->new($outfile,"w")
            or die  "could not open the file $outfile for writing: $! \n";
    my $comment = $warning."# This file contains code points from Table A.1 from RFC 3454\n";
    readPrint($inFH,$outFH, $comment);
    # buffered write errors only surface when the handle is closed
    close($outFH)
            or die  "could not close the file $outfile : $! \n";
}
#-----------------------------------------------------------------------
# Writes Table B.1 to <destDir>/rfc3454_B_1.txt.
#
# Parameters:
#   $inFH    - input handle positioned just after the "Start Table B.1" line
#   $destDir - directory that receives the output file
#
# Dies if the output file cannot be opened or closed. Lexical variables are
# used so that the caller's package globals are not overwritten.
sub createCaseMapNoNorm{
    my ($inFH,$destDir) = @_;
    my $outfile = $destDir."/"."rfc3454_B_1.txt";
    my $outFH = IO::File->new($outfile,"w")
            or die  "could not open the file $outfile for writing: $! \n";
    my $comment = $warning."# This file contains code points from Table B.1 from RFC 3454\n";
    readPrint($inFH,$outFH,$comment);
    # buffered write errors only surface when the handle is closed
    close($outFH)
            or die  "could not close the file $outfile : $! \n";
}
#-----------------------------------------------------------------------
# Writes Table B.2 to <destDir>/rfc3454_B_2.txt.
#
# Parameters:
#   $inFH    - input handle positioned just after the "Start Table B.2" line
#   $destDir - directory that receives the output file
#
# Dies if the output file cannot be opened or closed. Lexical variables are
# used so that the caller's package globals are not overwritten.
sub createCaseMap{
    my ($inFH,$destDir) = @_;
    my $outfile = $destDir."/"."rfc3454_B_2.txt";
    my $outFH = IO::File->new($outfile,"w")
            or die  "could not open the file $outfile for writing: $! \n";
    my $comment = $warning."# This file contains code points from Table B.2 from RFC 3454\n";
    readPrint($inFH,$outFH,$comment);
    # buffered write errors only surface when the handle is closed
    close($outFH)
            or die  "could not close the file $outfile : $! \n";
}
#-----------------------------------------------------------------------
# Appends one of the C.* tables to <destDir>/rfc3454_C_X.txt.
#
# Parameters:
#   $inFH    - input handle positioned just after the "Start Table C.x" line
#   $destDir - directory that receives the output file
#   $line    - the "Start Table" line itself; used to detect the first table
#              and to label the section in the output
#
# All C tables share one output file. An existing file is deleted when table
# C.1.1 is seen, so a re-run does not append to stale data. Dies if the file
# cannot be deleted, opened or closed.
sub createProhibitedTable{
    my ($inFH,$destDir,$line) = @_;
    my $outfile = $destDir."/"."rfc3454_C_X.txt";
    if($line =~ /C\.1\.1/ && -e $outfile){
        unlink($outfile)
            or die "could not delete the file $outfile : $! \n";

    }
    # turn "----- Start Table C.x -----" into a plain "Table C.x" label
    $line =~ s/Start//;
    $line =~ s/-//g;
    my $comment = $warning."# code points from $line";
    my $outFH = IO::File->new($outfile, "a")
            or die  "could not open the file $outfile for writing: $! \n";
    readPrint($inFH,$outFH,$comment);
    # buffered write errors only surface when the handle is closed
    close($outFH)
            or die  "could not close the file $outfile : $! \n";
}
#-----------------------------------------------------------------------
# Prints the command-line help and terminates the script.
# This is only called when a required option is missing, so the exit status is
# non-zero to let calling scripts and makefiles notice the failure.
sub usage {
    print << "END";
Usage:
filterRFC3454.pl
Options:
        --sourcedir=<directory>
        --destdir=<directory>
        --filename=<name of RFC file>

e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --filename=rfc3454.txt

filterRFC3454.pl filters the RFC file and creates String prep table files.
The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt

END
  exit(1);
}



--- NEW FILE: genidna.8.in ---
.\" Hey, Emacs! This is -*-nroff-*- you know...
.\"
.\" genidna.8: manual page for the genidna utility
.\"
.\" Copyright (C) 2003 IBM, Inc. and others.
.\"
.TH GENIDNA 8 "18 March 2003" "ICU MANPAGE" "ICU @VERSION@ Manual"
.SH NAME
.B genidna
\- compile IDNA data from files filtered by filterRFC3454.pl 
.SH SYNOPSIS
.B genidna
[
.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
]
[
.BR "\-v\fP, \fB\-\-verbose"
]
[
.BI "\-c\fP, \fB\-\-copyright"
]
[
.BI "\-s\fP, \fB\-\-sourcedir" " source"
]
[
.BI "\-d\fP, \fB\-\-destdir" " destination"
]
.SH DESCRIPTION
.B genidna
reads filtered RFC 3454 files and compiles their
information into a binary form.
The resulting file,
.BR uidna.icu ,
can then be read directly by ICU, or used by
.BR pkgdata (8)
for incorporation into a larger archive or library.
.LP
The files read by
.B genidna
are described in the
.B FILES
section. 
.SH OPTIONS
.TP
.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
Print help about usage and exit.
.TP
.BR "\-v\fP, \fB\-\-verbose"
Display extra informative messages during execution.
.TP
.BI "\-c\fP, \fB\-\-copyright"
Include a copyright notice into the binary data.
.TP
.BI "\-s\fP, \fB\-\-sourcedir" " source"
Set the source directory to
.IR source .
The default source directory is the current directory.
.TP
.BI "\-d\fP, \fB\-\-destdir" " destination"
Set the destination directory to
.IR destination .
The default destination directory is specified by the environment variable
.BR ICU_DATA .
.SH ENVIRONMENT
.TP 10
.B ICU_DATA
Specifies the directory containing ICU data. Defaults to
.BR @thepkgicudatadir@/@PACKAGE@/@VERSION@/ .
Some tools in ICU depend on the presence of the trailing slash. It is thus
important to make sure that it is present if
.B ICU_DATA
is set.
.SH FILES
The following files are read by
.B genidna
and are looked for in the
.I source
/misc for rfc3454_*.txt files and in 
.I source
/unidata for NormalizationCorrections.txt.
.TP 20
.B rfc3454_A_1.txt
Contains the list of unassigned codepoints in Unicode version 3.2.0.\|.\|.. 
.TP
.B rfc3454_B_1.txt
Contains the list of code points that are commonly mapped to nothing.\|.\|..
.TP
.B rfc3454_B_2.txt
Contains the list of mappings for casefolding of  code points when Normalization form NFKC is specified.\|.\|..
.TP
.B rfc3454_C_X.txt
Contains the list of code points that are prohibited for IDNA.
.TP
.B NormalizationCorrections.txt
Contains the list of code points whose normalization has changed since Unicode Version 3.2.0. 
.SH VERSION
@VERSION@
.SH COPYRIGHT
Copyright (C) 2000-2002 IBM, Inc. and others.
.SH SEE ALSO
.BR pkgdata (8)

--- NEW FILE: genidna.c ---
/*
*******************************************************************************
*
*   Copyright (C) 2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  genidna.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003-02-06
*   created by: Ram Viswanadha
*
*   This program reads the rfc3454_*.txt files,
*   parses them, and extracts the data for Nameprep conformance.
*   It then preprocesses it and writes a binary file for efficient use
*   in various IDNA conversion processes.
*/

#include <stdio.h>
#include <stdlib.h>
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/putil.h"
#include "cmemory.h"
#include "cstring.h"
#include "unicode/udata.h"
#include "unewdata.h"
#include "uoptions.h"
#include "uparse.h"
#include "unicode/uset.h"
#include "uprops.h"

U_CDECL_BEGIN
#include "genidna.h"
U_CDECL_END

#ifdef WIN32
#   pragma warning(disable: 4100)
#endif

UBool beVerbose=FALSE, haveCopyright=TRUE, printRules = FALSE;

/* prototypes --------------------------------------------------------------- */

static void
parseMappings(const char *filename, UBool withNorm, UBool reportError, UErrorCode *pErrorCode);

static void
parseTable(const char *filename, UBool isUnassigned, UErrorCode *pErrorCode);

static void
parseNormalizationCorrections(const char *filename, UErrorCode *pErrorCode);

/*static void 
setLDHValues(UErrorCode* pErrorCode);*/

static void
setLabelSeperators(UErrorCode* pErrorCode);

static void 
printMapping(UChar32 cp,UChar32* mapping, int32_t mappingLength);

/*
 * Input files, in the order main() processes them. Entries 0-3 are read from
 * the MISC_DIR subdirectory of the source directory, entry 4 from UNIDATA_DIR.
 */
static const char* fileNames[] = {
                                    "rfc3454_A_1.txt", /* contains unassigned code points */
                                    "rfc3454_C_X.txt", /* contains code points that are prohibited */
                                    "rfc3454_B_1.txt", /* contains case mappings when normalization is turned off */
                                    "rfc3454_B_2.txt", /* contains case mappings when normalization is turned on */
                                    "NormalizationCorrections.txt",/* normalization corrections  */
                                };
/* subdirectories of the source directory that hold the input files */
static const char *UNIDATA_DIR = "unidata";
static const char *MISC_DIR    = "misc";

/* -------------------------------------------------------------------------- */

/*
 * Command line options. main() accesses the entries by index, so the order
 * here must stay in sync with the options[n] references in main().
 */
static UOption options[]={
    UOPTION_HELP_H,                 /* 0 */
    UOPTION_HELP_QUESTION_MARK,     /* 1 */
    UOPTION_VERBOSE,                /* 2 */
    UOPTION_COPYRIGHT,              /* 3 */
    UOPTION_DESTDIR,                /* 4 */
    UOPTION_SOURCEDIR,              /* 5 */
    { "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 },     /* 6: value is passed to setUnicodeVersion() */
    { "generate-rules", NULL, NULL, NULL, 'g', UOPT_NO_ARG, 0 }     /* 7: sets printRules */
};

extern int
main(int argc, char* argv[]) {
#if !UCONFIG_NO_IDNA
    char* filename = NULL;
#endif
    const char *srcDir=NULL, *destDir=NULL, *suffix=NULL;
    char *basename=NULL;
    char *saveBasename = NULL;
    UErrorCode errorCode=U_ZERO_ERROR;

    U_MAIN_INIT_ARGS(argc, argv);

    /* preset then read command line options */
    options[4].value=u_getDataDirectory();
    options[5].value="";
    options[6].value="3.0.0";
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    }
    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
        /*
         * Broken into chucks because the C89 standard says the minimum
         * required supported string length is 509 bytes.
         */
        fprintf(stderr,
            "Usage: %s [-options] [suffix]\n"
            "\n"
            "Read the rfc3454_*.txt files and\n"
            "create a binary file " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE " with the normalization data\n"
            "\n",
            argv[0]);
        fprintf(stderr,
            "Options:\n"
            "\t-h or -? or --help		this usage text\n"
            "\t-v or --verbose			verbose output\n"
            "\t-c or --copyright		include a copyright notice\n");
        fprintf(stderr,
            "\t-d or --destdir			destination directory, followed by the path\n"
            "\t-s or --sourcedir		source directory of ICU data, followed by the path\n"
            "\t-g or --generate-rules   generate IDN rules for testing. Will print out rules to STDOUT\n"
            );
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    /* get the options values */
    beVerbose=options[2].doesOccur;
    haveCopyright=options[3].doesOccur;
    srcDir=options[5].value;
    destDir=options[4].value;
    printRules = options[7].doesOccur;

    if(argc>=2) {
        suffix=argv[1];
    } else {
        suffix=NULL;
    }

#if UCONFIG_NO_IDNA

    fprintf(stderr,
        "genidna writes dummy " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE
        " because UCONFIG_NO_IDNA is set, \n"
        "see icu/source/common/unicode/uconfig.h\n");
    generateData(destDir);

#else

    setUnicodeVersion(options[6].value);
    filename = (char* ) uprv_malloc(uprv_strlen(srcDir) + 300); /* hopefully this should be enough */
    /* prepare the filename beginning with the source dir */
    if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){
        filename[0] = 0x2E;
        filename[1] = U_FILE_SEP_CHAR;
        uprv_strcpy(filename+2,srcDir);
    }else{
        uprv_strcpy(filename, srcDir);
    }
    basename=filename+uprv_strlen(filename);
    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
        *basename++=U_FILE_SEP_CHAR;
    }
    
    /* initialize */
    init();
    if(printRules){
        printf("// Copyright (C) 2003, International Business Machines\n\n");
        printf("// WARNING: This file is machine generated by %s tool. Please DO NOT edit.\n\n",argv[0]);

        printf("idn_rules{\n");
    }

    /* first copy misc directory */
    saveBasename = basename;
    uprv_strcpy(basename,MISC_DIR);
    basename = basename + uprv_strlen(MISC_DIR);
    *basename++=U_FILE_SEP_CHAR;

    /* process unassigned */
    uprv_strcpy(basename,fileNames[0]);
    parseTable(filename,TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "Could not open file %s for reading \n", filename);
        return errorCode;
    }
    /* process prohibited */
    uprv_strcpy(basename,fileNames[1]);
    parseTable(filename,FALSE,  &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "Could not open file %s for reading \n", filename);
        return errorCode;
    }

    /*  setLDHValues(&errorCode); */
    setLabelSeperators(&errorCode);

    /* process mappings */
    if(printRules){
        printf("\n\tMapNoNormalization{\n");
    }
    uprv_strcpy(basename,fileNames[2]);
    parseMappings(filename, FALSE, FALSE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "Could not open file %s for reading \n", filename);
        return errorCode;
    }
    if(printRules){
        printf("\n\t}\n");
    }
    
    if(printRules){
        printf("\n\tMapNFKC{\n");
    }
    uprv_strcpy(basename,fileNames[3]);
    parseMappings(filename, TRUE, FALSE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "Could not open file %s for reading \n", filename);
        return errorCode;
    }
    /* set up directory for NormalizationCorrections.txt */
    basename = saveBasename;
    uprv_strcpy(basename,UNIDATA_DIR);
    basename = basename + uprv_strlen(UNIDATA_DIR);
    *basename++=U_FILE_SEP_CHAR;
    uprv_strcpy(basename,fileNames[4]);
    
    parseNormalizationCorrections(filename,&errorCode);
    if(U_FAILURE(errorCode)){
        fprintf(stderr,"Could not open file %s for reading \n", filename);
        return errorCode;
    }

    /* process parsed data */
    if(U_SUCCESS(errorCode)) {
        /* write the data file */
       generateData(destDir);

       cleanUpData();
    }
    if(printRules){
        printf("\t\t\"::[:AGE=3.2:]NFKC;\"\n\t}\n}");
    }

    uprv_free(filename);

#endif

    return errorCode;
}

#if !UCONFIG_NO_IDNA

/*
 * Line callback for NormalizationCorrections.txt.
 *
 * fields[0] holds the code point, fields[1] the original (erroneous)
 * decomposition and fields[3] the version string of the correction.
 * The mapping code point -> original decomposition is stored (with the
 * normalization flag set) when the correction's version is newer than 3.2,
 * comparing the major and minor version numbers only.
 *
 * Exits the program on a parse error.
 */
static void U_CALLCONV
normalizationCorrectionsLineFn(void *context,
                    char *fields[][2], int32_t fieldCount,
                    UErrorCode *pErrorCode) {
    uint32_t mapping[40];
    char *end, *s;
    uint32_t code;
    int32_t length;
    UVersionInfo version;
    UVersionInfo thisVersion;

    /* get the character code, field 0 */
    code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
    /*
     * strtoul does not touch *pErrorCode, so also check that at least one
     * hex digit was consumed; otherwise garbage would be stored as U+0000.
     */
    if(U_FAILURE(*pErrorCode) || end<=fields[0][0]) {
        if(U_SUCCESS(*pErrorCode)) {
            *pErrorCode=U_PARSE_ERROR;
        }
        fprintf(stderr, "genidn: error parsing NormalizationCorrections.txt mapping at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }
    /* Original (erroneous) decomposition */
    s = fields[1][0];

    /* parse the mapping string */
    length=u_parseCodePoints(s, mapping, sizeof(mapping)/sizeof(mapping[0]), pErrorCode);

    /* ignore corrected decomposition */

    u_versionFromString(version,fields[3][0] );
    u_versionFromString(thisVersion, "3.2.0");

    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genidn error parsing NormalizationCorrection of U+%04lx - %s\n",
                (long)code, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }

    /* store the mapping only for corrections newer than Unicode 3.2 */
    if( version[0] > thisVersion[0] || 
        ((version[0]==thisVersion[0]) && (version[1] > thisVersion[1]))
        ){
        storeMapping(code,mapping, length, TRUE, pErrorCode);
        if(printRules){
            printMapping(code,(UChar32*)mapping,length);
        }
    }
}

/*
 * Reads the normalization corrections file, passing each line (split into
 * four ';'-separated fields) to normalizationCorrectionsLineFn().
 *
 * Does nothing when pErrorCode is NULL or already holds a failure.
 * A failure other than U_FILE_ACCESS_ERROR terminates the program;
 * U_FILE_ACCESS_ERROR is left in *pErrorCode for the caller to handle.
 */
static void
parseNormalizationCorrections(const char *filename, UErrorCode *pErrorCode) {
    char *fields[4][2];

    if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) {
        u_parseDelimitedFile(filename, ';', fields, 4, normalizationCorrectionsLineFn, NULL, pErrorCode);

        if(U_FAILURE(*pErrorCode)) {
            if(*pErrorCode!=U_FILE_ACCESS_ERROR) {
                fprintf(stderr, "genidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
                exit(*pErrorCode);
            }
        }
    }
}

/*
 * Line callback for the mapping tables (rfc3454_B_1.txt / rfc3454_B_2.txt).
 *
 * context points to a UBool that is passed on to storeMapping() as the
 * "with normalization" flag. fields[0] holds the code point, fields[1] the
 * sequence of code points it maps to.
 *
 * Exits the program on a syntax or parse error.
 */
static void U_CALLCONV
caseMapLineFn(void *context,
              char *fields[][2], int32_t fieldCount,
              UErrorCode *pErrorCode) {
    uint32_t mapping[40];
    char *end, *s;
    uint32_t code;
    int32_t length;
    UBool* mapWithNorm = (UBool*) context;


    /* get the character code, field 0 */
    code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
    /* the whole field must have been consumed as hex digits */
    if(end<=fields[0][0] || end!=fields[0][1]) {
        fprintf(stderr, "genidn: syntax error in field 0 at %s\n", fields[0][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    s = fields[1][0];
    /* parse the mapping string */
    length=u_parseCodePoints(s, mapping, sizeof(mapping)/sizeof(mapping[0]), pErrorCode);

    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genidn error parsing mapping of U+%04lx - %s\n",
                (unsigned long)code, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }

    /* store the mapping */

    storeMapping(code,mapping, length, *mapWithNorm, pErrorCode);
    if(printRules){
        printMapping(code,(UChar32*)mapping,length);
    }
}

/*
 * Reads a mapping table, passing each line (split into three ';'-separated
 * fields) to caseMapLineFn(). withNorm is handed to the callback through the
 * context pointer.
 *
 * Does nothing when pErrorCode is NULL or already holds a failure.
 * On failure the program is terminated, except that U_FILE_ACCESS_ERROR is
 * returned to the caller in *pErrorCode when reportError is FALSE.
 */
static void
parseMappings(const char *filename,UBool withNorm, UBool reportError, UErrorCode *pErrorCode) {
    char *fields[3][2];

    if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) {
        u_parseDelimitedFile(filename, ';', fields, 3, caseMapLineFn, &withNorm, pErrorCode);

        if(U_FAILURE(*pErrorCode)) {
            if(reportError || *pErrorCode!=U_FILE_ACCESS_ERROR) {
                fprintf(stderr, "genidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
                exit(*pErrorCode);
            }
        }
    }
}

/* rule printing helpers and parser for the code point tables ---------------- */
/*
 * Number of characters printed so far on the current rule output line;
 * updated by the print functions below and used to wrap long set lines.
 */
static int32_t printedCharCount = 0;

/*
 * Prints one code point to stdout in rule syntax and adds the number of
 * characters written to printedCharCount:
 *   - above U+FFFF:                        \\UXXXXXXXX
 *   - rule white space:                    \\uXXXX
 *   - '.' :                                \\.
 *   - other ASCII between 0x20 and 0x7f:   the character itself
 *   - everything else:                     \\uXXXX
 */
static void printEscaped(UChar32 ch){
    if(ch > 0xFFFF){
        printf("\\\\U%08X",ch);
        printedCharCount+=11;
        return;
    }

    if(!uprv_isRuleWhiteSpace(ch) && 0x20< ch && ch <0x7f){
        if(ch != 0x2E){
            printf("%c",(char)ch);
            printedCharCount++;
        }else{
            /* double escape dot */
            printf("\\\\%c",(char)ch);
            printedCharCount+=3;
        }
        return;
    }

    /* rule white space (double escaped) and all remaining BMP code points */
    printf("\\\\u%04X", ch);
    printedCharCount+=7;
}
/*
 * Prints a code point range as "start-end " (or just "start " when both ends
 * are equal) in rule syntax. Once more than 70 counted characters are on the
 * current line, the string literal is closed and continued on a new line.
 */
static void printEscapedRange(UChar32 rangeStart, UChar32 rangeEnd){
    printEscaped(rangeStart);
    if(rangeStart != rangeEnd){
        printf("-");
        printedCharCount++;
        printEscaped(rangeEnd);
    }
    printf(" ");

    if(printedCharCount > 70){
        printf("\"\n\t\t\t\"");
        printedCharCount =0 ;
    }
}
/*
 * Prints one mapping rule of the form
 *     "<cp> > <mapping code points>;"
 * on its own line and resets the printed character counter.
 */
static void printMapping( UChar32 cp, UChar32* mapping, int32_t mappingLength){
    const UChar32 *current = mapping;
    const UChar32 *limit = mapping + mappingLength;

    printf("\t\t\"");
    printEscaped(cp);
    printf(" > ");
    while(current < limit){
        printEscaped(*current++);
    }
    printf(";\"\n");

    printedCharCount=0; 
}
/*
 * Line callback for the code point tables (unassigned / prohibited).
 *
 * context points to a UBool: TRUE stores the range as UIDNA_UNASSIGNED,
 * anything else as UIDNA_PROHIBITED. fields[0] holds a single code point or
 * a code point range. If the range cannot be parsed a message is printed and
 * the function returns with the failure left in *pErrorCode.
 */
static void U_CALLCONV
unicodeDataLineFn(void *context,
                  char *fields[][2], int32_t fieldCount,
                  UErrorCode *pErrorCode) {
    uint32_t rangeStart=0,rangeEnd =0;
    UBool* isUnassigned = (UBool*) context;

    u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);

    if(U_SUCCESS(*pErrorCode)){
        storeRange(rangeStart,rangeEnd,
                   (*isUnassigned == TRUE) ? UIDNA_UNASSIGNED : UIDNA_PROHIBITED,
                   pErrorCode);

        /*TODO: comment out the printer */
        if(printRules){
            printEscapedRange(rangeStart,rangeEnd);
        }
    }else{
        fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
    }
}

/*
 * Reads a table of code points / code point ranges and passes each line to
 * unicodeDataLineFn(); isUnassigned selects which of the two kinds of table
 * is being read. When rules are printed, the ranges are wrapped in an
 * UnassignedSet{...} or ProhibitedSet{...} entry.
 *
 * Does nothing when pErrorCode is NULL or already holds a failure.
 * Terminates the program if the file cannot be parsed.
 */
static void
parseTable(const char *filename,UBool isUnassigned, UErrorCode *pErrorCode) {
    char *fields[1][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    /*TODO: comment out the printer */
    if(printRules){
        const char *setHeader = isUnassigned ? "\n\tUnassignedSet{\"[ "
                                             : "\n\tProhibitedSet{\"[ ";
        printedCharCount = 0;
        printf("%s", setHeader);
    }

    u_parseDelimitedFile(filename, ';', fields, 1, unicodeDataLineFn, &isUnassigned, pErrorCode);

    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }

    if(printRules){
        printf("]\"}\n");
    }
}

/*
static void 
setLDHValues(UErrorCode* pErrorCode){
    USet* set = uset_openPattern(LDH_PATTERN, LDH_PATTERN_LEN, pErrorCode);
    int32_t itemCount;
    int32_t index = 0;
    UChar32 start,end;

    if(U_FAILURE(*pErrorCode)){
        fprintf(stderr,"Could not open USet. Error :%s \n",u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
    
    itemCount = uset_getItemCount(set);

    for(;index < itemCount; index++){
        uset_getItem(set,index, &start, &end, NULL, 0, pErrorCode);
        storeRange(start,end,UIDNA_LDH_OR_MAP_NFKC, pErrorCode);
    }
    if(printRules){
        printf(PAT);
    }

}
*/
/*
 * Stores the four label separator code points (U+002E, U+3002, U+FF0E,
 * U+FF61) as UIDNA_LABEL_SEPARATOR and, when rules are printed, emits them
 * as the LabelSeparatorSet entry. A storage failure is reported on stderr
 * and left in *pErrorCode.
 */
static void
setLabelSeperators(UErrorCode *pErrorCode){
    static const uint32_t separators[] = { 0x002E, 0x3002, 0xFF0E, 0xFF61 };
    const int32_t separatorCount = (int32_t)(sizeof(separators)/sizeof(separators[0]));
    int32_t i;

    for(i=0; i<separatorCount; i++){
        storeRange(separators[i], separators[i], UIDNA_LABEL_SEPARATOR, pErrorCode);
    }
    if(U_FAILURE(*pErrorCode)){
        fprintf(stderr, "Could not store values for label separators\n");
    }

    if(printRules){
        printf("\tLabelSeparatorSet{\"[ ");
        for(i=0; i<separatorCount; i++){
            printEscaped((UChar32)separators[i]);
        }
        printf(" ]\"}\n\n");
    }
}

#endif /* #if !UCONFIG_NO_IDNA */

/*
 * Hey, Emacs, please set the following:
 *
 * Local Variables:
 * indent-tabs-mode: nil
 * End:
 *
 */

--- NEW FILE: genidna.dsp ---
# Microsoft Developer Studio Project File - Name="genidna" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 6.00
# ** DO NOT EDIT **

# TARGTYPE "Win32 (x86) Console Application" 0x0103

CFG=genidna - Win32 Debug
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
!MESSAGE use the Export Makefile command and run
!MESSAGE 
!MESSAGE NMAKE /f "genidna.mak".
!MESSAGE 
!MESSAGE You can specify a configuration when running NMAKE
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE 
!MESSAGE NMAKE /f "genidna.mak" CFG="genidna - Win32 Debug"
!MESSAGE 
!MESSAGE Possible choices for configuration are:
!MESSAGE 
!MESSAGE "genidna - Win32 Release" (based on "Win32 (x86) Console Application")
!MESSAGE "genidna - Win32 Debug" (based on "Win32 (x86) Console Application")
!MESSAGE "genidna - Win64 Release" (based on "Win32 (x86) Console Application")
!MESSAGE "genidna - Win64 Debug" (based on "Win32 (x86) Console Application")
!MESSAGE 

# Begin Project
# PROP AllowPerConfigDependencies 0
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
CPP=cl.exe
RSC=rc.exe

!IF  "$(CFG)" == "genidna - Win32 Release"

# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
MTL=midl.exe
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
# ADD CPP /nologo /W3 /GX /O2 /I "..\..\common" /I "..\toolutil" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
# ADD LINK32 icuuc.lib icutu.lib /nologo /subsystem:console /machine:I386 /libpath:"..\..\..\lib"
# Begin Custom Build
TargetPath=.\Release\genidna.exe
InputPath=.\Release\genidna.exe
InputName=genidna
SOURCE="$(InputPath)"

"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
	copy $(TargetPath) ..\..\..\bin

# End Custom Build

!ELSEIF  "$(CFG)" == "genidna - Win32 Debug"

# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "Debug"
# PROP BASE Intermediate_Dir "Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "Debug"
# PROP Intermediate_Dir "Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
MTL=midl.exe
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /I "..\..\common" /I "..\toolutil" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
# ADD LINK32 icutud.lib icuucd.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\..\..\lib"
# Begin Custom Build
TargetPath=.\Debug\genidna.exe
InputPath=.\Debug\genidna.exe
InputName=genidna
SOURCE="$(InputPath)"

"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
	copy $(TargetPath) ..\..\..\bin

# End Custom Build

!ELSEIF  "$(CFG)" == "genidna - Win64 Release"

# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
MTL=midl.exe
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
# ADD CPP /nologo /W3 /GX /Zi /O2 /Op /I "..\..\common" /I "..\toolutil" /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /QIA64_fmaopt /Zm600 /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /machine:IA64
# ADD LINK32 icuuc.lib icutu.lib /nologo /subsystem:console /machine:IX86 /libpath:"..\..\..\lib" /machine:IA64
# Begin Custom Build
TargetPath=.\Release\genidna.exe
InputPath=.\Release\genidna.exe
InputName=genidna
SOURCE="$(InputPath)"

"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
	copy $(TargetPath) ..\..\..\bin

# End Custom Build

!ELSEIF  "$(CFG)" == "genidna - Win64 Debug"

# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "Debug"
# PROP BASE Intermediate_Dir "Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "Debug"
# PROP Intermediate_Dir "Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
MTL=midl.exe
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /Op /I "..\..\common" /I "..\toolutil" /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /GZ /QIA64_fmaopt /Zm600 /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:IX86 /pdbtype:sept /machine:IA64
# ADD LINK32 icutud.lib icuucd.lib /nologo /subsystem:console /incremental:no /debug /machine:IX86 /pdbtype:sept /libpath:"..\..\..\lib" /machine:IA64
# Begin Custom Build
TargetPath=.\Debug\genidna.exe
InputPath=.\Debug\genidna.exe
InputName=genidna
SOURCE="$(InputPath)"

"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
	copy $(TargetPath) ..\..\..\bin

# End Custom Build

!ENDIF 

# Begin Target

# Name "genidna - Win32 Release"
# Name "genidna - Win32 Debug"
# Name "genidna - Win64 Release"
# Name "genidna - Win64 Debug"
# Begin Group "Source Files"

# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
# Begin Source File

SOURCE=.\genidna.c
# End Source File
# Begin Source File

SOURCE=.\store.c
# End Source File
# End Group
# Begin Group "Header Files"

# PROP Default_Filter "h;hpp;hxx;hm;inl"
# Begin Source File

SOURCE=.\genidna.h
# End Source File
# End Group
# Begin Group "Resource Files"

# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
# End Group
# End Target
# End Project

--- NEW FILE: genidna.h ---
/*
*******************************************************************************
*
*   Copyright (C) 1999-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  genidna.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003-02-06
*   created by: Ram Viswanadha
*/

#ifndef __GENIDN_H__
#define __GENIDN_H__

#include "unicode/utypes.h"
#include "unicode/uset.h"
#include "sprpimpl.h"

/* file definitions: name and type of the data file that generateData() creates */
#define DATA_NAME "uidna"
#define DATA_TYPE "icu"

/*
 * NOTE: an earlier revision apparently declared here a data structure that
 * held the IDN properties for one or more code point(s) at build time.
 * No such structure is declared in this header; see the retired prototypes
 * at the bottom of the file.
 */

 
/*
 * global flags (defined outside store.c)
 * beVerbose:     when set, generateData() prints size statistics to stdout
 * haveCopyright: when set, generateData() passes U_COPYRIGHT_STRING to
 *                udata_create(), otherwise NULL
 */
extern UBool beVerbose, haveCopyright;

/* prototypes */

/*
 * Parse the version string v and store it as the dataVersion of the
 * UDataInfo header that is written to the output file.
 */
extern void
setUnicodeVersion(const char *v);

/*
 * Open the build-time trie. Prints an error and exits the process on failure.
 */
extern void
init(void);

/*
 * Record a mapping for one code point.
 * withNorm != 0: mapping[0..length-1] (code points) is stored and the code
 *                point is flagged UIDNA_MAP_NFKC.
 * withNorm == 0: the code point is marked _IDNA_MAP_TO_NOTHING; mapping and
 *                length are not stored.
 * status is set to U_INDEX_OUTOFBOUNDS_ERROR when the mapping table is full.
 */
extern void
storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length, UBool withNorm, UErrorCode* status);
/*
 * Store a category value for every code point from start to end (inclusive).
 * NOTE(review): the definition in store.c declares the third parameter as
 * "int8_t flag" and uses it as the trie value, not as a boolean; confirm that
 * UBool and int8_t are the same type and consider renaming the parameter.
 */
extern void
storeRange(uint32_t start, uint32_t end, UBool isUnassigned,UErrorCode* status);

/*
 * Serialize the collected data and write DATA_NAME.DATA_TYPE (prefixed with
 * U_ICUDATA_NAME) into dataDir. Exits the process on any error.
 */
extern void
generateData(const char *dataDir);

/*
 * Close the build-time trie.
 * NOTE(review): store.c defines this only when UCONFIG_NO_IDNA is 0.
 */
extern void
cleanUpData(void);

/*
 * Retired prototypes, kept for reference only:
 */
/*
extern void
storeIDN(uint32_t code, IDN *idn);

extern void
processData(void);


*/
#endif

/*
 * Hey, Emacs, please set the following:
 *
 * Local Variables:
 * indent-tabs-mode: nil
 * End:
 *
 */



--- NEW FILE: genidna.vcproj ---
<?xml version="1.0" encoding = "Windows-1252"?>
<VisualStudioProject
	ProjectType="Visual C++"
	Version="7.00"
	Name="genidna"
	SccProjectName=""
	SccLocalPath="">
	<Platforms>
		<Platform
			Name="Win32"/>
	</Platforms>
	<Configurations>
		<Configuration
			Name="Release|Win32"
			OutputDirectory=".\Release"
			IntermediateDirectory=".\Release"
			ConfigurationType="1"
			UseOfMFC="0"
			ATLMinimizesCRunTimeLibraryUsage="FALSE"
			CharacterSet="2">
			<Tool
				Name="VCCLCompilerTool"
				InlineFunctionExpansion="2"
				ImproveFloatingPointConsistency="TRUE"
				AdditionalIncludeDirectories="..\..\common,..\toolutil"
				PreprocessorDefinitions="WIN32,NDEBUG,_CONSOLE"
				StringPooling="TRUE"
				RuntimeLibrary="4"
				EnableFunctionLevelLinking="TRUE"
				PrecompiledHeaderFile=".\Release/genidna.pch"
				AssemblerListingLocation=".\Release/"
				ObjectFile=".\Release/"
				ProgramDataBaseFileName=".\Release/"
				WarningLevel="3"
				SuppressStartupBanner="TRUE"
				CompileAs="0"/>
			<Tool
				Name="VCCustomBuildTool"
				CommandLine="copy $(TargetPath) ..\..\..\bin
"
				Outputs="..\..\..\bin\$(InputName).exe"/>
			<Tool
				Name="VCLinkerTool"
				AdditionalOptions="/MACHINE:I386"
				AdditionalDependencies="icuuc.lib icutu.lib"
				OutputFile=".\Release/genidna.exe"
				LinkIncremental="1"
				SuppressStartupBanner="TRUE"
				AdditionalLibraryDirectories="..\..\..\lib"
				ProgramDatabaseFile=".\Release/genidna.pdb"
				SubSystem="1"/>
			<Tool
				Name="VCMIDLTool"
				TypeLibraryName=".\Release/genidna.tlb"/>
			<Tool
				Name="VCPostBuildEventTool"/>
			<Tool
				Name="VCPreBuildEventTool"/>
			<Tool
				Name="VCPreLinkEventTool"/>
			<Tool
				Name="VCResourceCompilerTool"
				PreprocessorDefinitions="NDEBUG"
				Culture="1033"/>
			<Tool
				Name="VCWebServiceProxyGeneratorTool"/>
			<Tool
				Name="VCWebDeploymentTool"/>
		</Configuration>
		<Configuration
			Name="Debug|Win32"
			OutputDirectory=".\Debug"
			IntermediateDirectory=".\Debug"
			ConfigurationType="1"
			UseOfMFC="0"
			ATLMinimizesCRunTimeLibraryUsage="FALSE"
			CharacterSet="2">
			<Tool
				Name="VCCLCompilerTool"
				Optimization="0"
				AdditionalIncludeDirectories="..\..\common,..\toolutil"
				PreprocessorDefinitions="WIN32,_DEBUG,_CONSOLE"
				BasicRuntimeChecks="3"
				RuntimeLibrary="5"
				PrecompiledHeaderFile=".\Debug/genidna.pch"
				AssemblerListingLocation=".\Debug/"
				ObjectFile=".\Debug/"
				ProgramDataBaseFileName=".\Debug/"
				WarningLevel="3"
				SuppressStartupBanner="TRUE"
				DebugInformationFormat="4"
				CompileAs="0"/>
			<Tool
				Name="VCCustomBuildTool"
				CommandLine="copy $(TargetPath) ..\..\..\bin
"
				Outputs="..\..\..\bin\$(InputName).exe"/>
			<Tool
				Name="VCLinkerTool"
				AdditionalOptions="/MACHINE:I386"
				AdditionalDependencies="icutud.lib icuucd.lib"
				OutputFile=".\Debug/genidna.exe"
				LinkIncremental="2"
				SuppressStartupBanner="TRUE"
				AdditionalLibraryDirectories="..\..\..\lib"
				GenerateDebugInformation="TRUE"
				ProgramDatabaseFile=".\Debug/genidna.pdb"
				SubSystem="1"/>
			<Tool
				Name="VCMIDLTool"
				TypeLibraryName=".\Debug/genidna.tlb"/>
			<Tool
				Name="VCPostBuildEventTool"/>
			<Tool
				Name="VCPreBuildEventTool"/>
			<Tool
				Name="VCPreLinkEventTool"/>
			<Tool
				Name="VCResourceCompilerTool"
				PreprocessorDefinitions="_DEBUG"
				Culture="1033"/>
			<Tool
				Name="VCWebServiceProxyGeneratorTool"/>
			<Tool
				Name="VCWebDeploymentTool"/>
		</Configuration>
	</Configurations>
	<Files>
		<Filter
			Name="Source Files"
			Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat">
			<File
				RelativePath=".\genidna.c">
			</File>
			<File
				RelativePath=".\store.c">
			</File>
		</Filter>
		<Filter
			Name="Header Files"
			Filter="h;hpp;hxx;hm;inl">
			<File
				RelativePath=".\genidna.h">
			</File>
		</Filter>
		<Filter
			Name="Resource Files"
			Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe">
		</Filter>
	</Files>
	<Globals>
	</Globals>
</VisualStudioProject>

--- NEW FILE: store.c ---
/*
*******************************************************************************
*
*   Copyright (C) 1999-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  store.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003-02-06
*   created by: Ram Viswanadha
*
*/

#include <stdio.h>
#include <stdlib.h>
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "unicode/udata.h"
#include "utrie.h"
#include "unicode/uset.h"
#include "unewdata.h"
#include "genidna.h"

#ifdef WIN32
#   pragma warning(disable: 4100)
#endif

#define DO_DEBUG_OUT 0


/**
This is a simple Trie with the following structure

16-bit IDN sets:

Each 16-bit IDN word contains:

 0..2  Category flags
       Contains the enum values IDNStates
 
 3..4  Contains the length of the mapping in UTF-16 code units
       If length of the mapping is <= 2 the length itself is stored
       If length of the mapping is > 2 then the _IDNA_LENGTH_IN_MAPPING_TABLE
       enum is stored and the length of the mapping is stored in the first
       slot of the mapping in the data array

 5..15 Contains the index into the data array that contains the mapping 
       If it contains _IDNA_MAP_TO_NOTHING, then the codepoint is stripped from
       the input

*/

/* file data ---------------------------------------------------------------- */
/* indexes[] value names */

#if UCONFIG_NO_IDNA

/*
 * dummy UDataInfo cf. udata.h
 *
 * Used when IDNA support is compiled out (UCONFIG_NO_IDNA is non-zero).
 * In that configuration generateData() still creates the output file with
 * this header but writes no data blocks after it, so the format and version
 * fields are all zero.
 */
static UDataInfo dataInfo = {
    sizeof(UDataInfo),
    0,

    /* platform properties of the build machine */
    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    U_SIZEOF_UCHAR,
    0,

    { 0, 0, 0, 0 },                 /* dummy dataFormat */
    { 0, 0, 0, 0 },                 /* dummy formatVersion */
    { 0, 0, 0, 0 }                  /* dummy dataVersion */
};

#else

/*
 * Index block written at the start of the data file. generateData() fills
 * in the _IDNA_INDEX_TRIE_SIZE and _IDNA_INDEX_MAPPING_DATA_SIZE slots;
 * all other slots stay 0.
 */
static int32_t indexes[_IDNA_INDEX_TOP]={ 0 };

/*
 * Mapping strings as UTF-16 code units, appended by store() at currentIndex.
 * The whole array (not only the used part) is written to the data file.
 */
static uint16_t mappingData[_IDNA_MAPPING_DATA_SIZE]={0};

/* UDataInfo cf. udata.h */
static UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    /* platform properties of the build machine */
    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    U_SIZEOF_UCHAR,
    0,

    { 0x49, 0x44, 0x4e, 0x41 },   /* dataFormat="IDNA" */
    { 2, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT },   /* formatVersion */
    /* default only: setUnicodeVersion() overwrites these four bytes */
    { 3, 2, 0, 0 }                /* dataVersion (Unicode version) */
};
/*
 * Record the Unicode version of the input data.
 * The string v is parsed with u_versionFromString() and the resulting four
 * version bytes replace dataInfo.dataVersion, which is later written into
 * the header of the output file.
 */
void
setUnicodeVersion(const char *v) {
    UVersionInfo parsed;

    u_versionFromString(parsed, v);
    uprv_memcpy(dataInfo.dataVersion, parsed, 4);
}


/* build-time trie holding one IDNA word per code point; opened by init() */
static UNewTrie idnTrie={ {0},0,0,0,0,0,0,0,0,{0} };

/*
 * Next free slot in mappingData[]. It starts at 1 and only grows, so slot 0
 * is never written by store().
 */
static int32_t currentIndex = 1; /* the current index into the data trie */
/* longest mapping seen so far, in UTF-16 code units; reported in verbose mode */
static int32_t maxLength = 0;  /* maximum length of mapping string */

/* capacity argument handed to utrie_open() in init() */
#define MAX_DATA_LENGTH 11500

/*
 * Prepare the build-time state: open the single IDNA trie (idnTrie) with
 * room for MAX_DATA_LENGTH data entries and an initial value of 0.
 * If the trie cannot be opened, an error is printed to stderr and the
 * process exits with U_MEMORY_ALLOCATION_ERROR.
 */
extern void
init() {
    if(utrie_open(&idnTrie, NULL, MAX_DATA_LENGTH, 0, FALSE) != NULL) {
        return;
    }

    fprintf(stderr, "error: failed to initialize tries\n");
    exit(U_MEMORY_ALLOCATION_ERROR);
}

static void 
store(uint32_t codepoint, uint32_t* mapping, int32_t length, uint32_t flags, UErrorCode* status){

    uint32_t trieWord = 0;
    int32_t i =0;
    if(flags == _IDNA_MAP_TO_NOTHING){
        trieWord = flags << 5;
    }else{
        if(length==0){
            trieWord =  flags;
        }else{
            int32_t adjustedLen = 0;
            int32_t i=0;
            /*
            int32_t delta;
        
            if(length==1 && (delta=(int32_t)codepoint-(int32_t)mapping[0])>=-4096 && delta<=4095) {
                printf("mapping of U+%04lx to U+%04lx could fit into a 13-bit delta (0x%lx)\n", codepoint, mapping[0], delta);
            }
             */
            /* set the 0..2 bits the flags */
            trieWord = flags;

            /* figure out the real length */ 
            for(i=0; i<length; i++){
                if(mapping[i] > 0xFFFF){
                    adjustedLen +=2;
                }else{
                    adjustedLen++;
                }      
            }
            length = adjustedLen;

            /* set the 3..4 bits the length */
            if(length > 2){
                trieWord += _IDNA_LENGTH_IN_MAPPING_TABLE << 3;
            }else{
                trieWord += (uint32_t)((length)<<3);
            }
            if(length > maxLength) 
                maxLength = length;

            /* get the current index in the data array 
             * and store in 5..15 bits
             */
            trieWord += currentIndex << 5;


            /* load mapping into the data array */
            i = 0;
        
            if(trieWord > 0xFFFF){
                fprintf(stderr,"size of trie word is greater than 0xFFFF.\n");
            }
            /* set the length in mapping table */
            if(length > 2){
                mappingData[currentIndex++] = (uint16_t)length;
            }
            while(i<length){
                if(currentIndex < _IDNA_MAPPING_DATA_SIZE){
                    if(mappingData[currentIndex]==0){
                        if(mapping[i] <= 0xFFFF){
                            mappingData[currentIndex++] = (uint16_t)mapping[i++];
                        }else{
                            mappingData[currentIndex++] = UTF16_LEAD(mapping[i]);
                            if(currentIndex < _IDNA_MAPPING_DATA_SIZE){
                                mappingData[currentIndex++] = UTF16_TRAIL(mapping[i++]);
                            }else{
                                fprintf(stderr, "Data Array index out of bounds.currentIndex = %i size of mapping arry = %i \n",currentIndex, _IDNA_MAPPING_DATA_SIZE);
                                *status = U_INDEX_OUTOFBOUNDS_ERROR;
                                return;
                            }
                        }
                    }
                }else{
                    fprintf(stderr, "Data Array index out of bounds.currentIndex = %i size of mapping arry = %i \n",currentIndex, _IDNA_MAPPING_DATA_SIZE);
                    *status = U_INDEX_OUTOFBOUNDS_ERROR;
                    return;
                }
            }

        }
    }


    i = utrie_get32(&idnTrie,codepoint,NULL);
    
    if(i==0){
        /* now set the value in the trie */
        if(!utrie_set32(&idnTrie,codepoint,trieWord)){
            fprintf(stderr, "error:  too many mapping entries\n");
            exit(U_BUFFER_OVERFLOW_ERROR);
        }

    }else{
        if(i== UIDNA_PROHIBITED){
            i += _IDNA_MAP_TO_NOTHING << 5;
            /* now set the value in the trie */
            if(!utrie_set32(&idnTrie,codepoint,i)){
                fprintf(stderr, "error:  too many mapping entries\n");
                exit(U_BUFFER_OVERFLOW_ERROR);
            }
        }else{
            fprintf(stderr, "Index array has been set for codepoint 0x%06X. \n",codepoint);
            exit(U_INTERNAL_PROGRAM_ERROR);
        }
    }

}
/*
 * Public entry point for recording the mapping of a single code point.
 * With withNorm set, the mapping is stored and flagged UIDNA_MAP_NFKC;
 * otherwise the code point is stored as _IDNA_MAP_TO_NOTHING.
 * All other arguments are forwarded to store() unchanged.
 */
extern void
storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length, UBool withNorm, UErrorCode* status){
    uint32_t flags;

    if(!withNorm){
        flags = _IDNA_MAP_TO_NOTHING;
    }else{
        flags = UIDNA_MAP_NFKC;
    }
    store(codepoint,mapping,length,flags,status);
}


/*
 * Store the category value flag for the code points start..end.
 *
 * A multi-code-point range is handed to utrie_setRange32() with end+1 as the
 * limit and FALSE as the last argument. A single code point (start == end)
 * is set directly, but only if it currently holds 0 or the same flag value;
 * any other existing value is treated as a fatal internal error.
 *
 * The status parameter is not used by this function; all failures terminate
 * the process.
 */
extern void
storeRange(uint32_t start, uint32_t end, int8_t flag,UErrorCode* status){
    const uint32_t rangeValue = (uint32_t)flag;
    uint32_t previous;

    if(start != end){
        if(!utrie_setRange32(&idnTrie,start,end+1,rangeValue,FALSE)){
            fprintf(stderr, "error: too  many entries\n");
            exit(U_BUFFER_OVERFLOW_ERROR);
        }
        return;
    }

    previous = utrie_get32(&idnTrie,start,NULL);
    if(previous != 0 && previous != (uint8_t)flag){
        fprintf(stderr, "Index array has been set for codepoint 0x%06X. \n",start);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }

    if(!utrie_set32(&idnTrie,start,rangeValue)){
        fprintf(stderr, "error: too  many entries\n");
        exit(U_BUFFER_OVERFLOW_ERROR);
    }

}

/*
 * Callback passed to utrie_serialize().
 * Scans the 0x400 code points beginning at start and ORs together every trie
 * value found, skipping a whole data block at a time where utrie_get32()
 * reports that the code point lies in the zero block. If any value is
 * non-zero the result is offset with bit 0x8000 set, otherwise 0.
 */
static uint32_t U_CALLCONV
getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
    uint32_t seenBits = 0;
    UChar32 c = start;
    const UChar32 end = start+0x400;
    UBool zeroBlock;

    while(c<end) {
        uint32_t word = utrie_get32(trie, c, &zeroBlock);

        if(!zeroBlock) {
            seenBits |= word;
            ++c;
            continue;
        }
        c += UTRIE_DATA_BLOCK_LENGTH;
    }

    return (seenBits!=0) ? (uint32_t)(offset|0x8000) : 0;
}

#endif /* #if !UCONFIG_NO_IDNA */

extern void
generateData(const char *dataDir) {
    static uint8_t idnTrieBlock[100000];

    UNewDataMemory *pData;
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t size, dataLength;

#if UCONFIG_NO_IDNA

    size=0;

#else

    int32_t idnTrieSize;

    idnTrieSize=utrie_serialize(&idnTrie, idnTrieBlock, sizeof(idnTrieBlock), getFoldedValue, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "error: utrie_serialize(idn trie) failed, %s\n", u_errorName(errorCode));
        exit(errorCode);
    }
    size = idnTrieSize + sizeof(mappingData) + sizeof(indexes);
    if(beVerbose) {
        printf("size of idn trie              %5u bytes\n", idnTrieSize);
        printf("size of " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE " contents: %ld bytes\n", (long)size);
        printf("size of mapping data array %5u bytes\n", sizeof(mappingData));
        printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex);
        printf("Maximum length of the mapping string is : %i \n", maxLength);
    }

#endif
    
    /* write the data */
    pData=udata_create(dataDir, DATA_TYPE, U_ICUDATA_NAME "_" DATA_NAME, &dataInfo,
                       haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gennorm: unable to create the output file, error %d\n", errorCode);
        exit(errorCode);
    }

#if !UCONFIG_NO_IDNA

    indexes[_IDNA_INDEX_TRIE_SIZE]=idnTrieSize;
    indexes[_IDNA_INDEX_MAPPING_DATA_SIZE]=sizeof(mappingData);

    udata_writeBlock(pData, indexes, sizeof(indexes));
    udata_writeBlock(pData, idnTrieBlock, idnTrieSize);
    udata_writeBlock(pData, mappingData, sizeof(mappingData));

#endif

    /* finish up */
    dataLength=udata_finish(pData, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genidn: error %d writing the output file\n", errorCode);
        exit(errorCode);
    }

    if(dataLength!=size) {
        fprintf(stderr, "genidn error: data length %ld != calculated size %ld\n",
            (long)dataLength, (long)size);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }
}

#if !UCONFIG_NO_IDNA

/*
 * Release the build-time trie (idnTrie) by passing it to utrie_close().
 */
extern void
cleanUpData(void) {
    utrie_close(&idnTrie);
}

#endif /* #if !UCONFIG_NO_IDNA */

/*
 * Hey, Emacs, please set the following:
 *
 * Local Variables:
 * indent-tabs-mode: nil
 * End:
 *
 */