[sword-cvs] icu-sword/source/tools/gensprep .cvsignore,NONE,1.1 Makefile.in,NONE,1.1 filterRFC3454.pl,NONE,1.1 gensprep.8.in,NONE,1.1 gensprep.c,NONE,1.1 gensprep.dsp,NONE,1.1 gensprep.h,NONE,1.1 gensprep.vcproj,NONE,1.1 store.c,NONE,1.1

sword@www.crosswire.org sword@www.crosswire.org
Tue, 6 Apr 2004 03:10:18 -0700


Update of /cvs/core/icu-sword/source/tools/gensprep
In directory www:/tmp/cvs-serv8911/source/tools/gensprep

Added Files:
	.cvsignore Makefile.in filterRFC3454.pl gensprep.8.in 
	gensprep.c gensprep.dsp gensprep.h gensprep.vcproj store.c 
Log Message:
ICU 2.8 sync

--- NEW FILE: .cvsignore ---
*.d
*.pdb
Debug
Makefile
Release
gensprep
gensprep.8
gensprep.plg

--- NEW FILE: Makefile.in ---
## Makefile.in for ICU - tools/gensprep
## Copyright (c) 2001-2003, International Business Machines Corporation and
## others. All Rights Reserved.
## Steven R. Loomis/Markus W. Scherer

## Source directory information
srcdir = @srcdir@
top_srcdir = @top_srcdir@

top_builddir = ../..

include $(top_builddir)/icudefs.mk

##

SECTION = 8

MAN_FILES = $(TARGET:$(EXEEXT)=).$(SECTION)

## Build directory information
subdir = tools/gensprep

ICUDATADIR=$(top_builddir)/data
UNICODEDATADIR=$(top_srcdir)/../data/unidata

## Extra files to remove for 'make clean'
CLEANFILES = *~ $(DEPS) $(RES_FILES) $(TEST_FILES) $(MAN_FILES)

## Target information
TARGET = gensprep$(EXEEXT)

CPPFLAGS += -I$(top_builddir)/common -I$(top_srcdir)/common -I$(srcdir)/../toolutil
LIBS = $(LIBICUTOOLUTIL) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)

OBJECTS = gensprep.o store.o

DEPS = $(OBJECTS:.o=.d)

## List of phony targets
.PHONY : all all-local install install-local clean clean-local		\
distclean distclean-local dist dist-local check	\
check-local build-data install-man

## Clear suffix list
.SUFFIXES :

## List of standard targets
all: all-local
install: install-local
clean: clean-local
distclean : distclean-local
dist: dist-local
check: all check-local

all-local: $(TARGET) build-data $(MAN_FILES)

install-local: all-local install-man
	$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
	$(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)/$(TARGET)

# man page
install-man: $(MAN_FILES)
	$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
	$(INSTALL_DATA) $< $(DESTDIR)$(mandir)/man$(SECTION)

%.$(SECTION): $(srcdir)/%.$(SECTION).in
	cd $(top_builddir) \
	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status

# build postscript and pdf formats
#$(TARGET).ps: $(TARGET).$(SECTION)
#	groff -man < $< > $@

#$(TARGET).pdf: $(TARGET).ps
#	ps2pdf $< $@

dist-local:

clean-local:
	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
	$(RMV) $(TARGET) $(OBJECTS)

distclean-local: clean-local
	$(RMV) Makefile

check-local: all-local

Makefile: $(srcdir)/Makefile.in  $(top_builddir)/config.status
	cd $(top_builddir) \
	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status

$(TARGET) : $(OBJECTS)
	$(LINK.cc) $(OUTOPT)$@ $^ $(LIBS) 

ifeq (,$(MAKECMDGOALS))
-include $(DEPS)
else
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
-include $(DEPS)
endif
endif


--- NEW FILE: filterRFC3454.pl ---
#/usr/bin/perl
# Copyright (c) 2001-2003 International Business Machines
# Corporation and others. All Rights Reserved.

####################################################################################
# filterRFC3454.pl:
# This tool filters the RFC-3454 txt file for StringPrep tables and creates a table
# to be used in NamePrepProfile
#
# Author: Ram Viswanadha
#        
####################################################################################

use File::Find;
use File::Basename;
use IO::File;
use Cwd;
use File::Copy;
use Getopt::Long;
use File::Path;
use File::Copy;

$copyright = "###################\n# Copyright (C) 2003, International Business Machines\n# Corporation and others.  All Rights Reserved.\n###################\n\n";
$warning = "###################\n# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT \n###################\n\n";
#run the program
main();

#---------------------------------------------------------------------
# The main program

sub main(){
  GetOptions(
           "--sourcedir=s" => \$sourceDir,
           "--destdir=s" => \$destDir,
           "--src-filename=s" => \$srcFileName,
           "--dest-filename=s" => \$destFileName,
           "--A1"  => \$a1,
           "--B1"  => \$b1,
           "--B2"  => \$b2,
		   "--B3"  => \$b3,
           "--C11" => \$c11,
           "--C12" => \$c12,
           "--C21" => \$c21,
           "--C22" => \$c22,
           "--C3"  => \$c3,
           "--C4"  => \$c4,
           "--C5"  => \$c5,
           "--C6"  => \$c6,
           "--C7"  => \$c7,
           "--C8"  => \$c8,
           "--C9"  => \$c9,
           "--ldh-chars" => \$writeLDHChars,
           "--iscsi" => \$writeISCSIChars,
           );
  usage() unless defined $sourceDir;
  usage() unless defined $destDir;
  usage() unless defined $srcFileName;
  usage() unless defined $destFileName;

  $infile = $sourceDir."/".$srcFileName;
  $inFH = IO::File->new($infile,"r")
            or die  "could not open the file $infile for reading: $! \n";
  $outfile = $destDir."/".$destFileName;

  unlink($outfile);
  $outFH = IO::File->new($outfile,"a")
            or die  "could not open the file $outfile for writing: $! \n";
  print $outFH  $copyright;
  print $outFH  $warning;
  close($outFH);

  if(defined $b2 && defined $b3){
      die "ERROR: --B2 and --B3 are both specified\!\n";
  }

  while(defined ($line=<$inFH>)){
      next unless $line=~ /Start\sTable/;
      if($line =~ /A.1/){
            createUnassignedTable($inFH,$outfile);
      }
      if($line =~ /B.1/ && defined $b1){
            createMapToNothing($inFH,$outfile);
      }
      if($line =~ /B.2/ && defined $b2){
            createCaseMapNorm($inFH,$outfile);
      }
      if($line =~ /B.3/ && defined $b3){
            createCaseMapNoNorm($inFH,$outfile);
      }
      if($line =~ /C.1.1/ && defined $c11 ){
            createProhibitedTable($inFH,$outfile,$line);
      }
      if($line =~ /C.1.2/ && defined $c12 ){
            createProhibitedTable($inFH,$outfile,$line);
      }
      if($line =~ /C.2.1/ && defined $c21 ){
            createProhibitedTable($inFH,$outfile,$line);
      }
      if($line =~ /C.2.2/ && defined $c22 ){
            createProhibitedTable($inFH,$outfile,$line);
      }
      if($line =~ /C.3/ && defined $c3 ){
            createProhibitedTable($inFH,$outfile,$line);
      }
      if($line =~ /C.4/ && defined $c4 ){
            createProhibitedTable($inFH,$outfile,$line);
      }
      if($line =~ /C.5/ && defined $c5 ){
            createProhibitedTable($inFH,$outfile,$line);
      }
      if($line =~ /C.6/ && defined $c6 ){
            createProhibitedTable($inFH,$outfile,$line);
      }
      if($line =~ /C.7/ && defined $c7 ){
            createProhibitedTable($inFH,$outfile,$line);
      }
      if($line =~ /C.8/ && defined $c8 ){
            createProhibitedTable($inFH,$outfile,$line);
      }
      if($line =~ /C.9/ && defined $c9 ){
            createProhibitedTable($inFH,$outfile,$line);
      }
  }
  if( defined $writeISCSIChars){
      create_iSCSIExtraProhibitedTable($inFH, $outfile);
  }
  close($inFH);
}

#-----------------------------------------------------------------------
sub readPrint{
    local ($inFH, $outFH,$comment, $table) = @_;
    $count = 0;
    print $outFH $comment."\n";
    while(defined ($line = <$inFH>)){
        next if $line =~ /Hoffman\s\&\sBlanchet/;  # ignore heading
        next if $line =~ /RFC\s3454/; # ignore heading
        next if $line =~ /\f/;  # ignore form feed
        next if $line eq "\n";  # ignore blank lines
        # break if "End Table" is found
        if( $line =~ /End\sTable/){
            print $outFH "\n# Total code points $count\n\n";
            return;
        }
        if($print==1){
            print $line;
        }
        $line =~ s/-/../;
        $line =~ s/^\s+//;
        if($line =~ /\;/){
        }else{
            $line =~ s/$/;/;
        }
        if($table =~ /A/ ){
            ($code, $noise) = split /;/ , $line;
            $line = $code."; ; UNASSIGNED\n";
        }elsif ( $table =~ /B\.1/ ){
            $line =~ s/Map to nothing/MAP/;
        }elsif ( $table =~ /B\.[23]/ ){
            $line =~ s/Case map/MAP/;
            $line =~ s/Additional folding/MAP/;
        }elsif ( $table =~ /C/ ) {
            ($code, $noise) = split /;/ , $line;   
            $line = $code."; ; PROHIBITED\n";
        }
        if($line =~ /\.\./){
            ($code, $noise) = split /;/ , $line;
            ($startStr, $endStr ) = split /\.\./, $code;
            $start = atoi($startStr);
            $end   = atoi($endStr);
            #print $start."     ".$end."\n";
            while($start <= $end){
                $count++;
                $start++;
            }
        }else{
              $count++;
        }
        print $outFH $line;
    }
}
#-----------------------------------------------------------------------
sub atoi {
    my $t;
    foreach my $d (split(//, shift())) {
        $t = $t * 16 + $d;
    }
    return $t;
}
#-----------------------------------------------------------------------
sub createUnassignedTable{
    ($inFH,$outfile) = @_;
    $outFH = IO::File->new($outfile,"a")
            or die  "could not open the file $outfile for writing: $! \n";
    $comment = "# This table contains code points from Table A.1 from RFC 3454\n";
    readPrint($inFH,$outFH, $comment, "A");
    close($outFH);
}
#-----------------------------------------------------------------------
sub createMapToNothing{
    ($inFH,$outfile) = @_;
    $outFH = IO::File->new($outfile,"a")
            or die  "could not open the file $outfile for writing: $! \n";
    $comment = "# This table contains code points from Table B.1 from RFC 3454\n";
    readPrint($inFH,$outFH,$comment, "B.1");
    close($outFH);
}
#-----------------------------------------------------------------------
sub createCaseMapNorm{
    ($inFH,$outfile) = @_;
    $outFH = IO::File->new($outfile,"a")
            or die  "could not open the file $outfile for writing: $! \n";
    $comment = $warning."# This table contains code points from Table B.2 from RFC 3454\n";
    readPrint($inFH,$outFH,$comment, "B.2");
    close($outFH);
}
#-----------------------------------------------------------------------
sub createCaseMapNoNorm{
    ($inFH,$outfile) = @_;
    $outFH = IO::File->new($outfile,"a")
            or die  "could not open the file $outfile for writing: $! \n";
    $comment = $warning."# This table contains code points from Table B.3 from RFC 3454\n";
    readPrint($inFH,$outFH,$comment, "B.3");
    close($outFH);
}
#-----------------------------------------------------------------------
sub createProhibitedTable{
    ($inFH,$outfile,$line) = @_;
    $line =~ s/Start//;
    $line =~ s/-//g;
    $comment = "# code points from $line";

    $outFH = IO::File->new($outfile, "a")
            or die  "could not open the file $outfile for writing: $! \n";
    readPrint($inFH,$outFH,$comment, "C");
    close($outFH);
}

#-----------------------------------------------------------------------
sub create_iSCSIExtraProhibitedTable{
    ($inFH,$outfile,$line) = @_;
    $comment ="# Additional prohibitions from draft-ietf-ips-iscsi-string-prep-06.txt\n";

    $outFH = IO::File->new($outfile, "a")
            or die  "could not open the file $outfile for writing: $! \n";
    print $outFH $comment;
    print $outFH "0021..002C; ; PROHIBITED\n";
    print $outFH "002F; ; PROHIBITED\n";
    print $outFH "003B..0040; ; PROHIBITED\n";
    print $outFH "005B..0060; ; PROHIBITED\n";
    print $outFH "007B..007E; ; PROHIBITED\n";
    print $outFH "3002; ; PROHIBITED\n";
    print $outFH "\n# Total code points 30\n";
    close($outFH);
}
#-----------------------------------------------------------------------
sub usage {
    print << "END";
Usage:
filterRFC3454.pl
Options:
        --sourcedir=<directory>
        --destdir=<directory>
        --src-filename=<name of RFC file>
        --dest-filename=<name of destination file>
        --A1             Generate data for table A.1
        --B1             Generate data for table B.1
        --B2             Generate data for table B.2
        --B3             Generate data for table B.3
        --C11            Generate data for table C.1.1
        --C12            Generate data for table C.1.2
        --C21            Generate data for table C.2.1
        --C22            Generate data for table C.2.2
        --C3             Generate data for table C.3
        --C4             Generate data for table C.4
        --C5             Generate data for table C.5
        --C6             Generate data for table C.6
        --C7             Generate data for table C.7
        --C8             Generate data for table C.8
        --C9             Generate data for table C.9
        --iscsi          Generate data for extra prohibited iSCSI chars

Note, --B2 and --B3 are mutually exclusive.

e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt  --dest-filename=NamePrepProfile.txt --A1 --B2 --C12 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9

filterRFC3454.pl filters the RFC file and creates String prep table files.
The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt

END
  exit(0);
}



--- NEW FILE: gensprep.8.in ---
.\" Hey, Emacs! This is -*-nroff-*- you know...
.\"
.\" gensprep.8: manual page for the gensprep utility
.\"
.\" Copyright (C) 2003 IBM, Inc. and others.
.\"
.TH gensprep 8 "18 March 2003" "ICU MANPAGE" "ICU @VERSION@ Manual"
.SH NAME
.B gensprep
\- compile StringPrep data from files filtered by filterRFC3454.pl 
.SH SYNOPSIS
.B gensprep
[
.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
]
[
.BR "\-v\fP, \fB\-\-verbose"
]
[
.BI "\-c\fP, \fB\-\-copyright"
]
[
.BI "\-s\fP, \fB\-\-sourcedir" " source"
]
[
.BI "\-d\fP, \fB\-\-destdir" " destination"
]
.SH DESCRIPTION
.B gensprep
reads filtered RFC 3454 files and compiles their
information into a binary form.
The resulting file,
.BR <name>.icu ,
can then be read directly by ICU, or used by
.BR pkgdata (8)
for incorporation into a larger archive or library.
.LP
The files read by
.B gensprep
are described in the
.B FILES
section. 
.SH OPTIONS
.TP
.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
Print help about usage and exit.
.TP
.BR "\-v\fP, \fB\-\-verbose"
Display extra informative messages during execution.
.TP
.BI "\-c\fP, \fB\-\-copyright"
Include a copyright notice into the binary data.
.TP
.BI "\-s\fP, \fB\-\-sourcedir" " source"
Set the source directory to
.IR source .
The default source directory is specified by the environment variable
.BR ICU_DATA .
.TP
.BI "\-d\fP, \fB\-\-destdir" " destination"
Set the destination directory to
.IR destination .
The default destination directory is specified by the environment variable
.BR ICU_DATA .
.SH ENVIRONMENT
.TP 10
.B ICU_DATA
Specifies the directory containing ICU data. Defaults to
.BR @thepkgicudatadir@/@PACKAGE@/@VERSION@/ .
Some tools in ICU depend on the presence of the trailing slash. It is thus
important to make sure that it is present if
.B ICU_DATA
is set.
.SH FILES
The following files are read by
.B gensprep
and are looked for in the
.I source
/misc for rfc3454_*.txt files and in 
.I source
/unidata for NormalizationCorrections.txt.
.TP 20
.B rfc3453_A_1.txt 
Contains the list of unassigned codepoints in Unicode version 3.2.0.\|.\|.. 
.TP
.B rfc3454_B_1.txt
Contains the list of code points that are commonly mapped to nothing.\|.\|..
.TP
.B rfc3454_B_2.txt
Contains the list of mappings for casefolding of  code points when Normalization form NFKC is specified.\|.\|..
.TP
.B rfc3454_C_X.txt
Contains the list of code points that are prohibited for IDNA.
.TP
.B NormalizationCorrections.txt
Contains the list of code points whose normalization has changed since Unicode Version 3.2.0. 
.SH VERSION
@VERSION@
.SH COPYRIGHT
Copyright (C) 2000-2002 IBM, Inc. and others.
.SH SEE ALSO
.BR pkgdata (8)

--- NEW FILE: gensprep.c ---
/*
*******************************************************************************
*
*   Copyright (C) 2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  gensprep.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003-02-06
*   created by: Ram Viswanadha
*
*   This program reads the Profile.txt files,
*   parses them, and extracts the data for StringPrep profile.
*   It then preprocesses it and writes a binary file for efficient use
*   in various StringPrep conversion processes.
*/

#define USPREP_TYPE_NAMES_ARRAY 1

#include <stdio.h>
#include <stdlib.h>

#include "cmemory.h"
#include "cstring.h"
#include "unewdata.h"
#include "uoptions.h"
#include "uparse.h"
#include "sprpimpl.h"

#include "unicode/uset.h"
#include "unicode/udata.h"
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/putil.h"
#include "unicode/uclean.h"


U_CDECL_BEGIN
#include "gensprep.h"
U_CDECL_END

#ifdef WIN32
#   pragma warning(disable: 4100)
#endif

UBool beVerbose=FALSE, haveCopyright=TRUE;

#define NORM_CORRECTIONS_FILE_NAME "NormalizationCorrections.txt"

/* prototypes --------------------------------------------------------------- */

static void
parseMappings(const char *filename, UBool reportError, UErrorCode *pErrorCode);

static void
parseNormalizationCorrections(const char *filename, UErrorCode *pErrorCode);


/* -------------------------------------------------------------------------- */

static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_VERBOSE,
    UOPTION_COPYRIGHT,
    UOPTION_DESTDIR,
    UOPTION_SOURCEDIR,
    UOPTION_ICUDATADIR,
    UOPTION_PACKAGE_NAME,
    UOPTION_BUNDLE_NAME,
    { "normalization", NULL, NULL, NULL, 'n', UOPT_REQUIRES_ARG, 0 },
    { "check-bidi", NULL, NULL, NULL,  'k', UOPT_NO_ARG, 0},
    { "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 },
};

enum{
    HELP,
    HELP_QUESTION_MARK,
    VERBOSE,
    COPYRIGHT,
    DESTDIR,
    SOURCEDIR,
    ICUDATADIR,
    PACKAGE_NAME,
    BUNDLE_NAME,
    NORMALIZE,
    CHECK_BIDI,
    UNICODE_VERSION
};

static int printHelp(int argc, char* argv[]){
    /*
     * Broken into chucks because the C89 standard says the minimum
     * required supported string length is 509 bytes.
     */
    fprintf(stderr,
        "Usage: %s [-options] [file_name]\n"
        "\n"
        "Read the files specified and\n"
        "create a binary file [package-name]_[bundle-name]." DATA_TYPE " with the StringPrep profile data\n"
        "\n",
        argv[0]);
    fprintf(stderr,
        "Options:\n"
        "\t-h or -? or --help       print this usage text\n"
        "\t-v or --verbose          verbose output\n"
        "\t-c or --copyright        include a copyright notice\n");
    fprintf(stderr,
        "\t-d or --destdir          destination directory, followed by the path\n"
        "\t-s or --sourcedir        source directory of ICU data, followed by the path\n"
        "\t-b or --bundle-name      generate the ouput data file with the name specified\n"
        "\t-i or --icudatadir       directory for locating any needed intermediate data files,\n"
        "\t                         followed by path, defaults to %s\n",
        u_getDataDirectory());
    fprintf(stderr,
        "\t-p or --package-name     prepend the output data file name with the package name specified\n"
        "\t-n or --normalize        turn on the option for normalization and include mappings\n"
        "\t                         from NormalizationCorrections.txt from the given path,\n"
        "\t                         e.g: /test/icu/source/data/unidata\n"
        "\t-k or --check-bidi       turn on the option for checking for BiDi in the profile\n"
        "\t-u or --unicode          version of Unicode to be used with this profile followed by the version\n"
        );
    return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
}


extern int
main(int argc, char* argv[]) {
#if !UCONFIG_NO_IDNA
    char* filename = NULL;
#endif
    const char *srcDir=NULL, *destDir=NULL, *icuUniDataDir=NULL;
    const char *packageName=NULL, *bundleName=NULL, *inputFileName = NULL;
    char *basename=NULL;
    int32_t sprepOptions = 0;

    UErrorCode errorCode=U_ZERO_ERROR;

    U_MAIN_INIT_ARGS(argc, argv);

    /* preset then read command line options */
    options[DESTDIR].value=u_getDataDirectory();
    options[SOURCEDIR].value="";
    options[UNICODE_VERSION].value="0"; /* don't assume the unicode version */
    options[BUNDLE_NAME].value = DATA_NAME;
    options[PACKAGE_NAME].value = U_ICUDATA_NAME;
    options[NORMALIZE].value = "";

    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    }
    if(argc<0 || options[HELP].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
        return printHelp(argc, argv);
        
    }

    /* get the options values */
    beVerbose=options[VERBOSE].doesOccur;
    haveCopyright=options[COPYRIGHT].doesOccur;
    srcDir=options[SOURCEDIR].value;
    destDir=options[DESTDIR].value;
    packageName = options[PACKAGE_NAME].value;
    bundleName = options[BUNDLE_NAME].value;
    icuUniDataDir = options[NORMALIZE].value;

    if(argc<2) {
        /* print the help message */
        return printHelp(argc, argv);
    } else {
        inputFileName = argv[1];
    }
    if(!options[UNICODE_VERSION].doesOccur){
        return printHelp(argc, argv);
    }
    if(options[ICUDATADIR].doesOccur) {
        u_setDataDirectory(options[ICUDATADIR].value);
    }
#if UCONFIG_NO_IDNA

    fprintf(stderr,
        "gensprep writes dummy " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE
        " because UCONFIG_NO_IDNA is set, \n"
        "see icu/source/common/unicode/uconfig.h\n");
    generateData(destDir, packageName, bundleName);

#else

    setUnicodeVersion(options[UNICODE_VERSION].value);
    filename = (char* ) uprv_malloc(uprv_strlen(srcDir) + 300); /* hopefully this should be enough */
   
    /* prepare the filename beginning with the source dir */
    if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){
        filename[0] = 0x2E;
        filename[1] = U_FILE_SEP_CHAR;
        uprv_strcpy(filename+2,srcDir);
    }else{
        uprv_strcpy(filename, srcDir);
    }
    
    basename=filename+uprv_strlen(filename);
    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
        *basename++=U_FILE_SEP_CHAR;
    }
    
    /* initialize */
    init();

    /* process the file */
    uprv_strcpy(basename,inputFileName);
    parseMappings(filename,FALSE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "Could not open file %s for reading. Error: %s \n", filename, u_errorName(errorCode));
        return errorCode;
    }
    
    if(options[NORMALIZE].doesOccur){
        /* set up directory for NormalizationCorrections.txt */
        uprv_strcpy(filename,icuUniDataDir);
        basename=filename+uprv_strlen(filename);
        if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
            *basename++=U_FILE_SEP_CHAR;
        }

        *basename++=U_FILE_SEP_CHAR;
        uprv_strcpy(basename,NORM_CORRECTIONS_FILE_NAME);
    
        parseNormalizationCorrections(filename,&errorCode);
        if(U_FAILURE(errorCode)){
            fprintf(stderr,"Could not open file %s for reading \n", filename);
            return errorCode;
        }
        sprepOptions |= _SPREP_NORMALIZATION_ON;
    }
    
    if(options[CHECK_BIDI].doesOccur){
        sprepOptions |= _SPREP_CHECK_BIDI_ON;
    }

    setOptions(sprepOptions);

    /* process parsed data */
    if(U_SUCCESS(errorCode)) {
        /* write the data file */
       generateData(destDir, packageName, bundleName);

       cleanUpData();
    }

    uprv_free(filename);

#endif

    return errorCode;
}

#if !UCONFIG_NO_IDNA

static void U_CALLCONV
normalizationCorrectionsLineFn(void *context,
                    char *fields[][2], int32_t fieldCount,
                    UErrorCode *pErrorCode) {
    uint32_t mapping[40];
    char *end, *s;
    uint32_t code;
    int32_t length;
    UVersionInfo version;
    UVersionInfo thisVersion;

    /* get the character code, field 0 */
    code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "gensprep: error parsing NormalizationCorrections.txt mapping at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }
    /* Original (erroneous) decomposition */
    s = fields[1][0];

    /* parse the mapping string */
    length=u_parseCodePoints(s, mapping, sizeof(mapping)/4, pErrorCode);

    /* ignore corrected decomposition */

    u_versionFromString(version,fields[3][0] );
    u_versionFromString(thisVersion, "3.2.0");



    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "gensprep error parsing NormalizationCorrections.txt of U+%04lx - %s\n",
                (long)code, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }

    /* store the mapping */
    if( version[0] > thisVersion[0] || 
        ((version[0]==thisVersion[0]) && (version[1] > thisVersion[1]))
        ){
        storeMapping(code,mapping, length, USPREP_MAP, pErrorCode);
    }
    setUnicodeVersionNC(version);
}

static void
parseNormalizationCorrections(const char *filename, UErrorCode *pErrorCode) {
    char *fields[4][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', fields, 4, normalizationCorrectionsLineFn, NULL, pErrorCode);

    /* fprintf(stdout,"Number of code points that have NormalizationCorrections mapping with length >1 : %i\n",len); */

    if(U_FAILURE(*pErrorCode) && ( *pErrorCode!=U_FILE_ACCESS_ERROR)) {
        fprintf(stderr, "gensprep error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}

static void U_CALLCONV
strprepProfileLineFn(void *context,
              char *fields[][2], int32_t fieldCount,
              UErrorCode *pErrorCode) {
    uint32_t mapping[40];
    char *end, *map;
    uint32_t code;
    int32_t length;
   /*UBool* mapWithNorm = (UBool*) context;*/
    const char* typeName;
    uint32_t rangeStart=0,rangeEnd =0;
    const char* filename = (const char*) context;
 
    typeName = fields[2][0];
    map = fields[1][0];

    if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){

        u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
            return;
        }

        /* store the range */
        storeRange(rangeStart,rangeEnd,USPREP_UNASSIGNED, pErrorCode);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){

        u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
            return;
        }

        /* store the range */
        storeRange(rangeStart,rangeEnd,USPREP_PROHIBITED, pErrorCode);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){

        /* get the character code, field 0 */
        code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
        if(end<=fields[0][0] || end!=fields[0][1]) {
            fprintf(stderr, "gensprep: syntax error in field 0 at %s\n", fields[0][0]);
            *pErrorCode=U_PARSE_ERROR;
            exit(U_PARSE_ERROR);
        }

        /* parse the mapping string */
        length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
        
        /* store the mapping */
        storeMapping(code,mapping, length,USPREP_MAP, pErrorCode);

    }else{
        *pErrorCode = U_INVALID_FORMAT_ERROR;
    }
    
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "gensprep error parsing  %s line %s at %s. Error: %s\n",filename,
               fields[0][0],fields[2][0],u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }

}

static void
parseMappings(const char *filename, UBool reportError, UErrorCode *pErrorCode) {
    char *fields[3][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode);

    /*fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);*/

    if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) {
        fprintf(stderr, "gensprep error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}


#endif /* #if !UCONFIG_NO_IDNA */

/*
 * Hey, Emacs, please set the following:
 *
 * Local Variables:
 * indent-tabs-mode: nil
 * End:
 *
 */

--- NEW FILE: gensprep.dsp ---
# Microsoft Developer Studio Project File - Name="gensprep" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 6.00
# ** DO NOT EDIT **

# TARGTYPE "Win32 (x86) Console Application" 0x0103

CFG=gensprep - Win32 Debug
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
!MESSAGE use the Export Makefile command and run
!MESSAGE 
!MESSAGE NMAKE /f "gensprep.mak".
!MESSAGE 
!MESSAGE You can specify a configuration when running NMAKE
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE 
!MESSAGE NMAKE /f "gensprep.mak" CFG="gensprep - Win32 Debug"
!MESSAGE 
!MESSAGE Possible choices for configuration are:
!MESSAGE 
!MESSAGE "gensprep - Win32 Release" (based on "Win32 (x86) Console Application")
!MESSAGE "gensprep - Win32 Debug" (based on "Win32 (x86) Console Application")
!MESSAGE "gensprep - Win64 Release" (based on "Win32 (x86) Console Application")
!MESSAGE "gensprep - Win64 Debug" (based on "Win32 (x86) Console Application")
!MESSAGE 

# Begin Project
# PROP AllowPerConfigDependencies 0
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
CPP=cl.exe
RSC=rc.exe

!IF  "$(CFG)" == "gensprep - Win32 Release"

# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
MTL=midl.exe
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
# ADD CPP /nologo /W3 /GX /O2 /I "..\..\common" /I "..\toolutil" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
# ADD LINK32 icuuc.lib icutu.lib /nologo /subsystem:console /machine:I386 /libpath:"..\..\..\lib"
# Begin Custom Build
TargetPath=.\Release\gensprep.exe
InputPath=.\Release\gensprep.exe
InputName=gensprep
SOURCE="$(InputPath)"

"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
	copy $(TargetPath) ..\..\..\bin

# End Custom Build

!ELSEIF  "$(CFG)" == "gensprep - Win32 Debug"

# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "Debug"
# PROP BASE Intermediate_Dir "Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "Debug"
# PROP Intermediate_Dir "Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
MTL=midl.exe
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /I "..\..\common" /I "..\toolutil" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
# ADD LINK32 icutud.lib icuucd.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\..\..\lib"
# Begin Custom Build
TargetPath=.\Debug\gensprep.exe
InputPath=.\Debug\gensprep.exe
InputName=gensprep
SOURCE="$(InputPath)"

"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
	copy $(TargetPath) ..\..\..\bin

# End Custom Build

!ELSEIF  "$(CFG)" == "gensprep - Win64 Release"

# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
MTL=midl.exe
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
# ADD CPP /nologo /W3 /GX /Zi /O2 /Op /I "..\..\common" /I "..\toolutil" /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /QIA64_fmaopt /Zm600 /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /machine:IA64
# ADD LINK32 icuuc.lib icutu.lib /nologo /subsystem:console /machine:IX86 /libpath:"..\..\..\lib" /machine:IA64
# Begin Custom Build
TargetPath=.\Release\gensprep.exe
InputPath=.\Release\gensprep.exe
InputName=gensprep
SOURCE="$(InputPath)"

"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
	copy $(TargetPath) ..\..\..\bin

# End Custom Build

!ELSEIF  "$(CFG)" == "gensprep - Win64 Debug"

# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "Debug"
# PROP BASE Intermediate_Dir "Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "Debug"
# PROP Intermediate_Dir "Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
MTL=midl.exe
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /Op /I "..\..\common" /I "..\toolutil" /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /GZ /QIA64_fmaopt /Zm600 /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:IX86 /pdbtype:sept /machine:IA64
# ADD LINK32 icutud.lib icuucd.lib /nologo /subsystem:console /incremental:no /debug /machine:IX86 /pdbtype:sept /libpath:"..\..\..\lib" /machine:IA64
# Begin Custom Build
TargetPath=.\Debug\gensprep.exe
InputPath=.\Debug\gensprep.exe
InputName=gensprep
SOURCE="$(InputPath)"

"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
	copy $(TargetPath) ..\..\..\bin

# End Custom Build

!ENDIF 

# Begin Target

# Name "gensprep - Win32 Release"
# Name "gensprep - Win32 Debug"
# Name "gensprep - Win64 Release"
# Name "gensprep - Win64 Debug"
# Begin Group "Source Files"

# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
# Begin Source File

SOURCE=.\gensprep.c
# End Source File
# Begin Source File

SOURCE=.\store.c
# End Source File
# End Group
# Begin Group "Header Files"

# PROP Default_Filter "h;hpp;hxx;hm;inl"
# Begin Source File

SOURCE=.\gensprep.h
# End Source File
# End Group
# Begin Group "Resource Files"

# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
# End Group
# End Target
# End Project

--- NEW FILE: gensprep.h ---
/*
*******************************************************************************
*
*   Copyright (C) 1999-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  genidn.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003-02-06
*   created by: Ram Viswanadha
*/

#ifndef __GENIDN_H__
#define __GENIDN_H__

#include "unicode/utypes.h"
#include "unicode/uset.h"
#include "sprpimpl.h"

/* file definitions */
#define DATA_NAME "sprep"
#define DATA_TYPE "spp"

/*
 * data structure that holds the IDN properties for one or more
 * code point(s) at build time
 */

 
/* global flags */
extern UBool beVerbose, haveCopyright;

/* prototypes */

extern void
setUnicodeVersion(const char *v);

extern void
setUnicodeVersionNC(UVersionInfo version);

extern void
init(void);

#if !UCONFIG_NO_IDNA
extern void
storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length, UStringPrepType type, UErrorCode* status);
extern void
storeRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status);
#endif

extern void
generateData(const char *dataDir, const char* packageName, const char* bundleName);

extern void
setOptions(int32_t options);

extern void
cleanUpData(void);

/*
extern void
storeIDN(uint32_t code, IDN *idn);

extern void
processData(void);


*/
#endif

/*
 * Hey, Emacs, please set the following:
 *
 * Local Variables:
 * indent-tabs-mode: nil
 * End:
 *
 */



--- NEW FILE: gensprep.vcproj ---
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
	ProjectType="Visual C++"
	Version="7.10"
	Name="gensprep"
	SccProjectName=""
	SccLocalPath="">
	<Platforms>
		<Platform
			Name="Win32"/>
	</Platforms>
	<Configurations>
		<Configuration
			Name="Release|Win32"
			OutputDirectory=".\Release"
			IntermediateDirectory=".\Release"
			ConfigurationType="1"
			UseOfMFC="0"
			ATLMinimizesCRunTimeLibraryUsage="FALSE"
			CharacterSet="2">
			<Tool
				Name="VCCLCompilerTool"
				InlineFunctionExpansion="2"
				ImproveFloatingPointConsistency="TRUE"
				AdditionalIncludeDirectories="..\..\common,..\toolutil"
				PreprocessorDefinitions="WIN32,NDEBUG,_CONSOLE"
				StringPooling="TRUE"
				RuntimeLibrary="4"
				EnableFunctionLevelLinking="TRUE"
				DisableLanguageExtensions="TRUE"
				PrecompiledHeaderFile=".\Release/gensprep.pch"
				AssemblerListingLocation=".\Release/"
				ObjectFile=".\Release/"
				ProgramDataBaseFileName=".\Release/"
				WarningLevel="3"
				SuppressStartupBanner="TRUE"
				CompileAs="0"/>
			<Tool
				Name="VCCustomBuildTool"
				CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin
"
				Outputs="..\..\..\bin\$(TargetFileName)"/>
			<Tool
				Name="VCLinkerTool"
				AdditionalOptions="/MACHINE:I386"
				AdditionalDependencies="icuuc.lib icutu.lib"
				OutputFile=".\Release/gensprep.exe"
				LinkIncremental="1"
				SuppressStartupBanner="TRUE"
				AdditionalLibraryDirectories="..\..\..\lib"
				ProgramDatabaseFile=".\Release/gensprep.pdb"
				SubSystem="1"/>
			<Tool
				Name="VCMIDLTool"
				TypeLibraryName=".\Release/gensprep.tlb"/>
			<Tool
				Name="VCPostBuildEventTool"/>
			<Tool
				Name="VCPreBuildEventTool"/>
			<Tool
				Name="VCPreLinkEventTool"/>
			<Tool
				Name="VCResourceCompilerTool"
				PreprocessorDefinitions="NDEBUG"
				Culture="1033"/>
			<Tool
				Name="VCWebServiceProxyGeneratorTool"/>
			<Tool
				Name="VCXMLDataGeneratorTool"/>
			<Tool
				Name="VCWebDeploymentTool"/>
			<Tool
				Name="VCManagedWrapperGeneratorTool"/>
			<Tool
				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
		</Configuration>
		<Configuration
			Name="Debug|Win32"
			OutputDirectory=".\Debug"
			IntermediateDirectory=".\Debug"
			ConfigurationType="1"
			UseOfMFC="0"
			ATLMinimizesCRunTimeLibraryUsage="FALSE"
			CharacterSet="2">
			<Tool
				Name="VCCLCompilerTool"
				Optimization="0"
				AdditionalIncludeDirectories="..\..\common,..\toolutil"
				PreprocessorDefinitions="WIN32,_DEBUG,_CONSOLE"
				BasicRuntimeChecks="3"
				RuntimeLibrary="5"
				DisableLanguageExtensions="TRUE"
				PrecompiledHeaderFile=".\Debug/gensprep.pch"
				AssemblerListingLocation=".\Debug/"
				ObjectFile=".\Debug/"
				ProgramDataBaseFileName=".\Debug/"
				WarningLevel="3"
				SuppressStartupBanner="TRUE"
				DebugInformationFormat="4"
				CompileAs="0"/>
			<Tool
				Name="VCCustomBuildTool"
				CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin
"
				Outputs="..\..\..\bin\$(TargetFileName)"/>
			<Tool
				Name="VCLinkerTool"
				AdditionalOptions="/MACHINE:I386"
				AdditionalDependencies="icutud.lib icuucd.lib"
				OutputFile=".\Debug/gensprep.exe"
				LinkIncremental="2"
				SuppressStartupBanner="TRUE"
				AdditionalLibraryDirectories="..\..\..\lib"
				GenerateDebugInformation="TRUE"
				ProgramDatabaseFile=".\Debug/gensprep.pdb"
				SubSystem="1"/>
			<Tool
				Name="VCMIDLTool"
				TypeLibraryName=".\Debug/gensprep.tlb"/>
			<Tool
				Name="VCPostBuildEventTool"/>
			<Tool
				Name="VCPreBuildEventTool"/>
			<Tool
				Name="VCPreLinkEventTool"/>
			<Tool
				Name="VCResourceCompilerTool"
				PreprocessorDefinitions="_DEBUG"
				Culture="1033"/>
			<Tool
				Name="VCWebServiceProxyGeneratorTool"/>
			<Tool
				Name="VCXMLDataGeneratorTool"/>
			<Tool
				Name="VCWebDeploymentTool"/>
			<Tool
				Name="VCManagedWrapperGeneratorTool"/>
			<Tool
				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
		</Configuration>
	</Configurations>
	<References>
	</References>
	<Files>
		<Filter
			Name="Source Files"
			Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat">
			<File
				RelativePath=".\gensprep.c">
			</File>
			<File
				RelativePath=".\store.c">
			</File>
		</Filter>
		<Filter
			Name="Header Files"
			Filter="h;hpp;hxx;hm;inl">
			<File
				RelativePath=".\gensprep.h">
			</File>
		</Filter>
		<Filter
			Name="Resource Files"
			Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe">
		</Filter>
	</Files>
	<Globals>
	</Globals>
</VisualStudioProject>

--- NEW FILE: store.c ---
/*
*******************************************************************************
*
*   Copyright (C) 1999-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  store.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003-02-06
*   created by: Ram Viswanadha
*
*/

#include <stdio.h>
#include <stdlib.h>
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "unicode/udata.h"
#include "utrie.h"
#include "unicode/uset.h"
#include "unewdata.h"
#include "gensprep.h"
#include "uhash.h"


#ifdef WIN32
#   pragma warning(disable: 4100)
#endif

#define DO_DEBUG_OUT 0


/* 
 * StringPrep profile file format ------------------------------------
 * 
 * The file format prepared and written here contains a 16-bit trie and a mapping table.
 * 
 * Before the data contents described below, there are the headers required by
 * the udata API for loading ICU data. Especially, a UDataInfo structure
 * precedes the actual data. It contains platform properties values and the
 * file format version.
 * 
 * The following is a description of format version 2.
 * 
 * Data contents:
 * 
 * The contents is a parsed, binary form of RFC3454 and possibly
 * NormalizationCorrections.txt depending on the options specified on the profile.
 * 
 * Any Unicode code point from 0 to 0x10ffff can be looked up to get
 * the trie-word, if any, for that code point. This means that the input
 * to the lookup are 21-bit unsigned integers, with not all of the
 * 21-bit range used.
 * 
 * *.spp files customarily begin with a UDataInfo structure, see udata.h and .c.
 * After that there are the following structures:
 *
 * int32_t indexes[_SPREP_INDEX_TOP];           -- _SPREP_INDEX_TOP=16, see enum in sprpimpl.h file
 *
 * UTrie stringPrepTrie;                        -- size in bytes=indexes[_SPREP_INDEX_TRIE_SIZE]
 * 
 * uint16_t mappingTable[];                     -- Contains the sequecence of code units that the code point maps to 
 *                                                 size in bytes = indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]
 *
 * The indexes array also contains the following values:
 *
 *  indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]  -- The index of Unicode version of last entry in NormalizationCorrections.txt 
 *  indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START]    -- The starting index of 1 UChar  mapping index in the mapping table 
 *  indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]   -- The starting index of 2 UChars mapping index in the mapping table
 *  indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] -- The starting index of 3 UChars mapping index in the mapping table
 *  indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]  -- The starting index of 4 UChars mapping index in the mapping table
 *  indexes[_SPREP_OPTIONS]                          -- Bit set of options to turn on in the profile, e.g: USPREP_NORMALIZATION_ON, USPREP_CHECK_BIDI_ON
 *    
 *
 * StringPrep Trie :
 *
 * The StringPrep tries is a 16-bit trie that contains data for the profile. 
 * Each code point is associated with a value (trie-word) in the trie.
 *
 * - structure of data words from the trie
 * 
 *  i)  A value greater than _SPREP_TYPE_THRESHOLD (0xFFF0) represents the type associated with the code point
 *      if(trieWord > _SPREP_TYPE_THRESHOLD){
 *          type = trieWord - 0xFFF0;
 *      }
 *      The type can be :
 *          USPREP_UNASSIGNED           
 *          USPREP_PROHIBITED           
 *          USPREP_LABEL_SEPARATOR      
 *          USPREP_DELETE  
 * 
 *  ii) A value less than _SPREP_TYPE_THRESHOLD means the type is USPREP_MAP and
 *      contains distribution described below
 *      
 *      0       -  ON : The code point is prohibited (USPREP_PROHIBITED)
 *      1       -  ON : The value in the next 14 bits is an index into the mapping table
 *                 OFF: The value in the next 14 bits is an delta value from the code point
 *      2..15   -  Contains data as described by bit 1. If all bits are set 
 *                 (value = _SPREP_MAX_INDEX_VALUE) then the type is USPREP_DELETE
 *
 *  
 * Mapping Table:
 * The data in mapping table is sorted according to the length of the mapping sequence.
 * If the type of the code point is USPREP_MAP and value in trie word is an index, the index
 * is compared with start indexes of sequence length start to figure out the length according to
 * the following algorithm:
 *
 *              if(       index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
 *                        index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
 *                   length = 1;
 *               }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
 *                        index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
 *                   length = 2;
 *               }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
 *                        index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
 *                   length = 3;
 *               }else{
 *                   // The first position in the mapping table contains the length 
 *                   // of the sequence
 *                   length = mappingTable[index++];
 *        
 *               }
 *
 */

/* file data ---------------------------------------------------------------- */
/* indexes[] value names */

#if UCONFIG_NO_IDNA

/* dummy UDataInfo cf. udata.h */
static UDataInfo dataInfo = {
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    U_SIZEOF_UCHAR,
    0,

    { 0, 0, 0, 0 },                 /* dummy dataFormat */
    { 0, 0, 0, 0 },                 /* dummy formatVersion */
    { 0, 0, 0, 0 }                  /* dummy dataVersion */
};

#else

static int32_t indexes[_SPREP_INDEX_TOP]={ 0 };

static uint16_t* mappingData= NULL;
static int32_t mappingDataCapacity = 0; /* we skip the first index in mapping data */
static int16_t currentIndex = 0; /* the current index into the data trie */
static int32_t maxLength = 0;  /* maximum length of mapping string */


/* UDataInfo cf. udata.h */
static UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    U_SIZEOF_UCHAR,
    0,

    { 0x53, 0x50, 0x52, 0x50 },                 /* dataFormat="SPRP" */
    { 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT },   /* formatVersion */
    { 3, 2, 0, 0 }                              /* dataVersion (Unicode version) */
};
void
setUnicodeVersion(const char *v) {
    UVersionInfo version;
    u_versionFromString(version, v);
    uprv_memcpy(dataInfo.dataVersion, version, 4);
}

void
setUnicodeVersionNC(UVersionInfo version){
    uint32_t univer = version[0] << 24;
    univer += version[1] << 16;
    univer += version[2] << 8;
    univer += version[3];
    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] = univer;
}
static UNewTrie *sprepTrie;

#define MAX_DATA_LENGTH 11500


#define SPREP_DELTA_RANGE_POSITIVE_LIMIT              8191 
#define SPREP_DELTA_RANGE_NEGATIVE_LIMIT              -8192


extern void
init() {

    sprepTrie = (UNewTrie *)uprv_malloc(sizeof(UNewTrie));
    uprv_memset(sprepTrie, 0, sizeof(UNewTrie));

    /* initialize the two tries */
    if(NULL==utrie_open(sprepTrie, NULL, MAX_DATA_LENGTH, 0, 0, FALSE)) {
        fprintf(stderr, "error: failed to initialize tries\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }
}

static UHashtable* hashTable = NULL;


typedef struct ValueStruct {
    UChar* mapping;
    int16_t length;
    UStringPrepType type;
} ValueStruct;

/* Callback for deleting the value from the hashtable */
static void U_CALLCONV valueDeleter(void* obj){
    ValueStruct* value = (ValueStruct*) obj;
    uprv_free(value->mapping);
    uprv_free(value);
}

/* Callback for hashing the entry */
static int32_t U_CALLCONV hashEntry(const UHashTok parm) {
    return  parm.integer;
}

/* Callback for comparing two entries */
static UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) {
    return (UBool)(p1.integer != p2.integer);
}


static void 
storeMappingData(){

    int32_t pos = -1;
    const UHashElement* element = NULL;
    ValueStruct* value  = NULL;
    int32_t codepoint = 0;
    int32_t elementCount = uhash_count(hashTable);
    int32_t writtenElementCount = 0;
    int32_t mappingLength = 1; /* minimum mapping length */
    int32_t oldMappingLength = 0;
    uint16_t trieWord =0;
    int32_t limitIndex = 0;

    /*initialize the mapping data */
    mappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * (mappingDataCapacity));

    uprv_memset(mappingData,0,U_SIZEOF_UCHAR * mappingDataCapacity);

    while(writtenElementCount < elementCount){

        while( (element = uhash_nextElement(hashTable, &pos))!=NULL){
            
            codepoint = element->key.integer;
            value = (ValueStruct*)element->value.pointer;
            
            /* store the start of indexes */
            if(oldMappingLength != mappingLength){
                /* Assume that index[] is used according to the enums defined */
                if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){
                    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex;
                }
                if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH &&
                   mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){
                   
                    limitIndex = currentIndex;
                     
                }
                oldMappingLength = mappingLength;
            }

            if(value->length == mappingLength){
                uint32_t savedTrieWord = 0;
                trieWord = currentIndex << 2;
                /* turn on the 2nd bit to signal that the following bits contain an index */
                trieWord += 0x02;
            
                if(trieWord > _SPREP_TYPE_THRESHOLD){
                    fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
                    exit(U_ILLEGAL_CHAR_FOUND);
                }
                /* figure out if the code point has type already stored */
                savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
                if(savedTrieWord!=0){
                    if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
                        /* turn on the first bit in trie word */
                        trieWord += 0x01;
                    }else{
                        /* 
                         * the codepoint has value something other than prohibited
                         * and a mapping .. error! 
                         */
                        fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", codepoint);
                        exit(U_ILLEGAL_ARGUMENT_ERROR); 
                    } 
                } 
                
                /* now set the value in the trie */
                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                    fprintf(stderr,"Could not set the value for code point.\n");
                    exit(U_ILLEGAL_ARGUMENT_ERROR);   
                }

                /* written the trie word for the codepoint... increment the count*/
                writtenElementCount++;

                /* sanity check are we exceeding the max number allowed */
                if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){
                    fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n", currentIndex+value->length, _SPREP_MAX_INDEX_VALUE);
                    exit(U_INDEX_OUTOFBOUNDS_ERROR);
                }

                /* copy the mapping data */
                if(currentIndex+value->length+1 <= mappingDataCapacity){
                    /* write the length */
                    if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
                         /* the cast here is safe since we donot expect the length to be > 65535 */
                         mappingData[currentIndex++] = (uint16_t) mappingLength;
                    }
                    /* copy the contents to mappindData array */
                    uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
                    currentIndex += value->length;
                   
                }else{
                    /* realloc */
                    UChar* newMappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * mappingDataCapacity*2);
                    if(newMappingData == NULL){
                        fprintf(stderr, "Could not realloc the mapping data!\n");
                        exit(U_MEMORY_ALLOCATION_ERROR);
                    }
                    uprv_memmove(newMappingData, mappingData, U_SIZEOF_UCHAR * mappingDataCapacity);
                    mappingDataCapacity *= 2;
                    uprv_free(mappingData);
                    mappingData = newMappingData;
                    /* write the length */
                    if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
                         /* the cast here is safe since we donot expect the length to be > 65535 */
                         mappingData[currentIndex++] = (uint16_t) mappingLength;
                    }
                    /* continue copying */
                    uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
                    currentIndex += value->length;
                }
                          
            }
        }
        mappingLength++;
        pos = -1;
    }
    /* set the last length for range check */
    if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){
        indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1;
    }else{
        indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex;
    }
    
}

extern void setOptions(int32_t options){
    indexes[_SPREP_OPTIONS] = options;
}
extern void
storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
             UStringPrepType type, UErrorCode* status){
    
 
    UChar* map = NULL;
    int16_t adjustedLen=0, i;
    uint16_t trieWord = 0;
    ValueStruct *value = NULL;
    uint32_t savedTrieWord = 0;

    /* initialize the hashtable */
    if(hashTable==NULL){
        hashTable = uhash_open(hashEntry, compareEntries, status);
        uhash_setValueDeleter(hashTable, valueDeleter);
    }
    
    /* figure out if the code point has type already stored */
    savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
    if(savedTrieWord!=0){
        if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
            /* turn on the first bit in trie word */
            trieWord += 0x01;
        }else{
            /* 
             * the codepoint has value something other than prohibited
             * and a mapping .. error! 
             */
            fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", codepoint);
            exit(U_ILLEGAL_ARGUMENT_ERROR); 
        } 
    }

    /* figure out the real length */ 
    for(i=0; i<length; i++){
        if(mapping[i] > 0xFFFF){
            adjustedLen +=2;
        }else{
            adjustedLen++;
        }      
    }

    if(adjustedLen == 0){
        trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
        /* make sure that the value of trieWord is less than the threshold */
        if(trieWord < _SPREP_TYPE_THRESHOLD){   
            /* now set the value in the trie */
            if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                fprintf(stderr,"Could not set the value for code point.\n");
                exit(U_ILLEGAL_ARGUMENT_ERROR);   
            }
            /* value is set so just return */
            return;
        }else{
            fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
            exit(U_ILLEGAL_CHAR_FOUND);
        }
    }

    if(adjustedLen == 1){
        /* calculate the delta */
        int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]);
        if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){

            trieWord = delta << 2;


            /* make sure that the second bit is OFF */
            if((trieWord & 0x02) != 0 ){
                fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
                exit(U_INTERNAL_PROGRAM_ERROR);
            }
            /* make sure that the value of trieWord is less than the threshold */
            if(trieWord < _SPREP_TYPE_THRESHOLD){   
                /* now set the value in the trie */
                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                    fprintf(stderr,"Could not set the value for code point.\n");
                    exit(U_ILLEGAL_ARGUMENT_ERROR);   
                }
                /* value is set so just return */
                return;
            }
        }
        /* 
         * if the delta is not in the given range or if the trieWord is larger than the threshold
         * just fall through for storing the mapping in the mapping table
         */
    }

    map = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (adjustedLen+1));
    uprv_memset(map,0,U_SIZEOF_UCHAR * (adjustedLen+1));

    i=0;
    
    while(i<length){
        if(mapping[i] <= 0xFFFF){
            map[i] = (uint16_t)mapping[i];
        }else{
            map[i]   = UTF16_LEAD(mapping[i]);
            map[i+1] = UTF16_TRAIL(mapping[i]);
        }
        i++;
    }
    
    value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
    value->mapping = map;
    value->type   = type;
    value->length  = adjustedLen;
    if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
        mappingDataCapacity++;
    }
    if(maxLength < value->length){
        maxLength = value->length;
    }
    uhash_iput(hashTable,codepoint,value,status);
    mappingDataCapacity += adjustedLen;

    if(U_FAILURE(*status)){
        fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
        exit(*status);
    }
}


extern void
storeRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status){
    uint16_t trieWord = 0;

    trieWord += (_SPREP_TYPE_THRESHOLD + type); /* the top 4 bits contain the value */
    if(trieWord > 0xFFFF){
        fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n");
        exit(U_ILLEGAL_CHAR_FOUND);
    }
    if(start == end){
        uint32_t savedTrieWord = utrie_get32(sprepTrie, start, NULL);
        if(savedTrieWord>0){
            if(savedTrieWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){
                /* 
                 * A mapping is stored in the trie word 
                 * and the only other possible type that a 
                 * code point can have is USPREP_PROHIBITED
                 *
                 */

                /* turn on the 0th bit in the savedTrieWord */
                savedTrieWord += 0x01;

                /* the downcast is safe since we only save 16 bit values */
                trieWord = (uint16_t)savedTrieWord;

                /* make sure that the value of trieWord is less than the threshold */
                if(trieWord < _SPREP_TYPE_THRESHOLD){   
                    /* now set the value in the trie */
                    if(!utrie_set32(sprepTrie,start,trieWord)){
                        fprintf(stderr,"Could not set the value for code point.\n");
                        exit(U_ILLEGAL_ARGUMENT_ERROR);   
                    }
                    /* value is set so just return */
                    return;
                }else{
                    fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
                    exit(U_ILLEGAL_CHAR_FOUND);
                }
 
            }else if(savedTrieWord != trieWord){
                fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", start);
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }
            /* if savedTrieWord == trieWord .. fall through and set the value */
        }
        if(!utrie_set32(sprepTrie,start,trieWord)){
            fprintf(stderr,"Could not set the value for code point \\U%08X.\n", start);
            exit(U_ILLEGAL_ARGUMENT_ERROR);   
        }
    }else{
        if(!utrie_setRange32(sprepTrie, start, end+1, trieWord, FALSE)){
            fprintf(stderr,"Value for certain codepoint already set.\n");
            exit(U_ILLEGAL_CHAR_FOUND);   
        }
    }

}

/* folding value: just store the offset (16 bits) if there is any non-0 entry */
static uint32_t U_CALLCONV
getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
    uint32_t foldedValue, value;
    UChar32 limit=0;
    UBool inBlockZero;

    foldedValue=0;

    limit=start+0x400;
    while(start<limit) {
        value=utrie_get32(trie, start, &inBlockZero);
        if(inBlockZero) {
            start+=UTRIE_DATA_BLOCK_LENGTH;
        } else if(value!=0) {
            return (uint32_t)offset;
        } else {
            ++start;
        }
    }
    return 0;

}

#endif /* #if !UCONFIG_NO_IDNA */

extern void
generateData(const char *dataDir, const char *packageName, const char* bundleName) {
    static uint8_t sprepTrieBlock[100000];

    UNewDataMemory *pData;
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t size, dataLength;
    char* fileName = (char*) uprv_malloc(uprv_strlen(bundleName) +100);

#if UCONFIG_NO_IDNA

    size=0;

#else

    int32_t sprepTrieSize;

    /* sort and add mapping data */
    storeMappingData();
    
    sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode));
        exit(errorCode);
    }
    
    size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes);
    if(beVerbose) {
        printf("size of sprep trie              %5u bytes\n", sprepTrieSize);
        printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size);
        printf("size of mapping data array %5u bytes\n",mappingDataCapacity * U_SIZEOF_UCHAR);
        printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex);
        printf("Maximum length of the mapping string is : %i \n", maxLength);
    }

#endif
    
    uprv_strcpy(fileName,packageName);
    uprv_strcat(fileName,"_");
    uprv_strcat(fileName,bundleName);
    /* write the data */
    pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo,
                       haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode);
        exit(errorCode);
    }

#if !UCONFIG_NO_IDNA

    indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize;
    indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR;
    
    udata_writeBlock(pData, indexes, sizeof(indexes));
    udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize);
    udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]);
    

#endif

    /* finish up */
    dataLength=udata_finish(pData, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode);
        exit(errorCode);
    }

    if(dataLength!=size) {
        fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n",
            (long)dataLength, (long)size);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }

#if !UCONFIG_NO_IDNA
    /* done with writing the data .. close the hashtable */
    uhash_close(hashTable);
#endif
}

#if !UCONFIG_NO_IDNA

extern void
cleanUpData(void) {

    utrie_close(sprepTrie);
    uprv_free(sprepTrie);
}

#endif /* #if !UCONFIG_NO_IDNA */

/*
 * Hey, Emacs, please set the following:
 *
 * Local Variables:
 * indent-tabs-mode: nil
 * End:
 *
 */