[sword-cvs] icu-sword/source/test/testdata conversion.txt,NONE,1.1 nfs4_cis_prep.txt,NONE,1.1 nfs4_cs_prep_ci.txt,NONE,1.1 nfs4_cs_prep_cs.txt,NONE,1.1 nfs4_mixed_prep_p.txt,NONE,1.1 nfs4_mixed_prep_s.txt,NONE,1.1 ra.txt,NONE,1.1 riwords.txt,NONE,1.1 test4x.ucm,NONE,1.1 CollationTest_NON_IGNORABLE_STUB.txt,1.1,1.2 CollationTest_SHIFTED_STUB.txt,1.1,1.2 DataDrivenCollationTest.txt,1.1,1.2 idna_rules.txt,1.1,1.2 rbbitst.txt,1.1,1.2 regextst.txt,1.1,1.2 te.txt,1.3,1.4 test1.ucm,1.3,1.4 test3.ucm,1.3,1.4 test4.ucm,1.3,1.4 testaliases.txt,1.1,1.2 testdata.mk,1.4,1.5 testtypes.txt,1.4,1.5 translit_rules.txt,1.1,1.2 th18057.txt,1.5,NONE

sword@www.crosswire.org sword@www.crosswire.org
Tue, 6 Apr 2004 03:11:19 -0700


Update of /cvs/core/icu-sword/source/test/testdata
In directory www:/tmp/cvs-serv8911/source/test/testdata

Modified Files:
	CollationTest_NON_IGNORABLE_STUB.txt 
	CollationTest_SHIFTED_STUB.txt DataDrivenCollationTest.txt 
	idna_rules.txt rbbitst.txt regextst.txt te.txt test1.ucm 
	test3.ucm test4.ucm testaliases.txt testdata.mk testtypes.txt 
	translit_rules.txt 
Added Files:
	conversion.txt nfs4_cis_prep.txt nfs4_cs_prep_ci.txt 
	nfs4_cs_prep_cs.txt nfs4_mixed_prep_p.txt 
	nfs4_mixed_prep_s.txt ra.txt riwords.txt test4x.ucm 
Removed Files:
	th18057.txt 
Log Message:
ICU 2.8 sync

--- NEW FILE: conversion.txt ---
//*******************************************************************************
//
//   Copyright (C) 2003, International Business Machines
//   Corporation and others.  All Rights Reserved.
//
//   file name:  conversion.txt
//   encoding:   US-ASCII
//   tab size:   8 (not used)
//   indentation:4
//
//   created on: 2003jul15
//   created by: Markus W. Scherer
//
//   ICU resource bundle source file with test data for data-driven conversion tests.
//
//*******************************************************************************

conversion {
  Info {
    Description { "Test data for conversion" }
    LongDescription {
      "Test data for data-driven conversion tests in icu/source/test/intltest/convtest.cpp\n"
      "Run intltest conversion\n"

      "Charset names starting with '*' are for testdata names.\n"

      "ICU callbacks are specified as strings with pairs of characters, each optional.\n"
      "Callback function - '?'=Sub '0'=Skip '.'=Stop '&'=Escape\n"
      "Callback option - a letter is passed in directly as const char * see ucnv_err.h\n"
      "Empty string: Sub callback with NULL option\n"

      "In order to specify a charset substitution character,\n"
      "add a NUL (U+0000) to the callback string followed by the subchar bytes as Latin-1\n"
      "characters. For example, for a Sub callback with no option and a subchar of FC FC,\n"
      "use the string \"?\x00\xFC\xFC\"\n"

      "fallbacks: per-direction boolean, currently only for fromUnicode; see Jitterbug 2401\n"

      "errorCode: (empty)==zero | invalid | illegal | truncated | illesc | unsuppesc\n"
    }
  }
  TestData {
    toUnicode {
      Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" }
      Cases {
        // test that ISO-2022-JP decodes ASCII as itself
        {
          "ISO-2022-JP",
          :bin{ 3f4041424344454647 },
          "?@ABCDEFG",
          :intvector{ 0,1,2,3,4,5,6,7,8 },
          :int{1}, :int{1}, "", "?", :bin{""}
        }
        // test that ISO-2022-CN decodes ASCII as itself
        {
          "ISO-2022-CN",
          :bin{ 3f4041424344454647 },
          "?@ABCDEFG",
          :intvector{ 0,1,2,3,4,5,6,7,8 },
          :int{1}, :int{1}, "", "?", :bin{""}
        }

        // ISO-2022-KR

        // truncated, partial escape sequence
        {
          "ibm-25546",
          :bin{ 1b }, "", :intvector{},
          :int{1}, :int{1}, "truncated", ".", :bin{ 1b }
        }
        {
          "ibm-25546",
          :bin{ 1b24 }, "", :intvector{},
          :int{1}, :int{1}, "truncated", ".", :bin{ 1b24 }
        }
        {
          "ibm-25546",
          :bin{ 1b2429 }, "", :intvector{},
          :int{1}, :int{1}, "truncated", ".", :bin{ 1b2429 }
        }
        // complete escape sequence but nothing else
        {
          "ibm-25546",
          :bin{ 1b242943 }, "", :intvector{},
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        {
          "ibm-25546",
          :bin{ 1b2429430e }, "", :intvector{},
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        // escape plus ASCII character
        {
          "ibm-25546",
          :bin{ 1b24294341 }, "A", :intvector{ 4 },
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        // escape plus incomplete DBCS character
        {
          "ibm-25546",
          :bin{ 1b2429430e41 }, "", :intvector{},
          :int{1}, :int{1}, "truncated", ".", :bin{ 41 }
        }
        // all complete with DBCS character
        {
          "ibm-25546",
          :bin{ 1b2429430e4141 }, "\uc88b", :intvector{ 5 },
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        // more complicated example
        {
          "ibm-25546",
          :bin{ 411b242943420e4141affe0f43 },
          "AB\uc88b%XAF%XFEC",
          :intvector{ 0, 5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 12 },
          :int{1}, :int{1}, "", "&", :bin{""}
        }

        // truncated, partial escape sequence
        {
          "ISO-2022-KR",
          :bin{ 1b }, "", :intvector{},
          :int{1}, :int{1}, "truncated", ".", :bin{ 1b }
        }
        {
          "ISO-2022-KR",
          :bin{ 1b24 }, "", :intvector{},
          :int{1}, :int{1}, "truncated", ".", :bin{ 1b24 }
        }
        {
          "ISO-2022-KR",
          :bin{ 1b2429 }, "", :intvector{},
          :int{1}, :int{1}, "truncated", ".", :bin{ 1b2429 }
        }
        // complete escape sequence but nothing else
        {
          "ISO-2022-KR",
          :bin{ 1b242943 }, "", :intvector{},
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        {
          "ISO-2022-KR",
          :bin{ 1b2429430e }, "", :intvector{},
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        // escape plus ASCII character
        {
          "ISO-2022-KR",
          :bin{ 1b24294341 }, "A", :intvector{ 4 },
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        // escape plus incomplete DBCS character
        {
          "ISO-2022-KR",
          :bin{ 1b2429430e41 }, "", :intvector{},
          :int{1}, :int{1}, "truncated", ".", :bin{ 41 }
        }
        // all complete with DBCS character
        {
          "ISO-2022-KR",
          :bin{ 1b2429430e4141 }, "\uc88b", :intvector{ 5 },
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        // more complicated example
        {
          "ISO-2022-KR",
          :bin{ 411b242943420e4141affe0f43 },
          "AB\uc88b%XAF%XFEC",
          :intvector{ 0, 5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 12 },
          :int{1}, :int{1}, "", "&", :bin{""}
        }

        // ISO-2022-JP

        // truncated, partial escape sequence
        {
          "ISO-2022-JP",
          :bin{ 1b }, "", :intvector{},
          :int{1}, :int{1}, "truncated", ".", :bin{ 1b }
        }
        {
          "ISO-2022-JP-2",
          :bin{ 1b24 }, "", :intvector{},
          :int{1}, :int{1}, "truncated", ".", :bin{ 1b24 }
        }
        // complete escape sequence but nothing else
        {
          "ISO-2022-JP-2",
          :bin{ 1b2442 }, "", :intvector{},
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        // escape plus incomplete DBCS character
        {
          "ISO-2022-JP-2",
          :bin{ 1b244241 }, "", :intvector{},
          :int{1}, :int{1}, "truncated", ".", :bin{ 41 }
        }
        // all complete with DBCS character
        {
          "ISO-2022-JP-2",
          :bin{ 1b24424141 }, "\u758f", :intvector{ 3 },
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        // test the G2 designator & SS2 shift
        {
          "ISO-2022-JP-2",
          :bin{ 431b2e46461b244241411b4e4e353f }, "CF\u758f\u039e\u7591", :intvector{ 0, 4, 8, 12, 13 },
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        // JIS7 with Katakana
        {
          "JIS7",
          :bin{ 41420e41420f4142 }, "AB\uff81\uff82AB", :intvector{ 0, 1, 3, 4, 6, 7 },
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        // JIS8 with Katakana
        {
          "JIS8",
          :bin{ 41c15c1b284a5cc242 }, "A\uff81\\\xa5\uff82B", :intvector{ 0, 1, 2, 6, 7, 8 },
          :int{1}, :int{1}, "", ".", :bin{""}
        }

        // ISO-2022-CN

        // truncated, partial escape sequence
        {
          "ISO_2022,locale=zh,version=1",
          :bin{ 1b }, "", :intvector{},
          :int{1}, :int{1}, "truncated", ".", :bin{ 1b }
        }
        {
          "ISO_2022,locale=zh,version=1",
          :bin{ 1b24 }, "", :intvector{},
          :int{1}, :int{1}, "truncated", ".", :bin{ 1b24 }
        }
        {
          "ISO_2022,locale=zh,version=1",
          :bin{ 1b2429 }, "", :intvector{},
          :int{1}, :int{1}, "truncated", ".", :bin{ 1b2429 }
        }
        // complete escape sequence but nothing else
        {
          "ISO_2022,locale=zh,version=1",
          :bin{ 1b242941 }, "", :intvector{},
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        {
          "ISO_2022,locale=zh,version=1",
          :bin{ 1b2429410e }, "", :intvector{},
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        // escape plus ASCII character
        {
          "ISO_2022,locale=zh,version=1",
          :bin{ 1b24294141 }, "\x41", :intvector{ 4 },
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        // escape plus incomplete DBCS character
        {
          "ISO_2022,locale=zh,version=1",
          :bin{ 1b2429410e41 }, "", :intvector{},
          :int{1}, :int{1}, "truncated", ".", :bin{ 41 }
        }
        // all complete with DBCS character
        {
          "ISO_2022,locale=zh,version=1",
          :bin{ 1b2429410e4141 }, "\u4eae", :intvector{ 5 },
          :int{1}, :int{1}, "", ".", :bin{""}
        }
        // ISO-2022-CN-EXT with all subcharsets and shifts and with supplementary code points
        {
          "ISO-2022-CN-EXT",
          :bin{ 1b2429411b242a480e41411b2429457e7c1b4e70341b242b4d1b2429477c341b4f664c2421 },
          "\u4eae\u9f82\u56cd\u56cc\U0002a6d6\x30",
          :intvector{ 9, 15, 19, 29, 33, 33, 35 },
          :int{1}, :int{1}, "", ".", :bin{""}
        }

        // illegal and unsupported escape sequences
        // SS2 without designator: illegal
        {
          "ISO-2022-CN-EXT",
          :bin{ 411b4e2121 }, "\x41", :intvector{ 0 },
          :int{1}, :int{1}, "illesc", ".", :bin{ 1b4e }
        }
        // G3 designator: recognized, but not supported for -CN (only for -CN-EXT)
        {
          "ISO-2022-CN",
          :bin{ 411b242b491b4f2121 }, "\x41", :intvector{ 0 },
          :int{1}, :int{1}, "unsuppesc", ".", :bin{ 1b242b49 }
        }

        // ISO-2022 SBCS
        // [U_ENABLE_GENERIC_ISO_2022]
        // The _generic_ ISO-2022 converter is disabled starting 2003-dec-03 (ICU 2.8).
        // For details see the icu mailing list from 2003-dec-01 and the ucnv2022.c file.
        // Language-specific variants of ISO-2022 continue to be available as listed below.
        //{
        //  "ISO_2022",
        //  :bin{ 0008090a0d1a1c1f203f415c7d7e7f },
        //  "\x00\x08\t\n\r\x1a\x1c\x1f ?A\\}~\x7f",
        //  :intvector{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 },
        //  :int{1}, :int{1}, "", ".", :bin{""}
        //}

        // DBCS-only extensions
        {
          "ibm-970",
          :bin{ 617eece9b2eb },
          "\x61\x7e\u4e00\ub000",
          :intvector{ 0, 1, 2, 4 },
          :int{1}, :int{1}, "", "?", :bin{""}
        }

        {
          "ibm-971",
          :bin{ 617eece9b2eb },
          "\ufffd\u4e00\ub000",
          :intvector{ 0, 2, 4 },
          :int{1}, :int{1}, "", "?", :bin{""}
        }

        {
          "ibm-16684",
          :bin{ 430e4395ecc1404042e1 },
          "\ufffd\u30C8\u30C8\u309A\u3000\u20ac",
          :intvector{ 0, 2, 4, 4, 6, 8 },
          :int{1}, :int{0}, "", "?", :bin{""}
        }

        {
          "ibm-1399",
          :bin{ 430e4395ecc140400fe1 },
          "\uff62\u30C8\u30C8\u309A\u3000\u20ac",
          :intvector{ 0, 2, 4, 4, 6, 9 },
          :int{1}, :int{0}, "", "?", :bin{""}
        }

        // extensions
        {
          "ibm-1390",
          :bin{ 430e4395ecc1 },
          "\uff63\u30C8\u30C8\u309A",
          :intvector{ 0, 2, 4, 4 },
          :int{1}, :int{0}, "", "?", :bin{""}
        }

        {
          "ibm-16684",
          :bin{ ececec8bec8cec8d4386ecb5ecb6ecb7 },
          "\ufffd\u31f6\u31f7\u31f8\u30ab\u304b\u309a\u304d\u309a\u304f\u309a",
          :intvector{ 0, 2, 4, 6, 8, 10, 10, 12, 12, 14, 14 },
          :int{1}, :int{0}, "", "?", :bin{""}
        }

        {
          "ibm-1390",
          :bin{ 43860eececec8bec8cec8d4386ecb5ecb6ecb7ecc10fec },
          "\uff63\uff76\ufffd\u31f6\u31f7\u31f8\u30ab\u304b\u309a\u304d\u309a\u304f\u309a\u30C8\u309A\x1a",
          :intvector{ 0, 1, 3, 5, 7, 9, 11, 13, 13, 15, 15, 17, 17, 19, 19, 22 },
          :int{1}, :int{0}, "", "?", :bin{""}
        }

        {
          "*test3",
          :bin{ 00050601020b0701020a01020c },
          "\u20ac\x05\x06\x0b\U00101234\U00023456\ufffd",
          :intvector{ 0, 1, 2, 3, 6, 6, 7, 7, 10 },
          :int{1}, :int{0}, "", "?", :bin{""}
        }

        // normal conversions
        {
          "UTF-16LE",
          :bin{ 310000d801dc00d902dc320000d8330001dc3400 },
          "1\U00010001\U000500022\ufffd3\ufffd4",
          :intvector{ 0, 2, 2, 6, 6, 10, 12, 14, 16, 18 },
          :int{1}, :int{0}, "", "?", :bin{""}
        }
        { "UTF-16LE", :bin{ 00 }, "", :intvector{}, :int{1}, :int{0}, "truncated", ".", :bin{ 00 } }
        { "UTF-16LE", :bin{ 00d800 }, "", :intvector{}, :int{1}, :int{0}, "truncated", ".", :bin{ 00d800 } }

        {
          "UTF-16BE",
          :bin{ 0031d800dc01d900dc020032d8000033dc010034 },
          "1\U00010001\U000500022\ufffd3\ufffd4",
          :intvector{ 0, 2, 2, 6, 6, 10, 12, 14, 16, 18 },
          :int{1}, :int{0}, "", "?", :bin{""}
        }
        { "UTF-16BE", :bin{ 00 }, "", :intvector{}, :int{1}, :int{0}, "truncated", ".", :bin{ 00 } }
        { "UTF-16BE", :bin{ d800dc }, "", :intvector{}, :int{1}, :int{0}, "truncated", ".", :bin{ d800dc } }

        // e4b8 is a partial sequence
        { "UTF-8", :bin{ 31e4ba8ce4b8 }, "1\u4e8c", :intvector{ 0, 1 }, :int{1}, :int{0}, "truncated", ".", :bin{ e4b8 } }
        { "UTF-8", :bin{ 31e4ba8ce4b8 }, "1\u4e8c\ufffd", :intvector{ 0, 1, 4 }, :int{1}, :int{0}, "", "?", :bin{""} }

        // LMBCS with escape callback (1292a0 is unassigned)
        {
          "LMBCS",
          :bin{ 12c9501292a01292a1 },
          "\u4e2e%X12%X92%XA0\ue5c4",
          :intvector{ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 6 },
          :int{1}, :int{0}, "", "&", :bin{""}
        }

        // IMAP-mailbox-name with SUB
        // a<DEL> a&AB~ a&AB\x0c a&AB- a&AB. a&.
        {
          "IMAP-mailbox-name",
          :bin{ 617f612641427e612641420c612641422d612641422e61262e },
          "a\ufffda\ufffda\ufffda\ufffda\ufffda\ufffd",
          :intvector{ 0, 1, 2, 4, 7, 9, 12, 14, 17, 19, 22, 23 },
          :int{1}, :int{0}, "", "?", :bin{""}
        }

        // using testdata_test1.cnv
        { "*test1", :bin{ 000506070809 }, "\u20ac\x05\x06\U00101234\ufffd\ufffd", :intvector{ 0, 1, 2, 3, 3, 4, 5 }, :int{1}, :int{0}, "", "", :bin{""} }

        // surrogates in CESU-8
        { "CESU-8", :bin{ eda080eda081edb081 }, "\ud800\U00010401", :intvector{ 0, 3, 6 }, :int{1}, :int{0}, "", "", :bin{""} }
        // e080 is a partial sequence
        { "UTF-8", :bin{ 31ffe4ba8ce08061 }, "1\ufffd\u4e8c\ufffda", :intvector{ 0, 1, 2, 5, 7 }, :int{0}, :int{0}, "", "", :bin{ e080 } }
        // fbbfbfbfbf exceeds U+10ffff
        { "UTF-8", :bin{ 31fbbfbfbfbf61 }, "1\ufffda", :intvector{ 0, 1, 6 }, :int{0}, :int{0}, "", "", :bin{ fbbfbfbfbf } }

        // lead byte a2 without trail byte
        { "ibm-1363", :bin{ a2aea2 }, "\u00a1", :intvector{ 0 }, :int{1}, :int{0}, "truncated", ".", :bin{ a2 } }
        { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "", "?", :bin{""} }

        // simple sample, no error handling
        { "UTF-8", :bin{ 61F48FBFBF }, "a\U0010FFFF", :intvector{ 0, 1, 1 }, :int{1}, :int{0}, "", "", :bin{""} }
      }
    }

    // --------------------------------------------------------------------- ***

    fromUnicode {
      Headers { "charset", "unicode", "bytes", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidUChars" }
      Cases {
        // test that ISO-2022-JP encodes ASCII as itself
        {
          "ISO-2022-JP",
          "?@ABCDEFG",
          :bin{       3f4041424344454647 },
          :intvector{ 0,1,2,3,4,5,6,7,8 },
          :int{1}, :int{1}, "", "?", ""
        }
        // test that ISO-2022-CN encodes ASCII as itself
        {
          "ISO-2022-CN",
          "?@ABCDEFG",
          :bin{       3f4041424344454647 },
          :intvector{ 0,1,2,3,4,5,6,7,8 },
          :int{1}, :int{1}, "", "?", ""
        }

        // moved from cintltst /tsconv/nccbtst/TestSkipCallBack
        {
          "iso-2022-jp",
          "\u3000\xe9\u3001",
          :bin{       1b2442212121221b2842 },
          :intvector{ 0,0,0,0,0,2,2,2,2,2 },
          :int{1}, :int{1}, "", "0", ""
        }
        // moved from cintltst /tsconv/nccbtst/TestSubCallBack
        {
          "iso-2022-jp",
          "A\xe9B\xe9\u3000",
          :bin{       411a421a1b244221211b2842 },
          :intvector{ 0,1,2,3,4,4,4,4,4,4,4,4 },
          :int{1}, :int{1}, "", "?", ""
        }
        // moved from cintltst /tsconv/nccbtst/TestSubWithValueCallBack
        {
          "iso-2022-jp",
          "A\xe9B\xe9\u3000",
          :bin{       41255530304539422555303045391b244221211b2842 },
          :intvector{ 0,1,1,1,1,1,1,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4 },
          :int{1}, :int{1}, "", "&", ""
        }
        {
          "iso-2022-cn",
          "\u4e00\u3712\u4e01",
          :bin{       1b2429410e523b0f2555333731320e36210f },
          :intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,1,2,2,2,2 },
          :int{1}, :int{1}, "", "&", ""
        }
        {
          "iso-2022-cn",
          "A\u3712\u4e00",
          :bin{       412555333731321b2429410e523b0f },
          :intvector{ 0,1,1,1,1,1,1,2,2,2,2,2,2,2,2 },
          :int{1}, :int{1}, "", "&", ""
        }
        {
          "iso-2022-cn",
          "\u3000\u3712\u3001",
          :bin{       1b2429410e21210f2555333731320e21220f },
          :intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,1,2,2,2,2 },
          :int{1}, :int{1}, "", "&", ""
        }

        // moved from cintltst /tsconv/nucnvtst/TestJIS
        {
          "JIS",
          "\uFF81\uFF82\u30EC\u30ED\u30EE\u30EF\uFF93\uFF94\uFF95\uFF96\uFF97\uFF98",
          :bin{       1b244225412544256c256d256e256F25622564256625682569256a1b2842 },
          :intvector{ 0,0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,11,11,11 },
          :int{1}, :int{1}, "", "?", ""
        }
        {
          "JIS7",
          "\uFF81\uFF82\u30EC\u30ED\u30EE\u30EF\uFF93\uFF94\uFF95\uFF96\uFF97\uFF98",
          :bin{       0e41420f1b2442256c256d256e256F0e5354555657580f1b2842 },
          :intvector{ 0,0,1,2,2,2,2,2,2,3,3,4,4,5,5,6,6,7,8,9,10,11,11,11,11,11 },
          :int{1}, :int{1}, "", "?", ""
        }
        {
          "JIS8",
          "\uFF81\uFF82\u30EC\u30ED\u30EE\u30EF\uFF93\uFF94\uFF95\uFF96\uFF97\uFF98",
          :bin{       C1C21b2442256c256d256e256F1b284AD3D4D5D6D7D81b2842 },
          :intvector{ 0,1,2,2,2,2,2,3,3,4,4,5,5,6,6,6,6,7,8,9,10,11,11,11,11 },
          :int{1}, :int{1}, "", "?", ""
        }

        // moved from cintltst /tsconv/ncnvtst/TestErrorBehaviour
        {
          "iso-2022-jp",
          "\u3000\x50\udc01\u3001",
          :bin{       1B244221211B2842501A1B24422122 },
          :intvector{ 0,0,0,0,0,1,1,1,1,2,3,3,3,3,3 },
          :int{0}, :int{1}, "", "?", "\udc01"
        }
        {
          "iso-2022-jp",
          "\u3000\x50\udc01\u3001",
          :bin{       1B244221211B2842501A1B244221221b2842 },
          :intvector{ 0,0,0,0,0,1,1,1,1,2,3,3,3,3,3,3,3,3 },
          :int{1}, :int{1}, "", "?", ""
        }
        {
          "iso-2022-kr",
          "\x61\u4e00\udc01\u4e00",
          :bin{           1b242943610e6c690f1a0e6c69 },
          :intvector{ -1,-1,-1,-1,0,1,1,1,2,2,3,3,3 },
          :int{0}, :int{1}, "", "?", "\udc01"
        }
        {
          "iso-2022-kr",
          "\x61\u4e00\udc01\u4e00",
          :bin{           1b242943610e6c690f1a0e6c690f },
          :intvector{ -1,-1,-1,-1,0,1,1,1,2,2,3,3,3,3 },
          :int{1}, :int{1}, "", "?", ""
        }

        // ISO-2022-KR
        {
          "ibm-25546",
          "AB\uc88b\U00050005\uacccC",
          :bin{           1b24294341420e41410f7b552b35303030357d0e306a0f43 },
          :intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,3,3,3,3,3,3,3,3,5,5,5,6,6 },
          :int{1}, :int{1}, "", "&U", ""
        }
        {
          "ibm-25546",
          "AB\uc88b\U00050005\uacccC",
          :bin{           1b24294341420e41410f1a0e306a0f43 },
          :intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,5,6,6 },
          :int{1}, :int{1}, "", "?\x00\x1a", ""
        }
        {
          "ibm-25546",
          "AB\uc88b\U00050005\uacccC",
          :bin{           1b24294341420e41412f7e306a0f43 },
          :intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,6,6 },
          :int{1}, :int{1}, "", "?", ""
        }
        {
          "ibm-25546",
          "AB\uc88b\U00050005\uaccc",
          :bin{           1b24294341420e41412f7e306a0f },
          :intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,5 },
          :int{1}, :int{1}, "", "?", ""
        }
        {
          "ISO-2022-KR",
          "AB\uc88b\U00050005\uacccC",
          :bin{           1b24294341420e41410f7b552b35303030357d0e306a0f43 },
          :intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,3,3,3,3,3,3,3,3,5,5,5,6,6 },
          :int{1}, :int{1}, "", "&U", ""
        }
        {
          "ISO-2022-KR",
          "AB\uc88b\U00050005\uacccC",
          :bin{           1b24294341420e41410f1a0e306a0f43 },
          :intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,5,6,6 },
          :int{1}, :int{1}, "", "?", ""
        }
        {
          "ISO-2022-KR",
          "AB\uc88b\U00050005\uacccC",
          :bin{           1b24294341420e41412f7e306a0f43 },
          :intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,6,6 },
          :int{1}, :int{1}, "", "?\x00\x2f\x7e", ""
        }
        {
          "ISO-2022-KR",
          "AB\uc88b\U00050005\uaccc",
          :bin{           1b24294341420e41412f7e306a0f },
          :intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,5 },
          :int{1}, :int{1}, "", "?\x00\x2f\x7e", ""
        }

        // ISO-2022-JP-2 with G2 designator & SS2 shift
        {
          "ISO-2022-JP-2",
          "CF\u758f\u038f\u7591",
          :bin{       43461b244241411b2e461b4e3f353f1b2842 },
          :intvector{ 0,1,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4 },
          :int{1}, :int{1}, "", ".", ""
        }
        // JIS7 with Katakana
        {
          "JIS7",
          "AB\uff81\uff82AB",
          :bin{       41420e41420f4142 },
          :intvector{ 0,1,2,2,3,4,4,5 },
          :int{1}, :int{1}, "", ".", ""
        }
        // JIS7 with shift to ASCII at the very end
        {
          "JIS7",
          "AB\uff81\uff82",
          :bin{       41420e41420f },
          :intvector{ 0,1,2,2,3,3 },
          :int{1}, :int{1}, "", ".", ""
        }
        // JIS8 with Katakana
        {
          "JIS8",
          "A\uff81\\\xa5\uff82B",
          :bin{       41c15c1b284a5cc2421b2842 },
          :intvector{ 0,1,2,3,3,3,3,4,5,5,5,5 },
          :int{1}, :int{1}, "", ".", ""
        }

        // ISO-2022-CN-EXT with all subcharsets and shifts and with supplementary code points
        {
          "ISO-2022-CN-EXT",
          "\u4eae\u9f82\u56cd\u56cc\U0002a6d6\x30",
          :bin{       1b2429410e41411b2429457e7c1b242a481b4e70341b2429477c341b242b4d1b4f664c0f30 },
          :intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4,6,6 },
          :int{1}, :int{1}, "", ".", ""
        }
        // ISO-2022-CN-EXT with shift to ASCII at the very end
        {
          "ISO-2022-CN-EXT",
          "\u4eae\u9f82\u56cd\u56cc\U0002a6d6",
          :bin{       1b2429410e41411b2429457e7c1b242a481b4e70341b2429477c341b242b4d1b4f664c0f },
          :intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4 },
          :int{1}, :int{1}, "", ".", ""
        }
        // ISO-2022-CN-EXT without flush so do not shift to ASCII at the very end
        {
          "ISO-2022-CN-EXT",
          "\u4eae\u9f82\u56cd\u56cc\U0002a6d6",
          :bin{       1b2429410e41411b2429457e7c1b242a481b4e70341b2429477c341b242b4d1b4f664c },
          :intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4 },
          :int{0}, :int{1}, "", ".", ""
        }

        // windows-936 vs. ibm-1386
        {
          "ibm-1386",
          "\x1a\u20ac\u5555\x80\x81\U00055555",
          :bin{ 7fa2e3dffb7f7fa1a1 },
          :intvector{ 0, 1, 1, 2, 2, 3, 4, 5, 5 },
          :int{1}, :int{1}, "", "?", ""
        }
        {
          "windows-936",
          "\x1a\u20ac\u5555\x80\x81\U00055555",
          :bin{ 1a80dffb3f3f3f },
          :intvector{ 0, 1, 2, 2, 3, 4, 5 },
          :int{1}, :int{1}, "", "?", ""
        }

        // verify that if a conversion table does not have any mapping for U+0000,
        // then there will not even be a phantom fallback to 00
        {
          "ibm-971",
          "\x00",
          :bin{ affe },
          :intvector{ 0, 0 },
          :int{1}, :int{1}, "", "?", ""
        }

        {
          "*test4",
          "\x00",
          :bin{ ff },
          :intvector{ 0 },
          :int{1}, :int{1}, "", "?", ""
        }

        // extension in testdata
        {
          "*test4x",
          "\u20ac\x09",
          :bin{ 0009 },
          :intvector{ 0, 1 },
          :int{1}, :int{1}, "", "?", ""
        }

        // DBCS-only extensions
        {
          "ibm-970",
          "\x61\uffa1\u2015\ub000",
          :bin{ 611aa1aab2eb },
          :intvector{ 0, 1, 2, 2, 3, 3 },
          :int{1}, :int{1}, "", "?", ""
        }

        {
          "ibm-971",
          "\x61\uffa1\u2015\ub000",
          :bin{ affeaffeaffeb2eb },
          :intvector{ 0, 0, 1, 1, 2, 2, 3, 3 },
          :int{1}, :int{1}, "", "?", ""
        }

        {
          "ibm-1390,swaplfnl",
          "\uff63\u30C8\u30C8\u309A\u3000\x41\u20ac\x0a",
          :bin{ 430e4395ecc140400fc1e115 },
          :intvector{ 0, 1, 1, 1, 2, 2, 4, 4, 5, 5, 6, 7 },
          :int{1}, :int{0}, "", "?", ""
        }

        {
          "ibm-16684",
          "\uff63\u30C8\u30C8\u309A\u3000\x41\u20ac\x0a",
          :bin{ fefe4395ecc14040fefe42e1fefe },
          :intvector{ 0, 0, 1, 1, 2, 2, 4, 4, 5, 5, 6, 6, 7, 7 },
          :int{1}, :int{0}, "", "?", ""
        }

        {
          "ibm-1399",
          "\uff63\u30C8\u30C8\u309A\u3000\x41\u20ac\x0a",
          :bin{ 440e4395ecc140400fc1e125 },
          :intvector{ 0, 1, 1, 1, 2, 2, 4, 4, 5, 5, 6, 7 },
          :int{1}, :int{0}, "", "?", ""
        }

        // <subchar1> from |2 mappings
        {
          "ibm-1390",
          "\x0e\x0f\u0901\U00050000\uffe8\uffee",
          :bin{ 3f3f0efefefefe0f3f3f },
          :intvector{ 0, 1, 2, 2, 2, 3, 3, 5, 5, 6 },
          :int{1}, :int{1}, "", "?", ""
        }

        // <subchar1> from |2 mappings, and also contains a fallback to 00
        {
          "*test4",
          "\u20ac\u20ad\U00050005\U00023456\U0010ffff\x30",
          :bin{ 0000e10102030affff },
          :intvector{ 0, 1, 2, 4, 4, 4, 4, 6, 8 },
          :int{1}, :int{1}, "", "?", ""
        }

        // setting a <subchar> resets the <subchar1>
        {
          "*test4",
          "\u20ac\u20ad\U00050005\U00023456\U0010ffff\x30",
          :bin{ 00000102030f0102030a0102030f0102030f },
          :intvector{ 0, 1, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8, 8, 8, 8 },
          :int{1}, :int{1}, "", "?\x00\x01\x02\x03\x0f", ""
        }

        // fallback to 00 with old single-byte data structure
        {
          "*test1",
          "\u20ac\u20ad\U00101234\U00050000",
          :bin{ 000007ff },
          :intvector{ 0, 1, 2, 4 },
          :int{1}, :int{1}, "", "?", ""
        }

        // extensions
        {
          "ibm-1390",
          "\u025a\u025a\u0300\u025a\u0301\u025a\u0302\uffe8\U0002a0f9",
          :bin{ 0ed896eccaeccbd896ea530f3f0eb7c20f },
          :intvector{ 0, 0, 0, 1, 1, 3, 3, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8 },
          :int{1}, :int{0}, "", "?", ""
        }

        {
          "*test3",
          "\xc4\xc4\xc4\U00101234\xc4\xc4\U00101234\x05",
          :bin{ ffffff070501020c },
          :intvector{ 0, 1, 2, 3, 5, 5, 5, 5 },
          :int{1}, :int{0}, "", "?", ""
        }

        {
          "*test3",
          "\U00101234\U00101234\U00050005\U00101234\U00050005\U00060006",
          :bin{ 07070001020e05070001020f09 },
          :intvector{ 0, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6 },
          :int{1}, :int{0}, "", "?", ""
        }

        // normal conversions
        {
          "UTF-16LE",
          "1\U00010001\U000500022\ud8003\udc014",
          :bin{ 310000d801dc00d902dc3200fdff3300fdff3400 },
          :intvector{ 0, 0, 1, 1, 1, 1, 3, 3, 3, 3, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9 },
          :int{1}, :int{0}, "", "?", ""
        }
        { "UTF-16LE", "\ud800", :bin{""}, :intvector{}, :int{1}, :int{0}, "truncated", ".", "\ud800" }

        {
          "UTF-16BE",
          "1\U00010001\U000500022\ud8003\udc014",
          :bin{ 0031d800dc01d900dc020032fffd0033fffd0034 },
          :intvector{ 0, 0, 1, 1, 1, 1, 3, 3, 3, 3, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9 },
          :int{1}, :int{0}, "", "?", ""
        }
        { "UTF-16BE", "\ud800", :bin{""}, :intvector{}, :int{1}, :int{0}, "truncated", ".", "\ud800" }

        // escape callback
        {
          "ISCII",
          "A\u0901\U00023456\u0902B\U00023456C",
          :bin{ 41ef42a1255544383444255544433536a24225554438344425554443353643 },
          :intvector{
            0,
            1,1,1,
            2,2,2,2,2,2,
            2,2,2,2,2,2,
            4,
            5,
            6,6,6,6,6,6,
            6,6,6,6,6,6,
            8
          },
          :int{1}, :int{0}, "", "&", ""
        }

        // escape callback (hex)
        {
          "iso-2022-jp",
          "\u3000\U00023456\u3001\U00023456B\u901c",
          :bin{ 1b244221211b284226237832333435363b1b244221221b284226237832333435363b42262378393031433b },
          :intvector{
            0,0,0,0,0,
            1,1,1,1,1,1,1,1,1,1,1,1,
            3,3,3,3,3,
            4,4,4,4,4,4,4,4,4,4,4,4,
            6,
            7,7,7,7,7,7,7,7
          },
          :int{1}, :int{0}, "", "&X", ""
        }

        // sub callback
        {
          "gb18030",
          "$\x7f\x80\u01f9\u20ac\u4e00\u9fa6\uffff\U00010000\U0010ffff",
          :bin{ 247f81308130a8bfa2e3d2bb82358f338431a43990308130e3329a35 },
          :intvector{ 0, 1, 2, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 10, 10, 10, 10 },
          :int{1}, :int{0}, "", "?", ""
        }

        // skip callback
        { "ibm-930", "\u6D63\u6D64\u6D65\u6D66", :bin{ 0e5d5f5d63466b0f }, :intvector{ 0, 0, 0, 1, 1, 3, 3, 3 }, :int{1}, :int{0}, "", "0", "" }
        { "ibm-930", "\u6D63\u6D64\ud89a\u6D66", :bin{ 0e5d5f5d63466b0f }, :intvector{ 0, 0, 0, 1, 1, 3, 3, 3 }, :int{1}, :int{0}, "", "0", "" }
        { "ibm-930", "\u6D63\u6D64\ud89a\u6D66", :bin{ 0e5d5f5d63 }, :intvector{ 0, 0, 0, 1, 1 }, :int{1}, :int{0}, "illegal", "0i", "\ud89a" }

        // sub callback for supplementary code point
        { "LATIN1",  "1\U000104012", :bin{ 311a32 }, :intvector{ 0, 1, 3 }, :int{1}, :int{0}, "", "", "" }
        { "ibm-920", "1\U000104012", :bin{ 311a32 }, :intvector{ 0, 1, 3 }, :int{1}, :int{0}, "", "", "" }

        // sub callback with AA as subchar
        { "ibm-920", "1\U000104012", :bin{ 31AA32 }, :intvector{ 0, 1, 3 }, :int{1}, :int{0}, "", "?\x00\xAA", "" }

        // same but not flushing
        { "LATIN1",  "1\U000104012", :bin{ 311a32 }, :intvector{ 0, 1, 3 }, :int{0}, :int{0}, "", "", "\U00010401" }
        { "ibm-920", "1\U000104012", :bin{ 311a32 }, :intvector{ 0, 1, 3 }, :int{0}, :int{0}, "", "", "\U00010401" }

        // simple sample, no error handling
        { "UTF-8", "a\U0010FFFF", :bin{ 61F48FBFBF }, :intvector{ 0, 1, 1, 1, 1 }, :int{1}, :int{0}, "", "", "" }
      }
    }

    getUnicodeSet {
      // charset - will be opened, and ucnv_getUnicodeSet() called on it
      // map - set of code points and strings that must be in the returned set
      // mapnot - set of code points and strings that must *not* be in the returned set
      // which - numeric UConverterUnicodeSet value
      Headers { "charset", "map", "mapnot", "which" }
      Cases {
        // ISO-2022-KR
        {
          "ISO-2022-KR",
          "[\x00-\x7f\xa1\xa4\xfe\u0111\u4e00\u4e01\uac00-\uac02\uffe6]",
          "[\x80-\xa0\xa3\xa5\xff-\u0110\uac03\uffe7-\U0010ffff]",
          :int{0}
        }

        // versions of ISO-2022-JP
        {
          "ISO-2022-JP",
          "[\x00-\x7f\u0391-\u03a1\uff61-\uff9f\u4e00\u4e01\uffe5]",
          "[\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\uffe6-\U0010ffff]",
          :int{0}
        }
        {
          "ISO-2022-JP-2",
          "[\x00-\u0113\u0385-\u038a\u0390-\u03a1\uff61-\uff9f\u4e00-\u4e05\uffe6]",
          "[\uffe7-\U0010ffff]",
          :int{0}
        }

        // versions of ISO-2022-CN
        {
          "ISO-2022-CN",
          "[\x00-\x7f\u4e00\u4e01\u9f98\ufe6b]",
          "[\u4e29\uffe6-\U0010ffff]",
          :int{0}
        }
        {
          "ISO-2022-CN-EXT",
          "[\x00-\x7f\u4e00-\u4e05\u9f98\ufe6b\u4e28-\u4e2b\U00020000\U00020003-\U00020005\U00029664]",
          "[\U00020001\U00020002\U0002a6d7-\U0010ffff]",
          :int{0}
        }

        // DBCS-only
        {
          "ibm-971",
          "[\xa1\xa4\uac01\ub000]",
          "[\x00-\x9f\u2015]",
          :int{0}
        }

        {
          "ibm-16684",
          "[\xa0\xa1\xa4\xa6-\xab\xad-\u017f\u0254\u309b-\u30ff\u4e00-\u4e05\U00023d00\U000243bc\U0002a6b2"
              "{\u0254\u0300}{\u0254\u0301}{\u304b\u309a}{\u30ad\u309a}{\u30af\u309a}]",
          "[\x00-\x9f\xa2\xa3\xa5\xac\u0200-\u024f\U00010000-\U0001ffff\U0002a61b-\U0002a6b1]",
          :int{0}
        }

        // extensions
        {
          "ibm-1390",
          "[\x00-\x0d\x10-\u017f\u0254\u309b-\u30ff\u4e00-\u4e05\U00023d00\U000243bc\U0002a6b2"
              "{\u0254\u0300}{\u0254\u0301}{\u304b\u309a}{\u30ad\u309a}{\u30af\u309a}]",
          "[\x0e\x0f\u0200-\u024f\U00010000-\U0001ffff\U0002a61b-\U0002a6b1]",
          :int{0}
        }

        {
          "*test3",
          "[\x05\x0b\xc0\u20ac\U00023456\U00101234"
              "{\U00101234\U00050005\U00060006}{\U00101234\U00050005}{\U00101234\U00060006}{\xc4\xc4\U00101234\x05}]",
          "[\x06\x0e\U00034567\U000febcd{\U00101234\U00070007}]",
          :int{0}
        }
      }
    }
  }
}

--- NEW FILE: nfs4_cis_prep.txt ---
###################
# Copyright (C) 2003, International Business Machines
# Corporation and others.  All Rights Reserved.
###################

###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT 
###################

# This table contains code points from Table A.1 from RFC 3454

0221; ; UNASSIGNED
0234..024F; ; UNASSIGNED
02AE..02AF; ; UNASSIGNED
02EF..02FF; ; UNASSIGNED
0350..035F; ; UNASSIGNED
0370..0373; ; UNASSIGNED
0376..0379; ; UNASSIGNED
037B..037D; ; UNASSIGNED
[...1906 lines suppressed...]
202B; ; PROHIBITED
202C; ; PROHIBITED
202D; ; PROHIBITED
202E; ; PROHIBITED
206A; ; PROHIBITED
206B; ; PROHIBITED
206C; ; PROHIBITED
206D; ; PROHIBITED
206E; ; PROHIBITED
206F; ; PROHIBITED

# Total code points 15

# code points from      Table C.9 

E0001; ; PROHIBITED
E0020..E007F; ; PROHIBITED

# Total code points 82


--- NEW FILE: nfs4_cs_prep_ci.txt ---
###################
# Copyright (C) 2003, International Business Machines
# Corporation and others.  All Rights Reserved.
###################

###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT 
###################

# This table contains code points from Table A.1 from RFC 3454

0221; ; UNASSIGNED
0234..024F; ; UNASSIGNED
02AE..02AF; ; UNASSIGNED
02EF..02FF; ; UNASSIGNED
0350..035F; ; UNASSIGNED
0370..0373; ; UNASSIGNED
0376..0379; ; UNASSIGNED
037B..037D; ; UNASSIGNED
[...1863 lines suppressed...]
202B; ; PROHIBITED
202C; ; PROHIBITED
202D; ; PROHIBITED
202E; ; PROHIBITED
206A; ; PROHIBITED
206B; ; PROHIBITED
206C; ; PROHIBITED
206D; ; PROHIBITED
206E; ; PROHIBITED
206F; ; PROHIBITED

# Total code points 15

# code points from      Table C.9 

E0001; ; PROHIBITED
E0020..E007F; ; PROHIBITED

# Total code points 82


--- NEW FILE: nfs4_cs_prep_cs.txt ---
###################
# Copyright (C) 2003, International Business Machines
# Corporation and others.  All Rights Reserved.
###################

###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT 
###################

# This table contains code points from Table A.1 from RFC 3454

0221; ; UNASSIGNED
0234..024F; ; UNASSIGNED
02AE..02AF; ; UNASSIGNED
02EF..02FF; ; UNASSIGNED
0350..035F; ; UNASSIGNED
0370..0373; ; UNASSIGNED
0376..0379; ; UNASSIGNED
037B..037D; ; UNASSIGNED
037F..0383; ; UNASSIGNED
038B; ; UNASSIGNED
038D; ; UNASSIGNED
03A2; ; UNASSIGNED
03CF; ; UNASSIGNED
03F7..03FF; ; UNASSIGNED
0487; ; UNASSIGNED
04CF; ; UNASSIGNED
04F6..04F7; ; UNASSIGNED
04FA..04FF; ; UNASSIGNED
0510..0530; ; UNASSIGNED
0557..0558; ; UNASSIGNED
0560; ; UNASSIGNED
0588; ; UNASSIGNED
058B..0590; ; UNASSIGNED
05A2; ; UNASSIGNED
05BA; ; UNASSIGNED
05C5..05CF; ; UNASSIGNED
05EB..05EF; ; UNASSIGNED
05F5..060B; ; UNASSIGNED
060D..061A; ; UNASSIGNED
061C..061E; ; UNASSIGNED
0620; ; UNASSIGNED
063B..063F; ; UNASSIGNED
0656..065F; ; UNASSIGNED
06EE..06EF; ; UNASSIGNED
06FF; ; UNASSIGNED
070E; ; UNASSIGNED
072D..072F; ; UNASSIGNED
074B..077F; ; UNASSIGNED
07B2..0900; ; UNASSIGNED
0904; ; UNASSIGNED
093A..093B; ; UNASSIGNED
094E..094F; ; UNASSIGNED
0955..0957; ; UNASSIGNED
0971..0980; ; UNASSIGNED
0984; ; UNASSIGNED
098D..098E; ; UNASSIGNED
0991..0992; ; UNASSIGNED
09A9; ; UNASSIGNED
09B1; ; UNASSIGNED
09B3..09B5; ; UNASSIGNED
09BA..09BB; ; UNASSIGNED
09BD; ; UNASSIGNED
09C5..09C6; ; UNASSIGNED
09C9..09CA; ; UNASSIGNED
09CE..09D6; ; UNASSIGNED
09D8..09DB; ; UNASSIGNED
09DE; ; UNASSIGNED
09E4..09E5; ; UNASSIGNED
09FB..0A01; ; UNASSIGNED
0A03..0A04; ; UNASSIGNED
0A0B..0A0E; ; UNASSIGNED
0A11..0A12; ; UNASSIGNED
0A29; ; UNASSIGNED
0A31; ; UNASSIGNED
0A34; ; UNASSIGNED
0A37; ; UNASSIGNED
0A3A..0A3B; ; UNASSIGNED
0A3D; ; UNASSIGNED
0A43..0A46; ; UNASSIGNED
0A49..0A4A; ; UNASSIGNED
0A4E..0A58; ; UNASSIGNED
0A5D; ; UNASSIGNED
0A5F..0A65; ; UNASSIGNED
0A75..0A80; ; UNASSIGNED
0A84; ; UNASSIGNED
0A8C; ; UNASSIGNED
0A8E; ; UNASSIGNED
0A92; ; UNASSIGNED
0AA9; ; UNASSIGNED
0AB1; ; UNASSIGNED
0AB4; ; UNASSIGNED
0ABA..0ABB; ; UNASSIGNED
0AC6; ; UNASSIGNED
0ACA; ; UNASSIGNED
0ACE..0ACF; ; UNASSIGNED
0AD1..0ADF; ; UNASSIGNED
0AE1..0AE5; ; UNASSIGNED
0AF0..0B00; ; UNASSIGNED
0B04; ; UNASSIGNED
0B0D..0B0E; ; UNASSIGNED
0B11..0B12; ; UNASSIGNED
0B29; ; UNASSIGNED
0B31; ; UNASSIGNED
0B34..0B35; ; UNASSIGNED
0B3A..0B3B; ; UNASSIGNED
0B44..0B46; ; UNASSIGNED
0B49..0B4A; ; UNASSIGNED
0B4E..0B55; ; UNASSIGNED
0B58..0B5B; ; UNASSIGNED
0B5E; ; UNASSIGNED
0B62..0B65; ; UNASSIGNED
0B71..0B81; ; UNASSIGNED
0B84; ; UNASSIGNED
0B8B..0B8D; ; UNASSIGNED
0B91; ; UNASSIGNED
0B96..0B98; ; UNASSIGNED
0B9B; ; UNASSIGNED
0B9D; ; UNASSIGNED
0BA0..0BA2; ; UNASSIGNED
0BA5..0BA7; ; UNASSIGNED
0BAB..0BAD; ; UNASSIGNED
0BB6; ; UNASSIGNED
0BBA..0BBD; ; UNASSIGNED
0BC3..0BC5; ; UNASSIGNED
0BC9; ; UNASSIGNED
0BCE..0BD6; ; UNASSIGNED
0BD8..0BE6; ; UNASSIGNED
0BF3..0C00; ; UNASSIGNED
0C04; ; UNASSIGNED
0C0D; ; UNASSIGNED
0C11; ; UNASSIGNED
0C29; ; UNASSIGNED
0C34; ; UNASSIGNED
0C3A..0C3D; ; UNASSIGNED
0C45; ; UNASSIGNED
0C49; ; UNASSIGNED
0C4E..0C54; ; UNASSIGNED
0C57..0C5F; ; UNASSIGNED
0C62..0C65; ; UNASSIGNED
0C70..0C81; ; UNASSIGNED
0C84; ; UNASSIGNED
0C8D; ; UNASSIGNED
0C91; ; UNASSIGNED
0CA9; ; UNASSIGNED
0CB4; ; UNASSIGNED
0CBA..0CBD; ; UNASSIGNED
0CC5; ; UNASSIGNED
0CC9; ; UNASSIGNED
0CCE..0CD4; ; UNASSIGNED
0CD7..0CDD; ; UNASSIGNED
0CDF; ; UNASSIGNED
0CE2..0CE5; ; UNASSIGNED
0CF0..0D01; ; UNASSIGNED
0D04; ; UNASSIGNED
0D0D; ; UNASSIGNED
0D11; ; UNASSIGNED
0D29; ; UNASSIGNED
0D3A..0D3D; ; UNASSIGNED
0D44..0D45; ; UNASSIGNED
0D49; ; UNASSIGNED
0D4E..0D56; ; UNASSIGNED
0D58..0D5F; ; UNASSIGNED
0D62..0D65; ; UNASSIGNED
0D70..0D81; ; UNASSIGNED
0D84; ; UNASSIGNED
0D97..0D99; ; UNASSIGNED
0DB2; ; UNASSIGNED
0DBC; ; UNASSIGNED
0DBE..0DBF; ; UNASSIGNED
0DC7..0DC9; ; UNASSIGNED
0DCB..0DCE; ; UNASSIGNED
0DD5; ; UNASSIGNED
0DD7; ; UNASSIGNED
0DE0..0DF1; ; UNASSIGNED
0DF5..0E00; ; UNASSIGNED
0E3B..0E3E; ; UNASSIGNED
0E5C..0E80; ; UNASSIGNED
0E83; ; UNASSIGNED
0E85..0E86; ; UNASSIGNED
0E89; ; UNASSIGNED
0E8B..0E8C; ; UNASSIGNED
0E8E..0E93; ; UNASSIGNED
0E98; ; UNASSIGNED
0EA0; ; UNASSIGNED
0EA4; ; UNASSIGNED
0EA6; ; UNASSIGNED
0EA8..0EA9; ; UNASSIGNED
0EAC; ; UNASSIGNED
0EBA; ; UNASSIGNED
0EBE..0EBF; ; UNASSIGNED
0EC5; ; UNASSIGNED
0EC7; ; UNASSIGNED
0ECE..0ECF; ; UNASSIGNED
0EDA..0EDB; ; UNASSIGNED
0EDE..0EFF; ; UNASSIGNED
0F48; ; UNASSIGNED
0F6B..0F70; ; UNASSIGNED
0F8C..0F8F; ; UNASSIGNED
0F98; ; UNASSIGNED
0FBD; ; UNASSIGNED
0FCD..0FCE; ; UNASSIGNED
0FD0..0FFF; ; UNASSIGNED
1022; ; UNASSIGNED
1028; ; UNASSIGNED
102B; ; UNASSIGNED
1033..1035; ; UNASSIGNED
103A..103F; ; UNASSIGNED
105A..109F; ; UNASSIGNED
10C6..10CF; ; UNASSIGNED
10F9..10FA; ; UNASSIGNED
10FC..10FF; ; UNASSIGNED
115A..115E; ; UNASSIGNED
11A3..11A7; ; UNASSIGNED
11FA..11FF; ; UNASSIGNED
1207; ; UNASSIGNED
1247; ; UNASSIGNED
1249; ; UNASSIGNED
124E..124F; ; UNASSIGNED
1257; ; UNASSIGNED
1259; ; UNASSIGNED
125E..125F; ; UNASSIGNED
1287; ; UNASSIGNED
1289; ; UNASSIGNED
128E..128F; ; UNASSIGNED
12AF; ; UNASSIGNED
12B1; ; UNASSIGNED
12B6..12B7; ; UNASSIGNED
12BF; ; UNASSIGNED
12C1; ; UNASSIGNED
12C6..12C7; ; UNASSIGNED
12CF; ; UNASSIGNED
12D7; ; UNASSIGNED
12EF; ; UNASSIGNED
130F; ; UNASSIGNED
1311; ; UNASSIGNED
1316..1317; ; UNASSIGNED
131F; ; UNASSIGNED
1347; ; UNASSIGNED
135B..1360; ; UNASSIGNED
137D..139F; ; UNASSIGNED
13F5..1400; ; UNASSIGNED
1677..167F; ; UNASSIGNED
169D..169F; ; UNASSIGNED
16F1..16FF; ; UNASSIGNED
170D; ; UNASSIGNED
1715..171F; ; UNASSIGNED
1737..173F; ; UNASSIGNED
1754..175F; ; UNASSIGNED
176D; ; UNASSIGNED
1771; ; UNASSIGNED
1774..177F; ; UNASSIGNED
17DD..17DF; ; UNASSIGNED
17EA..17FF; ; UNASSIGNED
180F; ; UNASSIGNED
181A..181F; ; UNASSIGNED
1878..187F; ; UNASSIGNED
18AA..1DFF; ; UNASSIGNED
1E9C..1E9F; ; UNASSIGNED
1EFA..1EFF; ; UNASSIGNED
1F16..1F17; ; UNASSIGNED
1F1E..1F1F; ; UNASSIGNED
1F46..1F47; ; UNASSIGNED
1F4E..1F4F; ; UNASSIGNED
1F58; ; UNASSIGNED
1F5A; ; UNASSIGNED
1F5C; ; UNASSIGNED
1F5E; ; UNASSIGNED
1F7E..1F7F; ; UNASSIGNED
1FB5; ; UNASSIGNED
1FC5; ; UNASSIGNED
1FD4..1FD5; ; UNASSIGNED
1FDC; ; UNASSIGNED
1FF0..1FF1; ; UNASSIGNED
1FF5; ; UNASSIGNED
1FFF; ; UNASSIGNED
2053..2056; ; UNASSIGNED
2058..205E; ; UNASSIGNED
2064..2069; ; UNASSIGNED
2072..2073; ; UNASSIGNED
208F..209F; ; UNASSIGNED
20B2..20CF; ; UNASSIGNED
20EB..20FF; ; UNASSIGNED
213B..213C; ; UNASSIGNED
214C..2152; ; UNASSIGNED
2184..218F; ; UNASSIGNED
23CF..23FF; ; UNASSIGNED
2427..243F; ; UNASSIGNED
244B..245F; ; UNASSIGNED
24FF; ; UNASSIGNED
2614..2615; ; UNASSIGNED
2618; ; UNASSIGNED
267E..267F; ; UNASSIGNED
268A..2700; ; UNASSIGNED
2705; ; UNASSIGNED
270A..270B; ; UNASSIGNED
2728; ; UNASSIGNED
274C; ; UNASSIGNED
274E; ; UNASSIGNED
2753..2755; ; UNASSIGNED
2757; ; UNASSIGNED
275F..2760; ; UNASSIGNED
2795..2797; ; UNASSIGNED
27B0; ; UNASSIGNED
27BF..27CF; ; UNASSIGNED
27EC..27EF; ; UNASSIGNED
2B00..2E7F; ; UNASSIGNED
2E9A; ; UNASSIGNED
2EF4..2EFF; ; UNASSIGNED
2FD6..2FEF; ; UNASSIGNED
2FFC..2FFF; ; UNASSIGNED
3040; ; UNASSIGNED
3097..3098; ; UNASSIGNED
3100..3104; ; UNASSIGNED
312D..3130; ; UNASSIGNED
318F; ; UNASSIGNED
31B8..31EF; ; UNASSIGNED
321D..321F; ; UNASSIGNED
3244..3250; ; UNASSIGNED
327C..327E; ; UNASSIGNED
32CC..32CF; ; UNASSIGNED
32FF; ; UNASSIGNED
3377..337A; ; UNASSIGNED
33DE..33DF; ; UNASSIGNED
33FF; ; UNASSIGNED
4DB6..4DFF; ; UNASSIGNED
9FA6..9FFF; ; UNASSIGNED
A48D..A48F; ; UNASSIGNED
A4C7..ABFF; ; UNASSIGNED
D7A4..D7FF; ; UNASSIGNED
FA2E..FA2F; ; UNASSIGNED
FA6B..FAFF; ; UNASSIGNED
FB07..FB12; ; UNASSIGNED
FB18..FB1C; ; UNASSIGNED
FB37; ; UNASSIGNED
FB3D; ; UNASSIGNED
FB3F; ; UNASSIGNED
FB42; ; UNASSIGNED
FB45; ; UNASSIGNED
FBB2..FBD2; ; UNASSIGNED
FD40..FD4F; ; UNASSIGNED
FD90..FD91; ; UNASSIGNED
FDC8..FDCF; ; UNASSIGNED
FDFD..FDFF; ; UNASSIGNED
FE10..FE1F; ; UNASSIGNED
FE24..FE2F; ; UNASSIGNED
FE47..FE48; ; UNASSIGNED
FE53; ; UNASSIGNED
FE67; ; UNASSIGNED
FE6C..FE6F; ; UNASSIGNED
FE75; ; UNASSIGNED
FEFD..FEFE; ; UNASSIGNED
FF00; ; UNASSIGNED
FFBF..FFC1; ; UNASSIGNED
FFC8..FFC9; ; UNASSIGNED
FFD0..FFD1; ; UNASSIGNED
FFD8..FFD9; ; UNASSIGNED
FFDD..FFDF; ; UNASSIGNED
FFE7; ; UNASSIGNED
FFEF..FFF8; ; UNASSIGNED
10000..102FF; ; UNASSIGNED
1031F; ; UNASSIGNED
10324..1032F; ; UNASSIGNED
1034B..103FF; ; UNASSIGNED
10426..10427; ; UNASSIGNED
1044E..1CFFF; ; UNASSIGNED
1D0F6..1D0FF; ; UNASSIGNED
1D127..1D129; ; UNASSIGNED
1D1DE..1D3FF; ; UNASSIGNED
1D455; ; UNASSIGNED
1D49D; ; UNASSIGNED
1D4A0..1D4A1; ; UNASSIGNED
1D4A3..1D4A4; ; UNASSIGNED
1D4A7..1D4A8; ; UNASSIGNED
1D4AD; ; UNASSIGNED
1D4BA; ; UNASSIGNED
1D4BC; ; UNASSIGNED
1D4C1; ; UNASSIGNED
1D4C4; ; UNASSIGNED
1D506; ; UNASSIGNED
1D50B..1D50C; ; UNASSIGNED
1D515; ; UNASSIGNED
1D51D; ; UNASSIGNED
1D53A; ; UNASSIGNED
1D53F; ; UNASSIGNED
1D545; ; UNASSIGNED
1D547..1D549; ; UNASSIGNED
1D551; ; UNASSIGNED
1D6A4..1D6A7; ; UNASSIGNED
1D7CA..1D7CD; ; UNASSIGNED
1D800..1FFFD; ; UNASSIGNED
2A6D7..2F7FF; ; UNASSIGNED
2FA1E..2FFFD; ; UNASSIGNED
30000..3FFFD; ; UNASSIGNED
40000..4FFFD; ; UNASSIGNED
50000..5FFFD; ; UNASSIGNED
60000..6FFFD; ; UNASSIGNED
70000..7FFFD; ; UNASSIGNED
80000..8FFFD; ; UNASSIGNED
90000..9FFFD; ; UNASSIGNED
A0000..AFFFD; ; UNASSIGNED
B0000..BFFFD; ; UNASSIGNED
C0000..CFFFD; ; UNASSIGNED
D0000..DFFFD; ; UNASSIGNED
E0000; ; UNASSIGNED
E0002..E001F; ; UNASSIGNED
E0080..EFFFD; ; UNASSIGNED

# Total code points 3653

# This table contains code points from Table B.1 from RFC 3454

00AD; ; MAP
034F; ; MAP
1806; ; MAP
180B; ; MAP
180C; ; MAP
180D; ; MAP
200B; ; MAP
200C; ; MAP
200D; ; MAP
2060; ; MAP
FE00; ; MAP
FE01; ; MAP
FE02; ; MAP
FE03; ; MAP
FE04; ; MAP
FE05; ; MAP
FE06; ; MAP
FE07; ; MAP
FE08; ; MAP
FE09; ; MAP
FE0A; ; MAP
FE0B; ; MAP
FE0C; ; MAP
FE0D; ; MAP
FE0E; ; MAP
FE0F; ; MAP
FEFF; ; MAP

# Total code points 27

# code points from      Table C.3 

E000..F8FF; ; PROHIBITED
F0000..FFFFD; ; PROHIBITED
100000..10FFFD; ; PROHIBITED

# Total code points 2051

# code points from      Table C.4 

FDD0..FDEF; ; PROHIBITED
FFFE..FFFF; ; PROHIBITED
1FFFE..1FFFF; ; PROHIBITED
2FFFE..2FFFF; ; PROHIBITED
3FFFE..3FFFF; ; PROHIBITED
4FFFE..4FFFF; ; PROHIBITED
5FFFE..5FFFF; ; PROHIBITED
6FFFE..6FFFF; ; PROHIBITED
7FFFE..7FFFF; ; PROHIBITED
8FFFE..8FFFF; ; PROHIBITED
9FFFE..9FFFF; ; PROHIBITED
AFFFE..AFFFF; ; PROHIBITED
BFFFE..BFFFF; ; PROHIBITED
CFFFE..CFFFF; ; PROHIBITED
DFFFE..DFFFF; ; PROHIBITED
EFFFE..EFFFF; ; PROHIBITED
FFFFE..FFFFF; ; PROHIBITED
10FFFE..10FFFF; ; PROHIBITED

# Total code points 18

# code points from      Table C.5 

D800..DFFF; ; PROHIBITED

# Total code points 0

# code points from      Table C.6 

FFF9; ; PROHIBITED
FFFA; ; PROHIBITED
FFFB; ; PROHIBITED
FFFC; ; PROHIBITED
FFFD; ; PROHIBITED

# Total code points 5

# code points from      Table C.7 

2FF0..2FFB; ; PROHIBITED

# Total code points 1

# code points from      Table C.8 

0340; ; PROHIBITED
0341; ; PROHIBITED
200E; ; PROHIBITED
200F; ; PROHIBITED
202A; ; PROHIBITED
202B; ; PROHIBITED
202C; ; PROHIBITED
202D; ; PROHIBITED
202E; ; PROHIBITED
206A; ; PROHIBITED
206B; ; PROHIBITED
206C; ; PROHIBITED
206D; ; PROHIBITED
206E; ; PROHIBITED
206F; ; PROHIBITED

# Total code points 15

# code points from      Table C.9 

E0001; ; PROHIBITED
E0020..E007F; ; PROHIBITED

# Total code points 82


--- NEW FILE: nfs4_mixed_prep_p.txt ---
###################
# Copyright (C) 2003, International Business Machines
# Corporation and others.  All Rights Reserved.
###################

###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT 
###################

# This table contains code points from Table A.1 from RFC 3454

0221; ; UNASSIGNED
0234..024F; ; UNASSIGNED
02AE..02AF; ; UNASSIGNED
02EF..02FF; ; UNASSIGNED
0350..035F; ; UNASSIGNED
0370..0373; ; UNASSIGNED
0376..0379; ; UNASSIGNED
037B..037D; ; UNASSIGNED
037F..0383; ; UNASSIGNED
038B; ; UNASSIGNED
038D; ; UNASSIGNED
03A2; ; UNASSIGNED
03CF; ; UNASSIGNED
03F7..03FF; ; UNASSIGNED
0487; ; UNASSIGNED
04CF; ; UNASSIGNED
04F6..04F7; ; UNASSIGNED
04FA..04FF; ; UNASSIGNED
0510..0530; ; UNASSIGNED
0557..0558; ; UNASSIGNED
0560; ; UNASSIGNED
0588; ; UNASSIGNED
058B..0590; ; UNASSIGNED
05A2; ; UNASSIGNED
05BA; ; UNASSIGNED
05C5..05CF; ; UNASSIGNED
05EB..05EF; ; UNASSIGNED
05F5..060B; ; UNASSIGNED
060D..061A; ; UNASSIGNED
061C..061E; ; UNASSIGNED
0620; ; UNASSIGNED
063B..063F; ; UNASSIGNED
0656..065F; ; UNASSIGNED
06EE..06EF; ; UNASSIGNED
06FF; ; UNASSIGNED
070E; ; UNASSIGNED
072D..072F; ; UNASSIGNED
074B..077F; ; UNASSIGNED
07B2..0900; ; UNASSIGNED
0904; ; UNASSIGNED
093A..093B; ; UNASSIGNED
094E..094F; ; UNASSIGNED
0955..0957; ; UNASSIGNED
0971..0980; ; UNASSIGNED
0984; ; UNASSIGNED
098D..098E; ; UNASSIGNED
0991..0992; ; UNASSIGNED
09A9; ; UNASSIGNED
09B1; ; UNASSIGNED
09B3..09B5; ; UNASSIGNED
09BA..09BB; ; UNASSIGNED
09BD; ; UNASSIGNED
09C5..09C6; ; UNASSIGNED
09C9..09CA; ; UNASSIGNED
09CE..09D6; ; UNASSIGNED
09D8..09DB; ; UNASSIGNED
09DE; ; UNASSIGNED
09E4..09E5; ; UNASSIGNED
09FB..0A01; ; UNASSIGNED
0A03..0A04; ; UNASSIGNED
0A0B..0A0E; ; UNASSIGNED
0A11..0A12; ; UNASSIGNED
0A29; ; UNASSIGNED
0A31; ; UNASSIGNED
0A34; ; UNASSIGNED
0A37; ; UNASSIGNED
0A3A..0A3B; ; UNASSIGNED
0A3D; ; UNASSIGNED
0A43..0A46; ; UNASSIGNED
0A49..0A4A; ; UNASSIGNED
0A4E..0A58; ; UNASSIGNED
0A5D; ; UNASSIGNED
0A5F..0A65; ; UNASSIGNED
0A75..0A80; ; UNASSIGNED
0A84; ; UNASSIGNED
0A8C; ; UNASSIGNED
0A8E; ; UNASSIGNED
0A92; ; UNASSIGNED
0AA9; ; UNASSIGNED
0AB1; ; UNASSIGNED
0AB4; ; UNASSIGNED
0ABA..0ABB; ; UNASSIGNED
0AC6; ; UNASSIGNED
0ACA; ; UNASSIGNED
0ACE..0ACF; ; UNASSIGNED
0AD1..0ADF; ; UNASSIGNED
0AE1..0AE5; ; UNASSIGNED
0AF0..0B00; ; UNASSIGNED
0B04; ; UNASSIGNED
0B0D..0B0E; ; UNASSIGNED
0B11..0B12; ; UNASSIGNED
0B29; ; UNASSIGNED
0B31; ; UNASSIGNED
0B34..0B35; ; UNASSIGNED
0B3A..0B3B; ; UNASSIGNED
0B44..0B46; ; UNASSIGNED
0B49..0B4A; ; UNASSIGNED
0B4E..0B55; ; UNASSIGNED
0B58..0B5B; ; UNASSIGNED
0B5E; ; UNASSIGNED
0B62..0B65; ; UNASSIGNED
0B71..0B81; ; UNASSIGNED
0B84; ; UNASSIGNED
0B8B..0B8D; ; UNASSIGNED
0B91; ; UNASSIGNED
0B96..0B98; ; UNASSIGNED
0B9B; ; UNASSIGNED
0B9D; ; UNASSIGNED
0BA0..0BA2; ; UNASSIGNED
0BA5..0BA7; ; UNASSIGNED
0BAB..0BAD; ; UNASSIGNED
0BB6; ; UNASSIGNED
0BBA..0BBD; ; UNASSIGNED
0BC3..0BC5; ; UNASSIGNED
0BC9; ; UNASSIGNED
0BCE..0BD6; ; UNASSIGNED
0BD8..0BE6; ; UNASSIGNED
0BF3..0C00; ; UNASSIGNED
0C04; ; UNASSIGNED
0C0D; ; UNASSIGNED
0C11; ; UNASSIGNED
0C29; ; UNASSIGNED
0C34; ; UNASSIGNED
0C3A..0C3D; ; UNASSIGNED
0C45; ; UNASSIGNED
0C49; ; UNASSIGNED
0C4E..0C54; ; UNASSIGNED
0C57..0C5F; ; UNASSIGNED
0C62..0C65; ; UNASSIGNED
0C70..0C81; ; UNASSIGNED
0C84; ; UNASSIGNED
0C8D; ; UNASSIGNED
0C91; ; UNASSIGNED
0CA9; ; UNASSIGNED
0CB4; ; UNASSIGNED
0CBA..0CBD; ; UNASSIGNED
0CC5; ; UNASSIGNED
0CC9; ; UNASSIGNED
0CCE..0CD4; ; UNASSIGNED
0CD7..0CDD; ; UNASSIGNED
0CDF; ; UNASSIGNED
0CE2..0CE5; ; UNASSIGNED
0CF0..0D01; ; UNASSIGNED
0D04; ; UNASSIGNED
0D0D; ; UNASSIGNED
0D11; ; UNASSIGNED
0D29; ; UNASSIGNED
0D3A..0D3D; ; UNASSIGNED
0D44..0D45; ; UNASSIGNED
0D49; ; UNASSIGNED
0D4E..0D56; ; UNASSIGNED
0D58..0D5F; ; UNASSIGNED
0D62..0D65; ; UNASSIGNED
0D70..0D81; ; UNASSIGNED
0D84; ; UNASSIGNED
0D97..0D99; ; UNASSIGNED
0DB2; ; UNASSIGNED
0DBC; ; UNASSIGNED
0DBE..0DBF; ; UNASSIGNED
0DC7..0DC9; ; UNASSIGNED
0DCB..0DCE; ; UNASSIGNED
0DD5; ; UNASSIGNED
0DD7; ; UNASSIGNED
0DE0..0DF1; ; UNASSIGNED
0DF5..0E00; ; UNASSIGNED
0E3B..0E3E; ; UNASSIGNED
0E5C..0E80; ; UNASSIGNED
0E83; ; UNASSIGNED
0E85..0E86; ; UNASSIGNED
0E89; ; UNASSIGNED
0E8B..0E8C; ; UNASSIGNED
0E8E..0E93; ; UNASSIGNED
0E98; ; UNASSIGNED
0EA0; ; UNASSIGNED
0EA4; ; UNASSIGNED
0EA6; ; UNASSIGNED
0EA8..0EA9; ; UNASSIGNED
0EAC; ; UNASSIGNED
0EBA; ; UNASSIGNED
0EBE..0EBF; ; UNASSIGNED
0EC5; ; UNASSIGNED
0EC7; ; UNASSIGNED
0ECE..0ECF; ; UNASSIGNED
0EDA..0EDB; ; UNASSIGNED
0EDE..0EFF; ; UNASSIGNED
0F48; ; UNASSIGNED
0F6B..0F70; ; UNASSIGNED
0F8C..0F8F; ; UNASSIGNED
0F98; ; UNASSIGNED
0FBD; ; UNASSIGNED
0FCD..0FCE; ; UNASSIGNED
0FD0..0FFF; ; UNASSIGNED
1022; ; UNASSIGNED
1028; ; UNASSIGNED
102B; ; UNASSIGNED
1033..1035; ; UNASSIGNED
103A..103F; ; UNASSIGNED
105A..109F; ; UNASSIGNED
10C6..10CF; ; UNASSIGNED
10F9..10FA; ; UNASSIGNED
10FC..10FF; ; UNASSIGNED
115A..115E; ; UNASSIGNED
11A3..11A7; ; UNASSIGNED
11FA..11FF; ; UNASSIGNED
1207; ; UNASSIGNED
1247; ; UNASSIGNED
1249; ; UNASSIGNED
124E..124F; ; UNASSIGNED
1257; ; UNASSIGNED
1259; ; UNASSIGNED
125E..125F; ; UNASSIGNED
1287; ; UNASSIGNED
1289; ; UNASSIGNED
128E..128F; ; UNASSIGNED
12AF; ; UNASSIGNED
12B1; ; UNASSIGNED
12B6..12B7; ; UNASSIGNED
12BF; ; UNASSIGNED
12C1; ; UNASSIGNED
12C6..12C7; ; UNASSIGNED
12CF; ; UNASSIGNED
12D7; ; UNASSIGNED
12EF; ; UNASSIGNED
130F; ; UNASSIGNED
1311; ; UNASSIGNED
1316..1317; ; UNASSIGNED
131F; ; UNASSIGNED
1347; ; UNASSIGNED
135B..1360; ; UNASSIGNED
137D..139F; ; UNASSIGNED
13F5..1400; ; UNASSIGNED
1677..167F; ; UNASSIGNED
169D..169F; ; UNASSIGNED
16F1..16FF; ; UNASSIGNED
170D; ; UNASSIGNED
1715..171F; ; UNASSIGNED
1737..173F; ; UNASSIGNED
1754..175F; ; UNASSIGNED
176D; ; UNASSIGNED
1771; ; UNASSIGNED
1774..177F; ; UNASSIGNED
17DD..17DF; ; UNASSIGNED
17EA..17FF; ; UNASSIGNED
180F; ; UNASSIGNED
181A..181F; ; UNASSIGNED
1878..187F; ; UNASSIGNED
18AA..1DFF; ; UNASSIGNED
1E9C..1E9F; ; UNASSIGNED
1EFA..1EFF; ; UNASSIGNED
1F16..1F17; ; UNASSIGNED
1F1E..1F1F; ; UNASSIGNED
1F46..1F47; ; UNASSIGNED
1F4E..1F4F; ; UNASSIGNED
1F58; ; UNASSIGNED
1F5A; ; UNASSIGNED
1F5C; ; UNASSIGNED
1F5E; ; UNASSIGNED
1F7E..1F7F; ; UNASSIGNED
1FB5; ; UNASSIGNED
1FC5; ; UNASSIGNED
1FD4..1FD5; ; UNASSIGNED
1FDC; ; UNASSIGNED
1FF0..1FF1; ; UNASSIGNED
1FF5; ; UNASSIGNED
1FFF; ; UNASSIGNED
2053..2056; ; UNASSIGNED
2058..205E; ; UNASSIGNED
2064..2069; ; UNASSIGNED
2072..2073; ; UNASSIGNED
208F..209F; ; UNASSIGNED
20B2..20CF; ; UNASSIGNED
20EB..20FF; ; UNASSIGNED
213B..213C; ; UNASSIGNED
214C..2152; ; UNASSIGNED
2184..218F; ; UNASSIGNED
23CF..23FF; ; UNASSIGNED
2427..243F; ; UNASSIGNED
244B..245F; ; UNASSIGNED
24FF; ; UNASSIGNED
2614..2615; ; UNASSIGNED
2618; ; UNASSIGNED
267E..267F; ; UNASSIGNED
268A..2700; ; UNASSIGNED
2705; ; UNASSIGNED
270A..270B; ; UNASSIGNED
2728; ; UNASSIGNED
274C; ; UNASSIGNED
274E; ; UNASSIGNED
2753..2755; ; UNASSIGNED
2757; ; UNASSIGNED
275F..2760; ; UNASSIGNED
2795..2797; ; UNASSIGNED
27B0; ; UNASSIGNED
27BF..27CF; ; UNASSIGNED
27EC..27EF; ; UNASSIGNED
2B00..2E7F; ; UNASSIGNED
2E9A; ; UNASSIGNED
2EF4..2EFF; ; UNASSIGNED
2FD6..2FEF; ; UNASSIGNED
2FFC..2FFF; ; UNASSIGNED
3040; ; UNASSIGNED
3097..3098; ; UNASSIGNED
3100..3104; ; UNASSIGNED
312D..3130; ; UNASSIGNED
318F; ; UNASSIGNED
31B8..31EF; ; UNASSIGNED
321D..321F; ; UNASSIGNED
3244..3250; ; UNASSIGNED
327C..327E; ; UNASSIGNED
32CC..32CF; ; UNASSIGNED
32FF; ; UNASSIGNED
3377..337A; ; UNASSIGNED
33DE..33DF; ; UNASSIGNED
33FF; ; UNASSIGNED
4DB6..4DFF; ; UNASSIGNED
9FA6..9FFF; ; UNASSIGNED
A48D..A48F; ; UNASSIGNED
A4C7..ABFF; ; UNASSIGNED
D7A4..D7FF; ; UNASSIGNED
FA2E..FA2F; ; UNASSIGNED
FA6B..FAFF; ; UNASSIGNED
FB07..FB12; ; UNASSIGNED
FB18..FB1C; ; UNASSIGNED
FB37; ; UNASSIGNED
FB3D; ; UNASSIGNED
FB3F; ; UNASSIGNED
FB42; ; UNASSIGNED
FB45; ; UNASSIGNED
FBB2..FBD2; ; UNASSIGNED
FD40..FD4F; ; UNASSIGNED
FD90..FD91; ; UNASSIGNED
FDC8..FDCF; ; UNASSIGNED
FDFD..FDFF; ; UNASSIGNED
FE10..FE1F; ; UNASSIGNED
FE24..FE2F; ; UNASSIGNED
FE47..FE48; ; UNASSIGNED
FE53; ; UNASSIGNED
FE67; ; UNASSIGNED
FE6C..FE6F; ; UNASSIGNED
FE75; ; UNASSIGNED
FEFD..FEFE; ; UNASSIGNED
FF00; ; UNASSIGNED
FFBF..FFC1; ; UNASSIGNED
FFC8..FFC9; ; UNASSIGNED
FFD0..FFD1; ; UNASSIGNED
FFD8..FFD9; ; UNASSIGNED
FFDD..FFDF; ; UNASSIGNED
FFE7; ; UNASSIGNED
FFEF..FFF8; ; UNASSIGNED
10000..102FF; ; UNASSIGNED
1031F; ; UNASSIGNED
10324..1032F; ; UNASSIGNED
1034B..103FF; ; UNASSIGNED
10426..10427; ; UNASSIGNED
1044E..1CFFF; ; UNASSIGNED
1D0F6..1D0FF; ; UNASSIGNED
1D127..1D129; ; UNASSIGNED
1D1DE..1D3FF; ; UNASSIGNED
1D455; ; UNASSIGNED
1D49D; ; UNASSIGNED
1D4A0..1D4A1; ; UNASSIGNED
1D4A3..1D4A4; ; UNASSIGNED
1D4A7..1D4A8; ; UNASSIGNED
1D4AD; ; UNASSIGNED
1D4BA; ; UNASSIGNED
1D4BC; ; UNASSIGNED
1D4C1; ; UNASSIGNED
1D4C4; ; UNASSIGNED
1D506; ; UNASSIGNED
1D50B..1D50C; ; UNASSIGNED
1D515; ; UNASSIGNED
1D51D; ; UNASSIGNED
1D53A; ; UNASSIGNED
1D53F; ; UNASSIGNED
1D545; ; UNASSIGNED
1D547..1D549; ; UNASSIGNED
1D551; ; UNASSIGNED
1D6A4..1D6A7; ; UNASSIGNED
1D7CA..1D7CD; ; UNASSIGNED
1D800..1FFFD; ; UNASSIGNED
2A6D7..2F7FF; ; UNASSIGNED
2FA1E..2FFFD; ; UNASSIGNED
30000..3FFFD; ; UNASSIGNED
40000..4FFFD; ; UNASSIGNED
50000..5FFFD; ; UNASSIGNED
60000..6FFFD; ; UNASSIGNED
70000..7FFFD; ; UNASSIGNED
80000..8FFFD; ; UNASSIGNED
90000..9FFFD; ; UNASSIGNED
A0000..AFFFD; ; UNASSIGNED
B0000..BFFFD; ; UNASSIGNED
C0000..CFFFD; ; UNASSIGNED
D0000..DFFFD; ; UNASSIGNED
E0000; ; UNASSIGNED
E0002..E001F; ; UNASSIGNED
E0080..EFFFD; ; UNASSIGNED

# Total code points 3653

# This table contains code points from Table B.1 from RFC 3454

00AD; ; MAP
034F; ; MAP
1806; ; MAP
180B; ; MAP
180C; ; MAP
180D; ; MAP
200B; ; MAP
200C; ; MAP
200D; ; MAP
2060; ; MAP
FE00; ; MAP
FE01; ; MAP
FE02; ; MAP
FE03; ; MAP
FE04; ; MAP
FE05; ; MAP
FE06; ; MAP
FE07; ; MAP
FE08; ; MAP
FE09; ; MAP
FE0A; ; MAP
FE0B; ; MAP
FE0C; ; MAP
FE0D; ; MAP
FE0E; ; MAP
FE0F; ; MAP
FEFF; ; MAP

# Total code points 27

# code points from      Table C.1.2 

00A0; ; PROHIBITED
1680; ; PROHIBITED
2000; ; PROHIBITED
2001; ; PROHIBITED
2002; ; PROHIBITED
2003; ; PROHIBITED
2004; ; PROHIBITED
2005; ; PROHIBITED
2006; ; PROHIBITED
2007; ; PROHIBITED
2008; ; PROHIBITED
2009; ; PROHIBITED
200A; ; PROHIBITED
200B; ; PROHIBITED
202F; ; PROHIBITED
205F; ; PROHIBITED
3000; ; PROHIBITED

# Total code points 17

# code points from      Table C.2.2 

0080..009F; ; PROHIBITED
06DD; ; PROHIBITED
070F; ; PROHIBITED
180E; ; PROHIBITED
200C; ; PROHIBITED
200D; ; PROHIBITED
2028; ; PROHIBITED
2029; ; PROHIBITED
2060; ; PROHIBITED
2061; ; PROHIBITED
2062; ; PROHIBITED
2063; ; PROHIBITED
206A..206F; ; PROHIBITED
FEFF; ; PROHIBITED
FFF9..FFFC; ; PROHIBITED
1D173..1D17A; ; PROHIBITED

# Total code points 30

# code points from      Table C.3 

E000..F8FF; ; PROHIBITED
F0000..FFFFD; ; PROHIBITED
100000..10FFFD; ; PROHIBITED

# Total code points 2051

# code points from      Table C.4 

FDD0..FDEF; ; PROHIBITED
FFFE..FFFF; ; PROHIBITED
1FFFE..1FFFF; ; PROHIBITED
2FFFE..2FFFF; ; PROHIBITED
3FFFE..3FFFF; ; PROHIBITED
4FFFE..4FFFF; ; PROHIBITED
5FFFE..5FFFF; ; PROHIBITED
6FFFE..6FFFF; ; PROHIBITED
7FFFE..7FFFF; ; PROHIBITED
8FFFE..8FFFF; ; PROHIBITED
9FFFE..9FFFF; ; PROHIBITED
AFFFE..AFFFF; ; PROHIBITED
BFFFE..BFFFF; ; PROHIBITED
CFFFE..CFFFF; ; PROHIBITED
DFFFE..DFFFF; ; PROHIBITED
EFFFE..EFFFF; ; PROHIBITED
FFFFE..FFFFF; ; PROHIBITED
10FFFE..10FFFF; ; PROHIBITED

# Total code points 18

# code points from      Table C.5 

D800..DFFF; ; PROHIBITED

# Total code points 0

# code points from      Table C.6 

FFF9; ; PROHIBITED
FFFA; ; PROHIBITED
FFFB; ; PROHIBITED
FFFC; ; PROHIBITED
FFFD; ; PROHIBITED

# Total code points 5

# code points from      Table C.7 

2FF0..2FFB; ; PROHIBITED

# Total code points 1

# code points from      Table C.8 

0340; ; PROHIBITED
0341; ; PROHIBITED
200E; ; PROHIBITED
200F; ; PROHIBITED
202A; ; PROHIBITED
202B; ; PROHIBITED
202C; ; PROHIBITED
202D; ; PROHIBITED
202E; ; PROHIBITED
206A; ; PROHIBITED
206B; ; PROHIBITED
206C; ; PROHIBITED
206D; ; PROHIBITED
206E; ; PROHIBITED
206F; ; PROHIBITED

# Total code points 15

# code points from      Table C.9 

E0001; ; PROHIBITED
E0020..E007F; ; PROHIBITED

# Total code points 82


--- NEW FILE: nfs4_mixed_prep_s.txt ---
###################
# Copyright (C) 2003, International Business Machines
# Corporation and others.  All Rights Reserved.
###################

###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT 
###################

# This table contains code points from Table A.1 from RFC 3454

0221; ; UNASSIGNED
0234..024F; ; UNASSIGNED
02AE..02AF; ; UNASSIGNED
02EF..02FF; ; UNASSIGNED
0350..035F; ; UNASSIGNED
0370..0373; ; UNASSIGNED
0376..0379; ; UNASSIGNED
037B..037D; ; UNASSIGNED
[...1906 lines suppressed...]
202B; ; PROHIBITED
202C; ; PROHIBITED
202D; ; PROHIBITED
202E; ; PROHIBITED
206A; ; PROHIBITED
206B; ; PROHIBITED
206C; ; PROHIBITED
206D; ; PROHIBITED
206E; ; PROHIBITED
206F; ; PROHIBITED

# Total code points 15

# code points from      Table C.9 

E0001; ; PROHIBITED
E0020..E007F; ; PROHIBITED

# Total code points 82


--- NEW FILE: ra.txt ---
//*******************************************************************************
//*
//*   Copyright (C) 2003, International Business Machines
//*   Corporation and others.  All Rights Reserved.
//*
//*******************************************************************************

/**
 * These are top level comments for the bundle. Tag name: ra
 * @translate yes
 * @note Comments for tag named ra
 */
ra{
    /**
     * Top level comments for the string. Tag name: test1
     * @translate yes
     * @note {0} represents the position of OSNAME and {1} represents the position of job name
     *       e.g: The OS/400 job named SYSLOG
     */
    test1{"The {0} job named {1}"} 
    /**
     * Tag name: test2
     * @note This resource is for test2
     * @translate yes
     */
    test2{"some translatable stuff"} 

    /**
     * Top level comments for TestTable. Tag name: TestTable
     * @translate yes
     * @note This resource is for TestTable
     */
    TestTable{
        /**
         * Tag name: test3
         * @translate yes
         * @note This resource is for test3
         */
        test3{"jasldjfa"}
        /**
         * Tag name: test4
         * @translate yes
         * @note This resource is for test4
         */
        test4{"asdfasdf"}
        /**
         * Tag name: test5
         * @translate yes
         * @note This resource is for test5
         */
        test5:int{1}
        /**
         * Tag name: test6
         * @translate yes
         * @note This resource is for test6
         */
        test6:bin{"0102"}
    }
    /**
     * Top level comments for Tag name: TestArray
     * @translate yes
     * @note This resource is for Tag name: Array
     */
    TestArray{
        /**
         * comments for un-named string
         * @translate yes
         * @note This resource is un-named string
         */
        :string{"abcde"},
        /** 
         * comments for un-named int
         * @translate yes
         * @note This resource is un-named int
         */
        :int{1},
        /**
         * comments for un-named binary
         * @translate yes
         * @note This resource is for un-named binary
         */
        :bin{"12312312"}
    }
    
    /**
     * comments for TestInclude
     * @translate yes
     * @note This resource is for TestInclude
     */
    TestInclude:include{"translit_rules.txt"}

    /**
     * comments for TestImport
     * @translate yes
     * @note This resource is for TestImport
     */
    TestImpport:import{"importtest.bin"}

    /**
     * comments for TestIntVector
     * @translate yes
     * @note This resource is for TestIntVector
     */
     TestIntVector{
        /**
         * comments for element 1
         * @translate yes
         * @note This resource is for element 1
         */
         1,
        /**
         * comments for element 2
         * @translate yes
         * @note This resource is for element 2
         */
         2,
        /**
         * comments for element 3
         * @translate yes
         * @note This resource is for element 3
         */
         3
     }
        
}

--- NEW FILE: riwords.txt ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: test4x.ucm ---
# *******************************************************************************
# * Copyright (C) 2003, International Business Machines
# * Corporation and others.  All Rights Reserved.
# *******************************************************************************
#
# test4x.ucm
#
# Test file for MBCS conversion extension with four-byte codepage data.

<code_set_name>     "test4x"
<mb_cur_max>        4
<mb_cur_min>        1
<uconv_class>       "MBCS"

# test loading an extension table from the testdata package
<icu:base>          "test4"

CHARMAP
<U0009>     \x09 |0
END CHARMAP

Index: CollationTest_NON_IGNORABLE_STUB.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/CollationTest_NON_IGNORABLE_STUB.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- CollationTest_NON_IGNORABLE_STUB.txt	10 Sep 2003 02:42:50 -0000	1.1
+++ CollationTest_NON_IGNORABLE_STUB.txt	6 Apr 2004 10:09:51 -0000	1.2
@@ -1,3148 +1,2272 @@
-# Copyright (c) 2001-2003 International Business Machines
-# Corporation and others. All Rights Reserved.
-# This is a stub file
-# for complete test file, go to:
-# http://oss.software.ibm.com/cvs/icu4j/unicodetools/com/ibm/text/data/
-# based on:
-# UCA Version: 3.1.1d6/3.2.0
-# Generated:   2002-07-02,18:49:19 GMT [MD]
-0338 0334;
-0334 0591;
-0334 0592;
[...5389 lines suppressed...]
+4DA8 003F;	4DA8
+4DB2 003F;	4DB2
+2F803 003F;	2F803
+2F860 003F;	2F860
+2F891 0041;	2F891
+2F8E3 003F;	2F8E3
+2F91D 003F;	2F91D
+2F942 003F;	2F942
+2F95D 0041;	2F95D
+2F97C 003F;	2F97C
+2F9B1 003F;	2F9B1
+2F9ED 003F;	2F9ED
+2FA12 003F;	2FA12
+2A6D6 003F;	2A6D6
+F8FC 003F;	F8FC
+E0004 003F;	E0004
+F00FA 003F;	F00FA
+FFFF8 003F;	FFFF8
+100004 003F;	100004
+10FF02 003F;	10FF02

Index: CollationTest_SHIFTED_STUB.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/CollationTest_SHIFTED_STUB.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- CollationTest_SHIFTED_STUB.txt	10 Sep 2003 02:42:50 -0000	1.1
+++ CollationTest_SHIFTED_STUB.txt	6 Apr 2004 10:09:51 -0000	1.2
@@ -1,2867 +1,2272 @@
-# Copyright (c) 2001-2003 International Business Machines
-# Corporation and others. All Rights Reserved.
-# This is a stub file
-# for complete test file, go to:
-# http://oss.software.ibm.com/cvs/icu4j/unicodetools/com/ibm/text/data/
-# based on:
-# UCA Version: 3.1.1d6/3.2.0
-# Generated:   2002-07-02,18:49:23 GMT [MD]
-0009 0021;
-000A 0021;
-000B 0021;
[...5108 lines suppressed...]
+4DA8 003F;	4DA8
+4DB2 003F;	4DB2
+2F803 003F;	2F803
+2F860 003F;	2F860
+2F891 0041;	2F891
+2F8E3 003F;	2F8E3
+2F91D 003F;	2F91D
+2F942 003F;	2F942
+2F95D 0041;	2F95D
+2F97C 003F;	2F97C
+2F9B1 003F;	2F9B1
+2F9ED 003F;	2F9ED
+2FA12 003F;	2FA12
+2A6D6 003F;	2A6D6
+F8FC 003F;	F8FC
+E0004 003F;	E0004
+F00FA 003F;	F00FA
+FFFF8 003F;	FFFF8
+100004 003F;	100004
+10FF02 003F;	10FF02

Index: DataDrivenCollationTest.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/DataDrivenCollationTest.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- DataDrivenCollationTest.txt	10 Sep 2003 02:42:50 -0000	1.1
+++ DataDrivenCollationTest.txt	6 Apr 2004 10:09:51 -0000	1.2
@@ -18,6 +18,79 @@
                      }
     }
     TestData {
+        TestLithuanian {
+            Info {
+                Description { "Lithuanian sort order." }
+            }
+            Settings {
+                {
+                    TestLocale { "lt" }
+                }
+            }
+            Cases { "cz<č<d<iz<y<j<sz<š<t<zz<ž" } 
+        }
+        TestLatvian {
+            Info {
+                Description { "Latvian sort order." }
+            }
+            Settings {
+                {
+                    TestLocale { "lv" }
+                }
+            }
+            Cases { "cz<č<d<gz<ģ<h<iz<y<j<kz<ķ<l<lz<ļ<m<nz<ņ<o<rz<ŗ<s<sz<š<t<zz<ž" }
+        }
+        TestEstonian {
+            Info {
+                Description { "Estonian sort order." }
+            }
+            Settings {
+                {
+                    TestLocale { "et" }
+                }
+            }
+            Cases { "sy<š<šy<z<zy<ž<v<w<va<õ<õy<ä<äy<ö<öy<ü<üy<x" }
+        }
+        TestAlbanian {
+            Info {
+                Description { "Albanian sort order." }
+            }
+            Settings {
+                {
+                    TestLocale { "sq" }
+                }
+            }
+            Cases { "cz<ç<d<dz<dh<e<ez<ë<f<gz<gj<h<lz<ll<m<nz<nj<o<rz<rr<s<sz<sh<t<tz<th<u<xz<xh<y<zz<zh" }
+         }
+         
+         TestSimplifiedChineseOrder {
+            Info {
+                Description { "Sorted file has different order." }
+            }
+            Settings {
+                {
+                    TestLocale { "root" }
+		    Arguments { "[normalization on]" }
+                }
+            }
+
+            Cases { "\u5F20<\u5F20\u4E00\u8E3F"  }
+        }
+        
+        TestTibetanNormalizedIterativeCrash {
+            Info {
+                Description { "This pretty much crashes." }
+            }
+            Settings {
+                {
+                    TestLocale { "root" }
+                }
+            }
+
+            Cases { "\u0f71\u0f72\u0f80\u0f71\u0f72"
+                    "<\u0f80"
+            }
+        }
         TestThaiPartialSortKeyProblems {
             Info {
                 Description { "These are examples of strings that caused trouble in partial sort key testing." }
@@ -32,12 +105,12 @@
                     "<\u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18",
                     "\u0E01\u0E07\u0E01\u0E32\u0E23"
                     "<\u0E01\u0E07\u0E42\u0E01\u0E49",
-                    //"\u0E01\u0E23\u0E19\u0E17\u0E32"
-                    //"<\u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32",
+                    "\u0E01\u0E23\u0E19\u0E17\u0E32"
+                    "<\u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32",
                     "\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27"
                     "<\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27",
-                    //"\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D"
-                    //"<\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32"
+                    "\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D"
+                    "<\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32"
           }
         }
         TestJavaStyleRule {
@@ -186,8 +259,7 @@
                "= \u30A1\u0000\u059a\u30FC"
                "= \u30A1\u30FC"
             }
-        }
-  
+        }  
         da_TestPrimary {
             Info {
                 Description { "This test goes through primary strength cases" }
@@ -199,7 +271,7 @@
                 }
             }
             Cases {
-                "Lvi=Lwi",
+                "Lvi<Lwi",
                 "L\u00e4vi<L\u00f6wi",
                 "L\u00fcbeck=Lybeck",
             }
@@ -232,19 +304,19 @@
                 "\u00c7C<"
                 "D.S.B.<"
                 "DA<"
+                "\u00d0A<"
                 "DB<"
+                "\u00d0C<"
                 "DSB<"
                 "DSC<"
-                "\u00d0A<"
-                "\u00d0C<"
                 "EKSTRA_ARBEJDE<"
                 "EKSTRABUD0<"
                 "H\u00d8ST<"
                 "HAAG<"
                 "H\u00c5NDBOG<"
                 "HAANDV\u00c6RKSBANKEN<"
-                "karl<"
                 "Karl<"
+                "karl<"
                 "'NIELS J\u00d8RGEN'<"
                 "NIELS-J\u00d8RGEN<"
                 "NIELSEN<"
@@ -266,8 +338,8 @@
                 "STORMLY<"
                 "THORVALD<"
                 "THORVARDUR<"
-                "THYGESEN<"
                 "\u00feORVAR\u00d0UR<"
+                "THYGESEN<"
                 "'VESTERG\u00c5RD, A'<"
                 "'VESTERGAARD, A'<"
                 "'VESTERG\u00c5RD, B'<"
@@ -315,11 +387,11 @@
                 "subtle<"
                 "symbol<"
                 "s\u00e4mtlich<"
-                "waffle<"
                 "verkehrt<"
-                "wood<"
                 "vox<"
                 "v\u00e4ga<"
+                "waffle<"
+                "wood<"
                 "yen<"
                 "yuan<"
                 "yucca<"
@@ -332,6 +404,65 @@
                 "zysk0<"
                 "\u00e4ndere"
             }
+        }
+        hi_TestNewRules {
+            Info {
+                Description { "This test goes through new rules and tests against old rules" }
+            }
+            Settings {
+                {
+                    TestLocale { "hi" }
+                }
+            }
+            Cases {
+                 "ॐ<।<॥<॰<०<१<२<३"
+                 "<४<५<६<७<८<९<अ<आ"
+                 "<इ<ई<उ<ऊ<ऋ<ॠ<ऌ<ॡ"
+                 "<ऍ<ऎ<ए<ऐ<ऑ<ऒ<ओ<औ"
+                 "<क<क़=क़<कँ<कं<कः<क॑<क॒"
+                 "<क॓<क॔<कऽ<क्<का<कि<की<कु"
+                 "<कू<कृ<कॄ<कॢ<कॣ<कॅ<कॆ<के"
+                 "<कै<कॉ<कॊ<को<कौ<ख<ख़ =ख़<खँ<खं<खः"
+                 "<ख॑<ख॒<ख॓<ख॔<खऽ<ख्<खा<खि"
+                 "<खी<खु<खू<खृ<खॄ<खॢ<खॣ<खॅ"
+                 "<खॆ<खे<खै<खॉ<खॊ<खो<खौ<ग"
+                 "<ग़=ग़<गँ<गं<गः<ग॑<ग॒<ग॓<ग॔"
+                 "<गऽ<ग्<गा<गि<गी<गु<गू<गृ"
+                 "<गॄ<गॢ<गॣ<गॅ<गॆ<गे<गै<गॉ"
+                 "<गॊ<गो<गौ<घ<ङ<च<छ<ज<ज़ =ज़<जँ<जं<जः"
+                 "<ज॑<ज॒<ज॓<ज॔<जऽ<ज्<जा<जि"
+                 "<जी<जु<जू<जृ<जॄ<जॢ<जॣ<जॅ"
+                 "<जॆ<जे<जै<जॉ<जॊ<जो<जौ<झ"
+                 "<ञ<ट<ठ<ड<ड़=ड़<डँ<डं<डः<ड॑<ड॒<ड॓<ड॔"
+                 "<डऽ<ड्<डा<डि<डी<डु<डू<डृ"
+                 "<डॄ<डॢ<डॣ<डॅ<डॆ<डे<डै<डॉ"
+                 "<डॊ<डो<डौ<ढ<ढ़=ढ़<ढँ<ढं<ढः"
+                 "<ढ॑<ढ॒<ढ॓<ढ॔<ढऽ<ढ्<ढा<ढि"
+                 "<ढी<ढु<ढू<ढृ<ढॄ<ढॢ<ढॣ<ढॅ"
+                 "<ढॆ<ढे<ढै<ढॉ<ढॊ<ढो<ढौ<ण"
+                 "<त<थ<द<ध<न<ऩ =ऩ< नँ<नं< नः"
+                 "<न॑<न॒<न॓<न॔<नऽ<न्<ना<नि"
+                 "<नी<नु<नू<नृ<नॄ<नॢ<नॣ<नॅ"
+                 "<नॆ<ने<नै<नॉ<नॊ<नो<नौ"
+                 "<प<फ<फ़=फ़<फँ<फं<फः<फ॑<फ॒"
+                 "<फ॓<फ॔<फऽ<फ्<फा<फि<फी<फु<फू<फृ"
+                 "<फॄ<फॢ<फॣ<फॅ<फॆ<फे<फै<फॉ"
+                 "<फॊ<फो<फौ<ब<भ<म<य<य़=य़ "
+                 "<यँ<यं<यः<य॑<य॒<य॓<य॔"
+                 "<यऽ<य्<या<यि<यी<यु<यू<यृ"
+                 "<यॄ<यॢ<यॣ<यॅ<यॆ<ये<यै<यॉ"
+                 "<यॊ<यो<यौ<र<ऱ=ऱ<रँ<रं<रः"
+                 "<र॑<र॒<र॓<र॔<रऽ<र्<रा<रि"
+                 "<री<रु<रू<रृ<रॄ<रॢ<रॣ<रॅ"
+                 "<रॆ<रे<रै<रॉ<रॊ<रो<रौ"
+                 "<ल<ळ<ऴ=ऴ<ळँ<ळं<ळः<ळ॑<ळ॒"
+                 "<ळ॓<ळ॔<ळऽ<ळ्<ळा<ळि<ळी<ळु"
+                 "<ळू<ळृ<ळॄ<ळॢ<ळॣ<ळॅ<ळॆ<ळे"
+                 "<ळै<ळॉ<ळॊ<ळो<ळौ<व<श<ष<स<ह"
+                 "<़<ँ<ं<ः<॑<॒<॓<॔<ऽ<्<ा<ि<ी"
+                 "<ु<ू<ृ<ॄ<ॢ<ॣ<ॅ<ॆ"
+                 "<े<ै<ॉ<ॊ<ो<ौ"
+           }
         }
     }
 }

Index: idna_rules.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/idna_rules.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- idna_rules.txt	10 Sep 2003 02:42:50 -0000	1.1
+++ idna_rules.txt	6 Apr 2004 10:09:52 -0000	1.2
@@ -71,7 +71,7 @@
 			"]"}
 
 	ProhibitedSet{"[ \\u0020 \\u00A0 \\u1680 \\u2000 \\u2001 \\u2002 \\u2003 \\u2004 \\u2005 \\u2006 \\u2007 "
-			"\\u2008 \\u2009 \\u200A \\u200B \\u202F \\u205F \\u3000 \\u0000-\\u001F \\u007F "
+			"\\u2008 \\u2009 \\u200A \\u200B \\u202F \\u205F \\u3000 "
 			"\\u0080-\\u009F \\u06DD \\u070F \\u180E \\u200C \\u200D \\u2028 \\u2029 \\u2060 "
 			"\\u2061 \\u2062 \\u2063 \\u206A-\\u206F \\uFEFF \\uFFF9-\\uFFFC \\U0001D173-\\U0001D17A "
 			"\\uE000-\\uF8FF \\U000F0000-\\U000FFFFD \\U00100000-\\U0010FFFD \\uFDD0-\\uFDEF "

Index: rbbitst.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/rbbitst.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- rbbitst.txt	10 Sep 2003 02:42:50 -0000	1.1
+++ rbbitst.txt	6 Apr 2004 10:09:52 -0000	1.2
@@ -22,6 +22,9 @@
 #
 
 
+#   Temp debugging tests
+<line>
+<data><>\U0001d7f9\u003b\u2034<></data>
 
 ########################################################################################
 #
@@ -176,6 +179,11 @@
 <data>•\u0917\u092a\u00ad\u0936\u092a<200>!•\u092f\u0939<200> •\u0939\u093f\u0928\u094d\u200d\u0926\u0940<200> •\u0939\u0948<200> •\u0905\u093e\u092a<200> •\u0938\u093f\u0916\u094b\u0917\u0947<200>?•\n•:•\u092a\u094d\u0930\u093e\u092f\u0903<200>
 •\u0935\u0930\u094d\u0937\u093e<200>\r\n•\u092a\u094d\u0930\u0915\u093e\u0936<200>,•\u0924\u0941\u092e\u093e\u0930\u094b<200> •\u092e\u093f\u0924\u094d\u0930<200> •\u0915\u093e<200> •\u092a\u0924\u094d\u0930<200> •\u092a\u095d\u094b<200> •\u0938\u094d\u0924\u094d\u0930\u093f<200>.• •\u0968\u0966.\u0969\u096f<100> •\u0967\u0966\u0966.\u0966\u0966<100>\u20a8•\u0967,\u0967\u0966\u0966.\u0966\u0966<100> •\u0905\u092e\u091c<200>\n•\u0938\u094d\u200d\u0935\u0924\u0902\u0924\u094d\u0930<200>\r•</data>
 
+#
+#  Failures from monkey tests
+#
+<data>•\u8527<400>\u02ba<200>\u0027\u0d42•\u00b7•\u09ea<100></data>
+
 ########################################################################################
 #
 #
@@ -192,16 +200,16 @@
 
 
 <sent>
-<data>•This\n•</data>
+<data>•This\n<100></data>
 <data>•Hello! •how are you? •I'am fine. •Thankyou. •How are you \
-doing? •This\n• costs $20,00,000. •</data>
+doing? •This\n<100> costs $20,00,000. •</data>
 
 
 # Sentence ending in a quote.
 <data>•"Sentence ending with a quote." •Bye.•</data>
 
 # Sentence, and test data, ending without a period or other terminator.
-<data>•Here is a random sentence, no ending period•</data>
+<data>•Here is a random sentence, no ending period<100></data>
 
 
 <data>•  (This is it).  •Testing the sentence iterator. •\
@@ -221,7 +229,7 @@
 Yet another popular saying is: \
 'I'm fine thanks.' •\
 What is the proper use of the abbreviation pp.•? •Yes, I am definatelly 12" tall!•!\
-•Now\r•is\n•the\r\n•time\n•\r•for\r•\r•</data>
+•Now\r<100>is\n<100>the\r\n<100>time\n<100>\r<100>for\r<100>\r<100></data>
 
 <data>•No breaks when . is surrounded by UPPER.Case letters.  •</data>
 <data>•No breaks when . is followed by Numeric .4 a.4 C.4 3.1 .•</data>
@@ -242,7 +250,7 @@
 #
 #       Don't break sentences at boundary between CJK and digits
 #
-<data>•\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e48888\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u3002•Bye, now•</data>"
+<data>•\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e48888\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u3002•Bye, now<100></data>"
 
 #
 #      Breaks around '(' following a sentence TERM.  (Rule 9)
@@ -253,16 +261,15 @@
 <data>•How do you do? •(fine). •</data>
 
 #
-<data>•Hello.123•</data>    # Rule 6    
-<data>•Hello?•123•</data>  
+<data>•Hello.123<100></data>    # Rule 6    
+<data>•Hello?•123<100></data>  
   
-<data>•HELLO.Bye•</data>    # Rule 7    
-<data>•HELLO?•Bye•</data>    
+<data>•HELLO.Bye<100></data>    # Rule 7    
+<data>•HELLO?•Bye<100></data>    
 
-<data>•Hello.goodbye•</data>  #Rule 8
-<data>•Hello. •Goodbye•</data>
-<data>•Hello. goodbye•</data>
-<data>•Hello.)@#$%^&*()""   goodbye•</data>
+<data>•Hello.goodbye<100></data>  #Rule 8
+<data>•Hello. •Goodbye<100></data>
+<data>•Hello. goodbye<100></data>
 
 
 
@@ -295,9 +302,9 @@
 #        make sure there is sentence break after ?,danda(hindi phrase separator),
 #        fullstop followed by space.  (VERY old test)
 #       
-<data>•\u0928\u092e\u0938\u094d\u200d\u0924\u0947 \u0930\u092e\u0947\u0936\u0905\u093e\u092a\u0915\u0948\u0938\u0947 \u0939\u0948?•\u092e\u0948 \u0905\u091a\u094d\u200d \u091b\u093e \u0939\u0942\u0901\u0964 •\u0905\u093e\u092a\r\n•\
+<data>•\u0928\u092e\u0938\u094d\u200d\u0924\u0947 \u0930\u092e\u0947\u0936\u0905\u093e\u092a\u0915\u0948\u0938\u0947 \u0939\u0948?•\u092e\u0948 \u0905\u091a\u094d\u200d \u091b\u093e \u0939\u0942\u0901\u0964 •\u0905\u093e\u092a\r\n<100>\
 \u0915\u0948\u0938\u0947 \u0939\u0948?•\u0935\u0939 \u0915\u094d\u200d\u092f\u093e\n\
-•\u0939\u0948?•\u092f\u0939 \u0905\u093e\u092e \u0939\u0948. •\u092f\u0939 means "this". •"\u092a\u095d\u093e\u0908" meaning "education" or "studies". •\u0905\u093e\u091c(\u0938\u094d\u200d\u0935\u0924\u0902\u0924\u094d\u0930 \u0926\u093f\u0935\u093e\u0938) \u0939\u0948\u0964 •Let's end here. •</data>
+<100>\u0939\u0948?•\u092f\u0939 \u0905\u093e\u092e \u0939\u0948. •\u092f\u0939 means "this". •"\u092a\u095d\u093e\u0908" meaning "education" or "studies". •\u0905\u093e\u091c(\u0938\u094d\u200d\u0935\u0924\u0902\u0924\u094d\u0930 \u0926\u093f\u0935\u093e\u0938) \u0939\u0948\u0964 •Let's end here. •</data>
 
 #         Regression test for bug #1984, Sentence break in Arabic text.
   
@@ -358,7 +365,7 @@
 <data>•  •\u0041•</data>
 <data>•  •\u0009•</data>
 <data>•  •\u00B4•</data>
-<data>•    \u000C•</data>    # LB3C  × BK
+<data>•    \u000C<100></data>    # LB3C  × BK
 <data>•  •\u2014•</data>
 <data>•  •\uFFFC•</data>
 <data>•   \u0029•</data>    # LB 8   × CL
@@ -369,7 +376,7 @@
 <data>•  •\u4E00•</data>
 <data>•  •\u2024•</data>
 <data>•   \u002C•</data>    # LB 8   × IS
-<data>•   \u000A•</data>    # LB3C   × ( BK | CR | LF | NL )
+<data>•   \u000A<100></data>    # LB3C   × ( BK | CR | LF | NL )
 <data>•  •\u0E5A•</data>
 <data>•  •\u0032•</data>
 <data>•  •\u0028•</data>
@@ -387,34 +394,34 @@
 # 3a  Always break after hard line breaks.
 # 3c  Never break before hard line breaks.
 
-<data>•  •\u00A1\u2028•\u00A1•</data>
-<data>•  •\u0041\u2028•\u0041•</data>
-<data>•  •\u0009\u2028•\u0009•</data>
-<data>•  •\u00B4\u2028•\u00B4•</data>
-<data>•   \u000C•\u2028•\u000C•</data>
-<data>•  •\u2014\u2028•\u2014•</data>
-<data>•  •\uFFFC\u2028•\uFFFC•</data>
-<data>•   \u0029\u2028•\u0029•</data>
-#<data>•   \u0301\u2028•\u0301•</data>    # TODO:  fix.
-<data>•   \u0021\u2028•\u0021•</data>
-#<data>•   \u00A0\u2028•\u00A0•</data>    # TODO:  fix
-<data>•  •\u002D\u2028•\u002D•</data>
-<data>•  •\u4E00\u2028•\u4E00•</data>
-<data>•  •\u2024\u2028•\u2024•</data>
-<data>•   \u002C\u2028•\u002C•</data>
-<data>•   \u000A•\u2028•\u000A•</data>
-<data>•  •\u0E5A\u2028•\u0E5A•</data>
-<data>•  •\u0032\u2028•\u0032•</data>
-<data>•  •\u0028\u2028•\u0028•</data>
-<data>•  •\u0025\u2028•\u0025•</data>
-<data>•  •\u0024\u2028•\u0024•</data>
-<data>•  •\u0022\u2028•\u0022•</data>
-<data>•  •\u0E01\u2028•\u0E01•</data>
-<data>•  •\uDB7F\u2028•\uDB7F•</data>
-<data>•   \u0020\u2028•\u0020•</data>
-<data>•   \u002F\u2028•\u002F•</data>
-<data>•  •\uF8FF\u2028•\uF8FF•</data>
-<data>•   \u200B\u2028•\u200B•</data>
+<data>•  •\u00A1\u2028<100>\u00A1•</data>
+<data>•  •\u0041\u2028<100>\u0041•</data>
+<data>•  •\u0009\u2028<100>\u0009•</data>
+<data>•  •\u00B4\u2028<100>\u00B4•</data>
+<data>•   \u000C<100>\u2028<100>\u000C<100></data>
+<data>•  •\u2014\u2028<100>\u2014•</data>
+<data>•  •\uFFFC\u2028<100>\uFFFC•</data>
+<data>•   \u0029\u2028<100>\u0029•</data>
+#<data>•   \u0301\u2028<100>\u0301•</data>    # TODO:  fix.
+<data>•   \u0021\u2028<100>\u0021•</data>
+#<data>•   \u00A0\u2028<100>\u00A0•</data>    # TODO:  fix
+<data>•  •\u002D\u2028<100>\u002D•</data>
+<data>•  •\u4E00\u2028<100>\u4E00•</data>
+<data>•  •\u2024\u2028<100>\u2024•</data>
+<data>•   \u002C\u2028<100>\u002C•</data>
+<data>•   \u000A<100>\u2028<100>\u000A<100></data>
+<data>•  •\u0E5A\u2028<100>\u0E5A•</data>
+<data>•  •\u0032\u2028<100>\u0032•</data>
+<data>•  •\u0028\u2028<100>\u0028•</data>
+<data>•  •\u0025\u2028<100>\u0025•</data>
+<data>•  •\u0024\u2028<100>\u0024•</data>
+<data>•  •\u0022\u2028<100>\u0022•</data>
+<data>•  •\u0E01\u2028<100>\u0E01•</data>
+<data>•  •\uDB7F\u2028<100>\uDB7F•</data>
+<data>•   \u0020\u2028<100>\u0020•</data>
+<data>•   \u002F\u2028<100>\u002F•</data>
+<data>•  •\uF8FF\u2028<100>\uF8FF•</data>
+<data>•   \u200B\u2028<100>\u200B•</data>
 
 
 #
@@ -424,11 +431,11 @@
 <line>
 
 <data>•Multi-•Level •example •of •a •semi-•idiotic •non-•sensical •(non-•important) •sentence. 
-•Hi  •Hello •How\n•are\r•you\u2028•fine.\t•good.  •Now\r•is\n•the\r\n•time\n•\r•for\r•\r•all•</data>
+<100>Hi  •Hello •How\n<100>are\r<100>you\u2028<100>fine.\t•good.  •Now\r<100>is\n<100>the\r\n<100>time\n<100>\r<100>for\r<100>\r<100>all•</data>
 
 <line>
-<data>•Hello! •how\r\n• •(are)\r• •you? •I'am •fine- •Thankyou. •foo\u00a0bar 
-•How, •are, •you? •This, •costs •$20,00,000.•</data>
+<data>•Hello! •how\r\n<100> •(are)\r<100> •you? •I'am •fine- •Thankyou. •foo\u00a0bar 
+<100>How, •are, •you? •This, •costs •$20,00,000.•</data>
 
 #     test for bug #4068133
 #
@@ -438,10 +445,10 @@
 <data>•foo\u00a0bar•</data>
 
 #     to test for bug #4097920
-<data>•dog,•cat,•mouse •(one)•(two)\n•</data>
+<data>•dog,•cat,•mouse •(one)•(two)\n<100></data>
 
 #     to test for bug #4035266
-<data>•The •balance •is •$-23,456.78, •not •-•$32,456.78!\n•</data>
+<data>•The •balance •is •$-23,456.78, •not •-•$32,456.78!\n<100></data>
 
 
 #      to test for bug #4098467
@@ -462,7 +469,7 @@
 
 #      Surrogate line break tests.
 #
-<data>•\u4e01•\ud840\udc01•\u4e02•abc•\ue000•\udb80\udc01•</data>
+<data>•\u4e01•\ud840\udc01•\u4e02•abc •\ue000 •\udb80\udc01•</data>
 
 #      Regression for bug 836
 <data>•AAA•(AAA •</data> 

Index: regextst.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/regextst.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- regextst.txt	10 Sep 2003 02:42:50 -0000	1.1
+++ regextst.txt	6 Apr 2004 10:09:52 -0000	1.2
@@ -18,6 +18,8 @@
 #                                   x      free spacing and comments
 #                                   s      dot-matches-all mode
 #                                   m      multi-line mode.  $ and ^ match at embedded new-lines
+#                                   v      If ICU is configured without break iteration, this
+#                                          regex test pattern should not compile.
 #                                   d      dump the compiled pattern
 #                                   t      trace operation of match engine.
 #                                 White space must be present between the flags and the match string.
@@ -66,6 +68,7 @@
 ".*\Ahello"                    "stuff\nhello" # don't match after embedded new-line.
 
 # \b \B
+#
 ".*?\b(.).*"                   "<0>  $%^&*( <1>h</1>ello123%^&*()gxx</0>"
 "\ba\b"                        "-<0>a</0>"
 "\by\b"                        "xy"
@@ -78,6 +81,20 @@
 
 "(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?.*"   "<0>   \u0301 \u0301<1>A</1>\u0302BC\u0303\u0304<2> </2>\u0305 \u0306<3>X</3>\u0307Y\u0308</0>"
 
+
+#
+#  Unicode word boundary mode
+#
+"(?w).*?\b"                      v   "<0></0>hello, world"
+"(?w).*?(\b.+?\b).*"             v   "<0><1> </1> 123.45   </0>"
+"(?w).*?(\b\d.*?\b).*"           v   "<0>  <1>123.45</1>   </0>"
+".*?(\b.+?\b).*"                     "<0>  <1>123</1>.45   </0>"
+"(?w:.*?(\b\d.*?\b).*)"          v   "<0>  <1>123.45</1>   </0>"
+"(?w:.*?(\b.+?\b).*)"            v   "<0><1>don't</1>   </0>"
+"(?w:.+?(\b\S.+?\b).*)"          v   "<0>  <1>don't</1>   </0>"
+"(?w:(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?).*)"     v "<0><1>.</1><2> </2><3>,</3><4>:</4><5>$</5><6>37,000.50</6><7> </7>   </0>"
+
+
 # . does not match new-lines
 "."                            "\u000a\u000d\u0085\u000c\u2028\u2029<0>X</0>\u000aY"
 "A."                           "A\u000a "# no match
@@ -350,6 +367,27 @@
 "(\ud800)(\udc00)"                "\U00010000"
 
 
+#
+# Bug 3225
+
+"1|9"                             "<0>1</0>"
+"1|9"                             "<0>9</0>"
+"1*|9"                            "<0>1</0>"
+"1*|9"                            "<0></0>9"
+
+"(?:a|ac)d"                       "<0>acd</0>"
+"a|ac"                            "<0>a</0>c"
+
+#
+# Bug 3320
+#
+"(a([^ ]+)){0,} (c)"              "<0><1>a<2>b</2></1> <3>c</3></0> "
+"(a([^ ]+))* (c)"                 "<0><1>a<2>b</2></1> <3>c</3></0> "
+
+#
+# Bug 3436
+#
+"(.*?) *$"                        "<0><1>test</1>    </0>"
 
 #
 #  Random debugging, Temporary

Index: te.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/te.txt,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- te.txt	10 Sep 2003 02:42:50 -0000	1.3
+++ te.txt	6 Apr 2004 10:09:53 -0000	1.4
@@ -1,6 +1,6 @@
 //*******************************************************************************
 //*
-//*   Copyright (C) 1998-2000, International Business Machines
+//*   Copyright (C) 1998-2003, International Business Machines
 //*   Corporation and others.  All Rights Reserved.
 //*
 //*******************************************************************************
@@ -89,12 +89,14 @@
         tag0 { TE0 } 
     }
 	
-	CollationElements 
-	{ Version { "1.0" }
+    collations {
+      standard { 
+	 Version { "1.0" }
 	  Override { "FALSE" }
 	  Sequence {"& A < a\u0308 , A\u0308 & C < c\u0327 , C\u0327 & G < g"
         "\u0306 , G\u0306 & H < \u0131 , I , i , \u0130 < \u0132 , \u0133 & O < o\u0308 "
         ", O\u0308 & S < s\u0327 , S\u0327 & U < u\u0308 , U\u0308 " }
+      }
     }
 }
 

Index: test1.ucm
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/test1.ucm,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- test1.ucm	10 Sep 2003 02:42:50 -0000	1.3
+++ test1.ucm	6 Apr 2004 10:09:53 -0000	1.4
@@ -1,5 +1,5 @@
 # *******************************************************************************
-# * Copyright (C) 2001, International Business Machines
+# * Copyright (C) 2001-2003, International Business Machines
 # * Corporation and others.  All Rights Reserved.
 # *******************************************************************************
 #
@@ -11,13 +11,14 @@
 <mb_cur_max>        1
 <mb_cur_min>        1
 <uconv_class>       "MBCS"
-<subchar>	        \xff
-<icu:state>	        0, 5-9, ff
+<subchar>           \xff
+<icu:state>         0, 5-9, ff
 
 CHARMAP
 
 # fromUnicode result is zero byte from other than U+0000
 <U20ac>     \x00 |0
+<U20ad>     \x00 |1
 
 # nothing special
 <U0005>     \x05 |0

Index: test3.ucm
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/test3.ucm,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- test3.ucm	10 Sep 2003 02:42:50 -0000	1.3
+++ test3.ucm	6 Apr 2004 10:09:53 -0000	1.4
@@ -1,20 +1,21 @@
 # *******************************************************************************
-# * Copyright (C) 2001, International Business Machines
+# * Copyright (C) 2001-2003, International Business Machines
 # * Corporation and others.  All Rights Reserved.
 # *******************************************************************************
 #
 # test3.ucm
 #
 # Test file for MBCS conversion with three-byte codepage data.
+# Also contains extension mappings (m:n).
 
 <code_set_name>     "test3"
 <mb_cur_max>        3
 <mb_cur_min>        1
 <uconv_class>       "MBCS"
-<subchar>	        \xff
-<icu:state>	        0, 1:1, 5-9, ff
-<icu:state>	        2:2
-<icu:state>	        a-f.p
+<subchar>           \xff
+<icu:state>         0, 1:1, 5-9, ff
+<icu:state>         2:2
+<icu:state>         a-f.p
 
 CHARMAP
 
@@ -24,6 +25,11 @@
 # nothing special
 <U0005>     \x05 |0
 
+# extensions
+<U00c0>     \x05+\x01\x02\x0d |0
+<U00c0>     \x05+\x01\x02\x0e |3
+<U00c0>     \x05+\xff |3
+
 # toUnicode result is fallback direct
 <U0006>     \x06 |3
 
@@ -31,7 +37,17 @@
 <U101234>   \x07 |0
 <Ufebcd>    \x08 |3
 
+# extensions
+<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0
+<U101234>+<U50005>          \x07+\x00+\x01\x02\x0e+\x05 |0
+<U101234>+<U60006>          \x07+\x00+\x01\x02\x0f+\x06 |0
+<U101234>+<U70007>          \x07+\x00+\x01\x02\x0f |1
+
 #unassigned \x09
+
+# extensions where the first code point is unassigned, for replay testing
+#<U00c4><U0300> \x09+\x09 |0
+<U00c4><U00c4><U101234><U0005> \x05+\x01\x02\x0c |0
 
 # toUnicode result is surrogate pair: test real pair, single unit, unassigned
 <U23456>    \x01\x02\x0a |0

Index: test4.ucm
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/test4.ucm,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- test4.ucm	10 Sep 2003 02:42:50 -0000	1.3
+++ test4.ucm	6 Apr 2004 10:09:53 -0000	1.4
@@ -1,27 +1,35 @@
 # *******************************************************************************
-# * Copyright (C) 2001, International Business Machines
+# * Copyright (C) 2001-2003, International Business Machines
 # * Corporation and others.  All Rights Reserved.
 # *******************************************************************************
 #
 # test4.ucm
 #
-# Test file for MBCS conversion with three-byte codepage data.
+# Test file for MBCS conversion with four-byte codepage data.
 
 <code_set_name>     "test4"
 <mb_cur_max>        4
 <mb_cur_min>        1
 <uconv_class>       "MBCS"
-<subchar>	        \xff
-<icu:state>	        0, 1:1, 5-9, ff
-<icu:state>	        2:2
-<icu:state>	        3:3
-<icu:state>	        a-f.p
+
+# both subchars are single-byters, which does not make sense
+# but works - adding subchar1 for tests but don't want to
+# change old tests for a new subchar -- markus 20031028
+<subchar>           \xff
+<subchar1>          \xe1
+<icu:state>         0, 1:1, 5-9, e1, ff
+<icu:state>         2:2
+<icu:state>         3:3
+<icu:state>         a-f.p, ff
 
 CHARMAP
 
 # fromUnicode result is zero byte from other than U+0000
 <U20ac>     \x00 |0
 
+# fallback from non-zero to zero possible with extension table
+<U20ad>     \x00 |1
+
 # nothing special
 <U0005>     \x05 |0
 
@@ -41,5 +49,16 @@
 <U34567>    \x01\x02\x03\x0d |3
 <U000e>     \x01\x02\x03\x0e |3
 #unassigned \x01\x02\x03\x0f
+
+# <subchar1> non-mapping
+<U50005>    \xe1 |2
+# add a mapping that turns the above's Unicode side into a prefix
+<U50005><U60006> \x06 |1
+
+# many bytes, and bytes per UChar
+<U30ab><U309a> \x01\x02\x03\x0a\x01\x02\x03\x0b\x01\x02\x03\x0c\x01\x02\x03\x0d\x01\x02\x03\x0e\x01\x02\x03\x0f\x01\x02\x03\x0a\x05\x06\x07 |0
+
+# many UChars, and UChars per byte
+<U304b><U309a><U304d><U309a><U304f><U309a><U3051><U309a><U3053><U309a><U30ab><U309a><U30ad><U309a><U30af><U309a><U30b1><U309a><U0300> \x08\x09 |0
 
 END CHARMAP

Index: testaliases.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/testaliases.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- testaliases.txt	10 Sep 2003 02:42:50 -0000	1.1
+++ testaliases.txt	6 Apr 2004 10:09:53 -0000	1.2
@@ -23,10 +23,10 @@
 
     // alias that uses another alias and references parts of the end structure
     referencingalias:alias { "testaliases/anotheralias/Sequence" } // Referencing through another alias
-    anotheralias:alias { "/ICUDATA/uk/CollationElements" }
+    anotheralias:alias { "/ICUDATA/uk/collations/standard" }
 
     // aliasing using position
-    CollationElements:alias { "/ICUDATA/uk" } // Referencing corresponding resource in another bundle
+    collations:alias { "/ICUDATA/uk" } // Referencing corresponding resource in another bundle
 
     // aliasing arrays
     zoneTests {

Index: testdata.mk
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/testdata.mk,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- testdata.mk	10 Sep 2003 02:42:50 -0000	1.4
+++ testdata.mk	6 Apr 2004 10:09:53 -0000	1.5
@@ -14,16 +14,31 @@
 ALL : "$(TESTDATAOUT)\testdata.dat" 
 	@echo Test data is built.
 
-"$(TESTDATAOUT)\testdata.dat" : "$(TESTDATABLD)\casing.res" "$(TESTDATABLD)\mc.res" "$(TESTDATABLD)\root.res" "$(TESTDATABLD)\te.res" "$(TESTDATABLD)\te_IN.res" "$(TESTDATABLD)\testaliases.res" "$(TESTDATABLD)\testtypes.res" "$(TESTDATABLD)\testempty.res" "$(TESTDATABLD)\$(TESTDT)iscii.res" "$(TESTDATABLD)\$(TESTDT)idna_rules.res" "$(TESTDATABLD)\DataDrivenCollationTest.res" $(TESTDATABLD)\$(TESTDT)test.icu "$(TESTDATABLD)\$(TESTDT)test1.cnv" "$(TESTDATABLD)\$(TESTDT)test3.cnv" "$(TESTDATABLD)\$(TESTDT)test4.cnv" "$(TESTDATABLD)\$(TESTDT)ibm9027.cnv"
+# icu26_testtypes.res is there for cintltst/udatatst.c/TestSwapData()
+# I generated it with an ICU 2.6.1 build on Windows after removing
+# testincludeUTF (which made it large, unnecessarily for this test)
+# and CollationElements (which will not work with a newer swapper)
+# markus 2003nov19
+
+# icu26e_testtypes.res is the same, but icuswapped to big-endian EBCDIC
+# markus 2003nov21
+
+"$(TESTDATAOUT)\testdata.dat" : "$(TESTDATABLD)\casing.res" "$(TESTDATABLD)\conversion.res" "$(TESTDATABLD)\mc.res" "$(TESTDATABLD)\root.res" "$(TESTDATABLD)\te.res" "$(TESTDATABLD)\te_IN.res" "$(TESTDATABLD)\testaliases.res" "$(TESTDATABLD)\testtypes.res" "$(TESTDATABLD)\testempty.res" "$(TESTDATABLD)\$(TESTDT)iscii.res" "$(TESTDATABLD)\$(TESTDT)idna_rules.res" "$(TESTDATABLD)\DataDrivenCollationTest.res" "$(TESTDATABLD)\$(TESTDT)test.icu" "$(TESTDATABLD)\$(TESTDT)testtable32.res" "$(TESTDATABLD)\$(TESTDT)test1.cnv" "$(TESTDATABLD)\$(TESTDT)test3.cnv" "$(TESTDATABLD)\$(TESTDT)test4.cnv" "$(TESTDATABLD)\$(TESTDT)test4x.cnv" "$(TESTDATABLD)\$(TESTDT)ibm9027.cnv" "$(TESTDATABLD)\$(TESTDT)nfscsi.spp" "$(TESTDATABLD)\$(TESTDT)nfscss.spp" "$(TESTDATABLD)\$(TESTDT)nfscis.spp" "$(TESTDATABLD)\$(TESTDT)nfsmxs.spp" "$(TESTDATABLD)\$(TESTDT)nfsmxp.spp"
 	@echo Building test data
 	@copy "$(TESTDATABLD)\$(TESTDT)te.res" "$(TESTDATAOUT)\$(TESTDT)nam.typ"
-	@"$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata" -f -v -m common -c -p"$(TESTPKG)"  -O "$(PKGOPT)" -d "$(TESTDATAOUT)" -T "$(TESTDATABLD)" -s "$(TESTDATABLD)" <<
+	@copy "$(TESTDATA)\$(TESTDT)icu26_testtypes.res" "$(TESTDATABLD)"
+	@copy "$(TESTDATA)\$(TESTDT)icu26e_testtypes.res" "$(TESTDATABLD)"
+	@"$(ICUP)\bin\pkgdata" -f -v -m common -c -p"$(TESTPKG)" -d "$(TESTDATAOUT)" -T "$(TESTDATABLD)" -s "$(TESTDATABLD)" <<
 $(TESTDT)casing.res
+$(TESTDT)conversion.res
 $(TESTDT)mc.res
 $(TESTDT)root.res
+$(TESTDT)testtable32.res
 $(TESTDT)te.res
 $(TESTDT)te_IN.res
 $(TESTDT)testtypes.res
+$(TESTDT)icu26_testtypes.res
+$(TESTDT)icu26e_testtypes.res
 $(TESTDT)testempty.res
 $(TESTDT)testaliases.res
 $(TESTDT)iscii.res
@@ -32,8 +47,14 @@
 $(TESTDT)test1.cnv
 $(TESTDT)test3.cnv
 $(TESTDT)test4.cnv
+$(TESTDT)test4x.cnv
 $(TESTDT)ibm9027.cnv
 $(TESTDT)idna_rules.res
+$(TESTDT)nfscsi.spp
+$(TESTDT)nfscss.spp
+$(TESTDT)nfscis.spp
+$(TESTDT)nfsmxs.spp
+$(TESTDT)nfsmxp.spp
 <<
 
 
@@ -53,9 +74,41 @@
 	@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -p"$(TESTPKG)" -q -s"$(TESTDATA)" -d"$(TESTDATABLD)" idna_rules.txt
 
 
-$(TESTDATABLD)\$(TESTDT)test.icu : {"$(ICUTOOLS)\gentest\$(CFG)"}gentest.exe
+"$(TESTDATABLD)\$(TESTDT)test.icu" : {"$(ICUTOOLS)\gentest\$(CFG)"}gentest.exe
 	"$(ICUTOOLS)\gentest\$(CFG)\gentest" -d"$(TESTDATABLD)"
 
+# testtable32 resource file
+"$(TESTDATABLD)\testtable32.txt" : {"$(ICUTOOLS)\gentest\$(CFG)"}gentest.exe
+	"$(ICUTOOLS)\gentest\$(CFG)\gentest" -r -d"$(TESTDATABLD)"
+
+"$(TESTDATABLD)\$(TESTDT)testtable32.res": "$(TESTDATABLD)\testtable32.txt"
+	@echo Making Test Resource Bundle file for IDNA reference implementation
+	@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -p"$(TESTPKG)" -q -s"$(TESTDATABLD)" -d"$(TESTDATABLD)" testtable32.txt
+
+# Targets for nfscsi.spp
+"$(TESTDATABLD)\$(TESTDT)nfscsi.spp" : {"$(ICUTOOLS)\gensprep\$(CFG)"}gensprep.exe "$(TESTDATA)\nfs4_cs_prep_ci.txt"
+	@echo Building nfscsi.spp
+	@"$(ICUTOOLS)\gensprep\$(CFG)\gensprep" -s "$(TESTDATA)" -d "$(TESTDATABLD)\\" -b nfscsi -p "$(TESTPKG)" -u 3.2.0 nfs4_cs_prep_ci.txt
+
+# Targets for nfscss.spp
+"$(TESTDATABLD)\$(TESTDT)nfscss.spp" : {"$(ICUTOOLS)\gensprep\$(CFG)"}gensprep.exe "$(TESTDATA)\nfs4_cs_prep_cs.txt"
+	@echo Building nfscss.spp
+	@"$(ICUTOOLS)\gensprep\$(CFG)\gensprep" -s "$(TESTDATA)" -d "$(TESTDATABLD)\\" -b nfscss -p "$(TESTPKG)" -u 3.2.0 nfs4_cs_prep_cs.txt
+
+# Targets for nfscis.spp
+"$(TESTDATABLD)\$(TESTDT)nfscis.spp" : {"$(ICUTOOLS)\gensprep\$(CFG)"}gensprep.exe "$(TESTDATA)\nfs4_cis_prep.txt"
+	@echo Building nfscis.spp
+	@"$(ICUTOOLS)\gensprep\$(CFG)\gensprep" -s "$(TESTDATA)" -d "$(TESTDATABLD)\\" -b nfscis -p "$(TESTPKG)" -u 3.2.0 -k -n "$(ICUTOOLS)\..\data\unidata" nfs4_cis_prep.txt
+
+# Targets for nfsmxs.spp
+"$(TESTDATABLD)\$(TESTDT)nfsmxs.spp" : {"$(ICUTOOLS)\gensprep\$(CFG)"}gensprep.exe "$(TESTDATA)\nfs4_mixed_prep_s.txt"
+	@echo Building nfsmxs.spp
+	@"$(ICUTOOLS)\gensprep\$(CFG)\gensprep" -s "$(TESTDATA)" -d "$(TESTDATABLD)\\" -b nfsmxs -p "$(TESTPKG)" -u 3.2.0 -k -n "$(ICUTOOLS)\..\data\unidata" nfs4_mixed_prep_s.txt
+
+# Targets for nfsmxp.spp
+"$(TESTDATABLD)\$(TESTDT)nfsmxp.spp" : {"$(ICUTOOLS)\gensprep\$(CFG)"}gensprep.exe "$(TESTDATA)\nfs4_mixed_prep_p.txt"
+	@echo Building nfsmxp.spp
+	@"$(ICUTOOLS)\gensprep\$(CFG)\gensprep" -s "$(TESTDATA)" -d "$(TESTDATABLD)\\" -b nfsmxp -p "$(TESTPKG)" -u 3.2.0 -k -n "$(ICUTOOLS)\..\data\unidata" nfs4_mixed_prep_p.txt
 
 
 # Targets for test converter data
@@ -66,6 +119,9 @@
 	@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" -d"$(TESTDATABLD)" -p $(TESTPKG) $**
 
 "$(TESTDATABLD)\$(TESTDT)test4.cnv": "$(TESTDATA)\test4.ucm"
+	@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" -d"$(TESTDATABLD)" -p $(TESTPKG) $**
+
+"$(TESTDATABLD)\$(TESTDT)test4x.cnv": "$(TESTDATA)\test4x.ucm"
 	@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" -d"$(TESTDATABLD)" -p $(TESTPKG) $**
 
 "$(TESTDATABLD)\$(TESTDT)ibm9027.cnv": "$(TESTDATA)\ibm9027.ucm"

Index: testtypes.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/testtypes.txt,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- testtypes.txt	10 Sep 2003 02:42:50 -0000	1.4
+++ testtypes.txt	6 Apr 2004 10:09:53 -0000	1.5
@@ -33,16 +33,18 @@
     
     // genrb just includes the test as a string after converting to UTF-16
         
-    testincludeUTF:include{ "th18057.txt" }
+    testincludeUTF:include{ "riwords.txt" }
     // No unescaping is done.
     testinclude:include{ "translit_rules.txt" }
 
     // Genrb failed parsing \u0075 sequence this tests it 
-    CollationElements{
+    collations {
+      standard { 
 	Version{"x01"}
 	Sequence{ 
 		       "&'\u0075' = '\uFF55'" // LATIN SMALL LETTER U
 		}
+      }
     }
     
     string{ }

Index: translit_rules.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/translit_rules.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2

--- th18057.txt DELETED ---