/******************************************************************** * COPYRIGHT: * Copyright (c) 1997-2010, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ /* Test suite UnicodeTest: its dispatcher below exposes TestAdditionalProperties, TestBinaryValues and TestConsistency. */ #include "unicode/ustring.h" #include "unicode/uchar.h" #include "unicode/uniset.h" #include "unicode/putil.h" #include "cstring.h" #include "hash.h" #include "normalizer2impl.h" #include "uparse.h" #include "ucdtest.h" /* Number of elements in a true array, cast to int32_t. */ #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof(array[0])) /* Property names that the constructor feeds to puti() with value 1 (see its Ignore comment); presumably they land in unknownPropertyNames so the unknown-name report skips them - confirm, the call target is lost in this copy. */ static const char *ignorePropNames[]={ "FC_NFKC", "NFD_QC", "NFC_QC", "NFKD_QC", "NFKC_QC", "Expands_On_NFD", "Expands_On_NFC", "Expands_On_NFKD", "Expands_On_NFKC", "NFKC_CF" }; /* Constructor: allocates the unknownPropertyNames hashtable and, if errorCode reports failure, deletes it and resets the pointer to NULL. NOTE(review): the loop header below is damaged in this copy (text is missing between the loop index and puti), and errorCode is not reset before the loop; if the lost text dereferences unknownPropertyNames it needs a NULL guard - confirm against upstream. */ UnicodeTest::UnicodeTest() { UErrorCode errorCode=U_ZERO_ERROR; unknownPropertyNames=new U_NAMESPACE_QUALIFIER Hashtable(errorCode); if(U_FAILURE(errorCode)) { delete unknownPropertyNames; unknownPropertyNames=NULL; } // Ignore some property names altogether. for(int32_t i=0; iputi(UnicodeString(ignorePropNames[i], -1, US_INV), 1, errorCode); } } /* Destructor: frees the hashtable allocated by the constructor. */ UnicodeTest::~UnicodeTest() { delete unknownPropertyNames; } /* Dispatcher: stores the test name for index in name and runs that test only when exec is true; any other index yields an empty name, which the existing remark says is needed to end the enumeration loop. The last parameter is unused. */ void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) { if (exec) logln("TestSuite UnicodeTest: "); switch (index) { case 0: name = "TestAdditionalProperties"; if(exec) TestAdditionalProperties(); break; case 1: name = "TestBinaryValues"; if(exec) TestBinaryValues(); break; case 2: name = "TestConsistency"; if(exec) TestConsistency(); break; default: name = ""; break; //needed to end loop } } //==================================================== // private data used by the tests //==================================================== // test DerivedCoreProperties.txt ------------------------------------------- // copied from genprops.c /* getTokenIndex: only the signature, the locals and the initial u_skipWhitespace(s) call survive in this copy; the search loop is cut off right after its initializer. TODO(review): restore the body from upstream before relying on any description of its result. */ static int32_t getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s) { const char *t, *z; int32_t i, j; s=u_skipWhitespace(s); for(i=0; 
/* NOTE(review): text is missing before this point. What follows is the tail of a function whose header is lost: it uses a UnicodeTest pointer me, a fields array, and a start/end code point range. */ ierrln("UnicodeTest: syntax error in DerivedCoreProperties.txt or DerivedNormalizationProps.txt field 0 at %s\n", fields[0][0]); return; } /* parse derived binary property name, ignore unknown names */ i=getTokenIndex(derivedPropsNames, LENGTHOF(derivedPropsNames), fields[1][0]); if(i<0) { UnicodeString propName(fields[1][0], (int32_t)(fields[1][1]-fields[1][0])); propName.trim(); /* warn about each unknown name once: it is recorded in the hashtable so later occurrences are silent */ if(me->unknownPropertyNames->find(propName)==NULL) { UErrorCode errorCode=U_ZERO_ERROR; me->unknownPropertyNames->puti(propName, 1, errorCode); me->errln("UnicodeTest warning: unknown property name '%s' in DerivedCoreProperties.txt or DerivedNormalizationProps.txt\n", fields[1][0]); } return; } /* known name: add the range to the set with the same index as the matched name */ me->derivedProps[i].add(start, end); } /* TestAdditionalProperties: body is compiled only when UCONFIG_NO_NORMALIZATION is 0. NOTE(review): large spans of the body are missing in this copy (for example right after LENGTHOF(derivedProps)), so only fragments remain: two loops that each break with a Too many errors message after a check against MAX_ERRORS (the first via dataerrln, the second via errln), separated by an invert all properties step. */ void UnicodeTest::TestAdditionalProperties() { #if !UCONFIG_NO_NORMALIZATION // test DerivedCoreProperties.txt and DerivedNormalizationProps.txt if(LENGTHOF(derivedProps)=MAX_ERRORS) { dataerrln("Too many errors, moving to the next test"); break; } } } } } // invert all properties for(i=0; i=MAX_ERRORS) { errln("Too many errors, moving to the next test"); break; } } } } } #endif /* !UCONFIG_NO_NORMALIZATION */ } /* TestBinaryValues: builds the [:Alphabetic:] set first and returns with a data error if that fails. */ void UnicodeTest::TestBinaryValues() { /* * Unicode 5.1 explicitly defines binary property value aliases. * Verify that they are all recognized. 
*/ UErrorCode errorCode=U_ZERO_ERROR; UnicodeSet alpha(UNICODE_STRING_SIMPLE("[:Alphabetic:]"), errorCode); if(U_FAILURE(errorCode)) { dataerrln("UnicodeSet([:Alphabetic:]) failed - %s", u_errorName(errorCode)); return; } /* value aliases to try for the false and the true state of a binary property */ static const char *const falseValues[]={ "N", "No", "F", "False" }; static const char *const trueValues[]={ "Y", "Yes", "T", "True" }; int32_t i; /* NOTE(review): text is missing inside the next statement; the rest of TestBinaryValues and the header of the following function (which uses nfd, set1, set2 and errorCode) are lost in this copy. */ for(i=0; igetCanonStartSet(0x49, set1)) { /* enumerate all characters that are plausible to be latin letters */ for(UChar start=0xa0; start<0x2000; ++start) { UnicodeString decomp=nfd->normalize(UnicodeString(start), errorCode); if(decomp.length()>1 && decomp[0]==0x49) { set2.add(start); } } /* set2 now holds each start in [0xa0, 0x2000) whose nfd->normalize() result has length()>1 and begins with U+0049; it must equal set1 */ if (set1!=set2) { errln("[canon start set of 0049] != [all c with canon decomp with 0049]"); } // This was available in cucdtst.c but the test had to move to intltest // because the new internal normalization functions are in C++. //compareUSets(set1, set2, // "[canon start set of 0049]", "[all c with canon decomp with 0049]", // TRUE); } else { errln("NFC.getCanonStartSet() returned FALSE"); } #endif }