The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
utf8nfkd.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * utf8nfkd.cpp - SWFilter descendant to perform NFKD (compatibility
4  * decomposition normalization) on UTF-8 text
5  *
6  * $Id: utf8nfkd.cpp 3689 2020-02-01 01:36:20Z scribe $
7  *
8  * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
9  * CrossWire Bible Society
10  * P. O. Box 2528
11  * Tempe, AZ 85280-2528
12  *
13  * This program is free software; you can redistribute it and/or modify it
14  * under the terms of the GNU General Public License as published by the
15  * Free Software Foundation version 2.
16  *
17  * This program is distributed in the hope that it will be useful, but
18  * WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * General Public License for more details.
21  *
22  */
23 
24 #ifdef _ICU_
25 
26 #include <utf8nfkd.h>
27 #include <swbuf.h>
28 
29 #include <unicode/utypes.h>
30 #include <unicode/ucnv.h>
31 #include <unicode/uchar.h>
32 #include <unicode/ustring.h>
33 #include <unicode/unorm2.h>
34 
35 
37 
38 struct UTF8NFKDPrivate {
39  const UNormalizer2 *conv;
40 };
41 
43  UErrorCode err = U_ZERO_ERROR;
44  p = new struct UTF8NFKDPrivate;
45  p->conv = unorm2_getNFKDInstance(&err);
46 }
47 
48 
50  delete p;
51 }
52 
53 
54 char UTF8NFKD::processText(SWBuf &text, const SWKey *key, const SWModule *module)
55 {
56  UErrorCode err = U_ZERO_ERROR;
57  UChar *source, *target;
58 
59  if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
60  return -1;
61 
62  int32_t len = 5 + text.length() * 5;
63  source = new UChar[len + 1]; //each char could become a surrogate pair
64 
65  // Convert UTF-8 string to UTF-16 (UChars)
66  int32_t ulen;
67  u_strFromUTF8(source, len, &ulen, text.c_str(), (int32_t)text.size(), &err);
68 
69 
70  target = new UChar[len + 1];
71 
72  //compatability decomposition
73  ulen = unorm2_normalize(p->conv, source, ulen, target, len, &err);
74 
75  text.setSize(len);
76  u_strToUTF8 (text.getRawData(), len, &len, target, ulen, &err);
77  text.setSize(len);
78 
79  delete [] source;
80  delete [] target;
81 
82  return 0;
83 }
84 
85 
87 #endif
#define SWORD_NAMESPACE_START
Definition: defs.h:39
Definition: swbuf.h:47
unsigned long length() const
Definition: swbuf.h:197
SWText * module
Definition: osis2mod.cpp:105
char * getRawData()
Definition: swbuf.h:379
const char * c_str() const
Definition: swbuf.h:158
unsigned long size() const
Definition: swbuf.h:185
struct UTF8NFKDPrivate * p
Definition: utf8nfkd.h:36
virtual char processText(SWBuf &text, const SWKey *key=0, const SWModule *module=0)
#define SWORD_NAMESPACE_END
Definition: defs.h:40
Definition: swkey.h:77
void setSize(unsigned long len)
Definition: swbuf.h:255