The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
utf8nfc.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * utf8nfc.cpp - SWFilter descendant to perform NFC (canonical
4  * composition normalization) on UTF-8 text
5  *
6  * $Id: utf8nfc.cpp 3618 2019-04-14 22:30:32Z scribe $
7  *
8  * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
9  * CrossWire Bible Society
10  * P. O. Box 2528
11  * Tempe, AZ 85280-2528
12  *
13  * This program is free software; you can redistribute it and/or modify it
14  * under the terms of the GNU General Public License as published by the
15  * Free Software Foundation version 2.
16  *
17  * This program is distributed in the hope that it will be useful, but
18  * WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * General Public License for more details.
21  *
22  */
23 
24 #ifdef _ICU_
25 
26 #include <unicode/unistr.h>
27 #include <unicode/normlzr.h>
28 #include <unicode/unorm.h>
29 
30 #include <utf8nfc.h>
31 #include <swbuf.h>
32 
34 
36  conv = ucnv_open("UTF-8", &err);
37 }
38 
40  ucnv_close(conv);
41 }
42 
43 char UTF8NFC::processText(SWBuf &text, const SWKey *key, const SWModule *module)
44 {
45  if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
46  return -1;
47 
48  err = U_ZERO_ERROR;
49  icu::UnicodeString source(text.getRawData(), text.length(), conv, err);
50  icu::UnicodeString target;
51 
52  err = U_ZERO_ERROR;
53  icu::Normalizer::normalize(source, UNORM_NFC, 0, target, err);
54 
55  err = U_ZERO_ERROR;
56  text.setSize(text.size()*2); // potentially, it can grow to 2x the original size
57  int32_t len = target.extract(text.getRawData(), text.size(), conv, err);
58  text.setSize(len);
59 
60  return 0;
61 }
62 
64 #endif
#define SWORD_NAMESPACE_START
Definition: defs.h:39
Definition: swbuf.h:47
unsigned long length() const
Definition: swbuf.h:197
bool normalize
Definition: tei2mod.cpp:101
virtual char processText(SWBuf &text, const SWKey *key=0, const SWModule *module=0)
SWText * module
Definition: osis2mod.cpp:105
char * getRawData()
Definition: swbuf.h:379
UErrorCode err
Definition: utf8nfc.h:42
unsigned long size() const
Definition: swbuf.h:185
UConverter * conv
Definition: utf8nfc.h:41
#define SWORD_NAMESPACE_END
Definition: defs.h:40
Definition: swkey.h:77
void setSize(unsigned long len)
Definition: swbuf.h:255