The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
unicodertf.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * unicodertf.cpp - SWFilter descendant to convert UTF-8 to RTF tags
4  *
5  * $Id: unicodertf.cpp 3081 2014-03-05 19:52:08Z chrislit $
6  *
7  * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
8  * CrossWire Bible Society
9  * P. O. Box 2528
10  * Tempe, AZ 85280-2528
11  *
12  * This program is free software; you can redistribute it and/or modify it
13  * under the terms of the GNU General Public License as published by the
14  * Free Software Foundation version 2.
15  *
16  * This program is distributed in the hope that it will be useful, but
17  * WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * General Public License for more details.
20  *
21  */
22 
23 #include <stdio.h>
24 #include <unicodertf.h>
25 #include <swbuf.h>
26 
28 
30 }
31 
32 
33 char UnicodeRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module)
34 {
35  const unsigned char *from;
36  char digit[10];
37  unsigned long ch;
38  signed short utf16;
39  unsigned char from2[7];
40 
41  SWBuf orig = text;
42 
43  from = (const unsigned char *)orig.c_str();
44 
45  // -------------------------------
46  for (text = ""; *from; from++) {
47  ch = 0;
48  //case: ANSI
49  if ((*from & 128) != 128) {
50  text += *from;
51  continue;
52  }
53  //case: Invalid UTF-8 (illegal continuing byte in initial position)
54  if ((*from & 128) && ((*from & 64) != 64)) {
55  continue;
56  }
57  //case: 2+ byte codepoint
58  from2[0] = *from;
59  from2[0] <<= 1;
60  int subsequent;
61  for (subsequent = 1; (from2[0] & 128) && (subsequent < 7); subsequent++) {
62  from2[0] <<= 1;
63  from2[subsequent] = from[subsequent];
64  from2[subsequent] &= 63;
65  ch <<= 6;
66  ch |= from2[subsequent];
67  }
68  subsequent--;
69  from2[0] <<= 1;
70  char significantFirstBits = 8 - (2+subsequent);
71 
72  ch |= (((short)from2[0]) << (((6*subsequent)+significantFirstBits)-8));
73  from += subsequent;
74  if (ch < 0x10000) {
75  utf16 = (signed short)ch;
76  text += '\\';
77  text += 'u';
78  sprintf(digit, "%d", utf16);
79  text += digit;
80  text += '?';
81  }
82  else {
83  utf16 = (signed short)((ch - 0x10000) / 0x400 + 0xD800);
84  text += '\\';
85  text += 'u';
86  sprintf(digit, "%d", utf16);
87  text += digit;
88  text += '?';
89  utf16 = (signed short)((ch - 0x10000) % 0x400 + 0xDC00);
90  text += '\\';
91  text += 'u';
92  sprintf(digit, "%d", utf16);
93  text += digit;
94  text += '?';
95  }
96  }
97 
98  return 0;
99 }
100 
#define SWORD_NAMESPACE_START
Definition: defs.h:39
Definition: swbuf.h:47
SWText * module
Definition: osis2mod.cpp:105
const char * c_str() const
Definition: swbuf.h:158
#define SWORD_NAMESPACE_END
Definition: defs.h:40
Definition: swkey.h:77
virtual char processText(SWBuf &text, const SWKey *key=0, const SWModule *module=0)
Definition: unicodertf.cpp:33