The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SCSUUTF8 Class Reference

#include <scsuutf8.h>

+ Inheritance diagram for SCSUUTF8:
+ Collaboration diagram for SCSUUTF8:

Public Member Functions

virtual const char * getHeader () const
 
virtual char processText (SWBuf &text, const SWKey *key=0, const SWModule *module=0)
 
 SCSUUTF8 ()
 
 ~SCSUUTF8 ()
 

Private Member Functions

int UTF8Output (unsigned long, SWBuf *utf8Buf)
 

Private Attributes

unsigned char active
 
unsigned long c
 
unsigned long d
 
bool mode
 

Static Private Attributes

static unsigned short slide [8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00}
 
static unsigned short start [8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000}
 
static unsigned short win [256]
 

Detailed Description

This filter converts SCSU-compressed (encoded) text to UTF-8.

Definition at line 38 of file scsuutf8.h.

Constructor & Destructor Documentation

SWORD_NAMESPACE_START SCSUUTF8::SCSUUTF8 ( )

Definition at line 43 of file scsuutf8.cpp.

45  : err()
46 #endif
47 {
48 #ifdef _ICU_
49 
50  // initialize SCSU converter
51  scsuConv = ucnv_open("SCSU", &err);
52  // initialize UTF-8 converter
53  utf8Conv = ucnv_open("UTF-8", &err);
54 #else
55  active = 0;
56  mode = 0;
57 #endif
58 }
bool mode
Definition: scsuutf8.h:47
unsigned char active
Definition: scsuutf8.h:46
SCSUUTF8::~SCSUUTF8 ( )

Definition at line 60 of file scsuutf8.cpp.

60  {
61 #ifdef _ICU_
62  ucnv_close(scsuConv);
63  ucnv_close(utf8Conv);
64 #endif
65 }

Member Function Documentation

virtual const char* SWFilter::getHeader ( ) const
inline virtual inherited

This method can supply a header associated with the processing done with this filter. A typical example is a suggested CSS style block for classed containers.

Reimplemented in OSISLaTeX, OSISXHTML, ThMLLaTeX, ThMLXHTML, TEIXHTML, GBFLaTeX, and GBFXHTML.

Definition at line 62 of file swfilter.h.

62 { return ""; }
char SCSUUTF8::processText ( SWBuf & text,
const SWKey * key = 0,
const SWModule * module = 0 
)
virtual

This method processes and appropriately modifies the text given to it for a particular filter task.

Parameters
text: The text to be filtered/converted.
key: The current key that was used.
module: The current module.
Returns
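0 after processing; -1 if key is 0 or 1 (the values used by the en/deciphering hack), in which case the text is left untouched.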

Implements SWFilter.

Definition at line 141 of file scsuutf8.cpp.

141  {
142  if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
143  return -1;
144 
145 #ifdef _ICU_
146  // Try decoding with ICU if possible
147  err = U_ZERO_ERROR;
148  icu::UnicodeString utf16Text(text.getRawData(), text.length(), scsuConv, err);
149  err = U_ZERO_ERROR;
150  int32_t len = utf16Text.extract(text.getRawData(), text.size(), utf8Conv, err);
151  if (len > (int32_t)text.size()+1) {
152  text.setSize(len+1);
153  utf16Text.extract(text.getRawData(), text.size(), utf8Conv, err);
154  }
155 #else
156  // If ICU is unavailable, decode using Czyborra's decoder
157  SWBuf utf8Buf = "";
158  int len = text.length();
159  const char* scsuString = text.c_str();
160 
161  for (int i = 0; i < len;) {
162 
163  if (i >= len) break;
164  c = scsuString[i++];
165 
166  if (c >= 0x80)
167  {
168  UTF8Output(c - 0x80 + slide[active], &utf8Buf);
169  }
170  else if (c >= 0x20 && c <= 0x7F)
171  {
172  UTF8Output(c, &utf8Buf);
173  }
174  else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD)
175  {
176  UTF8Output(c, &utf8Buf);
177  }
178  else if (c >= 0x1 && c <= 0x8) // SQn
179  {
180  if (i >= len) break;
181  d = scsuString[i++]; // single quote
182 
183  UTF8Output(d < 0x80 ? d + start[c - 0x1] :
184  d - 0x80 + slide[c - 0x1], &utf8Buf);
185  }
186  else if (c >= 0x10 && c <= 0x17) // SCn
187  {
188  active = c - 0x10; // change window
189  }
190  else if (c >= 0x18 && c <= 0x1F) // SDn
191  {
192  active = c - 0x18; // define window
193  if (i >= len) break;
194  slide[active] = win[(unsigned char)scsuString[i++]];
195  }
196  else if (c == 0xB) // SDX
197  {
198  if (i >= len) break;
199  c = scsuString[i++];
200 
201  if (i >= len) break;
202  d = scsuString[i++];
203 
204  slide[active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7);
205  }
206  else if (c == 0xE) // SQU
207  {
208  if (i >= len) break;
209  c = scsuString[i++]; // SQU
210 
211  if (i >= len) break;
212  UTF8Output(c << 8 | scsuString[i++], &utf8Buf);
213  }
214  else if (c == 0xF) // SCU
215  {
216  mode = 1; // change to Unicode mode
217 
218  while (mode)
219  {
220  if (i >= len) break;
221  c = scsuString[i++];
222 
223  if (c <= 0xDF || c >= 0xF3)
224  {
225  if (i >= len) break;
226  UTF8Output(c << 8 | scsuString[i++], &utf8Buf);
227  }
228  else if (c == 0xF0) // UQU
229  {
230  if (i >= len) break;
231  c = scsuString[i++];
232 
233  if (i >= len) break;
234  UTF8Output(c << 8 | scsuString[i++], &utf8Buf);
235  }
236  else if (c >= 0xE0 && c <= 0xE7) // UCn
237  {
238  active = c - 0xE0;
239  mode = 0;
240  }
241  else if (c >= 0xE8 && c <= 0xEF) // UDn
242  {
243  if (i >= len) break;
244  slide[active=c-0xE8] = win[(unsigned char)scsuString[i++]];
245  mode = 0;
246  }
247  else if (c == 0xF1) // UDX
248  {
249  if (i >= len) break;
250  c = scsuString[i++];
251 
252  if (i >= len) break;
253  d = scsuString[i++];
254 
255  slide[active = c>>5] =
256  0x10000 + (((c & 0x1F) << 8 | d) << 7);
257  mode = 0;
258  }
259  }
260  }
261  }
262 #endif
263 
264  return 0;
265 }
static unsigned short start[8]
Definition: scsuutf8.h:50
static unsigned short slide[8]
Definition: scsuutf8.h:51
static unsigned short win[256]
Definition: scsuutf8.h:52
Definition: swbuf.h:47
unsigned long length() const
Definition: swbuf.h:197
bool mode
Definition: scsuutf8.h:47
unsigned long d
Definition: scsuutf8.h:48
char * getRawData()
Definition: swbuf.h:379
const char * c_str() const
Definition: swbuf.h:158
unsigned char active
Definition: scsuutf8.h:46
unsigned long size() const
Definition: swbuf.h:185
unsigned long c
Definition: scsuutf8.h:48
int UTF8Output(unsigned long, SWBuf *utf8Buf)
Definition: scsuutf8.cpp:105
void setSize(unsigned long len)
Definition: swbuf.h:255
int SCSUUTF8::UTF8Output ( unsigned long  uchar,
SWBuf * utf8Buf 
)
private

Definition at line 105 of file scsuutf8.cpp.

106 {
107  // join UTF-16 surrogates without any pairing sanity checks
108  if (uchar >= 0xd800 && uchar <= 0xdbff) {
109  d = uchar & 0x3ff;
110  return 0;
111  }
112  if (uchar >= 0xdc00 && uchar <= 0xdfff) {
113  uchar = uchar + 0x2400 + d * 0x400;
114  }
115 
116  // output one character as UTF-8 multibyte sequence
117 
118  if (uchar < 0x80) {
119  utf8Buf += uchar;
120  }
121  else if (uchar < 0x800) {
122  utf8Buf += (0xc0 | (uchar>>6));
123  utf8Buf += (0x80 | (uchar & 0x3f));
124  }
125  else if (uchar < 0x10000) {
126  utf8Buf += (0xe0 | (uchar>>12));
127  utf8Buf += (0x80 | (uchar>>6 & 0x3f));
128  utf8Buf += (0x80 | (uchar & 0x3f));
129  }
130  else if (uchar < 0x200000) {
131  utf8Buf += (0xf0 | (uchar>>18));
132  utf8Buf += (0x80 | (uchar>>12 & 0x3f));
133  utf8Buf += (0x80 | (uchar>>6 & 0x3f));
134  utf8Buf += (0x80 | (uchar & 0x3f));
135  }
136 
137  return 0;
138 }
unsigned long d
Definition: scsuutf8.h:48

Member Data Documentation

unsigned char SCSUUTF8::active
private

Definition at line 46 of file scsuutf8.h.

unsigned long SCSUUTF8::c
private

Definition at line 48 of file scsuutf8.h.

unsigned long SCSUUTF8::d
private

Definition at line 48 of file scsuutf8.h.

bool SCSUUTF8::mode
private

Definition at line 47 of file scsuutf8.h.

unsigned short SCSUUTF8::slide = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00}
static private

Definition at line 51 of file scsuutf8.h.

unsigned short SCSUUTF8::start = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000}
static private

Definition at line 50 of file scsuutf8.h.

unsigned short SCSUUTF8::win
staticprivate

Definition at line 52 of file scsuutf8.h.


The documentation for this class was generated from the following files: