The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
StringMgr Class Reference

#include <stringmgr.h>

+ Collaboration diagram for StringMgr:

Public Member Functions

virtual bool isAlpha (SW_u32 character) const
 
virtual bool isDigit (SW_u32 character) const
 
virtual bool isLower (SW_u32 character) const
 
virtual bool isUpper (SW_u32 character) const
 
virtual char * lowerUTF8 (char *text, unsigned int max=0) const
 
virtual char * upperLatin1 (char *text, unsigned int max=0) const
 
virtual char * upperUTF8 (char *text, unsigned int max=0) const
 

Static Public Member Functions

static StringMgr * getSystemStringMgr ()
 
static bool hasUTF8Support ()
 
static void setSystemStringMgr (StringMgr *newStringMgr)
 

Protected Member Functions

 StringMgr ()
 
 StringMgr (const StringMgr &)
 
virtual bool supportsUnicode () const
 
virtual ~StringMgr ()
 

Static Private Attributes

static StringMgr * systemStringMgr = 0
 

Friends

class __staticsystemStringMgr
 

Detailed Description

StringMgr provides UTF8 handling. This class makes it possible to implement Unicode support on the client side and not in SWORD itself.

Definition at line 39 of file stringmgr.h.

Constructor & Destructor Documentation

StringMgr::StringMgr ( )
protected

Default constructor. Protected to make creating instances on the user side impossible, because this class is a singleton.

Default constructor

Definition at line 166 of file stringmgr.cpp.

166  {
167 }
StringMgr::StringMgr ( const StringMgr & m)
protected

Copy constructor

Definition at line 171 of file stringmgr.cpp.

171  {
172 }
StringMgr::~StringMgr ( )
protected virtual

Destructor

Definition at line 176 of file stringmgr.cpp.

176  {
177 }

Member Function Documentation

StringMgr * StringMgr::getSystemStringMgr ( )
static

Returns the global StringMgr handle

Returns
The global string handle

Definition at line 197 of file stringmgr.cpp.

197  {
198  if (!systemStringMgr) {
199 #ifdef _ICU_
200  systemStringMgr = new ICUStringMgr();
201 // SWLOGI("created default ICUStringMgr");
202 #else
203  systemStringMgr = new StringMgr();
204 // SWLOGI("created default StringMgr");
205 #endif
206  }
207 
208  return systemStringMgr;
209 }
static StringMgr * systemStringMgr
Definition: stringmgr.h:41
static bool StringMgr::hasUTF8Support ( )
inline static

Checks whether Utf8 support is available. Override the function supportsUnicode() to tell whether your implementation has utf8 support.

Returns
True if this implementation provides support for Utf8 handling or false if just latin1 handling is available

Definition at line 58 of file stringmgr.h.

58  {
59  return getSystemStringMgr()->supportsUnicode();
60  };
static StringMgr * getSystemStringMgr()
Definition: stringmgr.cpp:197
virtual bool supportsUnicode() const
Definition: stringmgr.cpp:342
bool StringMgr::isAlpha ( SW_u32  character) const
virtual

Definition at line 317 of file stringmgr.cpp.

317  {
318  return isalpha(character);
319 }
bool StringMgr::isDigit ( SW_u32  character) const
virtual

Definition at line 314 of file stringmgr.cpp.

314  {
315  return isdigit(character);
316 }
bool StringMgr::isLower ( SW_u32  character) const
virtual

Definition at line 311 of file stringmgr.cpp.

311  {
312  return islower(character);
313 }
bool StringMgr::isUpper ( SW_u32  character) const
virtual

Definition at line 308 of file stringmgr.cpp.

308  {
309  return isupper(character);
310 }
char * StringMgr::lowerUTF8 ( char *  t,
unsigned int  maxlen = 0 
) const
virtual

Converts the param to a lower case Utf8 string

Parameters
text  The text encoded in utf8 which should be turned into a lower case string
max  Max buffer size
Returns
text buffer (only for convenience)

This is a fallback method. It should never be called. If UTF8 support is desired, then a UTF8 StringMgr needs to be used.

Here we just do our best.

Converts the param to a lower case UTF8 string

Parameters
t - The text encoded in utf8 which should be turned into a lower case string

Definition at line 284 of file stringmgr.cpp.

284  {
285  // try to decide if it's worth trying to tolower. Do we have more
286  // characters which are probably lower latin than not?
287  // we still don't use isValidUTF8 optimally. what if we have 1 unicode
288  // character in the string? should we not try to lower any of the string?
289  // dunno. Best solution is to lower all other characters. Don't have
290  // time to write that before release.
291  long performOp = 0;
292  if (!isValidUTF8((unsigned char *)t)) {
293  performOp = 1;
294  }
295  else {
296  for (const char *ch = t; *ch; ch++) {
297  performOp += (*ch > 0) ? 1 : -1;
298  }
299  }
300 
301  if (performOp > 0) {
302  return lowerLatin1(t);
303  }
304 
305  return t;
306 }
void StringMgr::setSystemStringMgr ( StringMgr * newStringMgr)
static

Sets the global StringMgr handle

Parameters
newStringMgr  The new global StringMgr. This pointer will be deleted by this StringMgr

Definition at line 182 of file stringmgr.cpp.

182  {
183  if (systemStringMgr)
184  delete systemStringMgr;
185 
186  systemStringMgr = newStringMgr;
187 
188  // TODO: this is magic. apparently we have to reset the system localemgr upon changing stringmgr.
189  // setting system stringmgr should be set before localemgr and not possible to change.
190  // rework this design.
192 }
static StringMgr * systemStringMgr
Definition: stringmgr.h:41
static LocaleMgr * getSystemLocaleMgr()
Definition: localemgr.cpp:54
static void setSystemLocaleMgr(LocaleMgr *newLocaleMgr)
Definition: localemgr.cpp:63
bool StringMgr::supportsUnicode ( ) const
protected virtual

Definition at line 342 of file stringmgr.cpp.

342  {
343  return false; //default impl has no UTF8 support
344 }
char * StringMgr::upperLatin1 ( char *  buf,
unsigned int  maxlen = 0 
) const
virtual

Converts the param to an uppercase latin1 string

Parameters
text  The text encoded in latin1 which should be turned into an upper case string
max  Max buffer size
Returns
text buffer (only for convenience)

Converts the param to an uppercase latin1 string

Parameters
buf  The text encoded in latin1 which should be turned into an upper case string

Definition at line 327 of file stringmgr.cpp.

327  {
328  if (!buf)
329  return 0;
330 
331  char *ret = buf;
332  bool checkMax = maxlen;
333 
334  while (*buf && (!checkMax || maxlen--)) {
335  *buf = SW_toupper(*buf);
336  buf++;
337  }
338 
339  return ret;
340 }
#define SW_toupper(c)
Definition: utilstr.h:67
reg_syntax_t ret
Definition: regex.c:1351
char * StringMgr::upperUTF8 ( char *  t,
unsigned int  maxlen = 0 
) const
virtual

Converts the param to an upper case Utf8 string

Parameters
text  The text encoded in utf8 which should be turned into an upper case string
max  Max buffer size
Returns
text buffer (only for convenience)

This is a fallback method. It should never be called. If UTF8 support is desired, then a UTF8 StringMgr needs to be used.

Here we just do our best.

Converts the param to an upper case UTF8 string

Parameters
t- The text encoded in utf8 which should be turned into an upper case string

Definition at line 223 of file stringmgr.cpp.

223  {
224 
225 #ifndef _ICU_
226 
227  SWBuf orig = t;
228  const unsigned char* from = (unsigned char*)orig.c_str();
229  SWBuf text = "";
230  std::map<SW_u32, SW_u32>::const_iterator it = toUpperData.end();
231  while (*from) {
232  SW_u32 ch = getUniCharFromUTF8(&from, true);
233  // should we skip conversion if we run into an invalid UTF8 character?
234  // maybe the string isn't intended to be UTF8
235  // Right now, if ch is bad, then convert to replacement char
236  if (!ch) ch = 0xFFFD;
237 
238  it = toUpperData.find(ch);
239  getUTF8FromUniChar(it == toUpperData.end() ? ch : it->second, &text);
240  }
241  long len = maxlen ? (text.size() < maxlen ? text.size() : (maxlen - 1)) : 0;
242  if (len) memcpy(t, text.c_str(), len);
243  t[len] = 0;
244 #endif
245  return t;
246 /* OLD
247  // try to decide if it's worth trying to toupper. Do we have more
248  // characters which are probably lower latin than not?
249  // we still don't use isValidUTF8 optimally. what if we have 1 unicode
250  // character in the string? should we not try to upper any of the string?
251  // dunno. Best solution is to upper all other characters. Don't have
252  // time to write that before release.
253 
254  long performOp = 0;
255  if (!isValidUTF8((unsigned char *)t)) {
256  performOp = 1;
257  }
258  else {
259  for (const char *ch = t; *ch; ch++) {
260  performOp += (*ch > 0) ? 1 : -1;
261  }
262  }
263 
264  if (performOp > 0) {
265  return upperLatin1(t);
266  }
267 */
268 
269  return t;
270 }
SWBuf
Definition: swbuf.h:47
const char * c_str() const
Definition: swbuf.h:158
unsigned long size() const
Definition: swbuf.h:185
SWBuf * getUTF8FromUniChar(SW_u32 uchar, SWBuf *appendTo)
Definition: utilstr.h:165
unsigned int SW_u32
Definition: sysdata.h:41
SWORD_NAMESPACE_START std::map< SW_u32, SW_u32 > toUpperData
Definition: swtoupperdata.h:32
SW_u32 getUniCharFromUTF8(const unsigned char **buf, bool skipValidation=false)
Definition: utilstr.h:88

Friends And Related Function Documentation

friend class __staticsystemStringMgr
friend

Definition at line 89 of file stringmgr.h.

Member Data Documentation

SWORD_NAMESPACE_START StringMgr * StringMgr::systemStringMgr = 0
static private

Definition at line 41 of file stringmgr.h.


The documentation for this class was generated from the following files: