The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
utilstr.h File Reference
#include <defs.h>
#include <sysdata.h>
#include <swbuf.h>
+ Include dependency graph for utilstr.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

/* Looks c up in SW_tolower_array. The argument is parenthesized so the
 * unsigned char cast applies to the whole argument expression rather than
 * only to its first operand. */
#define SW_tolower(c)   SW_tolower_array[(unsigned char)(c)]
 
/* Looks c up in SW_toupper_array. The argument is parenthesized so the
 * unsigned char cast applies to the whole argument expression rather than
 * only to its first operand. */
#define SW_toupper(c)   SW_toupper_array[(unsigned char)(c)]
 

Functions

SWBuf assureValidUTF8 (const char *buf)
 
SW_u32 getUniCharFromUTF8 (const unsigned char **buf, bool skipValidation=false)
 
SWBuf * getUTF8FromUniChar (SW_u32 uchar, SWBuf *appendTo)
 
SWORD_NAMESPACE_START char * stdstr (char **ipstr, const char *istr, unsigned int memPadFactor=1)
 
SWDLLEXPORT int stricmp (const char *s1, const char *s2)
 
SWDLLEXPORT const char * stristr (const char *s1, const char *s2)
 
SWDLLEXPORT int strnicmp (const char *s1, const char *s2, int len)
 
SWDLLEXPORT char * strstrip (char *istr)
 
SWBuf utf8ToWChar (const char *buf)
 
SWBuf wcharToUTF8 (const wchar_t *buf)
 

Variables

const unsigned char SW_tolower_array [256]
 
const unsigned char SW_toupper_array [256]
 

Macro Definition Documentation

#define SW_tolower (   c)    SW_tolower_array[(unsigned char)c]

Definition at line 75 of file utilstr.h.

#define SW_toupper (   c)    SW_toupper_array[(unsigned char)c]

Definition at line 67 of file utilstr.h.

Function Documentation

SWBuf assureValidUTF8 ( const char *  buf)

Definition at line 207 of file utilstr.cpp.

207  {
208 
209  SWBuf myCopy = buf;
210  const unsigned char *b = (const unsigned char *)myCopy.c_str();
211  const unsigned char *q = 0;
212  bool invalidChar = false;
213  while (*b) {
214  q = b;
215  if (!getUniCharFromUTF8(&b)) {
216  long len = b - q;
217  if (len) {
218  invalidChar = true;
219  for (long start = q - (const unsigned char *)myCopy.c_str(); len; len--) {
220  myCopy[start+len-1] = 0x1a; // unicode replacement character
221  }
222 
223  }
224  }
225  }
226  if (invalidChar) {
227 // SWLog::getSystemLog()->logWarning("Changing invalid UTF-8 string (%s) to (%s)\n", buf, myCopy.c_str());
228  }
229  return myCopy;
230 }
Definition: swbuf.h:47
const char * c_str() const
Definition: swbuf.h:158
SW_u32 getUniCharFromUTF8(const unsigned char **buf, bool skipValidation=false)
Definition: utilstr.h:88
SW_u32 getUniCharFromUTF8 ( const unsigned char **  buf,
bool  skipValidation = false 
)
inline

Definition at line 88 of file utilstr.h.

88  {
89  SW_u32 ch = 0;
90 
91  //case: We're at the end
92  if (!(**buf)) {
93  return ch;
94  }
95 
96  //case: ANSI
97  if (!(**buf & 128)) {
98  ch = **buf;
99  (*buf)++;
100  return ch;
101  }
102 
103  //case: Invalid UTF-8 (illegal continuing byte in initial position)
104  if ((**buf >> 6) == 2) {
105  (*buf)++;
106  return ch;
107  }
108 
109 
110  //case: 2+ byte codepoint
111  int subsequent = 1;
112  if ((**buf & 32) == 0) { subsequent = 1; }
113  else if ((**buf & 16) == 0) { subsequent = 2; }
114  else if ((**buf & 8) == 0) { subsequent = 3; }
115  else if ((**buf & 4) == 0) { subsequent = 4; }
116  else if ((**buf & 2) == 0) { subsequent = 5; }
117  else if ((**buf & 1) == 0) { subsequent = 6; }
118  else subsequent = 7; // is this legal?
119 
120  ch = **buf & (0xFF>>(subsequent + 1));
121 
122  for (int i = 1; i <= subsequent; ++i) {
123  // subsequent byte did not begin with 10XXXXXX
124  // move our buffer to here and error out
125  // this also catches our null if we hit the string terminator
126  if (((*buf)[i] >> 6) != 2) {
127  *buf += i;
128  return 0;
129  }
130  ch <<= 6;
131  ch |= (*buf)[i] & 63;
132  }
133  *buf += (subsequent+1);
134 
135  if (!skipValidation) {
136  // I THINK THIS IS STUPID BUT THE SPEC SAYS NO MORE THAN 4 BYTES
137  if (subsequent > 3) ch = 0;
138  // AGAIN stupid, but spec says UTF-8 can't use more than 21 bits
139  if (ch > 0x1FFFFF) ch = 0;
140  // This would be out of Unicode bounds
141  if (ch > 0x10FFFF) ch = 0;
142  // these would be values which could be represented in less bytes
143  if (ch < 0x80 && subsequent > 0) ch = 0;
144  if (ch < 0x800 && subsequent > 1) ch = 0;
145  if (ch < 0x10000 && subsequent > 2) ch = 0;
146  if (ch < 0x200000 && subsequent > 3) ch = 0;
147  }
148 
149  return ch;
150 }
unsigned int SW_u32
Definition: sysdata.h:41
SWBuf* getUTF8FromUniChar ( SW_u32  uchar,
SWBuf *  appendTo 
)
inline

Definition at line 165 of file utilstr.h.

165  {
166  unsigned long base = appendTo->size();
167 
168  // This would be out of Unicode bounds
169  if (uchar > 0x10FFFF) uchar = 0xFFFD;
170  char bytes = uchar < 0x80 ? 1 : uchar < 0x800 ? 2 : uchar < 0x10000 ? 3 : 4;
171  appendTo->setSize(base+bytes);
172  switch (bytes) {
173  case 1:
174  (*appendTo)[base ] = (unsigned char)uchar;
175  break;
176  case 2:
177  (*appendTo)[base+1] = (unsigned char)(0x80 | (uchar & 0x3f));
178  uchar >>= 6;
179  (*appendTo)[base ] = (unsigned char)(0xc0 | (uchar & 0x1f));
180  break;
181  case 3:
182  (*appendTo)[base+2] = (unsigned char)(0x80 | (uchar & 0x3f));
183  uchar >>= 6;
184  (*appendTo)[base+1] = (unsigned char)(0x80 | (uchar & 0x3f));
185  uchar >>= 6;
186  (*appendTo)[base ] = (unsigned char)(0xe0 | (uchar & 0x0f));
187  break;
188  case 4:
189  (*appendTo)[base+3] = (unsigned char)(0x80 | (uchar & 0x3f));
190  uchar >>= 6;
191  (*appendTo)[base+2] = (unsigned char)(0x80 | (uchar & 0x3f));
192  uchar >>= 6;
193  (*appendTo)[base+1] = (unsigned char)(0x80 | (uchar & 0x3f));
194  uchar >>= 6;
195  (*appendTo)[base ] = (unsigned char)(0xf0 | (uchar & 0x07));
196  break;
197  }
198 /*
199  else if (uchar < 0x4000000) {
200  appendTo->setSize(base+5);
201  i = uchar & 0x3f;
202  (*appendTo)[base+4] = (unsigned char)(0x80 | i);
203  uchar >>= 6;
204 
205  i = uchar & 0x3f;
206  (*appendTo)[base+3] = (unsigned char)(0x80 | i);
207  uchar >>= 6;
208 
209  i = uchar & 0x3f;
210  (*appendTo)[base+2] = (unsigned char)(0x80 | i);
211  uchar >>= 6;
212 
213  i = uchar & 0x3f;
214  (*appendTo)[base+1] = (unsigned char)(0x80 | i);
215  uchar >>= 6;
216 
217  i = uchar & 0x03;
218  (*appendTo)[base] = (unsigned char)(0xf8 | i);
219  }
220  else if (uchar < 0x80000000) {
221  appendTo->setSize(base+6);
222  i = uchar & 0x3f;
223  (*appendTo)[base+5] = (unsigned char)(0x80 | i);
224  uchar >>= 6;
225 
226  i = uchar & 0x3f;
227  (*appendTo)[base+4] = (unsigned char)(0x80 | i);
228  uchar >>= 6;
229 
230  i = uchar & 0x3f;
231  (*appendTo)[base+3] = (unsigned char)(0x80 | i);
232  uchar >>= 6;
233 
234  i = uchar & 0x3f;
235  (*appendTo)[base+2] = (unsigned char)(0x80 | i);
236  uchar >>= 6;
237 
238  i = uchar & 0x3f;
239  (*appendTo)[base+1] = (unsigned char)(0x80 | i);
240  uchar >>= 6;
241 
242  i = uchar & 0x01;
243  (*appendTo)[base] = (unsigned char)(0xfc | i);
244  }
245 */
246  return appendTo;
247 }
unsigned long size() const
Definition: swbuf.h:185
void setSize(unsigned long len)
Definition: swbuf.h:255
static time_t base
Definition: ftpparse.c:47
SWORD_NAMESPACE_START char* stdstr ( char **  ipstr,
const char *  istr,
unsigned int  memPadFactor = 1 
)
inline

Definition at line 44 of file utilstr.h.

/**
 * Replaces the heap string held in *ipstr with a fresh copy of istr.
 *
 * The copy is made before the previous buffer is released, so istr may
 * point at (or into) the buffer currently held in *ipstr.
 *
 * @param ipstr address of a pointer that is either 0 or was allocated
 *        with new char[]; the old buffer is released with delete []
 * @param istr string to copy, or 0 to just release and clear *ipstr
 * @param memPadFactor the new buffer holds (strlen(istr) + 1) * memPadFactor
 *        bytes; 0 is treated as 1 so the copy always fits
 * @return the new value of *ipstr
 */
inline char *stdstr(char **ipstr, const char *istr, unsigned int memPadFactor = 1) {
	char *fresh = 0;
	if (istr) {
		const size_t len = strlen(istr) + 1;
		const size_t factor = memPadFactor ? memPadFactor : 1;
		fresh = new char [ len * factor ];
		memcpy(fresh, istr, len);
	}
	// delete [] on a null pointer is a no-op, so no guard is needed
	delete [] *ipstr;
	*ipstr = fresh;
	return *ipstr;
}
SWDLLEXPORT int stricmp ( const char *  s1,
const char *  s2 
)

Definition at line 194 of file utilstr.cpp.

/**
 * Case-insensitive string comparison; forwards to the platform routine
 * selected at compile time.
 *
 * @param s1 first string
 * @param s2 second string
 * @return whatever the selected platform routine returns
 */
int stricmp(const char *s1, const char *s2) {
#if defined(__GNUC__)
	return ::strcasecmp(s1, s2);
#elif defined(_WIN32_WCE)
	return ::_stricmp(s1, s2);
#else
	return ::stricmp(s1, s2);
#endif
}
int stricmp(const char *s1, const char *s2)
Definition: utilstr.cpp:194
SWDLLEXPORT const char* stristr ( const char *  s1,
const char *  s2 
)

Definition at line 145 of file utilstr.cpp.

145  {
146  int tLen = (int)strlen(s2);
147  int cLen = (int)strlen(s1);
148  char *target = new char [ tLen + 1 ];
149  int i, j;
150  const char *retVal = 0;
151 
152  strcpy(target, s2);
153  for (i = 0; i < tLen; i++)
154  target[i] = SW_toupper(target[i]);
155 
156  for (i = 0; i < (cLen - tLen)+1; i++) {
157  if (SW_toupper(s1[i]) == (unsigned char)*target) {
158  for (j = 1; j < tLen; j++) {
159  if (SW_toupper(s1[i+j]) != (unsigned char)target[j])
160  break;
161  }
162  if (j == tLen) {
163  retVal = s1+i;
164  break;
165  }
166  }
167  }
168  delete [] target;
169  return retVal;
170 }
#define SW_toupper(c)
Definition: utilstr.h:67
SWDLLEXPORT int strnicmp ( const char *  s1,
const char *  s2,
int  len 
)

Definition at line 180 of file utilstr.cpp.

180  {
181  int tLen = (int)strlen(s2);
182  int cLen = (int)strlen(s1);
183  char diff;
184  int i;
185  for (i = 0; ((i < len) && (i < tLen) && (i < cLen)); i++) {
186  if ((diff = SW_toupper(*s1) - SW_toupper(*s2)))
187  return diff;
188  s1++;
189  s2++;
190  }
191  return (i < len) ? cLen - tLen : 0;
192 }
#define SW_toupper(c)
Definition: utilstr.h:67
SWDLLEXPORT char* strstrip ( char *  istr)

Definition at line 118 of file utilstr.cpp.

/**
 * Removes leading and trailing blanks (space, tab, LF, CR) from istr
 * in place.
 *
 * @param istr NUL-terminated, writable string
 * @return istr
 */
char *strstrip(char *istr) {
	const int len = (int)strlen(istr);
	if (len < 1) return istr;

	// walk back from the final character, zeroing blanks; the first
	// character is never zeroed here
	char *last = istr + (len - 1);
	for (; last > istr; --last) {
		const char c = *last;
		if (c != ' ' && c != '\t' && c != 10 && c != 13) break;
		*last = 0;
	}

	// find the first character that is not a blank
	const char *first = istr;
	while (*first == ' ' || *first == '\t' || *first == 10 || *first == 13) {
		++first;
	}

	// slide the kept range to the front and terminate it; the count is 0
	// when the string held nothing but blanks
	const size_t kept = (size_t)((last - first) + 1);
	memmove(istr, first, kept);
	istr[kept] = 0;

	return istr;
}
SWBuf utf8ToWChar ( const char *  buf)

Definition at line 239 of file utilstr.cpp.

239  {
240 
241  const char *q = 0;
242  SWBuf wcharBuf;
243  while (*buf) {
244  q = buf;
245  wchar_t wc = getUniCharFromUTF8((const unsigned char **)&buf);
246  if (!wc) {
247  // if my buffer was advanced but nothing was converted, I had invalid data
248  if (buf - q) {
249  // invalid bytes in UTF8 stream
250  wcharBuf.append((wchar_t)0x1a); // unicode replacement character
251  }
252  }
253  else wcharBuf.append(wc);
254  }
255  return wcharBuf;
256 }
Definition: swbuf.h:47
SWBuf & append(const char *str, long max=-1)
Definition: swbuf.h:274
SW_u32 getUniCharFromUTF8(const unsigned char **buf, bool skipValidation=false)
Definition: utilstr.h:88
SWBuf wcharToUTF8 ( const wchar_t *  buf)

Definition at line 263 of file utilstr.cpp.

263  {
264 
265  SWBuf utf8Buf;
266  if (buf) {
267  while (*buf) {
268  getUTF8FromUniChar(*buf++, &utf8Buf);
269  }
270  }
271  return utf8Buf;
272 }
Definition: swbuf.h:47
SWBuf * getUTF8FromUniChar(SW_u32 uchar, SWBuf *appendTo)
Definition: utilstr.h:165

Variable Documentation

const unsigned char SW_tolower_array[256]

Definition at line 73 of file utilstr.cpp.

const unsigned char SW_toupper_array[256]

Definition at line 34 of file utilstr.cpp.