#include <utilstr.h>
#include <ctype.h>
#include <string.h>
#include <sysdata.h>
#include <swlog.h>
#include <swbuf.h>
Go to the source code of this file.
Functions | |
| SWBuf | assureValidUTF8 (const char *buf) |
| __u32 | getUniCharFromUTF8 (const unsigned char **buf) |
| SWBuf | getUTF8FromUniChar (__u32 uchar) |
| char * | stdstr (char **ipstr, const char *istr, unsigned int memPadFactor) |
| int | stricmp (const char *s1, const char *s2) |
| const char * | stristr (const char *s1, const char *s2) |
| int | strnicmp (const char *s1, const char *s2, int len) |
| char * | strstrip (char *istr) |
| SWBuf | utf8ToWChar (const char *buf) |
| SWBuf | wcharToUTF8 (const wchar_t *buf) |
Variables | |
| SWORD_NAMESPACE_START const unsigned char | SW_toupper_array [256] |
| SWBuf assureValidUTF8 | ( | const char * | buf | ) |
Definition at line 252 of file utilstr.cpp.
00252 { 00253 00254 SWBuf myCopy = buf; 00255 const unsigned char *b = (const unsigned char *)myCopy.c_str(); 00256 const unsigned char *q = 0; 00257 bool invalidChar = false; 00258 while (*b) { 00259 q = b; 00260 if (!getUniCharFromUTF8(&b)) { 00261 long len = b - q; 00262 if (len) { 00263 invalidChar = true; 00264 for (long start = q - (const unsigned char *)myCopy.c_str(); len; len--) { 00265 myCopy[start+len-1] = 0x1a; // unicode replacement character 00266 } 00267 00268 } 00269 } 00270 } 00271 if (invalidChar) { 00272 // SWLog::getSystemLog()->logWarning("Changing invalid UTF-8 string (%s) to (%s)\n", buf, myCopy.c_str()); 00273 } 00274 return myCopy; 00275 }
| __u32 getUniCharFromUTF8 | ( | const unsigned char ** | buf | ) |
Definition at line 197 of file utilstr.cpp.
00197 { 00198 __u32 ch = 0; 00199 unsigned char multibuf[7]; 00200 00201 //case: We're at the end 00202 if (!(**buf)) { 00203 return ch; 00204 } 00205 00206 //case: ANSI 00207 if (!(**buf & 128)) { 00208 ch = **buf; 00209 (*buf)++; 00210 return ch; 00211 } 00212 00213 //case: Invalid UTF-8 (illegal continuing byte in initial position) 00214 if ((**buf & 128) && (!(**buf & 64))) { 00215 (*buf)++; 00216 return ch; 00217 } 00218 00219 //case: 2+ byte codepoint 00220 multibuf[0] = **buf; 00221 multibuf[0] <<= 1; 00222 int subsequent; 00223 for (subsequent = 1; (multibuf[0] & 128) && (subsequent < 7); subsequent++) { 00224 multibuf[0] <<= 1; 00225 multibuf[subsequent] = (*buf)[subsequent]; 00226 multibuf[subsequent] &= 63; 00227 // subsequent byte did not begin with 10XXXXXX 00228 // move our buffer to here and error out 00229 if (((*buf)[subsequent] - multibuf[subsequent]) != 128) { 00230 *buf += subsequent; 00231 return 0; 00232 } 00233 ch <<= 6; 00234 ch |= multibuf[subsequent]; 00235 } 00236 subsequent--; 00237 multibuf[0] <<= 1; 00238 char significantFirstBits = 8 - (2+subsequent); 00239 00240 ch |= (((__s16)multibuf[0]) << (((6*subsequent)+significantFirstBits)-8)); 00241 *buf += (subsequent+1); 00242 return ch; 00243 }
| SWBuf getUTF8FromUniChar | ( | __u32 | uchar | ) |
Definition at line 246 of file utilstr.cpp.
00246 { 00247 // TODO: finish this logic 00248 return SWBuf((char)uchar); 00249 }
| char* stdstr | ( | char ** | ipstr, | |
| const char * | istr, | |||
| unsigned int | memPadFactor = 1 | |||
| ) |
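stdstr - clone a string: releases the buffer currently held in *ipstr and stores in *ipstr a newly allocated copy of istr, sized (strlen(istr) + 1) * memPadFactor bytes; if istr is null, *ipstr is set to 0. Returns the new value of *ipstr.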
Definition at line 77 of file utilstr.cpp.
/**
 * stdstr - clone a string.
 *
 * Replaces the buffer held in *ipstr with a freshly allocated copy of istr.
 *
 * @param ipstr        in/out: address of the owning pointer. Any buffer it
 *                     currently holds is released with delete [].
 * @param istr         string to copy, or 0 to simply release *ipstr
 * @param memPadFactor the new buffer is (strlen(istr) + 1) * memPadFactor
 *                     bytes; 0 is treated as 1 so the copy always fits
 * @return the new value of *ipstr (0 when istr is 0)
 */
char *stdstr(char **ipstr, const char *istr, unsigned int memPadFactor) {
	char *fresh = 0;

	if (istr) {
		const size_t len = strlen(istr) + 1;
		const size_t pad = memPadFactor ? memPadFactor : 1;
		fresh = new char [ len * pad ];
		memcpy(fresh, istr, len);
	}

	// Release the old buffer only after the copy has been made: istr may
	// point into *ipstr (e.g. stdstr(&s, s + 1)), and freeing first would
	// read freed memory.
	delete [] *ipstr;
	*ipstr = fresh;
	return *ipstr;
}
| int stricmp | ( | const char * | s1, | |
| const char * | s2 | |||
| ) |
Definition at line 174 of file utilstr.cpp.
/**
 * Case-insensitive comparison of two NUL-terminated strings, forwarded to
 * whichever routine the toolchain provides.
 *
 * @param s1 first string
 * @param s2 second string
 * @return the result of the underlying platform comparison
 */
int stricmp(const char *s1, const char *s2) {
#if defined(__GNUC__)
	// GNU-compatible compilers
	return ::strcasecmp(s1, s2);
#elif defined(_WIN32_WCE)
	// Windows CE
	return ::_stricmp(s1, s2);
#else
	// everything else
	return ::stricmp(s1, s2);
#endif
}
| const char* stristr | ( | const char * | s1, | |
| const char * | s2 | |||
| ) |
Definition at line 125 of file utilstr.cpp.
00125 { 00126 int tLen = strlen(s2); 00127 int cLen = strlen(s1); 00128 char *target = new char [ tLen + 1 ]; 00129 int i, j; 00130 const char *retVal = 0; 00131 00132 strcpy(target, s2); 00133 for (i = 0; i < tLen; i++) 00134 target[i] = SW_toupper(target[i]); 00135 00136 for (i = 0; i < (cLen - tLen)+1; i++) { 00137 if (SW_toupper(s1[i]) == (unsigned char)*target) { 00138 for (j = 1; j < tLen; j++) { 00139 if (SW_toupper(s1[i+j]) != (unsigned char)target[j]) 00140 break; 00141 } 00142 if (j == tLen) { 00143 retVal = s1+i; 00144 break; 00145 } 00146 } 00147 } 00148 delete [] target; 00149 return retVal; 00150 }
| int strnicmp | ( | const char * | s1, | |
| const char * | s2, | |||
| int | len | |||
| ) |
Definition at line 160 of file utilstr.cpp.
00160 { 00161 int tLen = strlen(s2); 00162 int cLen = strlen(s1); 00163 char diff; 00164 int i; 00165 for (i = 0; ((i < len) && (i < tLen) && (i < cLen)); i++) { 00166 if ((diff = SW_toupper(*s1) - SW_toupper(*s2))) 00167 return diff; 00168 s1++; 00169 s2++; 00170 } 00171 return (i < len) ? cLen - tLen : 0; 00172 }
| char* strstrip | ( | char * | istr | ) |
Definition at line 98 of file utilstr.cpp.
/**
 * Remove leading and trailing whitespace (space, tab, LF, CR) from a
 * string, in place.
 *
 * @param istr NUL-terminated, writable string; modified in place
 * @return istr
 */
char *strstrip(char *istr) {
	const int length = strlen(istr);
	if (length < 1)
		return istr;

	// Walk back from the last character, zeroing trailing whitespace. The
	// first character is never examined here; the forward scan handles it.
	char *last = istr + (length - 1);
	for (; last > istr; --last) {
		const char c = *last;
		if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
			break;
		*last = 0;
	}

	// Skip leading whitespace; the scan stops at the terminator at the latest.
	char *first = istr;
	for (;;) {
		const char c = *first;
		if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
			break;
		++first;
	}

	// Slide what is left to the front of the buffer and terminate it.
	const long kept = (last - first) + 1;
	memmove(istr, first, kept);
	istr[kept] = 0;

	return istr;
}
| SWBuf utf8ToWChar | ( | const char * | buf | ) |
Definition at line 284 of file utilstr.cpp.
00284 { 00285 00286 const char *q = 0; 00287 SWBuf wcharBuf; 00288 while (*buf) { 00289 q = buf; 00290 wchar_t wc = getUniCharFromUTF8((const unsigned char **)&buf); 00291 if (!wc) { 00292 // if my buffer was advanced but nothing was converted, I had invalid data 00293 if (buf - q) { 00294 // invalid bytes in UTF8 stream 00295 wcharBuf.append((wchar_t)0x1a); // unicode replacement character 00296 } 00297 } 00298 else wcharBuf.append(wc); 00299 } 00300 return wcharBuf; 00301 }
| SWBuf wcharToUTF8 | ( | const wchar_t * | buf | ) |
Definition at line 308 of file utilstr.cpp.
00308 { 00309 00310 SWBuf utf8Buf; 00311 while (*buf) { 00312 utf8Buf.append(getUTF8FromUniChar(*buf++)); 00313 } 00314 return utf8Buf; 00315 }
| SWORD_NAMESPACE_START const unsigned char SW_toupper_array[256] |
Definition at line 29 of file utilstr.cpp.
1.6.1