src/utilfuns/utilstr.cpp File Reference

#include <utilstr.h>
#include <ctype.h>
#include <string.h>
#include <sysdata.h>
#include <swlog.h>
#include <swbuf.h>
Include dependency graph for utilstr.cpp:

Go to the source code of this file.

Functions

SWBuf assureValidUTF8 (const char *buf)
__u32 getUniCharFromUTF8 (const unsigned char **buf)
SWBuf getUTF8FromUniChar (__u32 uchar)
char * stdstr (char **ipstr, const char *istr, unsigned int memPadFactor)
int stricmp (const char *s1, const char *s2)
const char * stristr (const char *s1, const char *s2)
int strnicmp (const char *s1, const char *s2, int len)
char * strstrip (char *istr)
SWBuf utf8ToWChar (const char *buf)
SWBuf wcharToUTF8 (const wchar_t *buf)

Variables

SWORD_NAMESPACE_START const
unsigned char 
SW_toupper_array [256]

Function Documentation

SWBuf assureValidUTF8 ( const char *  buf  ) 

Definition at line 252 of file utilstr.cpp.

00252                                        {
00253 
00254     SWBuf myCopy = buf;
00255     const unsigned char *b = (const unsigned char *)myCopy.c_str();
00256     const unsigned char *q = 0;
00257     bool invalidChar = false;
00258     while (*b) {
00259         q = b;
00260         if (!getUniCharFromUTF8(&b)) {
00261             long len = b - q;
00262             if (len) {
00263                 invalidChar = true;
00264                 for (long start = q - (const unsigned char *)myCopy.c_str(); len; len--) {
00265                     myCopy[start+len-1] = 0x1a; // unicode replacement character
00266                 }
00267                 
00268             }
00269         }
00270     }
00271     if (invalidChar) {
00272 //      SWLog::getSystemLog()->logWarning("Changing invalid UTF-8 string (%s) to (%s)\n", buf, myCopy.c_str());
00273     }
00274     return myCopy;
00275 }

__u32 getUniCharFromUTF8 ( const unsigned char **  buf  ) 

Definition at line 197 of file utilstr.cpp.

00197                                                     {
00198     __u32 ch = 0;
00199     unsigned char multibuf[7];
00200 
00201     //case: We're at the end
00202     if (!(**buf)) {
00203         return ch;
00204     }
00205 
00206     //case: ANSI
00207     if (!(**buf & 128)) {
00208         ch = **buf;
00209         (*buf)++;
00210         return ch;
00211     }
00212 
00213     //case: Invalid UTF-8 (illegal continuing byte in initial position)
00214     if ((**buf & 128) && (!(**buf & 64))) {
00215         (*buf)++;
00216         return ch;
00217     }
00218 
00219     //case: 2+ byte codepoint
00220     multibuf[0] = **buf;
00221     multibuf[0] <<= 1;
00222     int subsequent;
00223     for (subsequent = 1; (multibuf[0] & 128) && (subsequent < 7); subsequent++) {
00224         multibuf[0] <<= 1;
00225         multibuf[subsequent] = (*buf)[subsequent];
00226         multibuf[subsequent] &= 63;
00227         // subsequent byte did not begin with 10XXXXXX
00228         // move our buffer to here and error out
00229         if (((*buf)[subsequent] - multibuf[subsequent]) != 128) {
00230             *buf += subsequent;
00231             return 0;
00232         }
00233         ch <<= 6;
00234         ch |= multibuf[subsequent];
00235     }
00236     subsequent--;
00237     multibuf[0] <<= 1;
00238     char significantFirstBits = 8 - (2+subsequent);
00239     
00240     ch |= (((__s16)multibuf[0]) << (((6*subsequent)+significantFirstBits)-8));
00241     *buf += (subsequent+1);
00242     return ch;
00243 }

SWBuf getUTF8FromUniChar ( __u32  uchar  ) 

Definition at line 246 of file utilstr.cpp.

00246                                       {
00247     // TODO: finish this logic
00248     return SWBuf((char)uchar);
00249 }

char* stdstr ( char **  ipstr,
const char *  istr,
unsigned int  memPadFactor = 1 
)

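stdstr - clone a string: releases any buffer *ipstr already points to, sets *ipstr to a newly allocated copy of istr (or to 0 when istr is null), and returns *ipstr.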

Definition at line 77 of file utilstr.cpp.

/**
 * stdstr - clone a string
 *
 * Replaces the heap string held in *ipstr with a fresh copy of istr.
 *
 * @param ipstr address of the pointer to update; a non-null *ipstr is
 *              released with delete []
 * @param istr string to copy, or 0 to just release and clear *ipstr
 * @param memPadFactor the new buffer is (strlen(istr) + 1) * memPadFactor
 *                     bytes; 0 is treated as 1 so the copy always fits
 * @return the new value of *ipstr
 */
char *stdstr(char **ipstr, const char *istr, unsigned int memPadFactor) {
    char *copy = 0;
    if (istr) {
        const size_t len = strlen(istr) + 1;
        const size_t factor = memPadFactor ? memPadFactor : 1;
        copy = new char [ len * factor ];
        memcpy(copy, istr, len);
    }
    // Release the old buffer only after the copy has been taken: istr is
    // allowed to point at (or into) the string currently held in *ipstr.
    if (*ipstr)
        delete [] *ipstr;
    *ipstr = copy;
    return *ipstr;
}

int stricmp ( const char *  s1,
const char *  s2 
)

Definition at line 174 of file utilstr.cpp.

/**
 * Case-insensitive string comparison, forwarded to whichever routine the
 * toolchain provides at global scope.
 *
 * @param s1 first string
 * @param s2 second string
 * @return the result of the underlying platform comparison
 */
int stricmp(const char *s1, const char *s2) {
#if defined(__GNUC__)
    return ::strcasecmp(s1, s2);
#elif defined(_WIN32_WCE)
    return ::_stricmp(s1, s2);
#else
    return ::stricmp(s1, s2);
#endif
}

const char* stristr ( const char *  s1,
const char *  s2 
)

Definition at line 125 of file utilstr.cpp.

00125                                                     {
00126     int tLen = strlen(s2);
00127     int cLen = strlen(s1);
00128     char *target = new char [ tLen + 1 ];
00129     int i, j;
00130     const char *retVal = 0;
00131 
00132     strcpy(target, s2);
00133     for (i = 0; i < tLen; i++)
00134         target[i] = SW_toupper(target[i]);
00135 
00136     for (i = 0; i < (cLen - tLen)+1; i++) {
00137         if (SW_toupper(s1[i]) == (unsigned char)*target) {
00138             for (j = 1; j < tLen; j++) {
00139                 if (SW_toupper(s1[i+j]) != (unsigned char)target[j])
00140                     break;
00141             }
00142             if (j == tLen) {
00143                 retVal = s1+i;
00144                 break;
00145             }
00146         }
00147     }
00148     delete [] target;
00149     return retVal;
00150 }

int strnicmp ( const char *  s1,
const char *  s2,
int  len 
)

Definition at line 160 of file utilstr.cpp.

00160                                                       {
00161     int tLen = strlen(s2);
00162     int cLen = strlen(s1);
00163     char diff;
00164     int i;
00165     for (i = 0; ((i < len) && (i < tLen) && (i < cLen)); i++) {
00166         if ((diff = SW_toupper(*s1) - SW_toupper(*s2)))
00167             return diff;
00168     s1++;
00169     s2++;
00170     }
00171     return (i < len) ? cLen - tLen : 0;
00172 }

char* strstrip ( char *  istr  ) 

Definition at line 98 of file utilstr.cpp.

/** True for the characters strstrip() removes: space, tab, LF (10), CR (13). */
static bool isStripChar(char c) {
    return (c == ' ') || (c == '\t') || (c == 10) || (c == 13);
}

/**
 * Removes leading and trailing spaces, tabs, line feeds and carriage
 * returns from a string, in place.
 *
 * @param istr NUL-terminated string to trim; may be null
 * @return istr (null is returned unchanged)
 */
char *strstrip(char *istr) {
    if (!istr)
        return istr;

    // Shrink the logical length past any trailing strip characters.
    size_t len = strlen(istr);
    while ((len > 0) && isStripChar(istr[len - 1]))
        --len;

    // Count leading strip characters inside what is left.
    size_t skip = 0;
    while ((skip < len) && isStripChar(istr[skip]))
        ++skip;

    // Slide the kept text to the front and terminate it.
    len -= skip;
    if (skip)
        memmove(istr, istr + skip, len);
    istr[len] = 0;

    return istr;
}

SWBuf utf8ToWChar ( const char *  buf  ) 

Definition at line 284 of file utilstr.cpp.

00284                                    {
00285 
00286     const char *q = 0;
00287     SWBuf wcharBuf;
00288     while (*buf) {
00289         q = buf;
00290         wchar_t wc = getUniCharFromUTF8((const unsigned char **)&buf);
00291         if (!wc) {
00292             // if my buffer was advanced but nothing was converted, I had invalid data
00293             if (buf - q) {
00294                 // invalid bytes in UTF8 stream
00295                 wcharBuf.append((wchar_t)0x1a);     // unicode replacement character
00296             }
00297         }
00298         else wcharBuf.append(wc);
00299     }
00300     return wcharBuf;
00301 }

SWBuf wcharToUTF8 ( const wchar_t *  buf  ) 

Definition at line 308 of file utilstr.cpp.

00308                                       {
00309 
00310     SWBuf utf8Buf;
00311     while (*buf) {
00312         utf8Buf.append(getUTF8FromUniChar(*buf++));
00313     }
00314     return utf8Buf;
00315 }


Variable Documentation

SWORD_NAMESPACE_START const unsigned char SW_toupper_array[256]

Definition at line 29 of file utilstr.cpp.


Generated on 18 Mar 2013 for The SWORD Project by  doxygen 1.6.1