LZSSCompress Class Reference

#include <lzsscomprs.h>

Inheritance diagram for LZSSCompress:
Inheritance graph
[legend]
Collaboration diagram for LZSSCompress:
Collaboration graph
[legend]

List of all members.

Public Member Functions

virtual char * Buf (const char *buf=0, unsigned long *len=0)
virtual void Decode (void)
virtual void Encode (void)
virtual unsigned long GetChars (char *buf, unsigned long len)
 LZSSCompress ()
virtual unsigned long SendChars (char *buf, unsigned long len)
virtual char * zBuf (unsigned long *len, char *buf=0)
virtual ~LZSSCompress ()

Protected Attributes

char * buf
char direct
unsigned long pos
unsigned long slen
char * zbuf
unsigned long zlen
unsigned long zpos

Private Member Functions

void DeleteNode (short int Node)
void InitTree ()
void InsertNode (short int Pos)

Static Private Attributes

static short int m_dad [N+1]
static short int m_lson [N+1]
static short int m_match_length
static short int m_match_position
static unsigned char m_ring_buffer [N+F-1]
static short int m_rson [N+257]

Detailed Description

Definition at line 63 of file lzsscomprs.h.


Constructor & Destructor Documentation

LZSSCompress::LZSSCompress (  ) 

Definition at line 86 of file lzsscomprs.cpp.

00086                            : SWCompress() {
00087 }

LZSSCompress::~LZSSCompress (  )  [virtual]

Definition at line 94 of file lzsscomprs.cpp.

00094                             {
00095 }


Member Function Documentation

char * SWCompress::Buf ( const char *  buf = 0,
unsigned long *  len = 0 
) [virtual, inherited]

Definition at line 73 of file swcomprs.cpp.

00073                                                           {
00074     // setting an uncompressed buffer
00075     if (ibuf) {
00076         Init();
00077         slen = (len) ? *len : strlen(ibuf);
00078         buf = (char *) calloc(slen + 1, 1);
00079         memcpy(buf, ibuf, slen);
00080     }
00081 
00082     // getting an uncompressed buffer
00083     if (!buf) {
00084         buf = (char *)calloc(1,1); // be sure we at least allocate an empty buf for return;
00085         direct = 1;
00086         Decode();
00087 //      slen = strlen(buf);
00088         if (len)
00089             *len = slen;
00090     }
00091     return buf;
00092 }

void LZSSCompress::Decode ( void   )  [virtual]

Reimplemented from SWCompress.

Definition at line 563 of file lzsscomprs.cpp.

00564 {
00565     int k;
00566     int r;                            // node number
00567     unsigned char c[F];              // an array of chars
00568     unsigned char flags;                // 8 bits of flags
00569     int flag_count;                  // which flag we're on
00570     short int pos;                    // position in the ring buffer
00571     short int len;                    // number of chars in ring buffer
00572     unsigned long totalLen = 0;
00573 
00574     direct = 1; // set direction needed by parent [Get|Send]Chars()
00575 
00576     // Initialize the ring buffer with a common string.
00577     //
00578     // Note that the last F bytes of the ring buffer are not filled.
00579 
00580     memset(m_ring_buffer, ' ', N - F);
00581     
00582     r = N - F;
00583 
00584     flags = (char) 0;
00585     flag_count = 0;
00586 
00587     for ( ; ; ) {
00588 
00589         // If there are more bits of interest in this flag, then
00590         // shift that next interesting bit into the 1's position.
00591         //
00592         // If this flag has been exhausted, the next byte must 
00593         // be a flag.
00594 
00595         if (flag_count > 0) {
00596             flags = (unsigned char) (flags >> 1);
00597             flag_count--;
00598         }
00599         else {
00600             // Next byte must be a flag.
00601 
00602             if (GetChars((char *) &flags, 1) != 1)
00603                 break;
00604 
00605             // Set the flag counter.  While at first it might appear
00606             // that this should be an 8 since there are 8 bits in the
00607             // flag, it should really be a 7 because the shift must
00608             // be performed 7 times in order to see all 8 bits.
00609 
00610             flag_count = 7;
00611         }
00612 
00613         // If the low order bit of the flag is now set, then we know
00614         // that the next byte is a single, unencoded character.
00615 
00616         if (flags & 1) {
00617             if (GetChars((char *) c, 1) != 1)
00618                 break;
00619 
00620             if (SendChars((char *) c, 1) != 1) {
00621                 break;
00622             }
00623             totalLen++;
00624 
00625             // Add to buffer, and increment to next spot. Wrap at end.
00626 
00627             m_ring_buffer[r] = c[0];
00628             r = (short int) ( (r + 1) & (N - 1) );
00629         }
00630 
00631         // Otherwise, we know that the next two bytes are a
00632         // <position,length> pair.  The position is in 12 bits and
00633         // the length is in 4 bits.
00634 
00635         else {
00636             // Original code:
00637             //  if ((i = getc(infile)) == EOF)
00638             //    break;
00639             //  if ((j = getc(infile)) == EOF)
00640             //    break;
00641             //  i |= ((j & 0xf0) << 4); 
00642             //  j = (j & 0x0f) + THRESHOLD;
00643             //
00644             // I've modified this to only make one input call, and
00645             // have changed the variable names to something more
00646             // obvious.
00647 
00648             if (GetChars((char *) c, 2) != 2)
00649                 break;
00650 
00651             // Convert these two characters into the position and
00652             // length.  Note that the length is always at least
00653             // THRESHOLD, which is why we're able to get a length
00654             // of 18 out of only 4 bits.
00655 
00656             pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) );
00657 
00658             len = (short int) ( (c[1] & 0x0f) + THRESHOLD );
00659 
00660             // There are now "len" characters at position "pos" in
00661             // the ring buffer that can be pulled out.  Note that
00662             // len is never more than F.
00663 
00664             for (k = 0; k < len; k++) {
00665                 c[k] = m_ring_buffer[(pos + k) & (N - 1)];
00666 
00667                 // Add to buffer, and increment to next spot. Wrap at end.
00668 
00669                 m_ring_buffer[r] = c[k];
00670                 r = (short int) ( (r + 1) & (N - 1) );
00671             }
00672 
00673             // Add the "len" :characters to the output stream.
00674 
00675             if (SendChars((char *) c, len) != (unsigned int)len) {
00676                 break;
00677             }
00678             totalLen += len;
00679         }
00680     }
00681     slen = totalLen;
00682 }

void LZSSCompress::DeleteNode ( short int  Node  )  [private]

Definition at line 260 of file lzsscomprs.cpp.

00261 {
00262     short int  q;
00263 
00264 /*
00265     ASSERT(Node >= 0);
00266     ASSERT(Node < (N+1));
00267 */
00268 
00269     if (m_dad[Node] == NOT_USED) { // not in tree, nothing to do
00270         return;
00271     }
00272 
00273     if (m_rson[Node] == NOT_USED) {
00274         q = m_lson[Node];
00275     }
00276     else if (m_lson[Node] == NOT_USED) {
00277         q = m_rson[Node];
00278     }
00279     else {
00280         q = m_lson[Node];
00281         if (m_rson[q] != NOT_USED) {
00282             do {
00283                 q = m_rson[q];
00284             } while (m_rson[q] != NOT_USED);
00285 
00286             m_rson[ m_dad[q] ] = m_lson[q];
00287             m_dad[ m_lson[q] ] = m_dad[q];
00288             m_lson[q] = m_lson[Node];
00289             m_dad[ m_lson[Node] ] = q;
00290         }
00291 
00292         m_rson[q] = m_rson[Node];
00293         m_dad[ m_rson[Node] ] = q;
00294     }
00295 
00296     m_dad[q] = m_dad[Node];
00297 
00298     if (m_rson[ m_dad[Node] ] == Node) {
00299         m_rson[ m_dad[Node] ] = q;
00300     }
00301     else {
00302         m_lson[ m_dad[Node] ] = q;
00303     }
00304 
00305     m_dad[Node] = NOT_USED;
00306 }

void LZSSCompress::Encode ( void   )  [virtual]

Reimplemented from SWCompress.

Definition at line 319 of file lzsscomprs.cpp.

00320 {
00321     short int i;                        // an iterator
00322     short int r;                        // node number in the binary tree
00323     short int s;                        // position in the ring buffer
00324     unsigned short int len;          // len of initial string
00325     short int last_match_length;        // length of last match
00326     short int code_buf_pos;          // position in the output buffer
00327     unsigned char code_buf[17];      // the output buffer
00328     unsigned char mask;              // bit mask for byte 0 of out buf
00329     unsigned char c;                    // character read from string
00330 
00331     // Start with a clean tree.
00332 
00333     InitTree();
00334     direct = 0; // set direction needed by parent [Get|Send]Chars()
00335 
00336     // code_buf[0] works as eight flags.  A "1" represents that the
00337     // unit is an unencoded letter (1 byte), and a "0" represents
00338     // that the next unit is a <position,length> pair (2 bytes).
00339     //
00340     // code_buf[1..16] stores eight units of code.  Since the best
00341     // we can do is store eight <position,length> pairs, at most 16 
00342     // bytes are needed to store this.
00343     //
00344     // This is why the maximum size of the code buffer is 17 bytes.
00345 
00346     code_buf[0] = 0;
00347     code_buf_pos = 1;
00348 
00349     // Mask iterates over the 8 bits in the code buffer.  The first
00350     // character ends up being stored in the low bit.
00351     //
00352     //  bit   8   7   6   5   4   3   2   1
00353     //      |                          |
00354     //      |            first sequence in code buffer
00355     //      |
00356     //    last sequence in code buffer      
00357 
00358     mask = 1;
00359 
00360     s = 0;
00361     r = (short int) N - (short int) F;
00362 
00363     // Initialize the ring buffer with spaces...
00364 
00365     // Note that the last F bytes of the ring buffer are not filled.
00366     // This is because those F bytes will be filled in immediately
00367     // with bytes from the input stream.
00368 
00369     memset(m_ring_buffer, ' ', N - F);
00370     
00371     // Read F bytes into the last F bytes of the ring buffer.
00372     //
00373     // This function loads the buffer with X characters and returns
00374     // the actual amount loaded.
00375 
00376     len = GetChars((char *) &(m_ring_buffer[r]), F);
00377 
00378     // Make sure there is something to be compressed.
00379 
00380     if (len == 0)
00381         return;
00382 
00383     // Insert the F strings, each of which begins with one or more
00384     // 'space' characters.  Note the order in which these strings
00385     // are inserted.  This way, degenerate trees will be less likely
00386     // to occur.
00387 
00388     for (i = 1; i <= F; i++) {
00389         InsertNode((short int) (r - i));
00390     }
00391 
00392     // Finally, insert the whole string just read.  The
00393     // member variables match_length and match_position are set.
00394 
00395     InsertNode(r);
00396 
00397     // Now that we're preloaded, continue till done.
00398 
00399     do {
00400 
00401         // m_match_length may be spuriously long near the end of
00402         // text.
00403 
00404         if (m_match_length > len) {
00405             m_match_length = len;
00406         }
00407 
00408         // Is it cheaper to store this as a single character?  If so,
00409         // make it so.
00410 
00411         if (m_match_length < THRESHOLD) {
00412             // Send one character.  Remember that code_buf[0] is the
00413             // set of flags for the next eight items.
00414 
00415             m_match_length = 1;  
00416             code_buf[0] |= mask;  
00417             code_buf[code_buf_pos++] = m_ring_buffer[r];
00418         }
00419 
00420         // Otherwise, we do indeed have a string that can be stored
00421         // compressed to save space.
00422 
00423         else {
00424             // The next 16 bits need to contain the position (12 bits)
00425             // and the length (4 bits).
00426 
00427             code_buf[code_buf_pos++] = (unsigned char) m_match_position;
00428             code_buf[code_buf_pos++] = (unsigned char) (
00429                 ((m_match_position >> 4) & 0xf0) | 
00430                 (m_match_length - THRESHOLD) );
00431         }
00432 
00433         // Shift the mask one bit to the left so that it will be ready
00434         // to store the new bit.
00435 
00436         mask = (unsigned char) (mask << 1);
00437 
00438         // If the mask is now 0, then we know that we have a full set
00439         // of flags and items in the code buffer.  These need to be
00440         // output.
00441 
00442         if (!mask) {
00443             // code_buf is the buffer of characters to be output.
00444             // code_buf_pos is the number of characters it contains.
00445 
00446             SendChars((char *) code_buf, code_buf_pos);
00447 
00448             // Reset for next buffer...
00449 
00450             code_buf[0] = 0;
00451             code_buf_pos = 1;
00452             mask = 1;
00453         }
00454 
00455         last_match_length = m_match_length;
00456 
00457         // Delete old strings and read new bytes...
00458 
00459         for (i = 0; i < last_match_length; i++) {
00460             // Get next character...
00461 
00462             if (GetChars((char *) &c, 1) != 1)
00463                 break;
00464 
00465             // Delete "old strings"
00466 
00467             DeleteNode(s);
00468 
00469             // Put this character into the ring buffer.
00470             //        
00471             // The original comment here says "If the position is near
00472             // the end of the buffer, extend the buffer to make
00473             // string comparison easier."
00474             //
00475             // That's a little misleading, because the "end" of the 
00476             // buffer is really what we consider to be the "beginning"
00477             // of the buffer, that is, positions 0 through F.
00478             //
00479             // The idea is that the front end of the buffer is duplicated
00480             // into the back end so that when you're looking at characters
00481             // at the back end of the buffer, you can index ahead (beyond
00482             // the normal end of the buffer) and see the characters
00483             // that are at the front end of the buffer wihtout having
00484             // to adjust the index.
00485             //
00486             // That is...
00487             //
00488             //    1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234
00489             //    |                            |  |
00490             //    position 0          end of buffer  |
00491             //                                       |
00492             //                duplicate of front of buffer
00493 
00494             m_ring_buffer[s] = c;
00495 
00496             if (s < F - 1) {
00497                 m_ring_buffer[s + N] = c;
00498             }
00499 
00500             // Increment the position, and wrap around when we're at
00501             // the end.  Note that this relies on N being a power of 2.
00502 
00503             s = (short int) ( (s + 1) & (N - 1) );
00504             r = (short int) ( (r + 1) & (N - 1) );
00505 
00506             // Register the string that is found in 
00507             // m_ring_buffer[r..r+F-1].
00508 
00509             InsertNode(r);
00510         }
00511 
00512         // If we didn't quit because we hit the last_match_length,
00513         // then we must have quit because we ran out of characters
00514         // to process.
00515 
00516         while (i++ < last_match_length) {                             
00517             DeleteNode(s);
00518 
00519             s = (short int) ( (s + 1) & (N - 1) );
00520             r = (short int) ( (r + 1) & (N - 1) );
00521 
00522             // Note that len hitting 0 is the key that causes the
00523             // do...while() to terminate.  This is the only place
00524             // within the loop that len is modified.
00525             //
00526             // Its original value is F (or a number less than F for
00527             // short strings).
00528 
00529             if (--len) {
00530                 InsertNode(r);     /* buffer may not be empty. */
00531             }
00532         }
00533 
00534         // End of do...while() loop.  Continue processing until there
00535         // are no more characters to be compressed.  The variable
00536         // "len" is used to signal this condition.
00537     } while (len > 0);
00538 
00539     // There could still be something in the output buffer.  Send it
00540     // now.
00541 
00542     if (code_buf_pos > 1) {
00543         // code_buf is the encoded string to send.
00544         // code_buf_ptr is the number of characters.
00545 
00546         SendChars((char *) code_buf, code_buf_pos);
00547     }
00548 
00549 
00550     // must set zlen for parent class to know length of compressed buffer
00551     zlen = zpos;
00552 }

unsigned long SWCompress::GetChars ( char *  buf,
unsigned long  len 
) [virtual, inherited]

Definition at line 116 of file swcomprs.cpp.

00117 {
00118     if (direct) {
00119         len = (((zlen - zpos) > (unsigned)len) ? len : zlen - zpos);
00120         if (len > 0) {
00121             memmove(ibuf, &zbuf[zpos], len);
00122             zpos += len;
00123         }
00124     }
00125     else {
00126 //      slen = strlen(buf);
00127         len = (((slen - pos) > (unsigned)len) ? len : slen - pos);
00128         if (len > 0) {
00129             memmove(ibuf, &buf[pos], len);
00130             pos += len;
00131         }
00132     }
00133     return len;
00134 }

void LZSSCompress::InitTree ( void   )  [private]

Definition at line 103 of file lzsscomprs.cpp.

00103                                 {
00104     int  i;
00105 
00106     // For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right
00107     // and left children of node i.  These nodes need not be
00108     // initialized.  However, for debugging purposes, it is nice to
00109     // have them initialized.  Since this is only used for compression
00110     // (not decompression), I don't mind spending the time to do it.
00111     //
00112     // For the same range of i, m_dad[i] is the parent of node i.
00113     // These are initialized to a known value that can represent
00114     // a "not used" state.
00115     
00116     for (i = 0; i < N; i++) {
00117         m_lson[i] = NOT_USED;
00118         m_rson[i] = NOT_USED;
00119         m_dad[i] = NOT_USED;
00120     }
00121 
00122     // For i = 0 to 255, m_rson[N + i + 1] is the root of the tree
00123     // for strings that begin with the character i.  This is why
00124     // the right child array is larger than the left child array.
00125     // These are also initialzied to a "not used" state.
00126     //
00127     // Note that there are 256 of these, one for each of the possible
00128     // 256 characters.
00129 
00130     for (i = N + 1; i <= (N + 256); i++) {
00131         m_rson[i] = NOT_USED;
00132     }
00133 }

void LZSSCompress::InsertNode ( short int  Pos  )  [private]

Definition at line 162 of file lzsscomprs.cpp.

00163 {
00164     short int i;
00165     short int p;
00166     int cmp;
00167     unsigned char * key;
00168 
00169 /*
00170     ASSERT(Pos >= 0);
00171     ASSERT(Pos < N);
00172 */
00173 
00174     cmp = 1;
00175     key = &(m_ring_buffer[Pos]);
00176 
00177     // The last 256 entries in m_rson contain the root nodes for
00178     // strings that begin with a letter.  Get an index for the
00179     // first letter in this string.
00180 
00181     p = (short int) (N + 1 + key[0]);
00182 
00183     // Set the left and right tree nodes for this position to "not
00184     // used."
00185 
00186     m_lson[Pos] = NOT_USED;
00187     m_rson[Pos] = NOT_USED;
00188 
00189     // Haven't matched anything yet.
00190 
00191     m_match_length = 0;
00192 
00193     for ( ; ; ) {
00194         if (cmp >= 0) {
00195             if (m_rson[p] != NOT_USED) {
00196                 p = m_rson[p];
00197             }
00198             else {
00199                 m_rson[p] = Pos;
00200                 m_dad[Pos] = p;
00201                 return;
00202             }
00203         }
00204         else {
00205             if (m_lson[p] != NOT_USED) {
00206                 p = m_lson[p];
00207             }
00208             else {
00209                 m_lson[p] = Pos;
00210                 m_dad[Pos] = p;
00211                 return;
00212             }
00213         }
00214 
00215         // Should we go to the right or the left to look for the
00216         // next match?
00217 
00218         for (i = 1; i < F; i++) {
00219             cmp = key[i] - m_ring_buffer[p + i];
00220             if (cmp != 0)
00221                 break;
00222         }
00223 
00224         if (i > m_match_length) {
00225             m_match_position = p;
00226             m_match_length = i;
00227 
00228             if (i >= F)
00229                 break;
00230         }
00231     }
00232 
00233     m_dad[Pos] = m_dad[p];
00234     m_lson[Pos] = m_lson[p];
00235     m_rson[Pos] = m_rson[p];
00236 
00237     m_dad[ m_lson[p] ] = Pos;
00238     m_dad[ m_rson[p] ] = Pos;
00239 
00240     if (m_rson[ m_dad[p] ] == p) {
00241         m_rson[ m_dad[p] ] = Pos;
00242     }
00243     else {
00244         m_lson[ m_dad[p] ] = Pos;
00245     }
00246 
00247     // Remove "p"
00248 
00249     m_dad[p] = NOT_USED;
00250 }

unsigned long SWCompress::SendChars ( char *  buf,
unsigned long  len 
) [virtual, inherited]

Definition at line 137 of file swcomprs.cpp.

00138 {
00139     if (direct) {
00140         if (buf) {
00141 //          slen = strlen(buf);
00142             if ((pos + len) > (unsigned)slen) {
00143                 buf = (char *) realloc(buf, pos + len + 1024);
00144                 memset(&buf[pos], 0, len + 1024);
00145             }
00146         }
00147         else    buf = (char *)calloc(1, len + 1024);
00148         memmove(&buf[pos], ibuf, len);
00149         pos += len;
00150     }
00151     else {
00152         if (zbuf) {
00153             if ((zpos + len) > zlen) {
00154                 zbuf = (char *) realloc(zbuf, zpos + len + 1024);
00155                 zlen = zpos + len + 1024;
00156             }
00157         }
00158         else {
00159             zbuf = (char *)calloc(1, len + 1024);
00160             zlen = len + 1024;
00161         }
00162         memmove(&zbuf[zpos], ibuf, len);
00163         zpos += len;
00164     }
00165     return len;
00166 }

char * SWCompress::zBuf ( unsigned long *  len,
char *  buf = 0 
) [virtual, inherited]

Definition at line 95 of file swcomprs.cpp.

00096 {
00097     // setting a compressed buffer
00098     if (ibuf) {
00099         Init();
00100         zbuf = (char *) malloc(*len);
00101         memcpy(zbuf, ibuf, *len);
00102         zlen = *len;
00103     }
00104 
00105     // getting a compressed buffer
00106     if (!zbuf) {
00107         direct = 0;
00108         Encode();
00109     }
00110 
00111     *len = zlen;
00112     return zbuf;
00113 }


Member Data Documentation

char* SWCompress::buf [protected, inherited]

Definition at line 34 of file swcomprs.h.

char SWCompress::direct [protected, inherited]

Definition at line 34 of file swcomprs.h.

short int LZSSCompress::m_dad [static, private]

Definition at line 70 of file lzsscomprs.h.

short int LZSSCompress::m_lson [static, private]

Definition at line 68 of file lzsscomprs.h.

short int LZSSCompress::m_match_length [static, private]

Definition at line 67 of file lzsscomprs.h.

short int LZSSCompress::m_match_position [static, private]

Definition at line 66 of file lzsscomprs.h.

SWORD_NAMESPACE_START unsigned char LZSSCompress::m_ring_buffer [static, private]

Definition at line 65 of file lzsscomprs.h.

short int LZSSCompress::m_rson [static, private]

Definition at line 69 of file lzsscomprs.h.

unsigned long SWCompress::pos [protected, inherited]

Definition at line 35 of file swcomprs.h.

unsigned long SWCompress::slen [protected, inherited]

Definition at line 35 of file swcomprs.h.

char * SWCompress::zbuf [protected, inherited]

Definition at line 34 of file swcomprs.h.

unsigned long SWCompress::zlen [protected, inherited]

Definition at line 35 of file swcomprs.h.

unsigned long SWCompress::zpos [protected, inherited]

Definition at line 35 of file swcomprs.h.


The documentation for this class was generated from the following files:

Generated on 18 Mar 2013 for The SWORD Project by  doxygen 1.6.1