The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
LZSSCompress Class Reference

#include <lzsscomprs.h>

+ Inheritance diagram for LZSSCompress:
+ Collaboration diagram for LZSSCompress:

Classes

class  Private
 

Public Member Functions

virtual void decode (void)
 
virtual void encode (void)
 
virtual unsigned long getChars (char *buf, unsigned long len)
 
virtual char * getCompressedBuf (unsigned long *len=0)
 
virtual int getLevel ()
 
virtual char * getUncompressedBuf (unsigned long *len=0)
 
 LZSSCompress ()
 
virtual unsigned long sendChars (char *buf, unsigned long len)
 
virtual void setCompressedBuf (unsigned long *len, char *buf=0)
 
virtual void setLevel (int l)
 
virtual void setUncompressedBuf (const char *buf=0, unsigned long *len=0)
 
virtual ~LZSSCompress ()
 

Protected Attributes

char * buf
 
char direct
 
int level
 
unsigned long pos
 
unsigned long slen
 
char * zbuf
 
unsigned long zlen
 
unsigned long zpos
 

Private Attributes

Privatep
 

Detailed Description

Definition at line 33 of file lzsscomprs.h.

Constructor & Destructor Documentation

LZSSCompress::LZSSCompress ( )

Definition at line 132 of file lzsscomprs.cpp.

132  : SWCompress() {
133  p = new Private();
134 }
Private * p
Definition: lzsscomprs.h:35
LZSSCompress::~LZSSCompress ( )
virtual

Definition at line 141 of file lzsscomprs.cpp.

141  {
142  delete p;
143 }
Private * p
Definition: lzsscomprs.h:35

Member Function Documentation

void LZSSCompress::decode ( void  )
virtual

Reimplemented from SWCompress.

Definition at line 611 of file lzsscomprs.cpp.

612 {
613  int k;
614  int r; // node number
615  unsigned char c[F]; // an array of chars
616  unsigned char flags; // 8 bits of flags
617  int flag_count; // which flag we're on
618  short int pos; // position in the ring buffer
619  short int len; // number of chars in ring buffer
620  unsigned long totalLen = 0;
621 
622  direct = 1; // set direction needed by parent [Get|Send]Chars()
623 
624  // Initialize the ring buffer with a common string.
625  //
626  // Note that the last F bytes of the ring buffer are not filled.
627 
628  memset(p->m_ring_buffer, ' ', N - F);
629 
630  r = N - F;
631 
632  flags = (char) 0;
633  flag_count = 0;
634 
635  for ( ; ; ) {
636 
637  // If there are more bits of interest in this flag, then
638  // shift that next interesting bit into the 1's position.
639  //
640  // If this flag has been exhausted, the next byte must
641  // be a flag.
642 
643  if (flag_count > 0) {
644  flags = (unsigned char) (flags >> 1);
645  flag_count--;
646  }
647  else {
648  // Next byte must be a flag.
649 
650  if (getChars((char *) &flags, 1) != 1)
651  break;
652 
653  // Set the flag counter. While at first it might appear
654  // that this should be an 8 since there are 8 bits in the
655  // flag, it should really be a 7 because the shift must
656  // be performed 7 times in order to see all 8 bits.
657 
658  flag_count = 7;
659  }
660 
661  // If the low order bit of the flag is now set, then we know
662  // that the next byte is a single, unencoded character.
663 
664  if (flags & 1) {
665  if (getChars((char *) c, 1) != 1)
666  break;
667 
668  if (sendChars((char *) c, 1) != 1) {
669  break;
670  }
671  totalLen++;
672 
673  // Add to buffer, and increment to next spot. Wrap at end.
674 
675  p->m_ring_buffer[r] = c[0];
676  r = (short int) ( (r + 1) & (N - 1) );
677  }
678 
679  // Otherwise, we know that the next two bytes are a
680  // <position,length> pair. The position is in 12 bits and
681  // the length is in 4 bits.
682 
683  else {
684  // Original code:
685  // if ((i = getc(infile)) == EOF)
686  // break;
687  // if ((j = getc(infile)) == EOF)
688  // break;
689  // i |= ((j & 0xf0) << 4);
690  // j = (j & 0x0f) + THRESHOLD;
691  //
692  // I've modified this to only make one input call, and
693  // have changed the variable names to something more
694  // obvious.
695 
696  if (getChars((char *) c, 2) != 2)
697  break;
698 
699  // Convert these two characters into the position and
700  // length. Note that the length is always at least
701  // THRESHOLD, which is why we're able to get a length
702  // of 18 out of only 4 bits.
703 
704  pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) );
705 
706  len = (short int) ( (c[1] & 0x0f) + THRESHOLD );
707 
708  // There are now "len" characters at position "pos" in
709  // the ring buffer that can be pulled out. Note that
710  // len is never more than F.
711 
712  for (k = 0; k < len; k++) {
713  c[k] = p->m_ring_buffer[(pos + k) & (N - 1)];
714 
715  // Add to buffer, and increment to next spot. Wrap at end.
716 
717  p->m_ring_buffer[r] = c[k];
718  r = (short int) ( (r + 1) & (N - 1) );
719  }
720 
721  // Add the "len" :characters to the output stream.
722 
723  if (sendChars((char *) c, len) != (unsigned int)len) {
724  break;
725  }
726  totalLen += len;
727  }
728  }
729  slen = totalLen;
730 }
unsigned long slen
Definition: swcomprs.h:39
#define THRESHOLD
Definition: lzsscomprs.cpp:55
Private * p
Definition: lzsscomprs.h:35
static unsigned char m_ring_buffer[N+F-1]
Definition: lzsscomprs.cpp:63
virtual unsigned long getChars(char *buf, unsigned long len)
Definition: swcomprs.cpp:121
char direct
Definition: swcomprs.h:38
unsigned long pos
Definition: swcomprs.h:39
#define F
Definition: lzsscomprs.cpp:54
#define N
Definition: lzsscomprs.cpp:53
virtual unsigned long sendChars(char *buf, unsigned long len)
Definition: swcomprs.cpp:141
void LZSSCompress::encode ( void  )
virtual

Reimplemented from SWCompress.

Definition at line 367 of file lzsscomprs.cpp.

368 {
369  short int i; // an iterator
370  short int r; // node number in the binary tree
371  short int s; // position in the ring buffer
372  unsigned short int len; // len of initial string
373  short int last_match_length; // length of last match
374  short int code_buf_pos; // position in the output buffer
375  unsigned char code_buf[17]; // the output buffer
376  unsigned char mask; // bit mask for byte 0 of out buf
377  unsigned char c; // character read from string
378 
379  // Start with a clean tree.
380 
381  p->InitTree();
382  direct = 0; // set direction needed by parent [Get|Send]Chars()
383 
384  // code_buf[0] works as eight flags. A "1" represents that the
385  // unit is an unencoded letter (1 byte), and a "0" represents
386  // that the next unit is a <position,length> pair (2 bytes).
387  //
388  // code_buf[1..16] stores eight units of code. Since the best
389  // we can do is store eight <position,length> pairs, at most 16
390  // bytes are needed to store this.
391  //
392  // This is why the maximum size of the code buffer is 17 bytes.
393 
394  code_buf[0] = 0;
395  code_buf_pos = 1;
396 
397  // Mask iterates over the 8 bits in the code buffer. The first
398  // character ends up being stored in the low bit.
399  //
400  // bit 8 7 6 5 4 3 2 1
401  // | |
402  // | first sequence in code buffer
403  // |
404  // last sequence in code buffer
405 
406  mask = 1;
407 
408  s = 0;
409  r = (short int) N - (short int) F;
410 
411  // Initialize the ring buffer with spaces...
412 
413  // Note that the last F bytes of the ring buffer are not filled.
414  // This is because those F bytes will be filled in immediately
415  // with bytes from the input stream.
416 
417  memset(p->m_ring_buffer, ' ', N - F);
418 
419  // Read F bytes into the last F bytes of the ring buffer.
420  //
421  // This function loads the buffer with X characters and returns
422  // the actual amount loaded.
423 
424  len = getChars((char *) &(p->m_ring_buffer[r]), F);
425 
426  // Make sure there is something to be compressed.
427 
428  if (len == 0)
429  return;
430 
431  // Insert the F strings, each of which begins with one or more
432  // 'space' characters. Note the order in which these strings
433  // are inserted. This way, degenerate trees will be less likely
434  // to occur.
435 
436  for (i = 1; i <= F; i++) {
437  p->InsertNode((short int) (r - i));
438  }
439 
440  // Finally, insert the whole string just read. The
441  // member variables match_length and match_position are set.
442 
443  p->InsertNode(r);
444 
445  // Now that we're preloaded, continue till done.
446 
447  do {
448 
449  // m_match_length may be spuriously long near the end of
450  // text.
451 
452  if (p->m_match_length > len) {
453  p->m_match_length = len;
454  }
455 
456  // Is it cheaper to store this as a single character? If so,
457  // make it so.
458 
459  if (p->m_match_length < THRESHOLD) {
460  // Send one character. Remember that code_buf[0] is the
461  // set of flags for the next eight items.
462 
463  p->m_match_length = 1;
464  code_buf[0] |= mask;
465  code_buf[code_buf_pos++] = p->m_ring_buffer[r];
466  }
467 
468  // Otherwise, we do indeed have a string that can be stored
469  // compressed to save space.
470 
471  else {
472  // The next 16 bits need to contain the position (12 bits)
473  // and the length (4 bits).
474 
475  code_buf[code_buf_pos++] = (unsigned char) p->m_match_position;
476  code_buf[code_buf_pos++] = (unsigned char) (
477  ((p->m_match_position >> 4) & 0xf0) |
478  (p->m_match_length - THRESHOLD) );
479  }
480 
481  // Shift the mask one bit to the left so that it will be ready
482  // to store the new bit.
483 
484  mask = (unsigned char) (mask << 1);
485 
486  // If the mask is now 0, then we know that we have a full set
487  // of flags and items in the code buffer. These need to be
488  // output.
489 
490  if (!mask) {
491  // code_buf is the buffer of characters to be output.
492  // code_buf_pos is the number of characters it contains.
493 
494  sendChars((char *) code_buf, code_buf_pos);
495 
496  // Reset for next buffer...
497 
498  code_buf[0] = 0;
499  code_buf_pos = 1;
500  mask = 1;
501  }
502 
503  last_match_length = p->m_match_length;
504 
505  // Delete old strings and read new bytes...
506 
507  for (i = 0; i < last_match_length; i++) {
508  // Get next character...
509 
510  if (getChars((char *) &c, 1) != 1)
511  break;
512 
513  // Delete "old strings"
514 
515  p->DeleteNode(s);
516 
517  // Put this character into the ring buffer.
518  //
519  // The original comment here says "If the position is near
520  // the end of the buffer, extend the buffer to make
521  // string comparison easier."
522  //
523  // That's a little misleading, because the "end" of the
524  // buffer is really what we consider to be the "beginning"
525  // of the buffer, that is, positions 0 through F.
526  //
527  // The idea is that the front end of the buffer is duplicated
528  // into the back end so that when you're looking at characters
529  // at the back end of the buffer, you can index ahead (beyond
530  // the normal end of the buffer) and see the characters
531  // that are at the front end of the buffer wihtout having
532  // to adjust the index.
533  //
534  // That is...
535  //
536  // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234
537  // | | |
538  // position 0 end of buffer |
539  // |
540  // duplicate of front of buffer
541 
542  p->m_ring_buffer[s] = c;
543 
544  if (s < F - 1) {
545  p->m_ring_buffer[s + N] = c;
546  }
547 
548  // Increment the position, and wrap around when we're at
549  // the end. Note that this relies on N being a power of 2.
550 
551  s = (short int) ( (s + 1) & (N - 1) );
552  r = (short int) ( (r + 1) & (N - 1) );
553 
554  // Register the string that is found in
555  // m_ring_buffer[r..r+F-1].
556 
557  p->InsertNode(r);
558  }
559 
560  // If we didn't quit because we hit the last_match_length,
561  // then we must have quit because we ran out of characters
562  // to process.
563 
564  while (i++ < last_match_length) {
565  p->DeleteNode(s);
566 
567  s = (short int) ( (s + 1) & (N - 1) );
568  r = (short int) ( (r + 1) & (N - 1) );
569 
570  // Note that len hitting 0 is the key that causes the
571  // do...while() to terminate. This is the only place
572  // within the loop that len is modified.
573  //
574  // Its original value is F (or a number less than F for
575  // short strings).
576 
577  if (--len) {
578  p->InsertNode(r); /* buffer may not be empty. */
579  }
580  }
581 
582  // End of do...while() loop. Continue processing until there
583  // are no more characters to be compressed. The variable
584  // "len" is used to signal this condition.
585  } while (len > 0);
586 
587  // There could still be something in the output buffer. Send it
588  // now.
589 
590  if (code_buf_pos > 1) {
591  // code_buf is the encoded string to send.
592  // code_buf_ptr is the number of characters.
593 
594  sendChars((char *) code_buf, code_buf_pos);
595  }
596 
597 
598  // must set zlen for parent class to know length of compressed buffer
599  zlen = zpos;
600 }
void InsertNode(short int Pos)
Definition: lzsscomprs.cpp:210
unsigned long zlen
Definition: swcomprs.h:39
void DeleteNode(short int Node)
Definition: lzsscomprs.cpp:308
#define THRESHOLD
Definition: lzsscomprs.cpp:55
Private * p
Definition: lzsscomprs.h:35
static short int m_match_length
Definition: lzsscomprs.cpp:65
unsigned long zpos
Definition: swcomprs.h:39
static unsigned char m_ring_buffer[N+F-1]
Definition: lzsscomprs.cpp:63
virtual unsigned long getChars(char *buf, unsigned long len)
Definition: swcomprs.cpp:121
char direct
Definition: swcomprs.h:38
#define F
Definition: lzsscomprs.cpp:54
static short int m_match_position
Definition: lzsscomprs.cpp:64
#define N
Definition: lzsscomprs.cpp:53
virtual unsigned long sendChars(char *buf, unsigned long len)
Definition: swcomprs.cpp:141
unsigned long SWCompress::getChars ( char *  buf,
unsigned long  len 
)
virtualinherited

Definition at line 121 of file swcomprs.cpp.

121  {
122  if (direct) {
123  len = (((zlen - zpos) > (unsigned)len) ? len : zlen - zpos);
124  if (len > 0) {
125  memmove(ibuf, &zbuf[zpos], len);
126  zpos += len;
127  }
128  }
129  else {
130 // slen = strlen(buf);
131  len = (((slen - pos) > (unsigned)len) ? len : slen - pos);
132  if (len > 0) {
133  memmove(ibuf, &buf[pos], len);
134  pos += len;
135  }
136  }
137  return len;
138 }
unsigned long zlen
Definition: swcomprs.h:39
unsigned long slen
Definition: swcomprs.h:39
char * buf
Definition: swcomprs.h:38
unsigned long zpos
Definition: swcomprs.h:39
char direct
Definition: swcomprs.h:38
unsigned long pos
Definition: swcomprs.h:39
char * zbuf
Definition: swcomprs.h:38
char * SWCompress::getCompressedBuf ( unsigned long *  len = 0)
virtualinherited

Definition at line 111 of file swcomprs.cpp.

111  {
112  if (!zbuf) {
113  direct = 0;
114  encode();
115  }
116  if (len) *len = zlen;
117  return zbuf;
118 }
unsigned long zlen
Definition: swcomprs.h:39
virtual void encode(void)
Definition: swcomprs.cpp:180
char direct
Definition: swcomprs.h:38
char * zbuf
Definition: swcomprs.h:38
virtual int SWCompress::getLevel ( )
inlinevirtualinherited

Definition at line 54 of file swcomprs.h.

54 {return level;};
int level
Definition: swcomprs.h:40
char * SWCompress::getUncompressedBuf ( unsigned long *  len = 0)
virtualinherited

Definition at line 90 of file swcomprs.cpp.

90  {
91  if (!buf) {
92  buf = (char *)calloc(1,1); // be sure we at least allocate an empty buf for return;
93  direct = 1;
94  decode();
95  }
96  if (len) *len = slen;
97  return buf;
98 }
virtual void decode(void)
Definition: swcomprs.cpp:193
unsigned long slen
Definition: swcomprs.h:39
char * buf
Definition: swcomprs.h:38
char direct
Definition: swcomprs.h:38
unsigned long SWCompress::sendChars ( char *  buf,
unsigned long  len 
)
virtualinherited

Definition at line 141 of file swcomprs.cpp.

141  {
142  if (direct) {
143  if (buf) {
144 // slen = strlen(buf);
145  if ((pos + len) > (unsigned)slen) {
146  buf = (char *) realloc(buf, pos + len + 1024);
147  memset(&buf[pos], 0, len + 1024);
148  }
149  }
150  else buf = (char *)calloc(1, len + 1024);
151  memmove(&buf[pos], ibuf, len);
152  pos += len;
153  }
154  else {
155  if (zbuf) {
156  if ((zpos + len) > zlen) {
157  zbuf = (char *) realloc(zbuf, zpos + len + 1024);
158  zlen = zpos + len + 1024;
159  }
160  }
161  else {
162  zbuf = (char *)calloc(1, len + 1024);
163  zlen = len + 1024;
164  }
165  memmove(&zbuf[zpos], ibuf, len);
166  zpos += len;
167  }
168  return len;
169 }
unsigned long zlen
Definition: swcomprs.h:39
unsigned long slen
Definition: swcomprs.h:39
char * realloc()
char * buf
Definition: swcomprs.h:38
unsigned long zpos
Definition: swcomprs.h:39
char direct
Definition: swcomprs.h:38
unsigned long pos
Definition: swcomprs.h:39
char * zbuf
Definition: swcomprs.h:38
void SWCompress::setCompressedBuf ( unsigned long *  len,
char *  buf = 0 
)
virtualinherited

Definition at line 101 of file swcomprs.cpp.

101  {
102  if (ibuf) {
103  init();
104  zbuf = (char *) malloc(*len);
105  memcpy(zbuf, ibuf, *len);
106  zlen = *len;
107  }
108  *len = zlen;
109 }
unsigned long zlen
Definition: swcomprs.h:39
char * malloc()
void init()
Definition: swcomprs.cpp:57
char * zbuf
Definition: swcomprs.h:38
virtual void SWCompress::setLevel ( int  l)
inlinevirtualinherited

Reimplemented in XzCompress.

Definition at line 53 of file swcomprs.h.

53 {level = l;};
int level
Definition: swcomprs.h:40
void SWCompress::setUncompressedBuf ( const char *  buf = 0,
unsigned long *  len = 0 
)
virtualinherited

Definition at line 75 of file swcomprs.cpp.

75  {
76  if (ibuf) {
77  init();
78  slen = (len) ? *len : strlen(ibuf);
79  buf = (char *) calloc(slen + 1, 1);
80  memcpy(buf, ibuf, slen);
81  }
82  if (!buf) {
83  buf = (char *)calloc(1,1); // be sure we at least allocate an empty buf for return;
84  direct = 1;
85  decode();
86  if (len) *len = slen;
87  }
88 }
virtual void decode(void)
Definition: swcomprs.cpp:193
unsigned long slen
Definition: swcomprs.h:39
char * buf
Definition: swcomprs.h:38
void init()
Definition: swcomprs.cpp:57
char direct
Definition: swcomprs.h:38

Member Data Documentation

char* SWCompress::buf
mutableprotectedinherited

Definition at line 38 of file swcomprs.h.

char SWCompress::direct
mutableprotectedinherited

Definition at line 38 of file swcomprs.h.

int SWCompress::level
protectedinherited

Definition at line 40 of file swcomprs.h.

Private* LZSSCompress::p
private

Definition at line 35 of file lzsscomprs.h.

unsigned long SWCompress::pos
protectedinherited

Definition at line 39 of file swcomprs.h.

unsigned long SWCompress::slen
protectedinherited

Definition at line 39 of file swcomprs.h.

char * SWCompress::zbuf
mutableprotectedinherited

Definition at line 38 of file swcomprs.h.

unsigned long SWCompress::zlen
protectedinherited

Definition at line 39 of file swcomprs.h.

unsigned long SWCompress::zpos
protectedinherited

Definition at line 39 of file swcomprs.h.


The documentation for this class was generated from the following files: