org.crosswire.common.compress.LZSS (Java2HTML)

1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 or later
5    * as published by the Free Software Foundation. This program is distributed
6    * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7    * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *      http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * © CrossWire Bible Society, 2007 - 2016
18   *
19   */
20  package org.crosswire.common.compress;
21  
22  import java.io.ByteArrayOutputStream;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.util.Arrays;
26  
27  /**
28   * The LZSS compression is a port of code as implemented for STEP. The following
29   * information gives the history of this implementation.
30   * 
31   * <p>
32   * Compression Info, 10-11-95<br>
33   * Jeff Wheeler
34   * </p>
35   * 
36   * <h2><u>Source of Algorithm</u></h2>
37   * 
38   * <p>
39   * The compression algorithms used here are based upon the algorithms developed
40   * and published by Haruhiko Okumura in a paper entitled
41   * "Data Compression Algorithms of LARC and LHarc." This paper discusses three
42   * compression algorithms, LSZZ, LZARI, and LZHUF. LZSS is described as the
43   * "first" of these, and is described as providing moderate compression with
44   * good speed. LZARI is described as an improved LZSS, a combination of the LZSS
45   * algorithm with adaptive arithmetic compression. It is described as being
46   * slower than LZSS but with better compression. LZHUF (the basis of the common
47   * LHA compression program) was included in the paper, however, a free usage
48   * license was not included.
49   * </p>
50   * 
51   * <p>
52   * The following are copies of the statements included at the beginning of each
53   * source code listing that was supplied in the working paper.
54   * </p>
55   * 
56   * <blockquote>LZSS, dated 4/6/89, marked as "Use, distribute and modify this
57   * program freely."</blockquote>
58   * 
59   * <blockquote>LZARI, dated 4/7/89, marked as "Use, distribute and modify this
60   * program freely."</blockquote>
61   * 
62   * <blockquote>LZHUF, dated 11/20/88, written by Haruyasu Yoshizaki, translated
63   * by Haruhiko Okumura on 4/7/89. Not expressly marked as redistributable or
64   * modifiable.</blockquote>
65   * 
66   * <p>
67   * Since both LZSS and LZARI are marked as "use, distribute and modify freely"
68   * we have felt at liberty basing our compression algorithm on either of these.
69   * </p>
70   * 
71   * <h2><u>Selection of Algorithm</u></h2>
72   * 
73   * <p>
74   * Working samples of three possible compression algorithms are supplied in
75   * Okumura's paper. Which should be used?
76   * </p>
77   * 
78   * <p>
79   * LZSS is the fastest at decompression, but does not generated as small a
80   * compressed file as the other methods. The other two methods provided,
81   * perhaps, a 15% improvement in compression. Or, put another way, on a 100K
82   * file, LZSS might compress it to 50K while the others might approach 40-45K.
83   * For STEP purposes, it was decided that decoding speed was of more importance
84   * than tighter compression. For these reasons, the first compression algorithm
85   * implemented is the LZSS algorithm.
86   * </p>
87   * 
88   * <h2><u>About LZSS Encoding</u></h2>
89   * 
90   * <p>
91   * (adapted from Haruhiko Okumura's paper)
92   * </p>
93   * 
94   * <p>
95   * This scheme was proposed by Ziv and Lempel [1]. A slightly modified version
96   * is described by Storer and Szymanski [2]. An implementation using a binary
97   * tree has been proposed by Bell [3].
98   * </p>
99   * 
100  * The algorithm is quite simple.
101  * <ol>
102  * <li>Keep a ring buffer which initially contains all space characters.</li>
103  * <li>Read several letters from the file to the buffer.</li>
104  * <li>Search the buffer for the longest string that matches the letters just
105  * read, and send its length and position into the buffer.</li>
106  * </ol>
107  * 
108  * <p>
109  * If the ring buffer is 4096 bytes, the position can be stored in 12 bits. If
110  * the length is represented in 4 bits, the &lt;position, length&gt; pair is two bytes
111  * long. If the longest match is no more than two characters, then just one
112  * character is sent without encoding. The process starts again with the next
113  * character. An extra bit is sent each time to tell the decoder whether the
114  * next item is a character of a &lt;position, length&gt; pair.
115  * </p>
116  * 
117  * <p>
118  * [1] J. Ziv and A. Lempel, IEEE Transactions IT-23, 337-343 (1977).<br>
119  * [2] J. A. Storer and T. G. Szymanski, J. ACM, 29, 928-951 (1982).<br>
120  * [3] T.C. Gell, IEEE Transactions COM-34, 1176-1182 (1986).
121  * </p>
122  * 
123  * Regarding this port to Java and not the original code, the following license
124  * applies:
125  * 
126  * @see gnu.lgpl.License The GNU Lesser General Public License for details.
127  * @author DM Smith
128  */
129 public class LZSS extends AbstractCompressor {
130     /**
131      * Create an LZSS that is capable of transforming the input.
132      * 
133      * @param input
134      *            to compress or uncompress.
135      */
136     public LZSS(InputStream input) {
137         super(input);
138         ringBuffer = new byte[RING_SIZE + MAX_STORE_LENGTH - 1];
139         dad = new short[RING_SIZE + 1];
140         leftSon = new short[RING_SIZE + 1];
141         rightSon = new short[RING_SIZE + 257];
142     }
143 
144     /*
145      * (non-Javadoc)
146      * 
147      * @see org.crosswire.common.compress.Compressor#compress()
148      */
149     public ByteArrayOutputStream compress() throws IOException {
150         out = new ByteArrayOutputStream(BUF_SIZE);
151 
152         short i; // an iterator
153         short r; // node number in the binary tree
154         short s; // position in the ring buffer
155         short len; // length of initial string
156         short lastMatchLength; // length of last match
157         short codeBufPos; // position in the output buffer
158         byte[] codeBuff = new byte[17]; // the output buffer
159         byte mask; // bit mask for byte 0 of out input
160         byte c; // character read from string
161 
162         // Start with a clean tree.
163         initTree();
164 
165         // codeBuff[0] works as eight flags. A "1" represents that the
166         // unit is an unencoded letter (1 byte), and a "0" represents
167         // that the next unit is a <position,length> pair (2 bytes).
168         //
169         // codeBuff[1..16] stores eight units of code. Since the best
170         // we can do is store eight <position,length> pairs, at most 16
171         // bytes are needed to store this.
172         //
173         // This is why the maximum size of the code buffer is 17 bytes.
174         codeBuff[0] = 0;
175         codeBufPos = 1;
176 
177         // Mask iterates over the 8 bits in the code buffer. The first
178         // character ends up being stored in the low bit.
179         //
180         // bit 8 7 6 5 4 3 2 1
181         // | |
182         // | first sequence in code buffer
183         // |
184         // last sequence in code buffer
185         mask = 1;
186 
187         s = 0;
188         r = RING_SIZE - MAX_STORE_LENGTH;
189 
190         // Initialize the ring buffer with spaces...
191 
192         // Note that the last MAX_STORE_LENGTH bytes of the ring buffer are not
193         // filled.
194         // This is because those MAX_STORE_LENGTH bytes will be filled in
195         // immediately
196         // with bytes from the input stream.
197         Arrays.fill(ringBuffer, 0, r, (byte) ' ');
198 
199         // Read MAX_STORE_LENGTH bytes into the last MAX_STORE_LENGTH bytes of
200         // the ring buffer.
201         //
202         // This function loads the buffer with up to MAX_STORE_LENGTH characters
203         // and returns
204         // the actual amount loaded.
205         int readResult = input.read(ringBuffer, r, MAX_STORE_LENGTH);
206 
207         // Make sure there is something to be compressed.
208         if (readResult <= 0) {
209             return out;
210         }
211 
212         len = (short) readResult;
213 
214         // Insert the MAX_STORE_LENGTH strings, each of which begins with one or
215         // more
216         // 'space' characters. Note the order in which these strings
217         // are inserted. This way, degenerate trees will be less likely
218         // to occur.
219         for (i = 1; i <= MAX_STORE_LENGTH; i++) {
220             insertNode((short) (r - i));
221         }
222 
223         // Finally, insert the whole string just read. The
224         // member variables matchLength and matchPosition are set.
225         insertNode(r);
226 
227         // Now that we're preloaded, continue till done.
228         do {
229 
230             // matchLength may be spuriously long near the end of text.
231             if (matchLength > len) {
232                 matchLength = len;
233             }
234 
235             // Is it cheaper to store this as a single character? If so, make it
236             // so.
237             if (matchLength < THRESHOLD) {
238                 // Send one character. Remember that codeBuff[0] is the
239                 // set of flags for the next eight items.
240                 matchLength = 1;
241                 codeBuff[0] |= mask;
242                 codeBuff[codeBufPos++] = ringBuffer[r];
243             } else {
244                 // Otherwise, we do indeed have a string that can be stored
245                 // compressed to save space.
246 
247                 // The next 16 bits need to contain the position (12 bits)
248                 // and the length (4 bits).
249                 codeBuff[codeBufPos++] = (byte) matchPosition;
250                 codeBuff[codeBufPos++] = (byte) (((matchPosition >> 4) & 0xF0) | (matchLength - THRESHOLD));
251             }
252 
253             // Shift the mask one bit to the left so that it will be ready
254             // to store the new bit.
255             mask <<= 1;
256 
257             // If the mask is now 0, then we know that we have a full set
258             // of flags and items in the code buffer. These need to be
259             // output.
260             if (mask == 0) {
261                 // codeBuff is the buffer of characters to be output.
262                 // codeBufPos is the number of characters it contains.
263                 out.write(codeBuff, 0, codeBufPos);
264 
265                 // Reset for next buffer...
266                 codeBuff[0] = 0;
267                 codeBufPos = 1;
268                 mask = 1;
269             }
270 
271             lastMatchLength = matchLength;
272 
273             // Delete old strings and read new bytes...
274             for (i = 0; i < lastMatchLength; i++) {
275 
276                 // Get next character...
277                 readResult = input.read();
278                 if (readResult == -1) {
279                     break;
280                 }
281                 c = (byte) readResult;
282 
283                 // Delete "old strings"
284                 deleteNode(s);
285 
286                 // Put this character into the ring buffer.
287                 //
288                 // The original comment here says "If the position is near
289                 // the end of the buffer, extend the buffer to make
290                 // string comparison easier."
291                 //
292                 // That's a little misleading, because the "end" of the
293                 // buffer is really what we consider to be the "beginning"
294                 // of the buffer, that is, positions 0 through MAX_STORE_LENGTH.
295                 //
296                 // The idea is that the front end of the buffer is duplicated
297                 // into the back end so that when you're looking at characters
298                 // at the back end of the buffer, you can index ahead (beyond
299                 // the normal end of the buffer) and see the characters
300                 // that are at the front end of the buffer without having
301                 // to adjust the index.
302                 //
303                 // That is...
304                 //
305                 // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234
306                 // | | |
307                 // position 0 end of buffer |
308                 // |
309                 // duplicate of front of buffer
310                 ringBuffer[s] = c;
311 
312                 if (s < MAX_STORE_LENGTH - 1) {
313                     ringBuffer[s + RING_SIZE] = c;
314                 }
315 
316                 // Increment the position, and wrap around when we're at
317                 // the end. Note that this relies on RING_SIZE being a power of
318                 // 2.
319                 s = (short) ((s + 1) & RING_WRAP);
320                 r = (short) ((r + 1) & RING_WRAP);
321 
322                 // Register the string that is found in
323                 // ringBuffer[r..r + MAX_STORE_LENGTH - 1].
324                 insertNode(r);
325             }
326 
327             // If we didn't quit because we hit the lastMatchLength,
328             // then we must have quit because we ran out of characters
329             // to process.
330             while (i++ < lastMatchLength) {
331                 deleteNode(s);
332 
333                 s = (short) ((s + 1) & RING_WRAP);
334                 r = (short) ((r + 1) & RING_WRAP);
335 
336                 // Note that len hitting 0 is the key that causes the
337                 // do...while() to terminate. This is the only place
338                 // within the loop that len is modified.
339                 //
340                 // Its original value is MAX_STORE_LENGTH (or a number less than
341                 // MAX_STORE_LENGTH for
342                 // short strings).
343                 if (--len != 0) {
344                     insertNode(r); /* buffer may not be empty. */
345                 }
346             }
347 
348             // End of do...while() loop. Continue processing until there
349             // are no more characters to be compressed. The variable
350             // "len" is used to signal this condition.
351         } while (len > 0);
352 
353         // There could still be something in the output buffer. Send it now.
354         if (codeBufPos > 1) {
355             // codeBuff is the encoded string to send.
356             // codeBufPos is the number of characters.
357             out.write(codeBuff, 0, codeBufPos);
358         }
359 
360         return out;
361     }
362 
363     /*
364      * (non-Javadoc)
365      * 
366      * @see org.crosswire.common.compress.Compressor#uncompress()
367      */
368     public ByteArrayOutputStream uncompress() throws IOException {
369         return uncompress(BUF_SIZE);
370     }
371 
372     /*
373      * (non-Javadoc)
374      * 
375      * @see org.crosswire.common.compress.Compressor#uncompress(int)
376      */
377     public ByteArrayOutputStream uncompress(int expectedSize) throws IOException {
378         out = new ByteArrayOutputStream(expectedSize);
379 
380         byte[] c = new byte[MAX_STORE_LENGTH]; // an array of chars
381         byte flags; // 8 bits of flags
382 
383         // Initialize the ring buffer with a common string.
384         //
385         // Note that the last MAX_STORE_LENGTH bytes of the ring buffer are not
386         // filled.
387         // r is a nodeNumber
388         int r = RING_SIZE - MAX_STORE_LENGTH;
389         Arrays.fill(ringBuffer, 0, r, (byte) ' ');
390 
391         flags = 0;
392         int flagCount = 0; // which flag we're on
393 
394         while (true) {
395             // If there are more bits of interest in this flag, then
396             // shift that next interesting bit into the 1's position.
397             //
398             // If this flag has been exhausted, the next byte must be a flag.
399             if (flagCount > 0) {
400                 flags = (byte) (flags >> 1);
401                 flagCount--;
402             } else {
403                 // Next byte must be a flag.
404                 int readResult = input.read();
405                 if (readResult == -1) {
406                     break;
407                 }
408 
409                 flags = (byte) (readResult & 0xFF);
410 
411                 // Set the flag counter. While at first it might appear
412                 // that this should be an 8 since there are 8 bits in the
413                 // flag, it should really be a 7 because the shift must
414                 // be performed 7 times in order to see all 8 bits.
415                 flagCount = 7;
416             }
417 
418             // If the low order bit of the flag is now set, then we know
419             // that the next byte is a single, unencoded character.
420             if ((flags & 1) != 0) {
421                 if (input.read(c, 0, 1) != 1) {
422                     break;
423                 }
424 
425                 out.write(c[0]);
426 
427                 // Add to buffer, and increment to next spot. Wrap at end.
428                 ringBuffer[r] = c[0];
429                 r = (short) ((r + 1) & RING_WRAP);
430             } else {
431                 // Otherwise, we know that the next two bytes are a
432                 // <position,length> pair. The position is in 12 bits and
433                 // the length is in 4 bits.
434                 if (input.read(c, 0, 2) != 2) {
435                     break;
436                 }
437 
438                 // Convert these two characters into the position and
439                 // length in the ringBuffer. Note that the length is always at
440                 // least
441                 // THRESHOLD, which is why we're able to get a length
442                 // of 18 out of only 4 bits.
443                 short pos = (short) ((c[0] & 0xFF) | ((c[1] & 0xF0) << 4));
444                 short len = (short) ((c[1] & 0x0F) + THRESHOLD);
445 
446                 // There are now "len" characters at position "pos" in
447                 // the ring buffer that can be pulled out. Note that
448                 // len is never more than MAX_STORE_LENGTH.
449                 for (int k = 0; k < len; k++) {
450                     c[k] = ringBuffer[(pos + k) & RING_WRAP];
451 
452                     // Add to buffer, and increment to next spot. Wrap at end.
453                     ringBuffer[r] = c[k];
454                     r = (r + 1) & RING_WRAP;
455                 }
456 
457                 // Add the "len" characters to the output stream.
458                 out.write(c, 0, len);
459             }
460         }
461         return out;
462     }
463 
464     /**
465      * Initializes the tree nodes to "empty" states.
466      */
467     private void initTree() {
468         // For i = 0 to RING_SIZE - 1, rightSon[i] and leftSon[i] will be the
469         // right
470         // and left children of node i. These nodes need not be
471         // initialized. However, for debugging purposes, it is nice to
472         // have them initialized. Since this is only used for compression
473         // (not decompression), I don't mind spending the time to do it.
474         //
475         // For the same range of i, dad[i] is the parent of node i.
476         // These are initialized to a known value that can represent
477         // a "not used" state.
478         // For i = 0 to 255, rightSon[RING_SIZE + i + 1] is the root of the tree
479         // for strings that begin with the character i. This is why
480         // the right child array is larger than the left child array.
481         // These are also initialized to a "not used" state.
482         //
483         // Note that there are 256 of these, one for each of the possible
484         // 256 characters.
485         Arrays.fill(dad, 0, dad.length, NOT_USED);
486         Arrays.fill(leftSon, 0, leftSon.length, NOT_USED);
487         Arrays.fill(rightSon, 0, rightSon.length, NOT_USED);
488     }
489 
490     /**
491      * Inserts a string from the ring buffer into one of the trees. It loads the
492      * match position and length member variables for the longest match.
493      * 
494      * <p>
495      * The string to be inserted is identified by the parameter pos, A full
496      * MAX_STORE_LENGTH bytes are inserted. So, ringBuffer[pos ...
497      * pos+MAX_STORE_LENGTH-1] are inserted.
498      * </p>
499      * 
500      * <p>
501      * If the matched length is exactly MAX_STORE_LENGTH, then an old node is
502      * removed in favor of the new one (because the old one will be deleted
503      * sooner).
504      * </p>
505      * 
506      * @param pos
507      *            plays a dual role. It is used as both a position in the ring
508      *            buffer and also as a tree node. ringBuffer[pos] defines a
509      *            character that is used to identify a tree node.
510      */
511     private void insertNode(short pos) {
512         assert pos >= 0 && pos < RING_SIZE;
513 
514         int cmp = 1;
515         short key = pos;
516 
517         // The last 256 entries in rightSon contain the root nodes for
518         // strings that begin with a letter. Get an index for the
519         // first letter in this string.
520         short p = (short) (RING_SIZE + 1 + (ringBuffer[key] & 0xFF));
521         assert p > RING_SIZE;
522 
523         // Set the left and right tree nodes for this position to "not used."
524         leftSon[pos] = NOT_USED;
525         rightSon[pos] = NOT_USED;
526 
527         // Haven't matched anything yet.
528         matchLength = 0;
529 
530         while (true) {
531             if (cmp >= 0) {
532                 if (rightSon[p] != NOT_USED) {
533                     p = rightSon[p];
534                 } else {
535                     rightSon[p] = pos;
536                     dad[pos] = p;
537                     return;
538                 }
539             } else {
540                 if (leftSon[p] != NOT_USED) {
541                     p = leftSon[p];
542                 } else {
543                     leftSon[p] = pos;
544                     dad[pos] = p;
545                     return;
546                 }
547             }
548 
549             // Should we go to the right or the left to look for the
550             // next match?
551             short i = 0;
552             for (i = 1; i < MAX_STORE_LENGTH; i++) {
553                 cmp = (ringBuffer[key + i] & 0xFF) - (ringBuffer[p + i] & 0xFF);
554                 if (cmp != 0) {
555                     break;
556                 }
557             }
558 
559             if (i > matchLength) {
560                 matchPosition = p;
561                 matchLength = i;
562 
563                 if (i >= MAX_STORE_LENGTH) {
564                     break;
565                 }
566             }
567         }
568 
569         dad[pos] = dad[p];
570         leftSon[pos] = leftSon[p];
571         rightSon[pos] = rightSon[p];
572 
573         dad[leftSon[p]] = pos;
574         dad[rightSon[p]] = pos;
575 
576         if (rightSon[dad[p]] == p) {
577             rightSon[dad[p]] = pos;
578         } else {
579             leftSon[dad[p]] = pos;
580         }
581 
582         // Remove "p"
583         dad[p] = NOT_USED;
584     }
585 
586     /**
587      * Remove a node from the tree.
588      * 
589      * @param node
590      *            the node to remove
591      */
592     private void deleteNode(short node) {
593         assert node >= 0 && node < (RING_SIZE + 1);
594 
595         short q;
596 
597         if (dad[node] == NOT_USED) {
598             // not in tree, nothing to do
599             return;
600         }
601 
602         if (rightSon[node] == NOT_USED) {
603             q = leftSon[node];
604         } else if (leftSon[node] == NOT_USED) {
605             q = rightSon[node];
606         } else {
607             q = leftSon[node];
608             if (rightSon[q] != NOT_USED) {
609                 do {
610                     q = rightSon[q];
611                 } while (rightSon[q] != NOT_USED);
612 
613                 rightSon[dad[q]] = leftSon[q];
614                 dad[leftSon[q]] = dad[q];
615                 leftSon[q] = leftSon[node];
616                 dad[leftSon[node]] = q;
617             }
618 
619             rightSon[q] = rightSon[node];
620             dad[rightSon[node]] = q;
621         }
622 
623         dad[q] = dad[node];
624 
625         if (rightSon[dad[node]] == node) {
626             rightSon[dad[node]] = q;
627         } else {
628             leftSon[dad[node]] = q;
629         }
630 
631         dad[node] = NOT_USED;
632     }
633 
634     /**
635      * This is the size of the ring buffer. It is set to 4K. It is important to
636      * note that a position within the ring buffer requires 12 bits.
637      */
638     private static final short RING_SIZE = 4096;
639 
640     /**
641      * This is used to determine the next position in the ring buffer, from 0 to
642      * RING_SIZE - 1. The idiom s = (s + 1) &amp; RING_WRAP; will ensure this. This
643      * only works if RING_SIZE is a power of 2. Note this is slightly faster
644      * than the equivalent: s = (s + 1) % RING_SIZE;
645      */
646     private static final short RING_WRAP = RING_SIZE - 1;
647 
648     /**
649      * This is the maximum length of a character sequence that can be taken from
650      * the ring buffer. It is set to 18. Note that a length must be 3 before it
651      * is worthwhile to store a position/length pair, so the length can be
652      * encoded in only 4 bits. Or, put yet another way, it is not necessary to
653      * encode a length of 0-18, it is necessary to encode a length of 3-18,
654      * which requires 4 bits.
655      * <p>
656      * Note that the 12 bits used to store the position and the 4 bits used to
657      * store the length equal a total of 16 bits, or 2 bytes.
658      * </p>
659      */
660     private static final int MAX_STORE_LENGTH = 18;
661 
662     /**
663      * It takes 2 bytes to store an offset and a length. If a character sequence
664      * only requires 1 or 2 characters to store uncompressed, then it is better
665      * to store it uncompressed than as an offset into the ring buffer.
666      */
667     private static final int THRESHOLD = 3;
668 
669     /**
670      * Used to mark nodes as not used.
671      */
672     private static final short NOT_USED = RING_SIZE;
673 
674     /**
675      * A text buffer. It contains "nodes" of uncompressed text that can be
676      * indexed by position. That is, a substring of the ring buffer can be
677      * indexed by a position and a length. When decoding, the compressed text
678      * may contain a position in the ring buffer and a count of the number of
679      * bytes from the ring buffer that are to be moved into the uncompressed
680      * buffer.
681      * 
682      * <p>
683      * This ring buffer is not maintained as part of the compressed text.
684      * Instead, it is reconstructed dynamically. That is, it starts out empty
685      * and gets built as the text is decompressed.
686      * </p>
687      * 
688      * <p>
689      * The ring buffer contain RING_SIZE bytes, with an additional
690      * MAX_STORE_LENGTH - 1 bytes to facilitate string comparison.
691      * </p>
692      */
693     private byte[] ringBuffer;
694 
695     /**
696      * The position in the ring buffer. Used by insertNode.
697      */
698     private short matchPosition;
699 
700     /**
701      * The number of characters in the ring buffer at matchPosition that match a
702      * given string. Used by insertNode.
703      */
704     private short matchLength;
705 
706     /**
707      * leftSon, rightSon, and dad are the Japanese way of referring to a tree
708      * structure. The dad is the parent and it has a right and left son (child).
709      * 
710      * <p>
711      * For i = 0 to RING_SIZE-1, rightSon[i] and leftSon[i] will be the right
712      * and left children of node i.
713      * </p>
714      * 
715      * <p>
716      * For i = 0 to RING_SIZE-1, dad[i] is the parent of node i.
717      * </p>
718      * 
719      * <p>
720      * For i = 0 to 255, rightSon[RING_SIZE + i + 1] is the root of the tree for
721      * strings that begin with the character i. Note that this requires one byte
722      * characters.
723      * </p>
724      * 
725      * <p>
726      * These nodes store values of 0...(RING_SIZE-1). Memory requirements can be
727      * reduces by using 2-byte integers instead of full 4-byte integers (for
728      * 32-bit applications). Therefore, these are defined as "shorts."
729      * </p>
730      */
731     private short[] dad;
732     private short[] leftSon;
733     private short[] rightSon;
734 
735     /**
736      * The output stream containing the result.
737      */
738     private ByteArrayOutputStream out;
739 }
740