1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2007
18   *     The copyright to this program is held by it's authors.
19   *
20   * ID: $Id: LZSS.java 1966 2009-10-30 01:15:14Z dmsmith $
21   */
22  
23  package org.crosswire.common.compress;
24  
25  import java.io.ByteArrayOutputStream;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.util.Arrays;
29  
30  /**
31   * The LZSS compression is a port of code as implemented for STEP. The following
32   * information gives the history of this implementation.
33   * 
34   * <p>
35   * Compression Info, 10-11-95<br/>
36   * Jeff Wheeler
37   * </p>
38   * 
39   * <h2><u>Source of Algorithm</u></h2>
40   * 
41   * <p>
42   * The compression algorithms used here are based upon the algorithms developed
43   * and published by Haruhiko Okumura in a paper entitled
44   * "Data Compression Algorithms of LARC and LHarc." This paper discusses three
45   * compression algorithms, LSZZ, LZARI, and LZHUF. LZSS is described as the
46   * "first" of these, and is described as providing moderate compression with
47   * good speed. LZARI is described as an improved LZSS, a combination of the LZSS
48   * algorithm with adaptive arithmetic compression. It is described as being
49   * slower than LZSS but with better compression. LZHUF (the basis of the common
50   * LHA compression program) was included in the paper, however, a free usage
51   * license was not included.
52   * </p>
53   * 
54   * <p>
55   * The following are copies of the statements included at the beginning of each
56   * source code listing that was supplied in the working paper.
57   * </p>
58   * 
59   * <blockquote>LZSS, dated 4/6/89, marked as "Use, distribute and modify this
60   * program freely."</blockquote>
61   * 
62   * <blockquote>LZARI, dated 4/7/89, marked as "Use, distribute and modify this
63   * program freely."</blockquote>
64   * 
65   * <blockquote>LZHUF, dated 11/20/88, written by Haruyasu Yoshizaki, translated
66   * by Haruhiko Okumura on 4/7/89. Not expressly marked as redistributable or
67   * modifiable.</blockquote>
68   * 
69   * <p>
70   * Since both LZSS and LZARI are marked as "use, distribute and modify freely"
71   * we have felt at liberty basing our compression algorithm on either of these.
72   * </p>
73   * 
74   * <h2><u>Selection of Algorithm</u></h2>
75   * 
76   * <p>
77   * Working samples of three possible compression algorithms are supplied in
78   * Okumura's paper. Which should be used?
79   * </p>
80   * 
81   * <p>
82   * LZSS is the fastest at decompression, but does not generated as small a
83   * compressed file as the other methods. The other two methods provided,
84   * perhaps, a 15% improvement in compression. Or, put another way, on a 100K
85   * file, LZSS might compress it to 50K while the others might approach 40-45K.
86   * For STEP purposes, it was decided that decoding speed was of more importance
87   * than tighter compression. For these reasons, the first compression algorithm
88   * implemented is the LZSS algorithm.
89   * </p>
90   * 
91   * <h2><u>About LZSS Encoding</u></h2>
92   * 
93   * <p>
94   * (adapted from Haruhiko Okumura's paper)
95   * </p>
96   * 
97   * <p>
98   * This scheme was proposed by Ziv and Lempel [1]. A slightly modified version
99   * is described by Storer and Szymanski [2]. An implementation using a binary
100  * tree has been proposed by Bell [3].
101  * </p>
102  * 
103  * The algorithm is quite simple.<br/>
104  * <ol>
105  * <li>Keep a ring buffer which initially contains all space characters.</li>
106  * <li>Read several letters from the file to the buffer.</li>
107  * <li>Search the buffer for the longest string that matches the letters just
108  * read, and send its length and position into the buffer.</li>
109  * </ol>
110  * 
111  * <p>
112  * If the ring buffer is 4096 bytes, the position can be stored in 12 bits. If
113  * the length is represented in 4 bits, the <position, length> pair is two bytes
114  * long. If the longest match is no more than two characters, then just one
115  * character is sent without encoding. The process starts again with the next
116  * character. An extra bit is sent each time to tell the decoder whether the
117  * next item is a character of a <position, length> pair.
118  * </p>
119  * 
120  * <p>
121  * [1] J. Ziv and A. Lempel, IEEE Transactions IT-23, 337-343 (1977).<br/>
122  * [2] J. A. Storer and T. G. Szymanski, J. ACM, 29, 928-951 (1982).<br/>
123  * [3] T.C. Gell, IEEE Transactions COM-34, 1176-1182 (1986).
124  * </p>
125  * 
126  * Regarding this port to Java and not the original code, the following license
127  * applies:
128  * 
129  * @see gnu.lgpl.License for license details.<br/>
130  *      The copyright to this program is held by it's authors.
131  * @author DM Smith [dmsmith555 at yahoo dot com]
132  */
133 public class LZSS extends AbstractCompressor {
134     /**
135      * Create an LZSS that is capable of transforming the input.
136      * 
137      * @param input
138      *            to compress or uncompress.
139      */
140     public LZSS(InputStream input) {
141         super(input);
142         ringBuffer = new byte[RING_SIZE + MAX_STORE_LENGTH - 1];
143         dad = new short[RING_SIZE + 1];
144         leftSon = new short[RING_SIZE + 1];
145         rightSon = new short[RING_SIZE + 257];
146     }
147 
148     /*
149      * (non-Javadoc)
150      * 
151      * @see org.crosswire.common.compress.Compressor#compress()
152      */
153     public ByteArrayOutputStream compress() throws IOException {
154         out = new ByteArrayOutputStream(BUF_SIZE);
155 
156         short i; // an iterator
157         short r; // node number in the binary tree
158         short s; // position in the ring buffer
159         short len; // length of initial string
160         short lastMatchLength; // length of last match
161         short codeBufPos; // position in the output buffer
162         byte[] codeBuff = new byte[17]; // the output buffer
163         byte mask; // bit mask for byte 0 of out input
164         byte c; // character read from string
165 
166         // Start with a clean tree.
167         initTree();
168 
169         // codeBuff[0] works as eight flags. A "1" represents that the
170         // unit is an unencoded letter (1 byte), and a "0" represents
171         // that the next unit is a <position,length> pair (2 bytes).
172         //
173         // codeBuff[1..16] stores eight units of code. Since the best
174         // we can do is store eight <position,length> pairs, at most 16
175         // bytes are needed to store this.
176         //
177         // This is why the maximum size of the code buffer is 17 bytes.
178         codeBuff[0] = 0;
179         codeBufPos = 1;
180 
181         // Mask iterates over the 8 bits in the code buffer. The first
182         // character ends up being stored in the low bit.
183         //
184         // bit 8 7 6 5 4 3 2 1
185         // | |
186         // | first sequence in code buffer
187         // |
188         // last sequence in code buffer
189         mask = 1;
190 
191         s = 0;
192         r = RING_SIZE - MAX_STORE_LENGTH;
193 
194         // Initialize the ring buffer with spaces...
195 
196         // Note that the last MAX_STORE_LENGTH bytes of the ring buffer are not
197         // filled.
198         // This is because those MAX_STORE_LENGTH bytes will be filled in
199         // immediately
200         // with bytes from the input stream.
201         Arrays.fill(ringBuffer, 0, r, (byte) ' ');
202 
203         // Read MAX_STORE_LENGTH bytes into the last MAX_STORE_LENGTH bytes of
204         // the ring buffer.
205         //
206         // This function loads the buffer with up to MAX_STORE_LENGTH characters
207         // and returns
208         // the actual amount loaded.
209         int readResult = input.read(ringBuffer, r, MAX_STORE_LENGTH);
210 
211         // Make sure there is something to be compressed.
212         if (readResult <= 0) {
213             return out;
214         }
215 
216         len = (short) readResult;
217 
218         // Insert the MAX_STORE_LENGTH strings, each of which begins with one or
219         // more
220         // 'space' characters. Note the order in which these strings
221         // are inserted. This way, degenerate trees will be less likely
222         // to occur.
223         for (i = 1; i <= MAX_STORE_LENGTH; i++) {
224             insertNode((short) (r - i));
225         }
226 
227         // Finally, insert the whole string just read. The
228         // member variables matchLength and matchPosition are set.
229         insertNode(r);
230 
231         // Now that we're preloaded, continue till done.
232         do {
233 
234             // matchLength may be spuriously long near the end of text.
235             if (matchLength > len) {
236                 matchLength = len;
237             }
238 
239             // Is it cheaper to store this as a single character? If so, make it
240             // so.
241             if (matchLength < THRESHOLD) {
242                 // Send one character. Remember that codeBuff[0] is the
243                 // set of flags for the next eight items.
244                 matchLength = 1;
245                 codeBuff[0] |= mask;
246                 codeBuff[codeBufPos++] = ringBuffer[r];
247             } else {
248                 // Otherwise, we do indeed have a string that can be stored
249                 // compressed to save space.
250 
251                 // The next 16 bits need to contain the position (12 bits)
252                 // and the length (4 bits).
253                 codeBuff[codeBufPos++] = (byte) matchPosition;
254                 codeBuff[codeBufPos++] = (byte) (((matchPosition >> 4) & 0xF0) | (matchLength - THRESHOLD));
255             }
256 
257             // Shift the mask one bit to the left so that it will be ready
258             // to store the new bit.
259             mask <<= 1;
260 
261             // If the mask is now 0, then we know that we have a full set
262             // of flags and items in the code buffer. These need to be
263             // output.
264             if (mask == 0) {
265                 // codeBuff is the buffer of characters to be output.
266                 // codeBufPos is the number of characters it contains.
267                 out.write(codeBuff, 0, codeBufPos);
268 
269                 // Reset for next buffer...
270                 codeBuff[0] = 0;
271                 codeBufPos = 1;
272                 mask = 1;
273             }
274 
275             lastMatchLength = matchLength;
276 
277             // Delete old strings and read new bytes...
278             for (i = 0; i < lastMatchLength; i++) {
279 
280                 // Get next character...
281                 readResult = input.read();
282                 if (readResult == -1) {
283                     break;
284                 }
285                 c = (byte) readResult;
286 
287                 // Delete "old strings"
288                 deleteNode(s);
289 
290                 // Put this character into the ring buffer.
291                 //
292                 // The original comment here says "If the position is near
293                 // the end of the buffer, extend the buffer to make
294                 // string comparison easier."
295                 //
296                 // That's a little misleading, because the "end" of the
297                 // buffer is really what we consider to be the "beginning"
298                 // of the buffer, that is, positions 0 through MAX_STORE_LENGTH.
299                 //
300                 // The idea is that the front end of the buffer is duplicated
301                 // into the back end so that when you're looking at characters
302                 // at the back end of the buffer, you can index ahead (beyond
303                 // the normal end of the buffer) and see the characters
304                 // that are at the front end of the buffer without having
305                 // to adjust the index.
306                 //
307                 // That is...
308                 //
309                 // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234
310                 // | | |
311                 // position 0 end of buffer |
312                 // |
313                 // duplicate of front of buffer
314                 ringBuffer[s] = c;
315 
316                 if (s < MAX_STORE_LENGTH - 1) {
317                     ringBuffer[s + RING_SIZE] = c;
318                 }
319 
320                 // Increment the position, and wrap around when we're at
321                 // the end. Note that this relies on RING_SIZE being a power of
322                 // 2.
323                 s = (short) ((s + 1) & RING_WRAP);
324                 r = (short) ((r + 1) & RING_WRAP);
325 
326                 // Register the string that is found in
327                 // ringBuffer[r..r + MAX_STORE_LENGTH - 1].
328                 insertNode(r);
329             }
330 
331             // If we didn't quit because we hit the lastMatchLength,
332             // then we must have quit because we ran out of characters
333             // to process.
334             while (i++ < lastMatchLength) {
335                 deleteNode(s);
336 
337                 s = (short) ((s + 1) & RING_WRAP);
338                 r = (short) ((r + 1) & RING_WRAP);
339 
340                 // Note that len hitting 0 is the key that causes the
341                 // do...while() to terminate. This is the only place
342                 // within the loop that len is modified.
343                 //
344                 // Its original value is MAX_STORE_LENGTH (or a number less than
345                 // MAX_STORE_LENGTH for
346                 // short strings).
347                 if (--len != 0) {
348                     insertNode(r); /* buffer may not be empty. */
349                 }
350             }
351 
352             // End of do...while() loop. Continue processing until there
353             // are no more characters to be compressed. The variable
354             // "len" is used to signal this condition.
355         } while (len > 0);
356 
357         // There could still be something in the output buffer. Send it now.
358         if (codeBufPos > 1) {
359             // codeBuff is the encoded string to send.
360             // codeBufPos is the number of characters.
361             out.write(codeBuff, 0, codeBufPos);
362         }
363 
364         return out;
365     }
366 
367     /*
368      * (non-Javadoc)
369      * 
370      * @see org.crosswire.common.compress.Compressor#uncompress()
371      */
372     public ByteArrayOutputStream uncompress() throws IOException {
373         return uncompress(BUF_SIZE);
374     }
375 
376     /*
377      * (non-Javadoc)
378      * 
379      * @see org.crosswire.common.compress.Compressor#uncompress(int)
380      */
381     public ByteArrayOutputStream uncompress(int expectedSize) throws IOException {
382         out = new ByteArrayOutputStream(expectedSize);
383 
384         byte[] c = new byte[MAX_STORE_LENGTH]; // an array of chars
385         byte flags; // 8 bits of flags
386 
387         // Initialize the ring buffer with a common string.
388         //
389         // Note that the last MAX_STORE_LENGTH bytes of the ring buffer are not
390         // filled.
391         // r is a nodeNumber
392         int r = RING_SIZE - MAX_STORE_LENGTH;
393         Arrays.fill(ringBuffer, 0, r, (byte) ' ');
394 
395         flags = 0;
396         int flagCount = 0; // which flag we're on
397 
398         while (true) {
399             // If there are more bits of interest in this flag, then
400             // shift that next interesting bit into the 1's position.
401             //
402             // If this flag has been exhausted, the next byte must be a flag.
403             if (flagCount > 0) {
404                 flags = (byte) (flags >> 1);
405                 flagCount--;
406             } else {
407                 // Next byte must be a flag.
408                 int readResult = input.read();
409                 if (readResult == -1) {
410                     break;
411                 }
412 
413                 flags = (byte) (readResult & 0xFF);
414 
415                 // Set the flag counter. While at first it might appear
416                 // that this should be an 8 since there are 8 bits in the
417                 // flag, it should really be a 7 because the shift must
418                 // be performed 7 times in order to see all 8 bits.
419                 flagCount = 7;
420             }
421 
422             // If the low order bit of the flag is now set, then we know
423             // that the next byte is a single, unencoded character.
424             if ((flags & 1) != 0) {
425                 if (input.read(c, 0, 1) != 1) {
426                     break;
427                 }
428 
429                 out.write(c[0]);
430 
431                 // Add to buffer, and increment to next spot. Wrap at end.
432                 ringBuffer[r] = c[0];
433                 r = (short) ((r + 1) & RING_WRAP);
434             } else {
435                 // Otherwise, we know that the next two bytes are a
436                 // <position,length> pair. The position is in 12 bits and
437                 // the length is in 4 bits.
438                 if (input.read(c, 0, 2) != 2) {
439                     break;
440                 }
441 
442                 // Convert these two characters into the position and
443                 // length in the ringBuffer. Note that the length is always at
444                 // least
445                 // THRESHOLD, which is why we're able to get a length
446                 // of 18 out of only 4 bits.
447                 short pos = (short) ((c[0] & 0xFF) | ((c[1] & 0xF0) << 4));
448                 short len = (short) ((c[1] & 0x0F) + THRESHOLD);
449 
450                 // There are now "len" characters at position "pos" in
451                 // the ring buffer that can be pulled out. Note that
452                 // len is never more than MAX_STORE_LENGTH.
453                 for (int k = 0; k < len; k++) {
454                     c[k] = ringBuffer[(pos + k) & RING_WRAP];
455 
456                     // Add to buffer, and increment to next spot. Wrap at end.
457                     ringBuffer[r] = c[k];
458                     r = (r + 1) & RING_WRAP;
459                 }
460 
461                 // Add the "len" characters to the output stream.
462                 out.write(c, 0, len);
463             }
464         }
465         return out;
466     }
467 
468     /**
469      * Initializes the tree nodes to "empty" states.
470      */
471     private void initTree() {
472         // For i = 0 to RING_SIZE - 1, rightSon[i] and leftSon[i] will be the
473         // right
474         // and left children of node i. These nodes need not be
475         // initialized. However, for debugging purposes, it is nice to
476         // have them initialized. Since this is only used for compression
477         // (not decompression), I don't mind spending the time to do it.
478         //
479         // For the same range of i, dad[i] is the parent of node i.
480         // These are initialized to a known value that can represent
481         // a "not used" state.
482         // For i = 0 to 255, rightSon[RING_SIZE + i + 1] is the root of the tree
483         // for strings that begin with the character i. This is why
484         // the right child array is larger than the left child array.
485         // These are also initialized to a "not used" state.
486         //
487         // Note that there are 256 of these, one for each of the possible
488         // 256 characters.
489         Arrays.fill(dad, 0, dad.length, NOT_USED);
490         Arrays.fill(leftSon, 0, leftSon.length, NOT_USED);
491         Arrays.fill(rightSon, 0, rightSon.length, NOT_USED);
492     }
493 
494     /**
495      * Inserts a string from the ring buffer into one of the trees. It loads the
496      * match position and length member variables for the longest match.
497      * 
498      * <p>
499      * The string to be inserted is identified by the parameter pos, A full
500      * MAX_STORE_LENGTH bytes are inserted. So, ringBuffer[pos ...
501      * pos+MAX_STORE_LENGTH-1] are inserted.
502      * </p>
503      * 
504      * <p>
505      * If the matched length is exactly MAX_STORE_LENGTH, then an old node is
506      * removed in favor of the new one (because the old one will be deleted
507      * sooner).
508      * </p>
509      * 
510      * @param pos
511      *            plays a dual role. It is used as both a position in the ring
512      *            buffer and also as a tree node. ringBuffer[pos] defines a
513      *            character that is used to identify a tree node.
514      */
515     private void insertNode(short pos) {
516         assert pos >= 0 && pos < RING_SIZE;
517 
518         int cmp = 1;
519         short key = pos;
520 
521         // The last 256 entries in rightSon contain the root nodes for
522         // strings that begin with a letter. Get an index for the
523         // first letter in this string.
524         short p = (short) (RING_SIZE + 1 + (ringBuffer[key] & 0xFF));
525         assert p > RING_SIZE;
526 
527         // Set the left and right tree nodes for this position to "not used."
528         leftSon[pos] = NOT_USED;
529         rightSon[pos] = NOT_USED;
530 
531         // Haven't matched anything yet.
532         matchLength = 0;
533 
534         while (true) {
535             if (cmp >= 0) {
536                 if (rightSon[p] != NOT_USED) {
537                     p = rightSon[p];
538                 } else {
539                     rightSon[p] = pos;
540                     dad[pos] = p;
541                     return;
542                 }
543             } else {
544                 if (leftSon[p] != NOT_USED) {
545                     p = leftSon[p];
546                 } else {
547                     leftSon[p] = pos;
548                     dad[pos] = p;
549                     return;
550                 }
551             }
552 
553             // Should we go to the right or the left to look for the
554             // next match?
555             short i = 0;
556             for (i = 1; i < MAX_STORE_LENGTH; i++) {
557                 cmp = (ringBuffer[key + i] & 0xFF) - (ringBuffer[p + i] & 0xFF);
558                 if (cmp != 0) {
559                     break;
560                 }
561             }
562 
563             if (i > matchLength) {
564                 matchPosition = p;
565                 matchLength = i;
566 
567                 if (i >= MAX_STORE_LENGTH) {
568                     break;
569                 }
570             }
571         }
572 
573         dad[pos] = dad[p];
574         leftSon[pos] = leftSon[p];
575         rightSon[pos] = rightSon[p];
576 
577         dad[leftSon[p]] = pos;
578         dad[rightSon[p]] = pos;
579 
580         if (rightSon[dad[p]] == p) {
581             rightSon[dad[p]] = pos;
582         } else {
583             leftSon[dad[p]] = pos;
584         }
585 
586         // Remove "p"
587         dad[p] = NOT_USED;
588     }
589 
590     /**
591      * Remove a node from the tree.
592      * 
593      * @param node
594      *            the node to remove
595      */
596     private void deleteNode(short node) {
597         assert node >= 0 && node < (RING_SIZE + 1);
598 
599         short q;
600 
601         if (dad[node] == NOT_USED) {
602             // not in tree, nothing to do
603             return;
604         }
605 
606         if (rightSon[node] == NOT_USED) {
607             q = leftSon[node];
608         } else if (leftSon[node] == NOT_USED) {
609             q = rightSon[node];
610         } else {
611             q = leftSon[node];
612             if (rightSon[q] != NOT_USED) {
613                 do {
614                     q = rightSon[q];
615                 } while (rightSon[q] != NOT_USED);
616 
617                 rightSon[dad[q]] = leftSon[q];
618                 dad[leftSon[q]] = dad[q];
619                 leftSon[q] = leftSon[node];
620                 dad[leftSon[node]] = q;
621             }
622 
623             rightSon[q] = rightSon[node];
624             dad[rightSon[node]] = q;
625         }
626 
627         dad[q] = dad[node];
628 
629         if (rightSon[dad[node]] == node) {
630             rightSon[dad[node]] = q;
631         } else {
632             leftSon[dad[node]] = q;
633         }
634 
635         dad[node] = NOT_USED;
636     }
637 
638     /**
639      * This is the size of the ring buffer. It is set to 4K. It is important to
640      * note that a position within the ring buffer requires 12 bits.
641      */
642     private static final short RING_SIZE = 4096;
643 
644     /**
645      * This is used to determine the next position in the ring buffer, from 0 to
646      * RING_SIZE - 1. The idiom s = (s + 1) & RING_WRAP; will ensure this. This
647      * only works if RING_SIZE is a power of 2. Note this is slightly faster
648      * than the equivalent: s = (s + 1) % RING_SIZE;
649      */
650     private static final short RING_WRAP = RING_SIZE - 1;
651 
652     /**
653      * This is the maximum length of a character sequence that can be taken from
654      * the ring buffer. It is set to 18. Note that a length must be 3 before it
655      * is worthwhile to store a position/length pair, so the length can be
656      * encoded in only 4 bits. Or, put yet another way, it is not necessary to
657      * encode a length of 0-18, it is necessary to encode a length of 3-18,
658      * which requires 4 bits.
659      * <p>
660      * Note that the 12 bits used to store the position and the 4 bits used to
661      * store the length equal a total of 16 bits, or 2 bytes.
662      * </p>
663      */
664     private static final int MAX_STORE_LENGTH = 18;
665 
666     /**
667      * It takes 2 bytes to store an offset and a length. If a character sequence
668      * only requires 1 or 2 characters to store uncompressed, then it is better
669      * to store it uncompressed than as an offset into the ring buffer.
670      */
671     private static final int THRESHOLD = 3;
672 
673     /**
674      * Used to mark nodes as not used.
675      */
676     private static final short NOT_USED = RING_SIZE;
677 
678     /**
679      * A text buffer. It contains "nodes" of uncompressed text that can be
680      * indexed by position. That is, a substring of the ring buffer can be
681      * indexed by a position and a length. When decoding, the compressed text
682      * may contain a position in the ring buffer and a count of the number of
683      * bytes from the ring buffer that are to be moved into the uncompressed
684      * buffer.
685      * 
686      * <p>
687      * This ring buffer is not maintained as part of the compressed text.
688      * Instead, it is reconstructed dynamically. That is, it starts out empty
689      * and gets built as the text is decompressed.
690      * </p>
691      * 
692      * <p>
693      * The ring buffer contain RING_SIZE bytes, with an additional
694      * MAX_STORE_LENGTH - 1 bytes to facilitate string comparison.
695      * </p>
696      */
697     private byte[] ringBuffer;
698 
699     /**
700      * The position in the ring buffer. Used by insertNode.
701      */
702     private short matchPosition;
703 
704     /**
705      * The number of characters in the ring buffer at matchPosition that match a
706      * given string. Used by insertNode.
707      */
708     private short matchLength;
709 
710     /**
711      * leftSon, rightSon, and dad are the Japanese way of referring to a tree
712      * structure. The dad is the parent and it has a right and left son (child).
713      * 
714      * <p>
715      * For i = 0 to RING_SIZE-1, rightSon[i] and leftSon[i] will be the right
716      * and left children of node i.
717      * </p>
718      * 
719      * <p>
720      * For i = 0 to RING_SIZE-1, dad[i] is the parent of node i.
721      * </p>
722      * 
723      * <p>
724      * For i = 0 to 255, rightSon[RING_SIZE + i + 1] is the root of the tree for
725      * strings that begin with the character i. Note that this requires one byte
726      * characters.
727      * </p>
728      * 
729      * <p>
730      * These nodes store values of 0...(RING_SIZE-1). Memory requirements can be
731      * reduces by using 2-byte integers instead of full 4-byte integers (for
732      * 32-bit applications). Therefore, these are defined as "shorts."
733      * </p>
734      */
735     private short[] dad;
736     private short[] leftSon;
737     private short[] rightSon;
738 
739     /**
740      * The output stream containing the result.
741      */
742     private ByteArrayOutputStream out;
743 }
744