CoCalc -- lz.c

GitHub Repository: CTCaer/hekate
Path: blob/master/bdk/libs/compr/lz.c
¹⁴⁷⁶ views
1
/*************************************************************************
2
* Name:        lz.c
3
* Author:      Marcus Geelnard
4
* Description: LZ77 coder/decoder implementation.
5
* Reentrant:   Yes
6
*
7
* The LZ77 compression scheme is a substitutional compression scheme
8
* proposed by Abraham Lempel and Jakob Ziv in 1977. It is very simple in
9
* its design, and uses no fancy bit level compression.
10
*
11
* This is my first attempt at an implementation of a LZ77 code/decoder.
12
*
13
* The principle of the LZ77 compression algorithm is to store repeated
14
* occurrences of strings as references to previous occurrences of the same
15
* string. The point is that the reference consumes less space than the
16
* string itself, provided that the string is long enough (in this
17
* implementation, the string has to be at least 4 bytes long, since the
18
* minimum coded reference is 3 bytes long). Also note that the term
19
* "string" refers to any kind of byte sequence (it does not have to be
20
* an ASCII string, for instance).
21
*
22
* The coder uses a brute force approach to finding string matches in the
23
* history buffer (or "sliding window", if you wish), which is very, very
24
* slow. I recon the complexity is somewhere between O(n^2) and O(n^3),
25
* depending on the input data.
26
*
27
* There is also a faster implementation that uses a large working buffer
28
* in which a "jump table" is stored, which is used to quickly find
29
* possible string matches (see the source code for LZ_CompressFast() for
30
* more information). The faster method is an order of magnitude faster,
31
* but still quite slow compared to other compression methods.
32
*
33
* The upside is that decompression is very fast, and the compression ratio
34
* is often very good.
35
*
36
* The reference to a string is coded as a (length,offset) pair, where the
37
* length indicates the length of the string, and the offset gives the
38
* offset from the current data position. To distinguish between string
39
* references and literal strings (uncompressed bytes), a string reference
40
* is preceded by a marker byte, which is chosen as the least common byte
41
* symbol in the input data stream (this marker byte is stored in the
42
* output stream as the first byte).
43
*
44
* Occurrences of the marker byte in the stream are encoded as the marker
45
* byte followed by a zero byte, which means that occurrences of the marker
46
* byte have to be coded with two bytes.
47
*
48
* The lengths and offsets are coded in a variable length fashion, allowing
49
* values of any magnitude (up to 4294967295 in this implementation).
50
*
51
* With this compression scheme, the worst case compression result is
52
* (257/256)*insize + 1.
53
*
54
*-------------------------------------------------------------------------
55
* Copyright (c) 2003-2006 Marcus Geelnard
56
*
57
* This software is provided 'as-is', without any express or implied
58
* warranty. In no event will the authors be held liable for any damages
59
* arising from the use of this software.
60
*
61
* Permission is granted to anyone to use this software for any purpose,
62
* including commercial applications, and to alter it and redistribute it
63
* freely, subject to the following restrictions:
64
*
65
* 1. The origin of this software must not be misrepresented; you must not
66
*    claim that you wrote the original software. If you use this software
67
*    in a product, an acknowledgment in the product documentation would
68
*    be appreciated but is not required.
69
*
70
* 2. Altered source versions must be plainly marked as such, and must not
71
*    be misrepresented as being the original software.
72
*
73
* 3. This notice may not be removed or altered from any source
74
*    distribution.
75
*
76
* Marcus Geelnard
77
* marcus.geelnard at home.se
78
*************************************************************************/
79

80

81
/*************************************************************************
82
*                           INTERNAL FUNCTIONS                           *
83
*************************************************************************/
84

85

86
/*************************************************************************
87
* _LZ_ReadVarSize() - Read unsigned integer with variable number of
88
* bytes depending on value.
89
*************************************************************************/
90

91
static int _LZ_ReadVarSize( unsigned int * x, const unsigned char * buf )
92
{
93
    unsigned int y, b, num_bytes;
94

95
    /* Read complete value (stop when byte contains zero in 8:th bit) */
96
    y = 0;
97
    num_bytes = 0;
98
    do
99
    {
100
        b = (unsigned int) (*buf ++);
101
        y = (y << 7) | (b & 0x0000007f);
102
        ++ num_bytes;
103
    }
104
    while( b & 0x00000080 );
105

106
    /* Store value in x */
107
    *x = y;
108

109
    /* Return number of bytes read */
110
    return num_bytes;
111
}
112

113

114

115
/*************************************************************************
116
*                            PUBLIC FUNCTIONS                            *
117
*************************************************************************/
118

119

120
/*************************************************************************
121
* LZ_Uncompress() - Uncompress a block of data using an LZ77 decoder.
122
*  in      - Input (compressed) buffer.
123
*  out     - Output (uncompressed) buffer. This buffer must be large
124
*            enough to hold the uncompressed data.
125
*  insize  - Number of input bytes.
126
*************************************************************************/
127

128
unsigned int LZ_Uncompress( const unsigned char *in, unsigned char *out,
129
    unsigned int insize )
130
{
131
    unsigned char marker, symbol;
132
    unsigned int  i, inpos, outpos, length, offset;
133

134
    /* Do we have anything to uncompress? */
135
    if( insize < 1 )
136
    {
137
        return 0;
138
    }
139

140
    /* Get marker symbol from input stream */
141
    marker = in[ 0 ];
142
    inpos = 1;
143

144
    /* Main decompression loop */
145
    outpos = 0;
146
    do
147
    {
148
        symbol = in[ inpos ++ ];
149
        if( symbol == marker )
150
        {
151
            /* We had a marker byte */
152
            if( in[ inpos ] == 0 )
153
            {
154
                /* It was a single occurrence of the marker byte */
155
                out[ outpos ++ ] = marker;
156
                ++ inpos;
157
            }
158
            else
159
            {
160
                /* Extract true length and offset */
161
                inpos += _LZ_ReadVarSize( &length, &in[ inpos ] );
162
                inpos += _LZ_ReadVarSize( &offset, &in[ inpos ] );
163

164
                /* Copy corresponding data from history window */
165
                for( i = 0; i < length; ++ i )
166
                {
167
                    out[ outpos ] = out[ outpos - offset ];
168
                    ++ outpos;
169
                }
170
            }
171
        }
172
        else
173
        {
174
            /* No marker, plain copy */
175
            out[ outpos ++ ] = symbol;
176
        }
177
    }
178
    while( inpos < insize );
179

180
    return outpos;
181
}
182

183
Product

Resources

Company