Commit | Line | Data |
---|---|---|
f9995f31 MG |
1 | /* Copyright 2013 Google Inc. All Rights Reserved. |
2 | ||
3 | Distributed under MIT license. | |
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | |
5 | */ | |
6 | ||
7 | /* Lookup table to map the previous two bytes to a context id. | |
8 | ||
9 | There are four different context modeling modes defined here: | |
10 | CONTEXT_LSB6: context id is the least significant 6 bits of the last byte, | |
11 | CONTEXT_MSB6: context id is the most significant 6 bits of the last byte, | |
12 | CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text, | |
13 | CONTEXT_SIGNED: second-order context model tuned for signed integers. | |
14 | ||
15 | The context id for the UTF8 context model is calculated as follows. If p1 | |
16 | and p2 are the previous two bytes, we calculate the context as | |
17 | ||
18 | context = kContextLookup[p1] | kContextLookup[p2 + 256]. | |
19 | ||
20 | If the previous two bytes are ASCII characters (i.e. < 128), this will be | |
21 | equivalent to | |
22 | ||
23 | context = 4 * context1(p1) + context2(p2), | |
24 | ||
25 | where context1 is based on the previous byte in the following way: | |
26 | ||
27 | 0 : ASCII control other than \t, \n, \r | |
28 | 1 : \t, \n, \r | |
29 | 2 : space | |
30 | 3 : other punctuation | |
31 | 4 : " ' | |
32 | 5 : % | |
33 | 6 : ( < [ { | |
34 | 7 : ) > ] } | |
35 | 8 : , ; : | |
36 | 9 : . | |
37 | 10 : = | |
38 | 11 : number | |
39 | 12 : upper-case vowel | |
40 | 13 : upper-case consonant | |
41 | 14 : lower-case vowel | |
42 | 15 : lower-case consonant | |
43 | ||
44 | and context2 is based on the second last byte: | |
45 | ||
46 | 0 : control, space | |
47 | 1 : punctuation | |
48 | 2 : upper-case letter, number | |
49 | 3 : lower-case letter | |
50 | ||
51 | If the last byte is ASCII, and the second last byte is not (in a valid UTF8 | |
52 | stream it will be a continuation byte, value between 128 and 191), the | |
53 | context is the same as if the second last byte was an ASCII control or space. | |
54 | ||
55 | If the last byte is a UTF8 lead byte (value >= 192), then the next byte will | |
56 | be a continuation byte and the context id is 2 or 3 depending on the LSB of | |
57 | the last byte and to a lesser extent on the second last byte if it is ASCII. | |
58 | ||
59 | If the last byte is a UTF8 continuation byte, the second last byte can be: | |
60 | - continuation byte: the next byte is probably ASCII or lead byte (assuming | |
61 | 4-byte UTF8 characters are rare) and the context id is 0 or 1. | |
62 | - lead byte (192 - 223): next byte is ASCII or lead byte, context is 0 or 1 | |
63 | - lead byte (224 - 255): next byte is continuation byte, context is 2 or 3 | |
64 | ||
65 | The possible value combinations of the previous two bytes, the range of | |
66 | context ids and the type of the next byte is summarized in the table below: | |
67 | ||
68 | |--------\-----------------------------------------------------------------| | |
69 | | \ Last byte | | |
70 | | Second \---------------------------------------------------------------| | |
71 | | last byte \ ASCII | cont. byte | lead byte | | |
72 | | \ (0-127) | (128-191) | (192-) | | |
73 | |=============|===================|=====================|==================| | |
74 | | ASCII | next: ASCII/lead | not valid | next: cont. | | |
75 | | (0-127) | context: 4 - 63 | | context: 2 - 3 | | |
76 | |-------------|-------------------|---------------------|------------------| | |
77 | | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. | | |
78 | | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | | |
79 | |-------------|-------------------|---------------------|------------------| | |
80 | | lead byte | not valid | next: ASCII/lead | not valid | | |
81 | | (192-207) | | context: 0 - 1 | | | |
82 | |-------------|-------------------|---------------------|------------------| | |
83 | | lead byte | not valid | next: cont. | not valid | | |
84 | | (208-) | | context: 2 - 3 | | | |
85 | |-------------|-------------------|---------------------|------------------| | |
86 | ||
87 | The context id for the signed context mode is calculated as: | |
88 | ||
89 | context = (kContextLookup[512 + p1] << 3) | kContextLookup[512 + p2]. | |
90 | ||
91 | For any context modeling modes, the context ids can be calculated by |-ing | |
92 | together two lookups from one table using context model dependent offsets: | |
93 | ||
94 | context = kContextLookup[offset1 + p1] | kContextLookup[offset2 + p2]. | |
95 | ||
96 | where offset1 and offset2 are dependent on the context mode. | |
97 | */ | |
98 | ||
99 | #ifndef BROTLI_DEC_CONTEXT_H_ | |
100 | #define BROTLI_DEC_CONTEXT_H_ | |
101 | ||
102 | #include "./types.h" | |
103 | ||
/* Context modeling modes.

   CONTEXT_LSB6:   context id is the least significant 6 bits of the last byte.
   CONTEXT_MSB6:   context id is the most significant 6 bits of the last byte.
   CONTEXT_UTF8:   second-order context model tuned for UTF8-encoded text.
   CONTEXT_SIGNED: second-order context model tuned for signed integers.

   The enumerators are numbered in the same order in which the per-mode offset
   pairs are listed in kContextLookupOffsets (presumably so that a mode value
   can be used to select its pair; confirm against the callers). */
enum ContextType {
  CONTEXT_LSB6 = 0,
  CONTEXT_MSB6 = 1,
  CONTEXT_UTF8 = 2,
  CONTEXT_SIGNED = 3
};
110 | ||
/* Common context lookup table for all context modes.

   The table is made of seven consecutive 256-entry sub-tables, each indexed
   by a byte value:

     [   0,  256)  CONTEXT_UTF8, last byte
     [ 256,  512)  CONTEXT_UTF8, second last byte
     [ 512,  768)  CONTEXT_SIGNED, second last byte
     [ 768, 1024)  CONTEXT_SIGNED, last byte
     [1024, 1280)  CONTEXT_LSB6, last byte
     [1280, 1536)  CONTEXT_MSB6, last byte
     [1536, 1792)  CONTEXT_LSB6 and CONTEXT_MSB6, second last byte (all zero)

   A context id is obtained by OR-ing one "last byte" entry with one
   "second last byte" entry of the same mode. */
static const uint8_t kContextLookup[1792] = {
  /* CONTEXT_UTF8, last byte. */
  /* ASCII range (0-127). */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  0,  0,  4,  0,  0,
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
   8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
  44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
  12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
  52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
  12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
  60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12,  0,
  /* UTF8 continuation byte range (128-191). */
  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
  /* UTF8 lead byte range (192-255). */
  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
  /* CONTEXT_UTF8, second last byte. */
  /* ASCII range (0-127). */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
  1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
  /* UTF8 continuation byte range (128-191). */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* UTF8 lead byte range (192-255): 0 for 192-223, 2 for 224-255. */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* CONTEXT_SIGNED, second last byte. */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
  /* CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits. */
   0,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
  48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,
  /* CONTEXT_LSB6, last byte. */
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  /* CONTEXT_MSB6, last byte. */
   0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
   4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,
   8,  8,  8,  8,  9,  9,  9,  9, 10, 10, 10, 10, 11, 11, 11, 11,
  12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
  16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
  20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
  24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
  28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
  32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
  36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
  40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
  44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
  48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
  52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
  56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
  60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
  /* CONTEXT_{M,L}SB6, second last byte. The second last byte does not
     contribute to the context id in these modes, so every entry is 0. */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
239 | ||
/* Offsets into kContextLookup for each context mode, stored as consecutive
   (offset1, offset2) pairs. For the previous byte p1 and the byte before it
   p2, the context id is

     kContextLookup[offset1 + p1] | kContextLookup[offset2 + p2].

   offset1 selects the mode's "last byte" sub-table and offset2 its
   "second last byte" sub-table. CONTEXT_LSB6 and CONTEXT_MSB6 share the
   all-zero sub-table at 1536 for the second last byte. */
static const int kContextLookupOffsets[8] = {
  /* CONTEXT_LSB6 */
  1024, 1536,
  /* CONTEXT_MSB6 */
  1280, 1536,
  /* CONTEXT_UTF8 */
  0, 256,
  /* CONTEXT_SIGNED */
  768, 512,
};
250 | ||
251 | #endif /* BROTLI_DEC_CONTEXT_H_ */ |