]>
iEval git - io-compress-brotli.git/blob - dec/transform.h
f47eeb44a44f5771c3e7df72f5bbf2e98c2fffda
1 /* Copyright 2013 Google Inc. All Rights Reserved.
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
7 /* Transformations on dictionary words. */
9 #ifndef BROTLI_DEC_TRANSFORM_H_
10 #define BROTLI_DEC_TRANSFORM_H_
17 #if defined(__cplusplus) || defined(c_plusplus)
21 enum WordTransformType
{
46 const uint8_t prefix_id
;
47 const uint8_t transform
;
48 const uint8_t suffix_id
;
/* Packed table of every prefix/suffix string used by the transforms. Each
   string is terminated by a NUL byte. TransformDictionaryWord takes the
   address of the element selected by the prefix_id / suffix_id of a
   kTransforms entry and copies bytes until it reaches that terminator, so
   those ids are byte offsets into this array. The last string (after the
   two-byte \xc2\xa0 entry) relies on the literal's implicit terminator. */
51 static const char kPrefixSuffix
[208] =
52 "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0"
53 " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0"
54 " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0"
55 " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous ";
65 NBSP = non-breaking space "\xc2\xa0"
70 kPFix_SPofSPtheSP
= 6,
92 kPFix_DOTSPTheSP
= 93,
97 kPFix_NEWLINETAB
= 120,
105 kPFix_DOTcomSLASH
= 146,
106 kPFix_DOTSPThisSP
= 152,
120 static const Transform kTransforms
[] = {
121 { kPFix_EMPTY
, kIdentity
, kPFix_EMPTY
},
122 { kPFix_EMPTY
, kIdentity
, kPFix_SP
},
123 { kPFix_SP
, kIdentity
, kPFix_SP
},
124 { kPFix_EMPTY
, kOmitFirst1
, kPFix_EMPTY
},
125 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_SP
},
126 { kPFix_EMPTY
, kIdentity
, kPFix_SPtheSP
},
127 { kPFix_SP
, kIdentity
, kPFix_EMPTY
},
128 { kPFix_sSP
, kIdentity
, kPFix_SP
},
129 { kPFix_EMPTY
, kIdentity
, kPFix_SPofSP
},
130 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_EMPTY
},
131 { kPFix_EMPTY
, kIdentity
, kPFix_SPandSP
},
132 { kPFix_EMPTY
, kOmitFirst2
, kPFix_EMPTY
},
133 { kPFix_EMPTY
, kOmitLast1
, kPFix_EMPTY
},
134 { kPFix_COMMASP
, kIdentity
, kPFix_SP
},
135 { kPFix_EMPTY
, kIdentity
, kPFix_COMMASP
},
136 { kPFix_SP
, kUppercaseFirst
, kPFix_SP
},
137 { kPFix_EMPTY
, kIdentity
, kPFix_SPinSP
},
138 { kPFix_EMPTY
, kIdentity
, kPFix_SPtoSP
},
139 { kPFix_eSP
, kIdentity
, kPFix_SP
},
140 { kPFix_EMPTY
, kIdentity
, kPFix_DQUOT
},
141 { kPFix_EMPTY
, kIdentity
, kPFix_DOT
},
142 { kPFix_EMPTY
, kIdentity
, kPFix_DQUOTGT
},
143 { kPFix_EMPTY
, kIdentity
, kPFix_NEWLINE
},
144 { kPFix_EMPTY
, kOmitLast3
, kPFix_EMPTY
},
145 { kPFix_EMPTY
, kIdentity
, kPFix_CLOSEBR
},
146 { kPFix_EMPTY
, kIdentity
, kPFix_SPforSP
},
147 { kPFix_EMPTY
, kOmitFirst3
, kPFix_EMPTY
},
148 { kPFix_EMPTY
, kOmitLast2
, kPFix_EMPTY
},
149 { kPFix_EMPTY
, kIdentity
, kPFix_SPaSP
},
150 { kPFix_EMPTY
, kIdentity
, kPFix_SPthatSP
},
151 { kPFix_SP
, kUppercaseFirst
, kPFix_EMPTY
},
152 { kPFix_EMPTY
, kIdentity
, kPFix_DOTSP
},
153 { kPFix_DOT
, kIdentity
, kPFix_EMPTY
},
154 { kPFix_SP
, kIdentity
, kPFix_COMMASP
},
155 { kPFix_EMPTY
, kOmitFirst4
, kPFix_EMPTY
},
156 { kPFix_EMPTY
, kIdentity
, kPFix_SPwithSP
},
157 { kPFix_EMPTY
, kIdentity
, kPFix_SQUOT
},
158 { kPFix_EMPTY
, kIdentity
, kPFix_SPfromSP
},
159 { kPFix_EMPTY
, kIdentity
, kPFix_SPbySP
},
160 { kPFix_EMPTY
, kOmitFirst5
, kPFix_EMPTY
},
161 { kPFix_EMPTY
, kOmitFirst6
, kPFix_EMPTY
},
162 { kPFix_SPtheSP
, kIdentity
, kPFix_EMPTY
},
163 { kPFix_EMPTY
, kOmitLast4
, kPFix_EMPTY
},
164 { kPFix_EMPTY
, kIdentity
, kPFix_DOTSPTheSP
},
165 { kPFix_EMPTY
, kUppercaseAll
, kPFix_EMPTY
},
166 { kPFix_EMPTY
, kIdentity
, kPFix_SPonSP
},
167 { kPFix_EMPTY
, kIdentity
, kPFix_SPasSP
},
168 { kPFix_EMPTY
, kIdentity
, kPFix_SPisSP
},
169 { kPFix_EMPTY
, kOmitLast7
, kPFix_EMPTY
},
170 { kPFix_EMPTY
, kOmitLast1
, kPFix_ingSP
},
171 { kPFix_EMPTY
, kIdentity
, kPFix_NEWLINETAB
},
172 { kPFix_EMPTY
, kIdentity
, kPFix_COLON
},
173 { kPFix_SP
, kIdentity
, kPFix_DOTSP
},
174 { kPFix_EMPTY
, kIdentity
, kPFix_edSP
},
175 { kPFix_EMPTY
, kOmitFirst9
, kPFix_EMPTY
},
176 { kPFix_EMPTY
, kOmitFirst7
, kPFix_EMPTY
},
177 { kPFix_EMPTY
, kOmitLast6
, kPFix_EMPTY
},
178 { kPFix_EMPTY
, kIdentity
, kPFix_OPEN
},
179 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_COMMASP
},
180 { kPFix_EMPTY
, kOmitLast8
, kPFix_EMPTY
},
181 { kPFix_EMPTY
, kIdentity
, kPFix_SPatSP
},
182 { kPFix_EMPTY
, kIdentity
, kPFix_lySP
},
183 { kPFix_SPtheSP
, kIdentity
, kPFix_SPofSP
},
184 { kPFix_EMPTY
, kOmitLast5
, kPFix_EMPTY
},
185 { kPFix_EMPTY
, kOmitLast9
, kPFix_EMPTY
},
186 { kPFix_SP
, kUppercaseFirst
, kPFix_COMMASP
},
187 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_DQUOT
},
188 { kPFix_DOT
, kIdentity
, kPFix_OPEN
},
189 { kPFix_EMPTY
, kUppercaseAll
, kPFix_SP
},
190 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_DQUOTGT
},
191 { kPFix_EMPTY
, kIdentity
, kPFix_EQDQUOT
},
192 { kPFix_SP
, kIdentity
, kPFix_DOT
},
193 { kPFix_DOTcomSLASH
, kIdentity
, kPFix_EMPTY
},
194 { kPFix_SPtheSP
, kIdentity
, kPFix_SPofSPtheSP
},
195 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_SQUOT
},
196 { kPFix_EMPTY
, kIdentity
, kPFix_DOTSPThisSP
},
197 { kPFix_EMPTY
, kIdentity
, kPFix_COMMA
},
198 { kPFix_DOT
, kIdentity
, kPFix_SP
},
199 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_OPEN
},
200 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_DOT
},
201 { kPFix_EMPTY
, kIdentity
, kPFix_SPnotSP
},
202 { kPFix_SP
, kIdentity
, kPFix_EQDQUOT
},
203 { kPFix_EMPTY
, kIdentity
, kPFix_erSP
},
204 { kPFix_SP
, kUppercaseAll
, kPFix_SP
},
205 { kPFix_EMPTY
, kIdentity
, kPFix_alSP
},
206 { kPFix_SP
, kUppercaseAll
, kPFix_EMPTY
},
207 { kPFix_EMPTY
, kIdentity
, kPFix_EQSQUOT
},
208 { kPFix_EMPTY
, kUppercaseAll
, kPFix_DQUOT
},
209 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_DOTSP
},
210 { kPFix_SP
, kIdentity
, kPFix_OPEN
},
211 { kPFix_EMPTY
, kIdentity
, kPFix_fulSP
},
212 { kPFix_SP
, kUppercaseFirst
, kPFix_DOTSP
},
213 { kPFix_EMPTY
, kIdentity
, kPFix_iveSP
},
214 { kPFix_EMPTY
, kIdentity
, kPFix_lessSP
},
215 { kPFix_EMPTY
, kUppercaseAll
, kPFix_SQUOT
},
216 { kPFix_EMPTY
, kIdentity
, kPFix_estSP
},
217 { kPFix_SP
, kUppercaseFirst
, kPFix_DOT
},
218 { kPFix_EMPTY
, kUppercaseAll
, kPFix_DQUOTGT
},
219 { kPFix_SP
, kIdentity
, kPFix_EQSQUOT
},
220 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_COMMA
},
221 { kPFix_EMPTY
, kIdentity
, kPFix_izeSP
},
222 { kPFix_EMPTY
, kUppercaseAll
, kPFix_DOT
},
223 { kPFix_NBSP
, kIdentity
, kPFix_EMPTY
},
224 { kPFix_SP
, kIdentity
, kPFix_COMMA
},
225 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_EQDQUOT
},
226 { kPFix_EMPTY
, kUppercaseAll
, kPFix_EQDQUOT
},
227 { kPFix_EMPTY
, kIdentity
, kPFix_ousSP
},
228 { kPFix_EMPTY
, kUppercaseAll
, kPFix_COMMASP
},
229 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_EQSQUOT
},
230 { kPFix_SP
, kUppercaseFirst
, kPFix_COMMA
},
231 { kPFix_SP
, kUppercaseAll
, kPFix_EQDQUOT
},
232 { kPFix_SP
, kUppercaseAll
, kPFix_COMMASP
},
233 { kPFix_EMPTY
, kUppercaseAll
, kPFix_COMMA
},
234 { kPFix_EMPTY
, kUppercaseAll
, kPFix_OPEN
},
235 { kPFix_EMPTY
, kUppercaseAll
, kPFix_DOTSP
},
236 { kPFix_SP
, kUppercaseAll
, kPFix_DOT
},
237 { kPFix_EMPTY
, kUppercaseAll
, kPFix_EQSQUOT
},
238 { kPFix_SP
, kUppercaseAll
, kPFix_DOTSP
},
239 { kPFix_SP
, kUppercaseFirst
, kPFix_EQDQUOT
},
240 { kPFix_SP
, kUppercaseAll
, kPFix_EQSQUOT
},
241 { kPFix_SP
, kUppercaseFirst
, kPFix_EQSQUOT
},
/* Number of entries in kTransforms, derived at compile time as the size of
   the whole array divided by the size of its first element. */
244 static const int kNumTransforms
= sizeof(kTransforms
) / sizeof(kTransforms
[0]);
246 static int ToUpperCase(uint8_t *p
) {
248 if (p
[0] >= 'a' && p
[0] <= 'z') {
253 /* An overly simplified uppercasing model for utf-8. */
258 /* An arbitrary transform for three byte characters. */
263 static BROTLI_NOINLINE
int TransformDictionaryWord(
264 uint8_t* dst
, const uint8_t* word
, int len
, int transform
) {
267 const char* prefix
= &kPrefixSuffix
[kTransforms
[transform
].prefix_id
];
268 while (*prefix
) { dst
[idx
++] = (uint8_t)*prefix
++; }
271 const int t
= kTransforms
[transform
].transform
;
272 int skip
= t
< kOmitFirst1
? 0 : t
- (kOmitFirst1
- 1);
280 if (t
<= kOmitLast9
) {
283 while (i
< len
) { dst
[idx
++] = word
[i
++]; }
284 uppercase
= &dst
[idx
- len
];
285 if (t
== kUppercaseFirst
) {
286 ToUpperCase(uppercase
);
287 } else if (t
== kUppercaseAll
) {
289 int step
= ToUpperCase(uppercase
);
296 const char* suffix
= &kPrefixSuffix
[kTransforms
[transform
].suffix_id
];
297 while (*suffix
) { dst
[idx
++] = (uint8_t)*suffix
++; }
302 #if defined(__cplusplus) || defined(c_plusplus)
306 #endif /* BROTLI_DEC_TRANSFORM_H_ */
This page took 0.056908 seconds and 3 git commands to generate.