1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.filter;
20
21 import com.google.protobuf.InvalidProtocolBufferException;
22
23 import java.nio.charset.Charset;
24 import java.nio.charset.IllegalCharsetNameException;
25 import java.util.Arrays;
26 import java.util.regex.Pattern;
27
28 import org.apache.commons.logging.Log;
29 import org.apache.commons.logging.LogFactory;
30 import org.apache.hadoop.hbase.classification.InterfaceAudience;
31 import org.apache.hadoop.hbase.classification.InterfaceStability;
32 import org.apache.hadoop.hbase.exceptions.DeserializationException;
33 import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos;
34 import org.apache.hadoop.hbase.util.Bytes;
35
36 import org.jcodings.Encoding;
37 import org.jcodings.EncodingDB;
38 import org.jcodings.specific.UTF8Encoding;
39 import org.joni.Matcher;
40 import org.joni.Option;
41 import org.joni.Regex;
42 import org.joni.Syntax;
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74 @InterfaceAudience.Public
75 @InterfaceStability.Stable
76 public class RegexStringComparator extends ByteArrayComparable {
77
78 private static final Log LOG = LogFactory.getLog(RegexStringComparator.class);
79
80 private Engine engine;
81
82
83 public enum EngineType {
84 JAVA,
85 JONI
86 }
87
88
89
90
91
92
93 public RegexStringComparator(String expr) {
94 this(expr, Pattern.DOTALL);
95 }
96
97
98
99
100
101
102
103 public RegexStringComparator(String expr, EngineType engine) {
104 this(expr, Pattern.DOTALL, engine);
105 }
106
107
108
109
110
111
112 public RegexStringComparator(String expr, int flags) {
113 this(expr, flags, EngineType.JAVA);
114 }
115
116
117
118
119
120
121
122 public RegexStringComparator(String expr, int flags, EngineType engine) {
123 super(Bytes.toBytes(expr));
124 switch (engine) {
125 case JAVA:
126 this.engine = new JavaRegexEngine(expr, flags);
127 break;
128 case JONI:
129 this.engine = new JoniRegexEngine(expr, flags);
130 break;
131 }
132 }
133
134
135
136
137
138
139
140
141
142
143
144
145 public void setCharset(final Charset charset) {
146 engine.setCharset(charset.name());
147 }
148
149 @Override
150 public int compareTo(byte[] value, int offset, int length) {
151 return engine.compareTo(value, offset, length);
152 }
153
154
155
156
157 public byte [] toByteArray() {
158 return engine.toByteArray();
159 }
160
161
162
163
164
165
166
167 public static RegexStringComparator parseFrom(final byte [] pbBytes)
168 throws DeserializationException {
169 ComparatorProtos.RegexStringComparator proto;
170 try {
171 proto = ComparatorProtos.RegexStringComparator.parseFrom(pbBytes);
172 } catch (InvalidProtocolBufferException e) {
173 throw new DeserializationException(e);
174 }
175 RegexStringComparator comparator;
176 if (proto.hasEngine()) {
177 EngineType engine = EngineType.valueOf(proto.getEngine());
178 comparator = new RegexStringComparator(proto.getPattern(), proto.getPatternFlags(),
179 engine);
180 } else {
181 comparator = new RegexStringComparator(proto.getPattern(), proto.getPatternFlags());
182 }
183 String charset = proto.getCharset();
184 if (charset.length() > 0) {
185 try {
186 comparator.getEngine().setCharset(charset);
187 } catch (IllegalCharsetNameException e) {
188 LOG.error("invalid charset", e);
189 }
190 }
191 return comparator;
192 }
193
194
195
196
197
198
199 boolean areSerializedFieldsEqual(ByteArrayComparable other) {
200 if (other == this) return true;
201 if (!(other instanceof RegexStringComparator)) return false;
202 RegexStringComparator comparator = (RegexStringComparator)other;
203 return super.areSerializedFieldsEqual(comparator)
204 && engine.getClass().isInstance(comparator.getEngine())
205 && engine.getPattern().equals(comparator.getEngine().getPattern())
206 && engine.getFlags() == comparator.getEngine().getFlags()
207 && engine.getCharset().equals(comparator.getEngine().getCharset());
208 }
209
210 Engine getEngine() {
211 return engine;
212 }
213
214
215
216
217
218 static interface Engine {
219
220
221
222
223 String getPattern();
224
225
226
227
228
229 int getFlags();
230
231
232
233
234 String getCharset();
235
236
237
238
239
240 void setCharset(final String charset);
241
242
243
244
245 byte [] toByteArray();
246
247
248
249
250
251
252
253
254 int compareTo(byte[] value, int offset, int length);
255 }
256
257
258
259
260
261
262 static class JavaRegexEngine implements Engine {
263 private Charset charset = Charset.forName("UTF-8");
264 private Pattern pattern;
265
266 public JavaRegexEngine(String regex, int flags) {
267 this.pattern = Pattern.compile(regex, flags);
268 }
269
270 @Override
271 public String getPattern() {
272 return pattern.toString();
273 }
274
275 @Override
276 public int getFlags() {
277 return pattern.flags();
278 }
279
280 @Override
281 public String getCharset() {
282 return charset.name();
283 }
284
285 @Override
286 public void setCharset(String charset) {
287 this.charset = Charset.forName(charset);
288 }
289
290 @Override
291 public int compareTo(byte[] value, int offset, int length) {
292
293
294 String tmp;
295 if (length < value.length / 2) {
296
297
298 tmp = new String(Arrays.copyOfRange(value, offset, offset + length), charset);
299 } else {
300 tmp = new String(value, offset, length, charset);
301 }
302 return pattern.matcher(tmp).find() ? 0 : 1;
303 }
304
305 @Override
306 public byte[] toByteArray() {
307 ComparatorProtos.RegexStringComparator.Builder builder =
308 ComparatorProtos.RegexStringComparator.newBuilder();
309 builder.setPattern(pattern.pattern());
310 builder.setPatternFlags(pattern.flags());
311 builder.setCharset(charset.name());
312 builder.setEngine(EngineType.JAVA.name());
313 return builder.build().toByteArray();
314 }
315 }
316
317
318
319
320
321
322
323
324
325
326 static class JoniRegexEngine implements Engine {
327 private Encoding encoding = UTF8Encoding.INSTANCE;
328 private String regex;
329 private Regex pattern;
330
331 public JoniRegexEngine(String regex, int flags) {
332 this.regex = regex;
333 byte[] b = Bytes.toBytes(regex);
334 this.pattern = new Regex(b, 0, b.length, patternToJoniFlags(flags), encoding, Syntax.Java);
335 }
336
337 @Override
338 public String getPattern() {
339 return regex;
340 }
341
342 @Override
343 public int getFlags() {
344 return pattern.getOptions();
345 }
346
347 @Override
348 public String getCharset() {
349 return encoding.getCharsetName();
350 }
351
352 @Override
353 public void setCharset(String name) {
354 setEncoding(name);
355 }
356
357 @Override
358 public int compareTo(byte[] value, int offset, int length) {
359
360
361 Matcher m = pattern.matcher(value);
362 return m.search(offset, length, pattern.getOptions()) < 0 ? 1 : 0;
363 }
364
365 @Override
366 public byte[] toByteArray() {
367 ComparatorProtos.RegexStringComparator.Builder builder =
368 ComparatorProtos.RegexStringComparator.newBuilder();
369 builder.setPattern(regex);
370 builder.setPatternFlags(joniToPatternFlags(pattern.getOptions()));
371 builder.setCharset(encoding.getCharsetName());
372 builder.setEngine(EngineType.JONI.name());
373 return builder.build().toByteArray();
374 }
375
376 private int patternToJoniFlags(int flags) {
377 int newFlags = 0;
378 if ((flags & Pattern.CASE_INSENSITIVE) != 0) {
379 newFlags |= Option.IGNORECASE;
380 }
381 if ((flags & Pattern.DOTALL) != 0) {
382
383 newFlags |= Option.MULTILINE;
384 }
385 if ((flags & Pattern.MULTILINE) != 0) {
386
387
388 newFlags &= ~Option.SINGLELINE;
389 newFlags |= Option.NEGATE_SINGLELINE;
390 }
391 return newFlags;
392 }
393
394 private int joniToPatternFlags(int flags) {
395 int newFlags = 0;
396 if ((flags & Option.IGNORECASE) != 0) {
397 newFlags |= Pattern.CASE_INSENSITIVE;
398 }
399
400 if ((flags & Option.MULTILINE) != 0) {
401 newFlags |= Pattern.DOTALL;
402 }
403
404 if ((flags & Option.NEGATE_SINGLELINE) != 0) {
405 newFlags |= Pattern.MULTILINE;
406 }
407 return newFlags;
408 }
409
410 private void setEncoding(String name) {
411 EncodingDB.Entry e = EncodingDB.getEncodings().get(Bytes.toBytes(name));
412 if (e != null) {
413 encoding = e.getEncoding();
414 } else {
415 throw new IllegalCharsetNameException(name);
416 }
417 }
418 }
419 }