1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.apache.hadoop.hbase.HBaseTestingUtility.assertKVListsEqual;
22 import static org.junit.Assert.assertTrue;
23
24 import java.io.IOException;
25 import java.util.ArrayList;
26 import java.util.Arrays;
27 import java.util.Collection;
28 import java.util.Collections;
29 import java.util.HashMap;
30 import java.util.HashSet;
31 import java.util.List;
32 import java.util.Map;
33 import java.util.Random;
34 import java.util.Set;
35
36 import org.apache.commons.logging.Log;
37 import org.apache.commons.logging.LogFactory;
38 import org.apache.hadoop.hbase.Cell;
39 import org.apache.hadoop.hbase.CellUtil;
40 import org.apache.hadoop.hbase.HBaseTestingUtility;
41 import org.apache.hadoop.hbase.HColumnDescriptor;
42 import org.apache.hadoop.hbase.HConstants;
43 import org.apache.hadoop.hbase.KeyValue;
44 import org.apache.hadoop.hbase.testclassification.MediumTests;
45 import org.apache.hadoop.hbase.client.Delete;
46 import org.apache.hadoop.hbase.client.Put;
47 import org.apache.hadoop.hbase.client.Scan;
48 import org.apache.hadoop.hbase.io.compress.Compression;
49 import org.apache.hadoop.hbase.util.Bytes;
50 import org.junit.After;
51 import org.junit.Before;
52 import org.junit.Test;
53 import org.junit.experimental.categories.Category;
54 import org.junit.runner.RunWith;
55 import org.junit.runners.Parameterized;
56 import org.junit.runners.Parameterized.Parameters;
57
58
59
60
61
62 @RunWith(Parameterized.class)
63 @Category(MediumTests.class)
64 public class TestSeekOptimizations {
65
66 private static final Log LOG =
67 LogFactory.getLog(TestSeekOptimizations.class);
68
69
70 private static final String FAMILY = "myCF";
71 private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
72
73 private static final int PUTS_PER_ROW_COL = 50;
74 private static final int DELETES_PER_ROW_COL = 10;
75
76 private static final int NUM_ROWS = 3;
77 private static final int NUM_COLS = 3;
78
79 private static final boolean VERBOSE = false;
80
81
82
83
84
85 private static final boolean USE_MANY_STORE_FILES = true;
86
87 private static final int[][] COLUMN_SETS = new int[][] {
88 {},
89 {0},
90 {1},
91 {0, 2},
92 {1, 2},
93 {0, 1, 2},
94 };
95
96
97
98 private static final int[][] ROW_RANGES = new int[][] {
99 {-1, -1},
100 {0, 1},
101 {1, 1},
102 {1, 2},
103 {0, 2}
104 };
105
106 private static final int[] MAX_VERSIONS_VALUES = new int[] { 1, 2 };
107
108
109 private HRegion region;
110 private Put put;
111 private Delete del;
112 private Random rand;
113 private Set<Long> putTimestamps = new HashSet<Long>();
114 private Set<Long> delTimestamps = new HashSet<Long>();
115 private List<Cell> expectedKVs = new ArrayList<Cell>();
116
117 private Compression.Algorithm comprAlgo;
118 private BloomType bloomType;
119
120 private long totalSeekDiligent, totalSeekLazy;
121
122 private final static HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU();
123
124 @Parameters
125 public static final Collection<Object[]> parameters() {
126 return HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS;
127 }
128
129 public TestSeekOptimizations(Compression.Algorithm comprAlgo,
130 BloomType bloomType) {
131 this.comprAlgo = comprAlgo;
132 this.bloomType = bloomType;
133 }
134
135 @Before
136 public void setUp() {
137 rand = new Random(91238123L);
138 expectedKVs.clear();
139 }
140
141 @Test
142 public void testMultipleTimestampRanges() throws IOException {
143
144 StoreFileScanner.instrument();
145
146 region = TEST_UTIL.createTestRegion("testMultipleTimestampRanges",
147 new HColumnDescriptor(FAMILY)
148 .setCompressionType(comprAlgo)
149 .setBloomFilterType(bloomType)
150 .setMaxVersions(3)
151 );
152
153
154 final long latestDelTS = USE_MANY_STORE_FILES ? 1397 : -1;
155
156 createTimestampRange(1, 50, -1);
157 createTimestampRange(51, 100, -1);
158 if (USE_MANY_STORE_FILES) {
159 createTimestampRange(100, 500, 127);
160 createTimestampRange(900, 1300, -1);
161 createTimestampRange(1301, 2500, latestDelTS);
162 createTimestampRange(2502, 2598, -1);
163 createTimestampRange(2599, 2999, -1);
164 }
165
166 prepareExpectedKVs(latestDelTS);
167
168 for (int[] columnArr : COLUMN_SETS) {
169 for (int[] rowRange : ROW_RANGES) {
170 for (int maxVersions : MAX_VERSIONS_VALUES) {
171 for (boolean lazySeekEnabled : new boolean[] { false, true }) {
172 testScan(columnArr, lazySeekEnabled, rowRange[0], rowRange[1],
173 maxVersions);
174 }
175 }
176 }
177 }
178
179 final double seekSavings = 1 - totalSeekLazy * 1.0 / totalSeekDiligent;
180 System.err.println("For bloom=" + bloomType + ", compr=" + comprAlgo +
181 " total seeks without optimization: " + totalSeekDiligent
182 + ", with optimization: " + totalSeekLazy + " (" +
183 String.format("%.2f%%", totalSeekLazy * 100.0 / totalSeekDiligent) +
184 "), savings: " + String.format("%.2f%%",
185 100.0 * seekSavings) + "\n");
186
187
188
189 final double expectedSeekSavings = 0.0;
190 assertTrue("Lazy seek is only saving " +
191 String.format("%.2f%%", seekSavings * 100) + " seeks but should " +
192 "save at least " + String.format("%.2f%%", expectedSeekSavings * 100),
193 seekSavings >= expectedSeekSavings);
194 }
195
196 private void testScan(final int[] columnArr, final boolean lazySeekEnabled,
197 final int startRow, final int endRow, int maxVersions)
198 throws IOException {
199 StoreScanner.enableLazySeekGlobally(lazySeekEnabled);
200 final Scan scan = new Scan();
201 final Set<String> qualSet = new HashSet<String>();
202 for (int iColumn : columnArr) {
203 String qualStr = getQualStr(iColumn);
204 scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qualStr));
205 qualSet.add(qualStr);
206 }
207 scan.setMaxVersions(maxVersions);
208 scan.setStartRow(rowBytes(startRow));
209
210
211 {
212 final byte[] scannerStopRow =
213 rowBytes(endRow + (startRow != endRow ? 1 : 0));
214 scan.setStopRow(scannerStopRow);
215 }
216
217 final long initialSeekCount = StoreFileScanner.getSeekCount();
218 final InternalScanner scanner = region.getScanner(scan);
219 final List<Cell> results = new ArrayList<Cell>();
220 final List<Cell> actualKVs = new ArrayList<Cell>();
221
222
223
224
225 boolean hasNext;
226 do {
227 hasNext = scanner.next(results);
228 actualKVs.addAll(results);
229 results.clear();
230 } while (hasNext);
231
232 List<Cell> filteredKVs = filterExpectedResults(qualSet,
233 rowBytes(startRow), rowBytes(endRow), maxVersions);
234 final String rowRestrictionStr =
235 (startRow == -1 && endRow == -1) ? "all rows" : (
236 startRow == endRow ? ("row=" + startRow) : ("startRow="
237 + startRow + ", " + "endRow=" + endRow));
238 final String columnRestrictionStr =
239 columnArr.length == 0 ? "all columns"
240 : ("columns=" + Arrays.toString(columnArr));
241 final String testDesc =
242 "Bloom=" + bloomType + ", compr=" + comprAlgo + ", "
243 + (scan.isGetScan() ? "Get" : "Scan") + ": "
244 + columnRestrictionStr + ", " + rowRestrictionStr
245 + ", maxVersions=" + maxVersions + ", lazySeek=" + lazySeekEnabled;
246 long seekCount = StoreFileScanner.getSeekCount() - initialSeekCount;
247 if (VERBOSE) {
248 System.err.println("Seek count: " + seekCount + ", KVs returned: "
249 + actualKVs.size() + ". " + testDesc +
250 (lazySeekEnabled ? "\n" : ""));
251 }
252 if (lazySeekEnabled) {
253 totalSeekLazy += seekCount;
254 } else {
255 totalSeekDiligent += seekCount;
256 }
257 assertKVListsEqual(testDesc, filteredKVs, actualKVs);
258 }
259
260 private List<Cell> filterExpectedResults(Set<String> qualSet,
261 byte[] startRow, byte[] endRow, int maxVersions) {
262 final List<Cell> filteredKVs = new ArrayList<Cell>();
263 final Map<String, Integer> verCount = new HashMap<String, Integer>();
264 for (Cell kv : expectedKVs) {
265 if (startRow.length > 0 &&
266 Bytes.compareTo(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
267 startRow, 0, startRow.length) < 0) {
268 continue;
269 }
270
271
272 if (endRow.length > 0 &&
273 Bytes.compareTo(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
274 endRow, 0, endRow.length) > 0) {
275 continue;
276 }
277
278 if (!qualSet.isEmpty() && (!CellUtil.matchingFamily(kv, FAMILY_BYTES)
279 || !qualSet.contains(Bytes.toString(CellUtil.cloneQualifier(kv))))) {
280 continue;
281 }
282
283 final String rowColStr =
284 Bytes.toStringBinary(CellUtil.cloneRow(kv)) + "/"
285 + Bytes.toStringBinary(CellUtil.cloneFamily(kv)) + ":"
286 + Bytes.toStringBinary(CellUtil.cloneQualifier(kv));
287 final Integer curNumVer = verCount.get(rowColStr);
288 final int newNumVer = curNumVer != null ? (curNumVer + 1) : 1;
289 if (newNumVer <= maxVersions) {
290 filteredKVs.add(kv);
291 verCount.put(rowColStr, newNumVer);
292 }
293 }
294
295 return filteredKVs;
296 }
297
298 private void prepareExpectedKVs(long latestDelTS) {
299 final List<Cell> filteredKVs = new ArrayList<Cell>();
300 for (Cell kv : expectedKVs) {
301 if (kv.getTimestamp() > latestDelTS || latestDelTS == -1) {
302 filteredKVs.add(kv);
303 }
304 }
305 expectedKVs = filteredKVs;
306 Collections.sort(expectedKVs, KeyValue.COMPARATOR);
307 }
308
309 public void put(String qual, long ts) {
310 if (!putTimestamps.contains(ts)) {
311 put.add(FAMILY_BYTES, Bytes.toBytes(qual), ts, createValue(ts));
312 putTimestamps.add(ts);
313 }
314 if (VERBOSE) {
315 LOG.info("put: row " + Bytes.toStringBinary(put.getRow())
316 + ", cf " + FAMILY + ", qualifier " + qual + ", ts " + ts);
317 }
318 }
319
320 private byte[] createValue(long ts) {
321 return Bytes.toBytes("value" + ts);
322 }
323
324 public void delAtTimestamp(String qual, long ts) {
325 del.deleteColumn(FAMILY_BYTES, Bytes.toBytes(qual), ts);
326 logDelete(qual, ts, "at");
327 }
328
329 private void logDelete(String qual, long ts, String delType) {
330 if (VERBOSE) {
331 LOG.info("del " + delType + ": row "
332 + Bytes.toStringBinary(put.getRow()) + ", cf " + FAMILY
333 + ", qualifier " + qual + ", ts " + ts);
334 }
335 }
336
337 private void delUpToTimestamp(String qual, long upToTS) {
338 del.deleteColumns(FAMILY_BYTES, Bytes.toBytes(qual), upToTS);
339 logDelete(qual, upToTS, "up to and including");
340 }
341
342 private long randLong(long n) {
343 long l = rand.nextLong();
344 if (l == Long.MIN_VALUE)
345 l = Long.MAX_VALUE;
346 return Math.abs(l) % n;
347 }
348
349 private long randBetween(long a, long b) {
350 long x = a + randLong(b - a + 1);
351 assertTrue(a <= x && x <= b);
352 return x;
353 }
354
355 private final String rowStr(int i) {
356 return ("row" + i).intern();
357 }
358
359 private final byte[] rowBytes(int i) {
360 if (i == -1) {
361 return HConstants.EMPTY_BYTE_ARRAY;
362 }
363 return Bytes.toBytes(rowStr(i));
364 }
365
366 private final String getQualStr(int i) {
367 return ("qual" + i).intern();
368 }
369
370 public void createTimestampRange(long minTS, long maxTS,
371 long deleteUpToTS) throws IOException {
372 assertTrue(minTS < maxTS);
373 assertTrue(deleteUpToTS == -1
374 || (minTS <= deleteUpToTS && deleteUpToTS <= maxTS));
375
376 for (int iRow = 0; iRow < NUM_ROWS; ++iRow) {
377 final String row = rowStr(iRow);
378 final byte[] rowBytes = Bytes.toBytes(row);
379 for (int iCol = 0; iCol < NUM_COLS; ++iCol) {
380 final String qual = getQualStr(iCol);
381 final byte[] qualBytes = Bytes.toBytes(qual);
382 put = new Put(rowBytes);
383
384 putTimestamps.clear();
385 put(qual, minTS);
386 put(qual, maxTS);
387 for (int i = 0; i < PUTS_PER_ROW_COL; ++i) {
388 put(qual, randBetween(minTS, maxTS));
389 }
390
391 long[] putTimestampList = new long[putTimestamps.size()];
392 {
393 int i = 0;
394 for (long ts : putTimestamps) {
395 putTimestampList[i++] = ts;
396 }
397 }
398
399
400 delTimestamps.clear();
401 assertTrue(putTimestampList.length >= DELETES_PER_ROW_COL);
402 int numToDel = DELETES_PER_ROW_COL;
403 int tsRemaining = putTimestampList.length;
404 del = new Delete(rowBytes);
405 for (long ts : putTimestampList) {
406 if (rand.nextInt(tsRemaining) < numToDel) {
407 delAtTimestamp(qual, ts);
408 putTimestamps.remove(ts);
409 --numToDel;
410 }
411
412 if (--tsRemaining == 0) {
413 break;
414 }
415 }
416
417
418 if (deleteUpToTS != -1) {
419 delUpToTimestamp(qual, deleteUpToTS);
420 }
421
422 region.put(put);
423 if (!del.isEmpty()) {
424 region.delete(del);
425 }
426
427
428
429 for (long ts : putTimestamps) {
430 expectedKVs.add(new KeyValue(rowBytes, FAMILY_BYTES, qualBytes, ts,
431 KeyValue.Type.Put));
432 }
433 }
434 }
435
436 region.flushcache();
437 }
438
439 @After
440 public void tearDown() throws IOException {
441 if (region != null) {
442 HRegion.closeHRegion(region);
443 }
444
445
446
447 StoreScanner.enableLazySeekGlobally(
448 StoreScanner.LAZY_SEEK_ENABLED_BY_DEFAULT);
449 }
450
451
452 }
453