1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mttr;
20
21 import static org.junit.Assert.assertEquals;
22
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.concurrent.Callable;
26 import java.util.concurrent.ExecutorService;
27 import java.util.concurrent.Executors;
28 import java.util.concurrent.Future;
29 import java.util.concurrent.TimeUnit;
30
31 import org.apache.commons.lang.RandomStringUtils;
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34 import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
35 import org.apache.hadoop.hbase.ClusterStatus;
36 import org.apache.hadoop.hbase.HColumnDescriptor;
37 import org.apache.hadoop.hbase.HTableDescriptor;
38 import org.apache.hadoop.hbase.IntegrationTestingUtility;
39 import org.apache.hadoop.hbase.InvalidFamilyOperationException;
40 import org.apache.hadoop.hbase.NamespaceExistException;
41 import org.apache.hadoop.hbase.NamespaceNotFoundException;
42 import org.apache.hadoop.hbase.TableExistsException;
43 import org.apache.hadoop.hbase.TableName;
44 import org.apache.hadoop.hbase.TableNotFoundException;
45 import org.apache.hadoop.hbase.testclassification.IntegrationTests;
46 import org.apache.hadoop.hbase.chaos.actions.Action;
47 import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction;
48 import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction;
49 import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingMetaAction;
50 import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingTableAction;
51 import org.apache.hadoop.hbase.chaos.factories.MonkeyConstants;
52 import org.apache.hadoop.hbase.client.HBaseAdmin;
53 import org.apache.hadoop.hbase.client.HTable;
54 import org.apache.hadoop.hbase.client.Put;
55 import org.apache.hadoop.hbase.client.Result;
56 import org.apache.hadoop.hbase.client.ResultScanner;
57 import org.apache.hadoop.hbase.client.RetriesExhaustedException;
58 import org.apache.hadoop.hbase.client.Scan;
59 import org.apache.hadoop.hbase.coprocessor.CoprocessorException;
60 import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
61 import org.apache.hadoop.hbase.ipc.FatalConnectionException;
62 import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
63 import org.apache.hadoop.hbase.security.AccessDeniedException;
64 import org.apache.hadoop.hbase.util.Bytes;
65 import org.apache.hadoop.hbase.util.LoadTestTool;
66 import org.cloudera.htrace.Span;
67 import org.cloudera.htrace.Trace;
68 import org.cloudera.htrace.TraceScope;
69 import org.cloudera.htrace.impl.AlwaysSampler;
70 import org.junit.AfterClass;
71 import org.junit.BeforeClass;
72 import org.junit.Test;
73 import org.junit.experimental.categories.Category;
74
75 import com.google.common.base.Objects;
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117 @Category(IntegrationTests.class)
118 public class IntegrationTestMTTR {
119
120
121
122 private static final byte[] FAMILY = Bytes.toBytes("d");
123 private static final Log LOG = LogFactory.getLog(IntegrationTestMTTR.class);
124 private static long sleepTime;
125 private static final String SLEEP_TIME_KEY = "hbase.IntegrationTestMTTR.sleeptime";
126 private static final long SLEEP_TIME_DEFAULT = 60 * 1000l;
127
128
129
130
131 private static TableName tableName;
132 private static TableName loadTableName;
133
134
135
136
137 private static IntegrationTestingUtility util;
138
139
140
141
142 private static ExecutorService executorService;
143
144
145
146
147 private static Action restartRSAction;
148 private static Action restartMetaAction;
149 private static Action moveMetaRegionsAction;
150 private static Action moveRegionAction;
151 private static Action restartMasterAction;
152
153
154
155
156 private static LoadTestTool loadTool;
157
158
159 @BeforeClass
160 public static void setUp() throws Exception {
161
162 if (util == null) {
163 util = new IntegrationTestingUtility();
164 }
165
166
167 util.initializeCluster(3);
168
169
170 loadTool = new LoadTestTool();
171 loadTool.setConf(util.getConfiguration());
172
173
174
175 executorService = Executors.newFixedThreadPool(8);
176
177
178 setupTables();
179
180
181 sleepTime = util.getConfiguration().getLong(SLEEP_TIME_KEY, SLEEP_TIME_DEFAULT);
182 setupActions();
183 }
184
185 private static void setupActions() throws IOException {
186
187
188 util.getConfiguration().setLong(Action.START_RS_TIMEOUT_KEY, 3 * 60 * 1000);
189
190
191
192 restartRSAction = new RestartRsHoldingTableAction(sleepTime, tableName.getNameAsString());
193
194
195 restartMetaAction = new RestartRsHoldingMetaAction(sleepTime);
196
197
198 moveMetaRegionsAction = new MoveRegionsOfTableAction(sleepTime,
199 MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME, TableName.META_TABLE_NAME.getNameAsString());
200
201
202 moveRegionAction = new MoveRegionsOfTableAction(sleepTime,
203 MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME, tableName.getNameAsString());
204
205
206 restartMasterAction = new RestartActiveMasterAction(1000);
207
208
209 Action.ActionContext actionContext = new Action.ActionContext(util);
210 restartRSAction.init(actionContext);
211 restartMetaAction.init(actionContext);
212 moveMetaRegionsAction.init(actionContext);
213 moveRegionAction.init(actionContext);
214 restartMasterAction.init(actionContext);
215 }
216
217 private static void setupTables() throws IOException {
218
219 tableName = TableName.valueOf(util.getConfiguration()
220 .get("hbase.IntegrationTestMTTR.tableName", "IntegrationTestMTTR"));
221
222 loadTableName = TableName.valueOf(util.getConfiguration()
223 .get("hbase.IntegrationTestMTTR.loadTableName", "IntegrationTestMTTRLoadTestTool"));
224
225 if (util.getHBaseAdmin().tableExists(tableName)) {
226 util.deleteTable(tableName);
227 }
228
229 if (util.getHBaseAdmin().tableExists(loadTableName)) {
230 util.deleteTable(loadTableName);
231 }
232
233
234 HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
235
236
237 tableDescriptor.setMaxFileSize(Long.MAX_VALUE);
238
239 HColumnDescriptor descriptor = new HColumnDescriptor(FAMILY);
240 descriptor.setMaxVersions(1);
241 tableDescriptor.addFamily(descriptor);
242 util.getHBaseAdmin().createTable(tableDescriptor);
243
244
245 int ret = loadTool.run(new String[]{"-tn", loadTableName.getNameAsString(), "-init_only"});
246 assertEquals("Failed to initialize LoadTestTool", 0, ret);
247 }
248
249 @AfterClass
250 public static void after() throws IOException {
251
252 util.restoreCluster();
253 util = null;
254
255
256 executorService.shutdown();
257 executorService = null;
258
259
260 moveRegionAction = null;
261 restartMetaAction = null;
262 moveMetaRegionsAction = null;
263 restartRSAction = null;
264 restartMasterAction = null;
265
266 loadTool = null;
267 }
268
269 @Test
270 public void testRestartRsHoldingTable() throws Exception {
271 run(new ActionCallable(restartRSAction), "RestartRsHoldingTableAction");
272 }
273
274 @Test
275 public void testKillRsHoldingMeta() throws Exception {
276 run(new ActionCallable(restartMetaAction), "KillRsHoldingMeta");
277 }
278
279 @Test
280 public void testMoveMeta() throws Exception {
281 run(new ActionCallable(moveMetaRegionsAction), "MoveMeta");
282 }
283
284 @Test
285 public void testMoveRegion() throws Exception {
286 run(new ActionCallable(moveRegionAction), "MoveRegion");
287 }
288
289 @Test
290 public void testRestartMaster() throws Exception {
291 run(new ActionCallable(restartMasterAction), "RestartMaster");
292 }
293
294 public void run(Callable<Boolean> monkeyCallable, String testName) throws Exception {
295 int maxIters = util.getHBaseClusterInterface().isDistributedCluster() ? 10 : 3;
296 LOG.info("Starting " + testName + " with " + maxIters + " iterations.");
297
298
299 ArrayList<TimingResult> resultPuts = new ArrayList<TimingResult>(maxIters);
300 ArrayList<TimingResult> resultScan = new ArrayList<TimingResult>(maxIters);
301 ArrayList<TimingResult> resultAdmin = new ArrayList<TimingResult>(maxIters);
302 long start = System.nanoTime();
303
304 try {
305
306 for (int fullIterations = 0; fullIterations < maxIters; fullIterations++) {
307
308 Future<Boolean> monkeyFuture = executorService.submit(monkeyCallable);
309
310
311 Future<TimingResult> putFuture = executorService.submit(new PutCallable(monkeyFuture));
312 Future<TimingResult> scanFuture = executorService.submit(new ScanCallable(monkeyFuture));
313 Future<TimingResult> adminFuture = executorService.submit(new AdminCallable(monkeyFuture));
314
315 Future<Boolean> loadFuture = executorService.submit(new LoadCallable(monkeyFuture));
316
317 monkeyFuture.get();
318 loadFuture.get();
319
320
321 TimingResult putTime = putFuture.get();
322 TimingResult scanTime = scanFuture.get();
323 TimingResult adminTime = adminFuture.get();
324
325
326 resultPuts.add(putTime);
327 resultScan.add(scanTime);
328 resultAdmin.add(adminTime);
329
330
331 Thread.sleep(5000l);
332 }
333 } catch (Exception e) {
334 long runtimeMs = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
335 LOG.info(testName + " failed after " + runtimeMs + "ms.", e);
336 throw e;
337 }
338
339 long runtimeMs = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
340
341 Objects.ToStringHelper helper = Objects.toStringHelper("MTTRResults")
342 .add("putResults", resultPuts)
343 .add("scanResults", resultScan)
344 .add("adminResults", resultAdmin)
345 .add("totalRuntimeMs", runtimeMs)
346 .add("name", testName);
347
348
349 LOG.info(helper.toString());
350 }
351
352
353
354
355
356
357 private static class TimingResult {
358 DescriptiveStatistics stats = new DescriptiveStatistics();
359 ArrayList<Long> traces = new ArrayList<Long>(10);
360
361
362
363
364
365
366 public void addResult(long time, Span span) {
367 stats.addValue(TimeUnit.MILLISECONDS.convert(time, TimeUnit.NANOSECONDS));
368 if (TimeUnit.SECONDS.convert(time, TimeUnit.NANOSECONDS) >= 1) {
369 traces.add(span.getTraceId());
370 }
371 }
372
373 public String toString() {
374 Objects.ToStringHelper helper = Objects.toStringHelper(this)
375 .add("numResults", stats.getN())
376 .add("minTime", stats.getMin())
377 .add("meanTime", stats.getMean())
378 .add("maxTime", stats.getMax())
379 .add("25th", stats.getPercentile(25))
380 .add("50th", stats.getPercentile(50))
381 .add("75th", stats.getPercentile(75))
382 .add("90th", stats.getPercentile(90))
383 .add("95th", stats.getPercentile(95))
384 .add("99th", stats.getPercentile(99))
385 .add("99.9th", stats.getPercentile(99.9))
386 .add("99.99th", stats.getPercentile(99.99))
387 .add("traces", traces);
388 return helper.toString();
389 }
390 }
391
392
393
394
395 static abstract class TimingCallable implements Callable<TimingResult> {
396 protected final Future<?> future;
397
398 public TimingCallable(Future<?> f) {
399 future = f;
400 }
401
402 @Override
403 public TimingResult call() throws Exception {
404 TimingResult result = new TimingResult();
405 final int maxIterations = 10;
406 int numAfterDone = 0;
407 int resetCount = 0;
408
409 while (numAfterDone < maxIterations) {
410 long start = System.nanoTime();
411 TraceScope scope = null;
412 try {
413 scope = Trace.startSpan(getSpanName(), AlwaysSampler.INSTANCE);
414 boolean actionResult = doAction();
415 if (actionResult && future.isDone()) {
416 numAfterDone++;
417 }
418
419
420
421
422
423
424 } catch (AccessDeniedException e) {
425 throw e;
426 } catch (CoprocessorException e) {
427 throw e;
428 } catch (FatalConnectionException e) {
429 throw e;
430 } catch (InvalidFamilyOperationException e) {
431 throw e;
432 } catch (NamespaceExistException e) {
433 throw e;
434 } catch (NamespaceNotFoundException e) {
435 throw e;
436 } catch (NoSuchColumnFamilyException e) {
437 throw e;
438 } catch (TableExistsException e) {
439 throw e;
440 } catch (TableNotFoundException e) {
441 throw e;
442 } catch (RetriesExhaustedException e){
443 throw e;
444
445
446
447
448
449 } catch (Exception e) {
450 resetCount++;
451 if (resetCount < maxIterations) {
452 LOG.info("Non-fatal exception while running " + this.toString()
453 + ". Resetting loop counter", e);
454 numAfterDone = 0;
455 } else {
456 LOG.info("Too many unexpected Exceptions. Aborting.", e);
457 throw e;
458 }
459 } finally {
460 if (scope != null) {
461 scope.close();
462 }
463 }
464 result.addResult(System.nanoTime() - start, scope.getSpan());
465 }
466 return result;
467 }
468
469 protected abstract boolean doAction() throws Exception;
470
471 protected String getSpanName() {
472 return this.getClass().getSimpleName();
473 }
474
475 @Override
476 public String toString() {
477 return this.getSpanName();
478 }
479 }
480
481
482
483
484
485 static class PutCallable extends TimingCallable {
486
487 private final HTable table;
488
489 public PutCallable(Future<?> f) throws IOException {
490 super(f);
491 this.table = new HTable(util.getConfiguration(), tableName);
492 }
493
494 @Override
495 protected boolean doAction() throws Exception {
496 Put p = new Put(Bytes.toBytes(RandomStringUtils.randomAlphanumeric(5)));
497 p.add(FAMILY, Bytes.toBytes("\0"), Bytes.toBytes(RandomStringUtils.randomAscii(5)));
498 table.put(p);
499 table.flushCommits();
500 return true;
501 }
502
503 @Override
504 protected String getSpanName() {
505 return "MTTR Put Test";
506 }
507 }
508
509
510
511
512
513 static class ScanCallable extends TimingCallable {
514 private final HTable table;
515
516 public ScanCallable(Future<?> f) throws IOException {
517 super(f);
518 this.table = new HTable(util.getConfiguration(), tableName);
519 }
520
521 @Override
522 protected boolean doAction() throws Exception {
523 ResultScanner rs = null;
524 try {
525 Scan s = new Scan();
526 s.setBatch(2);
527 s.addFamily(FAMILY);
528 s.setFilter(new KeyOnlyFilter());
529 s.setMaxVersions(1);
530
531 rs = table.getScanner(s);
532 Result result = rs.next();
533 return result != null && result.size() > 0;
534 } finally {
535 if (rs != null) {
536 rs.close();
537 }
538 }
539 }
540 @Override
541 protected String getSpanName() {
542 return "MTTR Scan Test";
543 }
544 }
545
546
547
548
549 static class AdminCallable extends TimingCallable {
550
551 public AdminCallable(Future<?> f) throws IOException {
552 super(f);
553 }
554
555 @Override
556 protected boolean doAction() throws Exception {
557 HBaseAdmin admin = null;
558 try {
559 admin = new HBaseAdmin(util.getConfiguration());
560 ClusterStatus status = admin.getClusterStatus();
561 return status != null;
562 } finally {
563 if (admin != null) {
564 admin.close();
565 }
566 }
567 }
568
569 @Override
570 protected String getSpanName() {
571 return "MTTR Admin Test";
572 }
573 }
574
575
576 static class ActionCallable implements Callable<Boolean> {
577 private final Action action;
578
579 public ActionCallable(Action action) {
580 this.action = action;
581 }
582
583 @Override
584 public Boolean call() throws Exception {
585 this.action.perform();
586 return true;
587 }
588 }
589
590
591
592
593
594 public static class LoadCallable implements Callable<Boolean> {
595
596 private final Future<?> future;
597
598 public LoadCallable(Future<?> f) {
599 future = f;
600 }
601
602 @Override
603 public Boolean call() throws Exception {
604 int colsPerKey = 10;
605 int numServers = util.getHBaseClusterInterface().getInitialClusterStatus().getServersSize();
606 int numKeys = numServers * 5000;
607 int writeThreads = 10;
608
609
610
611
612 do {
613 int ret = loadTool.run(new String[]{
614 "-tn", loadTableName.getNameAsString(),
615 "-write", String.format("%d:%d:%d", colsPerKey, 500, writeThreads),
616 "-num_keys", String.valueOf(numKeys),
617 "-skip_init"
618 });
619 assertEquals("Load failed", 0, ret);
620 } while (!future.isDone());
621
622 return true;
623 }
624 }
625 }