1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.tool;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.HashMap;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Set;
29 import java.util.TreeSet;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32
33 import org.apache.commons.lang.time.StopWatch;
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.hbase.AuthUtil;
38 import org.apache.hadoop.hbase.DoNotRetryIOException;
39 import org.apache.hadoop.hbase.HBaseConfiguration;
40 import org.apache.hadoop.hbase.HColumnDescriptor;
41 import org.apache.hadoop.hbase.HRegionInfo;
42 import org.apache.hadoop.hbase.HTableDescriptor;
43 import org.apache.hadoop.hbase.ServerName;
44 import org.apache.hadoop.hbase.TableName;
45 import org.apache.hadoop.hbase.TableNotEnabledException;
46 import org.apache.hadoop.hbase.TableNotFoundException;
47 import org.apache.hadoop.hbase.client.Get;
48 import org.apache.hadoop.hbase.client.HBaseAdmin;
49 import org.apache.hadoop.hbase.client.HTable;
50 import org.apache.hadoop.hbase.client.ResultScanner;
51 import org.apache.hadoop.hbase.client.Scan;
52 import org.apache.hadoop.util.Tool;
53 import org.apache.hadoop.util.ToolRunner;
54
55
56
57
58
59
60
61
62
63
64
65
66 public final class Canary implements Tool {
67
68 public interface Sink {
69 public void publishReadFailure(HRegionInfo region, Exception e);
70 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
71 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
72 }
73
74
75 public interface ExtendedSink extends Sink {
76 public void publishReadFailure(String table, String server);
77 public void publishReadTiming(String table, String server, long msTime);
78 }
79
80
81
82 public static class StdOutSink implements Sink {
83 @Override
84 public void publishReadFailure(HRegionInfo region, Exception e) {
85 LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e);
86 }
87
88 @Override
89 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
90 LOG.error(String.format("read from region %s column family %s failed",
91 region.getRegionNameAsString(), column.getNameAsString()), e);
92 }
93
94 @Override
95 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
96 LOG.info(String.format("read from region %s column family %s in %dms",
97 region.getRegionNameAsString(), column.getNameAsString(), msTime));
98 }
99 }
100
101 public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink {
102
103 @Override
104 public void publishReadFailure(String table, String server) {
105 LOG.error(String.format("Read from table:%s on region server:%s", table, server));
106 }
107
108 @Override
109 public void publishReadTiming(String table, String server, long msTime) {
110 LOG.info(String.format("Read from table:%s on region server:%s in %dms",
111 table, server, msTime));
112 }
113 }
114
115 private static final int USAGE_EXIT_CODE = 1;
116 private static final int INIT_ERROR_EXIT_CODE = 2;
117 private static final int TIMEOUT_ERROR_EXIT_CODE = 3;
118 private static final int ERROR_EXIT_CODE = 4;
119
120 private static final long DEFAULT_INTERVAL = 6000;
121
122 private static final long DEFAULT_TIMEOUT = 600000;
123
124 private static final Log LOG = LogFactory.getLog(Canary.class);
125
126 private Configuration conf = null;
127 private long interval = 0;
128 private Sink sink = null;
129
130 private boolean useRegExp;
131 private long timeout = DEFAULT_TIMEOUT;
132 private boolean failOnError = true;
133 private boolean regionServerMode = false;
134
/** Creates a canary that reports through a {@link RegionServerStdOutSink}. */
public Canary() {
  this(new RegionServerStdOutSink());
}
138
/**
 * Creates a canary that reports through the given sink.
 *
 * @param sink receiver of read timings and failures
 */
public Canary(Sink sink) {
  this.sink = sink;
}
142
143 @Override
144 public Configuration getConf() {
145 return conf;
146 }
147
148 @Override
149 public void setConf(Configuration conf) {
150 this.conf = conf;
151 }
152
153 @Override
154 public int run(String[] args) throws Exception {
155 int index = -1;
156
157
158 for (int i = 0; i < args.length; i++) {
159 String cmd = args[i];
160
161 if (cmd.startsWith("-")) {
162 if (index >= 0) {
163
164 System.err.println("Invalid command line options");
165 printUsageAndExit();
166 }
167
168 if (cmd.equals("-help")) {
169
170 printUsageAndExit();
171 } else if (cmd.equals("-daemon") && interval == 0) {
172
173 interval = DEFAULT_INTERVAL;
174 } else if (cmd.equals("-interval")) {
175
176 i++;
177
178 if (i == args.length) {
179 System.err.println("-interval needs a numeric value argument.");
180 printUsageAndExit();
181 }
182
183 try {
184 interval = Long.parseLong(args[i]) * 1000;
185 } catch (NumberFormatException e) {
186 System.err.println("-interval needs a numeric value argument.");
187 printUsageAndExit();
188 }
189 } else if(cmd.equals("-regionserver")) {
190 this.regionServerMode = true;
191 } else if (cmd.equals("-e")) {
192 this.useRegExp = true;
193 } else if (cmd.equals("-t")) {
194 i++;
195
196 if (i == args.length) {
197 System.err.println("-t needs a numeric value argument.");
198 printUsageAndExit();
199 }
200
201 try {
202 this.timeout = Long.parseLong(args[i]);
203 } catch (NumberFormatException e) {
204 System.err.println("-t needs a numeric value argument.");
205 printUsageAndExit();
206 }
207
208 } else if (cmd.equals("-f")) {
209 i++;
210
211 if (i == args.length) {
212 System.err
213 .println("-f needs a boolean value argument (true|false).");
214 printUsageAndExit();
215 }
216
217 this.failOnError = Boolean.parseBoolean(args[i]);
218 } else {
219
220 System.err.println(cmd + " options is invalid.");
221 printUsageAndExit();
222 }
223 } else if (index < 0) {
224
225 index = i;
226 }
227 }
228
229
230 AuthUtil.launchAuthChore(conf);
231
232
233 Monitor monitor = null;
234 Thread monitorThread = null;
235 long startTime = 0;
236 long currentTimeLength = 0;
237
238 do {
239
240 monitor = this.newMonitor(index, args);
241 monitorThread = new Thread(monitor);
242 startTime = System.currentTimeMillis();
243 monitorThread.start();
244 while (!monitor.isDone()) {
245
246 Thread.sleep(1000);
247
248 if (this.failOnError && monitor.hasError()) {
249 monitorThread.interrupt();
250 if (monitor.initialized) {
251 System.exit(monitor.errorCode);
252 } else {
253 System.exit(INIT_ERROR_EXIT_CODE);
254 }
255 }
256 currentTimeLength = System.currentTimeMillis() - startTime;
257 if (currentTimeLength > this.timeout) {
258 LOG.error("The monitor is running too long (" + currentTimeLength
259 + ") after timeout limit:" + this.timeout
260 + " will be killed itself !!");
261 if (monitor.initialized) {
262 System.exit(TIMEOUT_ERROR_EXIT_CODE);
263 } else {
264 System.exit(INIT_ERROR_EXIT_CODE);
265 }
266 break;
267 }
268 }
269
270 if (this.failOnError && monitor.hasError()) {
271 monitorThread.interrupt();
272 System.exit(monitor.errorCode);
273 }
274
275 Thread.sleep(interval);
276 } while (interval > 0);
277
278 return(monitor.errorCode);
279 }
280
281 private void printUsageAndExit() {
282 System.err.printf(
283 "Usage: bin/hbase %s [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]%n",
284 getClass().getName());
285 System.err.println(" where [opts] are:");
286 System.err.println(" -help Show this help and exit.");
287 System.err.println(" -regionserver replace the table argument to regionserver,");
288 System.err.println(" which means to enable regionserver mode");
289 System.err.println(" -daemon Continuous check at defined intervals.");
290 System.err.println(" -interval <N> Interval between checks (sec)");
291 System.err.println(" -e Use region/regionserver as regular expression");
292 System.err.println(" which means the region/regionserver is regular expression pattern");
293 System.err.println(" -f <B> stop whole program if first error occurs," +
294 " default is true");
295 System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)");
296 System.exit(USAGE_EXIT_CODE);
297 }
298
299
300
301
302
303
304
305
306 public Monitor newMonitor(int index, String[] args) {
307 Monitor monitor = null;
308 String[] monitorTargets = null;
309
310 if(index >= 0) {
311 int length = args.length - index;
312 monitorTargets = new String[length];
313 System.arraycopy(args, index, monitorTargets, 0, length);
314 }
315
316 if(this.regionServerMode) {
317 monitor = new RegionServerMonitor(
318 this.conf,
319 monitorTargets,
320 this.useRegExp,
321 (ExtendedSink)this.sink);
322 } else {
323 monitor = new RegionMonitor(this.conf, monitorTargets, this.useRegExp, this.sink);
324 }
325 return monitor;
326 }
327
328
329 public static abstract class Monitor implements Runnable {
330
331 protected Configuration config;
332 protected HBaseAdmin admin;
333 protected String[] targets;
334 protected boolean useRegExp;
335 protected boolean initialized = false;
336
337 protected boolean done = false;
338 protected int errorCode = 0;
339 protected Sink sink;
340
341 public boolean isDone() {
342 return done;
343 }
344
345 public boolean hasError() {
346 return errorCode != 0;
347 }
348
349 protected Monitor(Configuration config, String[] monitorTargets,
350 boolean useRegExp, Sink sink) {
351 if (null == config)
352 throw new IllegalArgumentException("config shall not be null");
353
354 this.config = config;
355 this.targets = monitorTargets;
356 this.useRegExp = useRegExp;
357 this.sink = sink;
358 }
359
360 public abstract void run();
361
362 protected boolean initAdmin() {
363 if (null == this.admin) {
364 try {
365 this.admin = new HBaseAdmin(config);
366 } catch (Exception e) {
367 LOG.error("Initial HBaseAdmin failed...", e);
368 this.errorCode = INIT_ERROR_EXIT_CODE;
369 }
370 } else if (admin.isAborted()) {
371 LOG.error("HBaseAdmin aborted");
372 this.errorCode = INIT_ERROR_EXIT_CODE;
373 }
374 return !this.hasError();
375 }
376 }
377
378
379 private static class RegionMonitor extends Monitor {
380
381 public RegionMonitor(Configuration config, String[] monitorTargets,
382 boolean useRegExp, Sink sink) {
383 super(config, monitorTargets, useRegExp, sink);
384 }
385
386 @Override
387 public void run() {
388 if(this.initAdmin()) {
389 try {
390 if (this.targets != null && this.targets.length > 0) {
391 String[] tables = generateMonitorTables(this.targets);
392 this.initialized = true;
393 for (String table : tables) {
394 Canary.sniff(admin, sink, table);
395 }
396 } else {
397 sniff();
398 }
399 } catch (Exception e) {
400 LOG.error("Run regionMonitor failed", e);
401 this.errorCode = ERROR_EXIT_CODE;
402 }
403 }
404 this.done = true;
405 }
406
407 private String[] generateMonitorTables(String[] monitorTargets) throws IOException {
408 String[] returnTables = null;
409
410 if(this.useRegExp) {
411 Pattern pattern = null;
412 HTableDescriptor[] tds = null;
413 Set<String> tmpTables = new TreeSet<String>();
414 try {
415 for (String monitorTarget : monitorTargets) {
416 pattern = Pattern.compile(monitorTarget);
417 tds = this.admin.listTables(pattern);
418 if (tds != null) {
419 for (HTableDescriptor td : tds) {
420 tmpTables.add(td.getNameAsString());
421 }
422 }
423 }
424 } catch(IOException e) {
425 LOG.error("Communicate with admin failed", e);
426 throw e;
427 }
428
429 if(tmpTables.size() > 0) {
430 returnTables = tmpTables.toArray(new String[tmpTables.size()]);
431 } else {
432 String msg = "No HTable found, tablePattern:"
433 + Arrays.toString(monitorTargets);
434 LOG.error(msg);
435 this.errorCode = INIT_ERROR_EXIT_CODE;
436 throw new TableNotFoundException(msg);
437 }
438 } else {
439 returnTables = monitorTargets;
440 }
441
442 return returnTables;
443 }
444
445
446
447
448 private void sniff() throws Exception {
449 for (HTableDescriptor table : admin.listTables()) {
450 Canary.sniff(admin, sink, table);
451 }
452 }
453
454 }
455
456
457
458
459
460 public static void sniff(final HBaseAdmin admin, TableName tableName) throws Exception {
461 sniff(admin, new StdOutSink(), tableName.getNameAsString());
462 }
463
464
465
466
467
468 private static void sniff(final HBaseAdmin admin, final Sink sink, String tableName)
469 throws Exception {
470 if (admin.isTableAvailable(tableName)) {
471 sniff(admin, sink, admin.getTableDescriptor(tableName.getBytes()));
472 } else {
473 LOG.warn(String.format("Table %s is not available", tableName));
474 }
475 }
476
477
478
479
480 private static void sniff(final HBaseAdmin admin, final Sink sink, HTableDescriptor tableDesc)
481 throws Exception {
482 HTable table = null;
483
484 try {
485 table = new HTable(admin.getConfiguration(), tableDesc.getName());
486 } catch (TableNotFoundException e) {
487 return;
488 }
489
490 try {
491 for (HRegionInfo region : admin.getTableRegions(tableDesc.getName())) {
492 try {
493 sniffRegion(admin, sink, region, table);
494 } catch (Exception e) {
495 sink.publishReadFailure(region, e);
496 LOG.debug("sniffRegion failed", e);
497 }
498 }
499 } finally {
500 table.close();
501 }
502 }
503
504
505
506
507
508 private static void sniffRegion(
509 final HBaseAdmin admin,
510 final Sink sink,
511 HRegionInfo region,
512 HTable table) throws Exception {
513 HTableDescriptor tableDesc = table.getTableDescriptor();
514 byte[] startKey = null;
515 Get get = null;
516 Scan scan = null;
517 ResultScanner rs = null;
518 StopWatch stopWatch = new StopWatch();
519 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
520 stopWatch.reset();
521 startKey = region.getStartKey();
522
523 if (startKey.length > 0) {
524 get = new Get(startKey);
525 get.addFamily(column.getName());
526 } else {
527 scan = new Scan();
528 scan.setCaching(1);
529 scan.addFamily(column.getName());
530 scan.setMaxResultSize(1L);
531 }
532
533 try {
534 if (startKey.length > 0) {
535 stopWatch.start();
536 table.get(get);
537 stopWatch.stop();
538 sink.publishReadTiming(region, column, stopWatch.getTime());
539 } else {
540 stopWatch.start();
541 rs = table.getScanner(scan);
542 stopWatch.stop();
543 sink.publishReadTiming(region, column, stopWatch.getTime());
544 }
545 } catch (Exception e) {
546 sink.publishReadFailure(region, column, e);
547 } finally {
548 if (rs != null) {
549 rs.close();
550 }
551 scan = null;
552 get = null;
553 startKey = null;
554 }
555 }
556 }
557
558 private static class RegionServerMonitor extends Monitor {
559
/**
 * @param config configuration handed to the base monitor
 * @param monitorTargets region server names (or patterns) from the command line
 * @param useRegExp whether the targets are regular expressions
 * @param sink receiver of per-table/per-server results
 */
public RegionServerMonitor(
    Configuration config,
    String[] monitorTargets,
    boolean useRegExp,
    ExtendedSink sink) {
  super(config, monitorTargets, useRegExp, sink);
}
564
565 private ExtendedSink getSink() {
566 return (ExtendedSink) this.sink;
567 }
568
569 @Override
570 public void run() {
571 if (this.initAdmin() && this.checkNoTableNames()) {
572 Map<String, List<HRegionInfo>> rsAndRMap = this.filterRegionServerByName();
573 this.initialized = true;
574 this.monitorRegionServers(rsAndRMap);
575 }
576 this.done = true;
577 }
578
579 private boolean checkNoTableNames() {
580 List<String> foundTableNames = new ArrayList<String>();
581 TableName[] tableNames = null;
582
583 try {
584 tableNames = this.admin.listTableNames();
585 } catch (IOException e) {
586 LOG.error("Get listTableNames failed", e);
587 this.errorCode = INIT_ERROR_EXIT_CODE;
588 return false;
589 }
590
591 if (this.targets == null || this.targets.length == 0) return true;
592
593 for (String target : this.targets) {
594 for (TableName tableName : tableNames) {
595 if (target.equals(tableName.getNameAsString())) {
596 foundTableNames.add(target);
597 }
598 }
599 }
600
601 if (foundTableNames.size() > 0) {
602 System.err.println("Cannot pass a tablename when using the -regionserver " +
603 "option, tablenames:" + foundTableNames.toString());
604 this.errorCode = USAGE_EXIT_CODE;
605 }
606 return foundTableNames.size() == 0;
607 }
608
609 private void monitorRegionServers(Map<String, List<HRegionInfo>> rsAndRMap) {
610 String serverName = null;
611 String tableName = null;
612 HRegionInfo region = null;
613 HTable table = null;
614 Get get = null;
615 byte[] startKey = null;
616 Scan scan = null;
617 StopWatch stopWatch = new StopWatch();
618
619 for (Map.Entry<String, List<HRegionInfo>> entry : rsAndRMap.entrySet()) {
620 stopWatch.reset();
621 serverName = entry.getKey();
622
623 region = entry.getValue().get(0);
624 try {
625 tableName = region.getTable().getNameAsString();
626 table = new HTable(this.admin.getConfiguration(), tableName);
627 startKey = region.getStartKey();
628
629 if(startKey.length > 0) {
630 get = new Get(startKey);
631 stopWatch.start();
632 table.get(get);
633 stopWatch.stop();
634 } else {
635 scan = new Scan();
636 scan.setCaching(1);
637 scan.setMaxResultSize(1L);
638 stopWatch.start();
639 table.getScanner(scan);
640 stopWatch.stop();
641 }
642 this.getSink().publishReadTiming(tableName, serverName, stopWatch.getTime());
643 } catch (TableNotFoundException tnfe) {
644
645 } catch (TableNotEnabledException tnee) {
646
647 LOG.debug("The targeted table was disabled. Assuming success.");
648 } catch (DoNotRetryIOException dnrioe) {
649 this.getSink().publishReadFailure(tableName, serverName);
650 LOG.error(dnrioe);
651 } catch (IOException e) {
652 this.getSink().publishReadFailure(tableName, serverName);
653 LOG.error(e);
654 this.errorCode = ERROR_EXIT_CODE;
655 } finally {
656 if (table != null) {
657 try {
658 table.close();
659 } catch (IOException e) {
660 }
661 }
662 scan = null;
663 get = null;
664 startKey = null;
665 }
666 }
667 }
668
669 private Map<String, List<HRegionInfo>> filterRegionServerByName() {
670 Map<String, List<HRegionInfo>> regionServerAndRegionsMap = this.getAllRegionServerByName();
671 regionServerAndRegionsMap = this.doFilterRegionServerByName(regionServerAndRegionsMap);
672 return regionServerAndRegionsMap;
673 }
674
675 private Map<String, List<HRegionInfo>> getAllRegionServerByName() {
676 Map<String, List<HRegionInfo>> rsAndRMap = new HashMap<String, List<HRegionInfo>>();
677 HTable table = null;
678 try {
679 HTableDescriptor[] tableDescs = this.admin.listTables();
680 List<HRegionInfo> regions = null;
681 for (HTableDescriptor tableDesc : tableDescs) {
682 table = new HTable(this.admin.getConfiguration(), tableDesc.getName());
683
684 for (Map.Entry<HRegionInfo, ServerName> entry : table
685 .getRegionLocations().entrySet()) {
686 ServerName rs = entry.getValue();
687 String rsName = rs.getHostname();
688 HRegionInfo r = entry.getKey();
689
690 if (rsAndRMap.containsKey(rsName)) {
691 regions = rsAndRMap.get(rsName);
692 } else {
693 regions = new ArrayList<HRegionInfo>();
694 rsAndRMap.put(rsName, regions);
695 }
696 regions.add(r);
697 }
698 table.close();
699 }
700
701 } catch (IOException e) {
702 String msg = "Get HTables info failed";
703 LOG.error(msg, e);
704 this.errorCode = INIT_ERROR_EXIT_CODE;
705 } finally {
706 if (table != null) {
707 try {
708 table.close();
709 } catch (IOException e) {
710 LOG.warn("Close table failed", e);
711 }
712 }
713 }
714
715 return rsAndRMap;
716 }
717
718 private Map<String, List<HRegionInfo>> doFilterRegionServerByName(
719 Map<String, List<HRegionInfo>> fullRsAndRMap) {
720
721 Map<String, List<HRegionInfo>> filteredRsAndRMap = null;
722
723 if (this.targets != null && this.targets.length > 0) {
724 filteredRsAndRMap = new HashMap<String, List<HRegionInfo>>();
725 Pattern pattern = null;
726 Matcher matcher = null;
727 boolean regExpFound = false;
728 for (String rsName : this.targets) {
729 if (this.useRegExp) {
730 regExpFound = false;
731 pattern = Pattern.compile(rsName);
732 for (Map.Entry<String,List<HRegionInfo>> entry : fullRsAndRMap.entrySet()) {
733 matcher = pattern.matcher(entry.getKey());
734 if (matcher.matches()) {
735 filteredRsAndRMap.put(entry.getKey(), entry.getValue());
736 regExpFound = true;
737 }
738 }
739 if (!regExpFound) {
740 LOG.info("No RegionServerInfo found, regionServerPattern:" + rsName);
741 }
742 } else {
743 if (fullRsAndRMap.containsKey(rsName)) {
744 filteredRsAndRMap.put(rsName, fullRsAndRMap.get(rsName));
745 } else {
746 LOG.info("No RegionServerInfo found, regionServerName:" + rsName);
747 }
748 }
749 }
750 } else {
751 filteredRsAndRMap = fullRsAndRMap;
752 }
753 return filteredRsAndRMap;
754 }
755 }
756
757 public static void main(String[] args) throws Exception {
758 final Configuration conf = HBaseConfiguration.create();
759 int exitCode = ToolRunner.run(conf, new Canary(), args);
760 System.exit(exitCode);
761 }
762 }