1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase;
21
22 import org.apache.commons.logging.Log;
23 import org.apache.commons.logging.LogFactory;
24 import org.apache.hadoop.conf.Configuration;
25 import org.apache.hadoop.hbase.HealthChecker.HealthCheckerExitStatus;
26 import org.apache.hadoop.util.StringUtils;
27
28
29
30
31 public class HealthCheckChore extends Chore {
32 private static Log LOG = LogFactory.getLog(HealthCheckChore.class);
33 private HealthChecker healthChecker;
34 private Configuration config;
35 private int threshold;
36 private int numTimesUnhealthy = 0;
37 private long failureWindow;
38 private long startWindow;
39
40 public HealthCheckChore(int sleepTime, Stoppable stopper, Configuration conf) {
41 super("HealthChecker", sleepTime, stopper);
42 LOG.info("Health Check Chore runs every " + StringUtils.formatTime(sleepTime));
43 this.config = conf;
44 String healthCheckScript = this.config.get(HConstants.HEALTH_SCRIPT_LOC);
45 long scriptTimeout = this.config.getLong(HConstants.HEALTH_SCRIPT_TIMEOUT,
46 HConstants.DEFAULT_HEALTH_SCRIPT_TIMEOUT);
47 healthChecker = new HealthChecker();
48 healthChecker.init(healthCheckScript, scriptTimeout);
49 this.threshold = config.getInt(HConstants.HEALTH_FAILURE_THRESHOLD,
50 HConstants.DEFAULT_HEALTH_FAILURE_THRESHOLD);
51 this.failureWindow = (long)this.threshold * (long)sleepTime;
52 }
53
54 @Override
55 protected void chore() {
56 HealthReport report = healthChecker.checkHealth();
57 boolean isHealthy = (report.getStatus() == HealthCheckerExitStatus.SUCCESS);
58 if (!isHealthy) {
59 boolean needToStop = decideToStop();
60 if (needToStop) {
61 this.stopper.stop("The node reported unhealthy " + threshold
62 + " number of times consecutively.");
63 }
64
65 LOG.info("Health status at " + StringUtils.formatTime(System.currentTimeMillis()) + " : "
66 + report.getHealthReport());
67 }
68 }
69
70 private boolean decideToStop() {
71 boolean stop = false;
72 if (numTimesUnhealthy == 0) {
73
74
75 numTimesUnhealthy++;
76 startWindow = System.currentTimeMillis();
77 } else {
78 if ((System.currentTimeMillis() - startWindow) < failureWindow) {
79 numTimesUnhealthy++;
80 if (numTimesUnhealthy == threshold) {
81 stop = true;
82 } else {
83 stop = false;
84 }
85 } else {
86
87 numTimesUnhealthy = 1;
88 startWindow = System.currentTimeMillis();
89 stop = false;
90 }
91 }
92 return stop;
93 }
94
95 }