1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.chaos.actions;
20
21 import java.io.IOException;
22 import java.util.Collection;
23 import java.util.LinkedList;
24 import java.util.List;
25
26 import org.apache.commons.lang.math.RandomUtils;
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.hbase.ClusterStatus;
30 import org.apache.hadoop.hbase.HBaseCluster;
31 import org.apache.hadoop.hbase.HRegionInfo;
32 import org.apache.hadoop.hbase.IntegrationTestingUtility;
33 import org.apache.hadoop.hbase.ServerLoad;
34 import org.apache.hadoop.hbase.ServerName;
35 import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
36 import org.apache.hadoop.hbase.client.HBaseAdmin;
37 import org.apache.hadoop.hbase.util.Bytes;
38
39
40
41
42 public class Action {
43
44 public static final String KILL_MASTER_TIMEOUT_KEY =
45 "hbase.chaosmonkey.action.killmastertimeout";
46 public static final String START_MASTER_TIMEOUT_KEY =
47 "hbase.chaosmonkey.action.startmastertimeout";
48 public static final String KILL_RS_TIMEOUT_KEY = "hbase.chaosmonkey.action.killrstimeout";
49 public static final String START_RS_TIMEOUT_KEY = "hbase.chaosmonkey.action.startrstimeout";
50
51 protected static Log LOG = LogFactory.getLog(Action.class);
52
53 protected static final long KILL_MASTER_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
54 protected static final long START_MASTER_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
55 protected static final long KILL_RS_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
56 protected static final long START_RS_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
57
58 protected ActionContext context;
59 protected HBaseCluster cluster;
60 protected ClusterStatus initialStatus;
61 protected ServerName[] initialServers;
62
63 protected long killMasterTimeout;
64 protected long startMasterTimeout;
65 protected long killRsTimeout;
66 protected long startRsTimeout;
67
68 public void init(ActionContext context) throws IOException {
69 this.context = context;
70 cluster = context.getHBaseCluster();
71 initialStatus = cluster.getInitialClusterStatus();
72 Collection<ServerName> regionServers = initialStatus.getServers();
73 initialServers = regionServers.toArray(new ServerName[regionServers.size()]);
74
75 killMasterTimeout = cluster.getConf().getLong(KILL_MASTER_TIMEOUT_KEY,
76 KILL_MASTER_TIMEOUT_DEFAULT);
77 startMasterTimeout = cluster.getConf().getLong(START_MASTER_TIMEOUT_KEY,
78 START_MASTER_TIMEOUT_DEFAULT);
79 killRsTimeout = cluster.getConf().getLong(KILL_RS_TIMEOUT_KEY, KILL_RS_TIMEOUT_DEFAULT);
80 startRsTimeout = cluster.getConf().getLong(START_RS_TIMEOUT_KEY, START_RS_TIMEOUT_DEFAULT);
81 }
82
83 public void perform() throws Exception { }
84
85
86 protected ServerName[] getCurrentServers() throws IOException {
87 Collection<ServerName> regionServers = cluster.getClusterStatus().getServers();
88 if (regionServers == null || regionServers.size() <= 0) return new ServerName [] {};
89 return regionServers.toArray(new ServerName[regionServers.size()]);
90 }
91
92 protected void killMaster(ServerName server) throws IOException {
93 LOG.info("Killing master:" + server);
94 cluster.killMaster(server);
95 cluster.waitForMasterToStop(server, killMasterTimeout);
96 LOG.info("Killed master server:" + server);
97 }
98
99 protected void startMaster(ServerName server) throws IOException {
100 LOG.info("Starting master:" + server.getHostname());
101 cluster.startMaster(server.getHostname(), server.getPort());
102 cluster.waitForActiveAndReadyMaster(startMasterTimeout);
103 LOG.info("Started master: " + server);
104 }
105
106 protected void killRs(ServerName server) throws IOException {
107 LOG.info("Killing region server:" + server);
108 cluster.killRegionServer(server);
109 cluster.waitForRegionServerToStop(server, killRsTimeout);
110 LOG.info("Killed region server:" + server + ". Reported num of rs:"
111 + cluster.getClusterStatus().getServersSize());
112 }
113
114 protected void startRs(ServerName server) throws IOException {
115 LOG.info("Starting region server:" + server.getHostname());
116 cluster.startRegionServer(server.getHostname(), server.getPort());
117 cluster.waitForRegionServerToStart(server.getHostname(), server.getPort(), startRsTimeout);
118 LOG.info("Started region server:" + server + ". Reported num of rs:"
119 + cluster.getClusterStatus().getServersSize());
120 }
121
122 protected void unbalanceRegions(ClusterStatus clusterStatus,
123 List<ServerName> fromServers, List<ServerName> toServers,
124 double fractionOfRegions) throws Exception {
125 List<byte[]> victimRegions = new LinkedList<byte[]>();
126 for (ServerName server : fromServers) {
127 ServerLoad serverLoad = clusterStatus.getLoad(server);
128
129 List<byte[]> regions = new LinkedList<byte[]>(serverLoad.getRegionsLoad().keySet());
130 int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size());
131 LOG.debug("Removing " + victimRegionCount + " regions from " + server.getServerName());
132 for (int i = 0; i < victimRegionCount; ++i) {
133 int victimIx = RandomUtils.nextInt(regions.size());
134 String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx));
135 victimRegions.add(Bytes.toBytes(regionId));
136 }
137 }
138
139 LOG.info("Moving " + victimRegions.size() + " regions from " + fromServers.size()
140 + " servers to " + toServers.size() + " different servers");
141 HBaseAdmin admin = this.context.getHBaseIntegrationTestingUtility().getHBaseAdmin();
142 for (byte[] victimRegion : victimRegions) {
143 int targetIx = RandomUtils.nextInt(toServers.size());
144 admin.move(victimRegion, Bytes.toBytes(toServers.get(targetIx).getServerName()));
145 }
146 }
147
148 protected void forceBalancer() throws Exception {
149 HBaseAdmin admin = this.context.getHBaseIntegrationTestingUtility().getHBaseAdmin();
150 boolean result = false;
151 try {
152 result = admin.balancer();
153 } catch (Exception e) {
154 LOG.warn("Got exception while doing balance ", e);
155 }
156 if (!result) {
157 LOG.error("Balancer didn't succeed");
158 }
159 }
160
161
162
163
164 public static class ActionContext {
165 private IntegrationTestingUtility util;
166
167 public ActionContext(IntegrationTestingUtility util) {
168 this.util = util;
169 }
170
171 public IntegrationTestingUtility getHBaseIntegrationTestingUtility() {
172 return util;
173 }
174
175 public HBaseCluster getHBaseCluster() {
176 return util.getHBaseClusterInterface();
177 }
178 }
179 }