View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotSame;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.IOException;
27  import java.util.*;
28  import java.util.concurrent.atomic.AtomicBoolean;
29  
30  import org.apache.hadoop.hbase.CellScannable;
31  import org.apache.hadoop.hbase.CellUtil;
32  import org.apache.hadoop.hbase.DoNotRetryIOException;
33  import org.apache.hadoop.hbase.HBaseConfiguration;
34  import org.apache.hadoop.hbase.HBaseTestingUtility;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.HRegionInfo;
37  import org.apache.hadoop.hbase.testclassification.MediumTests;
38  import org.apache.hadoop.hbase.RegionException;
39  import org.apache.hadoop.hbase.RegionTransition;
40  import org.apache.hadoop.hbase.Server;
41  import org.apache.hadoop.hbase.ServerLoad;
42  import org.apache.hadoop.hbase.ServerName;
43  import org.apache.hadoop.hbase.TableName;
44  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
45  import org.apache.hadoop.hbase.catalog.CatalogTracker;
46  import org.apache.hadoop.hbase.catalog.MetaMockingUtil;
47  import org.apache.hadoop.hbase.client.HConnection;
48  import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
49  import org.apache.hadoop.hbase.client.Result;
50  import org.apache.hadoop.hbase.exceptions.DeserializationException;
51  import org.apache.hadoop.hbase.executor.EventType;
52  import org.apache.hadoop.hbase.executor.ExecutorService;
53  import org.apache.hadoop.hbase.executor.ExecutorType;
54  import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
55  import org.apache.hadoop.hbase.master.RegionState.State;
56  import org.apache.hadoop.hbase.master.TableLockManager.NullTableLockManager;
57  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
58  import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
59  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
60  import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
61  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
62  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
63  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetRequest;
64  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetResponse;
65  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
66  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse;
67  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table;
68  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
69  import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
70  import org.apache.hadoop.hbase.util.Bytes;
71  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
72  import org.apache.hadoop.hbase.util.Threads;
73  import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
74  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
75  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
76  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
77  import org.apache.zookeeper.KeeperException;
78  import org.apache.zookeeper.KeeperException.NodeExistsException;
79  import org.apache.zookeeper.Watcher;
80  import org.junit.After;
81  import org.junit.AfterClass;
82  import org.junit.Before;
83  import org.junit.BeforeClass;
84  import org.junit.Test;
85  import org.junit.experimental.categories.Category;
86  import org.mockito.Mockito;
87  import org.mockito.internal.util.reflection.Whitebox;
88  import org.mockito.invocation.InvocationOnMock;
89  import org.mockito.stubbing.Answer;
90  
91  import com.google.protobuf.RpcController;
92  import com.google.protobuf.ServiceException;
93  
94  
95  /**
96   * Test {@link AssignmentManager}
97   */
98  @Category(MediumTests.class)
99  public class TestAssignmentManager {
  // Shared testing utility; provides the Configuration and the mini ZooKeeper cluster
  // used by every test in this class.
  private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
  // Fake region servers that the mocked ServerManager reports as online.
  private static final ServerName SERVERNAME_A =
      ServerName.valueOf("example.org", 1234, 5678);
  private static final ServerName SERVERNAME_B =
      ServerName.valueOf("example.org", 0, 5678);
  // NOTE(review): SERVERNAME_C is built from exactly the same arguments as SERVERNAME_B.
  // If ServerName equality is value-based the two denote the same server (and the
  // "draining" server is then also the balance target) -- confirm this is intended.
  private static final ServerName SERVERNAME_C =
      ServerName.valueOf("example.org", 0, 5678);
  // The single region of table "t" used by the tests; EMPTY_START_ROW is passed for
  // both row-key arguments.
  private static final HRegionInfo REGIONINFO =
    new HRegionInfo(TableName.valueOf("t"),
      HConstants.EMPTY_START_ROW, HConstants.EMPTY_START_ROW);
  private static int assignmentCount;
  private static boolean enabling = false;

  // Mocked objects; they get redone for each test.
  private Server server;
  private ServerManager serverManager;
  private ZooKeeperWatcher watcher;
  private LoadBalancer balancer;
  private HMaster master;
119 
  /**
   * One-time setup: sets {@code hbase.assignment.usezk} to {@code true} in the shared
   * configuration and starts the mini ZooKeeper cluster.
   *
   * @throws Exception if the mini ZooKeeper cluster cannot be started
   */
  @BeforeClass
  public static void beforeClass() throws Exception {
    HTU.getConfiguration().setBoolean("hbase.assignment.usezk", true);
    HTU.startMiniZKCluster();
  }
125 
  /**
   * One-time teardown: stops the mini ZooKeeper cluster started in {@link #beforeClass()}.
   *
   * @throws IOException if shutting the cluster down fails
   */
  @AfterClass
  public static void afterClass() throws IOException {
    HTU.shutdownMiniZKCluster();
  }
130 
131   @Before
132   public void before() throws ZooKeeperConnectionException, IOException {
133     // TODO: Make generic versions of what we do below and put up in a mocking
134     // utility class or move up into HBaseTestingUtility.
135 
136     // Mock a Server.  Have it return a legit Configuration and ZooKeeperWatcher.
137     // If abort is called, be sure to fail the test (don't just swallow it
138     // silently as is mockito default).
139     this.server = Mockito.mock(Server.class);
140     Mockito.when(server.getServerName()).thenReturn(ServerName.valueOf("master,1,1"));
141     Mockito.when(server.getConfiguration()).thenReturn(HTU.getConfiguration());
142     Mockito.when(server.getCatalogTracker()).thenReturn(null);
143     this.watcher =
144       new ZooKeeperWatcher(HTU.getConfiguration(), "mockedServer", this.server, true);
145     Mockito.when(server.getZooKeeper()).thenReturn(this.watcher);
146     Mockito.doThrow(new RuntimeException("Aborted")).
147       when(server).abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
148 
149     // Mock a ServerManager.  Say server SERVERNAME_{A,B} are online.  Also
150     // make it so if close or open, we return 'success'.
151     this.serverManager = Mockito.mock(ServerManager.class);
152     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
153     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_B)).thenReturn(true);
154     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_C)).thenReturn(true);
155     Mockito.when(this.serverManager.getDeadServers()).thenReturn(new DeadServer());
156     final Map<ServerName, ServerLoad> onlineServers = new HashMap<ServerName, ServerLoad>();
157     onlineServers.put(SERVERNAME_B, ServerLoad.EMPTY_SERVERLOAD);
158     onlineServers.put(SERVERNAME_A, ServerLoad.EMPTY_SERVERLOAD);
159     onlineServers.put(SERVERNAME_C, ServerLoad.EMPTY_SERVERLOAD);
160     Mockito.when(this.serverManager.getOnlineServersList()).thenReturn(
161         new ArrayList<ServerName>(onlineServers.keySet()));
162     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers);
163 
164     List<ServerName> avServers = new ArrayList<ServerName>();
165     avServers.addAll(onlineServers.keySet());
166     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(avServers);
167     Mockito.when(this.serverManager.createDestinationServersList(null)).thenReturn(avServers);
168 
169     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, REGIONINFO, -1)).
170       thenReturn(true);
171     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_B, REGIONINFO, -1)).
172       thenReturn(true);
173     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_C, REGIONINFO, -1)).
174         thenReturn(true);
175     // Ditto on open.
176     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_A, REGIONINFO, -1, null)).
177       thenReturn(RegionOpeningState.OPENED);
178     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_B, REGIONINFO, -1, null)).
179       thenReturn(RegionOpeningState.OPENED);
180     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_C, REGIONINFO, -1, null)).
181         thenReturn(RegionOpeningState.OPENED);
182     this.master = Mockito.mock(HMaster.class);
183     Mockito.when(this.serverManager.getDrainingServersList())
184         .thenReturn(Arrays.asList(SERVERNAME_C));
185 
186     Mockito.when(this.master.getServerManager()).thenReturn(serverManager);
187   }
188 
189   @After
190     public void after() throws KeeperException {
191     if (this.watcher != null) {
192       // Clean up all znodes
193       ZKAssign.deleteAllNodes(this.watcher);
194       this.watcher.close();
195     }
196   }
197 
198   /**
199    * Test a balance going on at same time as a master failover
200    *
201    * @throws IOException
202    * @throws KeeperException
203    * @throws InterruptedException
204    * @throws DeserializationException
205    */
206   @Test(timeout = 60000)
207   public void testBalanceOnMasterFailoverScenarioWithOpenedNode()
208   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
209     AssignmentManagerWithExtrasForTesting am =
210       setUpMockedAssignmentManager(this.server, this.serverManager);
211     try {
212       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
213       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
214       while (!am.processRITInvoked) Thread.sleep(1);
215       // As part of the failover cleanup, the balancing region plan is removed.
216       // So a random server will be used to open the region. For testing purpose,
217       // let's assume it is going to open on server b:
218       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
219 
220       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
221 
222       // Now fake the region closing successfully over on the regionserver; the
223       // regionserver will have set the region in CLOSED state. This will
224       // trigger callback into AM. The below zk close call is from the RS close
225       // region handler duplicated here because its down deep in a private
226       // method hard to expose.
227       int versionid =
228         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
229       assertNotSame(versionid, -1);
230       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
231 
232       // Get current versionid else will fail on transition from OFFLINE to
233       // OPENING below
234       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
235       assertNotSame(-1, versionid);
236       // This uglyness below is what the openregionhandler on RS side does.
237       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
238         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
239         EventType.RS_ZK_REGION_OPENING, versionid);
240       assertNotSame(-1, versionid);
241       // Move znode from OPENING to OPENED as RS does on successful open.
242       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
243         SERVERNAME_B, versionid);
244       assertNotSame(-1, versionid);
245       am.gate.set(false);
246       // Block here until our znode is cleared or until this test times out.
247       ZKAssign.blockUntilNoRIT(watcher);
248     } finally {
249       am.getExecutorService().shutdown();
250       am.shutdown();
251     }
252   }
253 
254   @Test(timeout = 60000)
255   public void testGettingAssignmentsExcludesDrainingServers() throws Exception {
256     AssignmentManagerWithExtrasForTesting am =
257         setUpMockedAssignmentManager(this.server, this.serverManager);
258 
259     Map<TableName, Map<ServerName, List<HRegionInfo>>>
260         result = am.getRegionStates().getAssignmentsByTable();
261     for (Map<ServerName, List<HRegionInfo>> map : result.values()) {
262       System.out.println(map.keySet());
263       assertFalse(map.containsKey(SERVERNAME_C));
264     }
265   }
266 
267   @Test(timeout = 60000)
268   public void testBalanceOnMasterFailoverScenarioWithClosedNode()
269   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
270     AssignmentManagerWithExtrasForTesting am =
271       setUpMockedAssignmentManager(this.server, this.serverManager);
272     try {
273       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
274       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
275       while (!am.processRITInvoked) Thread.sleep(1);
276       // As part of the failover cleanup, the balancing region plan is removed.
277       // So a random server will be used to open the region. For testing purpose,
278       // let's assume it is going to open on server b:
279       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
280 
281       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
282 
283       // Now fake the region closing successfully over on the regionserver; the
284       // regionserver will have set the region in CLOSED state. This will
285       // trigger callback into AM. The below zk close call is from the RS close
286       // region handler duplicated here because its down deep in a private
287       // method hard to expose.
288       int versionid =
289         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
290       assertNotSame(versionid, -1);
291       am.gate.set(false);
292       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
293 
294       // Get current versionid else will fail on transition from OFFLINE to
295       // OPENING below
296       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
297       assertNotSame(-1, versionid);
298       // This uglyness below is what the openregionhandler on RS side does.
299       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
300           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
301           EventType.RS_ZK_REGION_OPENING, versionid);
302       assertNotSame(-1, versionid);
303       // Move znode from OPENING to OPENED as RS does on successful open.
304       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
305           SERVERNAME_B, versionid);
306       assertNotSame(-1, versionid);
307 
308       // Block here until our znode is cleared or until this test timesout.
309       ZKAssign.blockUntilNoRIT(watcher);
310     } finally {
311       am.getExecutorService().shutdown();
312       am.shutdown();
313     }
314   }
315 
316   @Test(timeout = 60000)
317   public void testBalanceOnMasterFailoverScenarioWithOfflineNode()
318   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
319     AssignmentManagerWithExtrasForTesting am =
320       setUpMockedAssignmentManager(this.server, this.serverManager);
321     try {
322       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
323       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
324       while (!am.processRITInvoked) Thread.sleep(1);
325       // As part of the failover cleanup, the balancing region plan is removed.
326       // So a random server will be used to open the region. For testing purpose,
327       // let's assume it is going to open on server b:
328       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
329 
330       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
331 
332       // Now fake the region closing successfully over on the regionserver; the
333       // regionserver will have set the region in CLOSED state. This will
334       // trigger callback into AM. The below zk close call is from the RS close
335       // region handler duplicated here because its down deep in a private
336       // method hard to expose.
337       int versionid =
338         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
339       assertNotSame(versionid, -1);
340       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
341 
342       am.gate.set(false);
343       // Get current versionid else will fail on transition from OFFLINE to
344       // OPENING below
345       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
346       assertNotSame(-1, versionid);
347       // This uglyness below is what the openregionhandler on RS side does.
348       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
349           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
350           EventType.RS_ZK_REGION_OPENING, versionid);
351       assertNotSame(-1, versionid);
352       // Move znode from OPENING to OPENED as RS does on successful open.
353       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
354           SERVERNAME_B, versionid);
355       assertNotSame(-1, versionid);
356       // Block here until our znode is cleared or until this test timesout.
357       ZKAssign.blockUntilNoRIT(watcher);
358     } finally {
359       am.getExecutorService().shutdown();
360       am.shutdown();
361     }
362   }
363 
364   private void createRegionPlanAndBalance(
365       final AssignmentManager am, final ServerName from,
366       final ServerName to, final HRegionInfo hri) throws RegionException {
367     // Call the balance function but fake the region being online first at
368     // servername from.
369     am.regionOnline(hri, from);
370     // Balance region from 'from' to 'to'. It calls unassign setting CLOSING state
371     // up in zk.  Create a plan and balance
372     am.balance(new RegionPlan(hri, from, to));
373   }
374 
375   /**
376    * Tests AssignmentManager balance function.  Runs a balance moving a region
377    * from one server to another mocking regionserver responding over zk.
378    * @throws IOException
379    * @throws KeeperException
380    * @throws DeserializationException
381    */
382   @Test (timeout=180000)
383   public void testBalance()
384     throws IOException, KeeperException, DeserializationException, InterruptedException {
385     // Create and startup an executor.  This is used by AssignmentManager
386     // handling zk callbacks.
387     ExecutorService executor = startupMasterExecutor("testBalanceExecutor");
388 
389     // We need a mocked catalog tracker.
390     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
391     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
392         .getConfiguration());
393     // Create an AM.
394     AssignmentManager am = new AssignmentManager(this.server,
395       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
396     am.failoverCleanupDone.set(true);
397     try {
398       // Make sure our new AM gets callbacks; once registered, can't unregister.
399       // Thats ok because we make a new zk watcher for each test.
400       this.watcher.registerListenerFirst(am);
401       // Call the balance function but fake the region being online first at
402       // SERVERNAME_A.  Create a balance plan.
403       am.regionOnline(REGIONINFO, SERVERNAME_A);
404       // Balance region from A to B.
405       RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
406       am.balance(plan);
407 
408       RegionStates regionStates = am.getRegionStates();
409       // Must be failed to close since the server is fake
410       assertTrue(regionStates.isRegionInTransition(REGIONINFO)
411         && regionStates.isRegionInState(REGIONINFO, State.FAILED_CLOSE));
412       // Move it back to pending_close
413       regionStates.updateRegionState(REGIONINFO, State.PENDING_CLOSE);
414 
415       // Now fake the region closing successfully over on the regionserver; the
416       // regionserver will have set the region in CLOSED state.  This will
417       // trigger callback into AM. The below zk close call is from the RS close
418       // region handler duplicated here because its down deep in a private
419       // method hard to expose.
420       int versionid =
421         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
422       assertNotSame(versionid, -1);
423       // AM is going to notice above CLOSED and queue up a new assign.  The
424       // assign will go to open the region in the new location set by the
425       // balancer.  The zk node will be OFFLINE waiting for regionserver to
426       // transition it through OPENING, OPENED.  Wait till we see the OFFLINE
427       // zk node before we proceed.
428       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
429 
430       // Get current versionid else will fail on transition from OFFLINE to OPENING below
431       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
432       assertNotSame(-1, versionid);
433       // This uglyness below is what the openregionhandler on RS side does.
434       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
435         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
436         EventType.RS_ZK_REGION_OPENING, versionid);
437       assertNotSame(-1, versionid);
438       // Move znode from OPENING to OPENED as RS does on successful open.
439       versionid =
440         ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, SERVERNAME_B, versionid);
441       assertNotSame(-1, versionid);
442       // Wait on the handler removing the OPENED znode.
443       while(regionStates.isRegionInTransition(REGIONINFO)) Threads.sleep(1);
444     } finally {
445       executor.shutdown();
446       am.shutdown();
447       // Clean up all znodes
448       ZKAssign.deleteAllNodes(this.watcher);
449     }
450   }
451 
452   /**
453    * Run a simple server shutdown handler.
454    * @throws KeeperException
455    * @throws IOException
456    */
457   @Test (timeout=180000)
458   public void testShutdownHandler()
459       throws KeeperException, IOException, ServiceException {
460     // Create and startup an executor.  This is used by AssignmentManager
461     // handling zk callbacks.
462     ExecutorService executor = startupMasterExecutor("testShutdownHandler");
463 
464     // We need a mocked catalog tracker.
465     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
466     // Create an AM.
467     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
468         this.server, this.serverManager);
469     try {
470       processServerShutdownHandler(ct, am, false);
471     } finally {
472       executor.shutdown();
473       am.shutdown();
474       // Clean up all znodes
475       ZKAssign.deleteAllNodes(this.watcher);
476     }
477   }
478 
479   /**
480    * To test closed region handler to remove rit and delete corresponding znode
481    * if region in pending close or closing while processing shutdown of a region
482    * server.(HBASE-5927).
483    *
484    * @throws KeeperException
485    * @throws IOException
486    * @throws ServiceException
487    */
488   @Test (timeout=180000)
489   public void testSSHWhenDisableTableInProgress() throws KeeperException, IOException,
490       ServiceException {
491     testCaseWithPartiallyDisabledState(Table.State.DISABLING);
492     testCaseWithPartiallyDisabledState(Table.State.DISABLED);
493   }
494 
495 
496   /**
497    * To test if the split region is removed from RIT if the region was in SPLITTING state but the RS
498    * has actually completed the splitting in hbase:meta but went down. See HBASE-6070 and also HBASE-5806
499    *
500    * @throws KeeperException
501    * @throws IOException
502    */
503   @Test (timeout=180000)
504   public void testSSHWhenSplitRegionInProgress() throws KeeperException, IOException, Exception {
505     // true indicates the region is split but still in RIT
506     testCaseWithSplitRegionPartial(true);
507     // false indicate the region is not split
508     testCaseWithSplitRegionPartial(false);
509   }
510 
511   private void testCaseWithSplitRegionPartial(boolean regionSplitDone) throws KeeperException,
512       IOException, NodeExistsException, InterruptedException, ServiceException {
513     // Create and startup an executor. This is used by AssignmentManager
514     // handling zk callbacks.
515     ExecutorService executor = startupMasterExecutor("testSSHWhenSplitRegionInProgress");
516     // We need a mocked catalog tracker.
517     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
518     ZKAssign.deleteAllNodes(this.watcher);
519 
520     // Create an AM.
521     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
522       this.server, this.serverManager);
523     // adding region to regions and servers maps.
524     am.regionOnline(REGIONINFO, SERVERNAME_A);
525     // adding region in pending close.
526     am.getRegionStates().updateRegionState(
527       REGIONINFO, State.SPLITTING, SERVERNAME_A);
528     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
529     RegionTransition data = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
530         REGIONINFO.getRegionName(), SERVERNAME_A);
531     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
532     // create znode in M_ZK_REGION_CLOSING state.
533     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
534 
535     try {
536       processServerShutdownHandler(ct, am, regionSplitDone);
537       // check znode deleted or not.
538       // In both cases the znode should be deleted.
539 
540       if (regionSplitDone) {
541         assertFalse("Region state of region in SPLITTING should be removed from rit.",
542             am.getRegionStates().isRegionsInTransition());
543       } else {
544         while (!am.assignInvoked) {
545           Thread.sleep(1);
546         }
547         assertTrue("Assign should be invoked.", am.assignInvoked);
548       }
549     } finally {
550       REGIONINFO.setOffline(false);
551       REGIONINFO.setSplit(false);
552       executor.shutdown();
553       am.shutdown();
554       // Clean up all znodes
555       ZKAssign.deleteAllNodes(this.watcher);
556     }
557   }
558 
559   private void testCaseWithPartiallyDisabledState(Table.State state) throws KeeperException,
560       IOException, NodeExistsException, ServiceException {
561     // Create and startup an executor. This is used by AssignmentManager
562     // handling zk callbacks.
563     ExecutorService executor = startupMasterExecutor("testSSHWhenDisableTableInProgress");
564     // We need a mocked catalog tracker.
565     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
566     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
567     ZKAssign.deleteAllNodes(this.watcher);
568 
569     // Create an AM.
570     AssignmentManager am = new AssignmentManager(this.server,
571       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
572     // adding region to regions and servers maps.
573     am.regionOnline(REGIONINFO, SERVERNAME_A);
574     // adding region in pending close.
575     am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
576     if (state == Table.State.DISABLING) {
577       am.getZKTable().setDisablingTable(REGIONINFO.getTable());
578     } else {
579       am.getZKTable().setDisabledTable(REGIONINFO.getTable());
580     }
581     RegionTransition data = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
582         REGIONINFO.getRegionName(), SERVERNAME_A);
583     // RegionTransitionData data = new
584     // RegionTransitionData(EventType.M_ZK_REGION_CLOSING,
585     // REGIONINFO.getRegionName(), SERVERNAME_A);
586     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
587     // create znode in M_ZK_REGION_CLOSING state.
588     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
589 
590     try {
591       processServerShutdownHandler(ct, am, false);
592       // check znode deleted or not.
593       // In both cases the znode should be deleted.
594       assertTrue("The znode should be deleted.", ZKUtil.checkExists(this.watcher, node) == -1);
595       // check whether in rit or not. In the DISABLING case also the below
596       // assert will be true but the piece of code added for HBASE-5927 will not
597       // do that.
598       if (state == Table.State.DISABLED) {
599         assertFalse("Region state of region in pending close should be removed from rit.",
600             am.getRegionStates().isRegionsInTransition());
601       }
602     } finally {
603       am.setEnabledTable(REGIONINFO.getTable());
604       executor.shutdown();
605       am.shutdown();
606       // Clean up all znodes
607       ZKAssign.deleteAllNodes(this.watcher);
608     }
609   }
610 
611   private void processServerShutdownHandler(CatalogTracker ct, AssignmentManager am, boolean splitRegion)
612       throws IOException, ServiceException {
613     // Make sure our new AM gets callbacks; once registered, can't unregister.
614     // Thats ok because we make a new zk watcher for each test.
615     this.watcher.registerListenerFirst(am);
616 
617     // Need to set up a fake scan of meta for the servershutdown handler
618     // Make an RS Interface implementation.  Make it so a scanner can go against it.
619     ClientProtos.ClientService.BlockingInterface implementation =
620       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
621     // Get a meta row result that has region up on SERVERNAME_A
622 
623     Result r;
624     if (splitRegion) {
625       r = MetaMockingUtil.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A);
626     } else {
627       r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
628     }
629 
630     final ScanResponse.Builder builder = ScanResponse.newBuilder();
631     builder.setMoreResults(true);
632     builder.addCellsPerResult(r.size());
633     final List<CellScannable> cellScannables = new ArrayList<CellScannable>(1);
634     cellScannables.add(r);
635     Mockito.when(implementation.scan(
636       (RpcController)Mockito.any(), (ScanRequest)Mockito.any())).
637       thenAnswer(new Answer<ScanResponse>() {
638           @Override
639           public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
640             PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
641                 .getArguments()[0];
642             if (controller != null) {
643               controller.setCellScanner(CellUtil.createCellScanner(cellScannables));
644             }
645             return builder.build();
646           }
647       });
648 
649     // Get a connection w/ mocked up common methods.
650     HConnection connection =
651       HConnectionTestingUtility.getMockedConnectionAndDecorate(HTU.getConfiguration(),
652         null, implementation, SERVERNAME_B, REGIONINFO);
653 
654     // Make it so we can get a catalogtracker from servermanager.. .needed
655     // down in guts of server shutdown handler.
656     Mockito.when(ct.getConnection()).thenReturn(connection);
657     Mockito.when(this.server.getCatalogTracker()).thenReturn(ct);
658 
659     // Now make a server shutdown handler instance and invoke process.
660     // Have it that SERVERNAME_A died.
661     DeadServer deadServers = new DeadServer();
662     deadServers.add(SERVERNAME_A);
663     // I need a services instance that will return the AM
664     MasterFileSystem fs = Mockito.mock(MasterFileSystem.class);
665     Mockito.doNothing().when(fs).setLogRecoveryMode();
666     Mockito.when(fs.getLogRecoveryMode()).thenReturn(RecoveryMode.LOG_REPLAY);
667     MasterServices services = Mockito.mock(MasterServices.class);
668     Mockito.when(services.getAssignmentManager()).thenReturn(am);
669     Mockito.when(services.getServerManager()).thenReturn(this.serverManager);
670     Mockito.when(services.getZooKeeper()).thenReturn(this.watcher);
671     Mockito.when(services.getMasterFileSystem()).thenReturn(fs);
672     ServerShutdownHandler handler = new ServerShutdownHandler(this.server,
673       services, deadServers, SERVERNAME_A, false);
674     am.failoverCleanupDone.set(true);
675     handler.process();
676     // The region in r will have been assigned.  It'll be up in zk as unassigned.
677   }
678 
679   /**
680    * Create and startup executor pools. Start same set as master does (just
681    * run a few less).
682    * @param name Name to give our executor
683    * @return Created executor (be sure to call shutdown when done).
684    */
685   private ExecutorService startupMasterExecutor(final String name) {
686     // TODO: Move up into HBaseTestingUtility?  Generally useful.
687     ExecutorService executor = new ExecutorService(name);
688     executor.startExecutorService(ExecutorType.MASTER_OPEN_REGION, 3);
689     executor.startExecutorService(ExecutorType.MASTER_CLOSE_REGION, 3);
690     executor.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS, 3);
691     executor.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS, 3);
692     return executor;
693   }
694 
695   @Test (timeout=180000)
696   public void testUnassignWithSplitAtSameTime() throws KeeperException, IOException {
697     // Region to use in test.
698     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
699     // First amend the servermanager mock so that when we do send close of the
700     // first meta region on SERVERNAME_A, it will return true rather than
701     // default null.
702     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, hri, -1)).thenReturn(true);
703     // Need a mocked catalog tracker.
704     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
705     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
706         .getConfiguration());
707     // Create an AM.
708     AssignmentManager am = new AssignmentManager(this.server,
709       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
710     try {
711       // First make sure my mock up basically works.  Unassign a region.
712       unassign(am, SERVERNAME_A, hri);
713       // This delete will fail if the previous unassign did wrong thing.
714       ZKAssign.deleteClosingNode(this.watcher, hri, SERVERNAME_A);
715       // Now put a SPLITTING region in the way.  I don't have to assert it
716       // go put in place.  This method puts it in place then asserts it still
717       // owns it by moving state from SPLITTING to SPLITTING.
718       int version = createNodeSplitting(this.watcher, hri, SERVERNAME_A);
719       // Now, retry the unassign with the SPLTTING in place.  It should just
720       // complete without fail; a sort of 'silent' recognition that the
721       // region to unassign has been split and no longer exists: TOOD: what if
722       // the split fails and the parent region comes back to life?
723       unassign(am, SERVERNAME_A, hri);
724       // This transition should fail if the znode has been messed with.
725       ZKAssign.transitionNode(this.watcher, hri, SERVERNAME_A,
726         EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
727       assertFalse(am.getRegionStates().isRegionInTransition(hri));
728     } finally {
729       am.shutdown();
730     }
731   }
732 
733   /**
734    * Tests the processDeadServersAndRegionsInTransition should not fail with NPE
735    * when it failed to get the children. Let's abort the system in this
736    * situation
737    * @throws ServiceException
738    */
739   @Test(timeout = 60000)
740   public void testProcessDeadServersAndRegionsInTransitionShouldNotFailWithNPE()
741       throws IOException, KeeperException, InterruptedException, ServiceException {
742     final RecoverableZooKeeper recoverableZk = Mockito
743         .mock(RecoverableZooKeeper.class);
744     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
745       this.server, this.serverManager);
746     Watcher zkw = new ZooKeeperWatcher(HBaseConfiguration.create(), "unittest",
747         null) {
748       @Override
749       public RecoverableZooKeeper getRecoverableZooKeeper() {
750         return recoverableZk;
751       }
752     };
753     ((ZooKeeperWatcher) zkw).registerListener(am);
754     Mockito.doThrow(new InterruptedException()).when(recoverableZk)
755         .getChildren("/hbase/region-in-transition", null);
756     am.setWatcher((ZooKeeperWatcher) zkw);
757     try {
758       am.processDeadServersAndRegionsInTransition(null);
759       fail("Expected to abort");
760     } catch (NullPointerException e) {
761       fail("Should not throw NPE");
762     } catch (RuntimeException e) {
763       assertEquals("Aborted", e.getLocalizedMessage());
764     }
765   }
766   /**
767    * TestCase verifies that the regionPlan is updated whenever a region fails to open
768    * and the master tries to process RS_ZK_FAILED_OPEN state.(HBASE-5546).
769    */
770   @Test(timeout = 60000)
771   public void testRegionPlanIsUpdatedWhenRegionFailsToOpen() throws IOException, KeeperException,
772       ServiceException, InterruptedException {
773     this.server.getConfiguration().setClass(
774       HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockedLoadBalancer.class,
775       LoadBalancer.class);
776     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
777       this.server, this.serverManager);
778     try {
779       // Boolean variable used for waiting until randomAssignment is called and
780       // new
781       // plan is generated.
782       AtomicBoolean gate = new AtomicBoolean(false);
783       if (balancer instanceof MockedLoadBalancer) {
784         ((MockedLoadBalancer) balancer).setGateVariable(gate);
785       }
786       ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
787       int v = ZKAssign.getVersion(this.watcher, REGIONINFO);
788       ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A,
789           EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_FAILED_OPEN, v);
790       String path = ZKAssign.getNodeName(this.watcher, REGIONINFO
791           .getEncodedName());
792       am.getRegionStates().updateRegionState(
793         REGIONINFO, State.OPENING, SERVERNAME_A);
794       // a dummy plan inserted into the regionPlans. This plan is cleared and
795       // new one is formed
796       am.regionPlans.put(REGIONINFO.getEncodedName(), new RegionPlan(
797           REGIONINFO, null, SERVERNAME_A));
798       RegionPlan regionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
799       List<ServerName> serverList = new ArrayList<ServerName>(2);
800       serverList.add(SERVERNAME_B);
801       Mockito.when(
802           this.serverManager.createDestinationServersList(SERVERNAME_A))
803           .thenReturn(serverList);
804       am.nodeDataChanged(path);
805       // here we are waiting until the random assignment in the load balancer is
806       // called.
807       while (!gate.get()) {
808         Thread.sleep(10);
809       }
810       // new region plan may take some time to get updated after random
811       // assignment is called and
812       // gate is set to true.
813       RegionPlan newRegionPlan = am.regionPlans
814           .get(REGIONINFO.getEncodedName());
815       while (newRegionPlan == null) {
816         Thread.sleep(10);
817         newRegionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
818       }
819       // the new region plan created may contain the same RS as destination but
820       // it should
821       // be new plan.
822       assertNotSame("Same region plan should not come", regionPlan,
823           newRegionPlan);
824       assertTrue("Destination servers should be different.", !(regionPlan
825           .getDestination().equals(newRegionPlan.getDestination())));
826 
827       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
828     } finally {
829       this.server.getConfiguration().setClass(
830           HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
831           LoadBalancer.class);
832       am.getExecutorService().shutdown();
833       am.shutdown();
834     }
835   }
836 
837   /**
838    * Mocked load balancer class used in the testcase to make sure that the testcase waits until
839    * random assignment is called and the gate variable is set to true.
840    */
841   public static class MockedLoadBalancer extends SimpleLoadBalancer {
842     private AtomicBoolean gate;
843 
844     public void setGateVariable(AtomicBoolean gate) {
845       this.gate = gate;
846     }
847 
848     @Override
849     public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
850       ServerName randomServerName = super.randomAssignment(regionInfo, servers);
851       this.gate.set(true);
852       return randomServerName;
853     }
854 
855     @Override
856     public Map<ServerName, List<HRegionInfo>> retainAssignment(
857         Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
858       this.gate.set(true);
859       return super.retainAssignment(regions, servers);
860     }
861   }
862 
863   /**
864    * Test the scenario when the master is in failover and trying to process a
865    * region which is in Opening state on a dead RS. Master will force offline the
866    * region and put it in transition. AM relies on SSH to reassign it.
867    */
868   @Test(timeout = 60000)
869   public void testRegionInOpeningStateOnDeadRSWhileMasterFailover() throws IOException,
870       KeeperException, ServiceException, InterruptedException {
871     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
872       this.server, this.serverManager);
873     ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
874     int version = ZKAssign.getVersion(this.watcher, REGIONINFO);
875     ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A, EventType.M_ZK_REGION_OFFLINE,
876         EventType.RS_ZK_REGION_OPENING, version);
877     RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_OPENING,
878         REGIONINFO.getRegionName(), SERVERNAME_A, HConstants.EMPTY_BYTE_ARRAY);
879     version = ZKAssign.getVersion(this.watcher, REGIONINFO);
880     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(false);
881     am.getRegionStates().logSplit(SERVERNAME_A); // Assume log splitting is done
882     am.getRegionStates().createRegionState(REGIONINFO);
883     am.gate.set(false);
884     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
885     assertFalse(am.processRegionsInTransition(rt, REGIONINFO, version));
886     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
887     processServerShutdownHandler(ct, am, false);
888     // Waiting for the assignment to get completed.
889     while (!am.gate.get()) {
890       Thread.sleep(10);
891     }
892     assertTrue("The region should be assigned immediately.", null != am.regionPlans.get(REGIONINFO
893         .getEncodedName()));
894   }
895 
896   /**
897    * Test verifies whether assignment is skipped for regions of tables in DISABLING state during
898    * clean cluster startup. See HBASE-6281.
899    *
900    * @throws KeeperException
901    * @throws IOException
902    * @throws Exception
903    */
904   @Test(timeout = 60000)
905   public void testDisablingTableRegionsAssignmentDuringCleanClusterStartup()
906       throws KeeperException, IOException, Exception {
907     this.server.getConfiguration().setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
908         MockedLoadBalancer.class, LoadBalancer.class);
909     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(
910         new HashMap<ServerName, ServerLoad>(0));
911     List<ServerName> destServers = new ArrayList<ServerName>(1);
912     destServers.add(SERVERNAME_A);
913     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
914     // To avoid cast exception in DisableTableHandler process.
915     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
916     Server server = new HMaster(HTU.getConfiguration());
917     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
918         this.serverManager);
919     AtomicBoolean gate = new AtomicBoolean(false);
920     if (balancer instanceof MockedLoadBalancer) {
921       ((MockedLoadBalancer) balancer).setGateVariable(gate);
922     }
923     try{
924       // set table in disabling state.
925       am.getZKTable().setDisablingTable(REGIONINFO.getTable());
926       am.joinCluster();
927       // should not call retainAssignment if we get empty regions in assignAllUserRegions.
928       assertFalse(
929           "Assign should not be invoked for disabling table regions during clean cluster startup.",
930           gate.get());
931       // need to change table state from disabling to disabled.
932       assertTrue("Table should be disabled.",
933           am.getZKTable().isDisabledTable(REGIONINFO.getTable()));
934     } finally {
935       this.server.getConfiguration().setClass(
936         HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
937         LoadBalancer.class);
938       am.getZKTable().setEnabledTable(REGIONINFO.getTable());
939       am.shutdown();
940     }
941   }
942 
943   /**
944    * Test verifies whether all the enabling table regions assigned only once during master startup.
945    *
946    * @throws KeeperException
947    * @throws IOException
948    * @throws Exception
949    */
950   @Test (timeout=180000)
951   public void testMasterRestartWhenTableInEnabling() throws KeeperException, IOException, Exception {
952     enabling = true;
953     List<ServerName> destServers = new ArrayList<ServerName>(1);
954     destServers.add(SERVERNAME_A);
955     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
956     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
957     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
958     Server server = new HMaster(HTU.getConfiguration());
959     Whitebox.setInternalState(server, "serverManager", this.serverManager);
960     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
961         this.serverManager);
962     try {
963       // set table in enabling state.
964       am.getZKTable().setEnablingTable(REGIONINFO.getTable());
965       new EnableTableHandler(server, REGIONINFO.getTable(),
966           am.getCatalogTracker(), am, new NullTableLockManager(), true).prepare()
967           .process();
968       assertEquals("Number of assignments should be 1.", 1, assignmentCount);
969       assertTrue("Table should be enabled.",
970           am.getZKTable().isEnabledTable(REGIONINFO.getTable()));
971     } finally {
972       enabling = false;
973       assignmentCount = 0;
974       am.getZKTable().setEnabledTable(REGIONINFO.getTable());
975       am.shutdown();
976       ZKAssign.deleteAllNodes(this.watcher);
977     }
978   }
979 
980   /**
981    * Test verifies whether stale znodes of unknown tables as for the hbase:meta will be removed or
982    * not.
983    * @throws KeeperException
984    * @throws IOException
985    * @throws Exception
986    */
987   @Test (timeout=180000)
988   public void testMasterRestartShouldRemoveStaleZnodesOfUnknownTableAsForMeta()
989       throws KeeperException, IOException, Exception {
990     List<ServerName> destServers = new ArrayList<ServerName>(1);
991     destServers.add(SERVERNAME_A);
992     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
993     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
994     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
995     Server server = new HMaster(HTU.getConfiguration());
996     Whitebox.setInternalState(server, "serverManager", this.serverManager);
997     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
998         this.serverManager);
999     try {
1000       TableName tableName = TableName.valueOf("dummyTable");
1001       // set table in enabling state.
1002       am.getZKTable().setEnablingTable(tableName);
1003       am.joinCluster();
1004       assertFalse("Table should not be present in zookeeper.",
1005         am.getZKTable().isTablePresent(tableName));
1006     } finally {
1007     }
1008   }
1009   /**
1010    * When a region is in transition, if the region server opening the region goes down,
1011    * the region assignment takes a long time normally (waiting for timeout monitor to trigger assign).
1012    * This test is to make sure SSH reassigns it right away.
1013    */
1014   @Test (timeout=180000)
1015   public void testSSHTimesOutOpeningRegionTransition()
1016       throws KeeperException, IOException, ServiceException {
1017     // We need a mocked catalog tracker.
1018     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1019     // Create an AM.
1020     AssignmentManagerWithExtrasForTesting am =
1021       setUpMockedAssignmentManager(this.server, this.serverManager);
1022     // adding region in pending open.
1023     RegionState state = new RegionState(REGIONINFO,
1024       State.OPENING, System.currentTimeMillis(), SERVERNAME_A);
1025     am.getRegionStates().regionOnline(REGIONINFO, SERVERNAME_B);
1026     am.getRegionStates().regionsInTransition.put(REGIONINFO.getEncodedName(), state);
1027     // adding region plan
1028     am.regionPlans.put(REGIONINFO.getEncodedName(),
1029       new RegionPlan(REGIONINFO, SERVERNAME_B, SERVERNAME_A));
1030     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
1031 
1032     try {
1033       am.assignInvoked = false;
1034       processServerShutdownHandler(ct, am, false);
1035       assertTrue(am.assignInvoked);
1036     } finally {
1037       am.getRegionStates().regionsInTransition.remove(REGIONINFO.getEncodedName());
1038       am.regionPlans.remove(REGIONINFO.getEncodedName());
1039     }
1040   }
1041 
1042   /**
1043    * Scenario:<ul>
1044    *  <li> master starts a close, and creates a znode</li>
1045    *  <li> it fails just at this moment, before contacting the RS</li>
1046    *  <li> while the second master is coming up, the targeted RS dies. But it's before ZK timeout so
1047    *    we don't know, and we have an exception.</li>
1048    *  <li> the master must handle this nicely and reassign.
1049    *  </ul>
1050    */
1051   @Test (timeout=180000)
1052   public void testClosingFailureDuringRecovery() throws Exception {
1053 
1054     AssignmentManagerWithExtrasForTesting am =
1055         setUpMockedAssignmentManager(this.server, this.serverManager);
1056     ZKAssign.createNodeClosing(this.watcher, REGIONINFO, SERVERNAME_A);
1057     am.getRegionStates().createRegionState(REGIONINFO);
1058 
1059     assertFalse( am.getRegionStates().isRegionsInTransition() );
1060 
1061     am.processRegionInTransition(REGIONINFO.getEncodedName(), REGIONINFO);
1062 
1063     assertTrue( am.getRegionStates().isRegionsInTransition() );
1064   }
1065 
1066   /**
1067    * Creates a new ephemeral node in the SPLITTING state for the specified region.
1068    * Create it ephemeral in case regionserver dies mid-split.
1069    *
1070    * <p>Does not transition nodes from other states.  If a node already exists
1071    * for this region, a {@link NodeExistsException} will be thrown.
1072    *
1073    * @param zkw zk reference
1074    * @param region region to be created as offline
1075    * @param serverName server event originates from
1076    * @return Version of znode created.
1077    * @throws KeeperException
1078    * @throws IOException
1079    */
1080   // Copied from SplitTransaction rather than open the method over there in
1081   // the regionserver package.
1082   private static int createNodeSplitting(final ZooKeeperWatcher zkw,
1083       final HRegionInfo region, final ServerName serverName)
1084   throws KeeperException, IOException {
1085     RegionTransition rt =
1086       RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
1087         region.getRegionName(), serverName);
1088 
1089     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
1090     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
1091       throw new IOException("Failed create of ephemeral " + node);
1092     }
1093     // Transition node from SPLITTING to SPLITTING and pick up version so we
1094     // can be sure this znode is ours; version is needed deleting.
1095     return transitionNodeSplitting(zkw, region, serverName, -1);
1096   }
1097 
1098   // Copied from SplitTransaction rather than open the method over there in
1099   // the regionserver package.
1100   private static int transitionNodeSplitting(final ZooKeeperWatcher zkw,
1101       final HRegionInfo parent,
1102       final ServerName serverName, final int version)
1103   throws KeeperException, IOException {
1104     return ZKAssign.transitionNode(zkw, parent, serverName,
1105       EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
1106   }
1107 
1108   private void unassign(final AssignmentManager am, final ServerName sn,
1109       final HRegionInfo hri) throws RegionException {
1110     // Before I can unassign a region, I need to set it online.
1111     am.regionOnline(hri, sn);
1112     // Unassign region.
1113     am.unassign(hri);
1114   }
1115 
1116   /**
1117    * Create an {@link AssignmentManagerWithExtrasForTesting} that has mocked
1118    * {@link CatalogTracker} etc.
1119    * @param server
1120    * @param manager
1121    * @return An AssignmentManagerWithExtras with mock connections, etc.
1122    * @throws IOException
1123    * @throws KeeperException
1124    */
1125   private AssignmentManagerWithExtrasForTesting setUpMockedAssignmentManager(final Server server,
1126       final ServerManager manager) throws IOException, KeeperException, ServiceException {
1127     // We need a mocked catalog tracker. Its used by our AM instance.
1128     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1129     // Make an RS Interface implementation. Make it so a scanner can go against
1130     // it and a get to return the single region, REGIONINFO, this test is
1131     // messing with. Needed when "new master" joins cluster. AM will try and
1132     // rebuild its list of user regions and it will also get the HRI that goes
1133     // with an encoded name by doing a Get on hbase:meta
1134     ClientProtos.ClientService.BlockingInterface ri =
1135       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
1136     // Get a meta row result that has region up on SERVERNAME_A for REGIONINFO
1137     Result r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
1138     final ScanResponse.Builder builder = ScanResponse.newBuilder();
1139     builder.setMoreResults(true);
1140     builder.addCellsPerResult(r.size());
1141     final List<CellScannable> rows = new ArrayList<CellScannable>(1);
1142     rows.add(r);
1143     Answer<ScanResponse> ans = new Answer<ClientProtos.ScanResponse>() {
1144       @Override
1145       public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
1146         PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
1147             .getArguments()[0];
1148         if (controller != null) {
1149           controller.setCellScanner(CellUtil.createCellScanner(rows));
1150         }
1151         return builder.build();
1152       }
1153     };
1154     if (enabling) {
1155       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any()))
1156           .thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans)
1157           .thenReturn(ScanResponse.newBuilder().setMoreResults(false).build());
1158     } else {
1159       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any())).thenAnswer(
1160           ans);
1161     }
1162     // If a get, return the above result too for REGIONINFO
1163     GetResponse.Builder getBuilder = GetResponse.newBuilder();
1164     getBuilder.setResult(ProtobufUtil.toResult(r));
1165     Mockito.when(ri.get((RpcController)Mockito.any(), (GetRequest) Mockito.any())).
1166       thenReturn(getBuilder.build());
1167     // Get a connection w/ mocked up common methods.
1168     HConnection connection = HConnectionTestingUtility.
1169       getMockedConnectionAndDecorate(HTU.getConfiguration(), null,
1170         ri, SERVERNAME_B, REGIONINFO);
1171     // Make it so we can get the connection from our mocked catalogtracker
1172     Mockito.when(ct.getConnection()).thenReturn(connection);
1173     // Create and startup an executor. Used by AM handling zk callbacks.
1174     ExecutorService executor = startupMasterExecutor("mockedAMExecutor");
1175     this.balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
1176     AssignmentManagerWithExtrasForTesting am = new AssignmentManagerWithExtrasForTesting(
1177       server, manager, ct, this.balancer, executor, new NullTableLockManager());
1178     return am;
1179   }
1180 
1181   /**
1182    * An {@link AssignmentManager} with some extra facility used testing
1183    */
1184   class AssignmentManagerWithExtrasForTesting extends AssignmentManager {
1185     // Keep a reference so can give it out below in {@link #getExecutorService}
1186     private final ExecutorService es;
1187     // Ditto for ct
1188     private final CatalogTracker ct;
1189     boolean processRITInvoked = false;
1190     boolean assignInvoked = false;
1191     AtomicBoolean gate = new AtomicBoolean(true);
1192 
1193     public AssignmentManagerWithExtrasForTesting(
1194         final Server master, final ServerManager serverManager,
1195         final CatalogTracker catalogTracker, final LoadBalancer balancer,
1196         final ExecutorService service, final TableLockManager tableLockManager)
1197             throws KeeperException, IOException {
1198       super(master, serverManager, catalogTracker, balancer, service, null, tableLockManager);
1199       this.es = service;
1200       this.ct = catalogTracker;
1201     }
1202 
1203     @Override
1204     boolean processRegionInTransition(String encodedRegionName,
1205         HRegionInfo regionInfo) throws KeeperException, IOException {
1206       this.processRITInvoked = true;
1207       return super.processRegionInTransition(encodedRegionName, regionInfo);
1208     }
1209 
1210     @Override
1211     public void assign(HRegionInfo region, boolean setOfflineInZK, boolean forceNewPlan) {
1212       if (enabling) {
1213         assignmentCount++;
1214         this.regionOnline(region, SERVERNAME_A);
1215       } else {
1216         super.assign(region, setOfflineInZK, forceNewPlan);
1217         this.gate.set(true);
1218       }
1219     }
1220 
1221     @Override
1222     boolean assign(ServerName destination, List<HRegionInfo> regions) {
1223       if (enabling) {
1224         for (HRegionInfo region : regions) {
1225           assignmentCount++;
1226           this.regionOnline(region, SERVERNAME_A);
1227         }
1228         return true;
1229       }
1230       return super.assign(destination, regions);
1231     }
1232 
1233     @Override
1234     public void assign(List<HRegionInfo> regions)
1235         throws IOException, InterruptedException {
1236       assignInvoked = (regions != null && regions.size() > 0);
1237       super.assign(regions);
1238       this.gate.set(true);
1239     }
1240 
1241     /** reset the watcher */
1242     void setWatcher(ZooKeeperWatcher watcher) {
1243       this.watcher = watcher;
1244     }
1245 
1246     /**
1247      * @return ExecutorService used by this instance.
1248      */
1249     ExecutorService getExecutorService() {
1250       return this.es;
1251     }
1252 
1253     /**
1254      * @return CatalogTracker used by this AM (Its a mock).
1255      */
1256     CatalogTracker getCatalogTracker() {
1257       return this.ct;
1258     }
1259   }
1260 
1261   /**
1262    * Call joinCluster on the passed AssignmentManager.  Do it in a thread
1263    * so it runs independent of what all else is going on.  Try to simulate
1264    * an AM running insided a failed over master by clearing all in-memory
1265    * AM state first.
1266   */
1267   private void startFakeFailedOverMasterAssignmentManager(final AssignmentManager am,
1268       final ZooKeeperWatcher watcher) {
1269     // Make sure our new AM gets callbacks; once registered, we can't unregister.
1270     // Thats ok because we make a new zk watcher for each test.
1271     watcher.registerListenerFirst(am);
1272     Thread t = new Thread("RunAmJoinCluster") {
1273       @Override
1274       public void run() {
1275         // Call the joinCluster function as though we were doing a master
1276         // failover at this point. It will stall just before we go to add
1277         // the RIT region to our RIT Map in AM at processRegionsInTransition.
1278         // First clear any inmemory state from AM so it acts like a new master
1279         // coming on line.
1280         am.getRegionStates().regionsInTransition.clear();
1281         am.regionPlans.clear();
1282         try {
1283           am.joinCluster();
1284         } catch (IOException e) {
1285           throw new RuntimeException(e);
1286         } catch (KeeperException e) {
1287           throw new RuntimeException(e);
1288         } catch (InterruptedException e) {
1289           throw new RuntimeException(e);
1290         }
1291       }
1292     };
1293     t.start();
1294     while (!t.isAlive()) Threads.sleep(1);
1295   }
1296 
1297   @Test (timeout=180000)
1298   public void testForceAssignMergingRegion() throws Exception {
1299     // Region to use in test.
1300     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1301     // Need a mocked catalog tracker.
1302     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1303     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1304       server.getConfiguration());
1305     // Create an AM.
1306     AssignmentManager am = new AssignmentManager(this.server,
1307       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
1308     RegionStates regionStates = am.getRegionStates();
1309     try {
1310       // First set the state of the region to merging
1311       regionStates.updateRegionState(hri, RegionState.State.MERGING);
1312       // Now, try to assign it with force new plan
1313       am.assign(hri, true, true);
1314       assertEquals("The region should be still in merging state",
1315         RegionState.State.MERGING, regionStates.getRegionState(hri).getState());
1316     } finally {
1317       am.shutdown();
1318     }
1319   }
1320 
1321   /**
1322    * Test assignment related ZK events are ignored by AM if the region is not known
1323    * by AM to be in transition. During normal operation, all assignments are started
1324    * by AM (not considering split/merge), if an event is received but the region
1325    * is not in transition, the event must be a very late one. So it can be ignored.
1326    * During master failover, since AM watches assignment znodes after failover cleanup
1327    * is completed, when an event comes in, AM should already have the region in transition
1328    * if ZK is used during the assignment action (only hbck doesn't use ZK for region
1329    * assignment). So during master failover, we can ignored such events too.
1330    */
1331   @Test (timeout=180000)
1332   public void testAssignmentEventIgnoredIfNotExpected() throws KeeperException, IOException {
1333     // Region to use in test.
1334     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1335     // Need a mocked catalog tracker.
1336     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1337     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1338       server.getConfiguration());
1339     final AtomicBoolean zkEventProcessed = new AtomicBoolean(false);
1340     // Create an AM.
1341     AssignmentManager am = new AssignmentManager(this.server,
1342       this.serverManager, ct, balancer, null, null, master.getTableLockManager()) {
1343 
1344       @Override
1345       void handleRegion(final RegionTransition rt, int expectedVersion) {
1346         super.handleRegion(rt, expectedVersion);
1347         if (rt != null && Bytes.equals(hri.getRegionName(),
1348           rt.getRegionName()) && rt.getEventType() == EventType.RS_ZK_REGION_OPENING) {
1349           zkEventProcessed.set(true);
1350         }
1351       }
1352     };
1353     try {
1354       // First make sure the region is not in transition
1355       am.getRegionStates().regionOffline(hri);
1356       zkEventProcessed.set(false); // Reset it before faking zk transition
1357       this.watcher.registerListenerFirst(am);
1358       assertFalse("The region should not be in transition",
1359         am.getRegionStates().isRegionInTransition(hri));
1360       ZKAssign.createNodeOffline(this.watcher, hri, SERVERNAME_A);
1361       // Trigger a transition event
1362       ZKAssign.transitionNodeOpening(this.watcher, hri, SERVERNAME_A);
1363       long startTime = EnvironmentEdgeManager.currentTimeMillis();
1364       while (!zkEventProcessed.get()) {
1365         assertTrue("Timed out in waiting for ZK event to be processed",
1366           EnvironmentEdgeManager.currentTimeMillis() - startTime < 30000);
1367         Threads.sleepWithoutInterrupt(100);
1368       }
1369       assertFalse(am.getRegionStates().isRegionInTransition(hri));
1370     } finally {
1371       am.shutdown();
1372     }
1373   }
1374 
1375   /**
1376    * If a table is deleted, we should not be able to balance it anymore.
1377    * Otherwise, the region will be brought back.
1378    * @throws Exception
1379    */
1380   @Test (timeout=180000)
1381   public void testBalanceRegionOfDeletedTable() throws Exception {
1382     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1383     AssignmentManager am = new AssignmentManager(this.server, this.serverManager,
1384       ct, balancer, null, null, master.getTableLockManager());
1385     RegionStates regionStates = am.getRegionStates();
1386     HRegionInfo hri = REGIONINFO;
1387     regionStates.createRegionState(hri);
1388     assertFalse(regionStates.isRegionInTransition(hri));
1389     RegionPlan plan = new RegionPlan(hri, SERVERNAME_A, SERVERNAME_B);
1390     // Fake table is deleted
1391     regionStates.tableDeleted(hri.getTable());
1392     am.balance(plan);
1393     assertFalse("The region should not in transition",
1394       regionStates.isRegionInTransition(hri));
1395   }
1396 
1397   /**
1398    * Tests an on-the-fly RPC that was scheduled for the earlier RS on the same port
1399    * for openRegion. AM should assign this somewhere else. (HBASE-9721)
1400    */
1401   @SuppressWarnings("unchecked")
1402   @Test (timeout=180000)
1403   public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception {
1404     Mockito.when(this.serverManager.sendRegionOpen(Mockito.eq(SERVERNAME_B), Mockito.eq(REGIONINFO),
1405       Mockito.anyInt(), (List<ServerName>)Mockito.any()))
1406       .thenThrow(new DoNotRetryIOException());
1407     this.server.getConfiguration().setInt("hbase.assignment.maximum.attempts", 100);
1408 
1409     HRegionInfo hri = REGIONINFO;
1410     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1411     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1412       server.getConfiguration());
1413     // Create an AM.
1414     AssignmentManager am = new AssignmentManager(this.server,
1415       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
1416     RegionStates regionStates = am.getRegionStates();
1417     try {
1418       am.regionPlans.put(REGIONINFO.getEncodedName(),
1419         new RegionPlan(REGIONINFO, null, SERVERNAME_B));
1420 
1421       // Should fail once, but succeed on the second attempt for the SERVERNAME_A
1422       am.assign(hri, true, false);
1423     } finally {
1424       assertEquals(SERVERNAME_A, regionStates.getRegionState(REGIONINFO).getServerName());
1425     }
1426   }
1427 }