View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotNull;
23  import static org.junit.Assert.assertNull;
24  import static org.junit.Assert.assertTrue;
25  import static org.junit.Assert.fail;
26  
27  import java.io.IOException;
28  import java.util.ArrayList;
29  import java.util.List;
30  import java.util.Set;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FileSystem;
35  import org.apache.hadoop.fs.Path;
36  import org.apache.hadoop.hbase.HBaseTestingUtility;
37  import org.apache.hadoop.hbase.HColumnDescriptor;
38  import org.apache.hadoop.hbase.HConstants;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.HTableDescriptor;
41  import org.apache.hadoop.hbase.testclassification.MediumTests;
42  import org.apache.hadoop.hbase.MiniHBaseCluster;
43  import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
44  import org.apache.hadoop.hbase.ServerLoad;
45  import org.apache.hadoop.hbase.ServerName;
46  import org.apache.hadoop.hbase.TableName;
47  import org.apache.hadoop.hbase.UnknownRegionException;
48  import org.apache.hadoop.hbase.Waiter;
49  import org.apache.hadoop.hbase.catalog.MetaEditor;
50  import org.apache.hadoop.hbase.catalog.MetaReader;
51  import org.apache.hadoop.hbase.client.HBaseAdmin;
52  import org.apache.hadoop.hbase.client.HTable;
53  import org.apache.hadoop.hbase.client.Result;
54  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
55  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
56  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
57  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
58  import org.apache.hadoop.hbase.coprocessor.RegionObserver;
59  import org.apache.hadoop.hbase.executor.EventType;
60  import org.apache.hadoop.hbase.master.RegionState.State;
61  import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
62  import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
63  import org.apache.hadoop.hbase.regionserver.HRegionServer;
64  import org.apache.hadoop.hbase.util.Bytes;
65  import org.apache.hadoop.hbase.util.ConfigUtil;
66  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
67  import org.apache.hadoop.hbase.util.FSUtils;
68  import org.apache.hadoop.hbase.util.Threads;
69  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
70  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
71  import org.apache.zookeeper.KeeperException;
72  import org.junit.AfterClass;
73  import org.junit.BeforeClass;
74  import org.junit.Test;
75  import org.junit.experimental.categories.Category;
76  
77  
78  /**
79   * This tests AssignmentManager with a testing cluster.
80   */
81  @Category(MediumTests.class)
82  public class TestAssignmentManagerOnCluster {
83    private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
84    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
85    final static Configuration conf = TEST_UTIL.getConfiguration();
86    private static HBaseAdmin admin;
87  
88    static void setupOnce() throws Exception {
89      // Using the our load balancer to control region plans
90      conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
91        MyLoadBalancer.class, LoadBalancer.class);
92      conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
93        MyRegionObserver.class, RegionObserver.class);
94      // Reduce the maximum attempts to speed up the test
95      conf.setInt("hbase.assignment.maximum.attempts", 3);
96      conf.setInt("hbase.master.maximum.ping.server.attempts", 3);
97      conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1);
98  
99      TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class, MyRegionServer.class);
100     admin = TEST_UTIL.getHBaseAdmin();
101   }
102 
103   @BeforeClass
104   public static void setUpBeforeClass() throws Exception {
105     // Use ZK for region assignment
106     conf.setBoolean("hbase.assignment.usezk", true);
107     setupOnce();
108   }
109 
110   @AfterClass
111   public static void tearDownAfterClass() throws Exception {
112     TEST_UTIL.shutdownMiniCluster();
113   }
114 
115   /**
116    * This tests region assignment
117    */
118   @Test (timeout=60000)
119   public void testAssignRegion() throws Exception {
120     String table = "testAssignRegion";
121     try {
122       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
123       desc.addFamily(new HColumnDescriptor(FAMILY));
124       admin.createTable(desc);
125 
126       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
127       HRegionInfo hri = new HRegionInfo(
128         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
129       MetaEditor.addRegionToMeta(meta, hri);
130 
131       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
132       master.assignRegion(hri);
133       AssignmentManager am = master.getAssignmentManager();
134       am.waitForAssignment(hri);
135 
136       RegionStates regionStates = am.getRegionStates();
137       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
138       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
139 
140       // Region is assigned now. Let's assign it again.
141       // Master should not abort, and region should be assigned.
142       RegionState oldState = regionStates.getRegionState(hri);
143       TEST_UTIL.getHBaseAdmin().assign(hri.getRegionName());
144       master.getAssignmentManager().waitForAssignment(hri);
145       RegionState newState = regionStates.getRegionState(hri);
146       assertTrue(newState.isOpened()
147         && newState.getStamp() != oldState.getStamp());
148     } finally {
149       TEST_UTIL.deleteTable(Bytes.toBytes(table));
150     }
151   }
152   
153   // Simulate a scenario where the AssignCallable and SSH are trying to assign a region
154   @Test (timeout=60000)
155   public void testAssignRegionBySSH() throws Exception {
156     if (!conf.getBoolean("hbase.assignment.usezk", true)) {
157       return;
158     }
159     String table = "testAssignRegionBySSH";
160     MyMaster master = (MyMaster) TEST_UTIL.getHBaseCluster().getMaster();
161     try {
162       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
163       desc.addFamily(new HColumnDescriptor(FAMILY));
164       admin.createTable(desc);
165 
166       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
167       HRegionInfo hri = new HRegionInfo(
168         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
169       MetaEditor.addRegionToMeta(meta, hri);
170       // Add some dummy server for the region entry
171       MetaEditor.updateRegionLocation(TEST_UTIL.getHBaseCluster().getMaster().getCatalogTracker(), hri,
172         ServerName.valueOf("example.org", 1234, System.currentTimeMillis()), 0);
173       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
174       int i = TEST_UTIL.getHBaseCluster().getServerWithMeta();
175       HRegionServer rs = TEST_UTIL.getHBaseCluster().getRegionServer(i == 0 ? 1 : 0);
176       // Choose a server other than meta to kill
177       ServerName controlledServer = rs.getServerName();
178       master.enableSSH(false);
179       TEST_UTIL.getHBaseCluster().killRegionServer(controlledServer);
180       TEST_UTIL.getHBaseCluster().waitForRegionServerToStop(controlledServer, -1);
181       AssignmentManager am = master.getAssignmentManager();
182       
183       // Simulate the AssignCallable trying to assign the region. Have the region in OFFLINE state,
184       // but not in transition and the server is the dead 'controlledServer'  
185       regionStates.createRegionState(hri, State.OFFLINE, controlledServer);
186       am.assign(hri, true, true);
187       // Region should remain in OFFLINE and go to transition
188       assertEquals(State.OFFLINE, regionStates.getRegionState(hri).getState());
189       assertTrue (regionStates.isRegionInTransition(hri));
190       
191       master.enableSSH(true);
192       am.waitForAssignment(hri);
193       assertTrue (regionStates.getRegionState(hri).isOpened());
194       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
195       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
196     } finally {
197       if (master != null) {
198         master.enableSSH(true);
199       }
200       TEST_UTIL.deleteTable(Bytes.toBytes(table));
201       TEST_UTIL.getHBaseCluster().startRegionServer();
202     }
203   }
204 
205   /**
206    * This tests region assignment on a simulated restarted server
207    */
208   @Test (timeout=120000)
209   public void testAssignRegionOnRestartedServer() throws Exception {
210     String table = "testAssignRegionOnRestartedServer";
211     TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20);
212     TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
213     TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect
214    
215     ServerName deadServer = null;
216     HMaster master = null;
217     try {
218       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
219       desc.addFamily(new HColumnDescriptor(FAMILY));
220       admin.createTable(desc);
221 
222       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
223       final HRegionInfo hri = new HRegionInfo(
224         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
225       MetaEditor.addRegionToMeta(meta, hri);
226 
227       master = TEST_UTIL.getHBaseCluster().getMaster();
228       Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
229       assertFalse("There should be some servers online", onlineServers.isEmpty());
230 
231       // Use the first server as the destination server
232       ServerName destServer = onlineServers.iterator().next();
233 
234       // Created faked dead server
235       deadServer = ServerName.valueOf(destServer.getHostname(),
236           destServer.getPort(), destServer.getStartcode() - 100L);
237       master.serverManager.recordNewServerWithLock(deadServer, ServerLoad.EMPTY_SERVERLOAD);
238 
239       final AssignmentManager am = master.getAssignmentManager();
240       RegionPlan plan = new RegionPlan(hri, null, deadServer);
241       am.addPlan(hri.getEncodedName(), plan);
242       master.assignRegion(hri);
243 
244       int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
245         destServer, EventType.M_ZK_REGION_OFFLINE,
246         EventType.RS_ZK_REGION_OPENING, 0);
247       assertEquals("TansitionNode should fail", -1, version);
248 
249       TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
250         @Override
251         public boolean evaluate() throws Exception {
252           return ! am.getRegionStates().isRegionInTransition(hri);
253         }
254       });
255 
256     assertFalse("Region should be assigned", am.getRegionStates().isRegionInTransition(hri));
257     } finally {
258       if (deadServer != null) {
259         master.serverManager.expireServer(deadServer);
260       }
261 
262       TEST_UTIL.deleteTable(Bytes.toBytes(table));
263 
264       // reset the value for other tests
265       TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 3);
266       ServerName masterServerName = TEST_UTIL.getMiniHBaseCluster().getMaster().getServerName();
267       TEST_UTIL.getMiniHBaseCluster().stopMaster(masterServerName);
268       TEST_UTIL.getMiniHBaseCluster().startMaster();
269       // Wait till master is active and is initialized
270       while (TEST_UTIL.getMiniHBaseCluster().getMaster() == null
271           || !TEST_UTIL.getMiniHBaseCluster().getMaster().isInitialized()) {
272         Threads.sleep(1);
273       }
274     }
275   }
276 
277   /**
278    * This tests offlining a region
279    */
280   @Test (timeout=60000)
281   public void testOfflineRegion() throws Exception {
282     TableName table =
283         TableName.valueOf("testOfflineRegion");
284     try {
285       HRegionInfo hri = createTableAndGetOneRegion(table);
286 
287       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
288         getMaster().getAssignmentManager().getRegionStates();
289       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
290       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
291       admin.offline(hri.getRegionName());
292 
293       long timeoutTime = System.currentTimeMillis() + 800;
294       while (true) {
295         if (regionStates.getRegionByStateOfTable(table)
296             .get(RegionState.State.OFFLINE).contains(hri))
297           break;
298         long now = System.currentTimeMillis();
299         if (now > timeoutTime) {
300           fail("Failed to offline the region in time");
301           break;
302         }
303         Thread.sleep(10);
304       }
305       RegionState regionState = regionStates.getRegionState(hri);
306       assertTrue(regionState.isOffline());
307     } finally {
308       TEST_UTIL.deleteTable(table);
309     }
310   }
311 
312   /**
313    * This tests moving a region
314    */
315   @Test (timeout=50000)
316   public void testMoveRegion() throws Exception {
317     TableName table =
318         TableName.valueOf("testMoveRegion");
319     try {
320       HRegionInfo hri = createTableAndGetOneRegion(table);
321 
322       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
323         getMaster().getAssignmentManager().getRegionStates();
324       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
325       ServerName destServerName = null;
326       for (int i = 0; i < 3; i++) {
327         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
328         if (!destServer.getServerName().equals(serverName)) {
329           destServerName = destServer.getServerName();
330           break;
331         }
332       }
333       assertTrue(destServerName != null
334         && !destServerName.equals(serverName));
335       TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
336         Bytes.toBytes(destServerName.getServerName()));
337 
338       long timeoutTime = System.currentTimeMillis() + 30000;
339       while (true) {
340         ServerName sn = regionStates.getRegionServerOfRegion(hri);
341         if (sn != null && sn.equals(destServerName)) {
342           TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
343           break;
344         }
345         long now = System.currentTimeMillis();
346         if (now > timeoutTime) {
347           fail("Failed to move the region in time: "
348             + regionStates.getRegionState(hri));
349         }
350         regionStates.waitForUpdate(50);
351       }
352 
353     } finally {
354       TEST_UTIL.deleteTable(table);
355     }
356   }
357 
358   /**
359    * If a table is deleted, we should not be able to move it anymore.
360    * Otherwise, the region will be brought back.
361    * @throws Exception
362    */
363   @Test (timeout=50000)
364   public void testMoveRegionOfDeletedTable() throws Exception {
365     TableName table =
366         TableName.valueOf("testMoveRegionOfDeletedTable");
367     HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
368     try {
369       HRegionInfo hri = createTableAndGetOneRegion(table);
370 
371       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
372       AssignmentManager am = master.getAssignmentManager();
373       RegionStates regionStates = am.getRegionStates();
374       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
375       ServerName destServerName = null;
376       for (int i = 0; i < 3; i++) {
377         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
378         if (!destServer.getServerName().equals(serverName)) {
379           destServerName = destServer.getServerName();
380           break;
381         }
382       }
383       assertTrue(destServerName != null
384         && !destServerName.equals(serverName));
385 
386       TEST_UTIL.deleteTable(table);
387 
388       try {
389         admin.move(hri.getEncodedNameAsBytes(),
390           Bytes.toBytes(destServerName.getServerName()));
391         fail("We should not find the region");
392       } catch (IOException ioe) {
393         assertTrue(ioe instanceof UnknownRegionException);
394       }
395 
396       am.balance(new RegionPlan(hri, serverName, destServerName));
397       assertFalse("The region should not be in transition",
398         regionStates.isRegionInTransition(hri));
399     } finally {
400       if (admin.tableExists(table)) {
401         TEST_UTIL.deleteTable(table);
402       }
403     }
404   }
405 
406   HRegionInfo createTableAndGetOneRegion(
407       final TableName tableName) throws IOException, InterruptedException {
408     HTableDescriptor desc = new HTableDescriptor(tableName);
409     desc.addFamily(new HColumnDescriptor(FAMILY));
410     admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), 5);
411 
412     // wait till the table is assigned
413     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
414     long timeoutTime = System.currentTimeMillis() + 1000;
415     while (true) {
416       List<HRegionInfo> regions = master.getAssignmentManager().
417         getRegionStates().getRegionsOfTable(tableName);
418       if (regions.size() > 3) {
419         return regions.get(2);
420       }
421       long now = System.currentTimeMillis();
422       if (now > timeoutTime) {
423         fail("Could not find an online region");
424       }
425       Thread.sleep(10);
426     }
427   }
428 
429   /**
430    * This test should not be flaky. If it is flaky, it means something
431    * wrong with AssignmentManager which should be reported and fixed
432    *
433    * This tests forcefully assign a region while it's closing and re-assigned.
434    */
435   @Test (timeout=60000)
436   public void testForceAssignWhileClosing() throws Exception {
437     String table = "testForceAssignWhileClosing";
438     try {
439       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
440       desc.addFamily(new HColumnDescriptor(FAMILY));
441       admin.createTable(desc);
442 
443       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
444       HRegionInfo hri = new HRegionInfo(
445         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
446       MetaEditor.addRegionToMeta(meta, hri);
447 
448       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
449       master.assignRegion(hri);
450       AssignmentManager am = master.getAssignmentManager();
451       assertTrue(am.waitForAssignment(hri));
452       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
453       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
454       
455       MyRegionObserver.preCloseEnabled.set(true);
456       am.unassign(hri);
457       RegionState state = am.getRegionStates().getRegionState(hri);
458       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
459 
460       MyRegionObserver.preCloseEnabled.set(false);
461       am.unassign(hri, true);
462 
463       // region is closing now, will be re-assigned automatically.
464       // now, let's forcefully assign it again. it should be
465       // assigned properly and no double-assignment
466       am.assign(hri, true, true);
467 
468       // let's check if it's assigned after it's out of transition
469       am.waitOnRegionToClearRegionsInTransition(hri);
470       assertTrue(am.waitForAssignment(hri));
471 
472       ServerName serverName = master.getAssignmentManager().
473         getRegionStates().getRegionServerOfRegion(hri);
474       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
475     } finally {
476       MyRegionObserver.preCloseEnabled.set(false);
477       TEST_UTIL.deleteTable(Bytes.toBytes(table));
478     }
479   }
480 
481   /**
482    * This tests region close failed
483    */
484   @Test (timeout=60000)
485   public void testCloseFailed() throws Exception {
486     String table = "testCloseFailed";
487     try {
488       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
489       desc.addFamily(new HColumnDescriptor(FAMILY));
490       admin.createTable(desc);
491 
492       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
493       HRegionInfo hri = new HRegionInfo(
494         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
495       MetaEditor.addRegionToMeta(meta, hri);
496 
497       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
498       master.assignRegion(hri);
499       AssignmentManager am = master.getAssignmentManager();
500       assertTrue(am.waitForAssignment(hri));
501       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
502       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
503 
504       MyRegionObserver.preCloseEnabled.set(true);
505       am.unassign(hri);
506       RegionState state = am.getRegionStates().getRegionState(hri);
507       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
508 
509       MyRegionObserver.preCloseEnabled.set(false);
510       am.unassign(hri, true);
511 
512       // region may still be assigned now since it's closing,
513       // let's check if it's assigned after it's out of transition
514       am.waitOnRegionToClearRegionsInTransition(hri);
515 
516       // region should be closed and re-assigned
517       assertTrue(am.waitForAssignment(hri));
518       ServerName serverName = master.getAssignmentManager().
519         getRegionStates().getRegionServerOfRegion(hri);
520       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
521     } finally {
522       MyRegionObserver.preCloseEnabled.set(false);
523       TEST_UTIL.deleteTable(Bytes.toBytes(table));
524     }
525   }
526 
527   /**
528    * This tests region open failed
529    */
530   @Test (timeout=60000)
531   public void testOpenFailed() throws Exception {
532     String table = "testOpenFailed";
533     try {
534       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
535       desc.addFamily(new HColumnDescriptor(FAMILY));
536       admin.createTable(desc);
537 
538       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
539       HRegionInfo hri = new HRegionInfo(
540         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
541       MetaEditor.addRegionToMeta(meta, hri);
542 
543       MyLoadBalancer.controledRegion = hri.getEncodedName();
544 
545       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
546       master.assignRegion(hri);
547       AssignmentManager am = master.getAssignmentManager();
548       assertFalse(am.waitForAssignment(hri));
549 
550       RegionState state = am.getRegionStates().getRegionState(hri);
551       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
552       // Failed to open since no plan, so it's on no server
553       assertNull(state.getServerName());
554 
555       MyLoadBalancer.controledRegion = null;
556       master.assignRegion(hri);
557       assertTrue(am.waitForAssignment(hri));
558 
559       ServerName serverName = master.getAssignmentManager().
560         getRegionStates().getRegionServerOfRegion(hri);
561       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
562     } finally {
563       MyLoadBalancer.controledRegion = null;
564       TEST_UTIL.deleteTable(Bytes.toBytes(table));
565     }
566   }
567 
568   /**
569    * This tests region open failure which is not recoverable
570    */
571   @Test (timeout=60000)
572   public void testOpenFailedUnrecoverable() throws Exception {
573     TableName table =
574         TableName.valueOf("testOpenFailedUnrecoverable");
575     try {
576       HTableDescriptor desc = new HTableDescriptor(table);
577       desc.addFamily(new HColumnDescriptor(FAMILY));
578       admin.createTable(desc);
579 
580       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
581       HRegionInfo hri = new HRegionInfo(
582         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
583       MetaEditor.addRegionToMeta(meta, hri);
584 
585       FileSystem fs = FileSystem.get(conf);
586       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
587       Path regionDir = new Path(tableDir, hri.getEncodedName());
588       // create a file named the same as the region dir to
589       // mess up with region opening
590       fs.create(regionDir, true);
591 
592       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
593       master.assignRegion(hri);
594       AssignmentManager am = master.getAssignmentManager();
595       assertFalse(am.waitForAssignment(hri));
596 
597       RegionState state = am.getRegionStates().getRegionState(hri);
598       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
599       // Failed to open due to file system issue. Region state should
600       // carry the opening region server so that we can force close it
601       // later on before opening it again. See HBASE-9092.
602       assertNotNull(state.getServerName());
603 
604       // remove the blocking file, so that region can be opened
605       fs.delete(regionDir, true);
606       master.assignRegion(hri);
607       assertTrue(am.waitForAssignment(hri));
608 
609       ServerName serverName = master.getAssignmentManager().
610         getRegionStates().getRegionServerOfRegion(hri);
611       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
612     } finally {
613       TEST_UTIL.deleteTable(table);
614     }
615   }
616 
617   @Test (timeout=60000)
618   public void testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState() throws Exception {
619     final TableName table =
620         TableName.valueOf
621             ("testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState");
622     AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
623     HRegionInfo hri = null;
624     ServerName serverName = null;
625     try {
626       hri = createTableAndGetOneRegion(table);
627       serverName = am.getRegionStates().getRegionServerOfRegion(hri);
628       ServerName destServerName = null;
629       HRegionServer destServer = null;
630       for (int i = 0; i < 3; i++) {
631         destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
632         if (!destServer.getServerName().equals(serverName)) {
633           destServerName = destServer.getServerName();
634           break;
635         }
636       }
637       ServerName metaServerName = am.getRegionStates().getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO);
638       // We don't want to process shutdown of meta, so move meta if required
639       if (ServerName.isSameHostnameAndPort(destServerName, metaServerName)) {
640         int i = TEST_UTIL.getHBaseCluster().getServerWithMeta();
641         HRegionServer rs = TEST_UTIL.getHBaseCluster().getRegionServer(i == 0 ? 1 : 0);
642         TEST_UTIL
643             .getHBaseCluster()
644             .getMaster()
645             .move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
646               Bytes.toBytes(rs.getServerName().getServerName()));
647         am.waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO);
648       }
649       
650       am.regionOffline(hri);
651       ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper();
652       am.getRegionStates().updateRegionState(hri, State.PENDING_OPEN, destServerName);
653       if (ConfigUtil.useZKForAssignment(conf)) {
654         ZKAssign.createNodeOffline(zkw, hri, destServerName);
655         ZKAssign.transitionNodeOpening(zkw, hri, destServerName);
656 
657         // Wait till the event is processed and the region is in transition
658         long timeoutTime = System.currentTimeMillis() + 20000;
659         while (!am.getRegionStates().isRegionInTransition(hri)) {
660           assertTrue("Failed to process ZK opening event in time",
661             System.currentTimeMillis() < timeoutTime);
662           Thread.sleep(100);
663         }
664       }
665 
666       am.getZKTable().setDisablingTable(table);
667       List<HRegionInfo> toAssignRegions = am.processServerShutdown(destServerName);
668       assertTrue("Regions to be assigned should be empty.", toAssignRegions.isEmpty());
669       assertTrue("Regions to be assigned should be empty.", am.getRegionStates()
670           .getRegionState(hri).isOffline());
671     } finally {
672       if (hri != null && serverName != null) {
673         am.regionOnline(hri, serverName);
674       }
675       am.getZKTable().setDisabledTable(table);
676       TEST_UTIL.deleteTable(table);
677     }
678   }
679 
680   /**
681    * This tests region close hanging
682    */
683   @Test (timeout=60000)
684   public void testCloseHang() throws Exception {
685     String table = "testCloseHang";
686     try {
687       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
688       desc.addFamily(new HColumnDescriptor(FAMILY));
689       admin.createTable(desc);
690 
691       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
692       HRegionInfo hri = new HRegionInfo(
693         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
694       MetaEditor.addRegionToMeta(meta, hri);
695 
696       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
697       master.assignRegion(hri);
698       AssignmentManager am = master.getAssignmentManager();
699       assertTrue(am.waitForAssignment(hri));
700       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
701       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
702 
703       MyRegionObserver.postCloseEnabled.set(true);
704       am.unassign(hri);
705       // Now region should pending_close or closing
706       // Unassign it again forcefully so that we can trigger already
707       // in transition exception. This test is to make sure this scenario
708       // is handled properly.
709       am.server.getConfiguration().setLong(
710         AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
711       am.unassign(hri, true);
712       RegionState state = am.getRegionStates().getRegionState(hri);
713       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
714 
715       // Let region closing move ahead. The region should be closed
716       // properly and re-assigned automatically
717       MyRegionObserver.postCloseEnabled.set(false);
718 
719       // region may still be assigned now since it's closing,
720       // let's check if it's assigned after it's out of transition
721       am.waitOnRegionToClearRegionsInTransition(hri);
722 
723       // region should be closed and re-assigned
724       assertTrue(am.waitForAssignment(hri));
725       ServerName serverName = master.getAssignmentManager().
726         getRegionStates().getRegionServerOfRegion(hri);
727       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
728     } finally {
729       MyRegionObserver.postCloseEnabled.set(false);
730       TEST_UTIL.deleteTable(Bytes.toBytes(table));
731     }
732   }
733 
734   /**
735    * This tests region close racing with open
736    */
737   @Test (timeout=60000)
738   public void testOpenCloseRacing() throws Exception {
739     String table = "testOpenCloseRacing";
740     try {
741       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
742       desc.addFamily(new HColumnDescriptor(FAMILY));
743       admin.createTable(desc);
744 
745       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
746       HRegionInfo hri = new HRegionInfo(
747         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
748       MetaEditor.addRegionToMeta(meta, hri);
749       meta.close();
750 
751       MyRegionObserver.postOpenEnabled.set(true);
752       MyRegionObserver.postOpenCalled = false;
753       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
754       // Region will be opened, but it won't complete
755       master.assignRegion(hri);
756       long end = EnvironmentEdgeManager.currentTimeMillis() + 20000;
757       // Wait till postOpen is called
758       while (!MyRegionObserver.postOpenCalled ) {
759         assertFalse("Timed out waiting for postOpen to be called",
760           EnvironmentEdgeManager.currentTimeMillis() > end);
761         Thread.sleep(300);
762       }
763 
764       AssignmentManager am = master.getAssignmentManager();
765       // Now let's unassign it, it should do nothing
766       am.unassign(hri);
767       RegionState state = am.getRegionStates().getRegionState(hri);
768       ServerName oldServerName = state.getServerName();
769       assertTrue(state.isPendingOpenOrOpening() && oldServerName != null);
770 
771       // Now the region is stuck in opening
772       // Let's forcefully re-assign it to trigger closing/opening
773       // racing. This test is to make sure this scenario
774       // is handled properly.
775       ServerName destServerName = null;
776       int numRS = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size();
777       for (int i = 0; i < numRS; i++) {
778         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
779         if (!destServer.getServerName().equals(oldServerName)) {
780           destServerName = destServer.getServerName();
781           break;
782         }
783       }
784       assertNotNull(destServerName);
785       assertFalse("Region should be assigned on a new region server",
786         oldServerName.equals(destServerName));
787       List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
788       regions.add(hri);
789       am.assign(destServerName, regions);
790       
791       // let region open continue
792       MyRegionObserver.postOpenEnabled.set(false);
793 
794       // let's check if it's assigned after it's out of transition
795       am.waitOnRegionToClearRegionsInTransition(hri);
796       assertTrue(am.waitForAssignment(hri));
797 
798       ServerName serverName = master.getAssignmentManager().
799         getRegionStates().getRegionServerOfRegion(hri);
800       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
801     } finally {
802       MyRegionObserver.postOpenEnabled.set(false);
803       TEST_UTIL.deleteTable(Bytes.toBytes(table));
804     }
805   }
806 
807   /**
808    * Test force unassign/assign a region hosted on a dead server
809    */
810   @Test (timeout=60000)
811   public void testAssignRacingWithSSH() throws Exception {
812     String table = "testAssignRacingWithSSH";
813     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
814     MyMaster master = null;
815     try {
816       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
817       desc.addFamily(new HColumnDescriptor(FAMILY));
818       admin.createTable(desc);
819 
820       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
821       HRegionInfo hri = new HRegionInfo(
822         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
823       MetaEditor.addRegionToMeta(meta, hri);
824 
825       // Assign the region
826       master = (MyMaster)cluster.getMaster();
827       master.assignRegion(hri);
828 
829       // Hold SSH before killing the hosting server
830       master.enableSSH(false);
831 
832       AssignmentManager am = master.getAssignmentManager();
833       RegionStates regionStates = am.getRegionStates();
834       ServerName metaServer = regionStates.getRegionServerOfRegion(
835         HRegionInfo.FIRST_META_REGIONINFO);
836       while (true) {
837         assertTrue(am.waitForAssignment(hri));
838         RegionState state = regionStates.getRegionState(hri);
839         ServerName oldServerName = state.getServerName();
840         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
841           // Kill the hosting server, which doesn't have meta on it.
842           cluster.killRegionServer(oldServerName);
843           cluster.waitForRegionServerToStop(oldServerName, -1);
844           break;
845         }
846         int i = cluster.getServerWithMeta();
847         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
848         oldServerName = rs.getServerName();
849         master.move(hri.getEncodedNameAsBytes(),
850           Bytes.toBytes(oldServerName.getServerName()));
851       }
852 
853       // You can't assign a dead region before SSH
854       am.assign(hri, true, true);
855       RegionState state = regionStates.getRegionState(hri);
856       assertTrue(state.isFailedClose());
857 
858       // You can't unassign a dead region before SSH either
859       am.unassign(hri, true);
860       assertTrue(state.isFailedClose());
861 
862       // Enable SSH so that log can be split
863       master.enableSSH(true);
864 
865       // let's check if it's assigned after it's out of transition.
866       // no need to assign it manually, SSH should do it
867       am.waitOnRegionToClearRegionsInTransition(hri);
868       assertTrue(am.waitForAssignment(hri));
869 
870       ServerName serverName = master.getAssignmentManager().
871         getRegionStates().getRegionServerOfRegion(hri);
872       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
873     } finally {
874       if (master != null) {
875         master.enableSSH(true);
876       }
877       TEST_UTIL.deleteTable(Bytes.toBytes(table));
878     }
879   }
880 
881   /**
882    * Test force unassign/assign a region of a disabled table
883    */
884   @Test (timeout=60000)
885   public void testAssignDisabledRegion() throws Exception {
886     String table = "testAssignDisabledRegion";
887     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
888     MyMaster master = null;
889     try {
890       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
891       desc.addFamily(new HColumnDescriptor(FAMILY));
892       admin.createTable(desc);
893 
894       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
895       HRegionInfo hri = new HRegionInfo(
896         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
897       MetaEditor.addRegionToMeta(meta, hri);
898 
899       // Assign the region
900       master = (MyMaster)cluster.getMaster();
901       master.assignRegion(hri);
902       AssignmentManager am = master.getAssignmentManager();
903       RegionStates regionStates = am.getRegionStates();
904       assertTrue(am.waitForAssignment(hri));
905 
906       // Disable the table
907       admin.disableTable(table);
908       assertTrue(regionStates.isRegionOffline(hri));
909 
910       // You can't assign a disabled region
911       am.assign(hri, true, true);
912       assertTrue(regionStates.isRegionOffline(hri));
913 
914       // You can't unassign a disabled region either
915       am.unassign(hri, true);
916       assertTrue(regionStates.isRegionOffline(hri));
917     } finally {
918       TEST_UTIL.deleteTable(Bytes.toBytes(table));
919     }
920   }
921 
922   /**
923    * Test that region state transition call is idempotent
924    */
925   @Test(timeout = 60000)
926   public void testReportRegionStateTransition() throws Exception {
927     String table = "testReportRegionStateTransition";
928     try {
929       MyRegionServer.simulateRetry = true;
930       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
931       desc.addFamily(new HColumnDescriptor(FAMILY));
932       admin.createTable(desc);
933       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
934       HRegionInfo hri =
935           new HRegionInfo(desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
936       MetaEditor.addRegionToMeta(meta, hri);
937       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
938       master.assignRegion(hri);
939       AssignmentManager am = master.getAssignmentManager();
940       am.waitForAssignment(hri);
941       RegionStates regionStates = am.getRegionStates();
942       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
943       // Assert the the region is actually open on the server
944       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
945       // Closing region should just work fine
946       admin.disableTable(TableName.valueOf(table));
947       assertTrue(regionStates.isRegionOffline(hri));
948       List<HRegionInfo> regions = TEST_UTIL.getHBaseAdmin().getOnlineRegions(serverName);
949       assertTrue(!regions.contains(hri));
950     } finally {
951       MyRegionServer.simulateRetry = false;
952       TEST_UTIL.deleteTable(Bytes.toBytes(table));
953     }
954   }
955 
956   /**
957    * Test concurrent updates to meta when meta is not on master. Only for zk-less assignment
958    * @throws Exception
959    */
960   @Test(timeout = 30000)
961   public void testUpdatesRemoteMeta() throws Exception {
962     // Not for zk less assignment
963     if (conf.getBoolean("hbase.assignment.usezk", true)) {
964       return;
965     }
966     conf.setInt("hbase.regionstatestore.meta.connection", 3);
967     final RegionStateStore rss = new RegionStateStore(new MyRegionServer(conf));
968     rss.start();
969     // Create 10 threads and make each do 10 puts related to region state update
970     Thread[] th = new Thread[10];
971     List<String> nameList = new ArrayList<String>();
972     List<TableName> tableNameList = new ArrayList<TableName>();
973     for (int i = 0; i < th.length; i++) {
974       th[i] = new Thread() {
975         @Override
976         public void run() {
977           HRegionInfo[] hri = new HRegionInfo[10];
978           ServerName serverName = ServerName.valueOf("dummyhost", 1000, 1234);
979           for (int i = 0; i < 10; i++) {
980             hri[i] = new HRegionInfo(TableName.valueOf(Thread.currentThread().getName() + "_" + i));
981             RegionState newState = new RegionState(hri[i], RegionState.State.OPEN, serverName);
982             RegionState oldState =
983                 new RegionState(hri[i], RegionState.State.PENDING_OPEN, serverName);
984             rss.updateRegionState(1, newState, oldState);
985           }
986         }
987       };
988       th[i].start();
989       nameList.add(th[i].getName());
990     }
991     for (int i = 0; i < th.length; i++) {
992       th[i].join();
993     }
994     // Add all the expected table names in meta to tableNameList
995     for (String name : nameList) {
996       for (int i = 0; i < 10; i++) {
997         tableNameList.add(TableName.valueOf(name + "_" + i));
998       }
999     }
1000     List<Result> metaRows =
1001         MetaReader.fullScan(TEST_UTIL.getMiniHBaseCluster().getMaster().getCatalogTracker());
1002     int count = 0;
1003     // Check all 100 rows are in meta
1004     for (Result result : metaRows) {
1005       if (tableNameList.contains(HRegionInfo.getTable(result.getRow()))) {
1006         count++;
1007         if (count == 100) {
1008           break;
1009         }
1010       }
1011     }
1012     assertTrue(count == 100);
1013     rss.stop();
1014   }
1015 
1016   static class MyLoadBalancer extends StochasticLoadBalancer {
1017     // For this region, if specified, always assign to nowhere
1018     static volatile String controledRegion = null;
1019 
1020     @Override
1021     public ServerName randomAssignment(HRegionInfo regionInfo,
1022         List<ServerName> servers) {
1023       if (regionInfo.getEncodedName().equals(controledRegion)) {
1024         return null;
1025       }
1026       return super.randomAssignment(regionInfo, servers);
1027     }
1028   }
1029 
1030   public static class MyMaster extends HMaster {
1031     AtomicBoolean enabled = new AtomicBoolean(true);
1032 
1033     public MyMaster(Configuration conf) throws IOException, KeeperException,
1034         InterruptedException {
1035       super(conf);
1036     }
1037 
1038     @Override
1039     public boolean isServerShutdownHandlerEnabled() {
1040       return enabled.get() && super.isServerShutdownHandlerEnabled();
1041     }
1042 
1043     public void enableSSH(boolean enabled) {
1044       this.enabled.set(enabled);
1045       if (enabled) {
1046         serverManager.processQueuedDeadServers();
1047       }
1048     }
1049   }
1050   
1051   public static class MyRegionServer extends MiniHBaseClusterRegionServer {
1052     static volatile ServerName abortedServer = null;
1053     static volatile boolean simulateRetry;
1054 
1055     public MyRegionServer(Configuration conf)
1056       throws IOException, KeeperException,
1057         InterruptedException {
1058       super(conf);
1059     }
1060 
1061     @Override
1062     public boolean
1063         reportRegionStateTransition(TransitionCode code, long openSeqNum, HRegionInfo... hris) {
1064       if (simulateRetry == true) {
1065         // Simulate retry by calling the method twice
1066         super.reportRegionStateTransition(code, openSeqNum, hris);
1067         return super.reportRegionStateTransition(code, openSeqNum, hris);
1068       }
1069       return super.reportRegionStateTransition(code, openSeqNum, hris);
1070     }
1071 
1072     @Override
1073     public boolean isAborted() {
1074       return getServerName().equals(abortedServer) || super.isAborted();
1075     }
1076   }
1077 
1078 
1079   public static class MyRegionObserver extends BaseRegionObserver {
1080     // If enabled, fail all preClose calls
1081     static AtomicBoolean preCloseEnabled = new AtomicBoolean(false);
1082 
1083     // If enabled, stall postClose calls
1084     static AtomicBoolean postCloseEnabled = new AtomicBoolean(false);
1085 
1086     // If enabled, stall postOpen calls
1087     static AtomicBoolean postOpenEnabled = new AtomicBoolean(false);
1088 
1089     // A flag to track if postOpen is called
1090     static volatile boolean postOpenCalled = false;
1091 
1092     @Override
1093     public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
1094         boolean abortRequested) throws IOException {
1095       if (preCloseEnabled.get()) throw new IOException("fail preClose from coprocessor");
1096     }
1097 
1098     @Override
1099     public void postClose(ObserverContext<RegionCoprocessorEnvironment> c,
1100         boolean abortRequested) {
1101       stallOnFlag(postCloseEnabled);
1102     }
1103 
1104     @Override
1105     public void postOpen(ObserverContext<RegionCoprocessorEnvironment> c) {
1106       postOpenCalled = true;
1107       stallOnFlag(postOpenEnabled);
1108     }
1109 
1110     private void stallOnFlag(final AtomicBoolean flag) {
1111       try {
1112         // If enabled, stall
1113         while (flag.get()) {
1114           Thread.sleep(1000);
1115         }
1116       } catch (InterruptedException ie) {
1117         Thread.currentThread().interrupt();
1118       }
1119     }
1120   }
1121 }