1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertTrue;
25
26 import java.io.IOException;
27 import java.util.ArrayList;
28 import java.util.Iterator;
29 import java.util.List;
30 import java.util.Set;
31 import java.util.TreeSet;
32
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.fs.FileSystem;
37 import org.apache.hadoop.fs.Path;
38 import org.apache.hadoop.hbase.Abortable;
39 import org.apache.hadoop.hbase.ClusterStatus;
40 import org.apache.hadoop.hbase.HBaseConfiguration;
41 import org.apache.hadoop.hbase.HBaseTestingUtility;
42 import org.apache.hadoop.hbase.HColumnDescriptor;
43 import org.apache.hadoop.hbase.HConstants;
44 import org.apache.hadoop.hbase.HRegionInfo;
45 import org.apache.hadoop.hbase.HTableDescriptor;
46 import org.apache.hadoop.hbase.testclassification.LargeTests;
47 import org.apache.hadoop.hbase.MiniHBaseCluster;
48 import org.apache.hadoop.hbase.RegionTransition;
49 import org.apache.hadoop.hbase.ServerName;
50 import org.apache.hadoop.hbase.TableName;
51 import org.apache.hadoop.hbase.catalog.CatalogTracker;
52 import org.apache.hadoop.hbase.catalog.MetaEditor;
53 import org.apache.hadoop.hbase.client.HTable;
54 import org.apache.hadoop.hbase.executor.EventType;
55 import org.apache.hadoop.hbase.master.RegionState.State;
56 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
57 import org.apache.hadoop.hbase.protobuf.RequestConverter;
58 import org.apache.hadoop.hbase.regionserver.HRegion;
59 import org.apache.hadoop.hbase.regionserver.HRegionServer;
60 import org.apache.hadoop.hbase.regionserver.RegionMergeTransaction;
61 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
62 import org.apache.hadoop.hbase.util.Bytes;
63 import org.apache.hadoop.hbase.util.FSTableDescriptors;
64 import org.apache.hadoop.hbase.util.FSUtils;
65 import org.apache.hadoop.hbase.util.JVMClusterUtil;
66 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
67 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
68 import org.apache.hadoop.hbase.util.Threads;
69 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
70 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
71 import org.apache.hadoop.hbase.zookeeper.ZKTable;
72 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
73 import org.apache.zookeeper.data.Stat;
74 import org.junit.Test;
75 import org.junit.experimental.categories.Category;
76
77 @Category(LargeTests.class)
78 public class TestMasterFailover {
79 private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160 @Test (timeout=240000)
161 public void testMasterFailoverWithMockedRIT() throws Exception {
162
163 final int NUM_MASTERS = 1;
164 final int NUM_RS = 3;
165
166
167 Configuration conf = HBaseConfiguration.create();
168 conf.setBoolean("hbase.assignment.usezk", true);
169
170
171 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
172 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
173 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
174 log("Cluster started");
175
176
177 ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
178
179
180 List<MasterThread> masterThreads = cluster.getMasterThreads();
181 assertEquals(1, masterThreads.size());
182
183
184 assertTrue(cluster.waitForActiveAndReadyMaster());
185 HMaster master = masterThreads.get(0).getMaster();
186 assertTrue(master.isActiveMaster());
187 assertTrue(master.isInitialized());
188
189
190 master.balanceSwitch(false);
191
192
193 byte [] FAMILY = Bytes.toBytes("family");
194 byte [][] SPLIT_KEYS = new byte [][] {
195 new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
196 Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
197 Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
198 Bytes.toBytes("iii"), Bytes.toBytes("jjj")
199 };
200
201 byte [] enabledTable = Bytes.toBytes("enabledTable");
202 HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
203 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
204
205 FileSystem filesystem = FileSystem.get(conf);
206 Path rootdir = FSUtils.getRootDir(conf);
207 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
208
209 fstd.createTableDescriptor(htdEnabled);
210
211 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(), null, null);
212 createRegion(hriEnabled, rootdir, conf, htdEnabled);
213
214 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
215 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
216
217 TableName disabledTable = TableName.valueOf("disabledTable");
218 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
219 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
220
221 fstd.createTableDescriptor(htdDisabled);
222 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
223 createRegion(hriDisabled, rootdir, conf, htdDisabled);
224 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
225 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
226
227 TableName tableWithMergingRegions = TableName.valueOf("tableWithMergingRegions");
228 TEST_UTIL.createTable(tableWithMergingRegions, FAMILY, new byte [][] {Bytes.toBytes("m")});
229
230 log("Regions in hbase:meta and namespace have been created");
231
232
233
234 assertEquals(4, cluster.countServedRegions());
235
236
237 AssignmentManager am = master.getAssignmentManager();
238 RegionStates regionStates = am.getRegionStates();
239 List<HRegionInfo> mergingRegions = regionStates.getRegionsOfTable(tableWithMergingRegions);
240 assertEquals(2, mergingRegions.size());
241 HRegionInfo a = mergingRegions.get(0);
242 HRegionInfo b = mergingRegions.get(1);
243 HRegionInfo newRegion = RegionMergeTransaction.getMergedRegionInfo(a, b);
244 ServerName mergingServer = regionStates.getRegionServerOfRegion(a);
245 ServerName serverB = regionStates.getRegionServerOfRegion(b);
246 if (!serverB.equals(mergingServer)) {
247 RegionPlan plan = new RegionPlan(b, serverB, mergingServer);
248 am.balance(plan);
249 assertTrue(am.waitForAssignment(b));
250 }
251
252
253 HRegionServer hrs = cluster.getRegionServer(0);
254 ServerName serverName = hrs.getServerName();
255 HRegionInfo closingRegion = enabledRegions.remove(0);
256
257 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
258 enabledAndAssignedRegions.add(enabledRegions.remove(0));
259 enabledAndAssignedRegions.add(enabledRegions.remove(0));
260 enabledAndAssignedRegions.add(closingRegion);
261
262 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
263 disabledAndAssignedRegions.add(disabledRegions.remove(0));
264 disabledAndAssignedRegions.add(disabledRegions.remove(0));
265
266
267 for (HRegionInfo hri : enabledAndAssignedRegions) {
268 master.assignmentManager.addPlan(hri.getEncodedName(),
269 new RegionPlan(hri, null, serverName));
270 master.assignRegion(hri);
271 }
272
273 for (HRegionInfo hri : disabledAndAssignedRegions) {
274 master.assignmentManager.addPlan(hri.getEncodedName(),
275 new RegionPlan(hri, null, serverName));
276 master.assignRegion(hri);
277 }
278
279
280 log("Waiting for assignment to finish");
281 ZKAssign.blockUntilNoRIT(zkw);
282 log("Assignment completed");
283
284
285 log("Aborting master");
286 cluster.abortMaster(0);
287 cluster.waitOnMaster(0);
288 log("Master has aborted");
289
290
291
292
293
294
295 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
296 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
297
298 log("Beginning to mock scenarios");
299
300
301 ZKTable zktable = new ZKTable(zkw);
302 zktable.setDisabledTable(disabledTable);
303
304
305
306
307
308
309
310
311 HRegionInfo region = enabledRegions.remove(0);
312 regionsThatShouldBeOnline.add(region);
313 ZKAssign.createNodeOffline(zkw, region, serverName);
314
315
316
317
318
319 regionsThatShouldBeOnline.add(closingRegion);
320 ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
321
322
323
324
325
326
327
328 region = enabledRegions.remove(0);
329 regionsThatShouldBeOnline.add(region);
330 int version = ZKAssign.createNodeClosing(zkw, region, serverName);
331 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
332
333
334 region = disabledRegions.remove(0);
335 regionsThatShouldBeOffline.add(region);
336 version = ZKAssign.createNodeClosing(zkw, region, serverName);
337 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
338
339
340
341
342
343
344
345 region = enabledRegions.remove(0);
346 regionsThatShouldBeOnline.add(region);
347 ZKAssign.createNodeOffline(zkw, region, serverName);
348 ProtobufUtil.openRegion(hrs, hrs.getServerName(), region);
349 while (true) {
350 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
351 RegionTransition rt = RegionTransition.parseFrom(bytes);
352 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
353 break;
354 }
355 Thread.sleep(100);
356 }
357
358
359
360 region = disabledRegions.remove(0);
361 regionsThatShouldBeOffline.add(region);
362 ZKAssign.createNodeOffline(zkw, region, serverName);
363 ProtobufUtil.openRegion(hrs, hrs.getServerName(), region);
364 while (true) {
365 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
366 RegionTransition rt = RegionTransition.parseFrom(bytes);
367 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
368 break;
369 }
370 Thread.sleep(100);
371 }
372
373
374
375
376
377
378
379 RegionMergeTransaction.createNodeMerging(
380 zkw, newRegion, mergingServer, a, b);
381
382
383
384
385
386
387
388
389
390 log("Done mocking data up in ZK");
391
392
393 log("Starting up a new master");
394 master = cluster.startMaster().getMaster();
395 log("Waiting for master to be ready");
396 cluster.waitForActiveAndReadyMaster();
397 log("Master is ready");
398
399
400 regionStates = master.getAssignmentManager().getRegionStates();
401
402 assertTrue(regionStates.isRegionInState(a, State.MERGING));
403 assertTrue(regionStates.isRegionInState(b, State.MERGING));
404 assertTrue(regionStates.isRegionInState(newRegion, State.MERGING_NEW));
405
406
407 ZKAssign.deleteNodeFailSilent(zkw, newRegion);
408
409
410 log("Waiting for no more RIT");
411 ZKAssign.blockUntilNoRIT(zkw);
412 log("No more RIT in ZK, now doing final test verification");
413
414
415 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
416 for (JVMClusterUtil.RegionServerThread rst :
417 cluster.getRegionServerThreads()) {
418 onlineRegions.addAll(ProtobufUtil.getOnlineRegions(rst.getRegionServer()));
419 }
420
421
422 for (HRegionInfo hri : regionsThatShouldBeOnline) {
423 assertTrue(onlineRegions.contains(hri));
424 }
425
426
427 for (HRegionInfo hri : regionsThatShouldBeOffline) {
428 if (onlineRegions.contains(hri)) {
429 LOG.debug(hri);
430 }
431 assertFalse(onlineRegions.contains(hri));
432 }
433
434 log("Done with verification, all passed, shutting down cluster");
435
436
437 TEST_UTIL.shutdownMiniCluster();
438 }
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496 @Test (timeout=180000)
497 public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
498
499 final int NUM_MASTERS = 1;
500 final int NUM_RS = 2;
501
502
503 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
504 Configuration conf = TEST_UTIL.getConfiguration();
505 conf.setBoolean("hbase.assignment.usezk", true);
506
507 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
508 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2);
509 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
510 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
511 log("Cluster started");
512
513
514 ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
515 "unittest", new Abortable() {
516
517 @Override
518 public void abort(String why, Throwable e) {
519 LOG.error("Fatal ZK Error: " + why, e);
520 org.junit.Assert.assertFalse("Fatal ZK error", true);
521 }
522
523 @Override
524 public boolean isAborted() {
525 return false;
526 }
527
528 });
529
530
531 List<MasterThread> masterThreads = cluster.getMasterThreads();
532 assertEquals(1, masterThreads.size());
533
534
535 assertTrue(cluster.waitForActiveAndReadyMaster());
536 HMaster master = masterThreads.get(0).getMaster();
537 assertTrue(master.isActiveMaster());
538 assertTrue(master.isInitialized());
539
540
541 master.balanceSwitch(false);
542
543
544 byte [] FAMILY = Bytes.toBytes("family");
545 byte[][] SPLIT_KEYS =
546 TEST_UTIL.getRegionSplitStartKeys(Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 30);
547
548 byte [] enabledTable = Bytes.toBytes("enabledTable");
549 HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
550 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
551 FileSystem filesystem = FileSystem.get(conf);
552 Path rootdir = FSUtils.getRootDir(conf);
553 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
554
555 fstd.createTableDescriptor(htdEnabled);
556 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(),
557 null, null);
558 createRegion(hriEnabled, rootdir, conf, htdEnabled);
559
560 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
561 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
562
563 TableName disabledTable =
564 TableName.valueOf("disabledTable");
565 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
566 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
567
568 fstd.createTableDescriptor(htdDisabled);
569 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
570 createRegion(hriDisabled, rootdir, conf, htdDisabled);
571
572 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
573 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
574
575 log("Regions in hbase:meta and Namespace have been created");
576
577
578 assertEquals(2, cluster.countServedRegions());
579
580
581 List<RegionServerThread> regionservers =
582 cluster.getRegionServerThreads();
583 HRegionServer hrs = regionservers.get(0).getRegionServer();
584
585
586 RegionServerThread hrsDeadThread = regionservers.get(1);
587 HRegionServer hrsDead = hrsDeadThread.getRegionServer();
588 ServerName deadServerName = hrsDead.getServerName();
589
590
591 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
592 enabledAndAssignedRegions.addAll(enabledRegions.subList(0, 6));
593 enabledRegions.removeAll(enabledAndAssignedRegions);
594 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
595 disabledAndAssignedRegions.addAll(disabledRegions.subList(0, 6));
596 disabledRegions.removeAll(disabledAndAssignedRegions);
597
598
599 for (HRegionInfo hri : enabledAndAssignedRegions) {
600 master.assignmentManager.addPlan(hri.getEncodedName(),
601 new RegionPlan(hri, null, hrs.getServerName()));
602 master.assignRegion(hri);
603 }
604 for (HRegionInfo hri : disabledAndAssignedRegions) {
605 master.assignmentManager.addPlan(hri.getEncodedName(),
606 new RegionPlan(hri, null, hrs.getServerName()));
607 master.assignRegion(hri);
608 }
609
610 log("Waiting for assignment to finish");
611 ZKAssign.blockUntilNoRIT(zkw);
612 master.assignmentManager.waitUntilNoRegionsInTransition(60000);
613 log("Assignment completed");
614
615 assertTrue(" Table must be enabled.", master.getAssignmentManager()
616 .getZKTable().isEnabledTable(TableName.valueOf("enabledTable")));
617
618 List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
619 enabledAndOnDeadRegions.addAll(enabledRegions.subList(0, 6));
620 enabledRegions.removeAll(enabledAndOnDeadRegions);
621 List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
622 disabledAndOnDeadRegions.addAll(disabledRegions.subList(0, 6));
623 disabledRegions.removeAll(disabledAndOnDeadRegions);
624
625
626 for (HRegionInfo hri : enabledAndOnDeadRegions) {
627 master.assignmentManager.addPlan(hri.getEncodedName(),
628 new RegionPlan(hri, null, deadServerName));
629 master.assignRegion(hri);
630 }
631 for (HRegionInfo hri : disabledAndOnDeadRegions) {
632 master.assignmentManager.addPlan(hri.getEncodedName(),
633 new RegionPlan(hri, null, deadServerName));
634 master.assignRegion(hri);
635 }
636
637
638 log("Waiting for assignment to finish");
639 ZKAssign.blockUntilNoRIT(zkw);
640 master.assignmentManager.waitUntilNoRegionsInTransition(60000);
641 log("Assignment completed");
642
643
644
645 verifyRegionLocation(hrs, enabledAndAssignedRegions);
646 verifyRegionLocation(hrs, disabledAndAssignedRegions);
647 verifyRegionLocation(hrsDead, enabledAndOnDeadRegions);
648 verifyRegionLocation(hrsDead, disabledAndOnDeadRegions);
649
650 assertTrue(" Didn't get enough regions of enabledTalbe on live rs.",
651 enabledAndAssignedRegions.size() >= 2);
652 assertTrue(" Didn't get enough regions of disalbedTable on live rs.",
653 disabledAndAssignedRegions.size() >= 2);
654 assertTrue(" Didn't get enough regions of enabledTalbe on dead rs.",
655 enabledAndOnDeadRegions.size() >= 2);
656 assertTrue(" Didn't get enough regions of disalbedTable on dead rs.",
657 disabledAndOnDeadRegions.size() >= 2);
658
659
660 log("Aborting master");
661 cluster.abortMaster(0);
662 cluster.waitOnMaster(0);
663 log("Master has aborted");
664
665
666
667
668
669
670 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
671 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
672
673 log("Beginning to mock scenarios");
674
675
676 ZKTable zktable = new ZKTable(zkw);
677 zktable.setDisabledTable(disabledTable);
678
679 assertTrue(" The enabled table should be identified on master fail over.",
680 zktable.isEnabledTable(TableName.valueOf("enabledTable")));
681
682
683
684
685
686
687 HRegionInfo region = enabledAndOnDeadRegions.remove(0);
688 regionsThatShouldBeOnline.add(region);
689 ZKAssign.createNodeClosing(zkw, region, deadServerName);
690 LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
691 region + "\n\n");
692
693
694 region = disabledAndOnDeadRegions.remove(0);
695 regionsThatShouldBeOffline.add(region);
696 ZKAssign.createNodeClosing(zkw, region, deadServerName);
697 LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
698 region + "\n\n");
699
700
701
702
703
704
705 region = enabledAndOnDeadRegions.remove(0);
706 regionsThatShouldBeOnline.add(region);
707 int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
708 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
709 LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
710 region + "\n\n");
711
712
713 region = disabledAndOnDeadRegions.remove(0);
714 regionsThatShouldBeOffline.add(region);
715 version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
716 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
717 LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
718 region + "\n\n");
719
720
721
722
723
724
725 region = enabledRegions.remove(0);
726 regionsThatShouldBeOnline.add(region);
727 ZKAssign.createNodeOffline(zkw, region, deadServerName);
728 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
729 LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
730 region + "\n\n");
731
732
733 region = disabledRegions.remove(0);
734 regionsThatShouldBeOffline.add(region);
735 ZKAssign.createNodeOffline(zkw, region, deadServerName);
736 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
737 LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
738 region + "\n\n");
739
740
741
742
743
744
745 region = enabledRegions.remove(0);
746 regionsThatShouldBeOnline.add(region);
747 ZKAssign.createNodeOffline(zkw, region, deadServerName);
748 ProtobufUtil.openRegion(hrsDead, hrsDead.getServerName(), region);
749 while (true) {
750 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
751 RegionTransition rt = RegionTransition.parseFrom(bytes);
752 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
753 break;
754 }
755 Thread.sleep(100);
756 }
757 LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" +
758 region + "\n\n");
759
760
761 region = disabledRegions.remove(0);
762 regionsThatShouldBeOffline.add(region);
763 ZKAssign.createNodeOffline(zkw, region, deadServerName);
764 ProtobufUtil.openRegion(hrsDead, hrsDead.getServerName(), region);
765 while (true) {
766 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
767 RegionTransition rt = RegionTransition.parseFrom(bytes);
768 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
769 break;
770 }
771 Thread.sleep(100);
772 }
773 LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" +
774 region + "\n\n");
775
776
777
778
779
780
781 region = enabledRegions.remove(0);
782 regionsThatShouldBeOnline.add(region);
783 ZKAssign.createNodeOffline(zkw, region, deadServerName);
784 ProtobufUtil.openRegion(hrsDead, hrsDead.getServerName(), region);
785 while (true) {
786 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
787 RegionTransition rt = RegionTransition.parseFrom(bytes);
788 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
789 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
790 LOG.debug("DELETED " + rt);
791 break;
792 }
793 Thread.sleep(100);
794 }
795 LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
796 + "\n" + region + "\n\n");
797
798
799 region = disabledRegions.remove(0);
800 regionsThatShouldBeOffline.add(region);
801 ZKAssign.createNodeOffline(zkw, region, deadServerName);
802 ProtobufUtil.openRegion(hrsDead, hrsDead.getServerName(), region);
803 while (true) {
804 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
805 RegionTransition rt = RegionTransition.parseFrom(bytes);
806 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
807 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
808 break;
809 }
810 Thread.sleep(100);
811 }
812 LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
813 + "\n" + region + "\n\n");
814
815
816
817
818
819 log("Done mocking data up in ZK");
820
821
822 log("Killing RS " + deadServerName);
823 hrsDead.abort("Killing for unit test");
824 log("RS " + deadServerName + " killed");
825
826
827
828 while (hrsDeadThread.isAlive()) {
829 Threads.sleep(10);
830 }
831 log("Starting up a new master");
832 master = cluster.startMaster().getMaster();
833 log("Waiting for master to be ready");
834 assertTrue(cluster.waitForActiveAndReadyMaster());
835 log("Master is ready");
836
837
838 while (master.getServerManager().areDeadServersInProgress()) {
839 Thread.sleep(10);
840 }
841
842
843 log("Waiting for no more RIT");
844 ZKAssign.blockUntilNoRIT(zkw);
845 log("No more RIT in ZK");
846 long now = System.currentTimeMillis();
847 long maxTime = 120000;
848 boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
849 if (!done) {
850 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
851 LOG.info("rit=" + regionStates.getRegionsInTransition());
852 }
853 long elapsed = System.currentTimeMillis() - now;
854 assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
855 elapsed < maxTime);
856 log("No more RIT in RIT map, doing final test verification");
857
858
859 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
860 now = System.currentTimeMillis();
861 maxTime = 30000;
862 for (JVMClusterUtil.RegionServerThread rst :
863 cluster.getRegionServerThreads()) {
864 try {
865 HRegionServer rs = rst.getRegionServer();
866 while (!rs.getRegionsInTransitionInRS().isEmpty()) {
867 elapsed = System.currentTimeMillis() - now;
868 assertTrue("Test timed out in getting online regions", elapsed < maxTime);
869 if (rs.isAborted() || rs.isStopped()) {
870
871 break;
872 }
873 Thread.sleep(100);
874 }
875 onlineRegions.addAll(ProtobufUtil.getOnlineRegions(rs));
876 } catch (RegionServerStoppedException e) {
877 LOG.info("Got RegionServerStoppedException", e);
878 }
879 }
880
881
882 for (HRegionInfo hri : regionsThatShouldBeOnline) {
883 assertTrue("region=" + hri.getRegionNameAsString() + ", " + onlineRegions.toString(),
884 onlineRegions.contains(hri));
885 }
886
887
888 for (HRegionInfo hri : regionsThatShouldBeOffline) {
889 assertFalse(onlineRegions.contains(hri));
890 }
891
892 log("Done with verification, all passed, shutting down cluster");
893
894
895 TEST_UTIL.shutdownMiniCluster();
896 }
897
898
899
900
901 private void verifyRegionLocation(HRegionServer hrs, List<HRegionInfo> regions)
902 throws IOException {
903 List<HRegionInfo> tmpOnlineRegions = ProtobufUtil.getOnlineRegions(hrs);
904 Iterator<HRegionInfo> itr = regions.iterator();
905 while (itr.hasNext()) {
906 HRegionInfo tmp = itr.next();
907 if (!tmpOnlineRegions.contains(tmp)) {
908 itr.remove();
909 }
910 }
911 }
912
913 HRegion createRegion(final HRegionInfo hri, final Path rootdir, final Configuration c,
914 final HTableDescriptor htd)
915 throws IOException {
916 HRegion r = HRegion.createHRegion(hri, rootdir, c, htd);
917
918
919
920
921
922 HRegion.closeHRegion(r);
923 return r;
924 }
925
926
927
928
929 private void log(String string) {
930 LOG.info("\n\n" + string + " \n\n");
931 }
932
933 @Test (timeout=180000)
934 public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
935 throws Exception {
936 LOG.info("Starting testShouldCheckMasterFailOverWhenMETAIsInOpenedState");
937 final int NUM_MASTERS = 1;
938 final int NUM_RS = 2;
939
940
941 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
942 Configuration conf = TEST_UTIL.getConfiguration();
943 conf.setInt("hbase.master.info.port", -1);
944 conf.setBoolean("hbase.assignment.usezk", true);
945
946 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
947 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
948
949
950 List<RegionServerThread> regionServerThreads =
951 cluster.getRegionServerThreads();
952 int count = -1;
953 HRegion metaRegion = null;
954 for (RegionServerThread regionServerThread : regionServerThreads) {
955 HRegionServer regionServer = regionServerThread.getRegionServer();
956 metaRegion = regionServer.getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
957 count++;
958 regionServer.abort("");
959 if (null != metaRegion) break;
960 }
961 HRegionServer regionServer = cluster.getRegionServer(count);
962
963 TEST_UTIL.shutdownMiniHBaseCluster();
964
965
966 ZooKeeperWatcher zkw =
967 HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
968 metaRegion, regionServer.getServerName());
969
970 LOG.info("Staring cluster for second time");
971 TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
972
973 HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
974 while (!master.isInitialized()) {
975 Thread.sleep(100);
976 }
977
978 log("Waiting for no more RIT");
979 ZKAssign.blockUntilNoRIT(zkw);
980
981 zkw.close();
982
983 TEST_UTIL.shutdownMiniCluster();
984 }
985
986
987
988
989 @Test(timeout=240000)
990 public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
991 final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
992 final int NUM_MASTERS = 1;
993 final int NUM_RS = 2;
994
995
996 Configuration conf = HBaseConfiguration.create();
997 conf.setBoolean("hbase.assignment.usezk", true);
998
999
1000 final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1001 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1002 log("Cluster started");
1003
1004 TEST_UTIL.createTable(table, Bytes.toBytes("family"));
1005 HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1006 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1007 HRegionInfo hri = regionStates.getRegionsOfTable(table).get(0);
1008 ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1009 TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
1010
1011 ServerName dstName = null;
1012 for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
1013 if (!tmpServer.equals(serverName)) {
1014 dstName = tmpServer;
1015 break;
1016 }
1017 }
1018
1019 assertTrue(dstName != null);
1020
1021 TEST_UTIL.shutdownMiniHBaseCluster();
1022
1023 ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
1024 ZKAssign.createNodeOffline(zkw, hri, dstName);
1025 Stat stat = new Stat();
1026 byte[] data =
1027 ZKAssign.getDataNoWatch(zkw, hri.getEncodedName(), stat);
1028 assertTrue(data != null);
1029 RegionTransition rt = RegionTransition.parseFrom(data);
1030 assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);
1031
1032 LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
1033 + " and dst server=" + dstName);
1034
1035
1036 TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
1037
1038 while (true) {
1039 master = TEST_UTIL.getHBaseCluster().getMaster();
1040 if (master != null && master.isInitialized()) {
1041 ServerManager serverManager = master.getServerManager();
1042 if (!serverManager.areDeadServersInProgress()) {
1043 break;
1044 }
1045 }
1046 Thread.sleep(200);
1047 }
1048
1049
1050 master = TEST_UTIL.getHBaseCluster().getMaster();
1051 master.getAssignmentManager().waitForAssignment(hri);
1052 regionStates = master.getAssignmentManager().getRegionStates();
1053 RegionState newState = regionStates.getRegionState(hri);
1054 assertTrue(newState.isOpened());
1055 }
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065 @Test (timeout=240000)
1066 public void testSimpleMasterFailover() throws Exception {
1067
1068 final int NUM_MASTERS = 3;
1069 final int NUM_RS = 3;
1070
1071
1072 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
1073
1074 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1075 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1076
1077
1078 List<MasterThread> masterThreads = cluster.getMasterThreads();
1079
1080
1081 for (MasterThread mt : masterThreads) {
1082 assertTrue(mt.isAlive());
1083 }
1084
1085
1086 int numActive = 0;
1087 int activeIndex = -1;
1088 ServerName activeName = null;
1089 HMaster active = null;
1090 for (int i = 0; i < masterThreads.size(); i++) {
1091 if (masterThreads.get(i).getMaster().isActiveMaster()) {
1092 numActive++;
1093 activeIndex = i;
1094 active = masterThreads.get(activeIndex).getMaster();
1095 activeName = active.getServerName();
1096 }
1097 }
1098 assertEquals(1, numActive);
1099 assertEquals(NUM_MASTERS, masterThreads.size());
1100 LOG.info("Active master " + activeName);
1101
1102
1103 assertNotNull(active);
1104 ClusterStatus status = active.getClusterStatus();
1105 assertTrue(status.getMaster().equals(activeName));
1106 assertEquals(2, status.getBackupMastersSize());
1107 assertEquals(2, status.getBackupMasters().size());
1108
1109
1110 int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
1111 HMaster master = cluster.getMaster(backupIndex);
1112 LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
1113 cluster.stopMaster(backupIndex, false);
1114 cluster.waitOnMaster(backupIndex);
1115
1116
1117 for (int i = 0; i < masterThreads.size(); i++) {
1118 if (masterThreads.get(i).getMaster().isActiveMaster()) {
1119 assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
1120 activeIndex = i;
1121 active = masterThreads.get(activeIndex).getMaster();
1122 }
1123 }
1124 assertEquals(1, numActive);
1125 assertEquals(2, masterThreads.size());
1126 int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
1127 LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " regions servers");
1128 assertEquals(3, rsCount);
1129
1130
1131 assertNotNull(active);
1132 status = active.getClusterStatus();
1133 assertTrue(status.getMaster().equals(activeName));
1134 assertEquals(1, status.getBackupMastersSize());
1135 assertEquals(1, status.getBackupMasters().size());
1136
1137
1138 LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
1139 cluster.stopMaster(activeIndex, false);
1140 cluster.waitOnMaster(activeIndex);
1141
1142
1143 assertTrue(cluster.waitForActiveAndReadyMaster());
1144
1145 LOG.debug("\n\nVerifying backup master is now active\n");
1146
1147 assertEquals(1, masterThreads.size());
1148
1149
1150 active = masterThreads.get(0).getMaster();
1151 assertNotNull(active);
1152 status = active.getClusterStatus();
1153 ServerName mastername = status.getMaster();
1154 assertTrue(mastername.equals(active.getServerName()));
1155 assertTrue(active.isActiveMaster());
1156 assertEquals(0, status.getBackupMastersSize());
1157 assertEquals(0, status.getBackupMasters().size());
1158 int rss = status.getServersSize();
1159 LOG.info("Active master " + mastername.getServerName() + " managing " +
1160 rss + " region servers");
1161 assertEquals(3, rss);
1162
1163
1164 TEST_UTIL.shutdownMiniCluster();
1165 }
1166
1167
1168
1169
1170 @Test (timeout=180000)
1171 public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
1172 final int NUM_MASTERS = 1;
1173 final int NUM_RS = 1;
1174
1175
1176 Configuration conf = HBaseConfiguration.create();
1177 conf.setBoolean("hbase.assignment.usezk", false);
1178
1179
1180 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1181 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1182 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1183 log("Cluster started");
1184
1185
1186 List<MasterThread> masterThreads = cluster.getMasterThreads();
1187 assertEquals(1, masterThreads.size());
1188
1189
1190 assertTrue(cluster.waitForActiveAndReadyMaster());
1191 HMaster master = masterThreads.get(0).getMaster();
1192 assertTrue(master.isActiveMaster());
1193 assertTrue(master.isInitialized());
1194
1195
1196 HTable onlineTable = TEST_UTIL.createTable("onlineTable", "family");
1197
1198
1199 HTableDescriptor offlineTable = new HTableDescriptor(
1200 TableName.valueOf(Bytes.toBytes("offlineTable")));
1201 offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
1202
1203 FileSystem filesystem = FileSystem.get(conf);
1204 Path rootdir = FSUtils.getRootDir(conf);
1205 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
1206 fstd.createTableDescriptor(offlineTable);
1207
1208 HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
1209 createRegion(hriOffline, rootdir, conf, offlineTable);
1210 MetaEditor.addRegionToMeta(master.getCatalogTracker(), hriOffline);
1211
1212 log("Regions in hbase:meta and namespace have been created");
1213
1214
1215
1216 assertEquals(3, cluster.countServedRegions());
1217 HRegionInfo hriOnline = onlineTable.getRegionLocation("").getRegionInfo();
1218
1219 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1220 RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
1221
1222
1223
1224 RegionState oldState = regionStates.getRegionState(hriOnline);
1225 RegionState newState = new RegionState(hriOnline, State.PENDING_CLOSE, oldState.getServerName());
1226 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1227
1228
1229
1230 oldState = new RegionState(hriOffline, State.OFFLINE);
1231 newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
1232 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1233
1234 HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
1235 createRegion(failedClose, rootdir, conf, offlineTable);
1236 MetaEditor.addRegionToMeta(master.getCatalogTracker(), failedClose);
1237
1238 oldState = new RegionState(failedClose, State.PENDING_CLOSE);
1239 newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
1240 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1241
1242
1243 HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
1244 createRegion(failedOpen, rootdir, conf, offlineTable);
1245 MetaEditor.addRegionToMeta(master.getCatalogTracker(), failedOpen);
1246
1247
1248
1249 oldState = new RegionState(failedOpen, State.PENDING_OPEN);
1250 newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
1251 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1252
1253 HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
1254 createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
1255 MetaEditor.addRegionToMeta(master.getCatalogTracker(), failedOpenNullServer);
1256
1257
1258
1259 oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
1260 newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
1261 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1262
1263
1264
1265
1266 log("Aborting master");
1267 cluster.abortMaster(0);
1268 cluster.waitOnMaster(0);
1269 log("Master has aborted");
1270
1271
1272 log("Starting up a new master");
1273 master = cluster.startMaster().getMaster();
1274 log("Waiting for master to be ready");
1275 cluster.waitForActiveAndReadyMaster();
1276 log("Master is ready");
1277
1278
1279 master.getAssignmentManager().waitUntilNoRegionsInTransition(60000);
1280
1281
1282 regionStates = master.getAssignmentManager().getRegionStates();
1283
1284
1285 assertTrue(regionStates.isRegionOnline(hriOffline));
1286 assertTrue(regionStates.isRegionOnline(hriOnline));
1287 assertTrue(regionStates.isRegionOnline(failedClose));
1288 assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
1289 assertTrue(regionStates.isRegionOnline(failedOpen));
1290
1291 log("Done with verification, shutting down cluster");
1292
1293
1294 TEST_UTIL.shutdownMiniCluster();
1295 }
1296
1297
1298
1299
1300 @Test(timeout = 180000)
1301 public void testMetaInTransitionWhenMasterFailover() throws Exception {
1302 final int NUM_MASTERS = 1;
1303 final int NUM_RS = 1;
1304
1305
1306 Configuration conf = HBaseConfiguration.create();
1307 conf.setBoolean("hbase.assignment.usezk", false);
1308 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1309 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1310 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1311 CatalogTracker catalogTracker = new CatalogTracker(HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL),
1312 conf, null);
1313
1314 log("Cluster started");
1315
1316 log("Moving meta off the master");
1317 HMaster activeMaster = cluster.getMaster();
1318 HRegionServer rs = cluster.getRegionServer(0);
1319 ServerName metaServerName = cluster.getLiveRegionServerThreads()
1320 .get(0).getRegionServer().getServerName();
1321 activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
1322 Bytes.toBytes(metaServerName.getServerName()));
1323 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1324 assertEquals("Meta should be assigned on expected regionserver",
1325 metaServerName, catalogTracker.getMetaLocation());
1326
1327
1328 log("Aborting master");
1329 activeMaster.stop("test-kill");
1330 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1331 log("Master has aborted");
1332
1333
1334 RegionState metaState =
1335 catalogTracker.getMetaRegionState();
1336 assertEquals("hbase:root should be onlined on RS",
1337 metaState.getServerName(), rs.getServerName());
1338 assertEquals("hbase:root should be onlined on RS",
1339 metaState.getState(), State.OPEN);
1340
1341
1342 log("Starting up a new master");
1343 activeMaster = cluster.startMaster().getMaster();
1344 log("Waiting for master to be ready");
1345 cluster.waitForActiveAndReadyMaster();
1346 log("Master is ready");
1347
1348
1349 metaState =
1350 catalogTracker.getMetaRegionState();
1351 assertEquals("hbase:root should be onlined on RS",
1352 metaState.getServerName(), rs.getServerName());
1353 assertEquals("hbase:root should be onlined on RS",
1354 metaState.getState(), State.OPEN);
1355
1356
1357
1358
1359
1360 MetaRegionTracker.setMetaLocation(activeMaster.getZooKeeper(),
1361 rs.getServerName(), State.PENDING_OPEN);
1362 HRegion meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
1363 rs.removeFromOnlineRegions(meta, null);
1364 meta.close();
1365
1366 log("Aborting master");
1367 activeMaster.stop("test-kill");
1368 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1369 log("Master has aborted");
1370
1371
1372 log("Starting up a new master");
1373 activeMaster = cluster.startMaster().getMaster();
1374 log("Waiting for master to be ready");
1375 cluster.waitForActiveAndReadyMaster();
1376 log("Master is ready");
1377
1378 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1379 log("ROOT was assigned");
1380
1381 metaState =
1382 catalogTracker.getMetaRegionState();
1383 assertEquals("hbase:root should be onlined on RS",
1384 metaState.getServerName(), rs.getServerName());
1385 assertEquals("hbase:root should be onlined on RS",
1386 metaState.getState(), State.OPEN);
1387
1388
1389
1390
1391
1392 MetaRegionTracker.setMetaLocation(activeMaster.getZooKeeper(),
1393 rs.getServerName(), State.PENDING_CLOSE);
1394
1395 log("Aborting master");
1396 activeMaster.stop("test-kill");
1397 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1398 log("Master has aborted");
1399
1400 rs.closeRegion(null, RequestConverter.buildCloseRegionRequest(
1401 rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1402
1403
1404 log("Starting up a new master");
1405 activeMaster = cluster.startMaster().getMaster();
1406 log("Waiting for master to be ready");
1407 cluster.waitForActiveAndReadyMaster();
1408 log("Master is ready");
1409
1410 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1411 log("Meta was assigned");
1412
1413 rs.closeRegion(null, RequestConverter.buildCloseRegionRequest(
1414 rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1415
1416
1417 MetaRegionTracker.setMetaLocation(activeMaster.getZooKeeper(),
1418 ServerName.valueOf("dummyserver.example.org", 1234, -1L), State.OPEN);
1419
1420 log("Aborting master");
1421 activeMaster.stop("test-kill");
1422
1423 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1424 log("Master has aborted");
1425
1426
1427 log("Starting up a new master");
1428 activeMaster = cluster.startMaster().getMaster();
1429 log("Waiting for master to be ready");
1430 cluster.waitForActiveAndReadyMaster();
1431 log("Master is ready");
1432
1433 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1434 catalogTracker.verifyMetaRegionLocation(10000);
1435 log("Meta was assigned");
1436
1437
1438 TEST_UTIL.shutdownMiniCluster();
1439 }
1440 }
1441