1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertNotSame;
25 import static org.junit.Assert.assertNull;
26 import static org.junit.Assert.assertTrue;
27 import static org.junit.Assert.fail;
28
29 import java.io.IOException;
30 import java.util.Collection;
31 import java.util.List;
32 import java.util.Map;
33 import java.util.concurrent.CountDownLatch;
34
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.conf.Configuration;
38 import org.apache.hadoop.fs.FileSystem;
39 import org.apache.hadoop.fs.Path;
40 import org.apache.hadoop.hbase.Abortable;
41 import org.apache.hadoop.hbase.Coprocessor;
42 import org.apache.hadoop.hbase.CoprocessorEnvironment;
43 import org.apache.hadoop.hbase.HBaseIOException;
44 import org.apache.hadoop.hbase.HBaseTestingUtility;
45 import org.apache.hadoop.hbase.HColumnDescriptor;
46 import org.apache.hadoop.hbase.HConstants;
47 import org.apache.hadoop.hbase.HRegionInfo;
48 import org.apache.hadoop.hbase.HTableDescriptor;
49 import org.apache.hadoop.hbase.MasterNotRunningException;
50 import org.apache.hadoop.hbase.MiniHBaseCluster;
51 import org.apache.hadoop.hbase.RegionTransition;
52 import org.apache.hadoop.hbase.Server;
53 import org.apache.hadoop.hbase.ServerName;
54 import org.apache.hadoop.hbase.TableName;
55 import org.apache.hadoop.hbase.UnknownRegionException;
56 import org.apache.hadoop.hbase.Waiter;
57 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
58 import org.apache.hadoop.hbase.catalog.MetaEditor;
59 import org.apache.hadoop.hbase.catalog.MetaReader;
60 import org.apache.hadoop.hbase.client.Delete;
61 import org.apache.hadoop.hbase.client.HBaseAdmin;
62 import org.apache.hadoop.hbase.client.HTable;
63 import org.apache.hadoop.hbase.client.Mutation;
64 import org.apache.hadoop.hbase.client.Put;
65 import org.apache.hadoop.hbase.client.Result;
66 import org.apache.hadoop.hbase.client.ResultScanner;
67 import org.apache.hadoop.hbase.client.Scan;
68 import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
69 import org.apache.hadoop.hbase.coprocessor.ObserverContext;
70 import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
71 import org.apache.hadoop.hbase.exceptions.DeserializationException;
72 import org.apache.hadoop.hbase.executor.EventType;
73 import org.apache.hadoop.hbase.master.AssignmentManager;
74 import org.apache.hadoop.hbase.master.HMaster;
75 import org.apache.hadoop.hbase.master.RegionState;
76 import org.apache.hadoop.hbase.master.RegionState.State;
77 import org.apache.hadoop.hbase.master.RegionStates;
78 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
79 import org.apache.hadoop.hbase.testclassification.LargeTests;
80 import org.apache.hadoop.hbase.util.Bytes;
81 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
82 import org.apache.hadoop.hbase.util.FSUtils;
83 import org.apache.hadoop.hbase.util.HBaseFsck;
84 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
85 import org.apache.hadoop.hbase.util.PairOfSameType;
86 import org.apache.hadoop.hbase.util.Threads;
87 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
88 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
89 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
90 import org.apache.zookeeper.KeeperException;
91 import org.apache.zookeeper.KeeperException.NodeExistsException;
92 import org.apache.zookeeper.data.Stat;
93 import org.junit.After;
94 import org.junit.AfterClass;
95 import org.junit.Assert;
96 import org.junit.Before;
97 import org.junit.BeforeClass;
98 import org.junit.Test;
99 import org.junit.experimental.categories.Category;
100
101 import com.google.protobuf.ServiceException;
102
103
104
105
106
107
108 @Category(LargeTests.class)
109 public class TestSplitTransactionOnCluster {
// Logger used by the tests and the nested helper classes in this file.
private static final Log LOG =
    LogFactory.getLog(TestSplitTransactionOnCluster.class);
// Admin client; created in setup() and closed in tearDown().
private HBaseAdmin admin = null;
// Handle on the mini cluster; assigned in setup().
private MiniHBaseCluster cluster = null;
// Number of region servers the mini cluster is started with (see setupOnce()).
private static final int NB_SERVERS = 3;
// NOTE(review): not referenced in the visible part of this file; presumably used
// by code further down -- confirm before removing.
private static CountDownLatch latch = new CountDownLatch(1);
// Cross-thread flags polled/reset by testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack.
// NOTE(review): callRollBack and firstSplitCompleted are only read and reset in the
// visible code; presumably they are set by MockedSplitTransaction -- confirm.
private static volatile boolean secondSplit = false;
private static volatile boolean callRollBack = false;
private static volatile boolean firstSplitCompleted = false;
// Whether assignment goes through ZooKeeper; overwritten in setupOnce() from the
// "hbase.assignment.usezk" setting.
private static boolean useZKForAssignment = true;

// Shared testing utility that owns the mini cluster for the whole class.
static final HBaseTestingUtility TESTING_UTIL =
    new HBaseTestingUtility();
123
124 static void setupOnce() throws Exception {
125 TESTING_UTIL.getConfiguration().setInt("hbase.balancer.period", 60000);
126 useZKForAssignment =
127 TESTING_UTIL.getConfiguration().getBoolean("hbase.assignment.usezk", false);
128 TESTING_UTIL.startMiniCluster(NB_SERVERS);
129 }
130
/**
 * One-time class setup: turns on ZK-based assignment in the shared configuration
 * and then starts the mini cluster through {@link #setupOnce()}.
 */
@BeforeClass public static void before() throws Exception {
  // Must be set before setupOnce(), which reads this key back into useZKForAssignment.
  TESTING_UTIL.getConfiguration().setBoolean("hbase.assignment.usezk", true);
  setupOnce();
}
136
/** One-time class teardown: shuts down the mini cluster started for this class. */
@AfterClass public static void after() throws Exception {
  TESTING_UTIL.shutdownMiniCluster();
}
140
141 @Before public void setup() throws IOException {
142 TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS);
143 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
144 this.cluster = TESTING_UTIL.getMiniHBaseCluster();
145 }
146
147 @After
148 public void tearDown() throws Exception {
149 this.admin.close();
150 }
151
152 private HRegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions) {
153 assertEquals(1, regions.size());
154 HRegionInfo hri = regions.get(0).getRegionInfo();
155 return waitOnRIT(hri);
156 }
157
158
159
160
161
162
163
164
165 private HRegionInfo waitOnRIT(final HRegionInfo hri) {
166
167
168 while (TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
169 getRegionStates().isRegionInTransition(hri)) {
170 LOG.info("Waiting on region in transition: " +
171 TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates().
172 getRegionTransitionState(hri));
173 Threads.sleep(10);
174 }
175 return hri;
176 }
177
178 @SuppressWarnings("deprecation")
179 @Test(timeout = 60000)
180 public void testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack() throws Exception {
181 final TableName tableName =
182 TableName.valueOf("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack");
183
184 if (!useZKForAssignment) {
185
186 return;
187 }
188
189 try {
190
191 HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
192 final List<HRegion> regions = cluster.getRegions(tableName);
193 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
194 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
195 final HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
196 insertData(tableName.getName(), admin, t);
197 t.close();
198
199
200 this.admin.setBalancerRunning(false, true);
201
202 cluster.getMaster().setCatalogJanitorEnabled(false);
203
204
205 final HRegion region = findSplittableRegion(regions);
206 assertTrue("not able to find a splittable region", region != null);
207
208 new Thread() {
209 @Override
210 public void run() {
211 SplitTransaction st = null;
212 st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
213 try {
214 st.prepare();
215 st.execute(regionServer, regionServer);
216 } catch (IOException e) {
217
218 }
219 }
220 }.start();
221 for (int i = 0; !callRollBack && i < 100; i++) {
222 Thread.sleep(100);
223 }
224 assertTrue("Waited too long for rollback", callRollBack);
225 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row3"));
226 try {
227 secondSplit = true;
228
229 region.initialize();
230 st.prepare();
231 st.execute(regionServer, regionServer);
232 } catch (IOException e) {
233 LOG.debug("Rollback started :"+ e.getMessage());
234 st.rollback(regionServer, regionServer);
235 }
236 for (int i=0; !firstSplitCompleted && i<100; i++) {
237 Thread.sleep(100);
238 }
239 assertTrue("fist split did not complete", firstSplitCompleted);
240
241 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
242 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
243
244 for (int i=0; rit.containsKey(hri.getTable()) && i<100; i++) {
245 Thread.sleep(100);
246 }
247 assertFalse("region still in transition", rit.containsKey(
248 rit.containsKey(hri.getTable())));
249
250 List<HRegion> onlineRegions = regionServer.getOnlineRegions(tableName);
251
252 assertEquals("The parent region should be splitted", 2, onlineRegions.size());
253
254 List<HRegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager()
255 .getRegionStates().getRegionsOfTable(tableName);
256
257 assertEquals("No of regions in master", 2, regionsOfTable.size());
258 } finally {
259 admin.setBalancerRunning(true, false);
260 secondSplit = false;
261 firstSplitCompleted = false;
262 callRollBack = false;
263 cluster.getMaster().setCatalogJanitorEnabled(true);
264 TESTING_UTIL.deleteTable(tableName);
265 }
266 }
267
268 @Test(timeout = 60000)
269 public void testRITStateForRollback() throws Exception {
270 final TableName tableName =
271 TableName.valueOf("testRITStateForRollback");
272 try {
273
274 HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
275 final List<HRegion> regions = cluster.getRegions(tableName);
276 final HRegionInfo hri = getAndCheckSingleTableRegion(regions);
277 insertData(tableName.getName(), admin, t);
278 t.close();
279
280
281 this.admin.setBalancerRunning(false, true);
282
283 cluster.getMaster().setCatalogJanitorEnabled(false);
284
285
286 final HRegion region = findSplittableRegion(regions);
287 assertTrue("not able to find a splittable region", region != null);
288
289
290 region.getCoprocessorHost().load(FailingSplitRegionObserver.class,
291 Coprocessor.PRIORITY_USER, region.getBaseConf());
292
293
294 this.admin.split(region.getRegionName(), new byte[] {42});
295
296
297 FailingSplitRegionObserver observer = (FailingSplitRegionObserver) region
298 .getCoprocessorHost().findCoprocessor(FailingSplitRegionObserver.class.getName());
299 assertNotNull(observer);
300 observer.latch.await();
301
302 LOG.info("Waiting for region to come out of RIT");
303 TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
304 @Override
305 public boolean evaluate() throws Exception {
306 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
307 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
308 return !rit.containsKey(hri.getEncodedName());
309 }
310 });
311 } finally {
312 admin.setBalancerRunning(true, false);
313 cluster.getMaster().setCatalogJanitorEnabled(true);
314 TESTING_UTIL.deleteTable(tableName);
315 }
316 }
317
318 public static class FailingSplitRegionObserver extends BaseRegionObserver {
319 volatile CountDownLatch latch;
320 volatile CountDownLatch postSplit;
321 @Override
322 public void start(CoprocessorEnvironment e) throws IOException {
323 latch = new CountDownLatch(1);
324 postSplit = new CountDownLatch(1);
325 }
326 @Override
327 public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
328 byte[] splitKey, List<Mutation> metaEntries) throws IOException {
329 latch.countDown();
330 LOG.info("Causing rollback of region split");
331 throw new IOException("Causing rollback of region split");
332 }
333 @Override
334 public void postCompleteSplit(ObserverContext<RegionCoprocessorEnvironment> ctx)
335 throws IOException {
336 postSplit.countDown();
337 LOG.info("postCompleteSplit called");
338 }
339 }
340
341
342
343
344
345
346
347
348
349
350
351
/**
 * Splits a region while the master's split handling is switched off
 * ({@code AssignmentManager.TEST_SKIP_SPLIT_HANDLING}), and, under ZK-based
 * assignment, aborts the hosting region server while the split znode is in
 * {@code RS_ZK_REGION_SPLIT}. The test then expects two daughters to be present
 * and, under ZK-based assignment, the split znode to be gone.
 */
@Test (timeout = 300000) public void testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling()
    throws IOException, InterruptedException, NodeExistsException, KeeperException,
    DeserializationException, ServiceException {
  final byte [] tableName =
    Bytes.toBytes("testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling");

  // Create the table and get its single region.
  HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
  List<HRegion> regions = cluster.getRegions(tableName);
  HRegionInfo hri = getAndCheckSingleTableRegion(regions);

  // Helper is defined outside this view; going by its name it keeps the table
  // region off the server hosting meta and returns the hosting server's index.
  int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);

  // Turn off the balancer and the catalog janitor for the duration of the test.
  this.admin.setBalancerRunning(false, true);

  cluster.getMaster().setCatalogJanitorEnabled(false);
  try {
    // Add a bit of load to the table so it is splittable.
    TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);

    HRegionServer server = cluster.getRegionServer(tableRegionIndex);
    printOutRegions(server, "Initial regions: ");
    int regionCount = ProtobufUtil.getOnlineRegions(server).size();

    // Make the master skip split handling; reset in the finally block.
    AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;

    split(hri, server, regionCount);

    // Path of the parent region's znode.
    String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(),
      hri.getEncodedName());
    RegionTransition rt = null;
    Stat stats = null;
    List<HRegion> daughters = null;
    if (useZKForAssignment) {
      daughters = checkAndGetDaughters(tableName);

      // Poll (up to 100 x 100 ms) until the znode reports RS_ZK_REGION_SPLIT.
      for (int i=0; i<100; i++) {
        stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
        rt = RegionTransition.parseFrom(ZKAssign.getData(TESTING_UTIL.getZooKeeperWatcher(),
          hri.getEncodedName()));
        if (rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)) break;
        Thread.sleep(100);
      }
      LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats);
      assertTrue(rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT));
      // Now crash the server hosting the region.
      cluster.abortRegionServer(tableRegionIndex);
    }
    // NOTE(review): when useZKForAssignment is false no server is aborted above,
    // yet this wait still runs -- confirm that is intended.
    waitUntilRegionServerDead();
    awaitDaughters(tableName, 2);
    if (useZKForAssignment) {
      regions = cluster.getRegions(tableName);
      // Every region now online must be one of the daughters seen before the abort.
      for (HRegion r: regions) {
        assertTrue(daughters.contains(r));
      }

      // Poll (up to 100 x 100 ms) until the parent's znode no longer exists.
      for (int i=0; i<100; i++) {
        // exists() returns null once the node is gone.
        stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
        if (stats == null) break;
        Thread.sleep(100);
      }
      LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats);
      assertTrue(stats == null);
    }
  } finally {
    // Restore the static test hook and cluster services; replace the aborted server.
    AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
    admin.setBalancerRunning(true, false);
    cluster.getMaster().setCatalogJanitorEnabled(true);
    cluster.startRegionServer();
    t.close();
  }
}
430
/**
 * Puts the region into a CLOSING state (via a faked znode under ZK-based
 * assignment, otherwise directly in the master's region states), requests
 * splits, and asserts the server's online region count does not change. After
 * the blocking state is removed, a split is run and the daughters are checked.
 */
@Test (timeout = 300000) public void testExistingZnodeBlocksSplitAndWeRollback()
    throws IOException, InterruptedException, NodeExistsException, KeeperException, ServiceException {
  final byte [] tableName =
      Bytes.toBytes("testExistingZnodeBlocksSplitAndWeRollback");

  // Create the table and get its single region.
  HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
  List<HRegion> regions = cluster.getRegions(tableName);
  HRegionInfo hri = getAndCheckSingleTableRegion(regions);

  int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);

  RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();

  // Turn off the balancer and the catalog janitor for the duration of the test.
  this.admin.setBalancerRunning(false, true);

  cluster.getMaster().setCatalogJanitorEnabled(false);
  try {
    // Add a bit of load to the table so it is splittable.
    TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);

    HRegionServer server = cluster.getRegionServer(tableRegionIndex);
    printOutRegions(server, "Initial regions: ");
    int regionCount = ProtobufUtil.getOnlineRegions(server).size();

    // Mark the region as CLOSING on behalf of a server that does not exist.
    ServerName fakedServer = ServerName.valueOf("any.old.server", 1234, -1);
    if (useZKForAssignment) {
      ZKAssign.createNodeClosing(TESTING_UTIL.getZooKeeperWatcher(),
        hri, fakedServer);
    } else {
      regionStates.updateRegionState(hri, RegionState.State.CLOSING);
    }

    // Request the split several times; none of them is expected to take effect.
    this.admin.split(hri.getRegionNameAsString());
    this.admin.split(hri.getRegionNameAsString());
    this.admin.split(hri.getRegionNameAsString());

    // For about a second, the online region count must stay unchanged.
    for (int i = 0; i < 10; i++) {
      Thread.sleep(100);
      assertEquals(regionCount, ProtobufUtil.getOnlineRegions(server).size());
    }
    if (useZKForAssignment) {
      // Remove the blocking CLOSING znode.
      ZKAssign.deleteClosingNode(TESTING_UTIL.getZooKeeperWatcher(),
        hri, fakedServer);
    } else {
      regionStates.regionOnline(hri, server.getServerName());
    }

    // With the blocking state removed, run a split through the split() helper.
    split(hri, server, regionCount);

    checkAndGetDaughters(tableName);

  } finally {
    admin.setBalancerRunning(true, false);
    cluster.getMaster().setCatalogJanitorEnabled(true);
    t.close();
  }
}
493
494
495
496
497
498
499
500 @Test (timeout=300000) public void testShutdownFixupWhenDaughterHasSplit()
501 throws IOException, InterruptedException, ServiceException {
502 final byte [] tableName =
503 Bytes.toBytes("testShutdownFixupWhenDaughterHasSplit");
504
505
506 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
507 List<HRegion> regions = cluster.getRegions(tableName);
508 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
509
510 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
511
512
513 this.admin.setBalancerRunning(false, true);
514
515 cluster.getMaster().setCatalogJanitorEnabled(false);
516 try {
517
518 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
519
520 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
521 printOutRegions(server, "Initial regions: ");
522 int regionCount = ProtobufUtil.getOnlineRegions(server).size();
523
524 split(hri, server, regionCount);
525
526 List<HRegion> daughters = checkAndGetDaughters(tableName);
527
528 regionCount = ProtobufUtil.getOnlineRegions(server).size();
529 HRegionInfo daughter = daughters.get(0).getRegionInfo();
530 LOG.info("Daughter we are going to split: " + daughter);
531
532
533 this.admin.compact(daughter.getRegionName());
534 daughters = cluster.getRegions(tableName);
535 HRegion daughterRegion = null;
536 for (HRegion r: daughters) {
537 if (r.getRegionInfo().equals(daughter)) {
538 daughterRegion = r;
539 LOG.info("Found matching HRI: " + daughterRegion);
540 break;
541 }
542 }
543 assertTrue(daughterRegion != null);
544 for (int i=0; i<100; i++) {
545 if (!daughterRegion.hasReferences()) break;
546 Threads.sleep(100);
547 }
548 assertFalse("Waiting for reference to be compacted", daughterRegion.hasReferences());
549 LOG.info("Daughter hri before split (has been compacted): " + daughter);
550 split(daughter, server, regionCount);
551
552 daughters = cluster.getRegions(tableName);
553 for (HRegion d: daughters) {
554 LOG.info("Regions before crash: " + d);
555 }
556
557 cluster.abortRegionServer(tableRegionIndex);
558 waitUntilRegionServerDead();
559 awaitDaughters(tableName, daughters.size());
560
561
562 regions = cluster.getRegions(tableName);
563 for (HRegion d: daughters) {
564 LOG.info("Regions after crash: " + d);
565 }
566 assertEquals(daughters.size(), regions.size());
567 for (HRegion r: regions) {
568 LOG.info("Regions post crash " + r);
569 assertTrue("Missing region post crash " + r, daughters.contains(r));
570 }
571 } finally {
572 admin.setBalancerRunning(true, false);
573 cluster.getMaster().setCatalogJanitorEnabled(true);
574 t.close();
575 }
576 }
577
578 @Test(timeout = 180000)
579 public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
580 Configuration conf = TESTING_UTIL.getConfiguration();
581 TableName userTableName =
582 TableName.valueOf("testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles");
583 HTableDescriptor htd = new HTableDescriptor(userTableName);
584 HColumnDescriptor hcd = new HColumnDescriptor("col");
585 htd.addFamily(hcd);
586 admin.createTable(htd);
587 HTable table = new HTable(conf, userTableName);
588 try {
589 for (int i = 0; i <= 5; i++) {
590 String row = "row" + i;
591 Put p = new Put(row.getBytes());
592 String val = "Val" + i;
593 p.add("col".getBytes(), "ql".getBytes(), val.getBytes());
594 table.put(p);
595 admin.flush(userTableName.getName());
596 Delete d = new Delete(row.getBytes());
597
598 table.delete(d);
599 admin.flush(userTableName.getName());
600 }
601 admin.majorCompact(userTableName.getName());
602 List<HRegionInfo> regionsOfTable = TESTING_UTIL.getMiniHBaseCluster()
603 .getMaster().getAssignmentManager().getRegionStates()
604 .getRegionsOfTable(userTableName);
605 HRegionInfo hRegionInfo = regionsOfTable.get(0);
606 Put p = new Put("row6".getBytes());
607 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
608 table.put(p);
609 p = new Put("row7".getBytes());
610 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
611 table.put(p);
612 p = new Put("row8".getBytes());
613 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
614 table.put(p);
615 admin.flush(userTableName.getName());
616 admin.split(hRegionInfo.getRegionName(), "row7".getBytes());
617 regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
618 .getAssignmentManager().getRegionStates()
619 .getRegionsOfTable(userTableName);
620
621 while (regionsOfTable.size() != 2) {
622 Thread.sleep(2000);
623 regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
624 .getAssignmentManager().getRegionStates()
625 .getRegionsOfTable(userTableName);
626 }
627 Assert.assertEquals(2, regionsOfTable.size());
628 Scan s = new Scan();
629 ResultScanner scanner = table.getScanner(s);
630 int mainTableCount = 0;
631 for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
632 mainTableCount++;
633 }
634 Assert.assertEquals(3, mainTableCount);
635 } finally {
636 table.close();
637 }
638 }
639
640
641
642
643 static class UselessTestAbortable implements Abortable {
644 boolean aborted = false;
645 @Override
646 public void abort(String why, Throwable e) {
647 LOG.warn("ABORTED (But nothing to abort): why=" + why, e);
648 aborted = true;
649 }
650
651 @Override
652 public boolean isAborted() {
653 return this.aborted;
654 }
655 }
656
657
658
659
660
661
662
663
664
665
666
/**
 * Requests a split while the master's split handling is switched off
 * ({@code AssignmentManager.TEST_SKIP_SPLIT_HANDLING}), restarts the master via
 * {@code abortAndWaitForMaster()}, re-enables split handling, and then expects
 * the split znode to disappear and the parent to be in {@code SPLIT} state with
 * no hosting server. Only runs when ZK-based assignment is in use.
 */
@Test(timeout = 400000)
public void testMasterRestartWhenSplittingIsPartial()
    throws IOException, InterruptedException, NodeExistsException,
    KeeperException, DeserializationException, ServiceException {
  final byte[] tableName = Bytes.toBytes("testMasterRestartWhenSplittingIsPartial");

  if (!useZKForAssignment) {
    // Nothing to verify without ZK-based assignment.
    return;
  }

  // Create the table and get its single region.
  HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
  List<HRegion> regions = cluster.getRegions(tableName);
  HRegionInfo hri = getAndCheckSingleTableRegion(regions);

  int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);

  // Turn off the balancer and the catalog janitor for the duration of the test.
  this.admin.setBalancerRunning(false, true);

  cluster.getMaster().setCatalogJanitorEnabled(false);
  // Dedicated watcher for this test; closed in the finally block.
  ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
      "testMasterRestartWhenSplittingIsPartial", new UselessTestAbortable());
  try {
    // Add a bit of load to the table so it is splittable.
    TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);

    HRegionServer server = cluster.getRegionServer(tableRegionIndex);
    printOutRegions(server, "Initial regions: ");

    // Make the master skip split handling so the split stays partial.
    AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;

    // Request the split and check the daughters.
    this.admin.split(hri.getRegionNameAsString());
    checkAndGetDaughters(tableName);

    // Inspect the parent's znode before the master is restarted.
    String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
    Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
    LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
        + stats);
    byte[] bytes = ZKAssign.getData(zkw, hri.getEncodedName());
    RegionTransition rtd = RegionTransition.parseFrom(bytes);
    // The znode must show the split as either completed or still in progress.
    assertTrue(rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)
        || rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING));

    // Restart the master (helper defined outside this view).
    MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();

    // NOTE(review): the admin created in setup() is replaced here without being
    // closed in this method; only the new instance is closed by tearDown().
    this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());

    // Mark the local parent info as an offline split parent before querying the
    // new master's region states with it.
    hri.setOffline(true);
    hri.setSplit(true);
    ServerName regionServerOfRegion = master.getAssignmentManager()
        .getRegionStates().getRegionServerOfRegion(hri);
    assertTrue(regionServerOfRegion != null);

    // Re-enable split handling and wait (up to 60 s) for the znode to be removed.
    AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
    String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
    Stat stat = new Stat();
    byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
    // ZKUtil.getDataNoWatch returns null once the node no longer exists.
    for (int i=0; data != null && i<60; i++) {
      Thread.sleep(1000);
      data = ZKUtil.getDataNoWatch(zkw, node, stat);
    }
    assertNull("Waited too long for ZK node to be removed: "+node, data);
    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    assertTrue("Split parent should be in SPLIT state",
        regionStates.isRegionInState(hri, State.SPLIT));
    regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
    assertTrue(regionServerOfRegion == null);
  } finally {
    // Restore the static test hook and cluster services.
    AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
    admin.setBalancerRunning(true, false);
    cluster.getMaster().setCatalogJanitorEnabled(true);
    t.close();
    zkw.close();
  }
}
753
754
755
756
757
758
759
760
761
/**
 * Lets a split complete (the split znode is removed) while the catalog janitor
 * is disabled, then restarts the master via {@code abortAndWaitForMaster()}.
 * The new master must report the parent in {@code SPLIT} state with no hosting
 * server.
 */
@Test (timeout = 300000)
public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
    throws IOException, InterruptedException, NodeExistsException,
    KeeperException, ServiceException {
  final byte[] tableName = Bytes.toBytes("testMasterRestartAtRegionSplitPendingCatalogJanitor");

  // Create the table and get its single region.
  HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
  List<HRegion> regions = cluster.getRegions(tableName);
  HRegionInfo hri = getAndCheckSingleTableRegion(regions);

  int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);

  // Turn off the balancer and the catalog janitor for the duration of the test.
  this.admin.setBalancerRunning(false, true);

  cluster.getMaster().setCatalogJanitorEnabled(false);
  // Dedicated watcher for this test; closed in the finally block.
  ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
      "testMasterRestartAtRegionSplitPendingCatalogJanitor", new UselessTestAbortable());
  try {
    // Add a bit of load to the table so it is splittable.
    TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);

    HRegionServer server = cluster.getRegionServer(tableRegionIndex);
    printOutRegions(server, "Initial regions: ");

    // Request the split and check the daughters.
    this.admin.split(hri.getRegionNameAsString());
    checkAndGetDaughters(tableName);

    String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
    Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
    LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
        + stats);
    String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
    Stat stat = new Stat();
    byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
    // Wait (up to 60 s) until the split znode has been removed.
    for (int i=0; data != null && i<60; i++) {
      Thread.sleep(1000);
      data = ZKUtil.getDataNoWatch(zkw, node, stat);
    }
    assertNull("Waited too long for ZK node to be removed: "+node, data);

    // Restart the master (helper defined outside this view).
    MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();

    // NOTE(review): the admin created in setup() is replaced here without being
    // closed in this method; only the new instance is closed by tearDown().
    this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());

    // Mark the local parent info as an offline split parent before querying the
    // new master's region states with it.
    hri.setOffline(true);
    hri.setSplit(true);
    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    assertTrue("Split parent should be in SPLIT state",
        regionStates.isRegionInState(hri, State.SPLIT));
    ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
    assertTrue(regionServerOfRegion == null);
  } finally {
    this.admin.setBalancerRunning(true, false);
    cluster.getMaster().setCatalogJanitorEnabled(true);
    t.close();
    zkw.close();
  }
}
825
826
827
828
829
830
831
832
833
834
835
836
/**
 * JUnit entry point that delegates to
 * {@code testSplitBeforeSettingSplittingInZKInternals()}, which is defined
 * outside this view.
 */
@Test(timeout = 60000)
public void testSplitBeforeSettingSplittingInZK() throws Exception,
    InterruptedException, KeeperException {
  testSplitBeforeSettingSplittingInZKInternals();
}
842
/**
 * Checks that {@code MetaReader.tableExists} keeps returning true for a table
 * after {@code SplitTransaction.createDaughters} has been run on its region.
 * Also expects three regions in transition afterwards, and none once the two
 * daughters and the parent have been offlined on the master.
 */
@Test(timeout = 60000)
public void testTableExistsIfTheSpecifiedTableRegionIsSplitParent() throws Exception {
  ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TESTING_UTIL);
  final TableName tableName =
      TableName.valueOf("testTableExistsIfTheSpecifiedTableRegionIsSplitParent");
  // Create the table and locate its region and hosting server.
  HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
  List<HRegion> regions = null;
  try {
    regions = cluster.getRegions(tableName);
    int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
    HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
    insertData(tableName.getName(), admin, t);
    // Turn off the balancer and the catalog janitor for the duration of the test.
    admin.setBalancerRunning(false, true);

    cluster.getMaster().setCatalogJanitorEnabled(false);
    // Sanity check before the split is started.
    boolean tableExists = MetaReader.tableExists(regionServer.getCatalogTracker(),
        tableName);
    assertEquals("The specified table should present.", true, tableExists);
    final HRegion region = findSplittableRegion(regions);
    assertTrue("not able to find a splittable region", region != null);
    // Run only the createDaughters step of the split rather than execute().
    SplitTransaction st = new SplitTransaction(region, Bytes.toBytes("row2"));
    try {
      st.prepare();
      st.createDaughters(regionServer, regionServer);
    } catch (IOException e) {
      // NOTE(review): exception is deliberately ignored here; the assertions
      // below decide the outcome -- confirm this is intended.
    }
    // The table must still be reported as existing.
    tableExists = MetaReader.tableExists(regionServer.getCatalogTracker(),
        tableName);
    assertEquals("The specified table should present.", true, tableExists);
    Map<String, RegionState> rit = cluster.getMaster().getAssignmentManager().getRegionStates()
        .getRegionsInTransition();
    // Presumably the parent plus its two daughters -- the three regions offlined below.
    assertTrue(rit.size() == 3);
    cluster.getMaster().getAssignmentManager().regionOffline(st.getFirstDaughter());
    cluster.getMaster().getAssignmentManager().regionOffline(st.getSecondDaughter());
    cluster.getMaster().getAssignmentManager().regionOffline(region.getRegionInfo());
    rit = cluster.getMaster().getAssignmentManager().getRegionStates().getRegionsInTransition();
    assertTrue(rit.size() == 0);
  } finally {
    // Remove the parent's znode, if any, then restore cluster services and drop the table.
    if (regions != null) {
      String node = ZKAssign.getNodeName(zkw, regions.get(0).getRegionInfo()
          .getEncodedName());
      ZKUtil.deleteNodeFailSilent(zkw, node);
    }
    admin.setBalancerRunning(true, false);
    cluster.getMaster().setCatalogJanitorEnabled(true);
    t.close();
    TESTING_UTIL.deleteTable(tableName);
  }
}
895
896 private void insertData(final byte[] tableName, HBaseAdmin admin, HTable t) throws IOException,
897 InterruptedException {
898 Put p = new Put(Bytes.toBytes("row1"));
899 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1"));
900 t.put(p);
901 p = new Put(Bytes.toBytes("row2"));
902 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2"));
903 t.put(p);
904 p = new Put(Bytes.toBytes("row3"));
905 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3"));
906 t.put(p);
907 p = new Put(Bytes.toBytes("row4"));
908 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4"));
909 t.put(p);
910 admin.flush(tableName);
911 }
912
913
914
915
916
917 @Test(timeout = 60000)
918 public void testSplitRegionWithNoStoreFiles()
919 throws Exception {
920 final TableName tableName =
921 TableName.valueOf("testSplitRegionWithNoStoreFiles");
922
923 createTableAndWait(tableName.getName(), HConstants.CATALOG_FAMILY);
924 List<HRegion> regions = cluster.getRegions(tableName);
925 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
926 ensureTableRegionNotOnSameServerAsMeta(admin, hri);
927 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
928 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
929
930 this.admin.setBalancerRunning(false, true);
931
932 cluster.getMaster().setCatalogJanitorEnabled(false);
933 try {
934
935 printOutRegions(regionServer, "Initial regions: ");
936 Configuration conf = cluster.getConfiguration();
937 HBaseFsck.debugLsr(conf, new Path("/"));
938 Path rootDir = FSUtils.getRootDir(conf);
939 FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem();
940 Map<String, Path> storefiles =
941 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
942 assertEquals("Expected nothing but found " + storefiles.toString(), storefiles.size(), 0);
943
944
945 regions = cluster.getRegions(tableName);
946 final HRegion region = findSplittableRegion(regions);
947 assertTrue("not able to find a splittable region", region != null);
948
949
950 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
951 try {
952 st.prepare();
953 st.execute(regionServer, regionServer);
954 } catch (IOException e) {
955 fail("Split execution should have succeeded with no exceptions thrown");
956 }
957
958
959
960 List<HRegion> daughters = cluster.getRegions(tableName);
961 assertTrue(daughters.size() == 2);
962
963
964 HBaseFsck.debugLsr(conf, new Path("/"));
965 Map<String, Path> storefilesAfter =
966 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
967 assertEquals("Expected nothing but found " + storefilesAfter.toString(),
968 storefilesAfter.size(), 0);
969
970 hri = region.getRegionInfo();
971 AssignmentManager am = cluster.getMaster().getAssignmentManager();
972 RegionStates regionStates = am.getRegionStates();
973 long start = EnvironmentEdgeManager.currentTimeMillis();
974 while (!regionStates.isRegionInState(hri, State.SPLIT)) {
975 assertFalse("Timed out in waiting split parent to be in state SPLIT",
976 EnvironmentEdgeManager.currentTimeMillis() - start > 60000);
977 Thread.sleep(500);
978 }
979
980
981 am.assign(hri, true, true);
982 assertFalse("Split region can't be assigned",
983 regionStates.isRegionInTransition(hri));
984 assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
985
986
987 am.unassign(hri, true, null);
988 assertFalse("Split region can't be unassigned",
989 regionStates.isRegionInTransition(hri));
990 assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
991 } finally {
992 admin.setBalancerRunning(true, false);
993 cluster.getMaster().setCatalogJanitorEnabled(true);
994 }
995 }
996
997 @Test(timeout = 180000)
998 public void testSplitHooksBeforeAndAfterPONR() throws Exception {
999 TableName firstTable = TableName.valueOf("testSplitHooksBeforeAndAfterPONR_1");
1000 TableName secondTable = TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2");
1001 HColumnDescriptor hcd = new HColumnDescriptor("cf");
1002
1003 HTableDescriptor desc = new HTableDescriptor(firstTable);
1004 desc.addCoprocessor(MockedRegionObserver.class.getName());
1005 desc.addFamily(hcd);
1006 admin.createTable(desc);
1007 TESTING_UTIL.waitUntilAllRegionsAssigned(firstTable);
1008
1009 desc = new HTableDescriptor(secondTable);
1010 desc.addFamily(hcd);
1011 admin.createTable(desc);
1012 TESTING_UTIL.waitUntilAllRegionsAssigned(secondTable);
1013
1014 List<HRegion> firstTableRegions = cluster.getRegions(firstTable);
1015 List<HRegion> secondTableRegions = cluster.getRegions(secondTable);
1016
1017
1018 if (firstTableRegions.size() == 0 || secondTableRegions.size() == 0) {
1019 fail("Each table should have at least one region.");
1020 }
1021 ServerName serverName =
1022 cluster.getServerHoldingRegion(firstTableRegions.get(0).getRegionName());
1023 admin.move(secondTableRegions.get(0).getRegionInfo().getEncodedNameAsBytes(),
1024 Bytes.toBytes(serverName.getServerName()));
1025 HTable table1 = null;
1026 HTable table2 = null;
1027 try {
1028 table1 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
1029 table2 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
1030 insertData(firstTable.getName(), admin, table1);
1031 insertData(secondTable.getName(), admin, table2);
1032 admin.split(firstTable.getName(), "row2".getBytes());
1033 firstTableRegions = cluster.getRegions(firstTable.getName());
1034 while (firstTableRegions.size() != 2) {
1035 Thread.sleep(1000);
1036 firstTableRegions = cluster.getRegions(firstTable.getName());
1037 }
1038 assertEquals("Number of regions after split should be 2.", 2, firstTableRegions.size());
1039 secondTableRegions = cluster.getRegions(secondTable.getName());
1040 assertEquals("Number of regions after split should be 2.", 2, secondTableRegions.size());
1041 } finally {
1042 if (table1 != null) {
1043 table1.close();
1044 }
1045 if (table2 != null) {
1046 table2.close();
1047 }
1048 TESTING_UTIL.deleteTable(firstTable);
1049 TESTING_UTIL.deleteTable(secondTable);
1050 }
1051 }
1052
1053 private void testSplitBeforeSettingSplittingInZKInternals() throws Exception {
1054 final byte[] tableName = Bytes.toBytes("testSplitBeforeSettingSplittingInZK");
1055 try {
1056
1057 createTableAndWait(tableName, Bytes.toBytes("cf"));
1058
1059 List<HRegion> regions = awaitTableRegions(tableName);
1060 assertTrue("Table not online", cluster.getRegions(tableName).size() != 0);
1061
1062 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
1063 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
1064 final HRegion region = findSplittableRegion(regions);
1065 assertTrue("not able to find a splittable region", region != null);
1066 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2")) {
1067 @Override
1068 public PairOfSameType<HRegion> stepsBeforePONR(final Server server,
1069 final RegionServerServices services, boolean testing) throws IOException {
1070 throw new SplittingNodeCreationFailedException ();
1071 }
1072 };
1073 String node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1074 region.getRegionInfo().getEncodedName());
1075 regionServer.getZooKeeper().sync(node);
1076 for (int i = 0; i < 100; i++) {
1077
1078
1079
1080 if (ZKUtil.checkExists(regionServer.getZooKeeper(), node) != -1) {
1081 Thread.sleep(100);
1082 }
1083 }
1084 try {
1085 st.prepare();
1086 st.execute(regionServer, regionServer);
1087 } catch (IOException e) {
1088
1089
1090
1091 assertTrue("Should be instance of CreateSplittingNodeFailedException",
1092 e instanceof SplittingNodeCreationFailedException );
1093 node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1094 region.getRegionInfo().getEncodedName());
1095 {
1096 assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1097 }
1098 assertTrue(st.rollback(regionServer, regionServer));
1099 assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1100 }
1101 } finally {
1102 TESTING_UTIL.deleteTable(tableName);
1103 }
1104 }
1105
1106 @Test
1107 public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck()
1108 throws Exception {
1109 final TableName tableName =
1110 TableName.valueOf("testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck");
1111 try {
1112 HTableDescriptor htd = new HTableDescriptor(tableName);
1113 htd.addFamily(new HColumnDescriptor("f"));
1114 htd.setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName());
1115 admin.createTable(htd);
1116 List<HRegion> regions = awaitTableRegions(tableName.toBytes());
1117 HRegion region = regions.get(0);
1118 for(int i = 3;i<9;i++) {
1119 Put p = new Put(Bytes.toBytes("row"+i));
1120 p.add(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("value"+i));
1121 region.put(p);
1122 }
1123 region.flushcache();
1124 Store store = region.getStore(Bytes.toBytes("f"));
1125 Collection<StoreFile> storefiles = store.getStorefiles();
1126 assertEquals(storefiles.size(), 1);
1127 assertFalse(region.hasReferences());
1128 Path referencePath = region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "f",
1129 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
1130 assertNotNull(referencePath);
1131 } finally {
1132 TESTING_UTIL.deleteTable(tableName);
1133 }
1134 }
1135
1136 @Test (timeout=300000)
1137 public void testSSHCleanupDaugtherRegionsOfAbortedSplit() throws Exception {
1138 TableName table = TableName.valueOf("testSSHCleanupDaugtherRegionsOfAbortedSplit");
1139 try {
1140 HTableDescriptor desc = new HTableDescriptor(table);
1141 desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
1142 admin.createTable(desc);
1143 HTable hTable = new HTable(cluster.getConfiguration(), desc.getTableName());
1144 for(int i = 1; i < 5; i++) {
1145 Put p1 = new Put(("r"+i).getBytes());
1146 p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
1147 hTable.put(p1);
1148 }
1149 admin.flush(desc.getTableName().toString());
1150 List<HRegion> regions = cluster.getRegions(desc.getTableName());
1151 int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
1152 HRegionServer regionServer = cluster.getRegionServer(serverWith);
1153 cluster.getServerWith(regions.get(0).getRegionName());
1154 SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3"));
1155 st.prepare();
1156 st.stepsBeforePONR(regionServer, regionServer, false);
1157 Path tableDir =
1158 FSUtils.getTableDir(cluster.getMaster().getMasterFileSystem().getRootDir(),
1159 desc.getTableName());
1160 tableDir.getFileSystem(cluster.getConfiguration());
1161 List<Path> regionDirs =
1162 FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
1163 assertEquals(3,regionDirs.size());
1164 cluster.startRegionServer();
1165 regionServer.kill();
1166 cluster.getRegionServerThreads().get(serverWith).join();
1167
1168 while (cluster.getMaster().getServerManager().areDeadServersInProgress()) {
1169 Thread.sleep(10);
1170 }
1171 AssignmentManager am = cluster.getMaster().getAssignmentManager();
1172 while(am.getRegionStates().isRegionsInTransition()){
1173 Thread.sleep(10);
1174 }
1175 assertEquals(am.getRegionStates().getRegionsInTransition().toString(), am.getRegionStates()
1176 .getRegionsInTransition().size(), 0);
1177 regionDirs =
1178 FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
1179 assertEquals(1,regionDirs.size());
1180 } finally {
1181 TESTING_UTIL.deleteTable(table);
1182 }
1183 }
1184
1185 public static class MockedSplitTransaction extends SplitTransaction {
1186
1187 private HRegion currentRegion;
1188 public MockedSplitTransaction(HRegion r, byte[] splitrow) {
1189 super(r, splitrow);
1190 this.currentRegion = r;
1191 }
1192
1193 @Override
1194 void transitionZKNode(Server server, RegionServerServices services, HRegion a, HRegion b)
1195 throws IOException {
1196 if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1197 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1198 try {
1199 if (!secondSplit){
1200 callRollBack = true;
1201 latch.await();
1202 }
1203 } catch (InterruptedException e) {
1204 }
1205
1206 }
1207 super.transitionZKNode(server, services, a, b);
1208 if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1209 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1210 firstSplitCompleted = true;
1211 }
1212 }
1213 @Override
1214 public boolean rollback(Server server, RegionServerServices services) throws IOException {
1215 if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1216 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1217 if(secondSplit){
1218 super.rollback(server, services);
1219 latch.countDown();
1220 return true;
1221 }
1222 }
1223 return super.rollback(server, services);
1224 }
1225
1226 }
1227
1228 private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
1229 for (int i = 0; i < 5; ++i) {
1230 for (HRegion r: regions) {
1231 if (r.isSplittable()) {
1232 return(r);
1233 }
1234 }
1235 Thread.sleep(100);
1236 }
1237 return(null);
1238 }
1239
1240 @Test(timeout = 120000)
1241 public void testFailedSplit() throws Exception {
1242 TableName tableName = TableName.valueOf("testFailedSplit");
1243 byte[] colFamily = Bytes.toBytes("info");
1244 TESTING_UTIL.createTable(tableName, colFamily);
1245 HTable table = new HTable(TESTING_UTIL.getConfiguration(), tableName);
1246 try {
1247 TESTING_UTIL.loadTable(table, colFamily);
1248 List<HRegionInfo> regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
1249 assertTrue(regions.size() == 1);
1250 final HRegion actualRegion = cluster.getRegions(tableName).get(0);
1251 actualRegion.getCoprocessorHost().load(FailingSplitRegionObserver.class,
1252 Coprocessor.PRIORITY_USER, actualRegion.getBaseConf());
1253
1254
1255 admin.split(tableName.getNameAsString());
1256 FailingSplitRegionObserver observer = (FailingSplitRegionObserver) actualRegion
1257 .getCoprocessorHost().findCoprocessor(FailingSplitRegionObserver.class.getName());
1258 assertNotNull(observer);
1259 observer.latch.await();
1260 observer.postSplit.await();
1261 LOG.info("Waiting for region to come out of RIT");
1262 TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
1263 @Override
1264 public boolean evaluate() throws Exception {
1265 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
1266 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
1267 return (rit.size() == 0);
1268 }
1269 });
1270 regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
1271 assertTrue(regions.size() == 1);
1272 assertTrue(admin.balancer());
1273 } finally {
1274 table.close();
1275 TESTING_UTIL.deleteTable(tableName);
1276 }
1277 }
1278
1279 private List<HRegion> checkAndGetDaughters(byte[] tableName)
1280 throws InterruptedException {
1281 List<HRegion> daughters = null;
1282
1283 for (int i=0; i<100; i++) {
1284 daughters = cluster.getRegions(tableName);
1285 if (daughters.size() >= 2) break;
1286 Thread.sleep(100);
1287 }
1288 assertTrue(daughters.size() >= 2);
1289 return daughters;
1290 }
1291
1292 private MockMasterWithoutCatalogJanitor abortAndWaitForMaster()
1293 throws IOException, InterruptedException {
1294 cluster.abortMaster(0);
1295 cluster.waitOnMaster(0);
1296 cluster.getConfiguration().setClass(HConstants.MASTER_IMPL,
1297 MockMasterWithoutCatalogJanitor.class, HMaster.class);
1298 MockMasterWithoutCatalogJanitor master = null;
1299 master = (MockMasterWithoutCatalogJanitor) cluster.startMaster().getMaster();
1300 cluster.waitForActiveAndReadyMaster();
1301 return master;
1302 }
1303
1304 private void split(final HRegionInfo hri, final HRegionServer server, final int regionCount)
1305 throws IOException, InterruptedException {
1306 this.admin.split(hri.getRegionNameAsString());
1307 try {
1308 for (int i = 0; ProtobufUtil.getOnlineRegions(server).size() <= regionCount && i < 300; i++) {
1309 LOG.debug("Waiting on region to split");
1310 Thread.sleep(100);
1311 }
1312
1313 assertFalse("Waited too long for split",
1314 ProtobufUtil.getOnlineRegions(server).size() <= regionCount);
1315 } catch (RegionServerStoppedException e) {
1316 if (useZKForAssignment) {
1317
1318 LOG.error(e);
1319 throw e;
1320 }
1321 }
1322 }
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335 private int ensureTableRegionNotOnSameServerAsMeta(final HBaseAdmin admin,
1336 final HRegionInfo hri)
1337 throws HBaseIOException, MasterNotRunningException,
1338 ZooKeeperConnectionException, InterruptedException {
1339
1340
1341
1342 int metaServerIndex = cluster.getServerWithMeta();
1343 assertTrue(metaServerIndex != -1);
1344 HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex);
1345 int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1346 assertTrue(tableRegionIndex != -1);
1347 HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
1348 if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
1349 HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
1350 assertNotNull(hrs);
1351 assertNotNull(hri);
1352 LOG.info("Moving " + hri.getRegionNameAsString() + " from " +
1353 metaRegionServer.getServerName() + " to " +
1354 hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
1355 admin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(hrs.getServerName().toString()));
1356 }
1357
1358 for (int i = 0; i < 100; i++) {
1359 tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1360 if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
1361 LOG.debug("Waiting on region move off the hbase:meta server; current index " +
1362 tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
1363 Thread.sleep(100);
1364 }
1365 assertTrue("Region not moved off hbase:meta server", tableRegionIndex != -1
1366 && tableRegionIndex != metaServerIndex);
1367
1368 tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1369 assertTrue(tableRegionIndex != -1);
1370 assertNotSame(metaServerIndex, tableRegionIndex);
1371 return tableRegionIndex;
1372 }
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383 private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster,
1384 final HRegionServer notThisOne) {
1385 for (RegionServerThread rst: cluster.getRegionServerThreads()) {
1386 HRegionServer hrs = rst.getRegionServer();
1387 if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
1388 if (hrs.isStopping() || hrs.isStopped()) continue;
1389 return hrs;
1390 }
1391 return null;
1392 }
1393
1394 private void printOutRegions(final HRegionServer hrs, final String prefix)
1395 throws IOException {
1396 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
1397 for (HRegionInfo region: regions) {
1398 LOG.info(prefix + region.getRegionNameAsString());
1399 }
1400 }
1401
1402 private void waitUntilRegionServerDead() throws InterruptedException {
1403
1404 for (int i=0; cluster.getMaster().getClusterStatus().
1405 getServers().size() == NB_SERVERS && i<100; i++) {
1406 LOG.info("Waiting on server to go down");
1407 Thread.sleep(100);
1408 }
1409 assertFalse("Waited too long for RS to die", cluster.getMaster().getClusterStatus().
1410 getServers().size() == NB_SERVERS);
1411 }
1412
1413 private void awaitDaughters(byte[] tableName, int numDaughters) throws InterruptedException {
1414
1415 for (int i=0; cluster.getRegions(tableName).size() < numDaughters && i<60; i++) {
1416 LOG.info("Waiting for repair to happen");
1417 Thread.sleep(1000);
1418 }
1419 if (cluster.getRegions(tableName).size() < numDaughters) {
1420 fail("Waiting too long for daughter regions");
1421 }
1422 }
1423
1424 private List<HRegion> awaitTableRegions(final byte[] tableName) throws InterruptedException {
1425 List<HRegion> regions = null;
1426 for (int i = 0; i < 100; i++) {
1427 regions = cluster.getRegions(tableName);
1428 if (regions.size() > 0) break;
1429 Thread.sleep(100);
1430 }
1431 return regions;
1432 }
1433
1434 private HTable createTableAndWait(byte[] tableName, byte[] cf) throws IOException,
1435 InterruptedException {
1436 HTable t = TESTING_UTIL.createTable(tableName, cf);
1437 awaitTableRegions(tableName);
1438 assertTrue("Table not online: " + Bytes.toString(tableName),
1439 cluster.getRegions(tableName).size() != 0);
1440 return t;
1441 }
1442
1443 public static class MockMasterWithoutCatalogJanitor extends HMaster {
1444
1445 public MockMasterWithoutCatalogJanitor(Configuration conf) throws IOException, KeeperException,
1446 InterruptedException {
1447 super(conf);
1448 }
1449 }
1450
1451 private static class SplittingNodeCreationFailedException extends IOException {
1452 private static final long serialVersionUID = 1652404976265623004L;
1453
1454 public SplittingNodeCreationFailedException () {
1455 super();
1456 }
1457 }
1458
1459 public static class MockedRegionObserver extends BaseRegionObserver {
1460 private SplitTransaction st = null;
1461 private PairOfSameType<HRegion> daughterRegions = null;
1462
1463 @Override
1464 public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
1465 byte[] splitKey, List<Mutation> metaEntries) throws IOException {
1466 RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1467 HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1468 List<HRegion> onlineRegions =
1469 rs.getOnlineRegions(TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2"));
1470 HRegion region = onlineRegions.get(0);
1471 for (HRegion r : onlineRegions) {
1472 if (r.getRegionInfo().containsRow(splitKey)) {
1473 region = r;
1474 break;
1475 }
1476 }
1477 st = new SplitTransaction(region, splitKey);
1478 if (!st.prepare()) {
1479 LOG.error("Prepare for the table " + region.getTableDesc().getNameAsString()
1480 + " failed. So returning null. ");
1481 ctx.bypass();
1482 return;
1483 }
1484 region.forceSplit(splitKey);
1485 daughterRegions = st.stepsBeforePONR(rs, rs, false);
1486 HRegionInfo copyOfParent = new HRegionInfo(region.getRegionInfo());
1487 copyOfParent.setOffline(true);
1488 copyOfParent.setSplit(true);
1489
1490 Put putParent = MetaEditor.makePutFromRegionInfo(copyOfParent);
1491 MetaEditor.addDaughtersToPut(putParent, daughterRegions.getFirst().getRegionInfo(),
1492 daughterRegions.getSecond().getRegionInfo());
1493 metaEntries.add(putParent);
1494
1495 Put putA = MetaEditor.makePutFromRegionInfo(daughterRegions.getFirst().getRegionInfo());
1496 Put putB = MetaEditor.makePutFromRegionInfo(daughterRegions.getSecond().getRegionInfo());
1497 st.addLocation(putA, rs.getServerName(), 1);
1498 st.addLocation(putB, rs.getServerName(), 1);
1499 metaEntries.add(putA);
1500 metaEntries.add(putB);
1501 }
1502
1503 @Override
1504 public void preSplitAfterPONR(ObserverContext<RegionCoprocessorEnvironment> ctx)
1505 throws IOException {
1506 RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1507 HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1508 st.stepsAfterPONR(rs, rs, daughterRegions);
1509 }
1510
1511 }
1512
1513 static class CustomSplitPolicy extends RegionSplitPolicy {
1514
1515 @Override
1516 protected boolean shouldSplit() {
1517 return true;
1518 }
1519
1520 @Override
1521 public boolean skipStoreFileRangeCheck() {
1522 return true;
1523 }
1524 }
1525 }
1526