package org.apache.hadoop.hbase.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.InetAddress;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnectable;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
import org.apache.hadoop.hbase.zookeeper.ZKTable;
import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.hbase.security.AccessDeniedException;
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.zookeeper.KeeperException;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Ordering;
import com.google.common.collect.TreeMultimap;
import com.google.protobuf.ServiceException;
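
/**
 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
 * table integrity problems in a corrupted HBase cluster.
 *
 * Region consistency checks verify that hbase:meta, region deployment on live
 * region servers, and the state of data in HDFS (.regioninfo files) all agree.
 * Table integrity checks verify that all possible row keys resolve to exactly
 * one region of a table: no degenerate or backwards regions, no holes between
 * regions, and no overlapping regions.
 *
 * By default hbck runs in a read-only "check" mode; repairs are only attempted
 * when the corresponding -fix options are passed on the command line.
 */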
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class HBaseFsck extends Configured {
  public static final long DEFAULT_TIME_LAG = 60000;
  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
  private static final int MAX_NUM_THREADS = 50;
  private static boolean rsSupportsOffline = true;
  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
  private static final int DEFAULT_MAX_MERGE = 5;
  private static final String TO_BE_LOADED = "to_be_loaded";
  private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";

  /**********************
   * Internal resources
   **********************/
  private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
  private ClusterStatus status;
  private HConnection connection;
  private HBaseAdmin admin;
  private HTable meta;
  // executor used for parallelizable tasks (contacting regionservers, reading region dirs)
  protected ExecutorService executor;
  private long startMillis = System.currentTimeMillis();
  private HFileCorruptionChecker hfcc;
  private int retcode = 0;
  private Path HBCK_LOCK_PATH;
  private FSDataOutputStream hbckOutFd;
  // This flag prevents the hbck lock file from being cleaned up twice (once by
  // the shutdown hook and once by the main code); cleanup happens only if
  // connect() succeeded.
  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);

  /***********
   * Options
   ***********/
  private static boolean details = false; // do we display the full report
  private long timelag = DEFAULT_TIME_LAG; // regions modified more recently than this are skipped
  private boolean fixAssignments = false; // fix assignment errors?
  private boolean fixMeta = false; // fix meta errors?
  private boolean checkHdfs = true; // load and check fs consistency?
  private boolean fixHdfsHoles = false; // fix fs holes?
  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
  private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
  private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
  private boolean fixSplitParents = false; // fix lingering split parents
  private boolean fixReferenceFiles = false; // fix lingering reference store files
  private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
  private boolean fixTableLocks = false; // fix table locks which are expired
  private boolean fixTableZNodes = false; // fix table znodes which are orphaned
  private boolean fixAny = false; // set to true if any fix option is enabled

  // limit checking/fixes to listed tables, if empty attempt to check/fix all
  // hbase:meta is always checked
  private Set<TableName> tablesIncluded = new HashSet<TableName>();
  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
  private boolean sidelineBigOverlaps = false; // sideline overlaps with > maxMerge regions
  private Path sidelineDir = null;

  private boolean rerun = false; // if we tried to fix something, rerun hbck
  private static boolean summary = false; // if we want to print less output
  private boolean checkMetaOnly = false;
  private boolean checkRegionBoundaries = false;
  private boolean ignorePreCheckPermission = false; // if pre-check permission

  /*********
   * State
   *********/
  private final ErrorReporter errors;
  int fixes = 0;

  /**
   * This map contains the state of all hbck items.  It maps from encoded region
   * name to HbckInfo structure.  The information contained in HbckInfo is used
   * to detect and correct consistency (hdfs/meta/deployment) problems.
   */
  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
  private TreeSet<TableName> disabledTables =
      new TreeSet<TableName>();
  // Empty regioninfo qualifiers in hbase:meta
  private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();

  /**
   * This map from table name to TableInfo contains the structures necessary to
   * detect table consistency problems (holes, dupes, overlaps).
   *
   * If tablesIncluded is empty, this map contains all tables.  Otherwise, it
   * contains only hbase:meta and the tables in tablesIncluded, unless
   * checkMetaOnly is specified.
   */
  private SortedMap<TableName, TableInfo> tablesInfo =
      new ConcurrentSkipListMap<TableName, TableInfo>();

  // When initially looking at HDFS, we attempt to find any orphaned data.
  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());

  private Map<TableName, Set<String>> orphanTableDirs =
      new HashMap<TableName, Set<String>>();

  // Tables whose znode lingers in ZooKeeper although the table itself is gone.
  private Set<TableName> orphanedTableZNodes = new HashSet<TableName>();
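
  /**
   * Constructor
   *
   * @param conf Configuration object
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */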
  public HBaseFsck(Configuration conf) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    // make a copy, just to be sure we're not overriding someone else's config
    setConf(HBaseConfiguration.create(getConf()));
    // disable block caching for this tool invocation
    getConf().setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
    errors = getErrorReporter(conf);

    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
    executor = new ScheduledThreadPoolExecutor(numThreads,
        Threads.newDaemonThreadFactory("hbasefsck"));
  }
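
  /**
   * Constructor
   *
   * @param conf Configuration object
   * @param exec executor service to use for parallelizable tasks
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */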
  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    errors = getErrorReporter(getConf());
    this.executor = exec;
  }
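
  /**
   * This method maintains a lock using a file.  If the creation fails we return null.
   *
   * @return FSDataOutputStream object corresponding to the newly opened lock file
   * @throws IOException
   */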
  private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
    long start = EnvironmentEdgeManager.currentTimeMillis();
    try {
      FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
      FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
          HConstants.DATA_FILE_UMASK_KEY);
      Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
      fs.mkdirs(tmpDir);
      HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
      final FSDataOutputStream out = FSUtils.create(fs, HBCK_LOCK_PATH, defaultPerms, false);
      out.writeBytes(InetAddress.getLocalHost().toString());
      out.flush();
      return out;
    } catch (RemoteException e) {
      if (AlreadyBeingCreatedException.class.getName().equals(e.getClassName())) {
        return null;
      } else {
        throw e;
      }
    } finally {
      long duration = EnvironmentEdgeManager.currentTimeMillis() - start;
      if (duration > 30000) {
        // Only warn here.  Do not return from the finally block: that would discard the
        // lock stream we just created (or mask the exception thrown above).
        LOG.warn("Took " + duration + " milliseconds to obtain lock");
      }
    }
  }

  private void unlockHbck() {
    if (hbckLockCleanup.compareAndSet(true, false)) {
      IOUtils.closeStream(hbckOutFd);
      try {
        FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
      } catch (IOException ioe) {
        LOG.warn("Failed to delete " + HBCK_LOCK_PATH);
        LOG.debug(ioe);
      }
    }
  }
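
  /**
   * To repair region consistency, one must call connect() in order to repair
   * online state.
   */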
  public void connect() throws IOException {

    // Check if another instance of hbck is already running; abort if so.
    hbckOutFd = checkAndMarkRunningHbck();
    if (hbckOutFd == null) {
      setRetCode(-1);
      LOG.error("Another instance of hbck is running, exiting this instance.[If you are sure" +
          " no other instance is running, delete the lock file " +
          HBCK_LOCK_PATH + " and rerun the tool]");
      throw new IOException("Duplicate hbck - Abort");
    }

    // Make sure to cleanup the lock
    hbckLockCleanup.set(true);

    // Add a shutdown hook so that if the user kills hbck with ctrl-c, the lock
    // file is still cleaned up and available for subsequent runs.
    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        unlockHbck();
      }
    });
    LOG.debug("Launching hbck");

    connection = HConnectionManager.createConnection(getConf());
    admin = new HBaseAdmin(connection);
    meta = new HTable(TableName.META_TABLE_NAME, connection);
    status = admin.getClusterStatus();
  }
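
  /**
   * Get deployed regions according to the region servers.
   */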
  private void loadDeployedRegions() throws IOException, InterruptedException {
    // From the master, get a list of all known live region servers
    Collection<ServerName> regionServers = status.getServers();
    errors.print("Number of live region servers: " + regionServers.size());
    if (details) {
      for (ServerName rsinfo: regionServers) {
        errors.print(" " + rsinfo.getServerName());
      }
    }

    // From the master, get a list of all dead region servers
    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
    errors.print("Number of dead region servers: " + deadRegionServers.size());
    if (details) {
      for (ServerName name: deadRegionServers) {
        errors.print(" " + name);
      }
    }

    // Print the current master name and state
    errors.print("Master: " + status.getMaster());

    // Print the list of all backup masters
    Collection<ServerName> backupMasters = status.getBackupMasters();
    errors.print("Number of backup masters: " + backupMasters.size());
    if (details) {
      for (ServerName name: backupMasters) {
        errors.print(" " + name);
      }
    }

    errors.print("Average load: " + status.getAverageLoad());
    errors.print("Number of requests: " + status.getRequestsCount());
    errors.print("Number of regions: " + status.getRegionsCount());

    Map<String, RegionState> rits = status.getRegionsInTransition();
    errors.print("Number of regions in transition: " + rits.size());
    if (details) {
      for (RegionState state: rits.values()) {
        errors.print(" " + state.toDescriptiveString());
      }
    }

    // Determine what's deployed on each region server
    processRegionServers(regionServers);
  }

  /**
   * Clear the current state of hbck.
   */
  private void clearState() {
    // Make sure regionInfo is empty before starting
    fixes = 0;
    regionInfoMap.clear();
    emptyRegionInfoQualifiers.clear();
    disabledTables.clear();
    errors.clear();
    tablesInfo.clear();
    orphanHdfsDirs.clear();
  }
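
  /**
   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
   * the table integrity rules.  HBase doesn't need to be online for this
   * operation to work.
   */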
  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
    // Initial pass to fix orphans.
    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");
      // Iterate until no more fixes are applied or the iteration limit is hit.
      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
      int curIter = 0;
      do {
        clearState(); // clears hbck state and resets fixes to 0
        // repair what we found first
        restoreHdfsIntegrity();
        curIter++; // limit the number of iterations
      } while (fixes > 0 && curIter <= maxIterations);

      // If nothing needed repairing this should complete within two iterations; otherwise
      // report whether we converged or ran out of iterations.
      if (curIter > 2) {
        if (curIter == maxIterations) {
          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Tables integrity may not be fully repaired!");
        } else {
          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
        }
      }
    }
  }
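
  /**
   * This repair method requires the cluster to be online since it contacts
   * region servers and the masters.  It makes each region's state in HDFS, in
   * hbase:meta, and its deployment consistent.
   *
   * @return if &gt; 0, the number of errors detected; if &lt; 0, an unrecoverable
   * error occurred; if 0, the cluster is clean.
   */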
  public int onlineConsistencyRepair() throws IOException, KeeperException,
      InterruptedException {
    clearState();

    // get regions according to what is online on each RegionServer
    loadDeployedRegions();
    // check whether hbase:meta is deployed and online
    recordMetaRegion();
    // Check if hbase:meta is found only once and in the right place
    if (!checkMetaRegion()) {
      String errorMsg = "hbase:meta table is not consistent. ";
      if (shouldFixAssignments()) {
        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
      } else {
        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
      }
      errors.reportError(errorMsg + " Exiting...");
      return -2;
    }
    // Do not continue with further consistency checks when hbase:meta itself is inconsistent.
    LOG.info("Loading regionsinfo from the hbase:meta table");
    boolean success = loadMetaEntries();
    if (!success) return -1;

    // Empty cells in hbase:meta?
    reportEmptyMetaCells();

    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
    if (shouldFixEmptyMetaCells()) {
      fixEmptyMetaCells();
    }

    // get a list of all tables that have not changed recently.
    if (!checkMetaOnly) {
      reportTablesInFlux();
    }

    // load regiondirs and regioninfos from HDFS
    if (shouldCheckHdfs()) {
      loadHdfsRegionDirs();
      loadHdfsRegionInfos();
    }

    // Get disabled tables from ZooKeeper
    loadDisabledTables();

    // fix the orphan tables
    fixOrphanTables();

    // Check and fix consistency
    checkAndFixConsistency();

    // Check integrity (does not fix)
    checkIntegrity();
    return errors.getErrorList().size();
  }
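
  /**
   * Contacts the master and runs the full online check/repair sequence.
   * @return 0 on success, non-zero on failure
   */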
  public int onlineHbck() throws IOException, KeeperException, InterruptedException, ServiceException {
    // print hbase server version
    errors.print("Version: " + status.getHBaseVersion());
    offlineHdfsIntegrityRepair();

    // turn the balancer off before doing online repairs
    boolean oldBalancer = admin.setBalancerRunning(false, true);
    try {
      onlineConsistencyRepair();
    }
    finally {
      admin.setBalancerRunning(oldBalancer, false);
    }

    if (checkRegionBoundaries) {
      checkRegionBoundaries();
    }

    offlineReferenceFileRepair();

    checkAndFixTableLocks();

    // Check (and fix if requested) orphaned table ZNodes
    checkAndFixOrphanedTableZNodes();

    // Remove the hbck lock
    unlockHbck();

    // Print table summary
    printTableSummary(tablesInfo);
    return errors.summarize();
  }

  public static byte[] keyOnly(byte[] b) {
    if (b == null)
      return b;
    int rowlength = Bytes.toShort(b, 0);
    byte[] result = new byte[rowlength];
    System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
    return result;
  }

  private static class RegionBoundariesInformation {
    public byte[] regionName;
    public byte[] metaFirstKey;
    public byte[] metaLastKey;
    public byte[] storesFirstKey;
    public byte[] storesLastKey;
    @Override
    public String toString() {
      return "regionName=" + Bytes.toStringBinary(regionName) +
          "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
          "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
          "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
          "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
    }
  }

  public void checkRegionBoundaries() {
    try {
      ByteArrayComparator comparator = new ByteArrayComparator();
      List<HRegionInfo> regions = MetaScanner.listAllRegions(getConf(), false);
      final RegionBoundariesInformation currentRegionBoundariesInformation =
          new RegionBoundariesInformation();
      Path hbaseRoot = FSUtils.getRootDir(getConf());
      for (HRegionInfo regionInfo : regions) {
        Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
        currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
        // For each region, get the start and stop key from META and compare them to the
        // same information from the store files.
        Path path = new Path(tableDir, regionInfo.getEncodedName());
        FileSystem fs = path.getFileSystem(getConf());
        FileStatus[] files = fs.listStatus(path);
        // For all the column families in this region...
        byte[] storeFirstKey = null;
        byte[] storeLastKey = null;
        for (FileStatus file : files) {
          String fileName = file.getPath().toString();
          fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
          if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
            FileStatus[] storeFiles = fs.listStatus(file.getPath());
            // For all the store files in this column family...
            for (FileStatus storeFile : storeFiles) {
              HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
                  getConf()), getConf());
              if ((reader.getFirstKey() != null)
                  && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
                      reader.getFirstKey()) > 0))) {
                storeFirstKey = reader.getFirstKey();
              }
              if ((reader.getLastKey() != null)
                  && ((storeLastKey == null) || (comparator.compare(storeLastKey,
                      reader.getLastKey())) < 0)) {
                storeLastKey = reader.getLastKey();
              }
              reader.close();
            }
          }
        }
        currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
        currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
        currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
        currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
        if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
          currentRegionBoundariesInformation.metaFirstKey = null;
        if (currentRegionBoundariesInformation.metaLastKey.length == 0)
          currentRegionBoundariesInformation.metaLastKey = null;

        // Region boundaries are valid when:
        //   1. the store files' first key is >= the region's start key in META, and
        //   2. the store files' last key is < the region's end key in META.
        boolean valid = true;
        // Checking start key.
        if ((currentRegionBoundariesInformation.storesFirstKey != null)
            && (currentRegionBoundariesInformation.metaFirstKey != null)) {
          valid = valid
              && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
                  currentRegionBoundariesInformation.metaFirstKey) >= 0;
        }
        // Checking stop key.
        if ((currentRegionBoundariesInformation.storesLastKey != null)
            && (currentRegionBoundariesInformation.metaLastKey != null)) {
          valid = valid
              && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
                  currentRegionBoundariesInformation.metaLastKey) < 0;
        }
        if (!valid) {
          errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
              tablesInfo.get(regionInfo.getTable()));
          LOG.warn("Region's boundaries not aligned between stores and META for:");
          LOG.warn(currentRegionBoundariesInformation);
        }
      }
    } catch (IOException e) {
      LOG.error(e);
    }
  }

  /**
   * Iterates through the list of all orphan/invalid regiondirs.
   */
  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
    for (HbckInfo hi : orphanHdfsDirs) {
      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
      adoptHdfsOrphan(hi);
    }
  }
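
  /**
   * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
   * these orphans by creating a new region and moving the column family data
   * into the new region dir.  We determine the region start and end keys by
   * looking at all of the hfiles inside the column families to identify the min
   * and max keys.  The resulting region will likely violate table integrity,
   * but that will be dealt with by merging overlapping regions.
   */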
  @SuppressWarnings("deprecation")
  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
    Path p = hi.getHdfsRegionDir();
    FileSystem fs = p.getFileSystem(getConf());
    FileStatus[] dirs = fs.listStatus(p);
    if (dirs == null) {
      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
          p + ". This dir could probably be deleted.");
      return;
    }

    TableName tableName = hi.getTableName();
    TableInfo tableInfo = tablesInfo.get(tableName);
    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
    HTableDescriptor template = tableInfo.getHTD();

    // find min and max key values
    Pair<byte[], byte[]> orphanRegionRange = null;
    for (FileStatus cf : dirs) {
      String cfName = cf.getPath().getName();
      // skip special dirs such as split log dirs
      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;

      FileStatus[] hfiles = fs.listStatus(cf.getPath());
      for (FileStatus hfile : hfiles) {
        byte[] start, end;
        HFile.Reader hf = null;
        try {
          CacheConfig cacheConf = new CacheConfig(getConf());
          hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
          hf.loadFileInfo();
          KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
          start = startKv.getRow();
          KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
          end = endKv.getRow();
        } catch (IOException ioe) {
          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
          continue;
        } catch (NullPointerException ioe) {
          LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
          continue;
        } finally {
          if (hf != null) {
            hf.close();
          }
        }

        // expand the range to include the range of all hfiles
        if (orphanRegionRange == null) {
          // first range
          orphanRegionRange = new Pair<byte[], byte[]>(start, end);
        } else {
          // expand range only if the hfile is wider
          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
            orphanRegionRange.setFirst(start);
          }
          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0) {
            orphanRegionRange.setSecond(end);
          }
        }
      }
    }
    if (orphanRegionRange == null) {
      LOG.warn("No data in dir " + p + ", sidelining data");
      fixes++;
      sidelineRegionDir(fs, hi);
      return;
    }
    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
        Bytes.toString(orphanRegionRange.getSecond()) + ")");

    // create new region on hdfs.  move data into place.
    HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(),
        orphanRegionRange.getSecond());
    LOG.info("Creating new region : " + hri);
    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
    Path target = region.getRegionFileSystem().getRegionDir();

    // rename all the data to new region
    mergeRegionDirs(target, hi);
    fixes++;
  }
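
  /**
   * This method determines if there are table integrity errors in HDFS.  If
   * there are errors and the appropriate "fix" options are enabled, the method
   * will first correct orphan regions, making them into legit regiondirs, and
   * then reload to merge potentially overlapping regions.
   *
   * @return number of table integrity errors found
   */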
  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
    // Determine what's on HDFS
    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs(); // populating regioninfo table.

    int errs = errors.getErrorList().size();
    // First pass: just collect suggestions.
    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
    checkHdfsIntegrity(false, false);

    if (errors.getErrorList().size() == errs) {
      LOG.info("No integrity errors. We are done with this phase. Glorious.");
      return 0;
    }

    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
      adoptHdfsOrphans(orphanHdfsDirs);
    }

    // Make sure there are no holes now.
    if (shouldFixHdfsHoles()) {
      clearState(); // this also resets # fixes.
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
    }

    // Now we fix overlaps
    if (shouldFixHdfsOverlaps()) {
      // second pass we fix overlaps.
      clearState(); // this also resets # fixes.
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
    }

    return errors.getErrorList().size();
  }
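
  /**
   * Scan all the store file names to find any lingering reference files,
   * which refer to non-existing files.  If the "fix" option is enabled,
   * any lingering reference file found will be sidelined.
   * <p>
   * A lingering reference file prevents a region from opening; it has to
   * be fixed before a cluster can start properly.
   */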
  private void offlineReferenceFileRepair() throws IOException {
    Configuration conf = getConf();
    Path hbaseRoot = FSUtils.getRootDir(conf);
    FileSystem fs = hbaseRoot.getFileSystem(conf);
    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot);
    for (Path path: allFiles.values()) {
      boolean isReference = false;
      try {
        isReference = StoreFileInfo.isReference(path);
      } catch (Throwable t) {
        // Ignore.  Some files may not be store files at all,
        // for example files under the .oldlogs folder of hbase:meta.
      }
      if (!isReference) continue;

      Path referredToFile = StoreFileInfo.getReferredToFile(path);
      if (fs.exists(referredToFile)) continue;  // good, expected

      // Found a lingering reference file
      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
          "Found lingering reference file " + path);
      if (!shouldFixReferenceFiles()) continue;

      // Now, trying to fix it since requested
      boolean success = false;
      String pathStr = path.toString();

      // A reference file path should be like
      // ${hbase.rootdir}/data/namespace/table/region/family/referred_file.region_name
      // Walk up 5 directory levels so the file is sidelined under a similar
      // folder structure rooted in the sideline dir.
      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
      for (int i = 0; index > 0 && i < 5; i++) {
        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
      }
      if (index > 0) {
        Path rootDir = getSidelineDir();
        Path dst = new Path(rootDir, pathStr.substring(index + 1));
        fs.mkdirs(dst.getParent());
        LOG.info("Trying to sideline reference file "
            + path + " to " + dst);
        setShouldRerun();

        success = fs.rename(path, dst);
      }
      if (!success) {
        LOG.error("Failed to sideline reference file " + path);
      }
    }
  }

  /**
   * Report any empty REGIONINFO_QUALIFIER rows found in hbase:meta.
   */
  private void reportEmptyMetaCells() {
    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
        emptyRegionInfoQualifiers.size());
    if (details) {
      for (Result r: emptyRegionInfoQualifiers) {
        errors.print(" " + r);
      }
    }
  }

  /**
   * Report the overall table count, flagging tables that appear to be in flux.
   */
  private void reportTablesInFlux() {
    AtomicInteger numSkipped = new AtomicInteger(0);
    HTableDescriptor[] allTables = getTables(numSkipped);
    errors.print("Number of Tables: " + allTables.length);
    if (details) {
      if (numSkipped.get() > 0) {
        errors.detail("Number of Tables in flux: " + numSkipped.get());
      }
      for (HTableDescriptor td : allTables) {
        errors.detail(" Table: " + td.getTableName() + "\t" +
            (td.isReadOnly() ? "ro" : "rw") + "\t" +
            (td.isMetaRegion() ? "META" : " ") + "\t" +
            " families: " + td.getFamilies().size());
      }
    }
  }

  public ErrorReporter getErrors() {
    return errors;
  }

  /**
   * Read the .regioninfo file from the file system for the given region and
   * cache the result on the HbckInfo.
   */
  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    if (regionDir == null) {
      LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
      return;
    }

    if (hbi.hdfsEntry.hri != null) {
      // already loaded data
      return;
    }

    FileSystem fs = FileSystem.get(getConf());
    HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
    LOG.debug("HRegionInfo read: " + hri.toString());
    hbi.hdfsEntry.hri = hri;
  }

  /**
   * Exception thrown when a region repair fails; wraps the underlying IOException.
   */
  public static class RegionRepairException extends IOException {
    private static final long serialVersionUID = 1L;
    final IOException ioe;
    public RegionRepairException(String s, IOException ioe) {
      super(s);
      this.ioe = ioe;
    }
  }
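
  /**
   * Populate hbck's per-table info from the region infos loaded from the file system.
   */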
  private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
      throws IOException, InterruptedException {
    tablesInfo.clear(); // regenerating the data
    // generate region split structure
    Collection<HbckInfo> hbckInfos = regionInfoMap.values();

    // Parallelized read of .regioninfo files.
    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
    List<Future<Void>> hbiFutures;

    for (HbckInfo hbi : hbckInfos) {
      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
      hbis.add(work);
    }

    // Submit and wait for completion
    hbiFutures = executor.invokeAll(hbis);

    for (int i = 0; i < hbiFutures.size(); i++) {
      WorkItemHdfsRegionInfo work = hbis.get(i);
      Future<Void> f = hbiFutures.get(i);
      try {
        f.get();
      } catch (ExecutionException e) {
        LOG.warn("Failed to read .regioninfo file for region " +
            work.hbi.getRegionNameAsString(), e.getCause());
      }
    }

    Path hbaseRoot = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseRoot.getFileSystem(getConf());
    // serialized table info gathering.
    for (HbckInfo hbi: hbckInfos) {

      if (hbi.getHdfsHRI() == null) {
        // was an orphan
        continue;
      }

      // get table name from hdfs, populate various HBaseFsck tables.
      TableName tableName = hbi.getTableName();
      if (tableName == null) {
        // There was an entry in hbase:meta that was not in HDFS?
        LOG.warn("tableName was null for: " + hbi);
        continue;
      }

      TableInfo modTInfo = tablesInfo.get(tableName);
      if (modTInfo == null) {
        // only executed once per table.
        modTInfo = new TableInfo(tableName);
        tablesInfo.put(tableName, modTInfo);
        try {
          HTableDescriptor htd =
              FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
          modTInfo.htds.add(htd);
        } catch (IOException ioe) {
          if (!orphanTableDirs.containsKey(tableName)) {
            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
            // should only report once for each table
            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
            Set<String> columns = new HashSet<String>();
            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
          }
        }
      }
      if (!hbi.isSkipChecks()) {
        modTInfo.addRegionInfo(hbi);
      }
    }

    loadTableInfosForTablesWithNoRegion();

    return tablesInfo;
  }

  /**
   * Build the column family list for a region from its column family dirs.
   *
   * @param columns set to populate
   * @param hbi region to inspect
   * @return the set of column families found
   * @throws IOException
   */
  private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    FileSystem fs = regionDir.getFileSystem(getConf());
    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
    for (FileStatus subdir : subDirs) {
      String columnfamily = subdir.getPath().getName();
      columns.add(columnfamily);
    }
    return columns;
  }

  /**
   * Fabricate a .tableinfo file with the following contents:<br>
   * 1. the correct table name<br>
   * 2. the correct column family list<br>
   * 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
   * @throws IOException
   */
  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
      Set<String> columns) throws IOException {
    if (columns == null || columns.isEmpty()) return false;
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (String columnfamily : columns) {
      htd.addFamily(new HColumnDescriptor(columnfamily));
    }
    fstd.createTableDescriptor(htd, true);
    return true;
  }

  /**
   * Fix the empty REGIONINFO_QUALIFIER rows in hbase:meta by deleting them.
   * @throws IOException
   */
  public void fixEmptyMetaCells() throws IOException {
    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
      for (Result region : emptyRegionInfoQualifiers) {
        deleteMetaRegion(region.getRow());
        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
      }
      emptyRegionInfoQualifiers.clear();
    }
  }
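
  /**
   * Fix orphan tables by creating a .tableinfo file under each table dir:<br>
   * 1. if a TableInfo is cached, recover the .tableinfo from it;<br>
   * 2. else create a default .tableinfo with the correct table name, the
   *    correct column family list, and the default properties for both
   *    {@link HTableDescriptor} and {@link HColumnDescriptor}.<br>
   * @throws IOException
   */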
  public void fixOrphanTables() throws IOException {
    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {

      List<TableName> tmpList = new ArrayList<TableName>();
      tmpList.addAll(orphanTableDirs.keySet());
      HTableDescriptor[] htds = getHTableDescriptors(tmpList);
      Iterator<Entry<TableName, Set<String>>> iter =
          orphanTableDirs.entrySet().iterator();
      int j = 0;
      int numFailedCase = 0;
      FSTableDescriptors fstd = new FSTableDescriptors(getConf());
      while (iter.hasNext()) {
        Entry<TableName, Set<String>> entry = iter.next();
        TableName tableName = entry.getKey();
        LOG.info("Trying to fix orphan table error: " + tableName);
        if (j < htds.length) {
          if (tableName.equals(htds[j].getTableName())) {
            HTableDescriptor htd = htds[j];
            LOG.info("fixing orphan table: " + tableName + " from cache");
            fstd.createTableDescriptor(htd, true);
            j++;
            iter.remove();
          }
        } else {
          if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
            LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
            LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: "
                + tableName);
            iter.remove();
          } else {
            LOG.error("Unable to create default .tableinfo for " + tableName
                + " while missing column family information");
            numFailedCase++;
          }
        }
        fixes++;
      }

      if (orphanTableDirs.isEmpty()) {
        // all orphanTableDirs were recovered; re-run hbck after restoring the .tableinfo files
        setShouldRerun();
        LOG.warn("Strongly recommend re-running hbck manually after all orphanTableDirs have"
            + " been fixed");
      } else if (numFailedCase > 0) {
        LOG.error("Failed to fix " + numFailedCase
            + " OrphanTables with default .tableinfo files");
      }

    }
    // cleanup the list
    orphanTableDirs.clear();

  }

  /**
   * Create a new, empty hbase:meta region on the file system.
   *
   * @return an open hbase:meta HRegion
   */
  private HRegion createNewMeta() throws IOException {
    Path rootdir = FSUtils.getRootDir(getConf());
    Configuration c = getConf();
    HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
    HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
    MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
    HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor);
    MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
    return meta;
  }

  /**
   * Generate the set of puts to add to a new hbase:meta.  This expects the tables
   * to be clean with no overlaps or holes.  If there are any problems it returns null.
   *
   * @return an ArrayList of puts to apply in bulk, or null if the tables have problems
   */
  private ArrayList<Put> generatePuts(
      SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
    ArrayList<Put> puts = new ArrayList<Put>();
    boolean hasProblems = false;
    for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
      TableName name = e.getKey();

      // skip "hbase:meta"
      if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
        continue;
      }

      TableInfo ti = e.getValue();
      for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
          .entrySet()) {
        Collection<HbckInfo> his = spl.getValue();
        int sz = his.size();
        if (sz != 1) {
          // problem
          LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
              + " had " + sz + " regions instead of exactly 1." );
          hasProblems = true;
          continue;
        }

        // add the row directly to meta.
        HbckInfo hi = his.iterator().next();
        HRegionInfo hri = hi.getHdfsHRI();
        Put p = MetaEditor.makePutFromRegionInfo(hri);
        puts.add(p);
      }
    }
    return hasProblems ? null : puts;
  }

  /**
   * Suggest fixes for each table.
   */
  private void suggestFixes(
      SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
    for (TableInfo tInfo : tablesInfo.values()) {
      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
      tInfo.checkRegionChain(handler);
    }
  }
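
  /**
   * Rebuilds hbase:meta from the information in HDFS.  Depends on the configuration
   * passed into the hbck constructor to point to a particular fs/dir.
   *
   * @param fix flag that determines if the method should attempt to fix holes
   * @return true if successful, false if the attempt failed.
   */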
  public boolean rebuildMeta(boolean fix) throws IOException,
      InterruptedException {

    // Determine what's on HDFS
    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs(); // populating regioninfo table.

    int errs = errors.getErrorList().size();
    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
    checkHdfsIntegrity(false, false);

    // make sure ok.
    if (errors.getErrorList().size() != errs) {
      // While in error state, iterate until no more fixes possible
      while (true) {
        fixes = 0;
        suggestFixes(tablesInfo);
        errors.clear();
        loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
        checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());

        int errCount = errors.getErrorList().size();

        if (fixes == 0) {
          if (errCount > 0) {
            return false; // failed to fix problems.
          } else {
            break; // no fixes and no problems -- safe to proceed.
          }
        }
      }
    }

    // we can rebuild, move old meta out of the way and start
    LOG.info("HDFS regioninfos seem good. Sidelining old hbase:meta");
    Path backupDir = sidelineOldMeta();

    LOG.info("Creating new hbase:meta");
    HRegion meta = createNewMeta();

    // populate meta
    List<Put> puts = generatePuts(tablesInfo);
    if (puts == null) {
      LOG.fatal("Problem encountered when creating new hbase:meta entries. " +
          "You may need to restore the previously sidelined hbase:meta");
      return false;
    }
    meta.batchMutate(puts.toArray(new Put[puts.size()]));
    HRegion.closeHRegion(meta);
    LOG.info("Success! hbase:meta table rebuilt.");
    LOG.info("Old hbase:meta is moved into " + backupDir);
    return true;
  }

  private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
      boolean fixOverlaps) throws IOException {
    LOG.info("Checking HBase region split map from HDFS data...");
    for (TableInfo tInfo : tablesInfo.values()) {
      TableIntegrityErrorHandler handler;
      if (fixHoles || fixOverlaps) {
        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
            fixHoles, fixOverlaps);
      } else {
        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
      }
      if (!tInfo.checkRegionChain(handler)) {
        errors.report("Found inconsistency in table " + tInfo.getName());
      }
    }
    return tablesInfo;
  }

  private Path getSidelineDir() throws IOException {
    if (sidelineDir == null) {
      Path hbaseDir = FSUtils.getRootDir(getConf());
      Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
      sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
          + startMillis);
    }
    return sidelineDir;
  }

  /**
   * Sideline a region dir (instead of deleting it).
   */
  Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
    return sidelineRegionDir(fs, null, hi);
  }

  /**
   * Sideline a region dir (instead of deleting it).
   *
   * @param parentDir if specified, the region is sidelined to a folder like
   *     .../parentDir/&lt;table name&gt;/&lt;region name&gt;, which groups
   *     similar sidelined regions together (for example, to bulk load them
   *     back later).  If null, it is ignored.
   */
  Path sidelineRegionDir(FileSystem fs,
      String parentDir, HbckInfo hi) throws IOException {
    TableName tableName = hi.getTableName();
    Path regionDir = hi.getHdfsRegionDir();

    if (!fs.exists(regionDir)) {
      LOG.warn("No previous " + regionDir + " exists. Continuing.");
      return null;
    }

    Path rootDir = getSidelineDir();
    if (parentDir != null) {
      rootDir = new Path(rootDir, parentDir);
    }
    Path sidelineTableDir = FSUtils.getTableDir(rootDir, tableName);
    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
    fs.mkdirs(sidelineRegionDir);
    boolean success = false;
    FileStatus[] cfs = fs.listStatus(regionDir);
    if (cfs == null) {
      LOG.info("Region dir is empty: " + regionDir);
    } else {
      for (FileStatus cf : cfs) {
        Path src = cf.getPath();
        Path dst = new Path(sidelineRegionDir, src.getName());
        if (fs.isFile(src)) {
          // simple file
          success = fs.rename(src, dst);
          if (!success) {
            String msg = "Unable to rename file " + src + " to " + dst;
            LOG.error(msg);
            throw new IOException(msg);
          }
          continue;
        }

        // is a directory.
        fs.mkdirs(dst);

        LOG.info("Sidelining files from " + src + " into containing region " + dst);
        // FileSystem.rename is inconsistent with directories -- if the
        // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
        // it moves the src into the dst dir resulting in (foo/a/b).  If
        // the dst does not exist, and the src is a dir, src becomes dst (foo/b).
        // So rename the individual files inside the directory instead.
        FileStatus[] hfiles = fs.listStatus(src);
        if (hfiles != null && hfiles.length > 0) {
          for (FileStatus hfile : hfiles) {
            success = fs.rename(hfile.getPath(), dst);
            if (!success) {
              String msg = "Unable to rename file " + src + " to " + dst;
              LOG.error(msg);
              throw new IOException(msg);
            }
          }
        }
        LOG.debug("Sideline directory contents:");
        debugLsr(sidelineRegionDir);
      }
    }

    LOG.info("Removing old region dir: " + regionDir);
    success = fs.delete(regionDir, true);
    if (!success) {
      String msg = "Unable to delete dir " + regionDir;
      LOG.error(msg);
      throw new IOException(msg);
    }
    return sidelineRegionDir;
  }

  /**
   * Sideline an entire table.
   */
  void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
      Path backupHbaseDir) throws IOException {
    Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
    if (fs.exists(tableDir)) {
      Path backupTableDir = FSUtils.getTableDir(backupHbaseDir, tableName);
      fs.mkdirs(backupTableDir.getParent());
      boolean success = fs.rename(tableDir, backupTableDir);
      if (!success) {
        throw new IOException("Failed to move " + tableName + " from "
            + tableDir + " to " + backupTableDir);
      }
    } else {
      LOG.info("No previous " + tableName + " exists. Continuing.");
    }
  }

  /**
   * @return Path to backup of original hbase:meta directory
   */
  Path sidelineOldMeta() throws IOException {
    // put current hbase:meta aside.
    Path hbaseDir = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseDir.getFileSystem(getConf());
    Path backupDir = getSidelineDir();
    fs.mkdirs(backupDir);

    try {
      sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
    } catch (IOException e) {
      LOG.fatal("... failed to sideline meta. Currently in inconsistent state. To restore "
          + "try to rename hbase:meta in " + backupDir.getName() + " to "
          + hbaseDir.getName() + ".", e);
      throw e; // throw original exception
    }
    return backupDir;
  }

  /**
   * Load the list of disabled tables in ZooKeeper into the local set.
   * @throws ZooKeeperConnectionException
   * @throws IOException
   */
  private void loadDisabledTables()
      throws ZooKeeperConnectionException, IOException {
    HConnectionManager.execute(new HConnectable<Void>(getConf()) {
      @Override
      public Void connect(HConnection connection) throws IOException {
        ZooKeeperWatcher zkw = createZooKeeperWatcher();
        try {
          for (TableName tableName :
              ZKTableReadOnly.getDisabledOrDisablingTables(zkw)) {
            disabledTables.add(tableName);
          }
        } catch (KeeperException ke) {
          throw new IOException(ke);
        } finally {
          zkw.close();
        }
        return null;
      }
    });
  }

  /**
   * Check if the specified region's table is disabled.
   */
  private boolean isTableDisabled(HRegionInfo regionInfo) {
    return disabledTables.contains(regionInfo.getTable());
  }

  /**
   * Scan HDFS for all regions, recording their information into regionInfoMap.
   */
  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
    Path rootDir = FSUtils.getRootDir(getConf());
    FileSystem fs = rootDir.getFileSystem(getConf());

    // list all tables from HDFS
    List<FileStatus> tableDirs = Lists.newArrayList();

    boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));

    List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
    for (Path path : paths) {
      TableName tableName = FSUtils.getTableName(path);
      if ((!checkMetaOnly &&
          isTableIncluded(tableName)) ||
          tableName.equals(TableName.META_TABLE_NAME)) {
        tableDirs.add(fs.getFileStatus(path));
      }
    }

    // verify that version file exists
    if (!foundVersionFile) {
      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
          "Version file does not exist in root dir " + rootDir);
      if (shouldFixVersionFile()) {
        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
            + " file.");
        setShouldRerun();
        FSUtils.setVersion(fs, rootDir, getConf().getInt(
            HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
            HConstants.VERSION_FILE_WRITE_ATTEMPTS,
            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
      }
    }

    // Scan each table directory for region directories in parallel, mirroring
    // the other work-item scans in this class (one callable per table dir).
    List<WorkItemHdfsDir> dirs = new ArrayList<WorkItemHdfsDir>(tableDirs.size());
    List<Future<Void>> dirsFutures;

    for (FileStatus tableDir : tableDirs) {
      LOG.debug("Loading region dirs from " + tableDir.getPath());
      dirs.add(new WorkItemHdfsDir(this, fs, errors, tableDir));
    }

    // Invoke and wait for the Callables to complete
    dirsFutures = executor.invokeAll(dirs);

    for (Future<Void> f : dirsFutures) {
      try {
        f.get();
      } catch (ExecutionException e) {
        LOG.warn("Could not load region dir ", e.getCause());
      }
    }
  }
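
  /**
   * Record the location of the hbase:meta region as found in ZooKeeper.
   */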
  private boolean recordMetaRegion() throws IOException {
    HRegionLocation metaLocation = connection.locateRegion(
        TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW);

    // Check if the meta region is valid and existing
    if (metaLocation == null || metaLocation.getRegionInfo() == null ||
        metaLocation.getHostname() == null) {
      errors.reportError(ERROR_CODE.NULL_META_REGION,
          "META region or some of its attributes are null.");
      return false;
    }
    ServerName sn;
    try {
      sn = getMetaRegionServerName();
    } catch (KeeperException e) {
      throw new IOException(e);
    }
    MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
    HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
    if (hbckInfo == null) {
      regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
    } else {
      hbckInfo.metaEntry = m;
    }
    return true;
  }

  private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
    return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
      @Override
      public void abort(String why, Throwable e) {
        LOG.error(why, e);
        System.exit(1);
      }

      @Override
      public boolean isAborted() {
        return false;
      }

    });
  }

  private ServerName getMetaRegionServerName()
      throws IOException, KeeperException {
    ZooKeeperWatcher zkw = createZooKeeperWatcher();
    ServerName sn = null;
    try {
      sn = MetaRegionTracker.getMetaRegionLocation(zkw);
    } finally {
      zkw.close();
    }
    return sn;
  }

  /**
   * Contacts each regionserver and fetches metadata about regions.
   * @param regionServerList - the list of region servers to connect to
   * @throws IOException if a remote or network exception occurs
   */
  void processRegionServers(Collection<ServerName> regionServerList)
      throws IOException, InterruptedException {

    List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
    List<Future<Void>> workFutures;

    // loop to contact each region server in parallel
    for (ServerName rsinfo: regionServerList) {
      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
    }

    workFutures = executor.invokeAll(workItems);

    for (int i = 0; i < workFutures.size(); i++) {
      WorkItemRegion item = workItems.get(i);
      Future<Void> f = workFutures.get(i);
      try {
        f.get();
      } catch (ExecutionException e) {
        LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
            e.getCause());
      }
    }
  }

  /**
   * Check consistency of all regions that have been found in previous phases.
   */
  private void checkAndFixConsistency()
      throws IOException, KeeperException, InterruptedException {
    List<CheckRegionConsistencyWorkItem> workItems =
        new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
      workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
    }
    checkRegionConsistencyConcurrently(workItems);
  }

  /**
   * Check the consistency of all regions using multiple threads concurrently.
   */
  private void checkRegionConsistencyConcurrently(
      final List<CheckRegionConsistencyWorkItem> workItems)
      throws IOException, KeeperException, InterruptedException {
    if (workItems.isEmpty()) {
      return;  // nothing to check
    }

    List<Future<Void>> workFutures = executor.invokeAll(workItems);
    for (Future<Void> f: workFutures) {
      try {
        f.get();
      } catch (ExecutionException e1) {
        LOG.warn("Could not check region consistency " , e1.getCause());
        if (e1.getCause() instanceof IOException) {
          throw (IOException) e1.getCause();
        } else if (e1.getCause() instanceof KeeperException) {
          throw (KeeperException) e1.getCause();
        } else if (e1.getCause() instanceof InterruptedException) {
          throw (InterruptedException) e1.getCause();
        } else {
          throw new IOException(e1.getCause());
        }
      }
    }
  }

  class CheckRegionConsistencyWorkItem implements Callable<Void> {
    private final String key;
    private final HbckInfo hbi;

    CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
      this.key = key;
      this.hbi = hbi;
    }

    @Override
    public synchronized Void call() throws Exception {
      checkRegionConsistency(key, hbi);
      return null;
    }
  }

  private void preCheckPermission() throws IOException, AccessDeniedException {
    if (shouldIgnorePreCheckPermission()) {
      return;
    }

    Path hbaseDir = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseDir.getFileSystem(getConf());
    UserProvider userProvider = UserProvider.instantiate(getConf());
    UserGroupInformation ugi = userProvider.getCurrent().getUGI();
    FileStatus[] files = fs.listStatus(hbaseDir);
    for (FileStatus file : files) {
      try {
        FSUtils.checkAccess(ugi, file, FsAction.WRITE);
      } catch (AccessDeniedException ace) {
        LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
        errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
            + " does not have write perms to " + file.getPath()
            + ". Please rerun hbck as hdfs user " + file.getOwner());
        throw ace;
      }
    }
  }

  /**
   * Deletes a region from the meta table.
   */
  private void deleteMetaRegion(HbckInfo hi) throws IOException {
    deleteMetaRegion(hi.metaEntry.getRegionName());
  }

  /**
   * Deletes a region from the meta table.
   */
  private void deleteMetaRegion(byte[] metaKey) throws IOException {
    Delete d = new Delete(metaKey);
    meta.delete(d);
    meta.flushCommits();
    LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
  }

  /**
   * Reset the split parent region info in the meta table.
   */
  private void resetSplitParent(HbckInfo hi) throws IOException {
    RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
    Delete d = new Delete(hi.metaEntry.getRegionName());
    d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
    d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
    mutations.add(d);

    HRegionInfo hri = new HRegionInfo(hi.metaEntry);
    hri.setOffline(false);
    hri.setSplit(false);
    Put p = MetaEditor.makePutFromRegionInfo(hri);
    mutations.add(p);

    meta.mutateRow(mutations);
    meta.flushCommits();
    LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
  }

  /**
   * This backwards-compatibility wrapper permanently offlines a region that
   * should not be alive.  If the master does not support the "offline" method,
   * it falls back to unassign, which will basically work until one attempts to
   * disable or delete the affected table.  The problem has to do with in-memory
   * only master state, so restarting the HMaster or failing over to another
   * should fix this.
   */
  private void offline(byte[] regionName) throws IOException {
    String regionString = Bytes.toStringBinary(regionName);
    if (!rsSupportsOffline) {
      LOG.warn("Using unassign region " + regionString
          + " instead of using offline method, you should"
          + " restart HMaster after these repairs");
      admin.unassign(regionName, true);
      return;
    }

    // first time we assume the rs's supports #offline.
    try {
      LOG.info("Offlining region " + regionString);
      admin.offline(regionName);
    } catch (IOException ioe) {
      String notFoundMsg = "java.lang.NoSuchMethodException: " +
          "org.apache.hadoop.hbase.master.HMaster.offline([B)";
      if (ioe.getMessage().contains(notFoundMsg)) {
        LOG.warn("Using unassign region " + regionString
            + " instead of using offline method, you should"
            + " restart HMaster after these repairs");
        rsSupportsOffline = false; // in the future just use unassign
        admin.unassign(regionName, true);
        return;
      }
      throw ioe;
    }
  }

  private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
    for (OnlineEntry rse : hi.deployedEntries) {
      LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa);
      try {
        HBaseFsckRepair.closeRegionSilentlyAndWait(admin, rse.hsa, rse.hri);
        offline(rse.hri.getRegionName());
      } catch (IOException ioe) {
        LOG.warn("Got exception when attempting to offline region "
            + Bytes.toString(rse.hri.getRegionName()), ioe);
      }
    }
  }
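
  /**
   * Attempts to undeploy a region from a region server based on information in
   * META.  Any operation that modifies the file system should make sure that
   * its corresponding region is not deployed, to prevent data races.
   *
   * A separate call is required to update the master in-memory region state
   * kept in the AssignmentManager.  Because disable uses that state rather than
   * what is in META, tables repaired by hbck may not disable/delete cleanly
   * until the master state is refreshed.
   */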
  private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
    if (hi.metaEntry == null && hi.hdfsEntry == null) {
      undeployRegions(hi);
      return;
    }

    // get assignment info and hregioninfo from meta.
    Get get = new Get(hi.getRegionName());
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
    Result r = meta.get(get);
    ServerName serverName = HRegionInfo.getServerName(r);
    if (serverName == null) {
      errors.reportError("Unable to close region "
          + hi.getRegionNameAsString() + " because meta does not "
          + "have handle to reach it.");
      return;
    }

    HRegionInfo hri = HRegionInfo.getHRegionInfo(r);
    if (hri == null) {
      LOG.warn("Unable to close region " + hi.getRegionNameAsString()
          + " because hbase:meta had invalid or missing "
          + HConstants.CATALOG_FAMILY_STR + ":"
          + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
          + " qualifier value.");
      return;
    }

    // close the region -- close files and remove assignment
    HBaseFsckRepair.closeRegionSilentlyAndWait(admin, serverName, hri);
  }

  private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
      KeeperException, InterruptedException {
    // If we are trying to fix the errors
    if (shouldFixAssignments()) {
      errors.print(msg);
      undeployRegions(hbi);
      setShouldRerun();
      HRegionInfo hri = hbi.getHdfsHRI();
      if (hri == null) {
        hri = hbi.metaEntry;
      }
      HBaseFsckRepair.fixUnassigned(admin, hri);
      HBaseFsckRepair.waitUntilAssigned(admin, hri);
    }
  }
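
  /**
   * Check a single region for consistency and correct deployment.
   */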
1893 private void checkRegionConsistency(final String key, final HbckInfo hbi)
1894 throws IOException, KeeperException, InterruptedException {
1895 String descriptiveName = hbi.toString();
1896
1897 boolean inMeta = hbi.metaEntry != null;
1898
1899 boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
1900 boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
1901 boolean isDeployed = !hbi.deployedOn.isEmpty();
1902 boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
1903 boolean deploymentMatchesMeta =
1904 hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
1905 hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
1906 boolean splitParent =
1907 (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
1908 boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
1909 boolean recentlyModified = inHdfs &&
1910 hbi.getModTime() + timelag > System.currentTimeMillis();
1911
1912
1913 if (hbi.containsOnlyHdfsEdits()) {
1914 return;
1915 }
1916 if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
1917 return;
1918 } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
1919 LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
1920 "tabled that is not deployed");
1921 return;
1922 } else if (recentlyModified) {
1923 LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
1924 return;
1925 }
1926
1927 else if (!inMeta && !inHdfs && !isDeployed) {
1928
1929 assert false : "Entry for region with no data";
1930 } else if (!inMeta && !inHdfs && isDeployed) {
1931 errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
1932 + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
1933 "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1934 if (shouldFixAssignments()) {
1935 undeployRegions(hbi);
1936 }
1937
1938 } else if (!inMeta && inHdfs && !isDeployed) {
1939 if (hbi.isMerged()) {
1940
1941
1942 hbi.setSkipChecks(true);
1943 LOG.info("Region " + descriptiveName
1944 + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
1945 return;
1946 }
1947 errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
1948 + descriptiveName + " on HDFS, but not listed in hbase:meta " +
1949 "or deployed on any region server");
1950
1951 if (shouldFixMeta()) {
1952 if (!hbi.isHdfsRegioninfoPresent()) {
1953 LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
1954 + " in table integrity repair phase if -fixHdfsOrphans was" +
1955 " used.");
1956 return;
1957 }
1958
1959 HRegionInfo hri = hbi.getHdfsHRI();
1960 TableInfo tableInfo = tablesInfo.get(hri.getTable());
1961 for (HRegionInfo region : tableInfo.getRegionsFromMeta()) {
1962 if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
1963 && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
1964 hri.getEndKey()) >= 0)
1965 && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
1966 if(region.isSplit() || region.isOffline()) continue;
1967 Path regionDir = hbi.getHdfsRegionDir();
1968 FileSystem fs = regionDir.getFileSystem(getConf());
1969 List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
1970 for (Path familyDir : familyDirs) {
1971 List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
1972 for (Path referenceFilePath : referenceFilePaths) {
1973 Path parentRegionDir =
1974 StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
1975 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
1976 LOG.warn(hri + " start and stop keys are in the range of " + region
1977 + ". The region might not be cleaned up from hdfs when region " + region
1978 + " split failed. Hence deleting from hdfs.");
1979 HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
1980 regionDir.getParent(), hri);
1981 return;
1982 }
1983 }
1984 }
1985 }
1986 }
1987
1988 LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
1989 HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
1990
1991 tryAssignmentRepair(hbi, "Trying to reassign region...");
1992 }
1993
1994 } else if (!inMeta && inHdfs && isDeployed) {
1995 errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
1996 + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1997 debugLsr(hbi.getHdfsRegionDir());
1998 if (shouldFixMeta()) {
1999 if (!hbi.isHdfsRegioninfoPresent()) {
2000 LOG.error("This should have been repaired in table integrity repair phase");
2001 return;
2002 }
2003
2004 LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI());
2005 HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
2006
2007 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2008 }
2009
2010
2011 } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2012
2013
2014 if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2015
2016 HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2017 HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2018 if (infoA != null && infoB != null) {
2019
2020 hbi.setSkipChecks(true);
2021 return;
2022 }
2023 }
2024 errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2025 + descriptiveName + " is a split parent in META, in HDFS, "
2026 + "and not deployed on any region server. This could be transient.");
2027 if (shouldFixSplitParents()) {
2028 setShouldRerun();
2029 resetSplitParent(hbi);
2030 }
2031 } else if (inMeta && !inHdfs && !isDeployed) {
2032 errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2033 + descriptiveName + " found in META, but not in HDFS "
2034 + "or deployed on any region server.");
2035 if (shouldFixMeta()) {
2036 deleteMetaRegion(hbi);
2037 }
2038 } else if (inMeta && !inHdfs && isDeployed) {
2039 errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2040 + " found in META, but not in HDFS, " +
2041 "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2042
2043
2044
2045 if (shouldFixAssignments()) {
2046 errors.print("Trying to fix unassigned region...");
2047 undeployRegions(hbi);
2048 }
2049 if (shouldFixMeta()) {
2050
2051 deleteMetaRegion(hbi);
2052 }
2053 } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2054 errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2055 + " not deployed on any region server.");
2056 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2057 } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2058 errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2059 "Region " + descriptiveName + " should not be deployed according " +
2060 "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2061 if (shouldFixAssignments()) {
2062 errors.print("Trying to close the region " + descriptiveName);
2063 setShouldRerun();
2064 HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
2065 }
2066 } else if (inMeta && inHdfs && isMultiplyDeployed) {
2067 errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2068 + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2069 + " but is multiply assigned to region servers " +
2070 Joiner.on(", ").join(hbi.deployedOn));
2071
2072 if (shouldFixAssignments()) {
2073 errors.print("Trying to fix assignment error...");
2074 setShouldRerun();
2075 HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
2076 }
2077 } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2078 errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2079 + descriptiveName + " listed in hbase:meta on region server " +
2080 hbi.metaEntry.regionServer + " but found on region server " +
2081 hbi.deployedOn.get(0));
2082
2083 if (shouldFixAssignments()) {
2084 errors.print("Trying to fix assignment error...");
2085 setShouldRerun();
2086 HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
2087 HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2088 }
2089 } else {
2090 errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2091 " is in an unforeseen state:" +
2092 " inMeta=" + inMeta +
2093 " inHdfs=" + inHdfs +
2094 " isDeployed=" + isDeployed +
2095 " isMultiplyDeployed=" + isMultiplyDeployed +
2096 " deploymentMatchesMeta=" + deploymentMatchesMeta +
2097 " shouldBeDeployed=" + shouldBeDeployed);
2098 }
2099 }
2100
2101
2102
2103
2104
2105
2106
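  /**
   * Checks table integrity: groups every region seen in hbase:meta by table,
   * records which servers each table is deployed on, and then verifies each
   * table's region chain for holes, overlaps and other inconsistencies.
   * @return the map of table names to the collected table info
   */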
2107 SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2108 tablesInfo = new TreeMap<TableName,TableInfo> ();
2109 List<HbckInfo> noHDFSRegionInfos = new ArrayList<HbckInfo>();
2110 LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2111 for (HbckInfo hbi : regionInfoMap.values()) {
2112
2113 if (hbi.metaEntry == null) {
2114
2115 noHDFSRegionInfos.add(hbi);
2116 Path p = hbi.getHdfsRegionDir();
2117 if (p == null) {
2118 errors.report("No regioninfo in Meta or HDFS. " + hbi);
2119 }
2120
2121
2122 continue;
2123 }
2124 if (hbi.metaEntry.regionServer == null) {
2125 errors.detail("Skipping region because no region server: " + hbi);
2126 continue;
2127 }
2128 if (hbi.metaEntry.isOffline()) {
2129 errors.detail("Skipping region because it is offline: " + hbi);
2130 continue;
2131 }
2132 if (hbi.containsOnlyHdfsEdits()) {
2133 errors.detail("Skipping region because it only contains edits" + hbi);
2134 continue;
2135 }
2136
2137
2138
2139
2140
2141
2142 if (hbi.deployedOn.size() == 0) continue;
2143
2144
2145 TableName tableName = hbi.metaEntry.getTable();
2146 TableInfo modTInfo = tablesInfo.get(tableName);
2147 if (modTInfo == null) {
2148 modTInfo = new TableInfo(tableName);
2149 }
2150 for (ServerName server : hbi.deployedOn) {
2151 modTInfo.addServer(server);
2152 }
2153
2154 if (!hbi.isSkipChecks()) {
2155 modTInfo.addRegionInfo(hbi);
2156 }
2157
2158 tablesInfo.put(tableName, modTInfo);
2159 }
2160
2161 loadTableInfosForTablesWithNoRegion();
2162
2163 for (TableInfo tInfo : tablesInfo.values()) {
2164 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2165 if (!tInfo.checkRegionChain(handler)) {
2166 errors.report("Found inconsistency in table " + tInfo.getName());
2167 }
2168 }
2169 return tablesInfo;
2170 }
2171
2172
2173
2174
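  /**
   * Loads table descriptors for tables that have no regions reported in hbase:meta,
   * so that such tables still appear in the integrity checks and the summary.
   */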
2175 private void loadTableInfosForTablesWithNoRegion() throws IOException {
2176 Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2177 for (HTableDescriptor htd : allTables.values()) {
2178 if (checkMetaOnly && !htd.isMetaTable()) {
2179 continue;
2180 }
2181
2182 TableName tableName = htd.getTableName();
2183 if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2184 TableInfo tableInfo = new TableInfo(tableName);
2185 tableInfo.htds.add(htd);
2186 tablesInfo.put(htd.getTableName(), tableInfo);
2187 }
2188 }
2189 }
2190
2191
2192
2193
2194
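  /**
   * Moves the store files of the contained region into the target region directory
   * and then sidelines the (now empty) contained region dir.
   * @return the number of files moved
   */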
2195 public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2196 int fileMoves = 0;
2197 String thread = Thread.currentThread().getName();
2198 LOG.debug("[" + thread + "] Contained region dir after close and pause");
2199 debugLsr(contained.getHdfsRegionDir());
2200
2201
2202 FileSystem fs = targetRegionDir.getFileSystem(getConf());
2203 FileStatus[] dirs = null;
2204 try {
2205 dirs = fs.listStatus(contained.getHdfsRegionDir());
2206 } catch (FileNotFoundException fnfe) {
2207
2208
2209 if (!fs.exists(contained.getHdfsRegionDir())) {
2210 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2211 + " is missing. Assuming already sidelined or moved.");
2212 } else {
2213 sidelineRegionDir(fs, contained);
2214 }
2215 return fileMoves;
2216 }
2217
2218 if (dirs == null) {
2219 if (!fs.exists(contained.getHdfsRegionDir())) {
2220 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2221 + " already sidelined.");
2222 } else {
2223 sidelineRegionDir(fs, contained);
2224 }
2225 return fileMoves;
2226 }
2227
2228 for (FileStatus cf : dirs) {
2229 Path src = cf.getPath();
2230 Path dst = new Path(targetRegionDir, src.getName());
2231
2232 if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2233
2234 continue;
2235 }
2236
2237 if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2238
2239 continue;
2240 }
2241
2242 LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2243
2244
2245
2246
2247 for (FileStatus hfile : fs.listStatus(src)) {
2248 boolean success = fs.rename(hfile.getPath(), dst);
2249 if (success) {
2250 fileMoves++;
2251 }
2252 }
2253 LOG.debug("[" + thread + "] Sideline directory contents:");
2254 debugLsr(targetRegionDir);
2255 }
2256
2257
2258 sidelineRegionDir(fs, contained);
2259 LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2260 getSidelineDir());
2261 debugLsr(contained.getHdfsRegionDir());
2262
2263 return fileMoves;
2264 }
2265
2266
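  /**
   * Callable that hands one group of overlapping regions to the table integrity
   * error handler so the group can be merged or sidelined.
   */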
2267 static class WorkItemOverlapMerge implements Callable<Void> {
2268 private TableIntegrityErrorHandler handler;
2269 Collection<HbckInfo> overlapgroup;
2270
2271 WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2272 this.handler = handler;
2273 this.overlapgroup = overlapgroup;
2274 }
2275
2276 @Override
2277 public Void call() throws Exception {
2278 handler.handleOverlapGroup(overlapgroup);
2279 return null;
2280 }
2281 };
2282
2283
2284
2285
2286
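  /**
   * Maintains information about a single table: the regions that make it up, the
   * servers it is deployed on, its table descriptor(s), and any region chain
   * problems (backwards regions, overlap groups, sidelined regions) found so far.
   */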
2287 public class TableInfo {
2288 TableName tableName;
2289 TreeSet <ServerName> deployedOn;
2290
2291
2292 final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
2293
2294
2295 final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
2296
2297
2298 final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
2299
2300
2301 final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
2302
2303
2304 final Multimap<byte[], HbckInfo> overlapGroups =
2305 TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2306
2307
2308 private ImmutableList<HRegionInfo> regionsFromMeta = null;
2309
2310 TableInfo(TableName name) {
2311 this.tableName = name;
2312 deployedOn = new TreeSet <ServerName>();
2313 }
2314
2315
2316
2317
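    /**
     * @return the table descriptor for this table, or null if none or several
     *         different descriptors were collected
     */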
2318 private HTableDescriptor getHTD() {
2319 if (htds.size() == 1) {
2320 return (HTableDescriptor)htds.toArray()[0];
2321 } else {
2322 LOG.error("None/Multiple table descriptors found for table '"
2323 + tableName + "' regions: " + htds);
2324 }
2325 return null;
2326 }
2327
2328 public void addRegionInfo(HbckInfo hir) {
2329 if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2330
2331 sc.add(hir);
2332 return;
2333 }
2334
2335
2336 if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2337 errors.reportError(
2338 ERROR_CODE.REGION_CYCLE,
2339 String.format("The endkey for this region comes before the "
2340 + "startkey, startkey=%s, endkey=%s",
2341 Bytes.toStringBinary(hir.getStartKey()),
2342 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2343 backwards.add(hir);
2344 return;
2345 }
2346
2347
2348 sc.add(hir);
2349 }
2350
2351 public void addServer(ServerName server) {
2352 this.deployedOn.add(server);
2353 }
2354
2355 public TableName getName() {
2356 return tableName;
2357 }
2358
2359 public int getNumRegions() {
2360 return sc.getStarts().size() + backwards.size();
2361 }
2362
2363 public synchronized ImmutableList<HRegionInfo> getRegionsFromMeta() {
2364
2365 if (regionsFromMeta == null) {
2366 List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
2367 for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
2368 if (tableName.equals(h.getTableName())) {
2369 if (h.metaEntry != null) {
2370 regions.add((HRegionInfo) h.metaEntry);
2371 }
2372 }
2373 }
2374 regionsFromMeta = Ordering.natural().immutableSortedCopy(regions);
2375 }
2376
2377 return regionsFromMeta;
2378 }
2379
2380 private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2381 ErrorReporter errors;
2382
2383 IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2384 this.errors = errors;
2385 setTableInfo(ti);
2386 }
2387
2388 @Override
2389 public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2390 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2391 "First region should start with an empty key. You need to "
2392 + " create a new region and regioninfo in HDFS to plug the hole.",
2393 getTableInfo(), hi);
2394 }
2395
2396 @Override
2397 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2398 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2399 "Last region should end with an empty key. You need to "
2400 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2401 }
2402
2403 @Override
2404 public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2405 errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2406 "Region has the same start and end key.", getTableInfo(), hi);
2407 }
2408
2409 @Override
2410 public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2411 byte[] key = r1.getStartKey();
2412
2413 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2414 "Multiple regions have the same startkey: "
2415 + Bytes.toStringBinary(key), getTableInfo(), r1);
2416 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2417 "Multiple regions have the same startkey: "
2418 + Bytes.toStringBinary(key), getTableInfo(), r2);
2419 }
2420
2421 @Override
2422 public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2423 errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2424 "There is an overlap in the region chain.",
2425 getTableInfo(), hi1, hi2);
2426 }
2427
2428 @Override
2429 public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2430 errors.reportError(
2431 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2432 "There is a hole in the region chain between "
2433 + Bytes.toStringBinary(holeStart) + " and "
2434 + Bytes.toStringBinary(holeStop)
2435 + ". You need to create a new .regioninfo and region "
2436 + "dir in hdfs to plug the hole.");
2437 }
2438 };
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
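    /**
     * Error handler that not only reports integrity problems but also repairs them on
     * HDFS: holes in the region chain are plugged by creating new empty regions, and
     * overlap groups are merged or, if too large, sidelined.
     */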
2452 private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2453 Configuration conf;
2454
2455 boolean fixOverlaps = true;
2456
2457 HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2458 boolean fixHoles, boolean fixOverlaps) {
2459 super(ti, errors);
2460 this.conf = conf;
2461 this.fixOverlaps = fixOverlaps;
2462
2463 }
2464
2465
2466
2467
2468
2469
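    /**
     * Handles the case where the first region of the table does not start with an
     * empty key by creating a new empty region in HDFS that covers the missing range.
     */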
2470 @Override
2471 public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2472 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2473 "First region should start with an empty key. Creating a new " +
2474 "region and regioninfo in HDFS to plug the hole.",
2475 getTableInfo(), next);
2476 HTableDescriptor htd = getTableInfo().getHTD();
2477
2478 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
2479 HConstants.EMPTY_START_ROW, next.getStartKey());
2480
2481
2482 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2483 LOG.info("Table region start key was not empty. Created new empty region: "
2484 + newRegion + " " +region);
2485 fixes++;
2486 }
2487
2488 @Override
2489 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2490 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2491 "Last region should end with an empty key. Creating a new "
2492 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2493 HTableDescriptor htd = getTableInfo().getHTD();
2494
2495 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
2496 HConstants.EMPTY_START_ROW);
2497
2498 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2499 LOG.info("Table region end key was not empty. Created new empty region: " + newRegion
2500 + " " + region);
2501 fixes++;
2502 }
2503
2504
2505
2506
2507
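    /**
     * There is a hole in the region chain. Creates a new empty region in HDFS that
     * spans the hole so the table becomes contiguous again.
     */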
2508 @Override
2509 public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2510 errors.reportError(
2511 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2512 "There is a hole in the region chain between "
2513 + Bytes.toStringBinary(holeStartKey) + " and "
2514 + Bytes.toStringBinary(holeStopKey)
2515 + ". Creating a new regioninfo and region "
2516 + "dir in hdfs to plug the hole.");
2517 HTableDescriptor htd = getTableInfo().getHTD();
2518 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
2519 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2520 LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
2521 fixes++;
2522 }
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
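    /**
     * Takes a group of overlapping regions and merges them into a single region.
     * If the group is larger than maxMerge, no merge is attempted; instead the
     * largest ranges may be sidelined when -sidelineBigOverlaps is enabled.
     */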
2535 @Override
2536 public void handleOverlapGroup(Collection<HbckInfo> overlap)
2537 throws IOException {
2538 Preconditions.checkNotNull(overlap);
      Preconditions.checkArgument(overlap.size() > 0);
2540
2541 if (!this.fixOverlaps) {
2542 LOG.warn("Not attempting to repair overlaps.");
2543 return;
2544 }
2545
2546 if (overlap.size() > maxMerge) {
2547 LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2548 "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2549 if (sidelineBigOverlaps) {
2550
2551 sidelineBigOverlaps(overlap);
2552 }
2553 return;
2554 }
2555
2556 mergeOverlaps(overlap);
2557 }
2558
2559 void mergeOverlaps(Collection<HbckInfo> overlap)
2560 throws IOException {
2561 String thread = Thread.currentThread().getName();
2562 LOG.info("== [" + thread + "] Merging regions into one region: "
2563 + Joiner.on(",").join(overlap));
2564
2565 Pair<byte[], byte[]> range = null;
2566 for (HbckInfo hi : overlap) {
2567 if (range == null) {
2568 range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2569 } else {
2570 if (RegionSplitCalculator.BYTES_COMPARATOR
2571 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2572 range.setFirst(hi.getStartKey());
2573 }
2574 if (RegionSplitCalculator.BYTES_COMPARATOR
2575 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2576 range.setSecond(hi.getEndKey());
2577 }
2578 }
2579
2580 LOG.debug("[" + thread + "] Closing region before moving data around: " + hi);
2581 LOG.debug("[" + thread + "] Contained region dir before close");
2582 debugLsr(hi.getHdfsRegionDir());
2583 try {
2584 LOG.info("[" + thread + "] Closing region: " + hi);
2585 closeRegion(hi);
2586 } catch (IOException ioe) {
2587 LOG.warn("[" + thread + "] Was unable to close region " + hi
2588 + ". Just continuing... ", ioe);
2589 } catch (InterruptedException e) {
2590 LOG.warn("[" + thread + "] Was unable to close region " + hi
2591 + ". Just continuing... ", e);
2592 }
2593
2594 try {
2595 LOG.info("[" + thread + "] Offlining region: " + hi);
2596 offline(hi.getRegionName());
2597 } catch (IOException ioe) {
2598 LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
2599 + ". Just continuing... ", ioe);
2600 }
2601 }
2602
2603
2604 HTableDescriptor htd = getTableInfo().getHTD();
2605
2606 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
2607 range.getSecond());
2608 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2609 LOG.info("[" + thread + "] Created new empty container region: " +
2610 newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2611 debugLsr(region.getRegionFileSystem().getRegionDir());
2612
2613
      boolean didFix = false;
2615 Path target = region.getRegionFileSystem().getRegionDir();
2616 for (HbckInfo contained : overlap) {
2617 LOG.info("[" + thread + "] Merging " + contained + " into " + target );
2618 int merges = mergeRegionDirs(target, contained);
2619 if (merges > 0) {
2620 didFix = true;
2621 }
2622 }
2623 if (didFix) {
2624 fixes++;
2625 }
2626 }
2627
2628
2629
2630
2631
2632
2633
2634
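    /**
     * Sidelines the regions with the widest key ranges from an overlap group that is
     * too big to merge, moving their data aside on HDFS so the rest can be merged.
     */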
2635 void sidelineBigOverlaps(
2636 Collection<HbckInfo> bigOverlap) throws IOException {
2637 int overlapsToSideline = bigOverlap.size() - maxMerge;
2638 if (overlapsToSideline > maxOverlapsToSideline) {
2639 overlapsToSideline = maxOverlapsToSideline;
2640 }
2641 List<HbckInfo> regionsToSideline =
2642 RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
2643 FileSystem fs = FileSystem.get(conf);
2644 for (HbckInfo regionToSideline: regionsToSideline) {
2645 try {
2646 LOG.info("Closing region: " + regionToSideline);
2647 closeRegion(regionToSideline);
2648 } catch (IOException ioe) {
2649 LOG.warn("Was unable to close region " + regionToSideline
2650 + ". Just continuing... ", ioe);
2651 } catch (InterruptedException e) {
2652 LOG.warn("Was unable to close region " + regionToSideline
2653 + ". Just continuing... ", e);
2654 }
2655
2656 try {
2657 LOG.info("Offlining region: " + regionToSideline);
2658 offline(regionToSideline.getRegionName());
2659 } catch (IOException ioe) {
2660 LOG.warn("Unable to offline region from master: " + regionToSideline
2661 + ". Just continuing... ", ioe);
2662 }
2663
2664 LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
2665 Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
2666 if (sidelineRegionDir != null) {
2667 sidelinedRegions.put(sidelineRegionDir, regionToSideline);
2668 LOG.info("After sidelined big overlapped region: "
2669 + regionToSideline.getRegionNameAsString()
2670 + " to " + sidelineRegionDir.toString());
2671 fixes++;
2672 }
2673 }
2674 }
2675 }
2676
2677
2678
2679
2680
2681
2682
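    /**
     * Checks the region chain of this table for holes, overlaps, duplicate start keys
     * and degenerate regions, invoking the given handler for each problem found.
     * @return true if no new errors were reported (or the table is disabled)
     */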
2683 public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
2684
2685
2686
2687 if (disabledTables.contains(this.tableName)) {
2688 return true;
2689 }
2690 int originalErrorsCount = errors.getErrorList().size();
2691 Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
2692 SortedSet<byte[]> splits = sc.getSplits();
2693
2694 byte[] prevKey = null;
2695 byte[] problemKey = null;
2696
2697 if (splits.size() == 0) {
2698
2699 handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
2700 }
2701
2702 for (byte[] key : splits) {
2703 Collection<HbckInfo> ranges = regions.get(key);
2704 if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
2705 for (HbckInfo rng : ranges) {
2706 handler.handleRegionStartKeyNotEmpty(rng);
2707 }
2708 }
2709
2710
2711 for (HbckInfo rng : ranges) {
2712
2713 byte[] endKey = rng.getEndKey();
2714 endKey = (endKey.length == 0) ? null : endKey;
2715 if (Bytes.equals(rng.getStartKey(),endKey)) {
2716 handler.handleDegenerateRegion(rng);
2717 }
2718 }
2719
2720 if (ranges.size() == 1) {
2721
2722 if (problemKey != null) {
2723 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2724 }
2725 problemKey = null;
2726 } else if (ranges.size() > 1) {
2727
2728
2729 if (problemKey == null) {
2730
2731 LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
2732 problemKey = key;
2733 }
2734 overlapGroups.putAll(problemKey, ranges);
2735
2736
2737 ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
2738
2739 for (HbckInfo r1 : ranges) {
2740 subRange.remove(r1);
2741 for (HbckInfo r2 : subRange) {
2742 if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
2743 handler.handleDuplicateStartKeys(r1,r2);
2744 } else {
2745
2746 handler.handleOverlapInRegionChain(r1, r2);
2747 }
2748 }
2749 }
2750
2751 } else if (ranges.size() == 0) {
2752 if (problemKey != null) {
2753 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2754 }
2755 problemKey = null;
2756
2757 byte[] holeStopKey = sc.getSplits().higher(key);
2758
2759 if (holeStopKey != null) {
2760
2761 handler.handleHoleInRegionChain(key, holeStopKey);
2762 }
2763 }
2764 prevKey = key;
2765 }
2766
2767
2768
2769 if (prevKey != null) {
2770 handler.handleRegionEndKeyNotEmpty(prevKey);
2771 }
2772
2773
2774 if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
2775 LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" +
2776 " false to run serially.");
2777 boolean ok = handleOverlapsParallel(handler, prevKey);
2778 if (!ok) {
2779 return false;
2780 }
2781 } else {
2782 LOG.info("Handling overlap merges serially. set hbasefsck.overlap.merge.parallel to" +
2783 " true to run in parallel.");
2784 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2785 handler.handleOverlapGroup(overlap);
2786 }
2787 }
2788
2789 if (details) {
2790
2791 errors.print("---- Table '" + this.tableName
2792 + "': region split map");
2793 dump(splits, regions);
2794 errors.print("---- Table '" + this.tableName
2795 + "': overlap groups");
2796 dumpOverlapProblems(overlapGroups);
2797 errors.print("There are " + overlapGroups.keySet().size()
2798 + " overlap groups with " + overlapGroups.size()
2799 + " overlapping regions");
2800 }
2801 if (!sidelinedRegions.isEmpty()) {
2802 LOG.warn("Sidelined big overlapped regions, please bulk load them!");
2803 errors.print("---- Table '" + this.tableName
2804 + "': sidelined big overlapped regions");
2805 dumpSidelinedRegions(sidelinedRegions);
2806 }
2807 return errors.getErrorList().size() == originalErrorsCount;
2808 }
2809
2810 private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
2811 throws IOException {
2812
2813
2814 List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
2815 List<Future<Void>> rets;
2816 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2817
2818 merges.add(new WorkItemOverlapMerge(overlap, handler));
2819 }
2820 try {
2821 rets = executor.invokeAll(merges);
2822 } catch (InterruptedException e) {
2823 LOG.error("Overlap merges were interrupted", e);
2824 return false;
2825 }
2826 for(int i=0; i<merges.size(); i++) {
2827 WorkItemOverlapMerge work = merges.get(i);
2828 Future<Void> f = rets.get(i);
2829 try {
2830 f.get();
2831 } catch(ExecutionException e) {
2832 LOG.warn("Failed to merge overlap group" + work, e.getCause());
2833 } catch (InterruptedException e) {
2834 LOG.error("Waiting for overlap merges was interrupted", e);
2835 return false;
2836 }
2837 }
2838 return true;
2839 }
2840
2841
2842
2843
2844
2845
2846
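    /**
     * Dumps, via the error reporter, every split key of this table together with the
     * regions that cover it. Used with the -details option.
     */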
2847 void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
2848
2849 StringBuilder sb = new StringBuilder();
2850 for (byte[] k : splits) {
2851 sb.setLength(0);
2852 sb.append(Bytes.toStringBinary(k) + ":\t");
2853 for (HbckInfo r : regions.get(k)) {
2854 sb.append("[ "+ r.toString() + ", "
2855 + Bytes.toStringBinary(r.getEndKey())+ "]\t");
2856 }
2857 errors.print(sb.toString());
2858 }
2859 }
2860 }
2861
2862 public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
2863
2864
2865 for (byte[] k : regions.keySet()) {
2866 errors.print(Bytes.toStringBinary(k) + ":");
2867 for (HbckInfo r : regions.get(k)) {
2868 errors.print("[ " + r.toString() + ", "
2869 + Bytes.toStringBinary(r.getEndKey()) + "]");
2870 }
2871 errors.print("----");
2872 }
2873 }
2874
2875 public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
2876 for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
2877 TableName tableName = entry.getValue().getTableName();
2878 Path path = entry.getKey();
2879 errors.print("This sidelined region dir should be bulk loaded: "
2880 + path.toString());
2881 errors.print("Bulk load command looks like: "
2882 + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
2883 + path.toUri().getPath() + " "+ tableName);
2884 }
2885 }
2886
2887 public Multimap<byte[], HbckInfo> getOverlapGroups(
2888 TableName table) {
2889 TableInfo ti = tablesInfo.get(table);
2890 return ti.overlapGroups;
2891 }
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
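  /**
   * Returns the descriptors of user tables whose first region has not been modified
   * within the configured time lag; tables modified more recently are counted in
   * numSkipped instead.
   */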
2902 HTableDescriptor[] getTables(AtomicInteger numSkipped) {
2903 List<TableName> tableNames = new ArrayList<TableName>();
2904 long now = System.currentTimeMillis();
2905
2906 for (HbckInfo hbi : regionInfoMap.values()) {
2907 MetaEntry info = hbi.metaEntry;
2908
2909
2910
2911 if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
2912 if (info.modTime + timelag < now) {
2913 tableNames.add(info.getTable());
2914 } else {
2915 numSkipped.incrementAndGet();
2916 }
2917 }
2918 }
2919 return getHTableDescriptors(tableNames);
2920 }
2921
2922 HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
2923 HTableDescriptor[] htd = new HTableDescriptor[0];
2924 try {
2925 LOG.info("getHTableDescriptors == tableNames => " + tableNames);
2926 htd = new HBaseAdmin(getConf()).getTableDescriptorsByTableName(tableNames);
2927 } catch (IOException e) {
2928 LOG.debug("Exception getting table descriptors", e);
2929 }
2930 return htd;
2931 }
2932
2933
2934
2935
2936
2937
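  /**
   * Gets the HbckInfo for the given encoded region name, creating and registering a
   * new (empty) entry if the region has not been seen yet.
   */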
2938 private synchronized HbckInfo getOrCreateInfo(String name) {
2939 HbckInfo hbi = regionInfoMap.get(name);
2940 if (hbi == null) {
2941 hbi = new HbckInfo(null);
2942 regionInfoMap.put(name, hbi);
2943 }
2944 return hbi;
2945 }
2946
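  /**
   * Checks table locks held in ZooKeeper and, when -fixTableLocks is set, expires
   * locks that have been held for too long.
   */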
2947 private void checkAndFixTableLocks() throws IOException {
2948 ZooKeeperWatcher zkw = createZooKeeperWatcher();
2949
2950 try {
      // Reuse the watcher opened above; it is closed in the finally block.
      TableLockChecker checker = new TableLockChecker(zkw, errors);
2952 checker.checkTableLocks();
2953
2954 if (this.fixTableLocks) {
2955 checker.fixExpiredTableLocks();
2956 }
2957 } finally {
2958 zkw.close();
2959 }
2960 }
2961
2962
2963
2964
2965
2966
2967
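  /**
   * Checks for tables that are left in the ENABLING state in ZooKeeper but have no
   * entry in hbase:meta, and, when -fixOrphanedTableZnodes is set, marks those
   * orphaned znodes as disabled.
   */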
2968 private void checkAndFixOrphanedTableZNodes()
2969 throws IOException, KeeperException, InterruptedException {
2970 ZooKeeperWatcher zkw = createZooKeeperWatcher();
2971 try {
2972 ZKTable zkTable = new ZKTable(zkw);
2973 Set<TableName> enablingTables = zkTable.getEnablingTables(zkw);
2974 String msg;
2975 TableInfo tableInfo;
2976
2977 for (TableName tableName : enablingTables) {
2978
2979 tableInfo = tablesInfo.get(tableName);
2980 if (tableInfo != null) {
2981
2982 continue;
2983 }
2984
2985 msg = "Table " + tableName + " not found in hbase:meta. Orphaned table ZNode found.";
2986 LOG.warn(msg);
2987 orphanedTableZNodes.add(tableName);
2988 errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg);
2989 }
2990
2991 if (orphanedTableZNodes.size() > 0 && this.fixTableZNodes) {
2992 for (TableName tableName : orphanedTableZNodes) {
2993
2994
2995
2996
2997 zkTable.setDisabledTable(tableName);
2998 }
2999 }
3000 } finally {
3001 zkw.close();
3002 }
3003 }
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
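  /**
   * Checks that hbase:meta is deployed on exactly one region server, fixing
   * unassigned or multiply assigned meta regions when -fixAssignments is enabled.
   * @return true only if hbase:meta is deployed on exactly one server
   */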
3014 boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
3015 List<HbckInfo> metaRegions = Lists.newArrayList();
3016 for (HbckInfo value : regionInfoMap.values()) {
3017 if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
3018 metaRegions.add(value);
3019 }
3020 }
3021
3022
3023
3024 List<ServerName> servers = new ArrayList<ServerName>();
3025 HbckInfo metaHbckInfo = null;
3026 if (!metaRegions.isEmpty()) {
3027 metaHbckInfo = metaRegions.get(0);
3028 servers = metaHbckInfo.deployedOn;
3029 }
3030 if (servers.size() != 1) {
3031 if (servers.size() == 0) {
        errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta is not found on any region server.");
3033 if (shouldFixAssignments()) {
3034 errors.print("Trying to fix a problem with hbase:meta..");
3035 setShouldRerun();
3036
3037 HBaseFsckRepair.fixUnassigned(admin, HRegionInfo.FIRST_META_REGIONINFO);
3038 HBaseFsckRepair.waitUntilAssigned(admin, HRegionInfo.FIRST_META_REGIONINFO);
3039 }
3040 } else if (servers.size() > 1) {
        errors.reportError(ERROR_CODE.MULTI_META_REGION,
            "hbase:meta is deployed on more than one region server.");
3043 if (shouldFixAssignments()) {
3044 if (metaHbckInfo == null) {
3045 errors.print(
3046 "Unable to fix problem with hbase:meta due to hbase:meta region info missing");
3047 return false;
3048 }
3049 errors.print("Trying to fix a problem with hbase:meta..");
3050 setShouldRerun();
3051
3052 HBaseFsckRepair.fixMultiAssignment(admin, metaHbckInfo.metaEntry, servers);
3053 }
3054 }
3055
3056 return false;
3057 }
3058
3059 return true;
3060 }
3061
3062
3063
3064
3065
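  /**
   * Scans hbase:meta and records a MetaEntry for every included region, flagging the
   * parents of merged regions and reporting rows with empty REGIONINFO_QUALIFIER cells.
   */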
3066 boolean loadMetaEntries() throws IOException {
3067 MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
3068 int countRecord = 1;
3069
3070
3071 final Comparator<Cell> comp = new Comparator<Cell>() {
3072 @Override
3073 public int compare(Cell k1, Cell k2) {
          // Compare timestamps without the overflow risk of casting a long difference to int.
          return Long.compare(k1.getTimestamp(), k2.getTimestamp());
3075 }
3076 };
3077
3078 @Override
3079 public boolean processRow(Result result) throws IOException {
3080 try {
3081
3082
3083 long ts = Collections.max(result.listCells(), comp).getTimestamp();
3084 Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(result);
3085 if (pair == null || pair.getFirst() == null) {
3086 emptyRegionInfoQualifiers.add(result);
3087 errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3088 "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3089 return true;
3090 }
3091 ServerName sn = null;
3092 if (pair.getSecond() != null) {
3093 sn = pair.getSecond();
3094 }
3095 HRegionInfo hri = pair.getFirst();
3096 if (!(isTableIncluded(hri.getTable())
3097 || hri.isMetaRegion())) {
3098 return true;
3099 }
3100 PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
3101 MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3102 HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3103 if (previous == null) {
3104 regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3105 } else if (previous.metaEntry == null) {
3106 previous.metaEntry = m;
3107 } else {
            throw new IOException("Two entries in hbase:meta are the same: " + previous);
3109 }
3110
3111 PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result);
3112 for (HRegionInfo mergeRegion : new HRegionInfo[] {
3113 mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3114 if (mergeRegion != null) {
3115
3116 HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3117 hbInfo.setMerged(true);
3118 }
3119 }
3120
3121
3122 if (countRecord % 100 == 0) {
3123 errors.progress();
3124 }
3125 countRecord++;
3126 return true;
3127 } catch (RuntimeException e) {
3128 LOG.error("Result=" + result);
3129 throw e;
3130 }
3131 }
3132 };
3133 if (!checkMetaOnly) {
3134
3135 MetaScanner.metaScan(getConf(), visitor);
3136 }
3137
3138 errors.print("");
3139 return true;
3140 }
3141
3142
3143
3144
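  /**
   * Stores a regioninfo entry scanned from hbase:meta, together with the server the
   * region is assigned to, the timestamp of the entry, and any split daughters.
   */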
3145 static class MetaEntry extends HRegionInfo {
3146 ServerName regionServer;
3147 long modTime;
3148 HRegionInfo splitA, splitB;
3149
3150 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
3151 this(rinfo, regionServer, modTime, null, null);
3152 }
3153
3154 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
3155 HRegionInfo splitA, HRegionInfo splitB) {
3156 super(rinfo);
3157 this.regionServer = regionServer;
3158 this.modTime = modTime;
3159 this.splitA = splitA;
3160 this.splitB = splitB;
3161 }
3162
3163 @Override
3164 public boolean equals(Object o) {
3165 boolean superEq = super.equals(o);
3166 if (!superEq) {
3167 return superEq;
3168 }
3169
3170 MetaEntry me = (MetaEntry) o;
3171 if (!regionServer.equals(me.regionServer)) {
3172 return false;
3173 }
3174 return (modTime == me.modTime);
3175 }
3176
3177 @Override
3178 public int hashCode() {
3179 int hash = Arrays.hashCode(getRegionName());
3180 hash ^= getRegionId();
3181 hash ^= Arrays.hashCode(getStartKey());
3182 hash ^= Arrays.hashCode(getEndKey());
3183 hash ^= Boolean.valueOf(isOffline()).hashCode();
3184 hash ^= getTable().hashCode();
3185 if (regionServer != null) {
3186 hash ^= regionServer.hashCode();
3187 }
3188 hash ^= modTime;
3189 return hash;
3190 }
3191 }
3192
3193
3194
3195
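  /**
   * Stores the regioninfo state read from HDFS.
   */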
3196 static class HdfsEntry {
3197 HRegionInfo hri;
3198 Path hdfsRegionDir = null;
3199 long hdfsRegionDirModTime = 0;
3200 boolean hdfsRegioninfoFilePresent = false;
3201 boolean hdfsOnlyEdits = false;
3202 }
3203
3204
3205
3206
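  /**
   * Stores the regioninfo retrieved from online region servers.
   */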
3207 static class OnlineEntry {
3208 HRegionInfo hri;
3209 ServerName hsa;
3210
3211 @Override
3212 public String toString() {
3213 return hsa.toString() + ";" + hri.getRegionNameAsString();
3214 }
3215 }
3216
3217
3218
3219
3220
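  /**
   * Maintains the state of a region as seen in hbase:meta, in HDFS and on region
   * servers, so the three views can be compared for consistency.
   */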
3221 public static class HbckInfo implements KeyRange {
3222 private MetaEntry metaEntry = null;
3223 private HdfsEntry hdfsEntry = null;
3224 private List<OnlineEntry> deployedEntries = Lists.newArrayList();
3225 private List<ServerName> deployedOn = Lists.newArrayList();
3226 private boolean skipChecks = false;
3227 private boolean isMerged = false;
3228
3229 HbckInfo(MetaEntry metaEntry) {
3230 this.metaEntry = metaEntry;
3231 }
3232
3233 public synchronized void addServer(HRegionInfo hri, ServerName server) {
3234 OnlineEntry rse = new OnlineEntry() ;
3235 rse.hri = hri;
3236 rse.hsa = server;
3237 this.deployedEntries.add(rse);
3238 this.deployedOn.add(server);
3239 }
3240
3241 @Override
3242 public synchronized String toString() {
3243 StringBuilder sb = new StringBuilder();
3244 sb.append("{ meta => ");
3245 sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
3246 sb.append( ", hdfs => " + getHdfsRegionDir());
3247 sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
3248 sb.append(" }");
3249 return sb.toString();
3250 }
3251
3252 @Override
3253 public byte[] getStartKey() {
3254 if (this.metaEntry != null) {
3255 return this.metaEntry.getStartKey();
3256 } else if (this.hdfsEntry != null) {
3257 return this.hdfsEntry.hri.getStartKey();
3258 } else {
3259 LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3260 return null;
3261 }
3262 }
3263
3264 @Override
3265 public byte[] getEndKey() {
3266 if (this.metaEntry != null) {
3267 return this.metaEntry.getEndKey();
3268 } else if (this.hdfsEntry != null) {
3269 return this.hdfsEntry.hri.getEndKey();
3270 } else {
3271 LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3272 return null;
3273 }
3274 }
3275
3276 public TableName getTableName() {
3277 if (this.metaEntry != null) {
3278 return this.metaEntry.getTable();
3279 } else if (this.hdfsEntry != null) {
3280
3281
3282 Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3283 return FSUtils.getTableName(tableDir);
3284 } else {
3285
3286
3287 return null;
3288 }
3289 }
3290
3291 public String getRegionNameAsString() {
3292 if (metaEntry != null) {
3293 return metaEntry.getRegionNameAsString();
3294 } else if (hdfsEntry != null) {
3295 if (hdfsEntry.hri != null) {
3296 return hdfsEntry.hri.getRegionNameAsString();
3297 }
3298 }
3299 return null;
3300 }
3301
3302 public byte[] getRegionName() {
3303 if (metaEntry != null) {
3304 return metaEntry.getRegionName();
3305 } else if (hdfsEntry != null) {
3306 return hdfsEntry.hri.getRegionName();
3307 } else {
3308 return null;
3309 }
3310 }
3311
3312 Path getHdfsRegionDir() {
3313 if (hdfsEntry == null) {
3314 return null;
3315 }
3316 return hdfsEntry.hdfsRegionDir;
3317 }
3318
3319 boolean containsOnlyHdfsEdits() {
3320 if (hdfsEntry == null) {
3321 return false;
3322 }
3323 return hdfsEntry.hdfsOnlyEdits;
3324 }
3325
3326 boolean isHdfsRegioninfoPresent() {
3327 if (hdfsEntry == null) {
3328 return false;
3329 }
3330 return hdfsEntry.hdfsRegioninfoFilePresent;
3331 }
3332
3333 long getModTime() {
3334 if (hdfsEntry == null) {
3335 return 0;
3336 }
3337 return hdfsEntry.hdfsRegionDirModTime;
3338 }
3339
3340 HRegionInfo getHdfsHRI() {
3341 if (hdfsEntry == null) {
3342 return null;
3343 }
3344 return hdfsEntry.hri;
3345 }
3346
3347 public void setSkipChecks(boolean skipChecks) {
3348 this.skipChecks = skipChecks;
3349 }
3350
3351 public boolean isSkipChecks() {
3352 return skipChecks;
3353 }
3354
3355 public void setMerged(boolean isMerged) {
3356 this.isMerged = isMerged;
3357 }
3358
3359 public boolean isMerged() {
3360 return this.isMerged;
3361 }
3362 }
3363
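  /**
   * Orders HbckInfo entries by table name, then start key, then end key, and finally
   * by HDFS region id, so regions of the same table sort into key order.
   */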
3364 final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3365 @Override
3366 public int compare(HbckInfo l, HbckInfo r) {
3367 if (l == r) {
3368
3369 return 0;
3370 }
3371
3372 int tableCompare = l.getTableName().compareTo(r.getTableName());
3373 if (tableCompare != 0) {
3374 return tableCompare;
3375 }
3376
3377 int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3378 l.getStartKey(), r.getStartKey());
3379 if (startComparison != 0) {
3380 return startComparison;
3381 }
3382
3383
3384 byte[] endKey = r.getEndKey();
3385 endKey = (endKey.length == 0) ? null : endKey;
3386 byte[] endKey2 = l.getEndKey();
3387 endKey2 = (endKey2.length == 0) ? null : endKey2;
3388 int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3389 endKey2, endKey);
3390
3391 if (endComparison != 0) {
3392 return endComparison;
3393 }
3394
3395
3396
3397 if (l.hdfsEntry == null && r.hdfsEntry == null) {
3398 return 0;
3399 }
3400 if (l.hdfsEntry == null && r.hdfsEntry != null) {
3401 return 1;
3402 }
3403
3404 if (r.hdfsEntry == null) {
3405 return -1;
3406 }
3407
      return Long.compare(l.hdfsEntry.hri.getRegionId(), r.hdfsEntry.hri.getRegionId());
3409 }
3410 };
3411
3412
3413
3414
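  /**
   * Prints a summary of every table found, including its consistency status, region
   * count and the servers it is deployed on.
   */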
3415 private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
3416 StringBuilder sb = new StringBuilder();
3417 errors.print("Summary:");
3418 for (TableInfo tInfo : tablesInfo.values()) {
3419 if (errors.tableHasErrors(tInfo)) {
3420 errors.print("Table " + tInfo.getName() + " is inconsistent.");
3421 } else {
3422 errors.print(" " + tInfo.getName() + " is okay.");
3423 }
3424 errors.print(" Number of regions: " + tInfo.getNumRegions());
3425 sb.setLength(0);
3426 sb.append(" Deployed on: ");
3427 for (ServerName server : tInfo.deployedOn) {
3428 sb.append(" " + server.toString());
3429 }
3430 errors.print(sb.toString());
3431 }
3432 }
3433
3434 static ErrorReporter getErrorReporter(
3435 final Configuration conf) throws ClassNotFoundException {
3436 Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3437 return ReflectionUtils.newInstance(reporter, conf);
3438 }
3439
3440 public interface ErrorReporter {
3441 enum ERROR_CODE {
3442 UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3443 NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
3444 MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3445 FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3446 HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3447 ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3448 WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR
3449 }
3450 void clear();
3451 void report(String message);
3452 void reportError(String message);
3453 void reportError(ERROR_CODE errorCode, String message);
3454 void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3455 void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3456 void reportError(
3457 ERROR_CODE errorCode,
3458 String message,
3459 TableInfo table,
3460 HbckInfo info1,
3461 HbckInfo info2
3462 );
3463 int summarize();
3464 void detail(String details);
3465 ArrayList<ERROR_CODE> getErrorList();
3466 void progress();
3467 void print(String message);
3468 void resetErrors();
3469 boolean tableHasErrors(TableInfo table);
3470 }
3471
3472 static class PrintingErrorReporter implements ErrorReporter {
3473 public int errorCount = 0;
3474 private int showProgress;
3475
3476 Set<TableInfo> errorTables = new HashSet<TableInfo>();
3477
3478
3479 private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
3480
3481 @Override
3482 public void clear() {
3483 errorTables.clear();
3484 errorList.clear();
3485 errorCount = 0;
3486 }
3487
3488 @Override
3489 public synchronized void reportError(ERROR_CODE errorCode, String message) {
3490 if (errorCode == ERROR_CODE.WRONG_USAGE) {
3491 System.err.println(message);
3492 return;
3493 }
3494
3495 errorList.add(errorCode);
3496 if (!summary) {
3497 System.out.println("ERROR: " + message);
3498 }
3499 errorCount++;
3500 showProgress = 0;
3501 }
3502
3503 @Override
3504 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
3505 errorTables.add(table);
3506 reportError(errorCode, message);
3507 }
3508
3509 @Override
3510 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3511 HbckInfo info) {
3512 errorTables.add(table);
3513 String reference = "(region " + info.getRegionNameAsString() + ")";
3514 reportError(errorCode, reference + " " + message);
3515 }
3516
3517 @Override
3518 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3519 HbckInfo info1, HbckInfo info2) {
3520 errorTables.add(table);
3521 String reference = "(regions " + info1.getRegionNameAsString()
3522 + " and " + info2.getRegionNameAsString() + ")";
3523 reportError(errorCode, reference + " " + message);
3524 }
3525
3526 @Override
3527 public synchronized void reportError(String message) {
3528 reportError(ERROR_CODE.UNKNOWN, message);
3529 }
3530
3531
3532
3533
3534
3535
3536 @Override
3537 public synchronized void report(String message) {
3538 if (! summary) {
3539 System.out.println("ERROR: " + message);
3540 }
3541 showProgress = 0;
3542 }
3543
3544 @Override
3545 public synchronized int summarize() {
3546 System.out.println(Integer.toString(errorCount) +
3547 " inconsistencies detected.");
3548 if (errorCount == 0) {
3549 System.out.println("Status: OK");
3550 return 0;
3551 } else {
3552 System.out.println("Status: INCONSISTENT");
3553 return -1;
3554 }
3555 }
3556
3557 @Override
3558 public ArrayList<ERROR_CODE> getErrorList() {
3559 return errorList;
3560 }
3561
3562 @Override
3563 public synchronized void print(String message) {
3564 if (!summary) {
3565 System.out.println(message);
3566 }
3567 }
3568
3569 @Override
3570 public boolean tableHasErrors(TableInfo table) {
3571 return errorTables.contains(table);
3572 }
3573
3574 @Override
3575 public void resetErrors() {
3576 errorCount = 0;
3577 }
3578
3579 @Override
3580 public synchronized void detail(String message) {
3581 if (details) {
3582 System.out.println(message);
3583 }
3584 showProgress = 0;
3585 }
3586
3587 @Override
3588 public synchronized void progress() {
3589 if (showProgress++ == 10) {
3590 if (!summary) {
3591 System.out.print(".");
3592 }
3593 showProgress = 0;
3594 }
3595 }
3596 }
3597
3598
3599
3600
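  /**
   * Worker that contacts one region server and records every region it is currently
   * serving into the region info map.
   */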
3601 static class WorkItemRegion implements Callable<Void> {
3602 private HBaseFsck hbck;
3603 private ServerName rsinfo;
3604 private ErrorReporter errors;
3605 private HConnection connection;
3606
3607 WorkItemRegion(HBaseFsck hbck, ServerName info,
3608 ErrorReporter errors, HConnection connection) {
3609 this.hbck = hbck;
3610 this.rsinfo = info;
3611 this.errors = errors;
3612 this.connection = connection;
3613 }
3614
3615 @Override
3616 public synchronized Void call() throws IOException {
3617 errors.progress();
3618 try {
3619 BlockingInterface server = connection.getAdmin(rsinfo);
3620
3621
3622 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
3623 regions = filterRegions(regions);
3624
3625 if (details) {
3626 errors.detail("RegionServer: " + rsinfo.getServerName() +
3627 " number of regions: " + regions.size());
3628 for (HRegionInfo rinfo: regions) {
3629 errors.detail(" " + rinfo.getRegionNameAsString() +
3630 " id: " + rinfo.getRegionId() +
3631 " encoded_name: " + rinfo.getEncodedName() +
3632 " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
3633 " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
3634 }
3635 }
3636
3637
3638 for (HRegionInfo r:regions) {
3639 HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3640 hbi.addServer(r, rsinfo);
3641 }
3642 } catch (IOException e) {
3643 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
3644 " Unable to fetch region information. " + e);
3645 throw e;
3646 }
3647 return null;
3648 }
3649
3650 private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
3651 List<HRegionInfo> ret = Lists.newArrayList();
3652 for (HRegionInfo hri : regions) {
3653 if (hri.isMetaTable() || (!hbck.checkMetaOnly
3654 && hbck.isTableIncluded(hri.getTable()))) {
3655 ret.add(hri);
3656 }
3657 }
3658 return ret;
3659 }
3660 }
3661
3662
3663
3664
3665
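  /**
   * Worker that scans one table directory on HDFS and records every region directory
   * found there into the region info map.
   */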
3666 static class WorkItemHdfsDir implements Callable<Void> {
3667 private HBaseFsck hbck;
3668 private FileStatus tableDir;
3669 private ErrorReporter errors;
3670 private FileSystem fs;
3671
3672 WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors,
3673 FileStatus status) {
3674 this.hbck = hbck;
3675 this.fs = fs;
3676 this.tableDir = status;
3677 this.errors = errors;
3678 }
3679
3680 @Override
3681 public synchronized Void call() throws IOException {
3682 try {
        // NOTE: the original body of this worker was elided; the following is a hedged
        // reconstruction of the table directory scan, assuming the standard
        // <hbase root>/<table>/<encoded region name> layout on HDFS.
        FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
        for (FileStatus regionDir : regionDirs) {
          String encodedName = regionDir.getPath().getName();
          // Skip entries that are not encoded region names (e.g. .tabledesc, .tmp).
          if (!encodedName.toLowerCase().matches("[0-9a-f]+")) {
            continue;
          }
          LOG.debug("Loading region info from hdfs: " + regionDir.getPath());
          HbckInfo hbi = hbck.getOrCreateInfo(encodedName);
          HdfsEntry he = new HdfsEntry();
          synchronized (hbi) {
            if (hbi.getHdfsRegionDir() != null) {
              errors.print("Directory " + encodedName + " duplicate??");
            }
            he.hdfsRegionDir = regionDir.getPath();
            he.hdfsRegionDirModTime = regionDir.getModificationTime();
            he.hdfsRegioninfoFilePresent =
                fs.exists(new Path(he.hdfsRegionDir, HRegionFileSystem.REGION_INFO_FILE));
            // Flag region dirs that contain nothing but recovered edits; such directories
            // can be left behind by splits and are skipped by the consistency checks.
            he.hdfsOnlyEdits = true;
            for (FileStatus subDir : fs.listStatus(regionDir.getPath())) {
              String sdName = subDir.getPath().getName();
              if (!sdName.startsWith(".") && !sdName.equals(HConstants.RECOVERED_EDITS_DIR)) {
                he.hdfsOnlyEdits = false;
                break;
              }
            }
            hbi.hdfsEntry = he;
          }
        }
      } catch (IOException e) {
        // Unable to list the table directory -- report it and rethrow so the caller notices.
        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
            + tableDir.getPath().getName()
            + " Unable to fetch region information. " + e);
        throw e;
      }
      return null;
    }
  }
3736
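  /**
   * Worker that loads the .regioninfo file of one region from HDFS, reporting the
   * region as an HDFS orphan if the file cannot be read.
   */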
3737 static class WorkItemHdfsRegionInfo implements Callable<Void> {
3738 private HbckInfo hbi;
3739 private HBaseFsck hbck;
3740 private ErrorReporter errors;
3741
3742 WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
3743 this.hbi = hbi;
3744 this.hbck = hbck;
3745 this.errors = errors;
3746 }
3747
3748 @Override
3749 public synchronized Void call() throws IOException {
3750
3751 if (hbi.getHdfsHRI() == null) {
3752 try {
3753 hbck.loadHdfsRegioninfo(hbi);
3754 } catch (IOException ioe) {
3755 String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3756 + hbi.getTableName() + " in hdfs dir "
3757 + hbi.getHdfsRegionDir()
3758 + "! It may be an invalid format or version file. Treating as "
3759 + "an orphaned regiondir.";
3760 errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3761 try {
3762 hbck.debugLsr(hbi.getHdfsRegionDir());
3763 } catch (IOException ioe2) {
3764 LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3765 throw ioe2;
3766 }
3767 hbck.orphanHdfsDirs.add(hbi);
3768 throw ioe;
3769 }
3770 }
3771 return null;
3772 }
3773 };
3774
3775
3776
3777
3778
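  /**
   * Display the full report from fsck, including per-region detail messages.
   */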
3779 public static void setDisplayFullReport() {
3780 details = true;
3781 }
3782
3783
3784
3785
3786
3787 void setSummary() {
3788 summary = true;
3789 }
3790
3791
3792
3793
3794
3795 void setCheckMetaOnly() {
3796 checkMetaOnly = true;
3797 }
3798
3799
3800
3801
3802 void setRegionBoundariesCheck() {
3803 checkRegionBoundaries = true;
3804 }
3805
3806
3807
3808
3809
3810 public void setFixTableLocks(boolean shouldFix) {
3811 fixTableLocks = shouldFix;
3812 fixAny |= shouldFix;
3813 }
3814
3815
3816
3817
3818
3819 public void setFixTableZNodes(boolean shouldFix) {
3820 fixTableZNodes = shouldFix;
3821 fixAny |= shouldFix;
3822 }
3823
3824
3825
3826
3827
3828
3829
3830 void setShouldRerun() {
3831 rerun = true;
3832 }
3833
3834 boolean shouldRerun() {
3835 return rerun;
3836 }
3837
3838
3839
3840
3841
3842 public void setFixAssignments(boolean shouldFix) {
3843 fixAssignments = shouldFix;
3844 fixAny |= shouldFix;
3845 }
3846
3847 boolean shouldFixAssignments() {
3848 return fixAssignments;
3849 }
3850
3851 public void setFixMeta(boolean shouldFix) {
3852 fixMeta = shouldFix;
3853 fixAny |= shouldFix;
3854 }
3855
3856 boolean shouldFixMeta() {
3857 return fixMeta;
3858 }
3859
3860 public void setFixEmptyMetaCells(boolean shouldFix) {
3861 fixEmptyMetaCells = shouldFix;
3862 fixAny |= shouldFix;
3863 }
3864
3865 boolean shouldFixEmptyMetaCells() {
3866 return fixEmptyMetaCells;
3867 }
3868
3869 public void setCheckHdfs(boolean checking) {
3870 checkHdfs = checking;
3871 }
3872
3873 boolean shouldCheckHdfs() {
3874 return checkHdfs;
3875 }
3876
3877 public void setFixHdfsHoles(boolean shouldFix) {
3878 fixHdfsHoles = shouldFix;
3879 fixAny |= shouldFix;
3880 }
3881
3882 boolean shouldFixHdfsHoles() {
3883 return fixHdfsHoles;
3884 }
3885
3886 public void setFixTableOrphans(boolean shouldFix) {
3887 fixTableOrphans = shouldFix;
3888 fixAny |= shouldFix;
3889 }
3890
3891 boolean shouldFixTableOrphans() {
3892 return fixTableOrphans;
3893 }
3894
3895 public void setFixHdfsOverlaps(boolean shouldFix) {
3896 fixHdfsOverlaps = shouldFix;
3897 fixAny |= shouldFix;
3898 }
3899
3900 boolean shouldFixHdfsOverlaps() {
3901 return fixHdfsOverlaps;
3902 }
3903
3904 public void setFixHdfsOrphans(boolean shouldFix) {
3905 fixHdfsOrphans = shouldFix;
3906 fixAny |= shouldFix;
3907 }
3908
3909 boolean shouldFixHdfsOrphans() {
3910 return fixHdfsOrphans;
3911 }
3912
3913 public void setFixVersionFile(boolean shouldFix) {
3914 fixVersionFile = shouldFix;
3915 fixAny |= shouldFix;
3916 }
3917
3918 public boolean shouldFixVersionFile() {
3919 return fixVersionFile;
3920 }
3921
3922 public void setSidelineBigOverlaps(boolean sbo) {
3923 this.sidelineBigOverlaps = sbo;
3924 }
3925
3926 public boolean shouldSidelineBigOverlaps() {
3927 return sidelineBigOverlaps;
3928 }
3929
3930 public void setFixSplitParents(boolean shouldFix) {
3931 fixSplitParents = shouldFix;
3932 fixAny |= shouldFix;
3933 }
3934
3935 boolean shouldFixSplitParents() {
3936 return fixSplitParents;
3937 }
3938
3939 public void setFixReferenceFiles(boolean shouldFix) {
3940 fixReferenceFiles = shouldFix;
3941 fixAny |= shouldFix;
3942 }
3943
3944 boolean shouldFixReferenceFiles() {
3945 return fixReferenceFiles;
3946 }
3947
3948 public boolean shouldIgnorePreCheckPermission() {
3949 return !fixAny || ignorePreCheckPermission;
3950 }
3951
3952 public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
3953 this.ignorePreCheckPermission = ignorePreCheckPermission;
3954 }
3955
3956
3957
3958
3959 public void setMaxMerge(int mm) {
3960 this.maxMerge = mm;
3961 }
3962
3963 public int getMaxMerge() {
3964 return maxMerge;
3965 }
3966
3967 public void setMaxOverlapsToSideline(int mo) {
3968 this.maxOverlapsToSideline = mo;
3969 }
3970
3971 public int getMaxOverlapsToSideline() {
3972 return maxOverlapsToSideline;
3973 }
3974
3975
3976
3977
3978
3979 boolean isTableIncluded(TableName table) {
3980 return (tablesIncluded.size() == 0) || tablesIncluded.contains(table);
3981 }
3982
3983 public void includeTable(TableName table) {
3984 tablesIncluded.add(table);
3985 }
3986
3987 Set<TableName> getIncludedTables() {
3988 return new HashSet<TableName>(tablesIncluded);
3989 }
3990
3991
3992
3993
3994
3995
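  /**
   * Only inspect regions and tables whose hbase:meta state has not changed within
   * the given time lag.
   * @param seconds the time lag in seconds
   */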
3996 public void setTimeLag(long seconds) {
3997 timelag = seconds * 1000;
3998 }
3999
4000
4001
4002
4003
4004 public void setSidelineDir(String sidelineDir) {
4005 this.sidelineDir = new Path(sidelineDir);
4006 }
4007
4008 protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
4009 return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
4010 }
4011
4012 public HFileCorruptionChecker getHFilecorruptionChecker() {
4013 return hfcc;
4014 }
4015
4016 public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
4017 this.hfcc = hfcc;
4018 }
4019
4020 public void setRetCode(int code) {
4021 this.retcode = code;
4022 }
4023
4024 public int getRetCode() {
4025 return retcode;
4026 }
4027
4028 protected HBaseFsck printUsageAndExit() {
4029 StringWriter sw = new StringWriter(2048);
4030 PrintWriter out = new PrintWriter(sw);
4031 out.println("Usage: fsck [opts] {only tables}");
4032 out.println(" where [opts] are:");
4033 out.println(" -help Display help options (this)");
4034 out.println(" -details Display full report of all regions.");
4035 out.println(" -timelag <timeInSeconds> Process only regions that " +
4036 " have not experienced any metadata updates in the last " +
4037 " <timeInSeconds> seconds.");
4038 out.println(" -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4039 " before checking if the fix worked if run with -fix");
4040 out.println(" -summary Print only summary of the tables and status.");
4041 out.println(" -metaonly Only check the state of the hbase:meta table.");
4042 out.println(" -sidelineDir <hdfs://> HDFS path to backup existing meta.");
4043 out.println(" -boundaries Verify that region boundaries are the same between META and store files.");
4044
4045 out.println("");
4046 out.println(" Metadata Repair options: (expert features, use with caution!)");
4047 out.println(" -fix Try to fix region assignments. This is for backwards compatibility");
4048 out.println(" -fixAssignments Try to fix region assignments. Replaces the old -fix");
4049 out.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good.");
4050 out.println(" -noHdfsChecking Don't load/check region info from HDFS."
4051 + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4052 out.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
4053 out.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
4054 out.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4055 out.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
4056 out.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
4057 out.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
4058 out.println(" -sidelineBigOverlaps When fixing region overlaps, allow sidelining big overlaps");
4059 out.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
4060 out.println(" -fixSplitParents Try to force offline split parents to be online.");
4061 out.println(" -ignorePreCheckPermission ignore filesystem permission pre-check");
4062 out.println(" -fixReferenceFiles Try to offline lingering reference store files");
4063 out.println(" -fixEmptyMetaCells Try to fix hbase:meta entries not referencing any region"
4064 + " (empty REGIONINFO_QUALIFIER rows)");
4065
4066 out.println("");
4067 out.println(" Datafile Repair options: (expert features, use with caution!)");
4068 out.println(" -checkCorruptHFiles Check all Hfiles by opening them to make sure they are valid");
4069 out.println(" -sidelineCorruptHFiles Quarantine corrupted HFiles. Implies -checkCorruptHFiles");
4070
4071 out.println("");
4072 out.println(" Metadata Repair shortcuts");
4073 out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4074 "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps " +
4075 "-fixReferenceFiles -fixTableLocks -fixOrphanedTableZnodes");
4076 out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4077
4078 out.println("");
4079 out.println(" Table lock options");
4080 out.println(" -fixTableLocks Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
4081
4082 out.println("");
4083 out.println(" Table Znode options");
4084 out.println(" -fixOrphanedTableZnodes Set table state in ZNode to disabled if table does not exist");
4085
4086 out.flush();
4087 errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4088
4089 setRetCode(-2);
4090 return this;
4091 }
4092
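/**
 * Main program: points the default filesystem at the HBase root directory and runs
 * HBaseFsckTool via ToolRunner, exiting with the tool's return code.
 */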
4099 public static void main(String[] args) throws Exception {
4100
4101 Configuration conf = HBaseConfiguration.create();
4102 Path hbasedir = FSUtils.getRootDir(conf);
4103 URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4104 FSUtils.setFsDefault(conf, new Path(defaultFs));
4105 int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4106 System.exit(ret);
4107 }
4108
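/**
 * Tool wrapper so hbck can be launched through ToolRunner and pick up standard Hadoop
 * command-line configuration arguments.
 */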
4112 static class HBaseFsckTool extends Configured implements Tool {
4113 HBaseFsckTool(Configuration conf) { super(conf); }
4114 @Override
4115 public int run(String[] args) throws Exception {
4116 HBaseFsck hbck = new HBaseFsck(getConf());
4117 hbck.exec(hbck.executor, args);
4118 return hbck.getRetCode();
4119 }
4120 };
4121
4122
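/**
 * Parse the command-line arguments, run the requested checks/repairs, and return this instance;
 * the exit status is available afterwards via getRetCode(). Bare (non-dash) arguments name the
 * tables to restrict checking to, e.g. "-fixAssignments MyTable" (MyTable being a hypothetical
 * table name used here only for illustration).
 */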
4123 public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
4124 ServiceException, InterruptedException {
4125 long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4126
4127 boolean checkCorruptHFiles = false;
4128 boolean sidelineCorruptHFiles = false;
4129
4130
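// Process command-line options.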
4131 for (int i = 0; i < args.length; i++) {
4132 String cmd = args[i];
4133 if (cmd.equals("-help") || cmd.equals("-h")) {
4134 return printUsageAndExit();
4135 } else if (cmd.equals("-details")) {
4136 setDisplayFullReport();
4137 } else if (cmd.equals("-timelag")) {
4138 if (i == args.length - 1) {
4139 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4140 return printUsageAndExit();
4141 }
4142 try {
4143 long timelag = Long.parseLong(args[i+1]);
4144 setTimeLag(timelag);
4145 } catch (NumberFormatException e) {
4146 errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4147 return printUsageAndExit();
4148 }
4149 i++;
4150 } else if (cmd.equals("-sleepBeforeRerun")) {
4151 if (i == args.length - 1) {
4152 errors.reportError(ERROR_CODE.WRONG_USAGE,
4153 "HBaseFsck: -sleepBeforeRerun needs a value.");
4154 return printUsageAndExit();
4155 }
4156 try {
4157 sleepBeforeRerun = Long.parseLong(args[i+1]);
4158 } catch (NumberFormatException e) {
4159 errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4160 return printUsageAndExit();
4161 }
4162 i++;
4163 } else if (cmd.equals("-sidelineDir")) {
4164 if (i == args.length - 1) {
4165 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4166 return printUsageAndExit();
4167 }
4168 i++;
4169 setSidelineDir(args[i]);
4170 } else if (cmd.equals("-fix")) {
4171 errors.reportError(ERROR_CODE.WRONG_USAGE,
4172 "This option is deprecated, please use -fixAssignments instead.");
4173 setFixAssignments(true);
4174 } else if (cmd.equals("-fixAssignments")) {
4175 setFixAssignments(true);
4176 } else if (cmd.equals("-fixMeta")) {
4177 setFixMeta(true);
4178 } else if (cmd.equals("-noHdfsChecking")) {
4179 setCheckHdfs(false);
4180 } else if (cmd.equals("-fixHdfsHoles")) {
4181 setFixHdfsHoles(true);
4182 } else if (cmd.equals("-fixHdfsOrphans")) {
4183 setFixHdfsOrphans(true);
4184 } else if (cmd.equals("-fixTableOrphans")) {
4185 setFixTableOrphans(true);
4186 } else if (cmd.equals("-fixHdfsOverlaps")) {
4187 setFixHdfsOverlaps(true);
4188 } else if (cmd.equals("-fixVersionFile")) {
4189 setFixVersionFile(true);
4190 } else if (cmd.equals("-sidelineBigOverlaps")) {
4191 setSidelineBigOverlaps(true);
4192 } else if (cmd.equals("-fixSplitParents")) {
4193 setFixSplitParents(true);
4194 } else if (cmd.equals("-ignorePreCheckPermission")) {
4195 setIgnorePreCheckPermission(true);
4196 } else if (cmd.equals("-checkCorruptHFiles")) {
4197 checkCorruptHFiles = true;
4198 } else if (cmd.equals("-sidelineCorruptHFiles")) {
4199 sidelineCorruptHFiles = true;
4200 } else if (cmd.equals("-fixReferenceFiles")) {
4201 setFixReferenceFiles(true);
4202 } else if (cmd.equals("-fixEmptyMetaCells")) {
4203 setFixEmptyMetaCells(true);
4204 } else if (cmd.equals("-repair")) {
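// -repair is a shortcut that turns on the full set of repair options listed in the usage text.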
4207 setFixHdfsHoles(true);
4208 setFixHdfsOrphans(true);
4209 setFixMeta(true);
4210 setFixAssignments(true);
4211 setFixHdfsOverlaps(true);
4212 setFixVersionFile(true);
4213 setSidelineBigOverlaps(true);
4214 setFixSplitParents(false);
4215 setCheckHdfs(true);
4216 setFixReferenceFiles(true);
4217 setFixTableLocks(true);
4218 setFixTableZNodes(true);
4219 } else if (cmd.equals("-repairHoles")) {
4220
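// -repairHoles is a lighter shortcut: fix assignments, meta and HDFS holes, but leave
// orphans and overlaps alone.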
4221 setFixHdfsHoles(true);
4222 setFixHdfsOrphans(false);
4223 setFixMeta(true);
4224 setFixAssignments(true);
4225 setFixHdfsOverlaps(false);
4226 setSidelineBigOverlaps(false);
4227 setFixSplitParents(false);
4228 setCheckHdfs(true);
4229 } else if (cmd.equals("-maxOverlapsToSideline")) {
4230 if (i == args.length - 1) {
4231 errors.reportError(ERROR_CODE.WRONG_USAGE,
4232 "-maxOverlapsToSideline needs a numeric value argument.");
4233 return printUsageAndExit();
4234 }
4235 try {
4236 int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
4237 setMaxOverlapsToSideline(maxOverlapsToSideline);
4238 } catch (NumberFormatException e) {
4239 errors.reportError(ERROR_CODE.WRONG_USAGE,
4240 "-maxOverlapsToSideline needs a numeric value argument.");
4241 return printUsageAndExit();
4242 }
4243 i++;
4244 } else if (cmd.equals("-maxMerge")) {
4245 if (i == args.length - 1) {
4246 errors.reportError(ERROR_CODE.WRONG_USAGE,
4247 "-maxMerge needs a numeric value argument.");
4248 return printUsageAndExit();
4249 }
4250 try {
4251 int maxMerge = Integer.parseInt(args[i+1]);
4252 setMaxMerge(maxMerge);
4253 } catch (NumberFormatException e) {
4254 errors.reportError(ERROR_CODE.WRONG_USAGE,
4255 "-maxMerge needs a numeric value argument.");
4256 return printUsageAndExit();
4257 }
4258 i++;
4259 } else if (cmd.equals("-summary")) {
4260 setSummary();
4261 } else if (cmd.equals("-metaonly")) {
4262 setCheckMetaOnly();
4263 } else if (cmd.equals("-boundaries")) {
4264 setRegionBoundariesCheck();
4265 } else if (cmd.equals("-fixTableLocks")) {
4266 setFixTableLocks(true);
4267 } else if (cmd.equals("-fixOrphanedTableZnodes")) {
4268 setFixTableZNodes(true);
4269 } else if (cmd.startsWith("-")) {
4270 errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option: " + cmd);
4271 return printUsageAndExit();
4272 } else {
4273 includeTable(TableName.valueOf(cmd));
4274 errors.print("Allow checking/fixes for table: " + cmd);
4275 }
4276 }
4277
4278 errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
4279
4280
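// Pre-check that the current user has sufficient filesystem permissions before attempting fixes.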
4281 try {
4282 preCheckPermission();
4283 } catch (AccessDeniedException ace) {
4284 Runtime.getRuntime().exit(-1);
4285 } catch (IOException ioe) {
4286 Runtime.getRuntime().exit(-1);
4287 }
4288
4289
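// Connect to the cluster before doing the actual checking work.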
4290 connect();
4291
4292 try {
4293
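// If requested, check every HFile for corruption first (optionally sidelining corrupt files).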
4294 if (checkCorruptHFiles || sidelineCorruptHFiles) {
4295 LOG.info("Checking all hfiles for corruption");
4296 HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
4297 setHFileCorruptionChecker(hfcc);
4298 Collection<TableName> tables = getIncludedTables();
4299 Collection<Path> tableDirs = new ArrayList<Path>();
4300 Path rootdir = FSUtils.getRootDir(getConf());
4301 if (tables.size() > 0) {
4302 for (TableName t : tables) {
4303 tableDirs.add(FSUtils.getTableDir(rootdir, t));
4304 }
4305 } else {
4306 tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
4307 }
4308 hfcc.checkTables(tableDirs);
4309 hfcc.report(errors);
4310 }
4311
4312
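// Run the online consistency checks and any enabled repairs.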
4313 int code = onlineHbck();
4314 setRetCode(code);
4315
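// If fixes were applied, sleep and then re-run the checks once more (with the fix flags
// cleared) to report whether problems remain.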
4319 if (shouldRerun()) {
4320 try {
4321 LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
4322 Thread.sleep(sleepBeforeRerun);
4323 } catch (InterruptedException ie) {
4324 return this;
4325 }
4326
4327 setFixAssignments(false);
4328 setFixMeta(false);
4329 setFixHdfsHoles(false);
4330 setFixHdfsOverlaps(false);
4331 setFixVersionFile(false);
4332 setFixTableOrphans(false);
4333 errors.resetErrors();
4334 code = onlineHbck();
4335 setRetCode(code);
4336 }
4337 } finally {
4338 IOUtils.cleanup(null, connection, meta, admin);
4339 }
4340 return this;
4341 }
4342
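/**
 * Recursively list the contents of a path (for debugging), using this instance's configuration
 * and error reporter.
 */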
4346 void debugLsr(Path p) throws IOException {
4347 debugLsr(getConf(), p, errors);
4348 }
4349
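/**
 * Recursively list the contents of a path for debugging, reporting through a new
 * PrintingErrorReporter.
 */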
4353 public static void debugLsr(Configuration conf,
4354 Path p) throws IOException {
4355 debugLsr(conf, p, new PrintingErrorReporter());
4356 }
4357
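/**
 * Recursively list the contents of a path for debugging; prints through the supplied
 * ErrorReporter, and only when debug logging is enabled.
 */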
4361 public static void debugLsr(Configuration conf,
4362 Path p, ErrorReporter errors) throws IOException {
4363 if (!LOG.isDebugEnabled() || p == null) {
4364 return;
4365 }
4366 FileSystem fs = p.getFileSystem(conf);
4367
4368 if (!fs.exists(p)) {
4369
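// nothing to list; the path does not exist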
4370 return;
4371 }
4372 errors.print(p.toString());
4373
4374 if (fs.isFile(p)) {
4375 return;
4376 }
4377
4378 if (fs.getFileStatus(p).isDir()) {
4379 FileStatus[] fss = fs.listStatus(p);
4380 for (FileStatus status : fss) {
4381 debugLsr(conf, status.getPath(), errors);
4382 }
4383 }
4384 }
4385 }