1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.io.hfile;
21
22 import java.io.ByteArrayOutputStream;
23 import java.io.DataInput;
24 import java.io.IOException;
25 import java.io.PrintStream;
26 import java.util.ArrayList;
27 import java.util.List;
28 import java.util.Locale;
29 import java.util.Map;
30 import java.util.SortedMap;
31
32 import com.yammer.metrics.core.*;
33 import com.yammer.metrics.reporting.ConsoleReporter;
34
35 import org.apache.commons.cli.CommandLine;
36 import org.apache.commons.cli.CommandLineParser;
37 import org.apache.commons.cli.HelpFormatter;
38 import org.apache.commons.cli.Option;
39 import org.apache.commons.cli.OptionGroup;
40 import org.apache.commons.cli.Options;
41 import org.apache.commons.cli.ParseException;
42 import org.apache.commons.cli.PosixParser;
43 import org.apache.commons.logging.Log;
44 import org.apache.commons.logging.LogFactory;
45 import org.apache.hadoop.hbase.classification.InterfaceAudience;
46 import org.apache.hadoop.hbase.classification.InterfaceStability;
47 import org.apache.hadoop.conf.Configuration;
48 import org.apache.hadoop.conf.Configured;
49 import org.apache.hadoop.fs.FileSystem;
50 import org.apache.hadoop.fs.Path;
51 import org.apache.hadoop.hbase.HConstants;
52 import org.apache.hadoop.hbase.TableName;
53 import org.apache.hadoop.hbase.HBaseConfiguration;
54 import org.apache.hadoop.hbase.HRegionInfo;
55 import org.apache.hadoop.hbase.KeyValue;
56 import org.apache.hadoop.hbase.Tag;
57 import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
58 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
59 import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
60 import org.apache.hadoop.hbase.util.BloomFilter;
61 import org.apache.hadoop.hbase.util.BloomFilterFactory;
62 import org.apache.hadoop.hbase.util.ByteBloomFilter;
63 import org.apache.hadoop.hbase.util.Bytes;
64 import org.apache.hadoop.hbase.util.FSUtils;
65 import org.apache.hadoop.hbase.util.Writables;
66 import org.apache.hadoop.util.Tool;
67 import org.apache.hadoop.util.ToolRunner;
68
69
70
71
72 @InterfaceAudience.Public
73 @InterfaceStability.Evolving
74 public class HFilePrettyPrinter extends Configured implements Tool {
75
76 private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
77
78 private Options options = new Options();
79
80 private boolean verbose;
81 private boolean printValue;
82 private boolean printKey;
83 private boolean shouldPrintMeta;
84 private boolean printBlockIndex;
85 private boolean printBlockHeaders;
86 private boolean printStats;
87 private boolean checkRow;
88 private boolean checkFamily;
89 private boolean isSeekToRow = false;
90
91
92
93
94 private byte[] row = null;
95
96 private List<Path> files = new ArrayList<Path>();
97 private int count;
98
99 private static final String FOUR_SPACES = " ";
100
/**
 * Creates a printer without a {@link Configuration}. {@link #run(String[])}
 * refuses to start until a configuration has been attached.
 */
public HFilePrettyPrinter() {
  // The implicit super() call leaves the configuration unset.
  init();
}
105
/**
 * Creates a printer bound to the given configuration and registers the
 * command-line options.
 *
 * @param conf configuration handed to the {@link Configured} base class
 */
public HFilePrettyPrinter(Configuration conf) {
  super(conf);
  init();
}
110
111 private void init() {
112 options.addOption("v", "verbose", false,
113 "Verbose output; emits file and meta data delimiters");
114 options.addOption("p", "printkv", false, "Print key/value pairs");
115 options.addOption("e", "printkey", false, "Print keys");
116 options.addOption("m", "printmeta", false, "Print meta data of file");
117 options.addOption("b", "printblocks", false, "Print block index meta data");
118 options.addOption("h", "printblockheaders", false, "Print block headers for each block.");
119 options.addOption("k", "checkrow", false,
120 "Enable row order check; looks for out-of-order keys");
121 options.addOption("a", "checkfamily", false, "Enable family check");
122 options.addOption("w", "seekToRow", true,
123 "Seek to this row and print all the kvs for this row only");
124 options.addOption("s", "stats", false, "Print statistics");
125
126 OptionGroup files = new OptionGroup();
127 files.addOption(new Option("f", "file", true,
128 "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34"));
129 files.addOption(new Option("r", "region", true,
130 "Region to scan. Pass region name; e.g. 'hbase:meta,,1'"));
131 options.addOptionGroup(files);
132 }
133
134 public boolean parseOptions(String args[]) throws ParseException,
135 IOException {
136 if (args.length == 0) {
137 HelpFormatter formatter = new HelpFormatter();
138 formatter.printHelp("HFile", options, true);
139 return false;
140 }
141 CommandLineParser parser = new PosixParser();
142 CommandLine cmd = parser.parse(options, args);
143
144 verbose = cmd.hasOption("v");
145 printValue = cmd.hasOption("p");
146 printKey = cmd.hasOption("e") || printValue;
147 shouldPrintMeta = cmd.hasOption("m");
148 printBlockIndex = cmd.hasOption("b");
149 printBlockHeaders = cmd.hasOption("h");
150 printStats = cmd.hasOption("s");
151 checkRow = cmd.hasOption("k");
152 checkFamily = cmd.hasOption("a");
153
154 if (cmd.hasOption("f")) {
155 files.add(new Path(cmd.getOptionValue("f")));
156 }
157
158 if (cmd.hasOption("w")) {
159 String key = cmd.getOptionValue("w");
160 if (key != null && key.length() != 0) {
161 row = key.getBytes();
162 isSeekToRow = true;
163 } else {
164 System.err.println("Invalid row is specified.");
165 System.exit(-1);
166 }
167 }
168
169 if (cmd.hasOption("r")) {
170 String regionName = cmd.getOptionValue("r");
171 byte[] rn = Bytes.toBytes(regionName);
172 byte[][] hri = HRegionInfo.parseRegionName(rn);
173 Path rootDir = FSUtils.getRootDir(getConf());
174 Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
175 String enc = HRegionInfo.encodeRegionName(rn);
176 Path regionDir = new Path(tableDir, enc);
177 if (verbose)
178 System.out.println("region dir -> " + regionDir);
179 List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()),
180 regionDir);
181 if (verbose)
182 System.out.println("Number of region files found -> "
183 + regionFiles.size());
184 if (verbose) {
185 int i = 1;
186 for (Path p : regionFiles) {
187 if (verbose)
188 System.out.println("Found file[" + i++ + "] -> " + p);
189 }
190 }
191 files.addAll(regionFiles);
192 }
193
194 return true;
195 }
196
197
198
199
200
201 public int run(String[] args) {
202 if (getConf() == null) {
203 throw new RuntimeException("A Configuration instance must be provided.");
204 }
205 try {
206 FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
207 if (!parseOptions(args))
208 return 1;
209 } catch (IOException ex) {
210 LOG.error("Error parsing command-line options", ex);
211 return 1;
212 } catch (ParseException ex) {
213 LOG.error("Error parsing command-line options", ex);
214 return 1;
215 }
216
217
218 for (Path fileName : files) {
219 try {
220 processFile(fileName);
221 } catch (IOException ex) {
222 LOG.error("Error reading " + fileName, ex);
223 System.exit(-2);
224 }
225 }
226
227 if (verbose || printKey) {
228 System.out.println("Scanned kv count -> " + count);
229 }
230
231 return 0;
232 }
233
234 private void processFile(Path file) throws IOException {
235 if (verbose)
236 System.out.println("Scanning -> " + file);
237 FileSystem fs = file.getFileSystem(getConf());
238 if (!fs.exists(file)) {
239 System.err.println("ERROR, file doesnt exist: " + file);
240 System.exit(-2);
241 }
242
243 HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(getConf()), getConf());
244
245 Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
246
247 KeyValueStatsCollector fileStats = null;
248
249 if (verbose || printKey || checkRow || checkFamily || printStats) {
250
251 HFileScanner scanner = reader.getScanner(false, false, false);
252 fileStats = new KeyValueStatsCollector();
253 boolean shouldScanKeysValues = false;
254 if (this.isSeekToRow) {
255
256 shouldScanKeysValues =
257 (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
258 } else {
259 shouldScanKeysValues = scanner.seekTo();
260 }
261 if (shouldScanKeysValues)
262 scanKeysValues(file, fileStats, scanner, row);
263 }
264
265
266 if (shouldPrintMeta) {
267 printMeta(reader, fileInfo);
268 }
269
270 if (printBlockIndex) {
271 System.out.println("Block Index:");
272 System.out.println(reader.getDataBlockIndexReader());
273 }
274
275 if (printBlockHeaders) {
276 System.out.println("Block Headers:");
277
278
279
280
281 FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file);
282 long fileSize = fs.getFileStatus(file).getLen();
283 FixedFileTrailer trailer =
284 FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
285 long offset = trailer.getFirstDataBlockOffset(),
286 max = trailer.getLastDataBlockOffset();
287 HFileBlock block;
288 while (offset <= max) {
289 block = reader.readBlock(offset, -1,
290
291 offset += block.getOnDiskSizeWithHeader();
292 System.out.println(block);
293 }
294 }
295
296 if (printStats) {
297 fileStats.finish();
298 System.out.println("Stats:\n" + fileStats);
299 }
300
301 reader.close();
302 }
303
304 private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
305 HFileScanner scanner, byte[] row) throws IOException {
306 KeyValue pkv = null;
307 do {
308 KeyValue kv = scanner.getKeyValue();
309 if (row != null && row.length != 0) {
310 int result = Bytes.compareTo(kv.getRow(), row);
311 if (result > 0) {
312 break;
313 } else if (result < 0) {
314 continue;
315 }
316 }
317
318 if (printStats) {
319 fileStats.collect(kv);
320 }
321
322 if (printKey) {
323 System.out.print("K: " + kv);
324 if (printValue) {
325 System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
326 int i = 0;
327 List<Tag> tags = kv.getTags();
328 for (Tag tag : tags) {
329 System.out
330 .print(String.format(" T[%d]: %s", i++, Bytes.toStringBinary(tag.getValue())));
331 }
332 }
333 System.out.println();
334 }
335
336 if (checkRow && pkv != null) {
337 if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
338 System.err.println("WARNING, previous row is greater then"
339 + " current row\n\tfilename -> " + file + "\n\tprevious -> "
340 + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent -> "
341 + Bytes.toStringBinary(kv.getKey()));
342 }
343 }
344
345 if (checkFamily) {
346 String fam = Bytes.toString(kv.getFamily());
347 if (!file.toString().contains(fam)) {
348 System.err.println("WARNING, filename does not match kv family,"
349 + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
350 + Bytes.toStringBinary(kv.getKey()));
351 }
352 if (pkv != null
353 && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
354 System.err.println("WARNING, previous kv has different family"
355 + " compared to current key\n\tfilename -> " + file
356 + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
357 + "\n\tcurrent -> " + Bytes.toStringBinary(kv.getKey()));
358 }
359 }
360 pkv = kv;
361 ++count;
362 } while (scanner.next());
363 }
364
365
366
367
368
369 private static String asSeparateLines(String keyValueStr) {
370 return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
371 ",\n" + FOUR_SPACES + "$1");
372 }
373
374 private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
375 throws IOException {
376 System.out.println("Block index size as per heapsize: "
377 + reader.indexSize());
378 System.out.println(asSeparateLines(reader.toString()));
379 System.out.println("Trailer:\n "
380 + asSeparateLines(reader.getTrailer().toString()));
381 System.out.println("Fileinfo:");
382 for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
383 System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
384 if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
385 long seqid = Bytes.toLong(e.getValue());
386 System.out.println(seqid);
387 } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
388 TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
389 Writables.copyWritable(e.getValue(), timeRangeTracker);
390 System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
391 + timeRangeTracker.getMaximumTimestamp());
392 } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
393 || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
394 System.out.println(Bytes.toInt(e.getValue()));
395 } else {
396 System.out.println(Bytes.toStringBinary(e.getValue()));
397 }
398 }
399
400 try {
401 System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
402 } catch (Exception e) {
403 System.out.println ("Unable to retrieve the midkey");
404 }
405
406
407 DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
408 BloomFilter bloomFilter = null;
409 if (bloomMeta != null)
410 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
411
412 System.out.println("Bloom filter:");
413 if (bloomFilter != null) {
414 System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
415 ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
416 } else {
417 System.out.println(FOUR_SPACES + "Not present");
418 }
419
420
421 bloomMeta = reader.getDeleteBloomFilterMetadata();
422 bloomFilter = null;
423 if (bloomMeta != null)
424 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
425
426 System.out.println("Delete Family Bloom filter:");
427 if (bloomFilter != null) {
428 System.out.println(FOUR_SPACES
429 + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
430 "\n" + FOUR_SPACES));
431 } else {
432 System.out.println(FOUR_SPACES + "Not present");
433 }
434 }
435
436 private static class KeyValueStatsCollector {
437 private final MetricsRegistry metricsRegistry = new MetricsRegistry();
438 private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
439 private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
440 Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
441 Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
442 Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
443 Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");
444
445 long curRowBytes = 0;
446 long curRowCols = 0;
447
448 byte[] biggestRow = null;
449
450 private KeyValue prevKV = null;
451 private long maxRowBytes = 0;
452 private long curRowKeyLength;
453
454 public void collect(KeyValue kv) {
455 valLen.update(kv.getValueLength());
456 if (prevKV != null &&
457 KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
458
459 collectRow();
460 }
461 curRowBytes += kv.getLength();
462 curRowKeyLength = kv.getKeyLength();
463 curRowCols++;
464 prevKV = kv;
465 }
466
467 private void collectRow() {
468 rowSizeBytes.update(curRowBytes);
469 rowSizeCols.update(curRowCols);
470 keyLen.update(curRowKeyLength);
471
472 if (curRowBytes > maxRowBytes && prevKV != null) {
473 biggestRow = prevKV.getRow();
474 maxRowBytes = curRowBytes;
475 }
476
477 curRowBytes = 0;
478 curRowCols = 0;
479 }
480
481 public void finish() {
482 if (curRowCols > 0) {
483 collectRow();
484 }
485 }
486
487 @Override
488 public String toString() {
489 if (prevKV == null)
490 return "no data available for statistics";
491
492
493 simpleReporter.shutdown();
494 simpleReporter.run();
495 metricsRegistry.shutdown();
496
497 return
498 metricsOutput.toString() +
499 "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
500 }
501 }
502
503 private static class SimpleReporter extends ConsoleReporter {
504 private final PrintStream out;
505
506 public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
507 super(metricsRegistry, out, MetricPredicate.ALL);
508 this.out = out;
509 }
510
511 @Override
512 public void run() {
513 for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
514 MetricPredicate.ALL).entrySet()) {
515 try {
516 for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
517 out.print(" " + subEntry.getKey().getName());
518 out.println(':');
519
520 subEntry.getValue().processWith(this, subEntry.getKey(), out);
521 }
522 } catch (Exception e) {
523 e.printStackTrace(out);
524 }
525 }
526 }
527
528 @Override
529 public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
530 super.processHistogram(name, histogram, stream);
531 stream.printf(Locale.getDefault(), " count = %d\n", histogram.count());
532 }
533 }
534
535 public static void main(String[] args) throws Exception {
536 Configuration conf = HBaseConfiguration.create();
537
538 conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
539 int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args);
540 System.exit(ret);
541 }
542 }