/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.SortedMap;

import com.yammer.metrics.core.*;
import com.yammer.metrics.reporting.ConsoleReporter;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionGroup;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.util.BloomFilter;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.ByteBloomFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Implements pretty-printing functionality for {@link HFile}s.
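 * <p>
 * A typical invocation, assuming the standard {@code hbase} launcher script is on the
 * PATH; the HDFS path below is illustrative, not a real file:
 * <pre>
 * hbase org.apache.hadoop.hbase.io.hfile.HFilePrettyPrinter -v -m \
 *     -f hdfs://namenode:9000/hbase/data/default/mytable/region/family/hfile
 * </pre>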
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class HFilePrettyPrinter extends Configured implements Tool {

  private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);

  private Options options = new Options();

  private boolean verbose;
  private boolean printValue;
  private boolean printKey;
  private boolean shouldPrintMeta;
  private boolean printBlockIndex;
  private boolean printBlockHeaders;
  private boolean printStats;
  private boolean checkRow;
  private boolean checkFamily;
  private boolean isSeekToRow = false;

  /**
   * The row the user asked to seek to; when set, only the KeyValues for this row are printed.
   */
  private byte[] row = null;

  private List<Path> files = new ArrayList<Path>();
  private int count;

  private static final String FOUR_SPACES = "    ";

  public HFilePrettyPrinter() {
    super();
    init();
  }

  public HFilePrettyPrinter(Configuration conf) {
    super(conf);
    init();
  }

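  /**
   * Registers the command-line options understood by {@link #parseOptions(String[])};
   * the -f and -r options are placed in an {@link OptionGroup}, so only one of the two
   * may be supplied.
   */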
  private void init() {
    options.addOption("v", "verbose", false,
        "Verbose output; emits file and meta data delimiters");
    options.addOption("p", "printkv", false, "Print key/value pairs");
    options.addOption("e", "printkey", false, "Print keys");
    options.addOption("m", "printmeta", false, "Print meta data of file");
    options.addOption("b", "printblocks", false, "Print block index meta data");
    options.addOption("h", "printblockheaders", false, "Print block headers for each block.");
    options.addOption("k", "checkrow", false,
        "Enable row order check; looks for out-of-order keys");
    options.addOption("a", "checkfamily", false, "Enable family check");
    options.addOption("w", "seekToRow", true,
        "Seek to this row and print all the kvs for this row only");
    options.addOption("s", "stats", false, "Print statistics");

    OptionGroup files = new OptionGroup();
    files.addOption(new Option("f", "file", true,
        "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34"));
    files.addOption(new Option("r", "region", true,
        "Region to scan. Pass region name; e.g. 'hbase:meta,,1'"));
    options.addOptionGroup(files);
  }

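  /**
   * Parses the command-line arguments and configures this printer accordingly.
   * With no arguments the usage text is printed and false is returned; the -r
   * option is expanded here into the list of store files of the named region.
   */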
  public boolean parseOptions(String[] args) throws ParseException,
      IOException {
    if (args.length == 0) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp("HFile", options, true);
      return false;
    }
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = parser.parse(options, args);

    verbose = cmd.hasOption("v");
    printValue = cmd.hasOption("p");
    printKey = cmd.hasOption("e") || printValue;
    shouldPrintMeta = cmd.hasOption("m");
    printBlockIndex = cmd.hasOption("b");
    printBlockHeaders = cmd.hasOption("h");
    printStats = cmd.hasOption("s");
    checkRow = cmd.hasOption("k");
    checkFamily = cmd.hasOption("a");

    if (cmd.hasOption("f")) {
      files.add(new Path(cmd.getOptionValue("f")));
    }

    if (cmd.hasOption("w")) {
      String key = cmd.getOptionValue("w");
      if (key != null && key.length() != 0) {
        row = key.getBytes();
        isSeekToRow = true;
      } else {
        System.err.println("Invalid row specified.");
        System.exit(-1);
      }
    }

    if (cmd.hasOption("r")) {
      String regionName = cmd.getOptionValue("r");
      byte[] rn = Bytes.toBytes(regionName);
      byte[][] hri = HRegionInfo.parseRegionName(rn);
      Path rootDir = FSUtils.getRootDir(getConf());
      Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
      String enc = HRegionInfo.encodeRegionName(rn);
      Path regionDir = new Path(tableDir, enc);
      if (verbose)
        System.out.println("region dir -> " + regionDir);
      List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()),
          regionDir);
      if (verbose)
        System.out.println("Number of region files found -> "
            + regionFiles.size());
      if (verbose) {
        int i = 1;
        for (Path p : regionFiles) {
          System.out.println("Found file[" + i++ + "] -> " + p);
        }
      }
      files.addAll(regionFiles);
    }

    return true;
  }

  /**
   * Runs the command-line pretty-printer, and returns the desired command
   * exit code (zero for success, non-zero for failure).
   */
  public int run(String[] args) {
    if (getConf() == null) {
      throw new RuntimeException("A Configuration instance must be provided.");
    }
    try {
      FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
      if (!parseOptions(args))
        return 1;
    } catch (IOException ex) {
      LOG.error("Error parsing command-line options", ex);
      return 1;
    } catch (ParseException ex) {
      LOG.error("Error parsing command-line options", ex);
      return 1;
    }

    // iterate over all files found
    for (Path fileName : files) {
      try {
        processFile(fileName);
      } catch (IOException ex) {
        LOG.error("Error reading " + fileName, ex);
        System.exit(-2);
      }
    }

    if (verbose || printKey) {
      System.out.println("Scanned kv count -> " + count);
    }

    return 0;
  }

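  /**
   * Opens a single HFile and emits the requested output: key/values (optionally
   * restricted to one row), file meta data, the block index, per-block headers,
   * and/or key/value statistics, depending on the parsed options.
   */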
  private void processFile(Path file) throws IOException {
    if (verbose)
      System.out.println("Scanning -> " + file);
    FileSystem fs = file.getFileSystem(getConf());
    if (!fs.exists(file)) {
      System.err.println("ERROR, file doesn't exist: " + file);
      System.exit(-2);
    }

    HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(getConf()), getConf());

    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

    KeyValueStatsCollector fileStats = null;

    if (verbose || printKey || checkRow || checkFamily || printStats) {
      // scan over file and read key/value's and check if requested
      HFileScanner scanner = reader.getScanner(false, false, false);
      fileStats = new KeyValueStatsCollector();
      boolean shouldScanKeysValues = false;
      if (this.isSeekToRow) {
        // seek to the first kv on this row
        shouldScanKeysValues =
            (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
      } else {
        shouldScanKeysValues = scanner.seekTo();
      }
      if (shouldScanKeysValues)
        scanKeysValues(file, fileStats, scanner, row);
    }

    // print meta data
    if (shouldPrintMeta) {
      printMeta(reader, fileInfo);
    }

    if (printBlockIndex) {
      System.out.println("Block Index:");
      System.out.println(reader.getDataBlockIndexReader());
    }

    if (printBlockHeaders) {
      System.out.println("Block Headers:");
      /*
       * TODO: this same/similar block iteration logic is used in HFileBlock#blockRange and
       * TestLazyDataBlockDecompression. Refactor?
       */
      FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file);
      long fileSize = fs.getFileStatus(file).getLen();
      FixedFileTrailer trailer =
          FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
      long offset = trailer.getFirstDataBlockOffset(),
          max = trailer.getLastDataBlockOffset();
      HFileBlock block;
      while (offset <= max) {
        block = reader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
            /* isCompaction */ false, /* updateCacheMetrics */ false, null);
        offset += block.getOnDiskSizeWithHeader();
        System.out.println(block);
      }
    }

    if (printStats) {
      fileStats.finish();
      System.out.println("Stats:\n" + fileStats);
    }

    reader.close();
  }

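  /**
   * Walks the scanner over every KeyValue (or only those of the requested row),
   * printing keys, values and tags as configured, collecting statistics, and
   * warning about out-of-order rows and family/filename mismatches.
   */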
  private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
      HFileScanner scanner, byte[] row) throws IOException {
    KeyValue pkv = null;
    do {
      KeyValue kv = scanner.getKeyValue();
      if (row != null && row.length != 0) {
        int result = Bytes.compareTo(kv.getRow(), row);
        if (result > 0) {
          break;
        } else if (result < 0) {
          continue;
        }
      }
      // collect stats
      if (printStats) {
        fileStats.collect(kv);
      }
      // dump key value
      if (printKey) {
        System.out.print("K: " + kv);
        if (printValue) {
          System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
          int i = 0;
          List<Tag> tags = kv.getTags();
          for (Tag tag : tags) {
            System.out
                .print(String.format(" T[%d]: %s", i++, Bytes.toStringBinary(tag.getValue())));
          }
        }
        System.out.println();
      }
      // check if rows are in order
      if (checkRow && pkv != null) {
        if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
          System.err.println("WARNING, previous row is greater than"
              + " current row\n\tfilename -> " + file + "\n\tprevious -> "
              + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent  -> "
              + Bytes.toStringBinary(kv.getKey()));
        }
      }
      // check if families are consistent
      if (checkFamily) {
        String fam = Bytes.toString(kv.getFamily());
        if (!file.toString().contains(fam)) {
          System.err.println("WARNING, filename does not match kv family,"
              + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
              + Bytes.toStringBinary(kv.getKey()));
        }
        if (pkv != null
            && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
          System.err.println("WARNING, previous kv has different family"
              + " compared to current key\n\tfilename -> " + file
              + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
              + "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
        }
      }
      pkv = kv;
      ++count;
    } while (scanner.next());
  }

  /**
   * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
   * with a four-space indentation.
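   * For example, {@code "a=1, b=2"} is rendered as
   * <pre>
   * a=1,
   *     b=2
   * </pre>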
   */
  private static String asSeparateLines(String keyValueStr) {
    return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
        ",\n" + FOUR_SPACES + "$1");
  }

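  /**
   * Prints the reader summary, trailer and file-info entries (decoding well-known
   * keys such as MAX_SEQ_ID_KEY and TIMERANGE), the mid-key, and any general or
   * delete family Bloom filter metadata.
   */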
  private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
      throws IOException {
    System.out.println("Block index size as per heapsize: "
        + reader.indexSize());
    System.out.println(asSeparateLines(reader.toString()));
    System.out.println("Trailer:\n    "
        + asSeparateLines(reader.getTrailer().toString()));
    System.out.println("Fileinfo:");
    for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
      System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
      if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
        long seqid = Bytes.toLong(e.getValue());
        System.out.println(seqid);
      } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
        TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
        Writables.copyWritable(e.getValue(), timeRangeTracker);
        System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
            + timeRangeTracker.getMaximumTimestamp());
      } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
          || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
        System.out.println(Bytes.toInt(e.getValue()));
      } else {
        System.out.println(Bytes.toStringBinary(e.getValue()));
      }
    }

    try {
      System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
    } catch (Exception e) {
      System.out.println("Unable to retrieve the midkey");
    }

    // Printing general bloom information
    DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
    BloomFilter bloomFilter = null;
    if (bloomMeta != null)
      bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

    System.out.println("Bloom filter:");
    if (bloomFilter != null) {
      System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
          ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
    } else {
      System.out.println(FOUR_SPACES + "Not present");
    }

    // Printing delete bloom information
    bloomMeta = reader.getDeleteBloomFilterMetadata();
    bloomFilter = null;
    if (bloomMeta != null)
      bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

    System.out.println("Delete Family Bloom filter:");
    if (bloomFilter != null) {
      System.out.println(FOUR_SPACES
          + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
              "\n" + FOUR_SPACES));
    } else {
      System.out.println(FOUR_SPACES + "Not present");
    }
  }

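  /**
   * Accumulates key/value and per-row size statistics over a scan using Yammer
   * metrics histograms; {@link #toString()} renders them through {@link SimpleReporter}.
   */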
  private static class KeyValueStatsCollector {
    private final MetricsRegistry metricsRegistry = new MetricsRegistry();
    private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
    private final SimpleReporter simpleReporter =
        new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
    Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
    Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
    Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
    Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");

    long curRowBytes = 0;
    long curRowCols = 0;

    byte[] biggestRow = null;

    private KeyValue prevKV = null;
    private long maxRowBytes = 0;
    private long curRowKeyLength;

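    /**
     * Folds one KeyValue into the statistics; a row boundary is detected by
     * comparing the row of this KeyValue with that of the previous one.
     */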
    public void collect(KeyValue kv) {
      valLen.update(kv.getValueLength());
      if (prevKV != null &&
          KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
        // new row
        collectRow();
      }
      curRowBytes += kv.getLength();
      curRowKeyLength = kv.getKeyLength();
      curRowCols++;
      prevKV = kv;
    }

    private void collectRow() {
      rowSizeBytes.update(curRowBytes);
      rowSizeCols.update(curRowCols);
      keyLen.update(curRowKeyLength);

      if (curRowBytes > maxRowBytes && prevKV != null) {
        biggestRow = prevKV.getRow();
        maxRowBytes = curRowBytes;
      }

      curRowBytes = 0;
      curRowCols = 0;
    }

    public void finish() {
      if (curRowCols > 0) {
        collectRow();
      }
    }

    @Override
    public String toString() {
      if (prevKV == null)
        return "no data available for statistics";

      // Dump the metrics to the output stream
      simpleReporter.shutdown();
      simpleReporter.run();
      metricsRegistry.shutdown();

      return metricsOutput.toString() +
          "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
    }
  }

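  /**
   * A trimmed-down {@link ConsoleReporter} that prints each metric name followed by
   * its data, and appends the raw sample count to histogram output.
   */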
  private static class SimpleReporter extends ConsoleReporter {
    private final PrintStream out;

    public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
      super(metricsRegistry, out, MetricPredicate.ALL);
      this.out = out;
    }

    @Override
    public void run() {
      for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
          MetricPredicate.ALL).entrySet()) {
        try {
          for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
            out.print("   " + subEntry.getKey().getName());
            out.println(':');

            subEntry.getValue().processWith(this, subEntry.getKey(), out);
          }
        } catch (Exception e) {
          e.printStackTrace(out);
        }
      }
    }

    @Override
    public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
      super.processHistogram(name, histogram, stream);
      stream.printf(Locale.getDefault(), "             count = %d\n", histogram.count());
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // no need for a block cache
    conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
    int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args);
    System.exit(ret);
  }
}