1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.ByteArrayInputStream;
21  import java.io.ByteArrayOutputStream;
22  import java.io.DataInputStream;
23  import java.io.DataOutput;
24  import java.io.DataOutputStream;
25  import java.io.IOException;
26  import java.io.InputStream;
27  import java.nio.ByteBuffer;
28  import java.util.concurrent.locks.Lock;
29  import java.util.concurrent.locks.ReentrantLock;
30  
31  import org.apache.hadoop.hbase.classification.InterfaceAudience;
32  import org.apache.hadoop.fs.FSDataInputStream;
33  import org.apache.hadoop.fs.FSDataOutputStream;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.fs.HFileSystem;
37  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
38  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
39  import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
40  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
41  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
42  import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
43  import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
44  import org.apache.hadoop.hbase.util.Bytes;
45  import org.apache.hadoop.hbase.util.ChecksumType;
46  import org.apache.hadoop.hbase.util.ClassSize;
47  import org.apache.hadoop.hbase.util.CompoundBloomFilter;
48  import org.apache.hadoop.io.IOUtils;
49  
50  import com.google.common.base.Preconditions;
51  
52  /**
53   * Reading {@link HFile} version 1 and 2 blocks, and writing version 2 blocks.
54   * <ul>
55   * <li>In version 1 all blocks are always compressed or uncompressed, as
56   * specified by the {@link HFile}'s compression algorithm, with a type-specific
57   * magic record stored in the beginning of the compressed data (i.e. one needs
58   * to uncompress the compressed block to determine the block type). There is
59   * only a single compression algorithm setting for all blocks. Offset and size
60   * information from the block index are required to read a block.
61   * <li>In version 2 a block is structured as follows:
62   * <ul>
63   * <li>header (see {@link Writer#finishBlock()})
64   * <ul>
65   * <li>Magic record identifying the block type (8 bytes)
66   * <li>Compressed block size, excluding header, including checksum (4 bytes)
67   * <li>Uncompressed block size, excluding header, excluding checksum (4 bytes)
68   * <li>The offset of the previous block of the same type (8 bytes). This is
69   * used to be able to navigate to the previous block without going to the block index.
70   * <li>For minorVersions >=1, the ordinal describing checksum type (1 byte)
71   * <li>For minorVersions >=1, the number of data bytes/checksum chunk (4 bytes)
72   * <li>For minorVersions >=1, the size of data on disk, including header,
73   * excluding checksums (4 bytes)
74   * </ul>
75   * </li>
76   * <li>Raw/Compressed/Encrypted/Encoded data. The compression algorithm is the
77   * same for all the blocks in the {@link HFile}, similarly to what was done in
78   * version 1.
79   * <li>For minorVersions >=1, a series of 4 byte checksums, one each for
80   * the number of bytes specified by bytesPerChecksum.
81   * </ul>
82   * </ul>
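 *
 * <p>Putting the version 2 header fields above together (a sketch derived from the list
 * above; with HBase checksums enabled, i.e. minor version >= 1, the fields add up to a
 * 33 byte header):
 * <pre>
 * bytes  0..7    magic record (block type)
 * bytes  8..11   on-disk size without header
 * bytes 12..15   uncompressed size without header
 * bytes 16..23   offset of the previous block of the same type
 * byte  24       checksum type ordinal
 * bytes 25..28   bytes per checksum
 * bytes 29..32   on-disk data size with header
 * bytes 33..     data, followed by the checksum values
 * </pre>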
83   */
84  @InterfaceAudience.Private
85  public class HFileBlock implements Cacheable {
86  
87    /**
88     * On a checksum failure on a Reader, this many subsequent read
89     * requests switch back to using HDFS checksums before HBase checksum
90     * verification is automatically re-enabled.
91     */
92    static final int CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD = 3;
93  
94    public static final boolean FILL_HEADER = true;
95    public static final boolean DONT_FILL_HEADER = false;
96  
97    /**
98     * The size of the block header when blockType is {@link BlockType#ENCODED_DATA}.
99     * This extends the normal header by adding the id of the encoder.
100    */
101   public static final int ENCODED_HEADER_SIZE = HConstants.HFILEBLOCK_HEADER_SIZE
102       + DataBlockEncoding.ID_SIZE;
103 
104   static final byte[] DUMMY_HEADER_NO_CHECKSUM =
105      new byte[HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM];
106 
107   public static final int BYTE_BUFFER_HEAP_SIZE = (int) ClassSize.estimateBase(
108       ByteBuffer.wrap(new byte[0], 0, 0).getClass(), false);
109 
110   // meta.usesHBaseChecksum+offset+nextBlockOnDiskSizeWithHeader
111   public static final int EXTRA_SERIALIZATION_SPACE = Bytes.SIZEOF_BYTE + Bytes.SIZEOF_INT
112       + Bytes.SIZEOF_LONG;
113 
114   /**
115    * Each checksum value is an integer that can be stored in 4 bytes.
116    */
117   static final int CHECKSUM_SIZE = Bytes.SIZEOF_INT;
118 
119   private static final CacheableDeserializer<Cacheable> blockDeserializer =
120       new CacheableDeserializer<Cacheable>() {
121         public HFileBlock deserialize(ByteBuffer buf, boolean reuse) throws IOException {
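          // A sketch of the serialized layout consumed here: the block's own buffer is
          // followed by EXTRA_SERIALIZATION_SPACE trailing bytes holding the
          // usesHBaseChecksum flag (1 byte), the block's file offset (8 bytes) and the
          // next block's on-disk size with header (4 bytes), which are read back below.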
122           buf.limit(buf.limit() - HFileBlock.EXTRA_SERIALIZATION_SPACE).rewind();
123           ByteBuffer newByteBuffer;
124           if (reuse) {
125             newByteBuffer = buf.slice();
126           } else {
127            newByteBuffer = ByteBuffer.allocate(buf.limit());
128            newByteBuffer.put(buf);
129           }
130           buf.position(buf.limit());
131           buf.limit(buf.limit() + HFileBlock.EXTRA_SERIALIZATION_SPACE);
132           boolean usesChecksum = buf.get() == (byte)1;
133           HFileBlock ourBuffer = new HFileBlock(newByteBuffer, usesChecksum);
134           ourBuffer.offset = buf.getLong();
135           ourBuffer.nextBlockOnDiskSizeWithHeader = buf.getInt();
136           if (ourBuffer.hasNextBlockHeader()) {
137             ourBuffer.buf.limit(ourBuffer.buf.limit() - ourBuffer.headerSize());
138           }
139           return ourBuffer;
140         }
141         
142         @Override
143         public int getDeserialiserIdentifier() {
144           return deserializerIdentifier;
145         }
146 
147         @Override
148         public HFileBlock deserialize(ByteBuffer b) throws IOException {
149           return deserialize(b, false);
150         }
151       };
152   private static final int deserializerIdentifier;
153   static {
154     deserializerIdentifier = CacheableDeserializerIdManager
155         .registerDeserializer(blockDeserializer);
156   }
157 
158   /** Type of block. Header field 0. */
159   private BlockType blockType;
160 
161   /** Size on disk excluding header, including checksum. Header field 1. */
162   private int onDiskSizeWithoutHeader;
163 
164   /** Size of pure data. Does not include header or checksums. Header field 2. */
165   private final int uncompressedSizeWithoutHeader;
166 
167   /** The offset of the previous block on disk. Header field 3. */
168   private final long prevBlockOffset;
169 
170   /**
171    * Size on disk of header + data. Excludes checksum. Header field 6,
172    * OR calculated from {@link #onDiskSizeWithoutHeader} when using HDFS checksum.
173    */
174   private final int onDiskDataSizeWithHeader;
175 
176   /** The in-memory representation of the hfile block */
177   private ByteBuffer buf;
178 
179   /** Meta data that holds meta information on the hfileblock */
180   private HFileContext fileContext;
181 
182   /**
183    * The offset of this block in the file. Populated by the reader for
184    * convenience of access. This offset is not part of the block header.
185    */
186   private long offset = -1;
187 
188   /**
189    * The on-disk size of the next block, including the header, obtained by
190    * peeking into the first {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes of the next block's
191    * header, or -1 if unknown.
192    */
193   private int nextBlockOnDiskSizeWithHeader = -1;
194 
195   /**
196    * Creates a new {@link HFile} block from the given fields. This constructor
197    * is mostly used when the block data has already been read and uncompressed,
198    * and is sitting in a byte buffer. 
199    *
200    * @param blockType the type of this block, see {@link BlockType}
201    * @param onDiskSizeWithoutHeader see {@link #onDiskSizeWithoutHeader}
202    * @param uncompressedSizeWithoutHeader see {@link #uncompressedSizeWithoutHeader}
203    * @param prevBlockOffset see {@link #prevBlockOffset}
204    * @param buf block header ({@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes) followed by
205    *          uncompressed data
206    * @param fillHeader when true, overwrite the header portion of {@code buf} with the field values passed in
207    * @param offset the file offset the block was read from
208    * @param onDiskDataSizeWithHeader see {@link #onDiskDataSizeWithHeader}
209    * @param fileContext HFile meta data
210    */
211   HFileBlock(BlockType blockType, int onDiskSizeWithoutHeader, int uncompressedSizeWithoutHeader,
212       long prevBlockOffset, ByteBuffer buf, boolean fillHeader, long offset,
213       int onDiskDataSizeWithHeader, HFileContext fileContext) {
214     this.blockType = blockType;
215     this.onDiskSizeWithoutHeader = onDiskSizeWithoutHeader;
216     this.uncompressedSizeWithoutHeader = uncompressedSizeWithoutHeader;
217     this.prevBlockOffset = prevBlockOffset;
218     this.buf = buf;
219     this.offset = offset;
220     this.onDiskDataSizeWithHeader = onDiskDataSizeWithHeader;
221     this.fileContext = fileContext;
222     if (fillHeader)
223       overwriteHeader();
224     this.buf.rewind();
225   }
226 
227   /**
228    * Copy constructor. Creates a shallow copy of {@code that}'s buffer.
229    */
230   HFileBlock(HFileBlock that) {
231     this.blockType = that.blockType;
232     this.onDiskSizeWithoutHeader = that.onDiskSizeWithoutHeader;
233     this.uncompressedSizeWithoutHeader = that.uncompressedSizeWithoutHeader;
234     this.prevBlockOffset = that.prevBlockOffset;
235     this.buf = that.buf.duplicate();
236     this.offset = that.offset;
237     this.onDiskDataSizeWithHeader = that.onDiskDataSizeWithHeader;
238     this.fileContext = that.fileContext;
239     this.nextBlockOnDiskSizeWithHeader = that.nextBlockOnDiskSizeWithHeader;
240   }
241 
242   /**
243    * Creates a block from an existing buffer starting with a header. Rewinds
244    * and takes ownership of the buffer. By definition of rewind, ignores the
245    * buffer position, but if you slice the buffer beforehand, it will rewind
246    * to that point. The reason this refers to a minor version and not a major version is
247    * that major versions indicate the format of an HFile, whereas minor versions
248    * indicate the format inside an HFileBlock.
249    */
250   HFileBlock(ByteBuffer b, boolean usesHBaseChecksum) throws IOException {
251     b.rewind();
252     blockType = BlockType.read(b);
253     onDiskSizeWithoutHeader = b.getInt();
254     uncompressedSizeWithoutHeader = b.getInt();
255     prevBlockOffset = b.getLong();
256     HFileContextBuilder contextBuilder = new HFileContextBuilder();
257     contextBuilder.withHBaseCheckSum(usesHBaseChecksum);
258     if (usesHBaseChecksum) {
259       contextBuilder.withChecksumType(ChecksumType.codeToType(b.get()));
260       contextBuilder.withBytesPerCheckSum(b.getInt());
261       this.onDiskDataSizeWithHeader = b.getInt();
262     } else {
263       contextBuilder.withChecksumType(ChecksumType.NULL);
264       contextBuilder.withBytesPerCheckSum(0);
265       this.onDiskDataSizeWithHeader = onDiskSizeWithoutHeader +
266                                        HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
267     }
268     this.fileContext = contextBuilder.build();
269     buf = b;
270     buf.rewind();
271   }
272 
273   public BlockType getBlockType() {
274     return blockType;
275   }
276 
277   /** @return the data block encoding id that was used to encode this block */
278   public short getDataBlockEncodingId() {
279     if (blockType != BlockType.ENCODED_DATA) {
280       throw new IllegalArgumentException("Querying encoder ID of a block " +
281           "of type other than " + BlockType.ENCODED_DATA + ": " + blockType);
282     }
283     return buf.getShort(headerSize());
284   }
285 
286   /**
287    * @return the on-disk size of header + data part + checksum.
288    */
289   public int getOnDiskSizeWithHeader() {
290     return onDiskSizeWithoutHeader + headerSize();
291   }
292 
293   /**
294    * @return the on-disk size of the data part + checksum (header excluded).
295    */
296   public int getOnDiskSizeWithoutHeader() {
297     return onDiskSizeWithoutHeader;
298   }
299 
300   /**
301    * @return the uncompressed size of data part (header and checksum excluded).
302    */
303    public int getUncompressedSizeWithoutHeader() {
304     return uncompressedSizeWithoutHeader;
305   }
306 
307   /**
308    * @return the offset of the previous block of the same type in the file, or
309    *         -1 if unknown
310    */
311   public long getPrevBlockOffset() {
312     return prevBlockOffset;
313   }
314 
315   /**
316    * Rewinds {@code buf} and writes first 4 header fields. {@code buf} position
317    * is modified as side-effect.
318    */
319   private void overwriteHeader() {
320     buf.rewind();
321     blockType.write(buf);
322     buf.putInt(onDiskSizeWithoutHeader);
323     buf.putInt(uncompressedSizeWithoutHeader);
324     buf.putLong(prevBlockOffset);
325     if (this.fileContext.isUseHBaseChecksum()) {
326       buf.put(fileContext.getChecksumType().getCode());
327       buf.putInt(fileContext.getBytesPerChecksum());
328       buf.putInt(onDiskDataSizeWithHeader);
329     }
330   }
331 
332   /**
333    * Returns a buffer that does not include the header or checksum.
334    *
335    * @return the buffer with header skipped and checksum omitted.
336    */
337   public ByteBuffer getBufferWithoutHeader() {
338     return ByteBuffer.wrap(buf.array(), buf.arrayOffset() + headerSize(),
339         buf.limit() - headerSize() - totalChecksumBytes()).slice();
340   }
341 
342   /**
343    * Returns the buffer this block stores internally. The clients must not
344    * modify the buffer object. This method has to be public because it is
345    * used in {@link CompoundBloomFilter} to avoid object creation on every
346    * Bloom filter lookup, but has to be used with caution. Checksum data
347    * is not included in the returned buffer but header data is.
348    *
349    * @return the buffer of this block for read-only operations
350    */
351   public ByteBuffer getBufferReadOnly() {
352     return ByteBuffer.wrap(buf.array(), buf.arrayOffset(),
353         buf.limit() - totalChecksumBytes()).slice();
354   }
355 
356   /**
357    * Returns the buffer of this block, including header data. The clients must
358    * not modify the buffer object. This method has to be public because it is
359    * used in {@link BucketCache} to avoid buffer copy.
360    * 
361    * @return the buffer with header and checksum included for read-only operations
362    */
363   public ByteBuffer getBufferReadOnlyWithHeader() {
364     return ByteBuffer.wrap(buf.array(), buf.arrayOffset(), buf.limit()).slice();
365   }
366 
367   /**
368    * Returns a byte buffer of this block, including header data and checksum, positioned at
369    * the beginning of header. The underlying data array is not copied.
370    *
371    * @return the byte buffer with header and checksum included
372    */
373   ByteBuffer getBufferWithHeader() {
374     ByteBuffer dupBuf = buf.duplicate();
375     dupBuf.rewind();
376     return dupBuf;
377   }
378 
379   private void sanityCheckAssertion(long valueFromBuf, long valueFromField,
380       String fieldName) throws IOException {
381     if (valueFromBuf != valueFromField) {
382       throw new AssertionError(fieldName + " in the buffer (" + valueFromBuf
383           + ") is different from that in the field (" + valueFromField + ")");
384     }
385   }
386 
387   private void sanityCheckAssertion(BlockType valueFromBuf, BlockType valueFromField)
388       throws IOException {
389     if (valueFromBuf != valueFromField) {
390       throw new IOException("Block type stored in the buffer: " +
391         valueFromBuf + ", block type field: " + valueFromField);
392     }
393   }
394 
395   /**
396    * Checks if the block is internally consistent, i.e. the first
397    * {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes of the buffer contain a
398    * valid header consistent with the fields. Assumes a packed block structure.
399    * This function is primarily for testing and debugging, and is not
400    * thread-safe, because it alters the internal buffer pointer.
401    */
402   void sanityCheck() throws IOException {
403     buf.rewind();
404 
405     sanityCheckAssertion(BlockType.read(buf), blockType);
406 
407     sanityCheckAssertion(buf.getInt(), onDiskSizeWithoutHeader,
408         "onDiskSizeWithoutHeader");
409 
410     sanityCheckAssertion(buf.getInt(), uncompressedSizeWithoutHeader,
411         "uncompressedSizeWithoutHeader");
412 
413     sanityCheckAssertion(buf.getLong(), prevBlockOffset, "prevBlockOffset");
414     if (this.fileContext.isUseHBaseChecksum()) {
415       sanityCheckAssertion(buf.get(), this.fileContext.getChecksumType().getCode(), "checksumType");
416       sanityCheckAssertion(buf.getInt(), this.fileContext.getBytesPerChecksum(), "bytesPerChecksum");
417       sanityCheckAssertion(buf.getInt(), onDiskDataSizeWithHeader, "onDiskDataSizeWithHeader");
418     }
419 
420     int cksumBytes = totalChecksumBytes();
421     int expectedBufLimit = onDiskDataSizeWithHeader + cksumBytes;
422     if (buf.limit() != expectedBufLimit) {
423       throw new AssertionError("Expected buffer limit " + expectedBufLimit
424           + ", got " + buf.limit());
425     }
426 
427     // We might optionally allocate HFILEBLOCK_HEADER_SIZE more bytes to read the next
428     // block's header, so there are two sensible values for buffer capacity.
429     int hdrSize = headerSize();
430     if (buf.capacity() != expectedBufLimit &&
431         buf.capacity() != expectedBufLimit + hdrSize) {
432       throw new AssertionError("Invalid buffer capacity: " + buf.capacity() +
433           ", expected " + expectedBufLimit + " or " + (expectedBufLimit + hdrSize));
434     }
435   }
436 
437   @Override
438   public String toString() {
439     StringBuilder sb = new StringBuilder()
440       .append("HFileBlock [")
441       .append(" fileOffset=").append(offset)
442       .append(" headerSize()=").append(headerSize())
443       .append(" blockType=").append(blockType)
444       .append(" onDiskSizeWithoutHeader=").append(onDiskSizeWithoutHeader)
445       .append(" uncompressedSizeWithoutHeader=").append(uncompressedSizeWithoutHeader)
446       .append(" prevBlockOffset=").append(prevBlockOffset)
447       .append(" isUseHBaseChecksum()=").append(fileContext.isUseHBaseChecksum());
448     if (fileContext.isUseHBaseChecksum()) {
449       sb.append(" checksumType=").append(ChecksumType.codeToType(this.buf.get(24)))
450         .append(" bytesPerChecksum=").append(this.buf.getInt(24 + 1))
451         .append(" onDiskDataSizeWithHeader=").append(onDiskDataSizeWithHeader);
452     } else {
453       sb.append(" onDiskDataSizeWithHeader=").append(onDiskDataSizeWithHeader)
454         .append("(").append(onDiskSizeWithoutHeader)
455         .append("+").append(HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM).append(")");
456     }
457     sb.append(" getOnDiskSizeWithHeader()=").append(getOnDiskSizeWithHeader())
458       .append(" totalChecksumBytes()=").append(totalChecksumBytes())
459       .append(" isUnpacked()=").append(isUnpacked())
460       .append(" buf=[ ")
461         .append(buf)
462         .append(", array().length=").append(buf.array().length)
463         .append(", arrayOffset()=").append(buf.arrayOffset())
464       .append(" ]")
465       .append(" dataBeginsWith=")
466       .append(Bytes.toStringBinary(buf.array(), buf.arrayOffset() + headerSize(),
467         Math.min(32, buf.limit() - buf.arrayOffset() - headerSize())))
468       .append(" fileContext=").append(fileContext)
469       .append(" ]");
470     return sb.toString();
471   }
472 
473   /**
474    * Called after reading a block with provided onDiskSizeWithHeader.
475    */
476   private void validateOnDiskSizeWithoutHeader(
477       int expectedOnDiskSizeWithoutHeader) throws IOException {
478     if (onDiskSizeWithoutHeader != expectedOnDiskSizeWithoutHeader) {
479       String blockInfoMsg =
480         "Block offset: " + offset + ", data starts with: "
481           + Bytes.toStringBinary(buf.array(), buf.arrayOffset(),
482               buf.arrayOffset() + Math.min(32, buf.limit()));
483       throw new IOException("On-disk size without header provided is "
484           + expectedOnDiskSizeWithoutHeader + ", but block "
485           + "header contains " + onDiskSizeWithoutHeader + ". " +
486           blockInfoMsg);
487     }
488   }
489 
490   /**
491    * Retrieves the decompressed/decrypted view of this block. An encoded block remains in its
492    * encoded structure. Internal structures are shared between instances where applicable.
493    */
494   HFileBlock unpack(HFileContext fileContext, FSReader reader) throws IOException {
495     if (!fileContext.isCompressedOrEncrypted()) {
496       // TODO: cannot use our own fileContext here because HFileBlock(ByteBuffer, boolean),
497       // which is used for block serialization to L2 cache, does not preserve encoding and
498       // encryption details.
499       return this;
500     }
501 
502     HFileBlock unpacked = new HFileBlock(this);
503     unpacked.allocateBuffer(); // allocates space for the decompressed block
504 
505     HFileBlockDecodingContext ctx = blockType == BlockType.ENCODED_DATA ?
506       reader.getBlockDecodingContext() : reader.getDefaultBlockDecodingContext();
507     ctx.prepareDecoding(unpacked.getOnDiskSizeWithoutHeader(),
508       unpacked.getUncompressedSizeWithoutHeader(), unpacked.getBufferWithoutHeader(),
509       this.getBufferReadOnlyWithHeader().array(), this.headerSize());
510 
511     // Preserve the next block's header bytes in the new block if we have them.
512     if (unpacked.hasNextBlockHeader()) {
513       System.arraycopy(this.buf.array(), this.buf.arrayOffset() + this.onDiskDataSizeWithHeader,
514         unpacked.buf.array(), unpacked.buf.arrayOffset() + unpacked.headerSize() +
515           unpacked.uncompressedSizeWithoutHeader + unpacked.totalChecksumBytes(),
516         unpacked.headerSize());
517     }
518     return unpacked;
519   }
520 
521   /**
522    * Return true when this buffer includes next block's header.
523    */
524   private boolean hasNextBlockHeader() {
525     return nextBlockOnDiskSizeWithHeader > 0;
526   }
527 
528   /**
529    * Always allocates a new buffer of the correct size. Copies header bytes
530    * from the existing buffer. Does not change header fields. 
531    * Reserves room for the checksum bytes, too.
532    */
533   private void allocateBuffer() {
534     int cksumBytes = totalChecksumBytes();
535     int headerSize = headerSize();
536     int capacityNeeded = headerSize + uncompressedSizeWithoutHeader +
537         cksumBytes + (hasNextBlockHeader() ? headerSize : 0);
538 
539     ByteBuffer newBuf = ByteBuffer.allocate(capacityNeeded);
540 
541     // Copy header bytes.
542     System.arraycopy(buf.array(), buf.arrayOffset(), newBuf.array(),
543         newBuf.arrayOffset(), headerSize);
544 
545     buf = newBuf;
546     // set limit to exclude next block's header
547     buf.limit(headerSize + uncompressedSizeWithoutHeader + cksumBytes);
548   }
549 
550   /**
551    * Return true when this block's buffer has been unpacked, false otherwise. Note this is a
552    * calculated heuristic, not a tracked attribute of the block.
553    */
554   public boolean isUnpacked() {
555     final int cksumBytes = totalChecksumBytes();
556     final int headerSize = headerSize();
557     final int expectedCapacity = headerSize + uncompressedSizeWithoutHeader + cksumBytes;
558     final int bufCapacity = buf.capacity();
559     return bufCapacity == expectedCapacity || bufCapacity == expectedCapacity + headerSize;
560   }
561 
562   /** An additional sanity-check in case no compression or encryption is being used. */
563   public void assumeUncompressed() throws IOException {
564     if (onDiskSizeWithoutHeader != uncompressedSizeWithoutHeader +
565         totalChecksumBytes()) {
566       throw new IOException("Using no compression but "
567           + "onDiskSizeWithoutHeader=" + onDiskSizeWithoutHeader + ", "
568           + "uncompressedSizeWithoutHeader=" + uncompressedSizeWithoutHeader
569           + ", numChecksumBytes=" + totalChecksumBytes());
570     }
571   }
572 
573   /**
574    * @param expectedType the expected type of this block
575    * @throws IOException if this block's type is different than expected
576    */
577   public void expectType(BlockType expectedType) throws IOException {
578     if (blockType != expectedType) {
579       throw new IOException("Invalid block type: expected=" + expectedType
580           + ", actual=" + blockType);
581     }
582   }
583 
584   /** @return the offset of this block in the file it was read from */
585   public long getOffset() {
586     if (offset < 0) {
587       throw new IllegalStateException(
588           "HFile block offset not initialized properly");
589     }
590     return offset;
591   }
592 
593   /**
594    * @return a byte stream reading the data + checksum of this block
595    */
596   public DataInputStream getByteStream() {
597     return new DataInputStream(new ByteArrayInputStream(buf.array(),
598         buf.arrayOffset() + headerSize(), buf.limit() - headerSize()));
599   }
600 
601   @Override
602   public long heapSize() {
603     long size = ClassSize.align(
604         ClassSize.OBJECT +
605         // Block type, byte buffer and meta references
606         3 * ClassSize.REFERENCE +
607         // On-disk size, uncompressed size, next block's on-disk size,
608         // and on-disk data size with header
609         4 * Bytes.SIZEOF_INT +
610         // This and previous block offset
611         2 * Bytes.SIZEOF_LONG +
612         // Heap size of the meta object; fileContext is never null.
613         fileContext.heapSize()
614     );
615 
616     if (buf != null) {
617       // Deep overhead of the byte buffer. Needs to be aligned separately.
618       size += ClassSize.align(buf.capacity() + BYTE_BUFFER_HEAP_SIZE);
619     }
620 
621     return ClassSize.align(size);
622   }
623 
624   /**
625    * Read from an input stream. Analogous to
626    * {@link IOUtils#readFully(InputStream, byte[], int, int)}, but specifies a
627    * number of "extra" bytes that would be desirable but not absolutely
628    * necessary to read.
629    *
630    * @param in the input stream to read from
631    * @param buf the buffer to read into
632    * @param bufOffset the destination offset in the buffer
633    * @param necessaryLen the number of bytes that are absolutely necessary to
634    *          read
635    * @param extraLen the number of extra bytes that would be nice to read
636    * @return true if succeeded reading the extra bytes
637    * @throws IOException if failed to read the necessary bytes
638    */
639   public static boolean readWithExtra(InputStream in, byte buf[],
640       int bufOffset, int necessaryLen, int extraLen) throws IOException {
641     int bytesRemaining = necessaryLen + extraLen;
642     while (bytesRemaining > 0) {
643       int ret = in.read(buf, bufOffset, bytesRemaining);
644       if (ret == -1 && bytesRemaining <= extraLen) {
645         // We could not read the "extra data", but that is OK.
646         break;
647       }
648 
649       if (ret < 0) {
650         throw new IOException("Premature EOF from inputStream (read "
651             + "returned " + ret + ", was trying to read " + necessaryLen
652             + " necessary bytes and " + extraLen + " extra bytes, "
653             + "successfully read "
654             + (necessaryLen + extraLen - bytesRemaining));
655       }
656       bufOffset += ret;
657       bytesRemaining -= ret;
658     }
659     return bytesRemaining <= 0;
660   }
661 
662   /**
663    * @return the on-disk size of the next block (including the header size)
664    *         that was read by peeking into the next block's header
665    */
666   public int getNextBlockOnDiskSizeWithHeader() {
667     return nextBlockOnDiskSizeWithHeader;
668   }
669 
670   /**
671    * Unified version 2 {@link HFile} block writer. The intended usage pattern
672    * is as follows:
673    * <ol>
674    * <li>Construct an {@link HFileBlock.Writer}, providing a compression algorithm.
675    * <li>Call {@link Writer#startWriting} and get a data stream to write to.
676    * <li>Write your data into the stream.
677    * <li>Call {@link Writer#writeHeaderAndData(FSDataOutputStream)} as many times as you need to
678    * store the serialized block into an external stream.
679    * <li>Repeat to write more blocks.
680    * </ol>
681    * <p>
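   * A minimal usage sketch; {@code fsOut} and {@code myCellBytes} are placeholders for an
   * {@link FSDataOutputStream} and a payload supplied by the caller, and error handling is
   * omitted:
   * <pre>{@code
   * HFileContext ctx = new HFileContextBuilder().build();
   * HFileBlock.Writer writer = new HFileBlock.Writer(NoOpDataBlockEncoder.INSTANCE, ctx);
   * DataOutputStream dos = writer.startWriting(BlockType.DATA);
   * dos.write(myCellBytes);            // the caller's data for this block
   * writer.writeHeaderAndData(fsOut);  // finishes the block and appends it to the stream
   * // repeat startWriting()/writeHeaderAndData() for further blocks, then:
   * writer.release();
   * }</pre>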
682    */
683   public static class Writer {
684 
685     private enum State {
686       INIT,
687       WRITING,
688       BLOCK_READY
689     };
690 
691     /** Writer state. Used to ensure the correct usage protocol. */
692     private State state = State.INIT;
693 
694     /** Data block encoder used for data blocks */
695     private final HFileDataBlockEncoder dataBlockEncoder;
696 
697     private HFileBlockEncodingContext dataBlockEncodingCtx;
698 
699     /** block encoding context for non-data blocks */
700     private HFileBlockDefaultEncodingContext defaultBlockEncodingCtx;
701 
702     /**
703      * The stream we use to accumulate data in uncompressed format for each
704      * block. We reset this stream at the end of each block and reuse it. The
705      * header is written as the first {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes into this
706      * stream.
707      */
708     private ByteArrayOutputStream baosInMemory;
709 
710     /**
711      * Current block type. Set in {@link #startWriting(BlockType)}. Could be
712      * changed in {@link #finishBlock()} from {@link BlockType#DATA}
713      * to {@link BlockType#ENCODED_DATA}.
714      */
715     private BlockType blockType;
716 
717     /**
718      * A stream that we write uncompressed bytes to, which compresses them and
719      * writes them to {@link #baosInMemory}.
720      */
721     private DataOutputStream userDataStream;
722 
723     /**
724      * Bytes to be written to the file system, including the header. Compressed
725      * if compression is turned on. It also includes the checksum data that
726      * immediately follows the block data. (header + data + checksums)
727      */
728     private byte[] onDiskBytesWithHeader;
729 
730     /**
731      * The checksum values for this block, computed in {@link #finishBlock()} over
732      * the serialized block contents ({@link #onDiskBytesWithHeader}) and written
733      * to the output stream immediately after them; see
734      * {@link #finishBlockAndWriteHeaderAndData(DataOutputStream)}.
735      */
736     private byte[] onDiskChecksum;
737 
738     /**
739      * Valid in the BLOCK_READY state. Contains the header and the uncompressed (but
740      * potentially encoded, if this is a data block) bytes, so the length is
741      * {@link #uncompressedSizeWithoutHeader} + {@link org.apache.hadoop.hbase.HConstants#HFILEBLOCK_HEADER_SIZE}.
742      * Does not store checksums.
743      */
744     private byte[] uncompressedBytesWithHeader;
745 
746     /**
747      * Current block's start offset in the {@link HFile}. Set in
748      * {@link #writeHeaderAndData(FSDataOutputStream)}.
749      */
750     private long startOffset;
751 
752     /**
753      * Offset of previous block by block type. Updated when the next block is
754      * started.
755      */
756     private long[] prevOffsetByType;
757 
758     /** The offset of the previous block of the same type */
759     private long prevOffset;
760     /** Meta data that holds information about the hfileblock**/
761     private HFileContext fileContext;
762 
763     /**
764      * @param dataBlockEncoder data block encoding algorithm to use
765      */
766     public Writer(HFileDataBlockEncoder dataBlockEncoder, HFileContext fileContext) {
767       this.dataBlockEncoder = dataBlockEncoder != null
768           ? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE;
769       defaultBlockEncodingCtx = new HFileBlockDefaultEncodingContext(null,
770           HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext);
771       dataBlockEncodingCtx = this.dataBlockEncoder
772           .newDataBlockEncodingContext(HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext);
773 
774       if (fileContext.getBytesPerChecksum() < HConstants.HFILEBLOCK_HEADER_SIZE) {
775         throw new RuntimeException("Unsupported value of bytesPerChecksum. " +
776             " Minimum is " + HConstants.HFILEBLOCK_HEADER_SIZE + " but the configured value is " +
777             fileContext.getBytesPerChecksum());
778       }
779 
780       baosInMemory = new ByteArrayOutputStream();
781       
782       prevOffsetByType = new long[BlockType.values().length];
783       for (int i = 0; i < prevOffsetByType.length; ++i)
784         prevOffsetByType[i] = -1;
785 
786       this.fileContext = fileContext;
787     }
788 
789     /**
790      * Starts writing into the block. The previous block's data is discarded.
791      *
792      * @return the stream the user can write their data into
793      * @throws IOException
794      */
795     public DataOutputStream startWriting(BlockType newBlockType)
796         throws IOException {
797       if (state == State.BLOCK_READY && startOffset != -1) {
798         // We had a previous block that was written to a stream at a specific
799         // offset. Save that offset as the last offset of a block of that type.
800         prevOffsetByType[blockType.getId()] = startOffset;
801       }
802 
803       startOffset = -1;
804       blockType = newBlockType;
805 
806       baosInMemory.reset();
807       baosInMemory.write(HConstants.HFILEBLOCK_DUMMY_HEADER);
808 
809       state = State.WRITING;
810 
811       // We will compress it later in finishBlock()
812       userDataStream = new DataOutputStream(baosInMemory);
813       return userDataStream;
814     }
815 
816     /**
817      * Returns the stream for the user to write to. The block writer takes care
818      * of handling compression and buffering for caching on write. Can only be
819      * called in the "writing" state.
820      *
821      * @return the data output stream for the user to write to
822      */
823     DataOutputStream getUserDataStream() {
824       expectState(State.WRITING);
825       return userDataStream;
826     }
827 
828     /**
829      * Transitions the block writer from the "writing" state to the "block
830      * ready" state.  Does nothing if a block is already finished.
831      */
832     private void ensureBlockReady() throws IOException {
833       Preconditions.checkState(state != State.INIT,
834           "Unexpected state: " + state);
835 
836       if (state == State.BLOCK_READY)
837         return;
838 
839       // This will set state to BLOCK_READY.
840       finishBlock();
841     }
842 
843     /**
844      * An internal method that flushes the compressing stream (if using
845      * compression), serializes the header, and takes care of the separate
846      * uncompressed stream for caching on write, if applicable. Sets block
847      * write state to "block ready".
848      */
849     private void finishBlock() throws IOException {
850       userDataStream.flush();
851       // This does an array copy, so it is safe to cache this byte array.
852       uncompressedBytesWithHeader = baosInMemory.toByteArray();
853       prevOffset = prevOffsetByType[blockType.getId()];
854 
855       // We need to set state before we can package the block up for
856       // cache-on-write. In a way, the block is ready, but not yet encoded or
857       // compressed.
858       state = State.BLOCK_READY;
859       if (blockType == BlockType.DATA) {
860         encodeDataBlockForDisk();
861       } else {
862         defaultBlockEncodingCtx.compressAfterEncodingWithBlockType(
863             uncompressedBytesWithHeader, blockType);
864         onDiskBytesWithHeader =
865           defaultBlockEncodingCtx.getOnDiskBytesWithHeader();
866       }
867 
868       int numBytes = (int) ChecksumUtil.numBytes(
869           onDiskBytesWithHeader.length,
870           fileContext.getBytesPerChecksum());
871 
872       // put the header for on disk bytes
873       putHeader(onDiskBytesWithHeader, 0,
874           onDiskBytesWithHeader.length + numBytes,
875           uncompressedBytesWithHeader.length, onDiskBytesWithHeader.length);
876       // set the header for the uncompressed bytes (for cache-on-write)
877       putHeader(uncompressedBytesWithHeader, 0,
878           onDiskBytesWithHeader.length + numBytes,
879           uncompressedBytesWithHeader.length, onDiskBytesWithHeader.length);
880 
881       onDiskChecksum = new byte[numBytes];
882       ChecksumUtil.generateChecksums(
883           onDiskBytesWithHeader, 0, onDiskBytesWithHeader.length,
884           onDiskChecksum, 0, fileContext.getChecksumType(), fileContext.getBytesPerChecksum());
885     }
886 
887     /**
888      * Encodes this block if it is a data block and encoding is turned on in
889      * {@link #dataBlockEncoder}.
890      */
891     private void encodeDataBlockForDisk() throws IOException {
892       // do data block encoding, if data block encoder is set
893       ByteBuffer rawKeyValues =
894           ByteBuffer.wrap(uncompressedBytesWithHeader, HConstants.HFILEBLOCK_HEADER_SIZE,
895               uncompressedBytesWithHeader.length - HConstants.HFILEBLOCK_HEADER_SIZE).slice();
896 
897       // do the encoding
898       dataBlockEncoder.beforeWriteToDisk(rawKeyValues, dataBlockEncodingCtx, blockType);
899 
900       uncompressedBytesWithHeader =
901           dataBlockEncodingCtx.getUncompressedBytesWithHeader();
902       onDiskBytesWithHeader =
903           dataBlockEncodingCtx.getOnDiskBytesWithHeader();
904       blockType = dataBlockEncodingCtx.getBlockType();
905     }
906 
907     /**
908      * Put the header into the given byte array at the given offset.
909      * @param onDiskSize size of the block on disk (header + data + checksum)
910      * @param uncompressedSize size of the block after decompression (but
911      *          before optional data block decoding) including header
912      * @param onDiskDataSize size of the block on disk with header
913      *        and data but not including the checksums
914      */
915     private void putHeader(byte[] dest, int offset, int onDiskSize,
916         int uncompressedSize, int onDiskDataSize) {
917       offset = blockType.put(dest, offset);
918       offset = Bytes.putInt(dest, offset, onDiskSize - HConstants.HFILEBLOCK_HEADER_SIZE);
919       offset = Bytes.putInt(dest, offset, uncompressedSize - HConstants.HFILEBLOCK_HEADER_SIZE);
920       offset = Bytes.putLong(dest, offset, prevOffset);
921       offset = Bytes.putByte(dest, offset, fileContext.getChecksumType().getCode());
922       offset = Bytes.putInt(dest, offset, fileContext.getBytesPerChecksum());
923       Bytes.putInt(dest, offset, onDiskDataSize);
924     }
925 
926     /**
927      * Similar to {@link #writeHeaderAndData(FSDataOutputStream)}, but records
928      * the offset of this block so that it can be referenced in the next block
929      * of the same type.
930      *
931      * @param out
932      * @throws IOException
933      */
934     public void writeHeaderAndData(FSDataOutputStream out) throws IOException {
935       long offset = out.getPos();
936       if (startOffset != -1 && offset != startOffset) {
937         throw new IOException("A " + blockType + " block written to a "
938             + "stream twice, first at offset " + startOffset + ", then at "
939             + offset);
940       }
941       startOffset = offset;
942 
943       finishBlockAndWriteHeaderAndData((DataOutputStream) out);
944     }
945 
946     /**
947      * Writes the header and the compressed data of this block (or uncompressed
948      * data when not using compression) into the given stream. Can be called in
949      * the "writing" state or in the "block ready" state. If called in the
950      * "writing" state, transitions the writer to the "block ready" state.
951      *
952      * @param out the output stream to write the block to
953      * @throws IOException
954      */
955     private void finishBlockAndWriteHeaderAndData(DataOutputStream out)
956       throws IOException {
957       ensureBlockReady();
958       out.write(onDiskBytesWithHeader);
959       out.write(onDiskChecksum);
960     }
961 
962     /**
963      * Returns the header followed by the compressed data (or uncompressed data when not
964      * using compression) as a byte array. Can be called in the "writing" state
965      * or in the "block ready" state. If called in the "writing" state,
966      * transitions the writer to the "block ready" state. This returns
967      * the header + data + checksums stored on disk.
968      *
969      * @return header and data as they would be stored on disk in a byte array
970      * @throws IOException
971      */
972     byte[] getHeaderAndDataForTest() throws IOException {
973       ensureBlockReady();
974       // This is not very optimal, because we are doing an extra copy.
975       // But this method is used only by unit tests.
976       byte[] output =
977           new byte[onDiskBytesWithHeader.length
978               + onDiskChecksum.length];
979       System.arraycopy(onDiskBytesWithHeader, 0, output, 0,
980           onDiskBytesWithHeader.length);
981       System.arraycopy(onDiskChecksum, 0, output,
982           onDiskBytesWithHeader.length, onDiskChecksum.length);
983       return output;
984     }
985 
986     /**
987      * Releases resources used by this writer.
988      */
989     public void release() {
990       if (dataBlockEncodingCtx != null) {
991         dataBlockEncodingCtx.close();
992         dataBlockEncodingCtx = null;
993       }
994       if (defaultBlockEncodingCtx != null) {
995         defaultBlockEncodingCtx.close();
996         defaultBlockEncodingCtx = null;
997       }
998     }
999 
1000     /**
1001      * Returns the on-disk size of the data portion of the block. This is the
1002      * compressed size if compression is enabled. Can only be called in the
1003      * "block ready" state. Header is not compressed, and its size is not
1004      * included in the return value.
1005      *
1006      * @return the on-disk size of the block, not including the header.
1007      */
1008     int getOnDiskSizeWithoutHeader() {
1009       expectState(State.BLOCK_READY);
1010       return onDiskBytesWithHeader.length + onDiskChecksum.length - HConstants.HFILEBLOCK_HEADER_SIZE;
1011     }
1012 
1013     /**
1014      * Returns the on-disk size of the block. Can only be called in the
1015      * "block ready" state.
1016      *
1017      * @return the on-disk size of the block ready to be written, including the
1018      *         header size, the data and the checksum data.
1019      */
1020     int getOnDiskSizeWithHeader() {
1021       expectState(State.BLOCK_READY);
1022       return onDiskBytesWithHeader.length + onDiskChecksum.length;
1023     }
1024 
1025     /**
1026      * The uncompressed size of the block data. Does not include header size.
1027      */
1028     int getUncompressedSizeWithoutHeader() {
1029       expectState(State.BLOCK_READY);
1030       return uncompressedBytesWithHeader.length - HConstants.HFILEBLOCK_HEADER_SIZE;
1031     }
1032 
1033     /**
1034      * The uncompressed size of the block data, including header size.
1035      */
1036     int getUncompressedSizeWithHeader() {
1037       expectState(State.BLOCK_READY);
1038       return uncompressedBytesWithHeader.length;
1039     }
1040 
1041     /** @return true if a block is being written  */
1042     public boolean isWriting() {
1043       return state == State.WRITING;
1044     }
1045 
1046     /**
1047      * Returns the number of bytes written into the current block so far, or
1048      * zero if not writing the block at the moment. Note that this will return
1049      * zero in the "block ready" state as well.
1050      *
1051      * @return the number of bytes written
1052      */
1053     public int blockSizeWritten() {
1054       if (state != State.WRITING)
1055         return 0;
1056       return userDataStream.size();
1057     }
1058 
1059     /**
1060      * Returns the header followed by the uncompressed data, even if using
1061      * compression. This is needed for storing uncompressed blocks in the block
1062      * cache. Can be called in the "writing" state or the "block ready" state.
1063      * Returns only the header and data, does not include checksum data.
1064      *
1065      * @return uncompressed block bytes for caching on write
1066      */
1067     ByteBuffer getUncompressedBufferWithHeader() {
1068       expectState(State.BLOCK_READY);
1069       return ByteBuffer.wrap(uncompressedBytesWithHeader);
1070     }
1071 
1072     /**
1073      * Returns the header followed by the on-disk (compressed/encoded/encrypted) data. This is
1074      * needed for storing packed blocks in the block cache. Expects calling semantics identical to
1075      * {@link #getUncompressedBufferWithHeader()}. Returns only the header and data,
1076      * Does not include checksum data.
1077      *
1078      * @return packed block bytes for caching on write
1079      */
1080     ByteBuffer getOnDiskBufferWithHeader() {
1081       expectState(State.BLOCK_READY);
1082       return ByteBuffer.wrap(onDiskBytesWithHeader);
1083     }
1084 
1085     private void expectState(State expectedState) {
1086       if (state != expectedState) {
1087         throw new IllegalStateException("Expected state: " + expectedState +
1088             ", actual state: " + state);
1089       }
1090     }
1091 
1092     /**
1093      * Takes the given {@link BlockWritable} instance, creates a new block of
1094      * its appropriate type, writes the writable into this block, and flushes
1095      * the block into the output stream. The writer is instructed not to buffer
1096      * uncompressed bytes for cache-on-write.
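     * <p>A hypothetical sketch of a one-off block written this way; {@code writer} is a
     * {@link Writer} and {@code fsOut} an {@link FSDataOutputStream} supplied by the caller:
     * <pre>{@code
     * writer.writeBlock(new BlockWritable() {
     *   public BlockType getBlockType() {
     *     return BlockType.META;
     *   }
     *   public void writeToBlock(DataOutput dout) throws IOException {
     *     dout.writeUTF("example payload");   // placeholder contents
     *   }
     * }, fsOut);
     * }</pre>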
1097      *
1098      * @param bw the block-writable object to write as a block
1099      * @param out the file system output stream
1100      * @throws IOException
1101      */
1102     public void writeBlock(BlockWritable bw, FSDataOutputStream out)
1103         throws IOException {
1104       bw.writeToBlock(startWriting(bw.getBlockType()));
1105       writeHeaderAndData(out);
1106     }
1107 
1108     /**
1109      * Creates a new HFileBlock. Checksums have already been validated, so
1110      * the byte buffer passed into the constructor of this newly created
1111      * block does not have checksum data even though the header minor 
1112      * version is MINOR_VERSION_WITH_CHECKSUM. This is indicated by setting a
1113      * 0 value in bytesPerChecksum.
1114      */
1115     public HFileBlock getBlockForCaching(CacheConfig cacheConf) {
1116       HFileContext newContext = new HFileContextBuilder()
1117                                 .withBlockSize(fileContext.getBlocksize())
1118                                 .withBytesPerCheckSum(0)
1119                                 .withChecksumType(ChecksumType.NULL) // no checksums in cached data
1120                                 .withCompression(fileContext.getCompression())
1121                                 .withDataBlockEncoding(fileContext.getDataBlockEncoding())
1122                                 .withHBaseCheckSum(fileContext.isUseHBaseChecksum())
1123                                 .withCompressTags(fileContext.isCompressTags())
1124                                 .withIncludesMvcc(fileContext.isIncludesMvcc())
1125                                 .withIncludesTags(fileContext.isIncludesTags())
1126                                 .build();
1127       return new HFileBlock(blockType, getOnDiskSizeWithoutHeader(),
1128           getUncompressedSizeWithoutHeader(), prevOffset,
1129           cacheConf.shouldCacheCompressed(blockType.getCategory()) ?
1130             getOnDiskBufferWithHeader() :
1131             getUncompressedBufferWithHeader(),
1132           FILL_HEADER, startOffset,
1133           onDiskBytesWithHeader.length + onDiskChecksum.length, newContext);
1134     }
1135   }
1136 
1137   /** Something that can be written into a block. */
1138   public interface BlockWritable {
1139 
1140     /** The type of block this data should use. */
1141     BlockType getBlockType();
1142 
1143     /**
1144      * Writes the block to the provided stream. Must not write any magic
1145      * records.
1146      *
1147      * @param out a stream to write uncompressed data into
1148      */
1149     void writeToBlock(DataOutput out) throws IOException;
1150   }
1151 
1152   // Block readers and writers
1153 
1154   /** An interface for iterating over {@link HFileBlock}s. */
1155   public interface BlockIterator {
1156 
1157     /**
1158      * Get the next block, or null if there are no more blocks to iterate.
1159      */
1160     HFileBlock nextBlock() throws IOException;
1161 
1162     /**
1163      * Similar to {@link #nextBlock()} but checks block type, throws an
1164      * exception if incorrect, and returns the HFile block
1165      */
1166     HFileBlock nextBlockWithBlockType(BlockType blockType) throws IOException;
1167   }
1168 
1169   /** A full-fledged reader with iteration ability. */
1170   public interface FSReader {
1171 
1172     /**
1173      * Reads the block at the given offset in the file with the given on-disk
1174      * size and uncompressed size.
1175      *
1176      * @param offset
1177      * @param onDiskSize the on-disk size of the entire block, including all
1178      *          applicable headers, or -1 if unknown
1179      * @param uncompressedSize the uncompressed size of the compressed part of
1180      *          the block, or -1 if unknown
1181      * @return the newly read block
1182      */
1183     HFileBlock readBlockData(long offset, long onDiskSize,
1184         int uncompressedSize, boolean pread) throws IOException;
1185 
1186     /**
1187      * Creates a block iterator over the given portion of the {@link HFile}.
1188      * The iterator returns blocks whose offsets satisfy startOffset <=
1189      * offset < endOffset. Returned blocks are always unpacked.
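     * <p>A minimal iteration sketch, assuming {@code reader} is this {@link FSReader} and
     * {@code endOffset} bounds the region of interest:
     * <pre>{@code
     * BlockIterator it = reader.blockRange(0, endOffset);
     * for (HFileBlock b = it.nextBlock(); b != null; b = it.nextBlock()) {
     *   // examine b.getBlockType(), b.getBufferWithoutHeader(), ...
     * }
     * }</pre>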
1190      *
1191      * @param startOffset the offset of the block to start iteration with
1192      * @param endOffset the offset to end iteration at (exclusive)
1193      * @return an iterator of blocks between the two given offsets
1194      */
1195     BlockIterator blockRange(long startOffset, long endOffset);
1196 
1197     /** Closes the backing streams */
1198     void closeStreams() throws IOException;
1199 
1200     /** Get a decoder for {@link BlockType#ENCODED_DATA} blocks from this file. */
1201     HFileBlockDecodingContext getBlockDecodingContext();
1202 
1203     /** Get the default decoder for blocks from this file. */
1204     HFileBlockDecodingContext getDefaultBlockDecodingContext();
1205   }
1206 
1207   /**
1208    * A common implementation of some methods of {@link FSReader} and some
1209    * tools for implementing HFile format version-specific block readers.
1210    */
1211   private abstract static class AbstractFSReader implements FSReader {
1212     /** Compression algorithm used by the {@link HFile} */
1213 
1214     /** The size of the file we are reading from, or -1 if unknown. */
1215     protected long fileSize;
1216 
1217     /** The size of the header */
1218     protected final int hdrSize;
1219 
1220     /** The filesystem used to access data */
1221     protected HFileSystem hfs;
1222 
1223     /** The path (if any) where this data is coming from */
1224     protected Path path;
1225 
1226     private final Lock streamLock = new ReentrantLock();
1227 
1228     /** The default buffer size for our buffered streams */
1229     public static final int DEFAULT_BUFFER_SIZE = 1 << 20;
1230 
1231     protected HFileContext fileContext;
1232 
1233     public AbstractFSReader(long fileSize, HFileSystem hfs, Path path, HFileContext fileContext)
1234         throws IOException {
1235       this.fileSize = fileSize;
1236       this.hfs = hfs;
1237       this.path = path;
1238       this.fileContext = fileContext;
1239       this.hdrSize = headerSize(fileContext.isUseHBaseChecksum());
1240     }
1241 
1242     @Override
1243     public BlockIterator blockRange(final long startOffset,
1244         final long endOffset) {
1245       final FSReader owner = this; // handle for inner class
1246       return new BlockIterator() {
1247         private long offset = startOffset;
1248 
1249         @Override
1250         public HFileBlock nextBlock() throws IOException {
1251           if (offset >= endOffset)
1252             return null;
1253           HFileBlock b = readBlockData(offset, -1, -1, false);
1254           offset += b.getOnDiskSizeWithHeader();
1255           return b.unpack(fileContext, owner);
1256         }
1257 
1258         @Override
1259         public HFileBlock nextBlockWithBlockType(BlockType blockType)
1260             throws IOException {
1261           HFileBlock blk = nextBlock();
1262           if (blk.getBlockType() != blockType) {
1263             throw new IOException("Expected block of type " + blockType
1264                 + " but found " + blk.getBlockType());
1265           }
1266           return blk;
1267         }
1268       };
1269     }
1270 
1271     /**
1272      * Does a positional read or a seek and read into the given buffer. Returns
1273      * the on-disk size of the next block, or -1 if it could not be determined.
1274      *
1275      * @param dest destination buffer
1276      * @param destOffset offset in the destination buffer
1277      * @param size size of the block to be read
1278      * @param peekIntoNextBlock whether to read the next block's on-disk size
1279      * @param fileOffset position in the stream to read at
1280      * @param pread whether we should do a positional read
1281      * @param istream The input source of data
1282      * @return the on-disk size of the next block with header size included, or
1283      *         -1 if it could not be determined
1284      * @throws IOException
1285      */
1286     protected int readAtOffset(FSDataInputStream istream,
1287         byte[] dest, int destOffset, int size,
1288         boolean peekIntoNextBlock, long fileOffset, boolean pread)
1289         throws IOException {
1290       if (peekIntoNextBlock &&
1291           destOffset + size + hdrSize > dest.length) {
1292         // We are asked to read the next block's header as well, but there is
1293         // not enough room in the array.
1294         throw new IOException("Attempted to read " + size + " bytes and " +
1295             hdrSize + " bytes of next header into a " + dest.length +
1296             "-byte array at offset " + destOffset);
1297       }
1298 
1299       if (!pread && streamLock.tryLock()) {
1300         // Seek + read. Better for scanning.
1301         try {
1302           istream.seek(fileOffset);
1303 
1304           long realOffset = istream.getPos();
1305           if (realOffset != fileOffset) {
1306             throw new IOException("Tried to seek to " + fileOffset + " to "
1307                 + "read " + size + " bytes, but pos=" + realOffset
1308                 + " after seek");
1309           }
1310 
1311           if (!peekIntoNextBlock) {
1312             IOUtils.readFully(istream, dest, destOffset, size);
1313             return -1;
1314           }
1315 
1316           // Try to read the next block header.
1317           if (!readWithExtra(istream, dest, destOffset, size, hdrSize))
1318             return -1;
1319         } finally {
1320           streamLock.unlock();
1321         }
1322       } else {
1323         // Positional read. Better for random reads, or when the streamLock is already held.
1324         int extraSize = peekIntoNextBlock ? hdrSize : 0;
1325         int ret = istream.read(fileOffset, dest, destOffset, size + extraSize);
1326         if (ret < size) {
1327           throw new IOException("Positional read of " + size + " bytes " +
1328               "failed at offset " + fileOffset + " (returned " + ret + ")");
1329         }
1330 
1331         if (ret == size || ret < size + extraSize) {
1332           // Could not read the next block's header, or did not try.
1333           return -1;
1334         }
1335       }
1336 
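           // The peeked header of the next block now sits at dest[destOffset + size].
           // Its 4-byte on-disk-size field is located MAGIC_LENGTH bytes into that
           // header (right after the block magic); adding hdrSize converts it to the
           // next block's full on-disk size including the header.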
1337       assert peekIntoNextBlock;
1338       return Bytes.toInt(dest, destOffset + size + BlockType.MAGIC_LENGTH) + hdrSize;
1339     }
1340 
1341   }
1342 
1343   /**
1344    * We always prefetch the header of the next block, so that we know its
1345    * on-disk size in advance and can read it in one operation.
1346    */
1347   private static class PrefetchedHeader {
1348     long offset = -1;
1349     byte[] header = new byte[HConstants.HFILEBLOCK_HEADER_SIZE];
1350     ByteBuffer buf = ByteBuffer.wrap(header, 0, HConstants.HFILEBLOCK_HEADER_SIZE);
1351   }
1352 
1353   /** Reads version 2 blocks from the filesystem. */
1354   static class FSReaderV2 extends AbstractFSReader {
1355     /** The file system stream of the underlying {@link HFile}; it may or
1356      * may not perform checksum validation in the filesystem. */
1357     protected FSDataInputStreamWrapper streamWrapper;
1358 
1359     private HFileBlockDecodingContext encodedBlockDecodingCtx;
1360 
1361     /** Default context used when BlockType != {@link BlockType#ENCODED_DATA}. */
1362     private final HFileBlockDefaultDecodingContext defaultDecodingCtx;
1363 
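     // One prefetched header per reader thread: concurrent readers of the same
     // file keep their own look-ahead state and do not overwrite each other's.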
1364     private ThreadLocal<PrefetchedHeader> prefetchedHeaderForThread =
1365         new ThreadLocal<PrefetchedHeader>() {
1366           @Override
1367           public PrefetchedHeader initialValue() {
1368             return new PrefetchedHeader();
1369           }
1370         };
1371 
1372     public FSReaderV2(FSDataInputStreamWrapper stream, long fileSize, HFileSystem hfs, Path path,
1373         HFileContext fileContext) throws IOException {
1374       super(fileSize, hfs, path, fileContext);
1375       this.streamWrapper = stream;
1376       // Older versions of HBase did not support HBase-level checksums.
1377       this.streamWrapper.prepareForBlockReader(!fileContext.isUseHBaseChecksum());
1378       defaultDecodingCtx = new HFileBlockDefaultDecodingContext(fileContext);
1379       encodedBlockDecodingCtx = defaultDecodingCtx;
1380     }
1381 
1382     /**
1383      * A constructor that reads files with the latest minor version.
1384      * This is used by unit tests only.
1385      */
1386     FSReaderV2(FSDataInputStream istream, long fileSize, HFileContext fileContext) throws IOException {
1387       this(new FSDataInputStreamWrapper(istream), fileSize, null, null, fileContext);
1388     }
1389 
1390     /**
1391      * Reads a version 2 block. Tries to do as little memory allocation as
1392      * possible, using the provided on-disk size.
1393      *
1394      * @param offset the offset in the stream to read at
1395      * @param onDiskSizeWithHeaderL the on-disk size of the block, including
1396      *          the header, or -1 if unknown
1397      * @param uncompressedSize the uncompressed size of the block. Always
1398      *          expected to be -1; this parameter is only used in version 1.
1399      * @param pread whether to use a positional read
1400      */
1401     @Override
1402     public HFileBlock readBlockData(long offset, long onDiskSizeWithHeaderL,
1403         int uncompressedSize, boolean pread) throws IOException {
1404 
1405       // Get a copy of the current state of whether to validate
1406       // HBase checksums or not for this read call. This is not
1407       // thread-safe, but the one constraint is that if we decide
1408       // to skip HBase checksum verification then we are
1409       // guaranteed to use HDFS checksum verification.
1410       boolean doVerificationThruHBaseChecksum = streamWrapper.shouldUseHBaseChecksum();
1411       FSDataInputStream is = streamWrapper.getStream(doVerificationThruHBaseChecksum);
1412 
1413       HFileBlock blk = readBlockDataInternal(is, offset, 
1414                          onDiskSizeWithHeaderL, 
1415                          uncompressedSize, pread,
1416                          doVerificationThruHBaseChecksum);
1417       if (blk == null) {
1418         HFile.LOG.warn("HBase checksum verification failed for file " +
1419                        path + " at offset " +
1420                        offset + " filesize " + fileSize +
1421                        ". Retrying read with HDFS checksums turned on...");
1422 
1423         if (!doVerificationThruHBaseChecksum) {
1424           String msg = "HBase checksum verification failed for file " +
1425                        path + " at offset " +
1426                        offset + " filesize " + fileSize + 
1427                        " but this cannot happen because doVerify is " +
1428                        doVerificationThruHBaseChecksum;
1429           HFile.LOG.warn(msg);
1430           throw new IOException(msg); // cannot happen: HBase checksum verification was already off
1431         }
1432         HFile.checksumFailures.incrementAndGet(); // update metrics
1433 
1434         // If we have a checksum failure, we fall back into a mode where
1435         // the next few reads use HDFS-level checksums. We aim to make the
1436         // next CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD reads skip
1437         // HBase checksum verification, but since this value is set
1438         // without holding any locks, we may actually do a few more
1439         // reads than precisely this number.
1440         is = this.streamWrapper.fallbackToFsChecksum(CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD);
1441         doVerificationThruHBaseChecksum = false;
1442         blk = readBlockDataInternal(is, offset, onDiskSizeWithHeaderL,
1443                                     uncompressedSize, pread,
1444                                     doVerificationThruHBaseChecksum);
1445         if (blk != null) {
1446           HFile.LOG.warn("HDFS checksum verification succeeded for file " +
1447                          path + " at offset " +
1448                          offset + " filesize " + fileSize);
1449         }
1450       } 
1451       if (blk == null && !doVerificationThruHBaseChecksum) {
1452         String msg = "readBlockData failed, possibly due to a " +
1453                      "checksum verification failure, for file " + path +
1454                      " at offset " + offset + " filesize " + fileSize;
1455         HFile.LOG.warn(msg);
1456         throw new IOException(msg);
1457       }
1458 
1459       // If there was a checksum mismatch earlier, we retried with
1460       // HBase checksums switched off and used HDFS checksum verification
1461       // instead. This lets HDFS detect and repair corrupt replicas. The
1462       // next checksumOffCount read requests will use HDFS checksums.
1463       // Decrementing this.checksumOffCount is not thread-safe, but it
1464       // is harmless because checksumOffCount will eventually become
1465       // a negative number.
1466       streamWrapper.checksumOk();
1467       return blk;
1468     }
1469 
1470     /**
1471      * Reads a version 2 block. 
1472      *
1473      * @param offset the offset in the stream to read at
1474      * @param onDiskSizeWithHeaderL the on-disk size of the block, including
1475      *          the header, or -1 if unknown
1476      * @param uncompressedSize the uncompressed size of the block. Always
1477      *          expected to be -1; this parameter is only used in version 1.
1478      * @param pread whether to use a positional read
1479      * @param verifyChecksum Whether to use HBase checksums. 
1480      *        If HBase checksum is switched off, then use HDFS checksum.
1481      * @return the HFileBlock or null if there is a HBase checksum mismatch
1482      */
1483     private HFileBlock readBlockDataInternal(FSDataInputStream is, long offset, 
1484         long onDiskSizeWithHeaderL, int uncompressedSize, boolean pread,
1485         boolean verifyChecksum) throws IOException {
1486       if (offset < 0) {
1487         throw new IOException("Invalid offset=" + offset + " trying to read "
1488             + "block (onDiskSize=" + onDiskSizeWithHeaderL
1489             + ", uncompressedSize=" + uncompressedSize + ")");
1490       }
1491       if (uncompressedSize != -1) {
1492         throw new IOException("Version 2 block reader API does not need " +
1493             "the uncompressed size parameter");
1494       }
1495 
1496       if ((onDiskSizeWithHeaderL < hdrSize && onDiskSizeWithHeaderL != -1)
1497           || onDiskSizeWithHeaderL >= Integer.MAX_VALUE) {
1498         throw new IOException("Invalid onDisksize=" + onDiskSizeWithHeaderL
1499             + ": expected to be at least " + hdrSize
1500             + " and at most " + Integer.MAX_VALUE + ", or -1 (offset="
1501             + offset + ", uncompressedSize=" + uncompressedSize + ")");
1502       }
1503 
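           // The range checks above guarantee onDiskSizeWithHeaderL is either -1
           // (unknown) or a value below Integer.MAX_VALUE, so the narrowing cast
           // below is safe.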
1504       int onDiskSizeWithHeader = (int) onDiskSizeWithHeaderL;
1505       // See if we can avoid reading the header. This is desirable because
1506       // it saves a backward seek when this block's header has already been
1507       // read as part of the previous read's look-ahead, and it avoids
1508       // reading the same header twice when it is already available in
1509       // memory.
1510       PrefetchedHeader prefetchedHeader = prefetchedHeaderForThread.get();
1511       ByteBuffer headerBuf = prefetchedHeader.offset == offset ?
1512           prefetchedHeader.buf : null;
1513 
1514       int nextBlockOnDiskSize = 0;
1515       // Allocate enough space to fit the next block's header too.
1516       byte[] onDiskBlock = null;
1517 
1518       HFileBlock b = null;
1519       if (onDiskSizeWithHeader > 0) {
1520         // We know the total on-disk size. Read the entire block into memory,
1521         // then parse the header. This code path is used for random reads
1522         // that rely on the block index, as well as when the caller learned
1523         // the on-disk size by peeking at the next block's header (i.e. this
1524         // block's header) while reading the previous block. This is the
1525         // faster and preferred case.
1526 
1527         // Size that we have to skip in case we have already read the header.
1528         int preReadHeaderSize = headerBuf == null ? 0 : hdrSize;
1529         onDiskBlock = new byte[onDiskSizeWithHeader + hdrSize]; // room for this block plus the
1530                                                                 // next block's header
1531         nextBlockOnDiskSize = readAtOffset(is, onDiskBlock,
1532             preReadHeaderSize, onDiskSizeWithHeader - preReadHeaderSize,
1533             true, offset + preReadHeaderSize, pread);
1534         if (headerBuf != null) {
1535           // the header has been read when reading the previous block, copy
1536           // to this block's header
1537           System.arraycopy(headerBuf.array(),
1538               headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize);
1539         } else {
1540           headerBuf = ByteBuffer.wrap(onDiskBlock, 0, hdrSize);
1541         }
1542         // At this point the whole block, header included, has been read
1543         // into onDiskBlock; headerBuf holds this block's header, so parse
1544         // it to build the block.
1545         try {
1546           b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum());
1547         } catch (IOException ex) {
1548           // Seen in load testing. Provide comprehensive debug info.
1549           throw new IOException("Failed to read compressed block at "
1550               + offset
1551               + ", onDiskSizeWithHeader="
1552               + onDiskSizeWithHeader
1553               + ", headerSize="
1554               + hdrSize
1555               + ", header.length="
1556               + prefetchedHeader.header.length
1557               + ", header bytes: "
1558               + Bytes.toStringBinary(prefetchedHeader.header, 0,
1559                   hdrSize), ex);
1560         }
1561         // if the caller specifies a onDiskSizeWithHeader, validate it.
1562         int onDiskSizeWithoutHeader = onDiskSizeWithHeader - hdrSize;
1563         assert onDiskSizeWithoutHeader >= 0;
1564         b.validateOnDiskSizeWithoutHeader(onDiskSizeWithoutHeader);
1565       } else {
1566         // We do not know the on-disk size. Check headerBuf to see whether
1567         // this block's header was already read while reading the previous
1568         // block (the look-ahead there peeks at the next block's header,
1569         // i.e. this block's header). If the header is already in hand,
1570         // don't read it again.
1571 
1572         // Otherwise we unfortunately have to do a separate read operation
1573         // just to fetch the header.
1574         if (headerBuf == null) {
1575           // From the header, determine the on-disk size of the given hfile
1576           // block, and read the remaining data, thereby incurring two read
1577           // operations. This might happen when we are doing the first read
1578           // in a series of reads or a random read, and we don't have access
1579           // to the block index. This is costly and should happen very rarely.
1580           headerBuf = ByteBuffer.allocate(hdrSize);
1581           readAtOffset(is, headerBuf.array(), headerBuf.arrayOffset(),
1582               hdrSize, false, offset, pread);
1583         }
1584         b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum());
1585         onDiskBlock = new byte[b.getOnDiskSizeWithHeader() + hdrSize];
1586         System.arraycopy(headerBuf.array(), headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize);
1587         nextBlockOnDiskSize =
1588           readAtOffset(is, onDiskBlock, hdrSize, b.getOnDiskSizeWithHeader()
1589               - hdrSize, true, offset + hdrSize, pread);
1590         onDiskSizeWithHeader = b.onDiskSizeWithoutHeader + hdrSize;
1591       }
1592 
1593       if (!fileContext.isCompressedOrEncrypted()) {
1594         b.assumeUncompressed();
1595       }
1596 
1597       if (verifyChecksum && !validateBlockChecksum(b, onDiskBlock, hdrSize)) {
1598         return null;             // checksum mismatch
1599       }
1600 
1601       // The onDiskBlock will become the headerAndDataBuffer for this block.
1602       // If nextBlockOnDiskSizeWithHeader is not zero, onDiskBlock already
1603       // contains the next block's header at its tail, so there is no need
1604       // to copy that header in separately.
1605       b = new HFileBlock(ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader),
1606         this.fileContext.isUseHBaseChecksum());
1607 
1608       b.nextBlockOnDiskSizeWithHeader = nextBlockOnDiskSize;
1609 
1610       // Set prefetched header
1611       if (b.hasNextBlockHeader()) {
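           // (the next block's header was read along with this block's tail; cache
           // it per-thread so the next read at that offset can skip a separate
           // header read)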
1612         prefetchedHeader.offset = offset + b.getOnDiskSizeWithHeader();
1613         System.arraycopy(onDiskBlock, onDiskSizeWithHeader,
1614             prefetchedHeader.header, 0, hdrSize);
1615       }
1616 
1617       b.offset = offset;
1618       b.fileContext.setIncludesTags(this.fileContext.isIncludesTags());
1619       b.fileContext.setIncludesMvcc(this.fileContext.isIncludesMvcc());
1620       return b;
1621     }
1622 
1623     void setIncludesMemstoreTS(boolean includesMemstoreTS) {
1624       this.fileContext.setIncludesMvcc(includesMemstoreTS);
1625     }
1626 
1627     void setDataBlockEncoder(HFileDataBlockEncoder encoder) {
1628       encodedBlockDecodingCtx = encoder.newDataBlockDecodingContext(this.fileContext);
1629     }
1630 
1631     @Override
1632     public HFileBlockDecodingContext getBlockDecodingContext() {
1633       return this.encodedBlockDecodingCtx;
1634     }
1635 
1636     @Override
1637     public HFileBlockDecodingContext getDefaultBlockDecodingContext() {
1638       return this.defaultDecodingCtx;
1639     }
1640 
1641     /**
1642      * Generates the checksum for the header as well as the data and
1643      * then validates that it matches the value stored in the header.
1644      * If there is a checksum mismatch, then return false. Otherwise
1645      * return true.
1646      */
1647     protected boolean validateBlockChecksum(HFileBlock block,  byte[] data, int hdrSize)
1648         throws IOException {
1649       return ChecksumUtil.validateBlockChecksum(path, block, data, hdrSize);
1650     }
1651 
1652     @Override
1653     public void closeStreams() throws IOException {
1654       streamWrapper.close();
1655     }
1656 
1657     @Override
1658     public String toString() {
1659       return "FSReaderV2 [ hfs=" + hfs + " path=" + path + " fileContext=" + fileContext + " ]";
1660     }
1661   }
1662 
1663   @Override
1664   public int getSerializedLength() {
1665     if (buf != null) {
1666       // include extra bytes for the next header when it's available.
1667       int extraSpace = hasNextBlockHeader() ? headerSize() : 0;
1668       return this.buf.limit() + extraSpace + HFileBlock.EXTRA_SERIALIZATION_SPACE;
1669     }
1670     return 0;
1671   }
1672 
1673   @Override
1674   public void serialize(ByteBuffer destination) {
1675     // assumes HeapByteBuffer
1676     destination.put(this.buf.array(), this.buf.arrayOffset(),
1677       getSerializedLength() - EXTRA_SERIALIZATION_SPACE);
1678     serializeExtraInfo(destination);
1679   }
1680 
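       // The extra serialization info written below consists of a one-byte flag
       // recording whether HBase checksums are in use, the 8-byte file offset of
       // this block, and the 4-byte on-disk size (with header) of the next block.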
1681   public void serializeExtraInfo(ByteBuffer destination) {
1682     destination.put(this.fileContext.isUseHBaseChecksum() ? (byte) 1 : (byte) 0);
1683     destination.putLong(this.offset);
1684     destination.putInt(this.nextBlockOnDiskSizeWithHeader);
1685     destination.rewind();
1686   }
1687 
1688   @Override
1689   public CacheableDeserializer<Cacheable> getDeserializer() {
1690     return HFileBlock.blockDeserializer;
1691   }
1692 
1693   @Override
1694   public boolean equals(Object comparison) {
1695     if (this == comparison) {
1696       return true;
1697     }
1698     if (comparison == null) {
1699       return false;
1700     }
1701     if (comparison.getClass() != this.getClass()) {
1702       return false;
1703     }
1704 
1705     HFileBlock castedComparison = (HFileBlock) comparison;
1706 
1707     if (castedComparison.blockType != this.blockType) {
1708       return false;
1709     }
1710     if (castedComparison.nextBlockOnDiskSizeWithHeader != this.nextBlockOnDiskSizeWithHeader) {
1711       return false;
1712     }
1713     if (castedComparison.offset != this.offset) {
1714       return false;
1715     }
1716     if (castedComparison.onDiskSizeWithoutHeader != this.onDiskSizeWithoutHeader) {
1717       return false;
1718     }
1719     if (castedComparison.prevBlockOffset != this.prevBlockOffset) {
1720       return false;
1721     }
1722     if (castedComparison.uncompressedSizeWithoutHeader != this.uncompressedSizeWithoutHeader) {
1723       return false;
1724     }
1725     if (Bytes.compareTo(this.buf.array(), this.buf.arrayOffset(), this.buf.limit(),
1726       castedComparison.buf.array(), castedComparison.buf.arrayOffset(),
1727       castedComparison.buf.limit()) != 0) {
1728       return false;
1729     }
1730     return true;
1731   }
1732 
1733   public DataBlockEncoding getDataBlockEncoding() {
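     // Only ENCODED_DATA blocks record a data block encoding id; every other
     // block type is reported as DataBlockEncoding.NONE.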
1734     if (blockType == BlockType.ENCODED_DATA) {
1735       return DataBlockEncoding.getEncodingById(getDataBlockEncodingId());
1736     }
1737     return DataBlockEncoding.NONE;
1738   }
1739 
1740   byte getChecksumType() {
1741     return this.fileContext.getChecksumType().getCode();
1742   }
1743 
1744   int getBytesPerChecksum() {
1745     return this.fileContext.getBytesPerChecksum();
1746   }
1747 
1748   /** @return the size of data on disk + header. Excludes checksum. */
1749   int getOnDiskDataSizeWithHeader() {
1750     return this.onDiskDataSizeWithHeader;
1751   }
1752 
1753   /** 
1754    * Calculate the number of bytes required to store all the checksums
1755    * for this block. Each checksum value is a 4 byte integer.
1756    */
1757   int totalChecksumBytes() {
1758     // If the hfile block has minorVersion 0, then there is no checksum
1759     // data to validate. Similarly, a zero value in this.bytesPerChecksum
1760     // indicates that cached blocks do not have checksum data because
1761     // checksums were already validated when the block was read from disk.
1762     if (!fileContext.isUseHBaseChecksum() || this.fileContext.getBytesPerChecksum() == 0) {
1763       return 0;
1764     }
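     // Worked example (assuming, as documented above, one 4-byte checksum per
     // bytesPerChecksum chunk): with bytesPerChecksum = 16384 and
     // onDiskDataSizeWithHeader = 65600, there are ceil(65600 / 16384) = 5
     // chunks, so 5 * 4 = 20 checksum bytes trail the block data on disk.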
1765     return (int)ChecksumUtil.numBytes(onDiskDataSizeWithHeader, this.fileContext.getBytesPerChecksum());
1766   }
1767 
1768   /**
1769    * Returns the size of this block header.
1770    */
1771   public int headerSize() {
1772     return headerSize(this.fileContext.isUseHBaseChecksum());
1773   }
1774 
1775   /**
1776    * Returns the header size for the given checksum setting (i.e. minor version).
1777    */
1778   public static int headerSize(boolean usesHBaseChecksum) {
1779     if (usesHBaseChecksum) {
1780       return HConstants.HFILEBLOCK_HEADER_SIZE;
1781     }
1782     return HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
1783   }
1784 
1785   /**
1786    * Return the appropriate DUMMY_HEADER for the minor version
1787    */
1788   public byte[] getDummyHeaderForVersion() {
1789     return getDummyHeaderForVersion(this.fileContext.isUseHBaseChecksum());
1790   }
1791 
1792   /**
1793    * Return the appropriate DUMMY_HEADER for the minor version
1794    */
1795   static private byte[] getDummyHeaderForVersion(boolean usesHBaseChecksum) {
1796     if (usesHBaseChecksum) {
1797       return HConstants.HFILEBLOCK_DUMMY_HEADER;
1798     }
1799     return DUMMY_HEADER_NO_CHECKSUM;
1800   }
1801 
1802   /**
1803    * @return the HFileContext used to create this HFileBlock. Not necessarily the
1804    * fileContext for the file from which this block's data was originally read.
1805    */
1806   public HFileContext getHFileContext() {
1807     return this.fileContext;
1808   }
1809 
1810   /**
1811    * Convert the contents of the block header into a human readable string.
1812    * This is mostly helpful for debugging. This assumes that the block
1813    * has minor version > 0.
1814    */
1815   static String toStringHeader(ByteBuffer buf) throws IOException {
1816     int offset = buf.arrayOffset();
1817     byte[] b = buf.array();
1818     long magic = Bytes.toLong(b, offset);
1819     BlockType bt = BlockType.read(buf);
1820     offset += Bytes.SIZEOF_LONG;
1821     int compressedBlockSizeNoHeader = Bytes.toInt(b, offset);
1822     offset += Bytes.SIZEOF_INT;
1823     int uncompressedBlockSizeNoHeader = Bytes.toInt(b, offset);
1824     offset += Bytes.SIZEOF_INT;
1825     long prevBlockOffset = Bytes.toLong(b, offset); 
1826     offset += Bytes.SIZEOF_LONG;
1827     byte cksumtype = b[offset];
1828     offset += Bytes.SIZEOF_BYTE;
1829     long bytesPerChecksum = Bytes.toInt(b, offset); 
1830     offset += Bytes.SIZEOF_INT;
1831     long onDiskDataSizeWithHeader = Bytes.toInt(b, offset); 
1832     offset += Bytes.SIZEOF_INT;
1833     return " Header dump: magic: " + magic +
1834                    " blockType " + bt +
1835                    " compressedBlockSizeNoHeader " + 
1836                    compressedBlockSizeNoHeader +
1837                    " uncompressedBlockSizeNoHeader " + 
1838                    uncompressedBlockSizeNoHeader +
1839                    " prevBlockOffset " + prevBlockOffset +
1840                    " checksumType " + ChecksumType.codeToType(cksumtype) +
1841                    " bytesPerChecksum " + bytesPerChecksum +
1842                    " onDiskDataSizeWithHeader " + onDiskDataSizeWithHeader;
1843   }
1844 }