View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  import java.util.NavigableSet;
24  
25  import org.apache.hadoop.hbase.KeyValue.Type;
26  import org.apache.hadoop.hbase.classification.InterfaceAudience;
27  import org.apache.hadoop.hbase.Cell;
28  import org.apache.hadoop.hbase.CellUtil;
29  import org.apache.hadoop.hbase.HConstants;
30  import org.apache.hadoop.hbase.KeepDeletedCells;
31  import org.apache.hadoop.hbase.KeyValue;
32  import org.apache.hadoop.hbase.client.Scan;
33  import org.apache.hadoop.hbase.filter.Filter;
34  import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
35  import org.apache.hadoop.hbase.io.TimeRange;
36  import org.apache.hadoop.hbase.regionserver.DeleteTracker.DeleteResult;
37  import org.apache.hadoop.hbase.util.Bytes;
38  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
39  
40  import com.google.common.base.Preconditions;
41  
42  /**
43   * A query matcher that is specifically designed for the scan case.
44   */
45  @InterfaceAudience.Private
46  public class ScanQueryMatcher {
  // Optimization so we can skip lots of compares when we decide to skip
  // to the next row. Set by match() once the current row is exhausted and
  // cleared again by reset().
  private boolean stickyNextRow;
  /** Stop row taken from the scan; consulted by moreRowsMayExistAfter(). */
  private final byte[] stopRow;

  /** Time range taken from the scan. */
  private final TimeRange tr;

  /** Filter taken from the scan; may be null. */
  private final Filter filter;

  /** Keeps track of deletes */
  private final DeleteTracker deletes;

  /*
   * The following three fields define how we deal with deletes.
   * There are three different aspects:
   * 1. Whether to keep delete markers. This is used in compactions.
   *    Minor compactions always keep delete markers.
   * 2. Whether to keep deleted rows. This is also used in compactions,
   *    if the store is set to keep deleted rows. This implies keeping
   *    the delete markers as well.
   *    In this case deleted rows are subject to the normal max version
   *    and TTL/min version rules just like "normal" rows.
   * 3. Whether a scan can do time travel queries even before deleted
   *    marker to reach deleted rows.
   */
  /**
   * whether to retain delete markers; not final because
   * checkPartialDropDeleteRange() toggles it while rows are visited
   */
  private boolean retainDeletesInOutput;

  /** whether to return deleted rows */
  private final KeepDeletedCells keepDeletedCells;
  /** whether time range queries can see rows "behind" a delete */
  private final boolean seePastDeleteMarkers;


  /** Keeps track of columns and versions */
  private final ColumnTracker columns;

  /** Key to seek to in memstore and StoreFiles */
  private final KeyValue startKey;

  /** Row comparator for the region this query is for */
  private final KeyValue.KVComparator rowComparator;

  /* row is not private for tests */
  /** Row the query is on (backing array, offset and length; see setRow()) */
  byte [] row;
  int rowOffset;
  short rowLength;

  /**
   * Oldest put in any of the involved store files.
   * Used to decide whether it is OK to drop a delete marker
   * when this store keeps deleted KVs.
   */
  private final long earliestPutTs;
  /**
   * Set to oldestUnexpiredTS by the constructor; compared against delete
   * marker timestamps when keepDeletedCells is KeepDeletedCells.TTL.
   */
  private final long ttl;

  /** The oldest timestamp we are interested in, based on TTL */
  private final long oldestUnexpiredTS;
  /** Time supplied by the caller; passed to the per-cell TTL check in match(). */
  private final long now;

  /** readPoint over which the KVs are unconditionally included */
  protected long maxReadPointToTrackVersions;

  /**
   * Bounds of the row range in which deletes are dropped. Both stay null unless
   * the partial-drop-deletes constructor is used; each is reset to null by
   * checkPartialDropDeleteRange() once the corresponding bound has been passed.
   */
  private byte[] dropDeletesFromRow = null, dropDeletesToRow = null;

  /**
   * This variable shows whether there is a null column in the query. There
   * always exists a null column in the wildcard column query.
   * There may exist a null column in the explicit column query, depending on
   * the first column.
   */
  private boolean hasNullColumn = true;

  /** Coprocessor host consulted when the delete tracker is created; may be null. */
  private RegionCoprocessorHost regionCoprocessorHost= null;

  // By default, when hbase.hstore.time.to.purge.deletes is 0ms, a delete
  // marker is always removed during a major compaction. If set to non-zero
  // value then major compaction will try to keep a delete marker around for
  // the given number of milliseconds. We want to keep the delete markers
  // around a bit longer because old puts might appear out-of-order. For
  // example, during log replication between two clusters.
  //
  // If the delete marker has lived longer than its column-family's TTL then
  // the delete marker will be removed even if time.to.purge.deletes has not
  // passed. This is because all the Puts that this delete marker can influence
  // would have also expired. (Removing of delete markers on col family TTL will
  // not happen if min-versions is set to non-zero)
  //
  // But, if time.to.purge.deletes has not expired then a delete
  // marker will not be removed just because there are no Puts that it is
  // currently influencing. This is because Puts that this delete can
  // influence may appear out of order.
  private final long timeToPurgeDeletes;

  /** True when the scan type passed to the constructor is ScanType.USER_SCAN. */
  private final boolean isUserScan;

  /** Copy of scan.isReversed(); flips the row-order decisions in match(). */
  private final boolean isReversed;
146 
147   /**
148    * Construct a QueryMatcher for a scan
149    * @param scan
150    * @param scanInfo The store's immutable scan info
151    * @param columns
152    * @param scanType Type of the scan
153    * @param earliestPutTs Earliest put seen in any of the store files.
154    * @param oldestUnexpiredTS the oldest timestamp we are interested in,
155    *  based on TTL
156    * @param regionCoprocessorHost 
157    * @throws IOException 
158    */
159   public ScanQueryMatcher(Scan scan, ScanInfo scanInfo, NavigableSet<byte[]> columns,
160       ScanType scanType, long readPointToUse, long earliestPutTs, long oldestUnexpiredTS,
161       long now, RegionCoprocessorHost regionCoprocessorHost) throws IOException {
162     this.tr = scan.getTimeRange();
163     this.rowComparator = scanInfo.getComparator();
164     this.regionCoprocessorHost = regionCoprocessorHost;
165     this.deletes =  instantiateDeleteTracker();
166     this.stopRow = scan.getStopRow();
167     this.startKey = KeyValue.createFirstDeleteFamilyOnRow(scan.getStartRow(),
168         scanInfo.getFamily());
169     this.filter = scan.getFilter();
170     this.earliestPutTs = earliestPutTs;
171     this.oldestUnexpiredTS = oldestUnexpiredTS;
172     this.now = now;
173 
174     this.maxReadPointToTrackVersions = readPointToUse;
175     this.timeToPurgeDeletes = scanInfo.getTimeToPurgeDeletes();
176     this.ttl = oldestUnexpiredTS;
177 
178     /* how to deal with deletes */
179     this.isUserScan = scanType == ScanType.USER_SCAN;
180     // keep deleted cells: if compaction or raw scan
181     this.keepDeletedCells = scan.isRaw() ? KeepDeletedCells.TRUE :
182       isUserScan ? KeepDeletedCells.FALSE : scanInfo.getKeepDeletedCells();
183     // retain deletes: if minor compaction or raw scanisDone
184     this.retainDeletesInOutput = scanType == ScanType.COMPACT_RETAIN_DELETES || scan.isRaw();
185     // seePastDeleteMarker: user initiated scans
186     this.seePastDeleteMarkers =
187         scanInfo.getKeepDeletedCells() != KeepDeletedCells.FALSE && isUserScan;
188 
189     int maxVersions =
190         scan.isRaw() ? scan.getMaxVersions() : Math.min(scan.getMaxVersions(),
191           scanInfo.getMaxVersions());
192 
193     // Single branch to deal with two types of reads (columns vs all in family)
194     if (columns == null || columns.size() == 0) {
195       // there is always a null column in the wildcard column query.
196       hasNullColumn = true;
197 
198       // use a specialized scan for wildcard column tracker.
199       this.columns = new ScanWildcardColumnTracker(
200           scanInfo.getMinVersions(), maxVersions, oldestUnexpiredTS);
201     } else {
202       // whether there is null column in the explicit column query
203       hasNullColumn = (columns.first().length == 0);
204 
205       // We can share the ExplicitColumnTracker, diff is we reset
206       // between rows, not between storefiles.
207       this.columns = new ExplicitColumnTracker(columns, scanInfo.getMinVersions(), maxVersions,
208           oldestUnexpiredTS);
209     }
210     this.isReversed = scan.isReversed();
211   }
212 
213   private DeleteTracker instantiateDeleteTracker() throws IOException {
214     DeleteTracker tracker = new ScanDeleteTracker();
215     if (regionCoprocessorHost != null) {
216       tracker = regionCoprocessorHost.postInstantiateDeleteTracker(tracker);
217     }
218     return tracker;
219   }
220 
221   /**
222    * Construct a QueryMatcher for a scan that drop deletes from a limited range of rows.
223    * @param scan
224    * @param scanInfo The store's immutable scan info
225    * @param columns
226    * @param earliestPutTs Earliest put seen in any of the store files.
227    * @param oldestUnexpiredTS the oldest timestamp we are interested in, based on TTL
228    * @param now the current server time
229    * @param dropDeletesFromRow The inclusive left bound of the range; can be EMPTY_START_ROW.
230    * @param dropDeletesToRow The exclusive right bound of the range; can be EMPTY_END_ROW.
231    * @param regionCoprocessorHost 
232    * @throws IOException 
233    */
234   public ScanQueryMatcher(Scan scan, ScanInfo scanInfo, NavigableSet<byte[]> columns,
235       long readPointToUse, long earliestPutTs, long oldestUnexpiredTS, long now, byte[] dropDeletesFromRow,
236       byte[] dropDeletesToRow, RegionCoprocessorHost regionCoprocessorHost) throws IOException {
237     this(scan, scanInfo, columns, ScanType.COMPACT_RETAIN_DELETES, readPointToUse, earliestPutTs,
238         oldestUnexpiredTS, now, regionCoprocessorHost);
239     Preconditions.checkArgument((dropDeletesFromRow != null) && (dropDeletesToRow != null));
240     this.dropDeletesFromRow = dropDeletesFromRow;
241     this.dropDeletesToRow = dropDeletesToRow;
242   }
243 
244   /*
245    * Constructor for tests
246    */
247   ScanQueryMatcher(Scan scan, ScanInfo scanInfo,
248       NavigableSet<byte[]> columns, long oldestUnexpiredTS, long now) throws IOException {
249     this(scan, scanInfo, columns, ScanType.USER_SCAN,
250           Long.MAX_VALUE, /* max Readpoint to track versions */
251         HConstants.LATEST_TIMESTAMP, oldestUnexpiredTS, now, null);
252   }
253 
254   /**
255    *
256    * @return  whether there is an null column in the query
257    */
258   public boolean hasNullColumnInQuery() {
259     return hasNullColumn;
260   }
261 
  /**
   * Determines if the caller should do one of several things:
   * - seek/skip to the next row (MatchCode.SEEK_NEXT_ROW)
   * - seek/skip to the next column (MatchCode.SEEK_NEXT_COL)
   * - include the current KeyValue (MatchCode.INCLUDE)
   * - ignore the current KeyValue (MatchCode.SKIP)
   * - go to the next row (MatchCode.DONE)
   * - stop the scan because the filter reports that nothing more can match
   *   (MatchCode.DONE_SCAN)
   * - seek to a key hinted by the filter (MatchCode.SEEK_NEXT_USING_HINT)
   *
   * The checks run in a fixed order: filter exhaustion, row comparison against
   * the row set via setRow(), sticky next-row shortcut, column tracker
   * exhaustion, timestamp early-out, cell TTL, delete handling, time range,
   * column check, filter, and finally version counting.
   *
   * @param kv KeyValue to check
   * @return The match code instance.
   * @throws IOException in case there is an internal consistency problem
   *      caused by a data corruption.
   */
  public MatchCode match(KeyValue kv) throws IOException {
    if (filter != null && filter.filterAllRemaining()) {
      return MatchCode.DONE_SCAN;
    }

    // Decode the key directly from the backing buffer instead of going
    // through the KeyValue accessors.
    byte [] bytes = kv.getBuffer();
    int offset = kv.getOffset();

    int keyLength = Bytes.toInt(bytes, offset, Bytes.SIZEOF_INT);
    offset += KeyValue.ROW_OFFSET;

    // Start of the key; the timestamp and type are located relative to it below.
    int initialOffset = offset;

    // NOTE: this local shadows the rowLength field; the field is always
    // referenced as this.rowLength in this method.
    short rowLength = Bytes.toShort(bytes, offset, Bytes.SIZEOF_SHORT);
    offset += Bytes.SIZEOF_SHORT;

    // Compare the matcher's current row (left) with the cell's row (right).
    int ret = this.rowComparator.compareRows(row, this.rowOffset, this.rowLength,
        bytes, offset, rowLength);
    if (!this.isReversed) {
      if (ret <= -1) {
        return MatchCode.DONE;
      } else if (ret >= 1) {
        // could optimize this, if necessary?
        // Could also be called SEEK_TO_CURRENT_ROW, but this
        // should be rare/never happens.
        return MatchCode.SEEK_NEXT_ROW;
      }
    } else {
      // Reversed scan: the two outcomes are swapped.
      if (ret <= -1) {
        return MatchCode.SEEK_NEXT_ROW;
      } else if (ret >= 1) {
        return MatchCode.DONE;
      }
    }

    // optimize case.
    if (this.stickyNextRow)
        return MatchCode.SEEK_NEXT_ROW;

    if (this.columns.done()) {
      stickyNextRow = true;
      return MatchCode.SEEK_NEXT_ROW;
    }

    //Passing rowLength
    offset += rowLength;

    //Skipping family
    byte familyLength = bytes [offset];
    offset += familyLength + 1;

    // offset now points at the qualifier; whatever is left of the key before
    // the trailing timestamp+type is the qualifier.
    int qualLength = keyLength -
      (offset - initialOffset) - KeyValue.TIMESTAMP_TYPE_SIZE;

    long timestamp = Bytes.toLong(bytes, initialOffset + keyLength - KeyValue.TIMESTAMP_TYPE_SIZE);
    // check for early out based on timestamp alone
    if (columns.isDone(timestamp)) {
      return columns.getNextRowOrNextColumn(kv.getQualifierArray(), kv.getQualifierOffset(),
        kv.getQualifierLength());
    }
    // check if the cell is expired by cell TTL
    if (HStore.isCellTTLExpired(kv, this.oldestUnexpiredTS, this.now)) {
      return MatchCode.SKIP;
    }

    /*
     * The delete logic is pretty complicated now.
     * This is complicated by the following:
     * 1. The store might be instructed to keep deleted rows around.
     * 2. A scan can optionally see past a delete marker now.
     * 3. If deleted rows are kept, we have to find out when we can
     *    remove the delete markers.
     * 4. Family delete markers are always first (regardless of their TS)
     * 5. Delete markers should not be counted as version
     * 6. Delete markers affect puts of the *same* TS
     * 7. Delete marker need to be version counted together with puts
     *    they affect
     */
    // The type code is the last byte of the key.
    byte type = bytes[initialOffset + keyLength - 1];
    if (kv.isDelete()) {
      if (keepDeletedCells == KeepDeletedCells.FALSE
          || (keepDeletedCells == KeepDeletedCells.TTL && timestamp < ttl)) {
        // first ignore delete markers if the scanner can do so, and the
        // range does not include the marker
        //
        // during flushes and compactions also ignore delete markers newer
        // than the readpoint of any open scanner, this prevents deleted
        // rows that could still be seen by a scanner from being collected
        boolean includeDeleteMarker = seePastDeleteMarkers ?
            tr.withinTimeRange(timestamp) :
            tr.withinOrAfterTimeRange(timestamp);
        if (includeDeleteMarker
            && kv.getMvccVersion() <= maxReadPointToTrackVersions) {
          this.deletes.add(kv);
        }
        // Can't early out now, because DelFam come before any other keys
      }

      // Non-user scans keep a marker that is still inside the
      // time-to-purge-deletes window, measured against the current time.
      if ((!isUserScan)
          && timeToPurgeDeletes > 0
          && (EnvironmentEdgeManager.currentTimeMillis() - timestamp) <= timeToPurgeDeletes) {
        return MatchCode.INCLUDE;
      } else if (retainDeletesInOutput || kv.getMvccVersion() > maxReadPointToTrackVersions) {
        // always include or it is not time yet to check whether it is OK
        // to purge deletes or not
        if (!isUserScan) {
          // if this is not a user scan (compaction), we can filter this deletemarker right here
          // otherwise (i.e. a "raw" scan) we fall through to normal version and timerange checking
          return MatchCode.INCLUDE;
        }
      } else if (keepDeletedCells == KeepDeletedCells.TRUE
          || (keepDeletedCells == KeepDeletedCells.TTL && timestamp >= ttl)) {
        if (timestamp < earliestPutTs) {
          // keeping delete rows, but there are no puts older than
          // this delete in the store files.
          return columns.getNextRowOrNextColumn(bytes, offset, qualLength);
        }
        // else: fall through and do version counting on the
        // delete markers
      } else {
        return MatchCode.SKIP;
      }
      // note the following next else if...
      // delete marker are not subject to other delete markers
    } else if (!this.deletes.isEmpty()) {
      DeleteResult deleteResult = deletes.isDeleted(kv);
      switch (deleteResult) {
        // The whole column is masked: nothing more to return for it.
        case FAMILY_DELETED:
        case COLUMN_DELETED:
          return columns.getNextRowOrNextColumn(bytes, offset, qualLength);
        // Only this version is masked: later versions may still qualify.
        case VERSION_DELETED:
        case FAMILY_VERSION_DELETED:
          return MatchCode.SKIP;
        case NOT_DELETED:
          break;
        default:
          throw new RuntimeException("UNEXPECTED");
        }
    }

    // Time range check: a positive comparison skips just this cell, a
    // negative one abandons the rest of the column (or row).
    int timestampComparison = tr.compare(timestamp);
    if (timestampComparison >= 1) {
      return MatchCode.SKIP;
    } else if (timestampComparison <= -1) {
      return columns.getNextRowOrNextColumn(bytes, offset, qualLength);
    }

    // STEP 1: Check if the column is part of the requested columns
    MatchCode colChecker = columns.checkColumn(bytes, offset, qualLength, type);
    if (colChecker == MatchCode.INCLUDE) {
      // Initial value is only a placeholder: it is overwritten when a filter
      // is present and otherwise only compared against INCLUDE_AND_NEXT_COL.
      ReturnCode filterResponse = ReturnCode.SKIP;
      // STEP 2: Yes, the column is part of the requested columns. Check if filter is present
      if (filter != null) {
        // STEP 3: Filter the key value and return if it filters out
        filterResponse = filter.filterKeyValue(kv);
        switch (filterResponse) {
        case SKIP:
          return MatchCode.SKIP;
        case NEXT_COL:
          return columns.getNextRowOrNextColumn(bytes, offset, qualLength);
        case NEXT_ROW:
          stickyNextRow = true;
          return MatchCode.SEEK_NEXT_ROW;
        case SEEK_NEXT_USING_HINT:
          return MatchCode.SEEK_NEXT_USING_HINT;
        default:
          //It means it is either include or include and seek next
          break;
        }
      }
      /*
       * STEP 4: Reaching this step means the column is part of the requested columns and either
       * the filter is null or the filter has returned INCLUDE or INCLUDE_AND_NEXT_COL response.
       * Now check the number of versions needed. This method call returns SKIP, INCLUDE,
       * INCLUDE_AND_SEEK_NEXT_ROW, INCLUDE_AND_SEEK_NEXT_COL.
       *
       * FilterResponse            ColumnChecker               Desired behavior
       * INCLUDE                   SKIP                        row has already been included, SKIP.
       * INCLUDE                   INCLUDE                     INCLUDE
       * INCLUDE                   INCLUDE_AND_SEEK_NEXT_COL   INCLUDE_AND_SEEK_NEXT_COL
       * INCLUDE                   INCLUDE_AND_SEEK_NEXT_ROW   INCLUDE_AND_SEEK_NEXT_ROW
       * INCLUDE_AND_SEEK_NEXT_COL SKIP                        row has already been included, SKIP.
       * INCLUDE_AND_SEEK_NEXT_COL INCLUDE                     INCLUDE_AND_SEEK_NEXT_COL
       * INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_SEEK_NEXT_COL   INCLUDE_AND_SEEK_NEXT_COL
       * INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW   INCLUDE_AND_SEEK_NEXT_ROW
       *
       * In all the above scenarios, we return the column checker return value except for
       * FilterResponse (INCLUDE_AND_SEEK_NEXT_COL) and ColumnChecker(INCLUDE)
       */
      // The last argument is true for cells whose MVCC version is above the
      // tracked read point.
      colChecker =
          columns.checkVersions(bytes, offset, qualLength, timestamp, type,
            kv.getMvccVersion() > maxReadPointToTrackVersions);
      //Optimize with stickyNextRow
      stickyNextRow = colChecker == MatchCode.INCLUDE_AND_SEEK_NEXT_ROW ? true : stickyNextRow;
      return (filterResponse == ReturnCode.INCLUDE_AND_NEXT_COL &&
          colChecker == MatchCode.INCLUDE) ? MatchCode.INCLUDE_AND_SEEK_NEXT_COL
          : colChecker;
    }
    // Column not requested: remember a next-row verdict so later cells of
    // this row take the sticky shortcut above.
    stickyNextRow = (colChecker == MatchCode.SEEK_NEXT_ROW) ? true
        : stickyNextRow;
    return colChecker;
  }
477 
478   /** Handle partial-drop-deletes. As we match keys in order, when we have a range from which
479    * we can drop deletes, we can set retainDeletesInOutput to false for the duration of this
480    * range only, and maintain consistency. */
481   private void checkPartialDropDeleteRange(byte [] row, int offset, short length) {
482     // If partial-drop-deletes are used, initially, dropDeletesFromRow and dropDeletesToRow
483     // are both set, and the matcher is set to retain deletes. We assume ordered keys. When
484     // dropDeletesFromRow is leq current kv, we start dropping deletes and reset
485     // dropDeletesFromRow; thus the 2nd "if" starts to apply.
486     if ((dropDeletesFromRow != null)
487         && ((dropDeletesFromRow == HConstants.EMPTY_START_ROW)
488           || (Bytes.compareTo(row, offset, length,
489               dropDeletesFromRow, 0, dropDeletesFromRow.length) >= 0))) {
490       retainDeletesInOutput = false;
491       dropDeletesFromRow = null;
492     }
493     // If dropDeletesFromRow is null and dropDeletesToRow is set, we are inside the partial-
494     // drop-deletes range. When dropDeletesToRow is leq current kv, we stop dropping deletes,
495     // and reset dropDeletesToRow so that we don't do any more compares.
496     if ((dropDeletesFromRow == null)
497         && (dropDeletesToRow != null) && (dropDeletesToRow != HConstants.EMPTY_END_ROW)
498         && (Bytes.compareTo(row, offset, length,
499             dropDeletesToRow, 0, dropDeletesToRow.length) >= 0)) {
500       retainDeletesInOutput = true;
501       dropDeletesToRow = null;
502     }
503   }
504 
505   public boolean moreRowsMayExistAfter(KeyValue kv) {
506     if (this.isReversed) {
507       if (rowComparator.compareRows(kv.getRowArray(), kv.getRowOffset(),
508           kv.getRowLength(), stopRow, 0, stopRow.length) <= 0) {
509         return false;
510       } else {
511         return true;
512       }
513     }
514     if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) &&
515         rowComparator.compareRows(kv.getRowArray(),kv.getRowOffset(),
516             kv.getRowLength(), stopRow, 0, stopRow.length) >= 0) {
517       // KV >= STOPROW
518       // then NO there is nothing left.
519       return false;
520     } else {
521       return true;
522     }
523   }
524 
525   /**
526    * Set current row
527    * @param row
528    */
529   public void setRow(byte [] row, int offset, short length) {
530     checkPartialDropDeleteRange(row, offset, length);
531     this.row = row;
532     this.rowOffset = offset;
533     this.rowLength = length;
534     reset();
535   }
536 
537   public void reset() {
538     this.deletes.reset();
539     this.columns.reset();
540 
541     stickyNextRow = false;
542   }
543 
544   /**
545    *
546    * @return the start key
547    */
548   public KeyValue getStartKey() {
549     return this.startKey;
550   }
551 
552   /**
553    *
554    * @return the Filter
555    */
556   Filter getFilter() {
557     return this.filter;
558   }
559 
560   public Cell getNextKeyHint(Cell kv) throws IOException {
561     if (filter == null) {
562       return null;
563     } else {
564       return filter.getNextCellHint(kv);
565     }
566   }
567 
568   public KeyValue getKeyForNextColumn(KeyValue kv) {
569     ColumnCount nextColumn = columns.getColumnHint();
570     if (nextColumn == null) {
571       return KeyValue.createLastOnRow(
572           kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
573           kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(),
574           kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength());
575     } else {
576       return KeyValue.createFirstOnRow(
577           kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
578           kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(),
579           nextColumn.getBuffer(), nextColumn.getOffset(), nextColumn.getLength());
580     }
581   }
582 
583   public KeyValue getKeyForNextRow(KeyValue kv) {
584     return KeyValue.createLastOnRow(
585         kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
586         null, 0, 0,
587         null, 0, 0);
588   }
589 
590   /**
591    * @param nextIndexed the key of the next entry in the block index (if any)
592    * @param kv The Cell we're using to calculate the seek key
593    * @return result of the compare between the indexed key and the key portion of the passed cell
594    */
595   public int compareKeyForNextRow(byte[] nextIndexed, Cell kv) {
596     return rowComparator.compareKey(nextIndexed, 0, nextIndexed.length,
597       kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
598       null, 0, 0, null, 0, 0,
599       HConstants.OLDEST_TIMESTAMP, Type.Minimum.getCode());
600   }
601 
602   /**
603    * @param nextIndexed the key of the next entry in the block index (if any)
604    * @param kv The Cell we're using to calculate the seek key
605    * @return result of the compare between the indexed key and the key portion of the passed cell
606    */
607   public int compareKeyForNextColumn(byte[] nextIndexed, Cell kv) {
608     ColumnCount nextColumn = columns.getColumnHint();
609     if (nextColumn == null) {
610       return rowComparator.compareKey(nextIndexed, 0, nextIndexed.length,
611         kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
612         kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(),
613         kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength(),
614         HConstants.OLDEST_TIMESTAMP, Type.Minimum.getCode());
615     } else {
616       return rowComparator.compareKey(nextIndexed, 0, nextIndexed.length,
617         kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
618         kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(),
619         nextColumn.getBuffer(), nextColumn.getOffset(), nextColumn.getLength(),
620         HConstants.LATEST_TIMESTAMP, Type.Maximum.getCode());
621     }
622   }
623 
624   //Used only for testing purposes
625   static MatchCode checkColumn(ColumnTracker columnTracker, byte[] bytes, int offset,
626       int length, long ttl, byte type, boolean ignoreCount) throws IOException {
627     MatchCode matchCode = columnTracker.checkColumn(bytes, offset, length, type);
628     if (matchCode == MatchCode.INCLUDE) {
629       return columnTracker.checkVersions(bytes, offset, length, ttl, type, ignoreCount);
630     }
631     return matchCode;
632   }
633 
634   /**
635    * {@link #match} return codes.  These instruct the scanner moving through
636    * memstores and StoreFiles what to do with the current KeyValue.
637    * <p>
638    * Additionally, this contains "early-out" language to tell the scanner to
639    * move on to the next File (memstore or Storefile), or to return immediately.
640    */
641   public static enum MatchCode {
642     /**
643      * Include KeyValue in the returned result
644      */
645     INCLUDE,
646 
647     /**
648      * Do not include KeyValue in the returned result
649      */
650     SKIP,
651 
652     /**
653      * Do not include, jump to next StoreFile or memstore (in time order)
654      */
655     NEXT,
656 
657     /**
658      * Do not include, return current result
659      */
660     DONE,
661 
662     /**
663      * These codes are used by the ScanQueryMatcher
664      */
665 
666     /**
667      * Done with the row, seek there.
668      */
669     SEEK_NEXT_ROW,
670     /**
671      * Done with column, seek to next.
672      */
673     SEEK_NEXT_COL,
674 
675     /**
676      * Done with scan, thanks to the row filter.
677      */
678     DONE_SCAN,
679 
680     /*
681      * Seek to next key which is given as hint.
682      */
683     SEEK_NEXT_USING_HINT,
684 
685     /**
686      * Include KeyValue and done with column, seek to next.
687      */
688     INCLUDE_AND_SEEK_NEXT_COL,
689 
690     /**
691      * Include KeyValue and done with row, seek to next.
692      */
693     INCLUDE_AND_SEEK_NEXT_ROW,
694   }
695 }