/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.client;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
import org.apache.hadoop.hbase.io.TimeRange;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Used to perform Scan operations.
 * <p>
 * All operations are identical to {@link Get} with the exception of
 * instantiation. Rather than specifying a single row, an optional startRow
 * and stopRow may be defined. If rows are not specified, the Scanner will
 * iterate over all rows.
 * <p>
 * To scan everything for each row, instantiate a Scan object.
 * <p>
 * To modify scanner caching for just this scan, use {@link #setCaching(int) setCaching}.
 * If caching is NOT set, we will use the caching value of the hosting {@link HTable}. See
 * {@link HTable#setScannerCaching(int)}. In addition to row caching, it is possible to specify a
 * maximum result size, using {@link #setMaxResultSize(long)}. When both are used,
 * single server requests are limited by either number of rows or maximum result size, whichever
 * limit comes first.
 * <p>
 * To further define the scope of what to get when scanning, call the additional
 * methods outlined below.
 * <p>
 * To get all columns from specific families, execute {@link #addFamily(byte[]) addFamily}
 * for each family to retrieve.
 * <p>
 * To get specific columns, execute {@link #addColumn(byte[], byte[]) addColumn}
 * for each column to retrieve.
 * <p>
 * To only retrieve columns within a specific range of version timestamps,
 * execute {@link #setTimeRange(long, long) setTimeRange}.
 * <p>
 * To only retrieve columns with a specific timestamp, execute
 * {@link #setTimeStamp(long) setTimeStamp}.
 * <p>
 * To limit the number of versions of each column to be returned, execute
 * {@link #setMaxVersions(int) setMaxVersions}.
 * <p>
 * To limit the maximum number of values returned for each call to next(),
 * execute {@link #setBatch(int) setBatch}.
 * <p>
 * To add a filter, execute {@link #setFilter(org.apache.hadoop.hbase.filter.Filter) setFilter}.
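 * <p>
 * For example, a minimal usage sketch (the table handle, family, qualifier, and row keys below
 * are illustrative assumptions, not part of this class):
 * <pre>{@code
 * Scan scan = new Scan(Bytes.toBytes("row-000"), Bytes.toBytes("row-999"));
 * scan.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("qual"));
 * scan.setCaching(100);
 * ResultScanner scanner = table.getScanner(scan); // "table" obtained elsewhere
 * try {
 *   for (Result result : scanner) {
 *     // process each Result
 *   }
 * } finally {
 *   scanner.close();
 * }
 * }</pre>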
 * <p>
 * Expert: To explicitly disable server-side block caching for this scan,
 * execute {@link #setCacheBlocks(boolean)}.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class Scan extends Query {
  private static final Log LOG = LogFactory.getLog(Scan.class);

  private static final String RAW_ATTR = "_raw_";

  /**
   * EXPERT ONLY.
   * An integer (not long) indicating to the scanner logic how many times we attempt to retrieve
   * the next KV before we schedule a reseek.
   * The right value depends on the size of the average KV. A reseek is more efficient when
   * it can skip 5-10 KVs or 512B-1KB, or when the next KV is likely found in another HFile block.
   * Setting this only has an effect when columns were added with
   * {@link #addColumn(byte[], byte[])}.
   * <pre>{@code
   * Scan s = new Scan(...);
   * s.addColumn(...);
   * s.setAttribute(Scan.HINT_LOOKAHEAD, Bytes.toBytes(2));
   * }</pre>
   * Default is 0 (always reseek).
   * @deprecated without replacement
   *             This is now a no-op; SEEKs and SKIPs are optimized automatically.
   */
  @Deprecated
  public static final String HINT_LOOKAHEAD = "_look_ahead_";

  private byte [] startRow = HConstants.EMPTY_START_ROW;
  private byte [] stopRow = HConstants.EMPTY_END_ROW;
  private int maxVersions = 1;
  private int batch = -1;

  private int storeLimit = -1;
  private int storeOffset = 0;
  private boolean getScan;

  // If the application wants to collect scan metrics, it needs to
  // call scan.setAttribute(SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.TRUE))
  static public final String SCAN_ATTRIBUTES_METRICS_ENABLE = "scan.attributes.metrics.enable";
  static public final String SCAN_ATTRIBUTES_METRICS_DATA = "scan.attributes.metrics.data";

  // If an application wants to use multiple scans over different tables each scan must
  // define this attribute with the appropriate table name by calling
  // scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName))
  static public final String SCAN_ATTRIBUTES_TABLE_NAME = "scan.attributes.table.name";

  /*
   * -1 means no caching
   */
  private int caching = -1;
  private long maxResultSize = -1;
  private boolean cacheBlocks = true;
  private boolean reversed = false;
  private TimeRange tr = new TimeRange();
  private Map<byte [], NavigableSet<byte []>> familyMap =
      new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);
  private Boolean loadColumnFamiliesOnDemand = null;

  /**
   * Set this to true for a small scan to get better performance.
   *
   * A small scan should use pread, while a big scan can use seek + read.
   *
   * seek + read is fast but can cause two problems: (1) resource contention and
   * (2) too much network I/O.
   *
   * [89-fb] Using pread for non-compaction read request
   * https://issues.apache.org/jira/browse/HBASE-7266
   *
   * On the other hand, if this is set to true, we do openScanner, next, and
   * closeScanner in one RPC call, which means better performance for a small
   * scan. [HBASE-9488].
   *
   * Generally, if the scan range is within one data block (64KB), it can be
   * considered a small scan.
   */
  private boolean small = false;

  /**
   * Create a Scan operation across all rows.
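   * <p>
   * For example, a sketch of scanning every row in a table (disabling block caching here is a
   * common practice for full-table scans, not a requirement):
   * <pre>{@code
   * Scan scan = new Scan();
   * scan.setCacheBlocks(false); // avoid churning the block cache on a full scan
   * }</pre>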
   */
  public Scan() {}

  /**
   * Create a Scan operation starting at the specified row and using the
   * specified filter.
   * @param startRow row to start scanner at or after
   * @param filter filter to apply to this scan
   */
  public Scan(byte [] startRow, Filter filter) {
    this(startRow);
    this.filter = filter;
  }

  /**
   * Create a Scan operation starting at the specified row.
   * <p>
   * If the specified row does not exist, the Scanner will start from the
   * next closest row after the specified row.
   * @param startRow row to start scanner at or after
   */
  public Scan(byte [] startRow) {
    this.startRow = startRow;
  }

  /**
   * Create a Scan operation for the range of rows specified.
   * @param startRow row to start scanner at or after (inclusive)
   * @param stopRow row to stop scanner before (exclusive)
   */
  public Scan(byte [] startRow, byte [] stopRow) {
    this.startRow = startRow;
    this.stopRow = stopRow;
    // a scan is treated as a Get only when startRow is non-empty and equals stopRow
    this.getScan = isStartRowAndEqualsStopRow();
  }

  /**
   * Creates a new instance of this class while copying all values.
   *
   * @param scan The scan instance to copy from.
   * @throws IOException When copying the values fails.
   */
  public Scan(Scan scan) throws IOException {
    startRow = scan.getStartRow();
    stopRow = scan.getStopRow();
    maxVersions = scan.getMaxVersions();
    batch = scan.getBatch();
    storeLimit = scan.getMaxResultsPerColumnFamily();
    storeOffset = scan.getRowOffsetPerColumnFamily();
    caching = scan.getCaching();
    maxResultSize = scan.getMaxResultSize();
    cacheBlocks = scan.getCacheBlocks();
    getScan = scan.isGetScan();
    filter = scan.getFilter(); // clone?
    loadColumnFamiliesOnDemand = scan.getLoadColumnFamiliesOnDemandValue();
    TimeRange ctr = scan.getTimeRange();
    tr = new TimeRange(ctr.getMin(), ctr.getMax());
    reversed = scan.isReversed();
    small = scan.isSmall();
    Map<byte[], NavigableSet<byte[]>> fams = scan.getFamilyMap();
    for (Map.Entry<byte[], NavigableSet<byte[]>> entry : fams.entrySet()) {
      byte [] fam = entry.getKey();
      NavigableSet<byte[]> cols = entry.getValue();
      if (cols != null && cols.size() > 0) {
        for (byte[] col : cols) {
          addColumn(fam, col);
        }
      } else {
        addFamily(fam);
      }
    }
    for (Map.Entry<String, byte[]> attr : scan.getAttributesMap().entrySet()) {
      setAttribute(attr.getKey(), attr.getValue());
    }
  }

  /**
   * Builds a scan object with the same specs as get.
   * @param get get to model scan after
   */
  public Scan(Get get) {
    this.startRow = get.getRow();
    this.stopRow = get.getRow();
    this.filter = get.getFilter();
    this.cacheBlocks = get.getCacheBlocks();
    this.maxVersions = get.getMaxVersions();
    this.storeLimit = get.getMaxResultsPerColumnFamily();
    this.storeOffset = get.getRowOffsetPerColumnFamily();
    this.tr = get.getTimeRange();
    this.familyMap = get.getFamilyMap();
    this.getScan = true;
    for (Map.Entry<String, byte[]> attr : get.getAttributesMap().entrySet()) {
      setAttribute(attr.getKey(), attr.getValue());
    }
  }

  public boolean isGetScan() {
    return this.getScan || isStartRowAndEqualsStopRow();
  }

  private boolean isStartRowAndEqualsStopRow() {
    return this.startRow != null && this.startRow.length > 0 &&
        Bytes.equals(this.startRow, this.stopRow);
  }

  /**
   * Get all columns from the specified family.
   * <p>
   * Overrides previous calls to addColumn for this family.
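   * <p>
   * For example (the family names are illustrative):
   * <pre>{@code
   * scan.addFamily(Bytes.toBytes("cf1"));
   * scan.addFamily(Bytes.toBytes("cf2"));
   * }</pre>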
   * @param family family name
   * @return this
   */
  public Scan addFamily(byte [] family) {
    familyMap.remove(family);
    familyMap.put(family, null);
    return this;
  }

  /**
   * Get the column from the specified family with the specified qualifier.
   * <p>
   * Overrides previous calls to addFamily for this family.
   * @param family family name
   * @param qualifier column qualifier
   * @return this
   */
  public Scan addColumn(byte [] family, byte [] qualifier) {
    NavigableSet<byte []> set = familyMap.get(family);
    if (set == null) {
      set = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
    }
    if (qualifier == null) {
      qualifier = HConstants.EMPTY_BYTE_ARRAY;
    }
    set.add(qualifier);
    familyMap.put(family, set);
    return this;
  }

  /**
   * Get versions of columns only within the specified timestamp range,
   * [minStamp, maxStamp). Note, the default maximum number of versions to return is 1.
   * If your time range spans more than one version and you want all versions
   * returned, increase the number of versions beyond the default.
   * @param minStamp minimum timestamp value, inclusive
   * @param maxStamp maximum timestamp value, exclusive
   * @throws IOException if invalid time range
   * @see #setMaxVersions()
   * @see #setMaxVersions(int)
   * @return this
   */
  public Scan setTimeRange(long minStamp, long maxStamp) throws IOException {
    tr = new TimeRange(minStamp, maxStamp);
    return this;
  }

  /**
   * Get versions of columns with the specified timestamp. Note, the default maximum
   * number of versions to return is 1. If your time range spans more than one version
   * and you want all versions returned, increase the number of versions beyond the
   * default.
   * @param timestamp version timestamp
   * @see #setMaxVersions()
   * @see #setMaxVersions(int)
   * @return this
   */
  public Scan setTimeStamp(long timestamp) throws IOException {
    try {
      tr = new TimeRange(timestamp, timestamp + 1);
    } catch (IOException e) {
      // This should never happen, unless integer overflow or something extremely wrong...
      LOG.error("TimeRange failed, likely caused by integer overflow. ", e);
      throw e;
    }
    return this;
  }

  /**
   * Set the start row of the scan.
   * @param startRow row to start scan on (inclusive)
   * Note: In order to make startRow exclusive add a trailing 0 byte
   * @return this
   */
  public Scan setStartRow(byte [] startRow) {
    this.startRow = startRow;
    return this;
  }

  /**
   * Set the stop row.
   * @param stopRow row to end at (exclusive)
   * Note: In order to make stopRow inclusive add a trailing 0 byte
   * @return this
   */
  public Scan setStopRow(byte [] stopRow) {
    this.stopRow = stopRow;
    return this;
  }

  /**
   * Get all available versions.
   * @return this
   */
  public Scan setMaxVersions() {
    this.maxVersions = Integer.MAX_VALUE;
    return this;
  }

  /**
   * Get up to the specified number of versions of each column.
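   * <p>
   * For example, a sketch combining a time range with multiple versions (the timestamps and
   * version count are illustrative):
   * <pre>{@code
   * scan.setTimeRange(1000L, 2000L);
   * scan.setMaxVersions(3);
   * }</pre>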
   * @param maxVersions maximum versions for each column
   * @return this
   */
  public Scan setMaxVersions(int maxVersions) {
    this.maxVersions = maxVersions;
    return this;
  }

  /**
   * Set the maximum number of values to return for each call to next().
   * @param batch the maximum number of values
   */
  public void setBatch(int batch) {
    if (this.hasFilter() && this.filter.hasFilterRow()) {
      throw new IncompatibleFilterException(
        "Cannot set batch on a scan using a filter" +
        " that returns true for filter.hasFilterRow");
    }
    this.batch = batch;
  }

  /**
   * Set the maximum number of values to return per row per Column Family.
   * @param limit the maximum number of values returned / row / CF
   */
  public void setMaxResultsPerColumnFamily(int limit) {
    this.storeLimit = limit;
  }

  /**
   * Set offset for the row per Column Family.
   * @param offset the number of KVs that will be skipped
   */
  public void setRowOffsetPerColumnFamily(int offset) {
    this.storeOffset = offset;
  }

  /**
   * Set the number of rows for caching that will be passed to scanners.
   * If not set, the default setting from {@link HTable#getScannerCaching()} will apply.
   * Higher caching values will enable faster scanners but will use more memory.
   * @param caching the number of rows for caching
   */
  public void setCaching(int caching) {
    this.caching = caching;
  }

  /**
   * @return the maximum result size in bytes. See {@link #setMaxResultSize(long)}
   */
  public long getMaxResultSize() {
    return maxResultSize;
  }

  /**
   * Set the maximum result size. The default is -1; this means that no specific
   * maximum result size will be set for this scan, and the global configured
   * value will be used instead. (Defaults to unlimited.)
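   * <p>
   * For example, a sketch limiting each server request to roughly 2 MB (the value is
   * illustrative, not a recommendation):
   * <pre>{@code
   * scan.setMaxResultSize(2L * 1024 * 1024);
   * }</pre>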
   *
   * @param maxResultSize The maximum result size in bytes.
   */
  public void setMaxResultSize(long maxResultSize) {
    this.maxResultSize = maxResultSize;
  }

  @Override
  public Scan setFilter(Filter filter) {
    super.setFilter(filter);
    return this;
  }

  /**
   * Set the familyMap.
   * @param familyMap map of family to qualifier
   * @return this
   */
  public Scan setFamilyMap(Map<byte [], NavigableSet<byte []>> familyMap) {
    this.familyMap = familyMap;
    return this;
  }

  /**
   * Get the familyMap.
   * @return familyMap
   */
  public Map<byte [], NavigableSet<byte []>> getFamilyMap() {
    return this.familyMap;
  }

  /**
   * @return the number of families in familyMap
   */
  public int numFamilies() {
    if (hasFamilies()) {
      return this.familyMap.size();
    }
    return 0;
  }

  /**
   * @return true if familyMap is non-empty, false otherwise
   */
  public boolean hasFamilies() {
    return !this.familyMap.isEmpty();
  }

  /**
   * @return the keys of the familyMap
   */
  public byte[][] getFamilies() {
    if (hasFamilies()) {
      return this.familyMap.keySet().toArray(new byte[0][0]);
    }
    return null;
  }

  /**
   * @return the start row
   */
  public byte [] getStartRow() {
    return this.startRow;
  }

  /**
   * @return the stop row
   */
  public byte [] getStopRow() {
    return this.stopRow;
  }

  /**
   * @return the max number of versions to fetch
   */
  public int getMaxVersions() {
    return this.maxVersions;
  }

  /**
   * @return maximum number of values to return for a single call to next()
   */
  public int getBatch() {
    return this.batch;
  }

  /**
   * @return maximum number of values to return per row per CF
   */
  public int getMaxResultsPerColumnFamily() {
    return this.storeLimit;
  }

  /**
   * Method for retrieving the scan's offset per row per column
   * family (#kvs to be skipped).
   * @return row offset
   */
  public int getRowOffsetPerColumnFamily() {
    return this.storeOffset;
  }

  /**
   * @return caching the number of rows fetched when calling next on a scanner
   */
  public int getCaching() {
    return this.caching;
  }

  /**
   * @return TimeRange
   */
  public TimeRange getTimeRange() {
    return this.tr;
  }

  /**
   * @return the filter
   */
  public Filter getFilter() {
    return filter;
  }

  /**
   * @return true if a filter has been specified, false if not
   */
  public boolean hasFilter() {
    return filter != null;
  }

  /**
   * Set whether blocks should be cached for this Scan.
   * <p>
   * This is true by default. When true, default settings of the table and
   * family are used (this will never override caching blocks if the block
   * cache is disabled for that family or entirely).
   *
   * @param cacheBlocks if false, default settings are overridden and blocks
   * will not be cached
   */
  public void setCacheBlocks(boolean cacheBlocks) {
    this.cacheBlocks = cacheBlocks;
  }

  /**
   * Get whether blocks should be cached for this Scan.
   * @return true if default caching should be used, false if blocks should not
   * be cached
   */
  public boolean getCacheBlocks() {
    return cacheBlocks;
  }

  /**
   * Set whether this scan is a reversed one.
   * <p>
   * This is false by default, which means a forward (normal) scan.
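   * <p>
   * For example, a sketch of a reversed scan (for a reversed scan the start row is the
   * lexicographically larger boundary; the row key is illustrative):
   * <pre>{@code
   * Scan scan = new Scan(Bytes.toBytes("row-99"));
   * scan.setReversed(true);
   * }</pre>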
   *
   * @param reversed if true, scan will be in backward order
   * @return this
   */
  public Scan setReversed(boolean reversed) {
    this.reversed = reversed;
    return this;
  }

  /**
   * Get whether this scan is a reversed one.
   * @return true if backward scan, false if forward (default) scan
   */
  public boolean isReversed() {
    return reversed;
  }

  /**
   * Set the value indicating whether loading CFs on demand should be allowed (cluster
   * default is false). On-demand CF loading doesn't load column families until necessary, e.g.
   * if you filter on one column, the other column family data will be loaded only for the rows
   * that are included in the result, not for all rows as in the normal case.
   * With column-specific filters, like SingleColumnValueFilter w/filterIfMissing == true,
   * this can deliver huge perf gains when there's a cf with lots of data; however, it can
   * also lead to some inconsistent results, as follows:
   * - if someone does a concurrent update to both column families in question you may get a row
   *   that never existed, e.g. for { rowKey = 5, { cat_videos => 1 }, { video => "my cat" } }
   *   someone puts rowKey 5 with { cat_videos => 0 }, { video => "my dog" }, concurrent scan
   *   filtering on "cat_videos == 1" can get { rowKey = 5, { cat_videos => 1 },
   *   { video => "my dog" } }.
   * - if there's a concurrent split and you have more than 2 column families, some rows may be
   *   missing some column families.
   */
  public void setLoadColumnFamiliesOnDemand(boolean value) {
    this.loadColumnFamiliesOnDemand = value;
  }

  /**
   * Get the raw loadColumnFamiliesOnDemand setting; if it's not set, can be null.
   */
  public Boolean getLoadColumnFamiliesOnDemandValue() {
    return this.loadColumnFamiliesOnDemand;
  }

  /**
   * Get the logical value indicating whether on-demand CF loading should be allowed.
   */
  public boolean doLoadColumnFamiliesOnDemand() {
    return (this.loadColumnFamiliesOnDemand != null)
        && this.loadColumnFamiliesOnDemand.booleanValue();
  }

  /**
   * Compile the table and column family (i.e. schema) information
   * into a Map. Useful for parsing and aggregation by debugging,
   * logging, and administration tools.
   * @return Map
   */
  @Override
  public Map<String, Object> getFingerprint() {
    Map<String, Object> map = new HashMap<String, Object>();
    List<String> families = new ArrayList<String>();
    if (this.familyMap.size() == 0) {
      map.put("families", "ALL");
      return map;
    } else {
      map.put("families", families);
    }
    for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
        this.familyMap.entrySet()) {
      families.add(Bytes.toStringBinary(entry.getKey()));
    }
    return map;
  }

  /**
   * Compile the details beyond the scope of getFingerprint (row, columns,
   * timestamps, etc.) into a Map along with the fingerprinted information.
   * Useful for debugging, logging, and administration tools.
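   * <p>
   * For example, a sketch of dumping the scan's details to a log (the column limit and the
   * logger call are illustrative):
   * <pre>{@code
   * LOG.debug("scan details: " + scan.toMap(10));
   * }</pre>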
   * @param maxCols a limit on the number of columns output prior to truncation
   * @return Map
   */
  @Override
  public Map<String, Object> toMap(int maxCols) {
    // start with the fingerprint map and build on top of it
    Map<String, Object> map = getFingerprint();
    // map from families to column list replaces fingerprint's list of families
    Map<String, List<String>> familyColumns =
        new HashMap<String, List<String>>();
    map.put("families", familyColumns);
    // add scalar information first
    map.put("startRow", Bytes.toStringBinary(this.startRow));
    map.put("stopRow", Bytes.toStringBinary(this.stopRow));
    map.put("maxVersions", this.maxVersions);
    map.put("batch", this.batch);
    map.put("caching", this.caching);
    map.put("maxResultSize", this.maxResultSize);
    map.put("cacheBlocks", this.cacheBlocks);
    map.put("loadColumnFamiliesOnDemand", this.loadColumnFamiliesOnDemand);
    List<Long> timeRange = new ArrayList<Long>();
    timeRange.add(this.tr.getMin());
    timeRange.add(this.tr.getMax());
    map.put("timeRange", timeRange);
    int colCount = 0;
    // iterate through affected families and list out up to maxCols columns
    for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
        this.familyMap.entrySet()) {
      List<String> columns = new ArrayList<String>();
      familyColumns.put(Bytes.toStringBinary(entry.getKey()), columns);
      if (entry.getValue() == null) {
        colCount++;
        --maxCols;
        columns.add("ALL");
      } else {
        colCount += entry.getValue().size();
        if (maxCols <= 0) {
          continue;
        }
        for (byte [] column : entry.getValue()) {
          if (--maxCols <= 0) {
            continue;
          }
          columns.add(Bytes.toStringBinary(column));
        }
      }
    }
    map.put("totalColumns", colCount);
    if (this.filter != null) {
      map.put("filter", this.filter.toString());
    }
    // add the id if set
    if (getId() != null) {
      map.put("id", getId());
    }
    return map;
  }

  /**
   * Enable/disable "raw" mode for this scan.
   * If "raw" is enabled the scan will return all
   * delete markers and deleted rows that have not
   * been collected yet.
   * This is mostly useful for Scan on column families
   * that have KEEP_DELETED_CELLS enabled.
   * It is an error to specify any column when "raw" is set.
   * @param raw True/False to enable/disable "raw" mode.
   */
  public void setRaw(boolean raw) {
    setAttribute(RAW_ATTR, Bytes.toBytes(raw));
  }

  /**
   * @return True if this Scan is in "raw" mode.
   */
  public boolean isRaw() {
    byte[] attr = getAttribute(RAW_ATTR);
    return attr == null ? false : Bytes.toBoolean(attr);
  }

  /**
   * Set whether this scan is a small scan.
   * <p>
   * A small scan should use pread, while a big scan can use seek + read.
   *
   * seek + read is fast but can cause two problems: (1) resource contention and
   * (2) too much network I/O.
   *
   * [89-fb] Using pread for non-compaction read request
   * https://issues.apache.org/jira/browse/HBASE-7266
   *
   * On the other hand, if this is set to true, we do openScanner, next, and
   * closeScanner in one RPC call, which means better performance for a small
   * scan. [HBASE-9488].
   *
   * Generally, if the scan range is within one data block (64KB), it can be
   * considered a small scan.
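   * <p>
   * For example, a sketch of a small scan over a narrow row range (the row keys are
   * illustrative):
   * <pre>{@code
   * Scan scan = new Scan(Bytes.toBytes("row-10"), Bytes.toBytes("row-20"));
   * scan.setSmall(true);
   * }</pre>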
   *
   * @param small whether the scan should be treated as a small scan
   */
  public void setSmall(boolean small) {
    this.small = small;
  }

  /**
   * Get whether this scan is a small scan.
   * @return true if small scan
   */
  public boolean isSmall() {
    return small;
  }
}