/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.NavigableSet;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionThroughputController;

/**
 * Interface for objects that hold a column family in a Region. It is a memstore and a set of zero
 * or more StoreFiles, which stretch backwards over time.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public interface Store extends HeapSize, StoreConfigInformation {

  /* The default priority for user-specified compaction requests.
   * The user gets top priority unless we have blocking compactions. (Pri <= 0)
   */
  int PRIORITY_USER = 1;
  int NO_PRIORITY = Integer.MIN_VALUE;

  // General Accessors
  KeyValue.KVComparator getComparator();

  Collection<StoreFile> getStorefiles();

  /**
   * Close all the readers. We don't need to worry about subsequent requests because the HRegion
   * holds a write lock that will prevent any more reads or writes.
   * @return the {@link StoreFile StoreFiles} that were previously being used.
   * @throws IOException on failure
   */
  Collection<StoreFile> close() throws IOException;

  /**
   * Return a scanner for both the memstore and the HStore files. Assumes we are not in a
   * compaction.
   * @param scan Scan to apply when scanning the stores
   * @param targetCols columns to scan
   * @param readPt the read point of the current scan
   * @return a scanner over the current key values
   * @throws IOException on failure
   */
  KeyValueScanner getScanner(Scan scan, final NavigableSet<byte[]> targetCols, long readPt)
      throws IOException;
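  /*
   * Illustrative sketch only, not part of the interface: how a caller that already holds a Store
   * reference might use getScanner(Scan, NavigableSet, long) above. The way the store, family
   * qualifier, row bounds, and read point are obtained is assumed for the example, as is the
   * peek()/next() iteration style of KeyValueScanner; only the getScanner signature itself comes
   * from this file.
   *
   *   NavigableSet<byte[]> cols = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
   *   cols.add(qualifier);                               // restrict the scan to one column
   *   Scan scan = new Scan(startRow, stopRow);
   *   KeyValueScanner scanner = store.getScanner(scan, cols, readPoint);
   *   try {
   *     for (Cell cell = scanner.peek(); cell != null; cell = scanner.peek()) {
   *       // ... consume cell ...
   *       scanner.next();                                // advance to the next cell
   *     }
   *   } finally {
   *     scanner.close();
   *   }
   */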
  /**
   * Get all scanners with no filtering based on TTL (that happens further down the line).
   * @param cacheBlocks whether blocks read by these scanners should be cached
   * @param isGet whether the scan is a get
   * @param usePread whether to use pread (positional read) instead of a streaming read
   * @param isCompaction whether the scanners are being opened for a compaction
   * @param matcher the scan query matcher to use
   * @param startRow the start row of the scan
   * @param stopRow the stop row of the scan
   * @param readPt the read point of the current scan
   * @return all scanners for this store
   */
  List<KeyValueScanner> getScanners(
    boolean cacheBlocks,
    boolean isGet,
    boolean usePread,
    boolean isCompaction,
    ScanQueryMatcher matcher,
    byte[] startRow,
    byte[] stopRow,
    long readPt
  ) throws IOException;

  ScanInfo getScanInfo();

  /**
   * Adds or replaces the specified KeyValues.
   * <p>
   * For each KeyValue specified, if a cell with the same row, family, and qualifier exists in
   * MemStore, it will be replaced. Otherwise, it will just be inserted to MemStore.
   * <p>
   * This operation is atomic on each KeyValue (row/family/qualifier) but not necessarily atomic
   * across all of them.
   * @param cells the cells to add or replace
   * @param readpoint readpoint below which we can safely remove duplicate KVs
   * @return memstore size delta
   * @throws IOException on failure
   */
  long upsert(Iterable<Cell> cells, long readpoint) throws IOException;

  /**
   * Adds a value to the memstore.
   * @param kv the KeyValue to add
   * @return memstore size delta
   */
  long add(KeyValue kv);

  /**
   * When was the oldest edit done in the memstore.
   */
  long timeOfOldestEdit();

  /**
   * Removes a kv from the memstore. The KeyValue is removed only if its key & memstoreTS match the
   * key & memstoreTS value of the kv parameter.
   * @param kv the KeyValue to remove
   */
  void rollback(final KeyValue kv);
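  /*
   * Illustrative sketch only, not part of the interface: the add()/rollback() pair above is meant
   * for a write path that makes an edit durable elsewhere (for example, in a WAL) after putting it
   * into the memstore, and undoes the insert if that step fails. The wal.sync() call and the
   * memstoreSize accumulator below are assumptions made for the example, not APIs defined in this
   * file.
   *
   *   long delta = store.add(kv);           // insert into the memstore, get the heap-size delta
   *   try {
   *     wal.sync();                         // hypothetical durability step
   *   } catch (IOException e) {
   *     store.rollback(kv);                 // undo the insert; key and memstoreTS must match
   *     throw e;
   *   }
   *   memstoreSize.addAndGet(delta);        // account for the accepted edit
   */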
  /**
   * Find the key that matches <i>row</i> exactly, or the one that immediately precedes it.
   * WARNING: Only use this method on a table where writes occur with strictly increasing
   * timestamps. This method assumes this pattern of writes in order to make it reasonably
   * performant. Also our search is dependent on the axiom that deletes are for cells that are in
   * the container that follows, whether a memstore snapshot or a storefile, not for the current
   * container: i.e. we'll see deletes before we come across cells we are to delete. Presumption is
   * that the memstore#kvset is processed before memstore#snapshot and so on.
   * @param row The row key of the targeted row.
   * @return Found keyvalue or null if none found.
   * @throws IOException on failure
   */
  KeyValue getRowKeyAtOrBefore(final byte[] row) throws IOException;

  FileSystem getFileSystem();

  /*
   * @param maxKeyCount estimated maximum number of keys the new file will hold
   * @param compression Compression algorithm to use
   * @param isCompaction whether we are creating a new file in a compaction
   * @param includeMVCCReadpoint whether we should write out the MVCC readpoint
   * @param includesTags whether tags will be written to the new file
   * @return Writer for a new StoreFile in the tmp dir.
   */
  StoreFile.Writer createWriterInTmp(
    long maxKeyCount,
    Compression.Algorithm compression,
    boolean isCompaction,
    boolean includeMVCCReadpoint,
    boolean includesTags
  ) throws IOException;

  // Compaction oriented methods

  boolean throttleCompaction(long compactionSize);

  /**
   * Getter for the CompactionProgress object.
   * @return CompactionProgress object; can be null
   */
  CompactionProgress getCompactionProgress();

  CompactionContext requestCompaction() throws IOException;

  CompactionContext requestCompaction(int priority, CompactionRequest baseRequest)
      throws IOException;

  void cancelRequestedCompaction(CompactionContext compaction);

  List<StoreFile> compact(CompactionContext compaction,
      CompactionThroughputController throughputController) throws IOException;

  /**
   * @return true if we should run a major compaction.
   */
  boolean isMajorCompaction() throws IOException;

  void triggerMajorCompaction();

  /**
   * See if there are too many store files in this store.
   * @return true if the number of store files is greater than the number defined in
   *   minFilesToCompact
   */
  boolean needsCompaction();

  int getCompactPriority();

  StoreFlushContext createFlushContext(long cacheFlushId);

  /**
   * Call to complete a compaction. It is for the case where we find in the WAL a compaction that
   * was not finished. We could find one while recovering a WAL after a regionserver crash. See
   * HBASE-2331.
   * @param compaction the descriptor of the compaction to complete
   */
  void completeCompactionMarker(CompactionDescriptor compaction)
      throws IOException;
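  /*
   * Illustrative sketch only, not part of the interface: the expected life cycle of a compaction
   * as seen through the methods declared in this interface. The throughputController variable is
   * assumed to be supplied by the caller, and the writes-enabled check is just one example of a
   * reason to back out of a selected compaction.
   *
   *   CompactionContext compaction = store.requestCompaction();
   *   if (compaction == null) {
   *     return;                                        // nothing worth compacting right now
   *   }
   *   if (!store.areWritesEnabled()) {
   *     store.cancelRequestedCompaction(compaction);   // release the files selected above
   *     return;
   *   }
   *   List<StoreFile> newFiles = store.compact(compaction, throughputController);
   *   // newFiles now stand in for the store files that were compacted away
   */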
  // Split oriented methods

  boolean canSplit();

  /**
   * Determines if Store should be split.
   * @return byte[] if store should be split, null otherwise.
   */
  byte[] getSplitPoint();

  // Bulk Load methods

  /**
   * This throws a WrongRegionException if the HFile does not fit in this region, or an
   * InvalidHFileException if the HFile is not valid.
   */
  void assertBulkLoadHFileOk(Path srcPath) throws IOException;

  /**
   * This method should only be called from HRegion. It is assumed that the ranges of values in
   * the HFile fit within the store's assigned region. (assertBulkLoadHFileOk checks this)
   *
   * @param srcPathStr path of the HFile to load
   * @param sequenceId sequence Id associated with the HFile
   */
  void bulkLoadHFile(String srcPathStr, long sequenceId) throws IOException;
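  /*
   * Illustrative sketch only, not part of the interface: the intended ordering of the two bulk
   * load calls above. The staging path and sequence id below are hypothetical values chosen for
   * the example.
   *
   *   Path hfile = new Path("/staging/cf/hfile");       // hypothetical staged HFile
   *   store.assertBulkLoadHFileOk(hfile);               // verify region fit and HFile validity
   *   store.bulkLoadHFile(hfile.toString(), seqId);     // then move or link it into the store
   */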
  // General accessors into the state of the store
  // TODO abstract some of this out into a metrics class

  /**
   * @return <tt>true</tt> if the store has any underlying reference files to older HFiles
   */
  boolean hasReferences();

  /**
   * @return The size of this store's memstore, in bytes
   */
  long getMemStoreSize();

  /**
   * @return The amount of memory we could flush from this memstore; usually this is equal to
   * {@link #getMemStoreSize()} unless we are carrying snapshots and then it will be the size of
   * outstanding snapshots.
   */
  long getFlushableSize();

  HColumnDescriptor getFamily();

  /**
   * @return The maximum memstoreTS in all store files.
   */
  long getMaxMemstoreTS();

  /**
   * @return the data block encoder
   */
  HFileDataBlockEncoder getDataBlockEncoder();

  /** @return aggregate size of all HStores used in the last compaction */
  long getLastCompactSize();

  /** @return aggregate size of HStore */
  long getSize();

  /**
   * @return Count of store files
   */
  int getStorefilesCount();

  /**
   * @return The size of the store files, in bytes, uncompressed.
   */
  long getStoreSizeUncompressed();

  /**
   * @return The size of the store files, in bytes.
   */
  long getStorefilesSize();

  /**
   * @return The size of the store file indexes, in bytes.
   */
  long getStorefilesIndexSize();

  /**
   * Returns the total size of all index blocks in the data block indexes, including the root
   * level, intermediate levels, and the leaf level for multi-level indexes, or just the root level
   * for single-level indexes.
   * @return the total size of block indexes in the store
   */
  long getTotalStaticIndexSize();

  /**
   * Returns the total byte size of all Bloom filter bit arrays. For compound Bloom filters even
   * the Bloom blocks currently not loaded into the block cache are counted.
   * @return the total size of all Bloom filters in the store
   */
  long getTotalStaticBloomSize();

  // Test-helper methods

  /**
   * Used for tests.
   * @return cache configuration for this Store.
   */
  CacheConfig getCacheConfig();

  /**
   * @return the parent region info hosting this store
   */
  HRegionInfo getRegionInfo();

  RegionCoprocessorHost getCoprocessorHost();

  boolean areWritesEnabled();

  /**
   * @return The smallest mvcc readPoint across all the scanners in this region. Writes older than
   * this readPoint are included in every read operation.
   */
  long getSmallestReadPoint();

  String getColumnFamilyName();

  TableName getTableName();

  /**
   * @return The number of cells flushed to disk
   */
  long getFlushedCellsCount();

  /**
   * @return The total size of data flushed to disk, in bytes
   */
  long getFlushedCellsSize();

  /**
   * @return The number of cells processed during minor compactions
   */
  long getCompactedCellsCount();

  /**
   * @return The total amount of data processed during minor compactions, in bytes
   */
  long getCompactedCellsSize();

  /**
   * @return The number of cells processed during major compactions
   */
  long getMajorCompactedCellsCount();

  /**
   * @return The total amount of data processed during major compactions, in bytes
   */
  long getMajorCompactedCellsSize();

  /*
   * @param o Observer who wants to know about changes in the set of Readers
   */
  void addChangedReaderObserver(ChangedReadersObserver o);

  /*
   * @param o Observer no longer interested in changes in the set of Readers.
   */
  void deleteChangedReaderObserver(ChangedReadersObserver o);

  /**
   * @return Whether this store has too many store files.
   */
  boolean hasTooManyStoreFiles();

  /**
   * This value can represent the degree of urgency of compaction for this store. It should be
   * greater than or equal to 0.0, and any value greater than 1.0 means we have too many store
   * files.
   * <ul>
   * <li>if getStorefilesCount &lt;= getMinFilesToCompact, return 0.0</li>
   * <li>return (getStorefilesCount - getMinFilesToCompact) / (blockingFileCount -
   * getMinFilesToCompact)</li>
   * </ul>
   * <p>
   * For striped stores, this value should be calculated from the files in each stripe separately,
   * and the maximum of those values returned.
   * <p>
   * It is similar to {@link #getCompactPriority()} except that it is more suitable for use in a
   * linear formula.
   */
  double getCompactionPressure();
}
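// Illustrative note, not part of the interface above: a worked example of the
// getCompactionPressure() contract, assuming hypothetical configuration values of
// getMinFilesToCompact = 3 and blockingFileCount = 10.
//
//   3 store files  -> pressure = 0.0                       (at or below getMinFilesToCompact)
//   7 store files  -> pressure = (7 - 3) / (10 - 3) ~= 0.57
//   10 store files -> pressure = (10 - 3) / (10 - 3) = 1.0  (at the blocking limit)
//   12 store files -> pressure = (12 - 3) / (10 - 3) ~= 1.29, i.e. too many store files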