View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.client;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.Collections;
24  import java.util.List;
25  import java.util.UUID;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.hbase.classification.InterfaceStability;
31  import org.apache.hadoop.conf.Configuration;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.CellUtil;
35  import org.apache.hadoop.hbase.HRegionInfo;
36  import org.apache.hadoop.hbase.HTableDescriptor;
37  import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat;
38  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
39  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
40  import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
41  import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
42  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
43  import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
44  import org.apache.hadoop.hbase.util.FSUtils;
45  
46  /**
47   * A Scanner which performs a scan over snapshot files. Using this class requires copying the
48   * snapshot to a temporary empty directory, which will copy the snapshot reference files into that
49   * directory. Actual data files are not copied.
50   *
51   * <p>
52   * This also allows one to run the scan from an
53   * online or offline hbase cluster. The snapshot files can be exported by using the
54   * {@link ExportSnapshot} tool, to a pure-hdfs cluster, and this scanner can be used to
55   * run the scan directly over the snapshot files. The snapshot should not be deleted while there
56   * are open scanners reading from snapshot files.
57   *
58   * <p>
59   * An internal RegionScanner is used to execute the {@link Scan} obtained
60   * from the user for each region in the snapshot.
61   * <p>
62   * HBase owns all the data and snapshot files on the filesystem. Only the HBase user can read from
63   * snapshot files and data files. HBase also enforces security because all the requests are handled
64   * by the server layer, and the user cannot read from the data files directly. To read from snapshot
65   * files directly from the file system, the user who is running the MR job must have sufficient
66   * permissions to access snapshot and reference files. This means that to run mapreduce over
67   * snapshot files, the job has to be run as the HBase user or the user must have group or other
68   * priviledges in the filesystem (See HBASE-8369). Note that, given other users access to read from
69   * snapshot/data files will completely circumvent the access control enforced by HBase.
70   * @see TableSnapshotInputFormat
71   */
72  @InterfaceAudience.Public
73  @InterfaceStability.Evolving
74  public class TableSnapshotScanner extends AbstractClientScanner {
75  
76    private static final Log LOG = LogFactory.getLog(TableSnapshotScanner.class);
77  
78    private Configuration conf;
79    private String snapshotName;
80    private FileSystem fs;
81    private Path rootDir;
82    private Path restoreDir;
83    private Scan scan;
84    private ArrayList<HRegionInfo> regions;
85    private HTableDescriptor htd;
86  
87    private ClientSideRegionScanner currentRegionScanner  = null;
88    private int currentRegion = -1;
89  
90    /**
91     * Creates a TableSnapshotScanner.
92     * @param conf the configuration
93     * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
94     * have write permissions to this directory, and this should not be a subdirectory of rootdir.
95     * The scanner deletes the contents of the directory once the scanner is closed.
96     * @param snapshotName the name of the snapshot to read from
97     * @param scan a Scan representing scan parameters
98     * @throws IOException in case of error
99     */
100   public TableSnapshotScanner(Configuration conf, Path restoreDir,
101       String snapshotName, Scan scan) throws IOException {
102     this(conf, FSUtils.getRootDir(conf), restoreDir, snapshotName, scan);
103   }
104 
105   /**
106    * Creates a TableSnapshotScanner.
107    * @param conf the configuration
108    * @param rootDir root directory for HBase.
109    * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
110    * have write permissions to this directory, and this should not be a subdirectory of rootdir.
111    * The scanner deletes the contents of the directory once the scanner is closed.
112    * @param snapshotName the name of the snapshot to read from
113    * @param scan a Scan representing scan parameters
114    * @throws IOException in case of error
115    */
116   public TableSnapshotScanner(Configuration conf, Path rootDir,
117       Path restoreDir, String snapshotName, Scan scan) throws IOException {
118     this.conf = conf;
119     this.snapshotName = snapshotName;
120     this.rootDir = rootDir;
121     // restoreDir will be deleted in close(), use a unique sub directory
122     this.restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
123     this.scan = scan;
124     this.fs = rootDir.getFileSystem(conf);
125     init();
126   }
127 
128   private void init() throws IOException {
129     Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
130     SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
131     SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
132 
133     // load table descriptor
134     htd = manifest.getTableDescriptor();
135 
136     List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
137     if (regionManifests == null) {
138       throw new IllegalArgumentException("Snapshot seems empty");
139     }
140 
141     regions = new ArrayList<HRegionInfo>(regionManifests.size());
142     for (SnapshotRegionManifest regionManifest : regionManifests) {
143       // load region descriptor
144       HRegionInfo hri = HRegionInfo.convert(regionManifest.getRegionInfo());
145 
146       if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(),
147           hri.getStartKey(), hri.getEndKey())) {
148         regions.add(hri);
149       }
150     }
151 
152     // sort for regions according to startKey.
153     Collections.sort(regions);
154 
155     initScanMetrics(scan);
156 
157     RestoreSnapshotHelper.copySnapshotForScanner(conf, fs,
158       rootDir, restoreDir, snapshotName);
159   }
160 
161   @Override
162   public Result next() throws IOException {
163     Result result = null;
164     while (true) {
165       if (currentRegionScanner == null) {
166         currentRegion++;
167         if (currentRegion >= regions.size()) {
168           return null;
169         }
170 
171         HRegionInfo hri = regions.get(currentRegion);
172         currentRegionScanner = new ClientSideRegionScanner(conf, fs,
173           restoreDir, htd, hri, scan, scanMetrics);
174         if (this.scanMetrics != null) {
175           this.scanMetrics.countOfRegions.incrementAndGet();
176         }
177       }
178 
179       try {
180         result = currentRegionScanner.next();
181         if (result != null) {
182           return result;
183         }
184       } finally {
185         if (result == null) {
186           currentRegionScanner.close();
187           currentRegionScanner = null;
188         }        
189       }
190     }
191   }
192 
193   @Override
194   public void close() {
195     if (currentRegionScanner != null) {
196       currentRegionScanner.close();
197     }
198     try {
199       fs.delete(this.restoreDir, true);
200     } catch (IOException ex) {
201       LOG.warn("Could not delete restore directory for the snapshot:" + ex);
202     }
203   }
204 
205 }