1 /**
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import java.io.IOException;
22 import java.util.List;
23 import java.util.Map;
24
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27 import org.apache.hadoop.hbase.classification.InterfaceAudience;
28 import org.apache.hadoop.hbase.classification.InterfaceStability;
29 import org.apache.hadoop.conf.Configuration;
30 import org.apache.hadoop.hbase.KeyValue;
31 import org.apache.hadoop.hbase.client.HTable;
32 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
33 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
34 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
35 import org.apache.hadoop.hbase.regionserver.BloomType;
36 import org.apache.hadoop.mapreduce.Job;
37 import org.apache.hadoop.mapreduce.RecordWriter;
38 import org.apache.hadoop.mapreduce.TaskAttemptContext;
39 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
40
41 import com.google.common.annotations.VisibleForTesting;
42
43 /**
44 * Writes HFiles. Passed KeyValues must arrive in order.
45 * Writes current time as the sequence id for the file. Sets the major compacted
46 * attribute on created hfiles. Calling write(null,null) will forcibly roll
47 * all HFiles being written.
48 * <p>
49 * Using this class as part of a MapReduce job is best done
50 * using {@link #configureIncrementalLoad(Job, HTable)}.
51 * @see KeyValueSortReducer
52 * @deprecated use {@link HFileOutputFormat2} instead.
53 */
54 @Deprecated
55 @InterfaceAudience.Public
56 @InterfaceStability.Stable
57 public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable, KeyValue> {
58 static Log LOG = LogFactory.getLog(HFileOutputFormat.class);
59
60 // This constant is public since the client can modify this when setting
61 // up their conf object and thus refer to this symbol.
62 // It is present for backwards compatibility reasons. Use it only to
63 // override the auto-detection of datablock encoding.
64 public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
65 HFileOutputFormat2.DATABLOCK_ENCODING_OVERRIDE_CONF_KEY;
66
67 public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(
68 final TaskAttemptContext context) throws IOException, InterruptedException {
69 return HFileOutputFormat2.createRecordWriter(context);
70 }
71
72 /**
73 * Configure a MapReduce Job to perform an incremental load into the given
74 * table. This
75 * <ul>
76 * <li>Inspects the table to configure a total order partitioner</li>
77 * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
78 * <li>Sets the number of reduce tasks to match the current number of regions</li>
79 * <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
80 * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
81 * PutSortReducer)</li>
82 * </ul>
83 * The user should be sure to set the map output value class to either KeyValue or Put before
84 * running this function.
85 */
86 public static void configureIncrementalLoad(Job job, HTable table)
87 throws IOException {
88 HFileOutputFormat2.configureIncrementalLoad(job, table, HFileOutputFormat.class);
89 }
90
91 /**
92 * Runs inside the task to deserialize column family to compression algorithm
93 * map from the configuration.
94 *
95 * @param conf to read the serialized values from
96 * @return a map from column family to the configured compression algorithm
97 */
98 @VisibleForTesting
99 static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration
100 conf) {
101 return HFileOutputFormat2.createFamilyCompressionMap(conf);
102 }
103
104 /**
105 * Runs inside the task to deserialize column family to bloom filter type
106 * map from the configuration.
107 *
108 * @param conf to read the serialized values from
109 * @return a map from column family to the the configured bloom filter type
110 */
111 @VisibleForTesting
112 static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
113 return HFileOutputFormat2.createFamilyBloomTypeMap(conf);
114 }
115
116 /**
117 * Runs inside the task to deserialize column family to block size
118 * map from the configuration.
119 *
120 * @param conf to read the serialized values from
121 * @return a map from column family to the configured block size
122 */
123 @VisibleForTesting
124 static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
125 return HFileOutputFormat2.createFamilyBlockSizeMap(conf);
126 }
127
128 /**
129 * Runs inside the task to deserialize column family to data block encoding
130 * type map from the configuration.
131 *
132 * @param conf to read the serialized values from
133 * @return a map from column family to HFileDataBlockEncoder for the
134 * configured data block type for the family
135 */
136 @VisibleForTesting
137 static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(
138 Configuration conf) {
139 return HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);
140 }
141
142 /**
143 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
144 * <code>splitPoints</code>. Cleans up the partitions file after job exists.
145 */
146 static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
147 throws IOException {
148 HFileOutputFormat2.configurePartitioner(job, splitPoints);
149 }
150
151 /**
152 * Serialize column family to compression algorithm map to configuration.
153 * Invoked while configuring the MR job for incremental load.
154 *
155 * @param table to read the properties from
156 * @param conf to persist serialized values into
157 * @throws IOException
158 * on failure to read column family descriptors
159 */
160 @edu.umd.cs.findbugs.annotations.SuppressWarnings(
161 value="RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
162 @VisibleForTesting
163 static void configureCompression(HTable table, Configuration conf) throws IOException {
164 HFileOutputFormat2.configureCompression(table, conf);
165 }
166
167 /**
168 * Serialize column family to block size map to configuration.
169 * Invoked while configuring the MR job for incremental load.
170 *
171 * @param table to read the properties from
172 * @param conf to persist serialized values into
173 * @throws IOException
174 * on failure to read column family descriptors
175 */
176 @VisibleForTesting
177 static void configureBlockSize(HTable table, Configuration conf) throws IOException {
178 HFileOutputFormat2.configureBlockSize(table, conf);
179 }
180
181 /**
182 * Serialize column family to bloom type map to configuration.
183 * Invoked while configuring the MR job for incremental load.
184 *
185 * @param table to read the properties from
186 * @param conf to persist serialized values into
187 * @throws IOException
188 * on failure to read column family descriptors
189 */
190 @VisibleForTesting
191 static void configureBloomType(HTable table, Configuration conf) throws IOException {
192 HFileOutputFormat2.configureBloomType(table, conf);
193 }
194
195 /**
196 * Serialize column family to data block encoding map to configuration.
197 * Invoked while configuring the MR job for incremental load.
198 *
199 * @param table to read the properties from
200 * @param conf to persist serialized values into
201 * @throws IOException
202 * on failure to read column family descriptors
203 */
204 @VisibleForTesting
205 static void configureDataBlockEncoding(HTable table,
206 Configuration conf) throws IOException {
207 HFileOutputFormat2.configureDataBlockEncoding(table, conf);
208 }
209 }