View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.fail;
23  
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.Arrays;
27  import java.util.Collection;
28  import java.util.List;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.hbase.catalog.CatalogTracker;
33  import org.apache.hadoop.hbase.catalog.MetaReader;
34  import org.apache.hadoop.hbase.client.HBaseAdmin;
35  import org.apache.hadoop.hbase.client.HTable;
36  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
37  import org.apache.hadoop.hbase.regionserver.HRegionServer;
38  import org.apache.hadoop.hbase.testclassification.LargeTests;
39  import org.apache.hadoop.hbase.util.Bytes;
40  import org.apache.hadoop.hbase.util.JVMClusterUtil;
41  import org.junit.After;
42  import org.junit.Before;
43  import org.junit.Test;
44  import org.junit.experimental.categories.Category;
45  import org.junit.runner.RunWith;
46  import org.junit.runners.Parameterized;
47  import org.junit.runners.Parameterized.Parameters;
48  
49  /**
50   * Test whether region re-balancing works. (HBASE-71)
51   */
52  @Category(LargeTests.class)
53  @RunWith(value = Parameterized.class)
54  public class TestRegionRebalancing {
55  
56    @Parameters
57    public static Collection<Object[]> data() {
58      Object[][] balancers =
59          new String[][] { { "org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer" },
60              { "org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer" } };
61      return Arrays.asList(balancers);
62    }
63  
64    private static final byte[] FAMILY_NAME = Bytes.toBytes("col");
65    public static final Log LOG = LogFactory.getLog(TestRegionRebalancing.class);
66    private final HBaseTestingUtility UTIL = new HBaseTestingUtility();
67    private HTable table;
68    private HTableDescriptor desc;
69    private String balancerName;
70  
71    public TestRegionRebalancing(String balancerName) {
72      this.balancerName = balancerName;
73  
74    }
75  
76    @After
77    public void after() throws Exception {
78      UTIL.shutdownMiniCluster();
79    }
80  
81    @Before
82    public void before() throws Exception {
83      UTIL.getConfiguration().set("hbase.master.loadbalancer.class", this.balancerName);
84      UTIL.startMiniCluster(1);
85      this.desc = new HTableDescriptor(TableName.valueOf("test"));
86      this.desc.addFamily(new HColumnDescriptor(FAMILY_NAME));
87    }
88  
89    /**
90     * For HBASE-71. Try a few different configurations of starting and stopping
91     * region servers to see if the assignment or regions is pretty balanced.
92     * @throws IOException
93     * @throws InterruptedException
94     */
95    @Test (timeout=300000)
96    public void testRebalanceOnRegionServerNumberChange()
97    throws IOException, InterruptedException {
98      HBaseAdmin admin = new HBaseAdmin(UTIL.getConfiguration());
99      admin.createTable(this.desc, Arrays.copyOfRange(HBaseTestingUtility.KEYS,
100         1, HBaseTestingUtility.KEYS.length));
101     this.table = new HTable(UTIL.getConfiguration(), this.desc.getTableName());
102     CatalogTracker ct = new CatalogTracker(UTIL.getConfiguration());
103     ct.start();
104     try {
105       MetaReader.fullScanMetaAndPrint(ct);
106     } finally {
107       ct.stop();
108     }
109     assertEquals("Test table should have right number of regions",
110       HBaseTestingUtility.KEYS.length,
111       this.table.getStartKeys().length);
112 
113     // verify that the region assignments are balanced to start out
114     assertRegionsAreBalanced();
115 
116     // add a region server - total of 2
117     LOG.info("Started second server=" +
118       UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
119     UTIL.getHBaseCluster().getMaster().balance();
120     assertRegionsAreBalanced();
121 
122     // add a region server - total of 3
123     LOG.info("Started third server=" +
124         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
125     UTIL.getHBaseCluster().getMaster().balance();
126     assertRegionsAreBalanced();
127 
128     // kill a region server - total of 2
129     LOG.info("Stopped third server=" + UTIL.getHBaseCluster().stopRegionServer(2, false));
130     UTIL.getHBaseCluster().waitOnRegionServer(2);
131     UTIL.getHBaseCluster().getMaster().balance();
132     assertRegionsAreBalanced();
133 
134     // start two more region servers - total of 4
135     LOG.info("Readding third server=" +
136         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
137     LOG.info("Added fourth server=" +
138         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
139     UTIL.getHBaseCluster().getMaster().balance();
140     assertRegionsAreBalanced();
141 
142     for (int i = 0; i < 6; i++){
143       LOG.info("Adding " + (i + 5) + "th region server");
144       UTIL.getHBaseCluster().startRegionServer();
145     }
146     UTIL.getHBaseCluster().getMaster().balance();
147     assertRegionsAreBalanced();
148     table.close();
149   }
150 
151   /** figure out how many regions are currently being served. */
152   private int getRegionCount() throws IOException {
153     int total = 0;
154     for (HRegionServer server : getOnlineRegionServers()) {
155       total += ProtobufUtil.getOnlineRegions(server).size();
156     }
157     return total;
158   }
159 
160   /**
161    * Determine if regions are balanced. Figure out the total, divide by the
162    * number of online servers, then test if each server is +/- 1 of average
163    * rounded up.
164    */
165   private void assertRegionsAreBalanced() throws IOException {
166     // TODO: Fix this test.  Old balancer used to run with 'slop'.  New
167     // balancer does not.
168     boolean success = false;
169     float slop = (float)UTIL.getConfiguration().getFloat("hbase.regions.slop", 0.1f);
170     if (slop <= 0) slop = 1;
171 
172     for (int i = 0; i < 5; i++) {
173       success = true;
174       // make sure all the regions are reassigned before we test balance
175       waitForAllRegionsAssigned();
176 
177       int regionCount = getRegionCount();
178       List<HRegionServer> servers = getOnlineRegionServers();
179       double avg = UTIL.getHBaseCluster().getMaster().getAverageLoad();
180       int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop));
181       int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1;
182       LOG.debug("There are " + servers.size() + " servers and " + regionCount
183         + " regions. Load Average: " + avg + " low border: " + avgLoadMinusSlop
184         + ", up border: " + avgLoadPlusSlop + "; attempt: " + i);
185 
186       for (HRegionServer server : servers) {
187         int serverLoad = ProtobufUtil.getOnlineRegions(server).size();
188         LOG.debug(server.getServerName() + " Avg: " + avg + " actual: " + serverLoad);
189         if (!(avg > 2.0 && serverLoad <= avgLoadPlusSlop
190             && serverLoad >= avgLoadMinusSlop)) {
191           for (HRegionInfo hri : ProtobufUtil.getOnlineRegions(server)) {
192             if (hri.isMetaRegion()) serverLoad--;
193             // LOG.debug(hri.getRegionNameAsString());
194           }
195           if (!(serverLoad <= avgLoadPlusSlop && serverLoad >= avgLoadMinusSlop)) {
196             LOG.debug(server.getServerName() + " Isn't balanced!!! Avg: " + avg +
197                 " actual: " + serverLoad + " slop: " + slop);
198             success = false;            
199             break;
200           }
201         }
202       }
203 
204       if (!success) {
205         // one or more servers are not balanced. sleep a little to give it a
206         // chance to catch up. then, go back to the retry loop.
207         try {
208           Thread.sleep(10000);
209         } catch (InterruptedException e) {}
210 
211         UTIL.getHBaseCluster().getMaster().balance();
212         continue;
213       }
214 
215       // if we get here, all servers were balanced, so we should just return.
216       return;
217     }
218     // if we get here, we tried 5 times and never got to short circuit out of
219     // the retry loop, so this is a failure.
220     fail("After 5 attempts, region assignments were not balanced.");
221   }
222 
223   private List<HRegionServer> getOnlineRegionServers() {
224     List<HRegionServer> list = new ArrayList<HRegionServer>();
225     for (JVMClusterUtil.RegionServerThread rst :
226         UTIL.getHBaseCluster().getRegionServerThreads()) {
227       if (rst.getRegionServer().isOnline()) {
228         list.add(rst.getRegionServer());
229       }
230     }
231     return list;
232   }
233 
234   /**
235    * Wait until all the regions are assigned.
236    */
237   private void waitForAllRegionsAssigned() throws IOException {
238     int totalRegions = HBaseTestingUtility.KEYS.length+1;
239     while (getRegionCount() < totalRegions) {
240     // while (!cluster.getMaster().allRegionsAssigned()) {
241       LOG.debug("Waiting for there to be "+ totalRegions +" regions, but there are " + getRegionCount() + " right now.");
242       try {
243         Thread.sleep(200);
244       } catch (InterruptedException e) {}
245     }
246   }
247 
248 }
249