View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.balancer;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.HashMap;
24  import java.util.List;
25  import java.util.Map;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
32  import org.apache.hadoop.hbase.HRegionInfo;
33  import org.apache.hadoop.hbase.NamespaceDescriptor;
34  import org.apache.hadoop.hbase.ServerLoad;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.master.LoadBalancer;
37  import org.apache.hadoop.hbase.master.RackManager;
38  import org.apache.hadoop.hbase.master.RegionPlan;
39  import org.apache.hadoop.hbase.master.ServerManager;
40  import org.apache.hadoop.hbase.master.SnapshotOfRegionAssignmentFromMeta;
41  import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan.Position;
42  import org.apache.hadoop.hbase.util.Pair;
43  
44  /**
45   * An implementation of the {@link LoadBalancer} that assigns favored nodes for
46   * each region. There is a Primary RegionServer that hosts the region, and then
47   * there is Secondary and Tertiary RegionServers. Currently, the favored nodes
48   * information is used in creating HDFS files - the Primary RegionServer passes
49   * the primary, secondary, tertiary node addresses as hints to the DistributedFileSystem
50   * API for creating files on the filesystem. These nodes are treated as hints by
51   * the HDFS to place the blocks of the file. This alleviates the problem to do with
52   * reading from remote nodes (since we can make the Secondary RegionServer as the new
53   * Primary RegionServer) after a region is recovered. This should help provide consistent
54   * read latencies for the regions even when their primary region servers die.
55   *
56   */
57  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
58  public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
59    private static final Log LOG = LogFactory.getLog(FavoredNodeLoadBalancer.class);
60  
61    private FavoredNodesPlan globalFavoredNodesAssignmentPlan;
62    private RackManager rackManager;
63    Configuration conf;
64  
65    @Override
66    public void setConf(Configuration conf) {
67      globalFavoredNodesAssignmentPlan = new FavoredNodesPlan();
68      this.rackManager = new RackManager(conf);
69      this.conf = conf;
70    }
71  
72    @Override
73    public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState)  {
74      //TODO. Look at is whether Stochastic loadbalancer can be integrated with this
75      List<RegionPlan> plans = new ArrayList<RegionPlan>();
76      //perform a scan of the meta to get the latest updates (if any)
77      SnapshotOfRegionAssignmentFromMeta snaphotOfRegionAssignment =
78          new SnapshotOfRegionAssignmentFromMeta(super.services.getCatalogTracker());
79      try {
80        snaphotOfRegionAssignment.initialize();
81      } catch (IOException ie) {
82        LOG.warn("Not running balancer since exception was thrown " + ie);
83        return plans;
84      }
85      globalFavoredNodesAssignmentPlan = snaphotOfRegionAssignment.getExistingAssignmentPlan(); 
86      Map<ServerName, ServerName> serverNameToServerNameWithoutCode =
87          new HashMap<ServerName, ServerName>();
88      Map<ServerName, ServerName> serverNameWithoutCodeToServerName =
89          new HashMap<ServerName, ServerName>();
90      ServerManager serverMgr = super.services.getServerManager();
91      for (ServerName sn: serverMgr.getOnlineServersList()) {
92        ServerName s = ServerName.valueOf(sn.getHostname(), sn.getPort(), ServerName.NON_STARTCODE);
93        serverNameToServerNameWithoutCode.put(sn, s);
94        serverNameWithoutCodeToServerName.put(s, sn);
95      }
96      for (Map.Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
97        ServerName currentServer = entry.getKey();
98        //get a server without the startcode for the currentServer
99        ServerName currentServerWithoutStartCode = ServerName.valueOf(currentServer.getHostname(),
100           currentServer.getPort(), ServerName.NON_STARTCODE);
101       List<HRegionInfo> list = entry.getValue();
102       for (HRegionInfo region : list) {
103         if(region.getTable().getNamespaceAsString()
104             .equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
105           continue;
106         }
107         List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(region);
108         if (favoredNodes == null || favoredNodes.get(0).equals(currentServerWithoutStartCode)) {
109           continue; //either favorednodes does not exist or we are already on the primary node
110         }
111         ServerName destination = null;
112         //check whether the primary is available
113         destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(0));
114         if (destination == null) {
115           //check whether the region is on secondary/tertiary
116           if (currentServerWithoutStartCode.equals(favoredNodes.get(1)) ||
117               currentServerWithoutStartCode.equals(favoredNodes.get(2))) {
118             continue;
119           }
120           //the region is currently on none of the favored nodes
121           //get it on one of them if possible
122           ServerLoad l1 = super.services.getServerManager().getLoad(
123               serverNameWithoutCodeToServerName.get(favoredNodes.get(1)));
124           ServerLoad l2 = super.services.getServerManager().getLoad(
125               serverNameWithoutCodeToServerName.get(favoredNodes.get(2)));
126           if (l1 != null && l2 != null) {
127             if (l1.getLoad() > l2.getLoad()) {
128               destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(2));
129             } else {
130               destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(1));
131             }
132           } else if (l1 != null) {
133             destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(1));
134           } else if (l2 != null) {
135             destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(2));
136           }
137         }
138         
139         if (destination != null) {
140           RegionPlan plan = new RegionPlan(region, currentServer, destination);
141           plans.add(plan);
142         }
143       }
144     }
145     return plans;
146   }
147 
148   @Override
149   public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
150       List<ServerName> servers) {
151     Map<ServerName, List<HRegionInfo>> assignmentMap;
152     try {
153       FavoredNodeAssignmentHelper assignmentHelper =
154           new FavoredNodeAssignmentHelper(servers, rackManager);
155       assignmentHelper.initialize();
156       if (!assignmentHelper.canPlaceFavoredNodes()) {
157         return super.roundRobinAssignment(regions, servers);
158       }
159       // Segregate the regions into two types:
160       // 1. The regions that have favored node assignment, and where at least
161       //    one of the favored node is still alive. In this case, try to adhere
162       //    to the current favored nodes assignment as much as possible - i.e.,
163       //    if the current primary is gone, then make the secondary or tertiary
164       //    as the new host for the region (based on their current load). 
165       //    Note that we don't change the favored
166       //    node assignments here (even though one or more favored node is currently
167       //    down). It is up to the balanceCluster to do this hard work. The HDFS
168       //    can handle the fact that some nodes in the favored nodes hint is down
169       //    It'd allocate some other DNs. In combination with stale settings for HDFS,
170       //    we should be just fine.
171       // 2. The regions that currently don't have favored node assignment. We will
172       //    need to come up with favored nodes assignments for them. The corner case
173       //    in (1) above is that all the nodes are unavailable and in that case, we
174       //    will note that this region doesn't have favored nodes.
175       Pair<Map<ServerName,List<HRegionInfo>>, List<HRegionInfo>> segregatedRegions =
176           segregateRegionsAndAssignRegionsWithFavoredNodes(regions, servers);
177       Map<ServerName,List<HRegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst();
178       List<HRegionInfo> regionsWithNoFavoredNodes = segregatedRegions.getSecond();
179       assignmentMap = new HashMap<ServerName, List<HRegionInfo>>();
180       roundRobinAssignmentImpl(assignmentHelper, assignmentMap, regionsWithNoFavoredNodes,
181           servers);
182       // merge the assignment maps
183       assignmentMap.putAll(regionsWithFavoredNodesMap);
184     } catch (Exception ex) {
185       LOG.warn("Encountered exception while doing favored-nodes assignment " + ex +
186           " Falling back to regular assignment");
187       assignmentMap = super.roundRobinAssignment(regions, servers);
188     }
189     return assignmentMap;
190   }
191 
192   @Override
193   public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
194     try {
195       FavoredNodeAssignmentHelper assignmentHelper =
196           new FavoredNodeAssignmentHelper(servers, rackManager);
197       assignmentHelper.initialize();
198       ServerName primary = super.randomAssignment(regionInfo, servers);
199       if (!assignmentHelper.canPlaceFavoredNodes()) {
200         return primary;
201       }
202       List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo);
203       // check if we have a favored nodes mapping for this region and if so, return
204       // a server from the favored nodes list if the passed 'servers' contains this
205       // server as well (available servers, that is)
206       if (favoredNodes != null) {
207         for (ServerName s : favoredNodes) {
208           ServerName serverWithLegitStartCode = availableServersContains(servers, s);
209           if (serverWithLegitStartCode != null) {
210             return serverWithLegitStartCode;
211           }
212         }
213       }
214       List<HRegionInfo> regions = new ArrayList<HRegionInfo>(1);
215       regions.add(regionInfo);
216       Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>(1);
217       primaryRSMap.put(regionInfo, primary);
218       assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
219       return primary;
220     } catch (Exception ex) {
221       LOG.warn("Encountered exception while doing favored-nodes (random)assignment " + ex +
222           " Falling back to regular assignment");
223       return super.randomAssignment(regionInfo, servers);
224     }
225   }
226 
227   private Pair<Map<ServerName, List<HRegionInfo>>, List<HRegionInfo>> 
228   segregateRegionsAndAssignRegionsWithFavoredNodes(List<HRegionInfo> regions,
229       List<ServerName> availableServers) {
230     Map<ServerName, List<HRegionInfo>> assignmentMapForFavoredNodes =
231         new HashMap<ServerName, List<HRegionInfo>>(regions.size() / 2);
232     List<HRegionInfo> regionsWithNoFavoredNodes = new ArrayList<HRegionInfo>(regions.size()/2);
233     for (HRegionInfo region : regions) {
234       List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(region);
235       ServerName primaryHost = null;
236       ServerName secondaryHost = null;
237       ServerName tertiaryHost = null;
238       if (favoredNodes != null) {
239         for (ServerName s : favoredNodes) {
240           ServerName serverWithLegitStartCode = availableServersContains(availableServers, s);
241           if (serverWithLegitStartCode != null) {
242             FavoredNodesPlan.Position position =
243                 FavoredNodesPlan.getFavoredServerPosition(favoredNodes, s);
244             if (Position.PRIMARY.equals(position)) {
245               primaryHost = serverWithLegitStartCode;
246             } else if (Position.SECONDARY.equals(position)) {
247               secondaryHost = serverWithLegitStartCode;
248             } else if (Position.TERTIARY.equals(position)) {
249               tertiaryHost = serverWithLegitStartCode;
250             }
251           }
252         }
253         assignRegionToAvailableFavoredNode(assignmentMapForFavoredNodes, region,
254               primaryHost, secondaryHost, tertiaryHost);
255       }
256       if (primaryHost == null && secondaryHost == null && tertiaryHost == null) {
257         //all favored nodes unavailable
258         regionsWithNoFavoredNodes.add(region);
259       }
260     }
261     return new Pair<Map<ServerName, List<HRegionInfo>>, List<HRegionInfo>>(
262         assignmentMapForFavoredNodes, regionsWithNoFavoredNodes);
263   }
264 
265   // Do a check of the hostname and port and return the servername from the servers list
266   // that matched (the favoredNode will have a startcode of -1 but we want the real
267   // server with the legit startcode
268   private ServerName availableServersContains(List<ServerName> servers, ServerName favoredNode) {
269     for (ServerName server : servers) {
270       if (ServerName.isSameHostnameAndPort(favoredNode, server)) {
271         return server;
272       }
273     }
274     return null;
275   }
276 
277   private void assignRegionToAvailableFavoredNode(Map<ServerName,
278       List<HRegionInfo>> assignmentMapForFavoredNodes, HRegionInfo region, ServerName primaryHost,
279       ServerName secondaryHost, ServerName tertiaryHost) {
280     if (primaryHost != null) {
281       addRegionToMap(assignmentMapForFavoredNodes, region, primaryHost);
282     } else if (secondaryHost != null && tertiaryHost != null) {
283       // assign the region to the one with a lower load
284       // (both have the desired hdfs blocks)
285       ServerName s;
286       ServerLoad tertiaryLoad = super.services.getServerManager().getLoad(tertiaryHost);
287       ServerLoad secondaryLoad = super.services.getServerManager().getLoad(secondaryHost);
288       if (secondaryLoad.getLoad() < tertiaryLoad.getLoad()) {
289         s = secondaryHost;
290       } else {
291         s = tertiaryHost;
292       }
293       addRegionToMap(assignmentMapForFavoredNodes, region, s);
294     } else if (secondaryHost != null) {
295       addRegionToMap(assignmentMapForFavoredNodes, region, secondaryHost);
296     } else if (tertiaryHost != null) {
297       addRegionToMap(assignmentMapForFavoredNodes, region, tertiaryHost);
298     }
299   }
300 
301   private void addRegionToMap(Map<ServerName, List<HRegionInfo>> assignmentMapForFavoredNodes,
302       HRegionInfo region, ServerName host) {
303     List<HRegionInfo> regionsOnServer = null;
304     if ((regionsOnServer = assignmentMapForFavoredNodes.get(host)) == null) {
305       regionsOnServer = new ArrayList<HRegionInfo>();
306       assignmentMapForFavoredNodes.put(host, regionsOnServer);
307     }
308     regionsOnServer.add(region);
309   }
310 
311   public List<ServerName> getFavoredNodes(HRegionInfo regionInfo) {
312     return this.globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo);
313   }
314 
315   private void roundRobinAssignmentImpl(FavoredNodeAssignmentHelper assignmentHelper,
316       Map<ServerName, List<HRegionInfo>> assignmentMap,
317       List<HRegionInfo> regions, List<ServerName> servers) {
318     Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>();
319     // figure the primary RSs
320     assignmentHelper.placePrimaryRSAsRoundRobin(assignmentMap, primaryRSMap, regions);
321     assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
322   }
323 
324   private void assignSecondaryAndTertiaryNodesForRegion(
325       FavoredNodeAssignmentHelper assignmentHelper,
326       List<HRegionInfo> regions, Map<HRegionInfo, ServerName> primaryRSMap) {
327     // figure the secondary and tertiary RSs
328     Map<HRegionInfo, ServerName[]> secondaryAndTertiaryRSMap =
329         assignmentHelper.placeSecondaryAndTertiaryRS(primaryRSMap);
330     // now record all the assignments so that we can serve queries later
331     for (HRegionInfo region : regions) {
332       // Store the favored nodes without startCode for the ServerName objects
333       // We don't care about the startcode; but only the hostname really
334       List<ServerName> favoredNodesForRegion = new ArrayList<ServerName>(3);
335       ServerName sn = primaryRSMap.get(region);
336       favoredNodesForRegion.add(ServerName.valueOf(sn.getHostname(), sn.getPort(),
337           ServerName.NON_STARTCODE));
338       ServerName[] secondaryAndTertiaryNodes = secondaryAndTertiaryRSMap.get(region);
339       if (secondaryAndTertiaryNodes != null) {
340         favoredNodesForRegion.add(ServerName.valueOf(secondaryAndTertiaryNodes[0].getHostname(),
341             secondaryAndTertiaryNodes[0].getPort(), ServerName.NON_STARTCODE));
342         favoredNodesForRegion.add(ServerName.valueOf(secondaryAndTertiaryNodes[1].getHostname(),
343             secondaryAndTertiaryNodes[1].getPort(), ServerName.NON_STARTCODE));
344       }
345       globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(region, favoredNodesForRegion);
346     }
347   }
348 }