View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.Collections;
23  import java.util.HashMap;
24  import java.util.HashSet;
25  import java.util.Iterator;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.Set;
29  import java.util.TreeMap;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.HRegionInfo;
37  import org.apache.hadoop.hbase.RegionTransition;
38  import org.apache.hadoop.hbase.Server;
39  import org.apache.hadoop.hbase.ServerLoad;
40  import org.apache.hadoop.hbase.ServerName;
41  import org.apache.hadoop.hbase.TableName;
42  import org.apache.hadoop.hbase.catalog.MetaReader;
43  import org.apache.hadoop.hbase.master.RegionState.State;
44  import org.apache.hadoop.hbase.util.Bytes;
45  import org.apache.hadoop.hbase.util.FSUtils;
46  import org.apache.hadoop.hbase.util.Pair;
47  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
48  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
49  import org.apache.zookeeper.KeeperException;
50  
51  import com.google.common.base.Preconditions;
52  
53  /**
54   * Region state accountant. It holds the states of all regions in memory.
55   * In the normal scenario, it should match the meta table and the true region states.
56   *
57   * This map is used by AssignmentManager to track region states.
58   */
59  @InterfaceAudience.Private
60  public class RegionStates {
61    private static final Log LOG = LogFactory.getLog(RegionStates.class);
62  
63    /**
64     * Regions currently in transition.
65     */
66    final HashMap<String, RegionState> regionsInTransition;
67  
68    /**
69     * Region encoded name to state map.
70     * All the regions should be in this map.
71     */
72    private final HashMap<String, RegionState> regionStates;
73  
74    /**
75     * Holds mapping of table -> region state
76     */
77    private final Map<TableName, Map<String, RegionState>> regionStatesTableIndex =
78        new HashMap<TableName, Map<String, RegionState>>();
79  
80    /**
81     * Server to regions assignment map.
82     * Contains the set of regions currently assigned to a given server.
83     */
84    private final Map<ServerName, Set<HRegionInfo>> serverHoldings;
85  
86    /**
87     * Region to server assignment map.
88     * Contains the server a given region is currently assigned to.
89     */
90    private final TreeMap<HRegionInfo, ServerName> regionAssignments;
91  
92    /**
93     * Encoded region name to server assignment map for re-assignment
94     * purpose. Contains the server a given region is last known assigned
95     * to, which has not completed log splitting, so not assignable.
96     * If a region is currently assigned, this server info in this
97     * map should be the same as that in regionAssignments.
98     * However the info in regionAssignments is cleared when the region
99     * is offline while the info in lastAssignments is cleared when
100    * the region is closed or the server is dead and processed.
101    */
102   private final HashMap<String, ServerName> lastAssignments;
103 
104   /**
105    * Map a host port pair string to the latest start code
106    * of a region server which is known to be dead. It is dead
107    * to us, but server manager may not know it yet.
108    */
109   private final HashMap<String, Long> deadServers;
110 
111   /**
112    * Maps a dead server to the time when its log split was done.
113    * Since log splitting is not ordered, we have to remember
114    * all processed instances. The map is cleaned up based
115    * on a configured time. By default, we assume a dead
116    * server should be done with log splitting in two hours.
117    */
118   private final HashMap<ServerName, Long> processedServers;
119   private long lastProcessedServerCleanTime;
120 
121   private final RegionStateStore regionStateStore;
122   private final ServerManager serverManager;
123   private final Server server;
124 
125   // The maximum time to keep a log split info in region states map
126   static final String LOG_SPLIT_TIME = "hbase.master.maximum.logsplit.keeptime";
127   static final long DEFAULT_LOG_SPLIT_TIME = 7200000L; // 2 hours
128 
129   RegionStates(final Server master,
130       final ServerManager serverManager, final RegionStateStore regionStateStore) {
131     regionStates = new HashMap<String, RegionState>();
132     regionsInTransition = new HashMap<String, RegionState>();
133     serverHoldings = new HashMap<ServerName, Set<HRegionInfo>>();
134     regionAssignments = new TreeMap<HRegionInfo, ServerName>();
135     lastAssignments = new HashMap<String, ServerName>();
136     processedServers = new HashMap<ServerName, Long>();
137     deadServers = new HashMap<String, Long>();
138     this.regionStateStore = regionStateStore;
139     this.serverManager = serverManager;
140     this.server = master;
141   }
142 
143   /**
144    * @return an unmodifiable the region assignment map
145    */
146   public synchronized Map<HRegionInfo, ServerName> getRegionAssignments() {
147     return Collections.unmodifiableMap(regionAssignments);
148   }
149 
150   public synchronized ServerName getRegionServerOfRegion(HRegionInfo hri) {
151     return regionAssignments.get(hri);
152   }
153 
154   /**
155    * Get regions in transition and their states
156    */
157   @SuppressWarnings("unchecked")
158   public synchronized Map<String, RegionState> getRegionsInTransition() {
159     return (Map<String, RegionState>)regionsInTransition.clone();
160   }
161 
162   /**
163    * @return True if specified region in transition.
164    */
165   public synchronized boolean isRegionInTransition(final HRegionInfo hri) {
166     return regionsInTransition.containsKey(hri.getEncodedName());
167   }
168 
169   /**
170    * @return True if specified region in transition.
171    */
172   public synchronized boolean isRegionInTransition(final String encodedName) {
173     return regionsInTransition.containsKey(encodedName);
174   }
175 
176   /**
177    * @return True if any region in transition.
178    */
179   public synchronized boolean isRegionsInTransition() {
180     return !regionsInTransition.isEmpty();
181   }
182 
183   /**
184    * @return True if specified region assigned, and not in transition.
185    */
186   public synchronized boolean isRegionOnline(final HRegionInfo hri) {
187     return !isRegionInTransition(hri) && regionAssignments.containsKey(hri);
188   }
189 
190   /**
191    * @return True if specified region offline/closed, but not in transition.
192    * If the region is not in the map, it is offline to us too.
193    */
194   public synchronized boolean isRegionOffline(final HRegionInfo hri) {
195     return getRegionState(hri) == null || (!isRegionInTransition(hri)
196       && isRegionInState(hri, State.OFFLINE, State.CLOSED));
197   }
198 
199   /**
200    * @return True if specified region is in one of the specified states.
201    */
202   public boolean isRegionInState(
203       final HRegionInfo hri, final State... states) {
204     return isRegionInState(hri.getEncodedName(), states);
205   }
206 
207   /**
208    * @return True if specified region is in one of the specified states.
209    */
210   public boolean isRegionInState(
211       final String encodedName, final State... states) {
212     RegionState regionState = getRegionState(encodedName);
213     return isOneOfStates(regionState, states);
214   }
215 
216   /**
217    * Wait for the state map to be updated by assignment manager.
218    */
219   public synchronized void waitForUpdate(
220       final long timeout) throws InterruptedException {
221     this.wait(timeout);
222   }
223 
224   /**
225    * Get region transition state
226    */
227   public RegionState getRegionTransitionState(final HRegionInfo hri) {
228     return getRegionTransitionState(hri.getEncodedName());
229   }
230 
231   /**
232    * Get region transition state
233    */
234   public synchronized RegionState
235       getRegionTransitionState(final String encodedName) {
236     return regionsInTransition.get(encodedName);
237   }
238 
239   /**
240    * Add a list of regions to RegionStates. If a region is split
241    * and offline, its state will be SPLIT. Otherwise, its state will
242    * be OFFLINE. Region already in RegionStates will be skipped.
243    */
244   public void createRegionStates(
245       final List<HRegionInfo> hris) {
246     for (HRegionInfo hri: hris) {
247       createRegionState(hri);
248     }
249   }
250 
251   /**
252    * Add a region to RegionStates. If the region is split
253    * and offline, its state will be SPLIT. Otherwise, its state will
254    * be OFFLINE. If it is already in RegionStates, this call has
255    * no effect, and the original state is returned.
256    */
257   public RegionState createRegionState(final HRegionInfo hri) {
258     return createRegionState(hri, null, null);
259   }
260 
261   /**
262    * Add a region to RegionStates with the specified state.
263    * If the region is already in RegionStates, this call has
264    * no effect, and the original state is returned.
265    */
266   public synchronized RegionState createRegionState(
267       final HRegionInfo hri, State newState, ServerName serverName) {
268     if (newState == null || (newState == State.OPEN && serverName == null)) {
269       newState =  State.OFFLINE;
270     }
271     if (hri.isOffline() && hri.isSplit()) {
272       newState = State.SPLIT;
273       serverName = null;
274     }
275     String encodedName = hri.getEncodedName();
276     RegionState regionState = regionStates.get(encodedName);
277     if (regionState != null) {
278       LOG.warn("Tried to create a state for a region already in RegionStates, "
279         + "used existing: " + regionState + ", ignored new: " + newState);
280     } else {
281       regionState = new RegionState(hri, newState, serverName);
282       putRegionState(regionState);
283       if (newState == State.OPEN) {
284         regionAssignments.put(hri, serverName);
285         lastAssignments.put(encodedName, serverName);
286         Set<HRegionInfo> regions = serverHoldings.get(serverName);
287         if (regions == null) {
288           regions = new HashSet<HRegionInfo>();
289           serverHoldings.put(serverName, regions);
290         }
291         regions.add(hri);
292       } else if (!regionState.isUnassignable()) {
293         regionsInTransition.put(encodedName, regionState);
294       }
295     }
296     return regionState;
297   }
298 
299   private RegionState putRegionState(RegionState regionState) {
300     HRegionInfo hri = regionState.getRegion();
301     String encodedName = hri.getEncodedName();
302     TableName table = hri.getTable();
303     RegionState oldState = regionStates.put(encodedName, regionState);
304     Map<String, RegionState> map = regionStatesTableIndex.get(table);
305     if (map == null) {
306       map = new HashMap<String, RegionState>();
307       regionStatesTableIndex.put(table, map);
308     }
309     map.put(encodedName, regionState);
310     return oldState;
311   }
312 
313   /**
314    * Update a region state. It will be put in transition if not already there.
315    */
316   public RegionState updateRegionState(
317       final HRegionInfo hri, final State state) {
318     RegionState regionState = getRegionState(hri.getEncodedName());
319     return updateRegionState(hri, state,
320       regionState == null ? null : regionState.getServerName());
321   }
322 
323   /**
324    * Update a region state. It will be put in transition if not already there.
325    *
326    * If we can't find the region info based on the region name in
327    * the transition, log a warning and return null.
328    */
329   public RegionState updateRegionState(
330       final RegionTransition transition, final State state) {
331     byte [] regionName = transition.getRegionName();
332     HRegionInfo regionInfo = getRegionInfo(regionName);
333     if (regionInfo == null) {
334       String prettyRegionName = HRegionInfo.prettyPrint(
335         HRegionInfo.encodeRegionName(regionName));
336       LOG.warn("Failed to find region " + prettyRegionName
337         + " in updating its state to " + state
338         + " based on region transition " + transition);
339       return null;
340     }
341     return updateRegionState(regionInfo, state,
342       transition.getServerName());
343   }
344 
345   /**
346    * Transition a region state to OPEN from OPENING/PENDING_OPEN
347    */
348   public synchronized RegionState transitionOpenFromPendingOpenOrOpeningOnServer(
349       final RegionTransition transition, final RegionState fromState, final ServerName sn) {
350     if(fromState.isPendingOpenOrOpeningOnServer(sn)){
351       return updateRegionState(transition, State.OPEN);
352     }
353     return null;
354   }
355 
356   /**
357    * Update a region state. It will be put in transition if not already there.
358    */
359   public RegionState updateRegionState(
360       final HRegionInfo hri, final State state, final ServerName serverName) {
361     return updateRegionState(hri, state, serverName, HConstants.NO_SEQNUM);
362   }
363 
364   public void regionOnline(
365       final HRegionInfo hri, final ServerName serverName) {
366     regionOnline(hri, serverName, HConstants.NO_SEQNUM);
367   }
368 
369   /**
370    * A region is online, won't be in transition any more.
371    * We can't confirm it is really online on specified region server
372    * because it hasn't been put in region server's online region list yet.
373    */
374   public void regionOnline(final HRegionInfo hri,
375       final ServerName serverName, long openSeqNum) {
376     if (!serverManager.isServerOnline(serverName)) {
377       // This is possible if the region server dies before master gets a
378       // chance to handle ZK event in time. At this time, if the dead server
379       // is already processed by SSH, we should ignore this event.
380       // If not processed yet, ignore and let SSH deal with it.
381       LOG.warn("Ignored, " + hri.getEncodedName()
382         + " was opened on a dead server: " + serverName);
383       return;
384     }
385     updateRegionState(hri, State.OPEN, serverName, openSeqNum);
386 
387     synchronized (this) {
388       regionsInTransition.remove(hri.getEncodedName());
389       ServerName oldServerName = regionAssignments.put(hri, serverName);
390       if (!serverName.equals(oldServerName)) {
391         LOG.info("Onlined " + hri.getShortNameToLog() + " on " + serverName);
392         Set<HRegionInfo> regions = serverHoldings.get(serverName);
393         if (regions == null) {
394           regions = new HashSet<HRegionInfo>();
395           serverHoldings.put(serverName, regions);
396         }
397         regions.add(hri);
398         if (oldServerName != null) {
399           LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
400           Set<HRegionInfo> oldRegions = serverHoldings.get(oldServerName);
401           oldRegions.remove(hri);
402           if (oldRegions.isEmpty()) {
403             serverHoldings.remove(oldServerName);
404           }
405         }
406       }
407     }
408   }
409 
410   /**
411    * A dead server's hlogs have been split so that all the regions
412    * used to be open on it can be safely assigned now. Mark them assignable.
413    */
414   public synchronized void logSplit(final ServerName serverName) {
415     for (Iterator<Map.Entry<String, ServerName>> it
416         = lastAssignments.entrySet().iterator(); it.hasNext();) {
417       Map.Entry<String, ServerName> e = it.next();
418       if (e.getValue().equals(serverName)) {
419         it.remove();
420       }
421     }
422     long now = System.currentTimeMillis();
423     if (LOG.isDebugEnabled()) {
424       LOG.debug("Adding to processed servers " + serverName);
425     }
426     processedServers.put(serverName, Long.valueOf(now));
427     Configuration conf = server.getConfiguration();
428     long obsoleteTime = conf.getLong(LOG_SPLIT_TIME, DEFAULT_LOG_SPLIT_TIME);
429     // Doesn't have to be very accurate about the clean up time
430     if (now > lastProcessedServerCleanTime + obsoleteTime) {
431       lastProcessedServerCleanTime = now;
432       long cutoff = now - obsoleteTime;
433       for (Iterator<Map.Entry<ServerName, Long>> it
434           = processedServers.entrySet().iterator(); it.hasNext();) {
435         Map.Entry<ServerName, Long> e = it.next();
436         if (e.getValue().longValue() < cutoff) {
437           if (LOG.isDebugEnabled()) {
438             LOG.debug("Removed from processed servers " + e.getKey());
439           }
440           it.remove();
441         }
442       }
443     }
444   }
445 
446   /**
447    * Log split is done for a given region, so it is assignable now.
448    */
449   public void logSplit(final HRegionInfo region) {
450     clearLastAssignment(region);
451   }
452 
453   public synchronized void clearLastAssignment(final HRegionInfo region) {
454     lastAssignments.remove(region.getEncodedName());
455   }
456 
457   /**
458    * A region is offline, won't be in transition any more.
459    */
460   public void regionOffline(final HRegionInfo hri) {
461     regionOffline(hri, null);
462   }
463 
464   /**
465    * A region is offline, won't be in transition any more. Its state
466    * should be the specified expected state, which can only be
467    * Split/Merged/Offline/null(=Offline)/SplittingNew/MergingNew.
468    */
469   public void regionOffline(
470       final HRegionInfo hri, final State expectedState) {
471     Preconditions.checkArgument(expectedState == null
472       || RegionState.isUnassignable(expectedState),
473         "Offlined region should not be " + expectedState);
474     if (isRegionInState(hri, State.SPLITTING_NEW, State.MERGING_NEW)) {
475       // Remove it from all region maps
476       deleteRegion(hri);
477       return;
478     }
479     State newState =
480       expectedState == null ? State.OFFLINE : expectedState;
481     updateRegionState(hri, newState);
482 
483     synchronized (this) {
484       regionsInTransition.remove(hri.getEncodedName());
485       ServerName oldServerName = regionAssignments.remove(hri);
486       if (oldServerName != null && serverHoldings.containsKey(oldServerName)) {
487         LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
488         Set<HRegionInfo> oldRegions = serverHoldings.get(oldServerName);
489         oldRegions.remove(hri);
490         if (oldRegions.isEmpty()) {
491           serverHoldings.remove(oldServerName);
492         }
493       }
494     }
495   }
496 
497   /**
498    * A server is offline, all regions on it are dead.
499    */
500   public List<HRegionInfo> serverOffline(final ZooKeeperWatcher watcher, final ServerName sn) {
501     // Offline all regions on this server not already in transition.
502     List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
503     Set<HRegionInfo> regionsToCleanIfNoMetaEntry = new HashSet<HRegionInfo>();
504     synchronized (this) {
505       Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
506       if (assignedRegions == null) {
507         assignedRegions = new HashSet<HRegionInfo>();
508       }
509 
510       // Offline regions outside the loop to avoid ConcurrentModificationException
511       Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
512       for (HRegionInfo region : assignedRegions) {
513         // Offline open regions, no need to offline if SPLIT/MERGED/OFFLINE
514         if (isRegionOnline(region)) {
515           regionsToOffline.add(region);
516         } else {
517           if (isRegionInState(region, State.SPLITTING, State.MERGING)) {
518             LOG.debug("Offline splitting/merging region " + getRegionState(region));
519             try {
520               // Delete the ZNode if exists
521               ZKAssign.deleteNodeFailSilent(watcher, region);
522               regionsToOffline.add(region);
523             } catch (KeeperException ke) {
524               server.abort("Unexpected ZK exception deleting node " + region, ke);
525             }
526           }
527         }
528       }
529 
530       for (RegionState state : regionsInTransition.values()) {
531         HRegionInfo hri = state.getRegion();
532         if (assignedRegions.contains(hri)) {
533           // Region is open on this region server, but in transition.
534           // This region must be moving away from this server, or splitting/merging.
535           // SSH will handle it, either skip assigning, or re-assign.
536           LOG.info("Transitioning " + state + " will be handled by SSH for " + sn);
537         } else if (sn.equals(state.getServerName())) {
538           // Region is in transition on this region server, and this
539           // region is not open on this server. So the region must be
540           // moving to this server from another one (i.e. opening or
541           // pending open on this server, was open on another one.
542           // Offline state is also kind of pending open if the region is in
543           // transition. The region could be in failed_close state too if we have
544           // tried several times to open it while this region server is not reachable)
545           if (state.isPendingOpenOrOpening() || state.isFailedClose() || state.isOffline()) {
546             LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn);
547             rits.add(hri);
548           } else if(state.isSplittingNew()) {
549             regionsToCleanIfNoMetaEntry.add(state.getRegion());
550           } else {
551             LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
552           }
553         }
554       }
555 
556       for (HRegionInfo hri : regionsToOffline) {
557         regionOffline(hri);
558       }
559 
560       this.notifyAll();
561     }
562     cleanIfNoMetaEntry(regionsToCleanIfNoMetaEntry);
563     return rits;
564   }
565 
566   /**
567    * This method does an RPC to hbase:meta. Do not call this method with a lock/synchronize held.
568    * @param hris The hris to check if empty in hbase:meta and if so, clean them up.
569    */
570   private void cleanIfNoMetaEntry(Set<HRegionInfo> hris) {
571     if (hris.isEmpty()) return;
572     for (HRegionInfo hri: hris) {
573       try {
574         // This is RPC to meta table. It is done while we have a synchronize on
575         // regionstates. No progress will be made if meta is not available at this time.
576         // This is a cleanup task. Not critical.
577         if (MetaReader.getRegion(server.getCatalogTracker(), hri.getEncodedNameAsBytes()) ==
578             null) {
579           regionOffline(hri);
580           FSUtils.deleteRegionDir(server.getConfiguration(), hri);
581         }
582       } catch (IOException e) {
583         LOG.warn("Got exception while deleting " + hri + " directories from file system.", e);
584       }
585     }
586   }
587 
588   /**
589    * Gets the online regions of the specified table.
590    * This method looks at the in-memory state.  It does not go to <code>hbase:meta</code>.
591    * Only returns <em>online</em> regions.  If a region on this table has been
592    * closed during a disable, etc., it will be included in the returned list.
593    * So, the returned list may not necessarily be ALL regions in this table, its
594    * all the ONLINE regions in the table.
595    * @param tableName
596    * @return Online regions from <code>tableName</code>
597    */
598   public synchronized List<HRegionInfo> getRegionsOfTable(TableName tableName) {
599     List<HRegionInfo> tableRegions = new ArrayList<HRegionInfo>();
600     // boundary needs to have table's name but regionID 0 so that it is sorted
601     // before all table's regions.
602     HRegionInfo boundary = new HRegionInfo(tableName, null, null, false, 0L);
603     for (HRegionInfo hri: regionAssignments.tailMap(boundary).keySet()) {
604       if(!hri.getTable().equals(tableName)) break;
605       tableRegions.add(hri);
606     }
607     return tableRegions;
608   }
609 
610   /**
611    * Gets current state of all regions of the table.
612    * This method looks at the in-memory state.  It does not go to <code>hbase:meta</code>.
613    * Method guaranteed to return keys for all states
614    * in {@link org.apache.hadoop.hbase.master.RegionState.State}
615    *
616    * @param tableName
617    * @return Online regions from <code>tableName</code>
618    */
619   public synchronized Map<RegionState.State, List<HRegionInfo>>
620   getRegionByStateOfTable(TableName tableName) {
621     Map<RegionState.State, List<HRegionInfo>> tableRegions =
622         new HashMap<State, List<HRegionInfo>>();
623     for (State state : State.values()) {
624       tableRegions.put(state, new ArrayList<HRegionInfo>());
625     }
626     Map<String, RegionState> indexMap = regionStatesTableIndex.get(tableName);
627     if (indexMap == null)
628       return tableRegions;
629     for (RegionState regionState : indexMap.values()) {
630       tableRegions.get(regionState.getState()).add(regionState.getRegion());
631     }
632     return tableRegions;
633   }
634 
635   /**
636    * Wait on region to clear regions-in-transition.
637    * <p>
638    * If the region isn't in transition, returns immediately.  Otherwise, method
639    * blocks until the region is out of transition.
640    */
641   public synchronized void waitOnRegionToClearRegionsInTransition(
642       final HRegionInfo hri) throws InterruptedException {
643     if (!isRegionInTransition(hri)) return;
644 
645     while(!server.isStopped() && isRegionInTransition(hri)) {
646       RegionState rs = getRegionState(hri);
647       LOG.info("Waiting on " + rs + " to clear regions-in-transition");
648       waitForUpdate(100);
649     }
650 
651     if (server.isStopped()) {
652       LOG.info("Giving up wait on region in " +
653         "transition because stoppable.isStopped is set");
654     }
655   }
656 
657   /**
658    * A table is deleted. Remove its regions from all internal maps.
659    * We loop through all regions assuming we don't delete tables too much.
660    */
661   public void tableDeleted(final TableName tableName) {
662     Set<HRegionInfo> regionsToDelete = new HashSet<HRegionInfo>();
663     synchronized (this) {
664       for (RegionState state: regionStates.values()) {
665         HRegionInfo region = state.getRegion();
666         if (region.getTable().equals(tableName)) {
667           regionsToDelete.add(region);
668         }
669       }
670     }
671     for (HRegionInfo region: regionsToDelete) {
672       deleteRegion(region);
673     }
674   }
675 
676   /**
677    * Checking if a region was assigned to a server which is not online now.
678    * If so, we should hold re-assign this region till SSH has split its hlogs.
679    * Once logs are split, the last assignment of this region will be reset,
680    * which means a null last assignment server is ok for re-assigning.
681    *
682    * A region server could be dead but we don't know it yet. We may
683    * think it's online falsely. Therefore if a server is online, we still
684    * need to confirm it reachable and having the expected start code.
685    */
686   synchronized boolean wasRegionOnDeadServer(final String encodedName) {
687     ServerName server = lastAssignments.get(encodedName);
688     return isServerDeadAndNotProcessed(server);
689   }
690 
691   synchronized boolean isServerDeadAndNotProcessed(ServerName server) {
692     if (server == null) return false;
693     if (serverManager.isServerOnline(server)) {
694       String hostAndPort = server.getHostAndPort();
695       long startCode = server.getStartcode();
696       Long deadCode = deadServers.get(hostAndPort);
697       if (deadCode == null || startCode > deadCode.longValue()) {
698         if (serverManager.isServerReachable(server)) {
699           return false;
700         }
701         // The size of deadServers won't grow unbounded.
702         deadServers.put(hostAndPort, Long.valueOf(startCode));
703       }
704       // Watch out! If the server is not dead, the region could
705       // remain unassigned. That's why ServerManager#isServerReachable
706       // should use some retry.
707       //
708       // We cache this info since it is very unlikely for that
709       // instance to come back up later on. We don't want to expire
710       // the server since we prefer to let it die naturally.
711       LOG.warn("Couldn't reach online server " + server);
712     }
713     // Now, we know it's dead. Check if it's processed
714     return !processedServers.containsKey(server);
715   }
716 
717  /**
718    * Get the last region server a region was on for purpose of re-assignment,
719    * i.e. should the re-assignment be held back till log split is done?
720    */
721   synchronized ServerName getLastRegionServerOfRegion(final String encodedName) {
722     return lastAssignments.get(encodedName);
723   }
724 
725   synchronized void setLastRegionServerOfRegions(
726       final ServerName serverName, final List<HRegionInfo> regionInfos) {
727     for (HRegionInfo hri: regionInfos) {
728       setLastRegionServerOfRegion(serverName, hri.getEncodedName());
729     }
730   }
731 
732   synchronized void setLastRegionServerOfRegion(
733       final ServerName serverName, final String encodedName) {
734     lastAssignments.put(encodedName, serverName);
735   }
736 
737   synchronized void closeAllUserRegions(Set<TableName> excludedTables) {
738     Set<HRegionInfo> toBeClosed = new HashSet<HRegionInfo>(regionStates.size());
739     for(RegionState state: regionStates.values()) {
740       HRegionInfo hri = state.getRegion();
741       TableName tableName = hri.getTable();
742       if (!hri.isSplit() && !hri.isMetaRegion()
743           && !excludedTables.contains(tableName)) {
744         toBeClosed.add(hri);
745       }
746     }
747     for (HRegionInfo hri: toBeClosed) {
748       updateRegionState(hri, State.CLOSED);
749     }
750   }
751 
752   /**
753    * Compute the average load across all region servers.
754    * Currently, this uses a very naive computation - just uses the number of
755    * regions being served, ignoring stats about number of requests.
756    * @return the average load
757    */
758   protected synchronized double getAverageLoad() {
759     int numServers = 0, totalLoad = 0;
760     for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
761       Set<HRegionInfo> regions = e.getValue();
762       ServerName serverName = e.getKey();
763       int regionCount = regions.size();
764       if (serverManager.isServerOnline(serverName)) {
765         totalLoad += regionCount;
766         numServers++;
767       }
768     }
769     return numServers == 0 ? 0.0 :
770       (double)totalLoad / (double)numServers;
771   }
772 
773   /**
774    * This is an EXPENSIVE clone.  Cloning though is the safest thing to do.
775    * Can't let out original since it can change and at least the load balancer
776    * wants to iterate this exported list.  We need to synchronize on regions
777    * since all access to this.servers is under a lock on this.regions.
778    *
779    * @return A clone of current assignments by table.
780    */
781   protected Map<TableName, Map<ServerName, List<HRegionInfo>>>
782       getAssignmentsByTable() {
783     Map<TableName, Map<ServerName, List<HRegionInfo>>> result =
784       new HashMap<TableName, Map<ServerName,List<HRegionInfo>>>();
785     synchronized (this) {
786       if (!server.getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false)) {
787         Map<ServerName, List<HRegionInfo>> svrToRegions =
788           new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
789         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
790           svrToRegions.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
791         }
792         result.put(TableName.valueOf("ensemble"), svrToRegions);
793       } else {
794         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
795           for (HRegionInfo hri: e.getValue()) {
796             if (hri.isMetaRegion()) continue;
797             TableName tablename = hri.getTable();
798             Map<ServerName, List<HRegionInfo>> svrToRegions = result.get(tablename);
799             if (svrToRegions == null) {
800               svrToRegions = new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
801               result.put(tablename, svrToRegions);
802             }
803             List<HRegionInfo> regions = svrToRegions.get(e.getKey());
804             if (regions == null) {
805               regions = new ArrayList<HRegionInfo>();
806               svrToRegions.put(e.getKey(), regions);
807             }
808             regions.add(hri);
809           }
810         }
811       }
812     }
813 
814     Map<ServerName, ServerLoad>
815       onlineSvrs = serverManager.getOnlineServers();
816     // Take care of servers w/o assignments, and remove servers in draining mode
817     List<ServerName> drainingServers = this.serverManager.getDrainingServersList();
818     for (Map<ServerName, List<HRegionInfo>> map: result.values()) {
819       for (ServerName svr: onlineSvrs.keySet()) {
820         if (!map.containsKey(svr)) {
821           map.put(svr, new ArrayList<HRegionInfo>());
822         }
823       }
824       map.keySet().removeAll(drainingServers);
825     }
826     return result;
827   }
828 
829   protected RegionState getRegionState(final HRegionInfo hri) {
830     return getRegionState(hri.getEncodedName());
831   }
832 
833   protected synchronized RegionState getRegionState(final String encodedName) {
834     return regionStates.get(encodedName);
835   }
836 
837   /**
838    * Get the HRegionInfo from cache, if not there, from the hbase:meta table.
839    * Be careful. Does RPC. Do not hold a lock or synchronize when you call this method.
840    * @param  regionName
841    * @return HRegionInfo for the region
842    */
843   protected HRegionInfo getRegionInfo(final byte [] regionName) {
844     String encodedName = HRegionInfo.encodeRegionName(regionName);
845     RegionState regionState = getRegionState(encodedName);
846     if (regionState != null) {
847       return regionState.getRegion();
848     }
849 
850     try {
851       Pair<HRegionInfo, ServerName> p =
852         MetaReader.getRegion(server.getCatalogTracker(), regionName);
853       HRegionInfo hri = p == null ? null : p.getFirst();
854       if (hri != null) {
855         createRegionState(hri);
856       }
857       return hri;
858     } catch (IOException e) {
859       server.abort("Aborting because error occoured while reading "
860         + Bytes.toStringBinary(regionName) + " from hbase:meta", e);
861       return null;
862     }
863   }
864 
865   static boolean isOneOfStates(RegionState regionState, State... states) {
866     State s = regionState != null ? regionState.getState() : null;
867     for (State state: states) {
868       if (s == state) return true;
869     }
870     return false;
871   }
872 
873   /**
874    * Update a region state. It will be put in transition if not already there.
875    */
876   private RegionState updateRegionState(final HRegionInfo hri,
877       final State state, final ServerName serverName, long openSeqNum) {
878     if (state == State.FAILED_CLOSE || state == State.FAILED_OPEN) {
879       LOG.warn("Failed to open/close " + hri.getShortNameToLog()
880         + " on " + serverName + ", set to " + state);
881     }
882 
883     String encodedName = hri.getEncodedName();
884     RegionState regionState = new RegionState(
885       hri, state, System.currentTimeMillis(), serverName);
886     RegionState oldState = getRegionState(encodedName);
887     if (!regionState.equals(oldState)) {
888       LOG.info("Transition " + oldState + " to " + regionState);
889       // Persist region state before updating in-memory info, if needed
890       regionStateStore.updateRegionState(openSeqNum, regionState, oldState);
891     }
892 
893     synchronized (this) {
894       regionsInTransition.put(encodedName, regionState);
895       putRegionState(regionState);
896 
897       // For these states, region should be properly closed.
898       // There should be no log splitting issue.
899       if ((state == State.CLOSED || state == State.MERGED
900           || state == State.SPLIT) && lastAssignments.containsKey(encodedName)) {
901         ServerName last = lastAssignments.get(encodedName);
902         if (last.equals(serverName)) {
903           lastAssignments.remove(encodedName);
904         } else {
905           LOG.warn(encodedName + " moved to " + state + " on "
906             + serverName + ", expected " + last);
907         }
908       }
909 
910       // Once a region is opened, record its last assignment right away.
911       if (serverName != null && state == State.OPEN) {
912         ServerName last = lastAssignments.get(encodedName);
913         if (!serverName.equals(last)) {
914           lastAssignments.put(encodedName, serverName);
915           if (last != null && isServerDeadAndNotProcessed(last)) {
916             LOG.warn(encodedName + " moved to " + serverName
917               + ", while it's previous host " + last
918               + " is dead but not processed yet");
919           }
920         }
921       }
922 
923       // notify the change
924       this.notifyAll();
925     }
926     return regionState;
927   }
928 
929   /**
930    * Remove a region from all state maps.
931    */
932   private synchronized void deleteRegion(final HRegionInfo hri) {
933     String encodedName = hri.getEncodedName();
934     regionsInTransition.remove(encodedName);
935     regionStates.remove(encodedName);
936     lastAssignments.remove(encodedName);
937     ServerName sn = regionAssignments.remove(hri);
938     if (sn != null) {
939       Set<HRegionInfo> regions = serverHoldings.get(sn);
940       regions.remove(hri);
941     }
942   }
943 }