View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  
24  import org.apache.hadoop.hbase.HBaseTestingUtility;
25  import org.apache.hadoop.hbase.HConstants;
26  import org.apache.hadoop.hbase.HRegionInfo;
27  import org.apache.hadoop.hbase.HTableDescriptor;
28  import org.apache.hadoop.hbase.testclassification.MediumTests;
29  import org.apache.hadoop.hbase.NotServingRegionException;
30  import org.apache.hadoop.hbase.ServerName;
31  import org.apache.hadoop.hbase.client.HTable;
32  import org.apache.hadoop.hbase.client.Put;
33  import org.apache.hadoop.hbase.executor.EventType;
34  import org.apache.hadoop.hbase.protobuf.RequestConverter;
35  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
36  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CloseRegionRequest;
37  import org.apache.hadoop.hbase.regionserver.handler.OpenRegionHandler;
38  import org.apache.hadoop.hbase.util.Bytes;
39  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
40  import org.junit.After;
41  import org.junit.AfterClass;
42  import org.junit.Assert;
43  import org.junit.BeforeClass;
44  import org.junit.Test;
45  import org.junit.experimental.categories.Category;
46  
47  import com.google.protobuf.ServiceException;
48  
49  
50  /**
51   * Tests on the region server, without the master.
52   */
53  @Category(MediumTests.class)
54  public class TestRegionServerNoMaster {
55  
56    private static final int NB_SERVERS = 1;
57    private static HTable table;
58    private static final byte[] row = "ee".getBytes();
59  
60    private static HRegionInfo hri;
61  
62    private static byte[] regionName;
63    private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
64  
65  
66    @BeforeClass
67    public static void before() throws Exception {
68      HTU.getConfiguration().setBoolean("hbase.assignment.usezk", true);
69      HTU.startMiniCluster(NB_SERVERS);
70      final byte[] tableName = Bytes.toBytes(TestRegionServerNoMaster.class.getSimpleName());
71  
72      // Create table then get the single region for our new table.
73      table = HTU.createTable(tableName, HConstants.CATALOG_FAMILY);
74      Put p = new Put(row);
75      p.add(HConstants.CATALOG_FAMILY, row, row);
76      table.put(p);
77  
78      hri = table.getRegionLocation(row, false).getRegionInfo();
79      regionName = hri.getRegionName();
80  
81      // No master
82      HTU.getHBaseCluster().getMaster().stopMaster();
83    }
84  
85    @AfterClass
86    public static void afterClass() throws Exception {
87      table.close();
88      HTU.shutdownMiniCluster();
89    }
90  
91    @After
92    public void after() throws Exception {
93      // Clean the state if the test failed before cleaning the znode
94      // It does not manage all bad failures, so if there are multiple failures, only
95      //  the first one should be looked at.
96      ZKAssign.deleteNodeFailSilent(HTU.getZooKeeperWatcher(), hri);
97    }
98  
99  
100   private static HRegionServer getRS() {
101     return HTU.getHBaseCluster().getLiveRegionServerThreads().get(0).getRegionServer();
102   }
103 
104 
105   /**
106    * Reopen the region. Reused in multiple tests as we always leave the region open after a test.
107    */
108   private void reopenRegion() throws Exception {
109     // We reopen. We need a ZK node here, as a open is always triggered by a master.
110     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
111     // first version is '0'
112     AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
113       getRS().getServerName(), hri, 0, null, null);
114     AdminProtos.OpenRegionResponse responseOpen = getRS().openRegion(null, orr);
115     Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
116     Assert.assertTrue(responseOpen.getOpeningState(0).
117         equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED));
118 
119 
120     checkRegionIsOpened();
121   }
122 
123   private void checkRegionIsOpened() throws Exception {
124 
125     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
126       Thread.sleep(1);
127     }
128 
129     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
130 
131     Assert.assertTrue(
132       ZKAssign.deleteOpenedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
133         getRS().getServerName()));
134   }
135 
136 
137   private void checkRegionIsClosed() throws Exception {
138 
139     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
140       Thread.sleep(1);
141     }
142 
143     try {
144       Assert.assertFalse(getRS().getRegion(regionName).isAvailable());
145     } catch (NotServingRegionException expected) {
146       // That's how it work: if the region is closed we have an exception.
147     }
148 
149     // We don't delete the znode here, because there is not always a znode.
150   }
151 
152 
153   /**
154    * Close the region without using ZK
155    */
156   private void closeNoZK() throws Exception {
157     // no transition in ZK
158     AdminProtos.CloseRegionRequest crr =
159         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
160     AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
161     Assert.assertTrue(responseClose.getClosed());
162 
163     // now waiting & checking. After a while, the transition should be done and the region closed
164     checkRegionIsClosed();
165   }
166 
167 
168   @Test(timeout = 60000)
169   public void testCloseByRegionServer() throws Exception {
170     closeNoZK();
171     reopenRegion();
172   }
173 
174   @Test(timeout = 60000)
175   public void testCloseByMasterWithoutZNode() throws Exception {
176 
177     // Transition in ZK on. This should fail, as there is no znode
178     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
179       getRS().getServerName(), regionName, true);
180     AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
181     Assert.assertTrue(responseClose.getClosed());
182 
183     // now waiting. After a while, the transition should be done
184     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
185       Thread.sleep(1);
186     }
187 
188     // the region is still available, the close got rejected at the end
189     Assert.assertTrue("The close should have failed", getRS().getRegion(regionName).isAvailable());
190   }
191 
192   @Test(timeout = 60000)
193   public void testOpenCloseByMasterWithZNode() throws Exception {
194 
195     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
196 
197     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
198       getRS().getServerName(), regionName, true);
199     AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
200     Assert.assertTrue(responseClose.getClosed());
201 
202     checkRegionIsClosed();
203 
204     ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
205       getRS().getServerName());
206 
207     reopenRegion();
208   }
209 
210   /**
211    * Test that we can send multiple openRegion to the region server.
212    * This is used when:
213    * - there is a SocketTimeout: in this case, the master does not know if the region server
214    * received the request before the timeout.
215    * - We have a socket error during the operation: same stuff: we don't know
216    * - a master failover: if we find a znode in thz M_ZK_REGION_OFFLINE, we don't know if
217    * the region server has received the query or not. Only solution to be efficient: re-ask
218    * immediately.
219    */
220   @Test(timeout = 60000)
221   public void testMultipleOpen() throws Exception {
222 
223     // We close
224     closeNoZK();
225     checkRegionIsClosed();
226 
227     // We reopen. We need a ZK node here, as a open is always triggered by a master.
228     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
229 
230     // We're sending multiple requests in a row. The region server must handle this nicely.
231     for (int i = 0; i < 10; i++) {
232       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
233         getRS().getServerName(), hri, 0, null, null);
234       AdminProtos.OpenRegionResponse responseOpen = getRS().openRegion(null, orr);
235       Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
236 
237       AdminProtos.OpenRegionResponse.RegionOpeningState ors = responseOpen.getOpeningState(0);
238       Assert.assertTrue("request " + i + " failed",
239           ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED) ||
240               ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.ALREADY_OPENED)
241       );
242     }
243 
244     checkRegionIsOpened();
245   }
246 
247   @Test
248   public void testOpenClosingRegion() throws Exception {
249     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
250 
251     try {
252       // fake region to be closing now, need to clear state afterwards
253       getRS().regionsInTransitionInRS.put(hri.getEncodedNameAsBytes(), Boolean.FALSE);
254       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
255         getRS().getServerName(), hri, 0, null, null);
256       getRS().openRegion(null, orr);
257       Assert.fail("The closing region should not be opened");
258     } catch (ServiceException se) {
259       Assert.assertTrue("The region should be already in transition",
260         se.getCause() instanceof RegionAlreadyInTransitionException);
261     } finally {
262       getRS().regionsInTransitionInRS.remove(hri.getEncodedNameAsBytes());
263     }
264   }
265 
266   @Test(timeout = 60000)
267   public void testMultipleCloseFromMaster() throws Exception {
268 
269     // As opening, we must support multiple requests on the same region
270     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
271     for (int i = 0; i < 10; i++) {
272       AdminProtos.CloseRegionRequest crr =
273           RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, 0, null, true);
274       try {
275         AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
276         Assert.assertEquals("The first request should succeeds", 0, i);
277         Assert.assertTrue("request " + i + " failed",
278             responseClose.getClosed() || responseClose.hasClosed());
279       } catch (ServiceException se) {
280         Assert.assertTrue("The next queries should throw an exception.", i > 0);
281       }
282     }
283 
284     checkRegionIsClosed();
285 
286     Assert.assertTrue(
287       ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
288         getRS().getServerName())
289     );
290 
291     reopenRegion();
292   }
293 
294   /**
295    * Test that if we do a close while opening it stops the opening.
296    */
297   @Test(timeout = 60000)
298   public void testCancelOpeningWithoutZK() throws Exception {
299     // We close
300     closeNoZK();
301     checkRegionIsClosed();
302 
303     // Let do the initial steps, without having a handler
304     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
305     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
306 
307     // That's a close without ZK.
308     AdminProtos.CloseRegionRequest crr =
309         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
310     try {
311       getRS().closeRegion(null, crr);
312       Assert.assertTrue(false);
313     } catch (ServiceException expected) {
314     }
315 
316     // The state in RIT should have changed to close
317     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
318         hri.getEncodedNameAsBytes()));
319 
320     // Let's start the open handler
321     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTable());
322     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd, 0));
323 
324     // The open handler should have removed the region from RIT but kept the region closed
325     checkRegionIsClosed();
326 
327     // The open handler should have updated the value in ZK.
328     Assert.assertTrue(ZKAssign.deleteNode(
329         getRS().getZooKeeperWatcher(), hri.getEncodedName(),
330         EventType.RS_ZK_REGION_FAILED_OPEN, 1)
331     );
332 
333     reopenRegion();
334   }
335 
336   /**
337    * Test an open then a close with ZK. This is going to mess-up the ZK states, so
338    * the opening will fail as well because it doesn't find what it expects in ZK.
339    */
340   @Test(timeout = 60000)
341   public void testCancelOpeningWithZK() throws Exception {
342     // We close
343     closeNoZK();
344     checkRegionIsClosed();
345 
346     // Let do the initial steps, without having a handler
347     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
348 
349     // That's a close without ZK.
350     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
351     AdminProtos.CloseRegionRequest crr =
352         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
353     try {
354       getRS().closeRegion(null, crr);
355       Assert.assertTrue(false);
356     } catch (ServiceException expected) {
357       Assert.assertTrue(expected.getCause() instanceof RegionAlreadyInTransitionException);
358     }
359 
360     // The close should have left the ZK state as it is: it's the job the AM to delete it
361     Assert.assertTrue(ZKAssign.deleteNode(
362         getRS().getZooKeeperWatcher(), hri.getEncodedName(),
363         EventType.M_ZK_REGION_CLOSING, 0)
364     );
365 
366     // The state in RIT should have changed to close
367     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
368         hri.getEncodedNameAsBytes()));
369 
370     // Let's start the open handler
371     // It should not succeed for two reasons:
372     //  1) There is no ZK node
373     //  2) The region in RIT was changed.
374     // The order is more or less implementation dependant.
375     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTable());
376     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd, 0));
377 
378     // The open handler should have removed the region from RIT but kept the region closed
379     checkRegionIsClosed();
380 
381     // We should not find any znode here.
382     Assert.assertEquals(-1, ZKAssign.getVersion(HTU.getZooKeeperWatcher(), hri));
383 
384     reopenRegion();
385   }
386 
387   /**
388    * Tests an on-the-fly RPC that was scheduled for the earlier RS on the same port
389    * for openRegion. The region server should reject this RPC. (HBASE-9721)
390    */
391   @Test
392   public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception {
393     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
394 
395     ServerName sn = getRS().getServerName();
396     ServerName earlierServerName = ServerName.valueOf(sn.getHostname(), sn.getPort(), 1);
397 
398     try {
399       CloseRegionRequest request = RequestConverter.buildCloseRegionRequest(earlierServerName, regionName, true);
400       getRS().closeRegion(null, request);
401       Assert.fail("The closeRegion should have been rejected");
402     } catch (ServiceException se) {
403       Assert.assertTrue(se.getCause() instanceof IOException);
404       Assert.assertTrue(se.getCause().getMessage().contains("This RPC was intended for a different server"));
405     }
406 
407     //actual close
408     closeNoZK();
409     try {
410       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
411         earlierServerName, hri, 0, null, null);
412       getRS().openRegion(null, orr);
413       Assert.fail("The openRegion should have been rejected");
414     } catch (ServiceException se) {
415       Assert.assertTrue(se.getCause() instanceof IOException);
416       Assert.assertTrue(se.getCause().getMessage().contains("This RPC was intended for a different server"));
417     } finally {
418       reopenRegion();
419     }
420   }
421 }