
Example 1 with ChannelInboundHandlerAdapter

Use of org.apache.hbase.thirdparty.io.netty.channel.ChannelInboundHandlerAdapter in project hbase by apache.

From the class TestFanOutOneBlockAsyncDFSOutputHang, method testFlushHangWhenOneDataNodeFailedBeforeOtherDataNodeAck:

/**
 * <pre>
 * This test is for HBASE-26679. Consider there are two dataNodes, dn1 and dn2, and dn2 is a slow DN.
 * The sequence of events before HBASE-26679 is:
 * 1. We write some data to {@link FanOutOneBlockAsyncDFSOutput} and then flush it, so there is one
 *    {@link FanOutOneBlockAsyncDFSOutput.Callback} in
 *    {@link FanOutOneBlockAsyncDFSOutput#waitingAckQueue}.
 * 2. The ack from dn1 arrives first and triggers Netty to invoke
 *    {@link FanOutOneBlockAsyncDFSOutput#completed} with dn1's channel; in
 *    {@link FanOutOneBlockAsyncDFSOutput#completed}, dn1's channel is removed from
 *    {@link FanOutOneBlockAsyncDFSOutput.Callback#unfinishedReplicas}.
 * 3. But dn2 responds slowly. Before dn2 sends its ack, dn1 is shut down or hits an exception,
 *    so {@link FanOutOneBlockAsyncDFSOutput#failed} is triggered by Netty with dn1's channel.
 *    Because {@link FanOutOneBlockAsyncDFSOutput.Callback#unfinishedReplicas} no longer
 *    contains dn1's channel, the {@link FanOutOneBlockAsyncDFSOutput.Callback} is skipped in
 *    the {@link FanOutOneBlockAsyncDFSOutput#failed} method,
 *    {@link FanOutOneBlockAsyncDFSOutput#state} is set to
 *    {@link FanOutOneBlockAsyncDFSOutput.State#BROKEN}, and dn1 and dn2 are both closed at the
 *    end of {@link FanOutOneBlockAsyncDFSOutput#failed}.
 * 4. {@link FanOutOneBlockAsyncDFSOutput#failed} is triggered again by dn2 because it is closed,
 *    but because {@link FanOutOneBlockAsyncDFSOutput#state} is already
 *    {@link FanOutOneBlockAsyncDFSOutput.State#BROKEN}, the whole
 *    {@link FanOutOneBlockAsyncDFSOutput#failed} method is skipped. So waiting on the future
 *    returned by {@link FanOutOneBlockAsyncDFSOutput#flush} would be stuck forever.
 * After HBASE-26679, in step 4 above, even if {@link FanOutOneBlockAsyncDFSOutput#state}
 * is already {@link FanOutOneBlockAsyncDFSOutput.State#BROKEN}, we still try to complete
 * {@link FanOutOneBlockAsyncDFSOutput.Callback#future}.
 * </pre>
 */
@Test
public void testFlushHangWhenOneDataNodeFailedBeforeOtherDataNodeAck() throws Exception {
    DataNodeProperties firstDataNodeProperties = null;
    try {
        final CyclicBarrier dn1AckReceivedCyclicBarrier = new CyclicBarrier(2);
        Map<Channel, DatanodeInfo> datanodeInfoMap = OUT.getDatanodeInfoMap();
        Iterator<Map.Entry<Channel, DatanodeInfo>> iterator = datanodeInfoMap.entrySet().iterator();
        assertTrue(iterator.hasNext());
        Map.Entry<Channel, DatanodeInfo> dn1Entry = iterator.next();
        Channel dn1Channel = dn1Entry.getKey();
        DatanodeInfo dn1DatanodeInfo = dn1Entry.getValue();
        final List<String> protobufDecoderNames = new ArrayList<String>();
        dn1Channel.pipeline().forEach((entry) -> {
            if (ProtobufDecoder.class.isInstance(entry.getValue())) {
                protobufDecoderNames.add(entry.getKey());
            }
        });
        assertTrue(protobufDecoderNames.size() == 1);
        dn1Channel.pipeline().addAfter(protobufDecoderNames.get(0), "dn1AckReceivedHandler", new ChannelInboundHandlerAdapter() {

            @Override
            public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception {
                super.channelRead(ctx, msg);
                dn1AckReceivedCyclicBarrier.await();
            }
        });
        assertTrue(iterator.hasNext());
        Map.Entry<Channel, DatanodeInfo> dn2Entry = iterator.next();
        Channel dn2Channel = dn2Entry.getKey();
        /**
         * Here we add a {@link ChannelInboundHandlerAdapter} to eat all the responses to simulate a
         * slow dn2.
         */
        dn2Channel.pipeline().addFirst(new ChannelInboundHandlerAdapter() {

            @Override
            public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception {
                if (!(msg instanceof ByteBuf)) {
                    ctx.fireChannelRead(msg);
                }
            }
        });
        byte[] b = new byte[10];
        ThreadLocalRandom.current().nextBytes(b);
        OUT.write(b, 0, b.length);
        CompletableFuture<Long> future = OUT.flush(false);
        /**
         * Wait for ack from dn1.
         */
        dn1AckReceivedCyclicBarrier.await();
        /**
         * First ack is received from dn1,we could stop dn1 now.
         */
        firstDataNodeProperties = findAndKillFirstDataNode(dn1DatanodeInfo);
        assertTrue(firstDataNodeProperties != null);
        try {
            /**
             * Before HBASE-26679,here we should be stuck, after HBASE-26679,we would fail soon with
             * {@link ExecutionException}.
             */
            future.get();
            fail();
        } catch (ExecutionException e) {
            assertTrue(e != null);
            LOG.info("expected exception caught when get future", e);
        }
        /**
         * Make sure all the data node channel are closed.
         */
        datanodeInfoMap.keySet().forEach(ch -> {
            try {
                ch.closeFuture().get();
            } catch (InterruptedException | ExecutionException e) {
                throw new RuntimeException(e);
            }
        });
    } finally {
        if (firstDataNodeProperties != null) {
            CLUSTER.restartDataNode(firstDataNodeProperties);
        }
    }
}
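
The javadoc above describes the HBASE-26679 fix only in prose. The following is a deliberately simplified, standalone sketch of that idea, not the real FanOutOneBlockAsyncDFSOutput code: the class, field, and method names are illustrative (only waitingAckQueue, state, and failed echo the description above), and the logic is reduced to the single point that failed must complete every pending callback future exceptionally even when the state is already BROKEN.

import java.io.IOException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentLinkedDeque;

class BrokenStreamSketch {

    enum State { STREAMING, BROKEN }

    private static final class Callback {
        final CompletableFuture<Long> future = new CompletableFuture<>();
    }

    private volatile State state = State.STREAMING;

    private final ConcurrentLinkedDeque<Callback> waitingAckQueue = new ConcurrentLinkedDeque<>();

    // Simplified flush(): register a callback and hand its future to the caller.
    CompletableFuture<Long> flush() {
        Callback cb = new Callback();
        waitingAckQueue.add(cb);
        return cb.future;
    }

    // Simplified failed(): before the fix, a second failure arriving while state was already BROKEN
    // returned early, so a callback whose unfinishedReplicas no longer contained the failing channel
    // was never completed and the caller waited forever. The fix keeps draining the queue and fails
    // every pending future even on a repeated failure.
    void failed(Throwable cause) {
        state = State.BROKEN;
        Callback cb;
        while ((cb = waitingAckQueue.poll()) != null) {
            cb.future.completeExceptionally(new IOException("output stream is broken", cause));
        }
    }
}

With this behavior, the second failed invocation in step 4 still wakes up the caller blocked on the flushed future, which is exactly what future.get() in the test now observes as an ExecutionException instead of hanging.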
Also used: DataNodeProperties (org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties), ArrayList (java.util.ArrayList), ChannelHandlerContext (org.apache.hbase.thirdparty.io.netty.channel.ChannelHandlerContext), ByteBuf (org.apache.hbase.thirdparty.io.netty.buffer.ByteBuf), ExecutionException (java.util.concurrent.ExecutionException), DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo), Channel (org.apache.hbase.thirdparty.io.netty.channel.Channel), NioSocketChannel (org.apache.hbase.thirdparty.io.netty.channel.socket.nio.NioSocketChannel), IOException (java.io.IOException), CyclicBarrier (java.util.concurrent.CyclicBarrier), Map (java.util.Map), ChannelInboundHandlerAdapter (org.apache.hbase.thirdparty.io.netty.channel.ChannelInboundHandlerAdapter), Test (org.junit.Test)

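For readers who want to experiment with the "eat all the responses" trick that the test installs at the head of dn2's pipeline without standing up a mini DFS cluster, here is a minimal, self-contained sketch. It uses the plain io.netty classes rather than the org.apache.hbase.thirdparty shaded copies, together with Netty's EmbeddedChannel; the class name and the main method are illustrative only.

import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelInboundHandlerAdapter;
import io.netty.channel.embedded.EmbeddedChannel;

public class SwallowInboundSketch {
    public static void main(String[] args) {
        // Handler that drops every ByteBuf so nothing downstream ever sees a "response",
        // while non-ByteBuf messages are passed along unchanged.
        EmbeddedChannel channel = new EmbeddedChannel(new ChannelInboundHandlerAdapter() {
            @Override
            public void channelRead(ChannelHandlerContext ctx, Object msg) {
                if (msg instanceof ByteBuf) {
                    ((ByteBuf) msg).release(); // release instead of leaking the dropped buffer
                } else {
                    ctx.fireChannelRead(msg);
                }
            }
        });
        channel.writeInbound(Unpooled.wrappedBuffer(new byte[] { 1, 2, 3 }));
        // Prints 0: the buffer was swallowed before reaching the tail of the pipeline.
        System.out.println("messages reaching the tail: " + channel.inboundMessages().size());
        channel.finishAndReleaseAll();
    }
}

Unlike the handler in the test, this sketch releases the swallowed ByteBuf; the test simply drops the reference, which is tolerable for a short-lived test channel but would otherwise leak pooled buffer memory.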