Search in sources :

Example 1 with XceiverServerRatis

use of org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis in project ozone by apache.

the class TestPipelineClose method testPipelineCloseWithLogFailure.

/**
 * Checks that reporting a Ratis log failure on a datanode causes a pipeline
 * action for that pipeline to be delivered to SCM.
 *
 * <p>A mocked {@code PipelineActionHandler} is registered for
 * {@code SCMEvents.PIPELINE_ACTIONS}, a container is allocated with a
 * factor-three Ratis replication config, and {@code handleNodeLogFailure} is
 * invoked on the write channel of the first datanode of the container's
 * pipeline. The report captured from the mock is then passed to
 * {@code verifyCloseForPipeline}, a helper defined outside this method.
 *
 * <p>The test is currently disabled via {@code @Ignore("HDDS-5604")}.
 *
 * @throws IOException if any of the container, pipeline or cluster lookups
 *     used during setup fails
 */
@Test
@Ignore("HDDS-5604")
public void testPipelineCloseWithLogFailure() throws IOException {
    // Intercept pipeline actions arriving at SCM with a mock handler.
    EventQueue eventQ = (EventQueue) scm.getEventQueue();
    PipelineActionHandler pipelineActionTest = Mockito.mock(PipelineActionHandler.class);
    eventQ.addHandler(SCMEvents.PIPELINE_ACTIONS, pipelineActionTest);
    ArgumentCaptor<PipelineActionsFromDatanode> actionCaptor = ArgumentCaptor.forClass(PipelineActionsFromDatanode.class);

    // Allocate a container and resolve the pipeline it was placed on.
    ContainerInfo containerInfo = containerManager.allocateContainer(RatisReplicationConfig.getInstance(ReplicationFactor.THREE), "testOwner");
    ContainerWithPipeline containerWithPipeline = new ContainerWithPipeline(containerInfo, pipelineManager.getPipeline(containerInfo.getPipelineID()));
    Pipeline openPipeline = containerWithPipeline.getPipeline();
    RaftGroupId groupId = RaftGroupId.valueOf(openPipeline.getId().getId());

    // Sanity check: the pipeline manager must still know the pipeline.
    try {
        pipelineManager.getPipeline(openPipeline.getId());
    } catch (PipelineNotFoundException e) {
        // Fail with the same message as before, but keep the original
        // exception as the cause so the failure can be diagnosed.
        throw new AssertionError("pipeline should exist", e);
    }

    DatanodeDetails datanodeDetails = openPipeline.getNodes().get(0);
    int index = cluster.getHddsDatanodeIndex(datanodeDetails);
    XceiverServerRatis xceiverRatis = (XceiverServerRatis) cluster.getHddsDatanodes().get(index).getDatanodeStateMachine().getContainer().getWriteChannel();

    // Notify Datanode Ratis Server endpoint of a Ratis log failure.
    // This is expected to trigger an immediate pipeline actions report to SCM
    xceiverRatis.handleNodeLogFailure(groupId, null);

    // verify SCM receives a pipeline action report "immediately"
    // (Mockito waits at most timeout(100) for the handler to be invoked)
    Mockito.verify(pipelineActionTest, Mockito.timeout(100)).onMessage(actionCaptor.capture(), Mockito.any(EventPublisher.class));
    PipelineActionsFromDatanode actionsFromDatanode = actionCaptor.getValue();

    // match the pipeline id
    verifyCloseForPipeline(openPipeline, actionsFromDatanode);
}
Also used : XceiverServerRatis(org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis) EventPublisher(org.apache.hadoop.hdds.server.events.EventPublisher) RaftGroupId(org.apache.ratis.protocol.RaftGroupId) ContainerWithPipeline(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline) EventQueue(org.apache.hadoop.hdds.server.events.EventQueue) ContainerWithPipeline(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline) PipelineActionsFromDatanode(org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.PipelineActionsFromDatanode) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) ContainerInfo(org.apache.hadoop.hdds.scm.container.ContainerInfo) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 2 with XceiverServerRatis

use of org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis in project ozone by apache.

the class TestContainerStateMachineFailures method testContainerStateMachineCloseOnMissingPipeline.

/**
 * Checks that a datanode moves a container to {@code QUASI_CLOSED} when it
 * receives a close-container command for a pipeline that is no longer
 * present in its Ratis write channel.
 *
 * <p>The key stream is managed by try-with-resources so that it is closed on
 * every exit path, including the early return and a failed wait.
 *
 * @throws Exception if key creation, command dispatch or the wait for the
 *     expected container state fails
 */
@Test
public void testContainerStateMachineCloseOnMissingPipeline() throws Exception {
    // This integration test is a bit of a hack to see if the highly
    // improbable event where the Datanode does not have the pipeline
    // in its Ratis channel but still receives a close container command
    // for a container that is open or in closing state.
    // Bugs in code can lead to this sequence of events but for this test
    // to inject this state, it removes the pipeline by directly calling
    // the underlying method.
    try (OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey("testQuasiClosed1", 1024, ReplicationType.RATIS, ReplicationFactor.THREE, new HashMap<>())) {
        key.write("ratis".getBytes(UTF_8));
        key.flush();

        // The written data is expected to live in exactly one block location.
        KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
        List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
        Assert.assertEquals(1, locationInfoList.size());
        OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
        Set<HddsDatanodeService> datanodeSet = TestHelper.getDatanodeServices(cluster, omKeyLocationInfo.getPipeline());
        long containerID = omKeyLocationInfo.getContainerID();

        // On every datanode of the pipeline: drop the Ratis group, then queue
        // a close-container command for the container that was just written.
        for (HddsDatanodeService dn : datanodeSet) {
            XceiverServerRatis wc = (XceiverServerRatis) dn.getDatanodeStateMachine().getContainer().getWriteChannel();
            if (wc == null) {
                // Test applicable only for RATIS based channel.
                // try-with-resources still closes the key on this path.
                return;
            }
            wc.notifyGroupRemove(RaftGroupId.valueOf(omKeyLocationInfo.getPipeline().getId().getId()));
            SCMCommand<?> command = new CloseContainerCommand(containerID, omKeyLocationInfo.getPipeline().getId());
            command.setTerm(cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
            cluster.getStorageContainerManager().getScmNodeManager().addDatanodeCommand(dn.getDatanodeDetails().getUuid(), command);
        }

        // Wait until each datanode reports the container as QUASI_CLOSED.
        for (HddsDatanodeService dn : datanodeSet) {
            LambdaTestUtils.await(20000, 1000, () -> (dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerState().equals(QUASI_CLOSED)));
        }
    }
}
Also used : XceiverServerRatis(org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis) CloseContainerCommand(org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand) OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) KeyOutputStream(org.apache.hadoop.ozone.client.io.KeyOutputStream) OmKeyLocationInfo(org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo) Test(org.junit.Test)

Example 3 with XceiverServerRatis

use of org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis in project ozone by apache.

the class TestHelper method waitForPipelineClose.

/**
 * Closes the given pipelines through SCM's pipeline manager and then waits
 * until none of their member datanodes reports the pipeline as existing.
 *
 * <p>All close requests are issued first; the waiting happens afterwards,
 * one datanode at a time. Each datanode's write channel is asserted to be an
 * {@code XceiverServerRatis}.
 *
 * @param pipelineList pipelines to close and wait for
 * @param cluster the mini cluster hosting SCM and the datanodes
 * @throws TimeoutException if a wait in {@code GenericTestUtils.waitFor}
 *     times out
 * @throws InterruptedException if a wait is interrupted
 * @throws IOException if closing a pipeline or resolving a datanode index
 *     fails
 */
public static void waitForPipelineClose(List<Pipeline> pipelineList, MiniOzoneCluster cluster) throws TimeoutException, InterruptedException, IOException {
    // Phase 1: ask SCM to close every pipeline.
    for (Pipeline toClose : pipelineList) {
        cluster.getStorageContainerManager()
            .getPipelineManager()
            .closePipeline(toClose, false);
    }

    // Phase 2: poll each member datanode until the pipeline is gone there.
    for (Pipeline closing : pipelineList) {
        final HddsProtos.PipelineID protoId = closing.getId().getProtobuf();
        for (DatanodeDetails member : closing.getNodes()) {
            int memberIndex = cluster.getHddsDatanodeIndex(member);
            final XceiverServerSpi writeChannel = cluster.getHddsDatanodes()
                .get(memberIndex)
                .getDatanodeStateMachine()
                .getContainer()
                .getWriteChannel();
            Assert.assertTrue(writeChannel instanceof XceiverServerRatis);
            // NOTE(review): 100 / 30_000 are presumably the poll interval and
            // overall timeout in milliseconds - confirm against GenericTestUtils.
            GenericTestUtils.waitFor(() -> !writeChannel.isExist(protoId), 100, 30_000);
        }
    }
}
Also used : XceiverServerRatis(org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis) HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) XceiverServerSpi(org.apache.hadoop.ozone.container.common.transport.server.XceiverServerSpi)

Example 4 with XceiverServerRatis

use of org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis in project ozone by apache.

the class TestHelper method createPipelineOnDatanode.

/**
 * Calls {@code addGroup} for the given pipeline on the write channel of each
 * of the pipeline's datanodes.
 *
 * <p>Each write channel is asserted to be an {@code XceiverServerRatis}.
 * Failures of {@code addGroup} itself are deliberately ignored, so this
 * method is best effort and gives no signal when the group could not be
 * added on a node.
 *
 * @param pipeline the pipeline whose id and node list are passed to
 *     {@code addGroup}
 * @param cluster the mini cluster hosting the datanodes
 * @throws IOException if a datanode's index cannot be resolved in the cluster
 */
public static void createPipelineOnDatanode(Pipeline pipeline, MiniOzoneCluster cluster) throws IOException {
    // Add the pipeline's group on every datanode that is a member of it.
    for (DatanodeDetails dn : pipeline.getNodes()) {
        XceiverServerSpi server = cluster.getHddsDatanodes().get(cluster.getHddsDatanodeIndex(dn)).getDatanodeStateMachine().getContainer().getWriteChannel();
        Assert.assertTrue(server instanceof XceiverServerRatis);
        try {
            server.addGroup(pipeline.getId().getProtobuf(), Collections.unmodifiableList(pipeline.getNodes()));
        } catch (Exception ignored) {
            // Intentionally ignored: group creation is best effort here.
            // NOTE(review): presumably tolerates the group already existing on
            // the node - confirm, and consider logging the exception.
        }
    }
}
Also used : XceiverServerRatis(org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) ContainerNotFoundException(org.apache.hadoop.hdds.scm.container.ContainerNotFoundException) TimeoutException(java.util.concurrent.TimeoutException) PipelineNotFoundException(org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException) IOException(java.io.IOException) XceiverServerSpi(org.apache.hadoop.ozone.container.common.transport.server.XceiverServerSpi)

Example 5 with XceiverServerRatis

use of org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis in project ozone by apache.

the class TestRatisPipelineLeader method verifyLeaderInfo.

/**
 * Compares the leader recorded on the given pipeline with the role
 * information returned by the Ratis server of that leader datanode.
 *
 * <p>The datanode whose UUID equals the pipeline's leader id is looked up in
 * the cluster (and asserted to be present). A group-info request for the
 * pipeline's Raft group is then sent to that datanode's Ratis server.
 *
 * @param ratisPipeline the pipeline whose leader is to be verified
 * @return {@code true} if the reply's role info carries leader info and the
 *     replying server's own id equals the pipeline's leader id as a string;
 *     {@code false} otherwise
 * @throws Exception if the group-info request fails
 */
private boolean verifyLeaderInfo(Pipeline ratisPipeline) throws Exception {
    // Find the datanode that the pipeline names as its leader.
    Optional<HddsDatanodeService> leaderNode = cluster.getHddsDatanodes()
        .stream()
        .filter(dn -> dn.getDatanodeStateMachine()
            .getDatanodeDetails()
            .getUuid()
            .equals(ratisPipeline.getLeaderId()))
        .findFirst();
    Assert.assertTrue(leaderNode.isPresent());

    XceiverServerRatis leaderChannel = (XceiverServerRatis) leaderNode.get()
        .getDatanodeStateMachine()
        .getContainer()
        .getWriteChannel();

    // Query that node's Ratis server about the pipeline's Raft group.
    GroupInfoRequest request = new GroupInfoRequest(
        ClientId.randomId(),
        leaderChannel.getServer().getId(),
        RaftGroupId.valueOf(ratisPipeline.getId().getId()),
        100);
    GroupInfoReply reply = leaderChannel.getServer().getGroupInfo(request);

    // Without leader info in the reply there is nothing to compare.
    if (!reply.getRoleInfoProto().hasLeaderInfo()) {
        return false;
    }
    String expectedLeader = ratisPipeline.getLeaderId().toString();
    String respondingPeer = reply.getRoleInfoProto().getSelf().getId().toStringUtf8();
    return expectedLeader.equals(respondingPeer);
}
Also used : OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) GroupInfoReply(org.apache.ratis.protocol.GroupInfoReply) BeforeClass(org.junit.BeforeClass) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) GroupInfoRequest(org.apache.ratis.protocol.GroupInfoRequest) LoggerFactory(org.slf4j.LoggerFactory) ContainerProtocolCalls(org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls) MiniOzoneCluster(org.apache.hadoop.ozone.MiniOzoneCluster) RaftGroupId(org.apache.ratis.protocol.RaftGroupId) Logger(org.apache.log4j.Logger) GrpcClientProtocolService(org.apache.ratis.grpc.client.GrpcClientProtocolService) Level(org.apache.log4j.Level) DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY(org.apache.hadoop.ozone.OzoneConfigKeys.DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY) HddsConfigKeys(org.apache.hadoop.hdds.HddsConfigKeys) XceiverServerRatis(org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis) AfterClass(org.junit.AfterClass) RatisReplicationConfig(org.apache.hadoop.hdds.client.RatisReplicationConfig) ReplicationFactor(org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) Test(org.junit.Test) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) Ignore(org.junit.Ignore) ClientId(org.apache.ratis.protocol.ClientId) Optional(java.util.Optional) Assert(org.junit.Assert) GenericTestUtils(org.apache.ozone.test.GenericTestUtils) GroupInfoRequest(org.apache.ratis.protocol.GroupInfoRequest) XceiverServerRatis(org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) GroupInfoReply(org.apache.ratis.protocol.GroupInfoReply)

Aggregations

XceiverServerRatis (org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis)5 DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)3 Test (org.junit.Test)3 Pipeline (org.apache.hadoop.hdds.scm.pipeline.Pipeline)2 HddsDatanodeService (org.apache.hadoop.ozone.HddsDatanodeService)2 XceiverServerSpi (org.apache.hadoop.ozone.container.common.transport.server.XceiverServerSpi)2 RaftGroupId (org.apache.ratis.protocol.RaftGroupId)2 Ignore (org.junit.Ignore)2 IOException (java.io.IOException)1 List (java.util.List)1 Optional (java.util.Optional)1 TimeUnit (java.util.concurrent.TimeUnit)1 TimeoutException (java.util.concurrent.TimeoutException)1 HddsConfigKeys (org.apache.hadoop.hdds.HddsConfigKeys)1 RatisReplicationConfig (org.apache.hadoop.hdds.client.RatisReplicationConfig)1 OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration)1 HddsProtos (org.apache.hadoop.hdds.protocol.proto.HddsProtos)1 ReplicationFactor (org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor)1 ContainerInfo (org.apache.hadoop.hdds.scm.container.ContainerInfo)1 ContainerNotFoundException (org.apache.hadoop.hdds.scm.container.ContainerNotFoundException)1