Search in sources :

Example 11 with HddsDatanodeService

use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.

the class TestContainerReplicationEndToEnd method testContainerReplication.

/**
 * Simulates container replication end to end.
 *
 * <p>Steps: write a key into a RATIS/THREE container, close the container,
 * shut down the first node of the pipeline, wait until the container shows up
 * on a datanode that was not part of the original pipeline, then shut down
 * all original pipeline nodes and read the key back.
 *
 * @throws Exception if any cluster interaction fails or a wait times out
 */
@Test
public void testContainerReplication() throws Exception {
    String keyName = "testContainerReplication";
    OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey(keyName, 0, ReplicationType.RATIS, ReplicationFactor.THREE, new HashMap<>());
    byte[] testData = "ratis".getBytes(UTF_8);
    // First write and flush creates a container in the datanode
    key.write(testData);
    key.flush();
    KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
    List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
    Assert.assertEquals(1, locationInfoList.size());
    OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
    long containerID = omKeyLocationInfo.getContainerID();
    PipelineID pipelineID = cluster.getStorageContainerManager().getContainerManager().getContainer(ContainerID.valueOf(containerID)).getPipelineID();
    Pipeline pipeline = cluster.getStorageContainerManager().getPipelineManager().getPipeline(pipelineID);
    key.close();
    HddsProtos.LifeCycleState containerState = cluster.getStorageContainerManager().getContainerManager().getContainer(ContainerID.valueOf(containerID)).getState();
    LoggerFactory.getLogger(TestContainerReplicationEndToEnd.class).info("Current Container State is {}", containerState);
    // Only finalize a container that is not already on its way to being closed.
    if ((containerState != HddsProtos.LifeCycleState.CLOSING) && (containerState != HddsProtos.LifeCycleState.CLOSED)) {
        cluster.getStorageContainerManager().getContainerManager().updateContainerState(ContainerID.valueOf(containerID), HddsProtos.LifeCycleEvent.FINALIZE);
    }
    // Give SCM time to process the FINALIZE event
    // (presumably the container moves to CLOSING here - TODO confirm).
    Thread.sleep(2 * containerReportInterval);
    DatanodeDetails oldReplicaNode = pipeline.getFirstNode();
    // Now close the container on the datanode.
    XceiverClientSpi xceiverClient = xceiverClientManager.acquireClient(pipeline);
    try {
        ContainerProtos.ContainerCommandRequestProto.Builder request = ContainerProtos.ContainerCommandRequestProto.newBuilder();
        request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
        request.setCmdType(ContainerProtos.Type.CloseContainer);
        request.setContainerID(containerID);
        request.setCloseContainer(ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
        xceiverClient.sendCommand(request.build());
    } finally {
        // Hand the client back to the manager even when the command fails.
        xceiverClientManager.releaseClient(xceiverClient, false);
    }
    // wait for container to move to closed state in SCM
    Thread.sleep(2 * containerReportInterval);
    Assert.assertEquals(HddsProtos.LifeCycleState.CLOSED, cluster.getStorageContainerManager().getContainerInfo(containerID).getState());
    // shutdown the replica node
    cluster.shutdownHddsDatanode(oldReplicaNode);
    // Now the container is under replicated and will be moved to a different dn.
    // Pick a datanode that is not part of the original pipeline; if several
    // qualify, the last one encountered is kept.
    HddsDatanodeService dnService = null;
    for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
        boolean outsidePipeline = pipeline.getNodes().stream().noneMatch(node -> node.getUuid().equals(dn.getDatanodeDetails().getUuid()));
        if (outsidePipeline) {
            dnService = dn;
        }
    }
    Assert.assertNotNull(dnService);
    final HddsDatanodeService newReplicaNode = dnService;
    // wait for the container to get replicated
    GenericTestUtils.waitFor(() -> {
        return newReplicaNode.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID) != null;
    }, 500, 100000);
    Assert.assertTrue(newReplicaNode.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerData().getBlockCommitSequenceId() > 0);
    // wait for SCM to update the replica Map
    Thread.sleep(5 * containerReportInterval);
    // Shut down every datanode of the original pipeline before reading
    // the key again.
    for (DatanodeDetails dn : pipeline.getNodes()) {
        cluster.shutdownHddsDatanode(dn);
    }
    // This will try to read the data from the dn to which the container got
    // replicated after the container got closed.
    TestHelper.validateData(keyName, testData, objectStore, volumeName, bucketName);
}
Also used : ScmConfigKeys(org.apache.hadoop.hdds.scm.ScmConfigKeys) OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) DatanodeRatisServerConfig(org.apache.hadoop.hdds.conf.DatanodeRatisServerConfig) BeforeClass(org.junit.BeforeClass) OZONE_SCM_STALENODE_INTERVAL(org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) LoggerFactory(org.slf4j.LoggerFactory) MiniOzoneCluster(org.apache.hadoop.ozone.MiniOzoneCluster) HashMap(java.util.HashMap) ContainerProtos(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos) ReplicationManagerConfiguration(org.apache.hadoop.hdds.scm.container.replication.ReplicationManager.ReplicationManagerConfiguration) OmKeyLocationInfo(org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo) Duration(java.time.Duration) OZONE_SCM_PIPELINE_DESTROY_TIMEOUT(org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_DESTROY_TIMEOUT) AfterClass(org.junit.AfterClass) XceiverClientManager(org.apache.hadoop.hdds.scm.XceiverClientManager) HDDS_CONTAINER_REPORT_INTERVAL(org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL) Predicate(java.util.function.Predicate) UTF_8(java.nio.charset.StandardCharsets.UTF_8) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) ContainerID(org.apache.hadoop.hdds.scm.container.ContainerID) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) Test(org.junit.Test) IOException(java.io.IOException) ObjectStore(org.apache.hadoop.ozone.client.ObjectStore) OzoneClientFactory(org.apache.hadoop.ozone.client.OzoneClientFactory) OZONE_DATANODE_PIPELINE_LIMIT(org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT) ReplicationFactor(org.apache.hadoop.hdds.client.ReplicationFactor) File(java.io.File) ReplicationType(org.apache.hadoop.hdds.client.ReplicationType) KeyOutputStream(org.apache.hadoop.ozone.client.io.KeyOutputStream) 
TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) XceiverClientSpi(org.apache.hadoop.hdds.scm.XceiverClientSpi) TestHelper(org.apache.hadoop.ozone.container.TestHelper) PipelineID(org.apache.hadoop.hdds.scm.pipeline.PipelineID) OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) Assert(org.junit.Assert) OzoneClient(org.apache.hadoop.ozone.client.OzoneClient) GenericTestUtils(org.apache.ozone.test.GenericTestUtils) OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) XceiverClientSpi(org.apache.hadoop.hdds.scm.XceiverClientSpi) OmKeyLocationInfo(org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) PipelineID(org.apache.hadoop.hdds.scm.pipeline.PipelineID) KeyOutputStream(org.apache.hadoop.ozone.client.io.KeyOutputStream) Test(org.junit.Test)

Example 12 with HddsDatanodeService

use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.

the class TestContainerStateMachineFailures method testApplyTransactionFailure.

/**
 * Verifies the behavior when applying a transaction fails on a datanode.
 *
 * <p>The test writes a key, takes a state machine snapshot, deletes the
 * container path on disk and then sends a CloseContainer command, which is
 * expected to fail. It then checks that the container is marked UNHEALTHY,
 * that no newer snapshot replaces the earlier one, and that the snapshot
 * file eventually disappears.
 *
 * @throws Exception if any cluster interaction fails or a wait times out
 */
@Test
public void testApplyTransactionFailure() throws Exception {
    OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey("ratis", 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
    // First write and flush creates a container in the datanode
    key.write("ratis".getBytes(UTF_8));
    key.flush();
    key.write("ratis".getBytes(UTF_8));
    KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
    List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
    Assert.assertEquals(1, locationInfoList.size());
    OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
    HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
    int index = cluster.getHddsDatanodeIndex(dn.getDatanodeDetails());
    ContainerData containerData = dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(omKeyLocationInfo.getContainerID()).getContainerData();
    Assert.assertTrue(containerData instanceof KeyValueContainerData);
    KeyValueContainerData keyValueContainerData = (KeyValueContainerData) containerData;
    key.close();
    ContainerStateMachine stateMachine = (ContainerStateMachine) TestHelper.getStateMachine(cluster.getHddsDatanodes().get(index), omKeyLocationInfo.getPipeline());
    SimpleStateMachineStorage storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
    stateMachine.takeSnapshot();
    Path parentPath = storage.findLatestSnapshot().getFile().getPath();
    // A snapshot was just taken explicitly, so the snapshot directory
    // must contain at least one file.
    Assert.assertTrue(parentPath.getParent().toFile().listFiles().length > 0);
    FileInfo snapshot = storage.findLatestSnapshot().getFile();
    Assert.assertNotNull(snapshot);
    long containerID = omKeyLocationInfo.getContainerID();
    // Delete the container path on disk so that the close below fails.
    FileUtil.fullyDelete(new File(keyValueContainerData.getContainerPath()));
    Pipeline pipeline = cluster.getStorageContainerLocationClient().getContainerWithPipeline(containerID).getPipeline();
    XceiverClientSpi xceiverClient = xceiverClientManager.acquireClient(pipeline);
    try {
        ContainerProtos.ContainerCommandRequestProto.Builder request = ContainerProtos.ContainerCommandRequestProto.newBuilder();
        request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
        request.setCmdType(ContainerProtos.Type.CloseContainer);
        request.setContainerID(containerID);
        request.setCloseContainer(ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
        xceiverClient.sendCommand(request.build());
        Assert.fail("Expected exception not thrown");
    } catch (IOException expected) {
        // Expected: the close cannot succeed once the container path is gone.
    } finally {
        // Hand the client back to the manager regardless of the outcome.
        xceiverClientManager.releaseClient(xceiverClient, false);
    }
    // Make sure the container is marked unhealthy
    Assert.assertEquals(ContainerProtos.ContainerDataProto.State.UNHEALTHY, dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerState());
    try {
        // try to take a new snapshot, ideally it should just fail
        stateMachine.takeSnapshot();
    } catch (IOException ioe) {
        Assert.assertTrue(ioe instanceof StateMachineException);
    }
    if (snapshot.getPath().toFile().exists()) {
        // Make sure the latest snapshot is same as the previous one
        try {
            FileInfo latestSnapshot = storage.findLatestSnapshot().getFile();
            Assert.assertEquals(snapshot.getPath(), latestSnapshot.getPath());
        } catch (Throwable e) {
            // The snapshot may have been removed between the exists() check
            // and the lookup; any failure is tolerated only in that case.
            Assert.assertFalse(snapshot.getPath().toFile().exists());
        }
    }
    // when remove pipeline, group dir including snapshot will be deleted
    LambdaTestUtils.await(5000, 500, () -> (!snapshot.getPath().toFile().exists()));
}
Also used : Path(java.nio.file.Path) StateMachineException(org.apache.ratis.protocol.exceptions.StateMachineException) OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) IOException(java.io.IOException) XceiverClientSpi(org.apache.hadoop.hdds.scm.XceiverClientSpi) OmKeyLocationInfo(org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo) KeyValueContainerData(org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) ContainerStateMachine(org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine) FileInfo(org.apache.ratis.server.storage.FileInfo) SimpleStateMachineStorage(org.apache.ratis.statemachine.impl.SimpleStateMachineStorage) KeyOutputStream(org.apache.hadoop.ozone.client.io.KeyOutputStream) File(java.io.File) KeyValueContainerData(org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData) ContainerData(org.apache.hadoop.ozone.container.common.impl.ContainerData) Test(org.junit.jupiter.api.Test)

Example 13 with HddsDatanodeService

use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.

the class TestContainerStateMachineFailures method testApplyTransactionIdempotencyWithClosedContainer.

/**
 * Verifies that closing a container leaves the state machine healthy and
 * that a subsequent snapshot succeeds and differs from the earlier one.
 *
 * <p>The test writes a key, takes a snapshot, sends a CloseContainer command
 * that must succeed, asserts the container is CLOSED and the state machine
 * healthy, then takes another snapshot and checks its path is new.
 *
 * @throws Exception if any cluster interaction fails
 */
@Test
@Flaky("HDDS-6115")
public void testApplyTransactionIdempotencyWithClosedContainer() throws Exception {
    OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey("ratis", 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
    // First write and flush creates a container in the datanode
    key.write("ratis".getBytes(UTF_8));
    key.flush();
    key.write("ratis".getBytes(UTF_8));
    KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
    List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
    Assert.assertEquals(1, locationInfoList.size());
    OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
    HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
    ContainerData containerData = dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(omKeyLocationInfo.getContainerID()).getContainerData();
    Assert.assertTrue(containerData instanceof KeyValueContainerData);
    key.close();
    ContainerStateMachine stateMachine = (ContainerStateMachine) TestHelper.getStateMachine(dn, omKeyLocationInfo.getPipeline());
    SimpleStateMachineStorage storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
    // Take the snapshot before looking one up, so the lookup does not depend
    // on a snapshot having been created implicitly beforehand.
    // NOTE(review): the previous lookup-before-snapshot order may be related
    // to the flakiness tracked by the @Flaky tag - confirm.
    stateMachine.takeSnapshot();
    FileInfo snapshot = storage.findLatestSnapshot().getFile();
    Assert.assertNotNull(snapshot);
    Path parentPath = snapshot.getPath();
    Assert.assertTrue(parentPath.getParent().toFile().listFiles().length > 0);
    long containerID = omKeyLocationInfo.getContainerID();
    Pipeline pipeline = cluster.getStorageContainerLocationClient().getContainerWithPipeline(containerID).getPipeline();
    XceiverClientSpi xceiverClient = xceiverClientManager.acquireClient(pipeline);
    try {
        ContainerProtos.ContainerCommandRequestProto.Builder request = ContainerProtos.ContainerCommandRequestProto.newBuilder();
        request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
        request.setCmdType(ContainerProtos.Type.CloseContainer);
        request.setContainerID(containerID);
        request.setCloseContainer(ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
        xceiverClient.sendCommand(request.build());
    } catch (IOException e) {
        // Fail the test but keep the cause so the report shows why.
        throw new AssertionError("Exception should not be thrown", e);
    } finally {
        // Hand the client back to the manager regardless of the outcome.
        xceiverClientManager.releaseClient(xceiverClient, false);
    }
    Assert.assertEquals(ContainerProtos.ContainerDataProto.State.CLOSED, TestHelper.getDatanodeService(omKeyLocationInfo, cluster).getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerState());
    Assert.assertTrue(stateMachine.isStateMachineHealthy());
    try {
        stateMachine.takeSnapshot();
    } catch (IOException ioe) {
        throw new AssertionError("Exception should not be thrown", ioe);
    }
    // The second snapshot must be stored under a different path.
    FileInfo latestSnapshot = storage.findLatestSnapshot().getFile();
    Assert.assertNotEquals(snapshot.getPath(), latestSnapshot.getPath());
}
Also used : Path(java.nio.file.Path) OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) IOException(java.io.IOException) XceiverClientSpi(org.apache.hadoop.hdds.scm.XceiverClientSpi) OmKeyLocationInfo(org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo) KeyValueContainerData(org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) ContainerStateMachine(org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine) FileInfo(org.apache.ratis.server.storage.FileInfo) SimpleStateMachineStorage(org.apache.ratis.statemachine.impl.SimpleStateMachineStorage) KeyOutputStream(org.apache.hadoop.ozone.client.io.KeyOutputStream) KeyValueContainerData(org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData) ContainerData(org.apache.hadoop.ozone.container.common.impl.ContainerData) Test(org.junit.jupiter.api.Test) Flaky(org.apache.ozone.test.tag.Flaky)

Example 14 with HddsDatanodeService

use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.

the class TestContainerStateMachineFailures method testContainerStateMachineCloseOnMissingPipeline.

/**
 * Checks that a container ends up QUASI_CLOSED when a datanode receives a
 * close container command after its Ratis group has been removed.
 *
 * <p>The missing-pipeline state is injected by calling the group removal
 * method on each datanode's write channel directly.
 *
 * @throws Exception if any cluster interaction fails or a wait times out
 */
@Test
public void testContainerStateMachineCloseOnMissingPipeline() throws Exception {
    // Normally a datanode should not get a close container command for an
    // open/closing container whose pipeline is absent from its Ratis
    // channel. Bugs could produce that sequence, so this test forces it by
    // invoking the underlying group removal method itself.
    final OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey("testQuasiClosed1", 1024, ReplicationType.RATIS, ReplicationFactor.THREE, new HashMap<>());
    key.write("ratis".getBytes(UTF_8));
    key.flush();

    final KeyOutputStream keyStream = (KeyOutputStream) key.getOutputStream();
    final List<OmKeyLocationInfo> locations = keyStream.getLocationInfoList();
    Assert.assertEquals(1, locations.size());
    final OmKeyLocationInfo location = locations.get(0);
    final long containerID = location.getContainerID();
    final Set<HddsDatanodeService> pipelineDatanodes = TestHelper.getDatanodeServices(cluster, location.getPipeline());

    // Phase 1: drop the Ratis group on each datanode and queue a close command.
    for (HddsDatanodeService datanode : pipelineDatanodes) {
        final XceiverServerRatis ratisServer = (XceiverServerRatis) datanode.getDatanodeStateMachine().getContainer().getWriteChannel();
        if (ratisServer == null) {
            // Test applicable only for RATIS based channel.
            return;
        }
        ratisServer.notifyGroupRemove(RaftGroupId.valueOf(location.getPipeline().getId().getId()));
        final SCMCommand<?> closeCommand = new CloseContainerCommand(containerID, location.getPipeline().getId());
        closeCommand.setTerm(cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
        cluster.getStorageContainerManager().getScmNodeManager().addDatanodeCommand(datanode.getDatanodeDetails().getUuid(), closeCommand);
    }

    // Phase 2: wait for every replica to report QUASI_CLOSED.
    for (HddsDatanodeService datanode : pipelineDatanodes) {
        LambdaTestUtils.await(20000, 1000, () -> {
            return datanode.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerState().equals(QUASI_CLOSED);
        });
    }
    key.close();
}
Also used : XceiverServerRatis(org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis) CloseContainerCommand(org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand) OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) KeyOutputStream(org.apache.hadoop.ozone.client.io.KeyOutputStream) OmKeyLocationInfo(org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo) Test(org.junit.jupiter.api.Test)

Example 15 with HddsDatanodeService

use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.

the class TestHelper method getDatanodeServices.

/**
 * Collects the datanode services of the cluster whose datanode details
 * belong to the given pipeline.
 *
 * <p>Asserts that exactly as many services were found as the pipeline has
 * nodes.
 *
 * @param cluster the mini cluster whose datanodes are inspected
 * @param pipeline the pipeline whose member nodes are looked up
 * @return the set of services running the pipeline's nodes
 */
public static Set<HddsDatanodeService> getDatanodeServices(MiniOzoneCluster cluster, Pipeline pipeline) {
    final Set<DatanodeDetails> members = pipeline.getNodeSet();
    // Start from all datanodes of the cluster, then drop the non-members.
    final Set<HddsDatanodeService> matching = new HashSet<>(cluster.getHddsDatanodes());
    matching.removeIf(candidate -> !members.contains(candidate.getDatanodeDetails()));
    Assert.assertEquals(members.size(), matching.size());
    return matching;
}
Also used : DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) HashSet(java.util.HashSet)

Aggregations

HddsDatanodeService (org.apache.hadoop.ozone.HddsDatanodeService)41 OzoneOutputStream (org.apache.hadoop.ozone.client.io.OzoneOutputStream)20 Test (org.junit.jupiter.api.Test)20 IOException (java.io.IOException)17 Pipeline (org.apache.hadoop.hdds.scm.pipeline.Pipeline)14 File (java.io.File)12 KeyValueContainerData (org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData)12 OmKeyLocationInfo (org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo)12 Test (org.junit.Test)12 Container (org.apache.hadoop.ozone.container.common.interfaces.Container)11 KeyOutputStream (org.apache.hadoop.ozone.client.io.KeyOutputStream)10 DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)9 OzoneContainer (org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer)9 GenericTestUtils (org.apache.ozone.test.GenericTestUtils)9 OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration)8 XceiverClientSpi (org.apache.hadoop.hdds.scm.XceiverClientSpi)8 ArrayList (java.util.ArrayList)7 OzoneKey (org.apache.hadoop.ozone.client.OzoneKey)7 OzoneInputStream (org.apache.hadoop.ozone.client.io.OzoneInputStream)7 ContainerData (org.apache.hadoop.ozone.container.common.impl.ContainerData)7