Search in sources :

Example 1 with HddsDatanodeService

use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.

From class TestScmSafeMode, method testSCMSafeMode.

@Test(timeout = 300_000)
public void testSCMSafeMode() throws Exception {
    // Test 1: SCM safe mode when there are no containers in the system.
    cluster.stop();
    try {
        cluster = builder.build();
    } catch (IOException e) {
        Assert.fail("Cluster startup failed.");
    }
    assertTrue(cluster.getStorageContainerManager().isInSafeMode());
    cluster.startHddsDatanodes();
    cluster.waitForClusterToBeReady();
    cluster.waitTobeOutOfSafeMode();
    assertFalse(cluster.getStorageContainerManager().isInSafeMode());
    // Test 2: SCM safe mode when containers exist in the system.
    // Create random-named keys so containers get allocated.
    TestStorageContainerManagerHelper helper =
        new TestStorageContainerManagerHelper(cluster, conf);
    helper.createKeys(100 * 2, 4096);
    // Re-query the container list inside the lambda: the list returned by
    // getContainers() is a snapshot, so polling a captured reference would
    // never observe newly created containers and the wait could only time out.
    GenericTestUtils.waitFor(() -> cluster.getStorageContainerManager()
        .getContainerManager().getContainers().size() >= 3, 100, 1000 * 30);
    final List<ContainerInfo> containers = cluster.getStorageContainerManager()
        .getContainerManager().getContainers();
    // Drop the first two entries so those containers stay open.
    containers.remove(0);
    containers.remove(0);
    // Close the remaining containers.
    ContainerManager mapping =
        cluster.getStorageContainerManager().getContainerManager();
    containers.forEach(c -> {
        try {
            mapping.updateContainerState(c.containerID(),
                HddsProtos.LifeCycleEvent.FINALIZE);
            mapping.updateContainerState(c.containerID(), LifeCycleEvent.CLOSE);
        } catch (IOException | InvalidStateTransitionException e) {
            LOG.info("Failed to change state of open containers.", e);
        }
    });
    cluster.stop();
    // Restart and verify SCM stays in safe mode until datanodes report in.
    GenericTestUtils.LogCapturer logCapturer =
        GenericTestUtils.LogCapturer.captureLogs(SCMSafeModeManager.getLogger());
    logCapturer.clearOutput();
    try {
        cluster = builder.build();
    } catch (IOException ex) {
        Assert.fail("Cluster restart failed.");
    }
    StorageContainerManager scm = cluster.getStorageContainerManager();
    assertTrue(scm.isInSafeMode());
    assertFalse(logCapturer.getOutput().contains("SCM exiting safe mode."));
    // No datanode has reported yet, so the container threshold must be 0.
    // assertEquals with a delta is the correct way to compare doubles.
    Assert.assertEquals(0.0, scm.getCurrentContainerThreshold(), 0.0);
    for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
        dn.start();
    }
    GenericTestUtils.waitFor(
        () -> scm.getCurrentContainerThreshold() == 1.0, 100, 20000);
    EventQueue eventQueue =
        (EventQueue) cluster.getStorageContainerManager().getEventQueue();
    eventQueue.processAll(5000L);
    double safeModeCutoff = conf.getDouble(
        HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT,
        HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT_DEFAULT);
    assertTrue(scm.getCurrentContainerThreshold() >= safeModeCutoff);
    assertTrue(logCapturer.getOutput().contains("SCM exiting safe mode."));
    assertFalse(scm.isInSafeMode());
}
Also used : ContainerManager(org.apache.hadoop.hdds.scm.container.ContainerManager) StorageContainerManager(org.apache.hadoop.hdds.scm.server.StorageContainerManager) StorageContainerManager(org.apache.hadoop.hdds.scm.server.StorageContainerManager) InvalidStateTransitionException(org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException) GenericTestUtils(org.apache.ozone.test.GenericTestUtils) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) IOException(java.io.IOException) TestStorageContainerManagerHelper(org.apache.hadoop.ozone.TestStorageContainerManagerHelper) EventQueue(org.apache.hadoop.hdds.server.events.EventQueue) OmKeyInfo(org.apache.hadoop.ozone.om.helpers.OmKeyInfo) ContainerInfo(org.apache.hadoop.hdds.scm.container.ContainerInfo) Test(org.junit.Test)

Example 2 with HddsDatanodeService

use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.

From class TestRatisPipelineCreateAndDestroy, method testPipelineCreationOnNodeRestart.

@Test(timeout = 180000)
public void testPipelineCreationOnNodeRestart() throws Exception {
    // Use a short stale-node interval so the stopped datanodes are detected
    // as stale quickly.
    conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 5, TimeUnit.SECONDS);
    init(3);
    // Make sure at least one pipeline exists before taking nodes down.
    waitForPipelines(1);
    List<HddsDatanodeService> datanodes =
        new ArrayList<>(cluster.getHddsDatanodes());
    List<Pipeline> existingPipelines = pipelineManager.getPipelines(
        RatisReplicationConfig.getInstance(ReplicationFactor.THREE));
    for (HddsDatanodeService datanode : datanodes) {
        cluster.shutdownHddsDatanode(datanode.getDatanodeDetails());
    }
    // With every datanode down, creating another pipeline must fail.
    try {
        pipelineManager.createPipeline(
            RatisReplicationConfig.getInstance(ReplicationFactor.THREE));
        Assert.fail("pipeline creation should fail after shutting down pipeline");
    } catch (IOException ioe) {
        // All datanodes are shut down and move to the stale state, so there
        // are no longer enough datanodes to form a pipeline.
        Assert.assertTrue(ioe instanceof SCMException);
        Assert.assertEquals(
            SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE,
            ((SCMException) ioe).getResult());
    }
    // The pre-existing pipelines should be torn down.
    waitForPipelines(0);
    for (HddsDatanodeService datanode : datanodes) {
        cluster.restartHddsDatanode(datanode.getDatanodeDetails(), false);
    }
    // Close the pipelines that existed before the restart.
    for (Pipeline pipeline : existingPipelines) {
        pipelineManager.closePipeline(pipeline, false);
    }
    int healthyNodes = cluster.getStorageContainerManager().getScmNodeManager()
        .getNodeCount(NodeStatus.inServiceHealthy());
    if (healthyNodes >= HddsProtos.ReplicationFactor.THREE.getNumber()) {
        // Enough healthy nodes: a new pipeline should appear once the
        // pre-check event fires.
        cluster.getStorageContainerManager().getSCMServiceManager()
            .notifyEventTriggered(Event.PRE_CHECK_COMPLETED);
        waitForPipelines(1);
    }
}
Also used : ArrayList(java.util.ArrayList) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) IOException(java.io.IOException) SCMException(org.apache.hadoop.hdds.scm.exceptions.SCMException) Test(org.junit.Test)

Example 3 with HddsDatanodeService

use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.

From class Test2WayCommitInRatis, method test2WayCommitForRetryfailure.

@Test
public void test2WayCommitForRetryfailure() throws Exception {
    OzoneConfiguration conf = new OzoneConfiguration();
    startCluster(conf);
    GenericTestUtils.LogCapturer logCapturer =
        GenericTestUtils.LogCapturer.captureLogs(XceiverClientRatis.LOG);
    XceiverClientManager clientManager = new XceiverClientManager(conf);
    // try/finally so a failing assertion still stops the log capturer and
    // shuts the cluster down instead of leaking it into the next test.
    try {
        ContainerWithPipeline container1 = storageContainerLocationClient
            .allocateContainer(HddsProtos.ReplicationType.RATIS,
                HddsProtos.ReplicationFactor.THREE, OzoneConsts.OZONE);
        XceiverClientSpi xceiverClient =
            clientManager.acquireClient(container1.getPipeline());
        Assert.assertEquals(1, xceiverClient.getRefcount());
        Assert.assertEquals(container1.getPipeline(), xceiverClient.getPipeline());
        Pipeline pipeline = xceiverClient.getPipeline();
        XceiverClientRatis ratisClient = (XceiverClientRatis) xceiverClient;
        XceiverClientReply reply = xceiverClient.sendCommandAsync(
            ContainerTestHelper.getCreateContainerRequest(
                container1.getContainerInfo().getContainerID(),
                xceiverClient.getPipeline()));
        reply.getResponse().get();
        Assert.assertEquals(3, ratisClient.getCommitInfoMap().size());
        // Wait for the container to be created on all the nodes.
        xceiverClient.watchForCommit(reply.getLogIndex());
        // Shut down one ratis follower, and verify one was actually found:
        // without this check a missing follower would silently leave all
        // three nodes up and the 2-way-commit path untested.
        boolean followerStopped = false;
        for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
            if (RatisTestHelper.isRatisFollower(dn, pipeline)) {
                cluster.shutdownHddsDatanode(dn.getDatanodeDetails());
                followerStopped = true;
                break;
            }
        }
        Assert.assertTrue("no ratis follower found to shut down", followerStopped);
        reply = xceiverClient.sendCommandAsync(
            ContainerTestHelper.getCloseContainer(pipeline,
                container1.getContainerInfo().getContainerID()));
        reply.getResponse().get();
        xceiverClient.watchForCommit(reply.getLogIndex());
        // With one follower down the commitInfo map shrinks to 2 entries.
        Assert.assertEquals(2, ratisClient.getCommitInfoMap().size());
        clientManager.releaseClient(xceiverClient, false);
        Assert.assertTrue(logCapturer.getOutput().contains("3 way commit failed"));
        Assert.assertTrue(logCapturer.getOutput().contains("Committed by majority"));
    } finally {
        logCapturer.stopCapturing();
        shutdown();
    }
}
Also used : XceiverClientReply(org.apache.hadoop.hdds.scm.XceiverClientReply) OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) GenericTestUtils(org.apache.ozone.test.GenericTestUtils) XceiverClientRatis(org.apache.hadoop.hdds.scm.XceiverClientRatis) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) XceiverClientManager(org.apache.hadoop.hdds.scm.XceiverClientManager) XceiverClientSpi(org.apache.hadoop.hdds.scm.XceiverClientSpi) ContainerWithPipeline(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline) ContainerWithPipeline(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) Test(org.junit.Test)

Example 4 with HddsDatanodeService

use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.

From class TestContainerReplicationEndToEnd, method testContainerReplication.

/**
 * Simulates end-to-end container replication: close a container, kill its
 * replica node, and verify the data is re-replicated to a new datanode.
 */
@Test
public void testContainerReplication() throws Exception {
    String keyName = "testContainerReplication";
    OzoneOutputStream key = objectStore.getVolume(volumeName)
        .getBucket(bucketName)
        .createKey(keyName, 0, ReplicationType.RATIS, ReplicationFactor.THREE,
            new HashMap<>());
    byte[] testData = "ratis".getBytes(UTF_8);
    // First write and flush creates a container in the datanode.
    key.write(testData);
    key.flush();
    KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
    List<OmKeyLocationInfo> locationInfoList =
        groupOutputStream.getLocationInfoList();
    Assert.assertEquals(1, locationInfoList.size());
    OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
    long containerID = omKeyLocationInfo.getContainerID();
    PipelineID pipelineID = cluster.getStorageContainerManager()
        .getContainerManager().getContainer(ContainerID.valueOf(containerID))
        .getPipelineID();
    Pipeline pipeline = cluster.getStorageContainerManager()
        .getPipelineManager().getPipeline(pipelineID);
    key.close();
    HddsProtos.LifeCycleState containerState = cluster
        .getStorageContainerManager().getContainerManager()
        .getContainer(ContainerID.valueOf(containerID)).getState();
    LoggerFactory.getLogger(TestContainerReplicationEndToEnd.class)
        .info("Current Container State is {}", containerState);
    if ((containerState != HddsProtos.LifeCycleState.CLOSING)
        && (containerState != HddsProtos.LifeCycleState.CLOSED)) {
        cluster.getStorageContainerManager().getContainerManager()
            .updateContainerState(ContainerID.valueOf(containerID),
                HddsProtos.LifeCycleEvent.FINALIZE);
    }
    // Wait for a container report so SCM observes the CLOSING container
    // (the original comment said OPEN, but FINALIZE drives it to CLOSING).
    Thread.sleep(2 * containerReportInterval);
    DatanodeDetails oldReplicaNode = pipeline.getFirstNode();
    // Now move the container to CLOSED on the datanode.
    XceiverClientSpi xceiverClient = xceiverClientManager.acquireClient(pipeline);
    ContainerProtos.ContainerCommandRequestProto.Builder request =
        ContainerProtos.ContainerCommandRequestProto.newBuilder();
    request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
    request.setCmdType(ContainerProtos.Type.CloseContainer);
    request.setContainerID(containerID);
    request.setCloseContainer(
        ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
    xceiverClient.sendCommand(request.build());
    // Release the client once the command is sent; the original leaked it.
    xceiverClientManager.releaseClient(xceiverClient, false);
    // Wait for the container to move to the CLOSED state in SCM.
    Thread.sleep(2 * containerReportInterval);
    // assertEquals gives a readable failure message, unlike assertTrue(a == b).
    Assert.assertEquals(HddsProtos.LifeCycleState.CLOSED,
        cluster.getStorageContainerManager().getContainerInfo(containerID)
            .getState());
    // Shut down the replica node; the container becomes under-replicated and
    // must be re-replicated to a datanode outside the original pipeline.
    cluster.shutdownHddsDatanode(oldReplicaNode);
    HddsDatanodeService dnService = null;
    for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
        DatanodeDetails details = dn.getDatanodeDetails();
        // noneMatch states the intent directly instead of !anyMatch.
        if (pipeline.getNodes().stream()
            .noneMatch(i -> i.getUuid().equals(details.getUuid()))) {
            dnService = dn;
        }
    }
    Assert.assertNotNull(dnService);
    final HddsDatanodeService newReplicaNode = dnService;
    // Wait for the container to get replicated.
    GenericTestUtils.waitFor(() -> newReplicaNode.getDatanodeStateMachine()
        .getContainer().getContainerSet().getContainer(containerID) != null,
        500, 100000);
    Assert.assertTrue(newReplicaNode.getDatanodeStateMachine().getContainer()
        .getContainerSet().getContainer(containerID).getContainerData()
        .getBlockCommitSequenceId() > 0);
    // Wait for SCM to update the replica map.
    Thread.sleep(5 * containerReportInterval);
    // Shut down the original pipeline nodes so the read below can only be
    // served by the new replica.
    for (DatanodeDetails dn : pipeline.getNodes()) {
        cluster.shutdownHddsDatanode(dn);
    }
    // This will try to read the data from the dn to which the container got
    // replicated after the container got closed.
    TestHelper.validateData(keyName, testData, objectStore, volumeName, bucketName);
}
Also used : ScmConfigKeys(org.apache.hadoop.hdds.scm.ScmConfigKeys) OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) DatanodeRatisServerConfig(org.apache.hadoop.hdds.conf.DatanodeRatisServerConfig) BeforeClass(org.junit.BeforeClass) OZONE_SCM_STALENODE_INTERVAL(org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) LoggerFactory(org.slf4j.LoggerFactory) MiniOzoneCluster(org.apache.hadoop.ozone.MiniOzoneCluster) HashMap(java.util.HashMap) ContainerProtos(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos) OmKeyLocationInfo(org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo) Duration(java.time.Duration) OZONE_SCM_PIPELINE_DESTROY_TIMEOUT(org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_DESTROY_TIMEOUT) AfterClass(org.junit.AfterClass) XceiverClientManager(org.apache.hadoop.hdds.scm.XceiverClientManager) HDDS_CONTAINER_REPORT_INTERVAL(org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL) Predicate(java.util.function.Predicate) UTF_8(java.nio.charset.StandardCharsets.UTF_8) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) ContainerID(org.apache.hadoop.hdds.scm.container.ContainerID) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) Test(org.junit.Test) IOException(java.io.IOException) ObjectStore(org.apache.hadoop.ozone.client.ObjectStore) OzoneClientFactory(org.apache.hadoop.ozone.client.OzoneClientFactory) OZONE_DATANODE_PIPELINE_LIMIT(org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT) ReplicationFactor(org.apache.hadoop.hdds.client.ReplicationFactor) File(java.io.File) ReplicationType(org.apache.hadoop.hdds.client.ReplicationType) KeyOutputStream(org.apache.hadoop.ozone.client.io.KeyOutputStream) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) XceiverClientSpi(org.apache.hadoop.hdds.scm.XceiverClientSpi) 
TestHelper(org.apache.hadoop.ozone.container.TestHelper) ReplicationManagerConfiguration(org.apache.hadoop.hdds.scm.container.ReplicationManager.ReplicationManagerConfiguration) PipelineID(org.apache.hadoop.hdds.scm.pipeline.PipelineID) OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) Assert(org.junit.Assert) OzoneClient(org.apache.hadoop.ozone.client.OzoneClient) GenericTestUtils(org.apache.ozone.test.GenericTestUtils) OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) XceiverClientSpi(org.apache.hadoop.hdds.scm.XceiverClientSpi) OmKeyLocationInfo(org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) PipelineID(org.apache.hadoop.hdds.scm.pipeline.PipelineID) KeyOutputStream(org.apache.hadoop.ozone.client.io.KeyOutputStream) Test(org.junit.Test)

Example 5 with HddsDatanodeService

use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.

From class TestContainerStateMachineFailures, method testApplyTransactionIdempotencyWithClosedContainer.

@Test
public void testApplyTransactionIdempotencyWithClosedContainer()
    throws Exception {
    OzoneOutputStream key = objectStore.getVolume(volumeName)
        .getBucket(bucketName)
        .createKey("ratis", 1024, ReplicationType.RATIS, ReplicationFactor.ONE,
            new HashMap<>());
    // First write and flush creates a container in the datanode.
    key.write("ratis".getBytes(UTF_8));
    key.flush();
    key.write("ratis".getBytes(UTF_8));
    KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
    List<OmKeyLocationInfo> locationInfoList =
        groupOutputStream.getLocationInfoList();
    Assert.assertEquals(1, locationInfoList.size());
    OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
    HddsDatanodeService dn =
        TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
    ContainerData containerData = dn.getDatanodeStateMachine().getContainer()
        .getContainerSet().getContainer(omKeyLocationInfo.getContainerID())
        .getContainerData();
    Assert.assertTrue(containerData instanceof KeyValueContainerData);
    key.close();
    ContainerStateMachine stateMachine = (ContainerStateMachine) TestHelper
        .getStateMachine(dn, omKeyLocationInfo.getPipeline());
    SimpleStateMachineStorage storage =
        (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
    Path parentPath = storage.findLatestSnapshot().getFile().getPath();
    stateMachine.takeSnapshot();
    Assert.assertTrue(parentPath.getParent().toFile().listFiles().length > 0);
    FileInfo snapshot = storage.findLatestSnapshot().getFile();
    Assert.assertNotNull(snapshot);
    long containerID = omKeyLocationInfo.getContainerID();
    Pipeline pipeline = cluster.getStorageContainerLocationClient()
        .getContainerWithPipeline(containerID).getPipeline();
    XceiverClientSpi xceiverClient = xceiverClientManager.acquireClient(pipeline);
    ContainerProtos.ContainerCommandRequestProto.Builder request =
        ContainerProtos.ContainerCommandRequestProto.newBuilder();
    request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
    request.setCmdType(ContainerProtos.Type.CloseContainer);
    request.setContainerID(containerID);
    request.setCloseContainer(
        ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
    // The method declares `throws Exception`, so an unexpected IOException
    // fails the test with the real stack trace — no need for the
    // catch-and-Assert.fail("Exception should not be thrown") pattern,
    // which discards the cause.
    xceiverClient.sendCommand(request.build());
    // Release the client; the original acquired it and never released it.
    xceiverClientManager.releaseClient(xceiverClient, false);
    // assertEquals gives a readable failure message, unlike assertTrue(a == b).
    Assert.assertEquals(ContainerProtos.ContainerDataProto.State.CLOSED,
        TestHelper.getDatanodeService(omKeyLocationInfo, cluster)
            .getDatanodeStateMachine().getContainer().getContainerSet()
            .getContainer(containerID).getContainerState());
    Assert.assertTrue(stateMachine.isStateMachineHealthy());
    // Taking a snapshot on the closed container must still succeed
    // (idempotency); failures propagate with their real cause.
    stateMachine.takeSnapshot();
    FileInfo latestSnapshot = storage.findLatestSnapshot().getFile();
    // A new snapshot file must have been produced after the close.
    Assert.assertNotEquals(snapshot.getPath(), latestSnapshot.getPath());
}
Also used : Path(java.nio.file.Path) OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) IOException(java.io.IOException) XceiverClientSpi(org.apache.hadoop.hdds.scm.XceiverClientSpi) OmKeyLocationInfo(org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo) KeyValueContainerData(org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) ContainerStateMachine(org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine) FileInfo(org.apache.ratis.server.storage.FileInfo) SimpleStateMachineStorage(org.apache.ratis.statemachine.impl.SimpleStateMachineStorage) KeyOutputStream(org.apache.hadoop.ozone.client.io.KeyOutputStream) KeyValueContainerData(org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData) ContainerData(org.apache.hadoop.ozone.container.common.impl.ContainerData) Test(org.junit.Test)

Aggregations

HddsDatanodeService (org.apache.hadoop.ozone.HddsDatanodeService)41 Test (org.junit.Test)32 OzoneOutputStream (org.apache.hadoop.ozone.client.io.OzoneOutputStream)20 IOException (java.io.IOException)17 Pipeline (org.apache.hadoop.hdds.scm.pipeline.Pipeline)14 File (java.io.File)12 OmKeyLocationInfo (org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo)12 Container (org.apache.hadoop.ozone.container.common.interfaces.Container)11 KeyOutputStream (org.apache.hadoop.ozone.client.io.KeyOutputStream)10 DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)9 KeyValueContainerData (org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData)9 OzoneContainer (org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer)9 OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration)8 XceiverClientSpi (org.apache.hadoop.hdds.scm.XceiverClientSpi)8 GenericTestUtils (org.apache.ozone.test.GenericTestUtils)8 ArrayList (java.util.ArrayList)7 OzoneKey (org.apache.hadoop.ozone.client.OzoneKey)7 OzoneInputStream (org.apache.hadoop.ozone.client.io.OzoneInputStream)7 ContainerData (org.apache.hadoop.ozone.container.common.impl.ContainerData)7 Ignore (org.junit.Ignore)7