Search in sources :

Example 26 with HddsDatanodeService

use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.

the class TestWatchForCommit method test2WayCommitForTimeoutException.

/**
 * Creates a container on a three-node RATIS pipeline, shuts down one ratis
 * follower, closes the container and watches for the commit of that request.
 * The test then expects the client's commit info map to shrink from three
 * entries to two, and the captured {@link XceiverClientRatis} log to mention
 * the failed 3 way commit, the TimeoutException and the majority commit.
 *
 * @throws Exception if any cluster or client operation fails
 */
@Test
public void test2WayCommitForTimeoutException() throws Exception {
    GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer.captureLogs(XceiverClientRatis.LOG);
    try {
        XceiverClientManager clientManager = new XceiverClientManager(conf);
        ContainerWithPipeline container1 = storageContainerLocationClient.allocateContainer(HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.THREE, OzoneConsts.OZONE);
        XceiverClientSpi xceiverClient = clientManager.acquireClient(container1.getPipeline());
        try {
            Assert.assertEquals(1, xceiverClient.getRefcount());
            Assert.assertEquals(container1.getPipeline(), xceiverClient.getPipeline());
            Pipeline pipeline = xceiverClient.getPipeline();
            TestHelper.createPipelineOnDatanode(pipeline, cluster);
            XceiverClientRatis ratisClient = (XceiverClientRatis) xceiverClient;
            XceiverClientReply reply = xceiverClient.sendCommandAsync(ContainerTestHelper.getCreateContainerRequest(container1.getContainerInfo().getContainerID(), xceiverClient.getPipeline()));
            reply.getResponse().get();
            Assert.assertEquals(3, ratisClient.getCommitInfoMap().size());
            List<DatanodeDetails> nodesInPipeline = pipeline.getNodes();
            for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
                // shutdown the first ratis follower found in the pipeline
                if (nodesInPipeline.contains(dn.getDatanodeDetails()) && RatisTestHelper.isRatisFollower(dn, pipeline)) {
                    cluster.shutdownHddsDatanode(dn.getDatanodeDetails());
                    break;
                }
            }
            reply = xceiverClient.sendCommandAsync(ContainerTestHelper.getCloseContainer(pipeline, container1.getContainerInfo().getContainerID()));
            reply.getResponse().get();
            xceiverClient.watchForCommit(reply.getLogIndex());
            // commitInfo Map will be reduced to 2 here
            Assert.assertEquals(2, ratisClient.getCommitInfoMap().size());
        } finally {
            // release the client even when an assertion above fails
            clientManager.releaseClient(xceiverClient, false);
        }
        String capturedLog = logCapturer.getOutput();
        Assert.assertTrue(capturedLog.contains("3 way commit failed"));
        Assert.assertTrue(capturedLog.contains("TimeoutException"));
        Assert.assertTrue(capturedLog.contains("Committed by majority"));
    } finally {
        // always detach the capturer so a failing test does not leave it attached
        logCapturer.stopCapturing();
    }
}
Also used : XceiverClientReply(org.apache.hadoop.hdds.scm.XceiverClientReply) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) GenericTestUtils(org.apache.ozone.test.GenericTestUtils) XceiverClientRatis(org.apache.hadoop.hdds.scm.XceiverClientRatis) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) XceiverClientManager(org.apache.hadoop.hdds.scm.XceiverClientManager) XceiverClientSpi(org.apache.hadoop.hdds.scm.XceiverClientSpi) ContainerWithPipeline(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline) ContainerWithPipeline(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) Test(org.junit.Test)

Example 27 with HddsDatanodeService

use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.

the class TestContainerStateMachineFailures method induceFollowerFailure.

/**
 * Walks the datanodes serving the key's pipeline and, for each one that is
 * not the pipeline leader, deletes the chunks directory of the key's
 * container when that datanode holds the container. Stops as soon as
 * {@code failureCount} non-leader datanodes have been visited.
 *
 * @param omKeyLocationInfo location info supplying the pipeline and container ID
 * @param failureCount number of non-leader datanodes to visit before stopping
 */
private void induceFollowerFailure(OmKeyLocationInfo omKeyLocationInfo, int failureCount) {
    final UUID leaderId = omKeyLocationInfo.getPipeline().getLeaderId();
    int followersVisited = 0;
    for (HddsDatanodeService datanode : TestHelper.getDatanodeServices(cluster, omKeyLocationInfo.getPipeline())) {
        if (datanode.getDatanodeDetails().getUuid().equals(leaderId)) {
            // the leader is left untouched
            continue;
        }
        followersVisited++;
        long containerID = omKeyLocationInfo.getContainerID();
        Container replica = datanode.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID);
        if (replica != null) {
            ContainerData replicaData = replica.getContainerData();
            Assert.assertTrue(replicaData instanceof KeyValueContainerData);
            String chunksPath = ((KeyValueContainerData) replicaData).getChunksPath();
            FileUtil.fullyDelete(new File(chunksPath));
        }
        // a follower counts toward the limit even when it has no such container
        if (followersVisited == failureCount) {
            break;
        }
    }
}
Also used : OzoneContainer(org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer) Container(org.apache.hadoop.ozone.container.common.interfaces.Container) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) UUID(java.util.UUID) File(java.io.File) KeyValueContainerData(org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData) KeyValueContainerData(org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData) ContainerData(org.apache.hadoop.ozone.container.common.impl.ContainerData)

Example 28 with HddsDatanodeService

use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.

the class TestContainerStateMachineFailures method testUnhealthyContainer.

/**
 * Writes a key to a single-replica RATIS container, deletes the container's
 * chunks directory, and closes the key. The test then expects the container
 * to be UNHEALTHY both in the datanode's container set and in the on-disk
 * .container file, to stay UNHEALTHY in that file after a datanode restart,
 * and a CloseContainer request dispatched to the restarted datanode to be
 * answered with CONTAINER_UNHEALTHY.
 *
 * @throws Exception if any cluster or client operation fails unexpectedly
 */
@Test
public void testUnhealthyContainer() throws Exception {
    OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey("ratis", 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
    // First write and flush creates a container in the datanode
    key.write("ratis".getBytes(UTF_8));
    key.flush();
    key.write("ratis".getBytes(UTF_8));
    KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
    List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
    Assert.assertEquals(1, locationInfoList.size());
    OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
    HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
    ContainerData containerData = dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(omKeyLocationInfo.getContainerID()).getContainerData();
    Assert.assertTrue(containerData instanceof KeyValueContainerData);
    KeyValueContainerData keyValueContainerData = (KeyValueContainerData) containerData;
    // delete the container's chunks directory
    FileUtil.fullyDelete(new File(keyValueContainerData.getChunksPath()));
    try {
        // there is only 1 datanode in the pipeline, the pipeline will be closed
        // and allocation to new pipeline will fail as there is no other dn in
        // the cluster
        key.close();
    } catch (IOException ignored) {
        // deliberately ignored: close may fail for the reason described above;
        // the assertions below check the resulting container state
    }
    long containerID = omKeyLocationInfo.getContainerID();
    // Make sure the container is marked unhealthy
    Assert.assertEquals(ContainerProtos.ContainerDataProto.State.UNHEALTHY, dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerState());
    // Check metadata in the .container file
    File containerFile = new File(keyValueContainerData.getMetadataPath(), containerID + OzoneConsts.CONTAINER_EXTENSION);
    keyValueContainerData = (KeyValueContainerData) ContainerDataYaml.readContainerFile(containerFile);
    assertThat(keyValueContainerData.getState(), is(UNHEALTHY));
    // point the datanode at a new, randomly suffixed ratis storage dir before
    // the restart (presumably so ratis starts from empty storage - TODO confirm)
    OzoneConfiguration config = dn.getConf();
    final String dir = config.get(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR) + UUID.randomUUID();
    config.set(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR, dir);
    int index = cluster.getHddsDatanodeIndex(dn.getDatanodeDetails());
    // restart the hdds datanode and verify the container is still reported
    // as unhealthy afterwards
    cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
    // make sure the container state is still marked unhealthy after restart
    keyValueContainerData = (KeyValueContainerData) ContainerDataYaml.readContainerFile(containerFile);
    assertThat(keyValueContainerData.getState(), is(UNHEALTHY));
    HddsDatanodeService dnService = cluster.getHddsDatanodes().get(index);
    OzoneContainer ozoneContainer = dnService.getDatanodeStateMachine().getContainer();
    HddsDispatcher dispatcher = (HddsDispatcher) ozoneContainer.getDispatcher();
    // a close request sent to the restarted datanode must be answered with
    // CONTAINER_UNHEALTHY
    ContainerProtos.ContainerCommandRequestProto.Builder request = ContainerProtos.ContainerCommandRequestProto.newBuilder();
    request.setCmdType(ContainerProtos.Type.CloseContainer);
    request.setContainerID(containerID);
    request.setCloseContainer(ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
    request.setDatanodeUuid(dnService.getDatanodeDetails().getUuidString());
    Assert.assertEquals(ContainerProtos.Result.CONTAINER_UNHEALTHY, dispatcher.dispatch(request.build(), null).getResult());
}
Also used : OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) IOException(java.io.IOException) ByteString(org.apache.ratis.thirdparty.com.google.protobuf.ByteString) OmKeyLocationInfo(org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo) KeyValueContainerData(org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData) HddsDispatcher(org.apache.hadoop.ozone.container.common.impl.HddsDispatcher) KeyOutputStream(org.apache.hadoop.ozone.client.io.KeyOutputStream) OzoneContainer(org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer) File(java.io.File) KeyValueContainerData(org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData) ContainerData(org.apache.hadoop.ozone.container.common.impl.ContainerData) Test(org.junit.Test)

Example 29 with HddsDatanodeService

use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.

the class TestContainerStateMachineFailures method testContainerStateMachineFailures.

/**
 * Writes a key to a single-replica RATIS container, deletes the container's
 * directory on the datanode and closes the key. The test then expects the
 * container to be marked UNHEALTHY and, after the datanode is restarted, to
 * be absent from the datanode's container set.
 *
 * @throws Exception if any cluster or client operation fails unexpectedly
 */
@Test
public void testContainerStateMachineFailures() throws Exception {
    OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey("ratis", 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
    byte[] testData = "ratis".getBytes(UTF_8);
    // First write and flush creates a container in the datanode
    key.write(testData);
    key.flush();
    key.write(testData);
    KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
    List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
    Assert.assertEquals(1, locationInfoList.size());
    OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
    HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
    // delete the container dir
    FileUtil.fullyDelete(new File(dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(omKeyLocationInfo.getContainerID()).getContainerData().getContainerPath()));
    try {
        // there is only 1 datanode in the pipeline, the pipeline will be closed
        // and allocation to new pipeline will fail as there is no other dn in
        // the cluster
        key.close();
    } catch (IOException ignored) {
        // deliberately ignored: close may fail for the reason described above;
        // the assertions below check the resulting container state
    }
    long containerID = omKeyLocationInfo.getContainerID();
    // Make sure the container is marked unhealthy
    Assert.assertEquals(ContainerProtos.ContainerDataProto.State.UNHEALTHY, dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerState());
    // point the datanode at a new, randomly suffixed ratis storage dir before
    // the restart (presumably so ratis starts from empty storage - TODO confirm)
    OzoneConfiguration config = dn.getConf();
    final String dir = config.get(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR) + UUID.randomUUID();
    config.set(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR, dir);
    int index = cluster.getHddsDatanodeIndex(dn.getDatanodeDetails());
    // restart the hdds datanode, container should not be in the regular set
    cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
    OzoneContainer ozoneContainer = cluster.getHddsDatanodes().get(index).getDatanodeStateMachine().getContainer();
    Assert.assertNull(ozoneContainer.getContainerSet().getContainer(containerID));
}
Also used : OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) IOException(java.io.IOException) ByteString(org.apache.ratis.thirdparty.com.google.protobuf.ByteString) OmKeyLocationInfo(org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo) KeyOutputStream(org.apache.hadoop.ozone.client.io.KeyOutputStream) OzoneContainer(org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer) File(java.io.File) Test(org.junit.Test)

Example 30 with HddsDatanodeService

use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.

the class TestContainerStateMachineFailures method testWriteStateMachineDataIdempotencyWithClosedContainer.

// The test injects multiple write chunk requests along with closed container
// request thereby inducing a situation where a writeStateMachine call
// gets executed when the closed container apply completes thereby
// failing writeStateMachine call. In any case, our stateMachine should
// not be marked unhealthy and pipeline should not fail if container gets
// closed here.
//
// Flow: write and close a key, take a first snapshot, then start 100 write
// chunk threads plus one close container thread against the same container.
// Afterwards the container must be CLOSED, the state machine healthy, and a
// further snapshot must land on a different path than the first one. A last
// write chunk is issued on the test thread so that its message assertions
// run where a failure is reported directly.
@Test
public void testWriteStateMachineDataIdempotencyWithClosedContainer() throws Exception {
    OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey("ratis-1", 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
    // First write and flush creates a container in the datanode
    key.write("ratis".getBytes(UTF_8));
    key.flush();
    key.write("ratis".getBytes(UTF_8));
    KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
    List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
    Assert.assertEquals(1, locationInfoList.size());
    OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
    HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
    ContainerData containerData = dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(omKeyLocationInfo.getContainerID()).getContainerData();
    Assert.assertTrue(containerData instanceof KeyValueContainerData);
    key.close();
    ContainerStateMachine stateMachine = (ContainerStateMachine) TestHelper.getStateMachine(dn, omKeyLocationInfo.getPipeline());
    SimpleStateMachineStorage storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
    Path parentPath = storage.findLatestSnapshot().getFile().getPath();
    stateMachine.takeSnapshot();
    // Since the snapshot threshold is set to 1, since there are
    // applyTransactions, we should see snapshots
    Assert.assertTrue(parentPath.getParent().toFile().listFiles().length > 0);
    FileInfo snapshot = storage.findLatestSnapshot().getFile();
    Assert.assertNotNull(snapshot);
    long containerID = omKeyLocationInfo.getContainerID();
    Pipeline pipeline = cluster.getStorageContainerLocationClient().getContainerWithPipeline(containerID).getPipeline();
    XceiverClientSpi xceiverClient = xceiverClientManager.acquireClient(pipeline);
    try {
        final int writerCount = 100;
        CountDownLatch latch = new CountDownLatch(writerCount);
        AtomicInteger failCount = new AtomicInteger(0);
        // r1 closes the container; any IOException counts as a failure
        Runnable r1 = () -> {
            try {
                ContainerProtos.ContainerCommandRequestProto.Builder request = ContainerProtos.ContainerCommandRequestProto.newBuilder();
                request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
                request.setCmdType(ContainerProtos.Type.CloseContainer);
                request.setContainerID(containerID);
                request.setCloseContainer(ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
                xceiverClient.sendCommand(request.build());
            } catch (IOException e) {
                failCount.incrementAndGet();
            }
        };
        // r2 writes one chunk; only a ContainerNotOpenException is tolerated,
        // and the error message must carry the redaction marker, not the data
        Runnable r2 = () -> {
            try {
                ByteString data = ByteString.copyFromUtf8("hello");
                ContainerProtos.ContainerCommandRequestProto.Builder writeChunkRequest = ContainerTestHelper.newWriteChunkRequestBuilder(pipeline, omKeyLocationInfo.getBlockID(), data.size(), random.nextInt());
                writeChunkRequest.setWriteChunk(writeChunkRequest.getWriteChunkBuilder().setData(data));
                xceiverClient.sendCommand(writeChunkRequest.build());
            } catch (IOException e) {
                if (!(HddsClientUtils.checkForException(e) instanceof ContainerNotOpenException)) {
                    failCount.incrementAndGet();
                }
                String message = e.getMessage();
                try {
                    Assert.assertFalse(message, message.contains("hello"));
                    Assert.assertTrue(message, message.contains(HddsUtils.REDACTED.toStringUtf8()));
                } catch (AssertionError assertionFailure) {
                    // an AssertionError on a worker thread would otherwise be
                    // lost; record it so the test thread can fail on it
                    failCount.incrementAndGet();
                    throw assertionFailure;
                }
            } finally {
                // count down on every exit path so the await below cannot stall
                latch.countDown();
            }
        };
        List<Thread> threadList = new ArrayList<>();
        for (int i = 0; i < writerCount; i++) {
            Thread r = new Thread(r2);
            r.start();
            threadList.add(r);
        }
        Thread closeContainerThread = new Thread(r1);
        closeContainerThread.start();
        threadList.add(closeContainerThread);
        Assert.assertTrue("write chunk threads did not finish within 600 seconds", latch.await(600, TimeUnit.SECONDS));
        for (Thread started : threadList) {
            started.join();
        }
        if (failCount.get() > 0) {
            fail("testWriteStateMachineDataIdempotencyWithClosedContainer failed");
        }
        Assert.assertEquals(ContainerProtos.ContainerDataProto.State.CLOSED, TestHelper.getDatanodeService(omKeyLocationInfo, cluster).getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerState());
        Assert.assertTrue(stateMachine.isStateMachineHealthy());
        try {
            stateMachine.takeSnapshot();
        } catch (IOException ioe) {
            Assert.fail("Exception should not be thrown");
        }
        FileInfo latestSnapshot = storage.findLatestSnapshot().getFile();
        Assert.assertFalse(snapshot.getPath().equals(latestSnapshot.getPath()));
        // one more write on the test thread; the latch is already at zero, so
        // the extra countDown is a no-op
        r2.run();
    } finally {
        // release the client acquired above, even when the test fails
        xceiverClientManager.releaseClient(xceiverClient, false);
    }
}
Also used : ByteString(org.apache.ratis.thirdparty.com.google.protobuf.ByteString) ArrayList(java.util.ArrayList) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) ByteString(org.apache.ratis.thirdparty.com.google.protobuf.ByteString) KeyValueContainerData(org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData) ContainerStateMachine(org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine) FileInfo(org.apache.ratis.server.storage.FileInfo) SimpleStateMachineStorage(org.apache.ratis.statemachine.impl.SimpleStateMachineStorage) Path(java.nio.file.Path) ContainerProtos(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos) OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) IOException(java.io.IOException) XceiverClientSpi(org.apache.hadoop.hdds.scm.XceiverClientSpi) CountDownLatch(java.util.concurrent.CountDownLatch) ContainerNotOpenException(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException) OmKeyLocationInfo(org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) KeyOutputStream(org.apache.hadoop.ozone.client.io.KeyOutputStream) KeyValueContainerData(org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData) ContainerData(org.apache.hadoop.ozone.container.common.impl.ContainerData) Test(org.junit.Test)

Aggregations

HddsDatanodeService (org.apache.hadoop.ozone.HddsDatanodeService)41 Test (org.junit.Test)32 OzoneOutputStream (org.apache.hadoop.ozone.client.io.OzoneOutputStream)20 IOException (java.io.IOException)17 Pipeline (org.apache.hadoop.hdds.scm.pipeline.Pipeline)14 File (java.io.File)12 OmKeyLocationInfo (org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo)12 Container (org.apache.hadoop.ozone.container.common.interfaces.Container)11 KeyOutputStream (org.apache.hadoop.ozone.client.io.KeyOutputStream)10 DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)9 KeyValueContainerData (org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData)9 OzoneContainer (org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer)9 OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration)8 XceiverClientSpi (org.apache.hadoop.hdds.scm.XceiverClientSpi)8 GenericTestUtils (org.apache.ozone.test.GenericTestUtils)8 ArrayList (java.util.ArrayList)7 OzoneKey (org.apache.hadoop.ozone.client.OzoneKey)7 OzoneInputStream (org.apache.hadoop.ozone.client.io.OzoneInputStream)7 ContainerData (org.apache.hadoop.ozone.container.common.impl.ContainerData)7 Ignore (org.junit.Ignore)7