Usage example of org.apache.hadoop.ozone.HddsDatanodeService in the Apache Ozone project:
class TestWatchForCommit, method test2WayCommitForTimeoutException.
// Verifies that when a Ratis follower in a 3-node pipeline is shut down, a
// subsequent commit falls back from a 3-way commit to a 2-way (majority)
// commit after the watch on the dead follower times out, and that the client
// logs reflect the fallback.
@Test
public void test2WayCommitForTimeoutException() throws Exception {
// Capture the Ratis client's log so the fallback messages can be asserted on.
GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer.captureLogs(XceiverClientRatis.LOG);
XceiverClientManager clientManager = new XceiverClientManager(conf);
// Allocate a RATIS/THREE container so the pipeline has one leader + two followers.
ContainerWithPipeline container1 = storageContainerLocationClient.allocateContainer(HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.THREE, OzoneConsts.OZONE);
XceiverClientSpi xceiverClient = clientManager.acquireClient(container1.getPipeline());
Assert.assertEquals(1, xceiverClient.getRefcount());
Assert.assertEquals(container1.getPipeline(), xceiverClient.getPipeline());
Pipeline pipeline = xceiverClient.getPipeline();
TestHelper.createPipelineOnDatanode(pipeline, cluster);
XceiverClientRatis ratisClient = (XceiverClientRatis) xceiverClient;
XceiverClientReply reply = xceiverClient.sendCommandAsync(ContainerTestHelper.getCreateContainerRequest(container1.getContainerInfo().getContainerID(), xceiverClient.getPipeline()));
reply.getResponse().get();
// All three replicas have acknowledged the create, so the commit-info map
// tracks all three datanodes.
Assert.assertEquals(3, ratisClient.getCommitInfoMap().size());
List<DatanodeDetails> nodesInPipeline = pipeline.getNodes();
for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
// shutdown the ratis follower
if (nodesInPipeline.contains(dn.getDatanodeDetails()) && RatisTestHelper.isRatisFollower(dn, pipeline)) {
cluster.shutdownHddsDatanode(dn.getDatanodeDetails());
break;
}
}
// Close the container with one follower down; the watch must time out on
// the dead node and complete via the remaining majority.
reply = xceiverClient.sendCommandAsync(ContainerTestHelper.getCloseContainer(pipeline, container1.getContainerInfo().getContainerID()));
reply.getResponse().get();
xceiverClient.watchForCommit(reply.getLogIndex());
// commitInfo Map will be reduced to 2 here
Assert.assertEquals(2, ratisClient.getCommitInfoMap().size());
clientManager.releaseClient(xceiverClient, false);
Assert.assertTrue(logCapturer.getOutput().contains("3 way commit failed"));
Assert.assertTrue(logCapturer.getOutput().contains("TimeoutException"));
Assert.assertTrue(logCapturer.getOutput().contains("Committed by majority"));
logCapturer.stopCapturing();
}
Usage example of org.apache.hadoop.ozone.HddsDatanodeService in the Apache Ozone project:
class TestContainerStateMachineFailures, method induceFollowerFailure.
/**
 * Corrupts the given key's container on up to {@code failureCount} non-leader
 * datanodes of its pipeline by deleting the container's chunks directory.
 * Datanodes that do not (yet) host the container still count toward the
 * failure quota, matching the original behavior.
 */
private void induceFollowerFailure(OmKeyLocationInfo omKeyLocationInfo, int failureCount) {
  UUID leaderUuid = omKeyLocationInfo.getPipeline().getLeaderId();
  long containerId = omKeyLocationInfo.getContainerID();
  Set<HddsDatanodeService> pipelineDatanodes =
      TestHelper.getDatanodeServices(cluster, omKeyLocationInfo.getPipeline());
  int failuresInduced = 0;
  for (HddsDatanodeService datanode : pipelineDatanodes) {
    // Never touch the leader; only followers are corrupted.
    if (datanode.getDatanodeDetails().getUuid().equals(leaderUuid)) {
      continue;
    }
    failuresInduced++;
    Container container = datanode.getDatanodeStateMachine().getContainer()
        .getContainerSet().getContainer(containerId);
    if (container != null) {
      ContainerData data = container.getContainerData();
      Assert.assertTrue(data instanceof KeyValueContainerData);
      // Deleting the chunks directory makes the replica unreadable.
      FileUtil.fullyDelete(new File(((KeyValueContainerData) data).getChunksPath()));
    }
    if (failuresInduced == failureCount) {
      return;
    }
  }
}
Usage example of org.apache.hadoop.ozone.HddsDatanodeService in the Apache Ozone project:
class TestContainerStateMachineFailures, method testUnhealthyContainer.
// Corrupts a single-replica container on disk and verifies that: (1) the
// container is marked UNHEALTHY in memory and in its .container file, (2) the
// UNHEALTHY state survives a datanode restart, and (3) a CloseContainer
// command against it is rejected with CONTAINER_UNHEALTHY.
@Test
public void testUnhealthyContainer() throws Exception {
  OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName)
      .createKey("ratis", 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
  // First write and flush creates a container in the datanode
  key.write("ratis".getBytes(UTF_8));
  key.flush();
  key.write("ratis".getBytes(UTF_8));
  KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
  List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
  Assert.assertEquals(1, locationInfoList.size());
  OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
  HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
  ContainerData containerData = dn.getDatanodeStateMachine().getContainer()
      .getContainerSet().getContainer(omKeyLocationInfo.getContainerID()).getContainerData();
  Assert.assertTrue(containerData instanceof KeyValueContainerData);
  KeyValueContainerData keyValueContainerData = (KeyValueContainerData) containerData;
  // Delete the container's chunks directory to corrupt the replica on disk.
  FileUtil.fullyDelete(new File(keyValueContainerData.getChunksPath()));
  try {
    // there is only 1 datanode in the pipeline, the pipeline will be closed
    // and allocation to new pipeline will fail as there is no other dn in
    // the cluster
    key.close();
  } catch (IOException ignored) {
    // expected: close fails because the only replica was corrupted above
  }
  long containerID = omKeyLocationInfo.getContainerID();
  // Make sure the container is marked unhealthy
  Assert.assertEquals(ContainerProtos.ContainerDataProto.State.UNHEALTHY,
      dn.getDatanodeStateMachine().getContainer().getContainerSet()
          .getContainer(containerID).getContainerState());
  // Check metadata in the .container file
  File containerFile = new File(keyValueContainerData.getMetadataPath(),
      containerID + OzoneConsts.CONTAINER_EXTENSION);
  keyValueContainerData = (KeyValueContainerData) ContainerDataYaml.readContainerFile(containerFile);
  assertThat(keyValueContainerData.getState(), is(UNHEALTHY));
  OzoneConfiguration config = dn.getConf();
  // Point Ratis storage at a fresh directory before the restart — presumably
  // so the old Ratis log is not replayed; TODO confirm intent.
  final String dir = config.get(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR)
      + UUID.randomUUID();
  config.set(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR, dir);
  int index = cluster.getHddsDatanodeIndex(dn.getDatanodeDetails());
  // restart the hdds datanode and see if the container is listed in the
  // missing container set and not in the regular set
  cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
  // make sure the container state is still marked unhealthy after restart
  keyValueContainerData = (KeyValueContainerData) ContainerDataYaml.readContainerFile(containerFile);
  assertThat(keyValueContainerData.getState(), is(UNHEALTHY));
  HddsDatanodeService dnService = cluster.getHddsDatanodes().get(index);
  OzoneContainer ozoneContainer = dnService.getDatanodeStateMachine().getContainer();
  HddsDispatcher dispatcher = (HddsDispatcher) ozoneContainer.getDispatcher();
  // A CloseContainer command against the unhealthy container must be rejected.
  ContainerProtos.ContainerCommandRequestProto.Builder request =
      ContainerProtos.ContainerCommandRequestProto.newBuilder();
  request.setCmdType(ContainerProtos.Type.CloseContainer);
  request.setContainerID(containerID);
  request.setCloseContainer(ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
  request.setDatanodeUuid(dnService.getDatanodeDetails().getUuidString());
  Assert.assertEquals(ContainerProtos.Result.CONTAINER_UNHEALTHY,
      dispatcher.dispatch(request.build(), null).getResult());
}
Usage example of org.apache.hadoop.ozone.HddsDatanodeService in the Apache Ozone project:
class TestContainerStateMachineFailures, method testContainerStateMachineFailures.
// Deletes a container's directory out from under the datanode and verifies
// that the container is marked UNHEALTHY, and that after a restart (with a
// fresh Ratis storage dir) the container is absent from the regular container
// set.
@Test
public void testContainerStateMachineFailures() throws Exception {
  OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName)
      .createKey("ratis", 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
  byte[] testData = "ratis".getBytes(UTF_8);
  // First write and flush creates a container in the datanode
  key.write(testData);
  key.flush();
  key.write(testData);
  KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
  List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
  Assert.assertEquals(1, locationInfoList.size());
  OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
  HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
  // delete the container dir
  FileUtil.fullyDelete(new File(dn.getDatanodeStateMachine().getContainer()
      .getContainerSet().getContainer(omKeyLocationInfo.getContainerID())
      .getContainerData().getContainerPath()));
  try {
    // there is only 1 datanode in the pipeline, the pipeline will be closed
    // and allocation to new pipeline will fail as there is no other dn in
    // the cluster
    key.close();
  } catch (IOException ignored) {
    // expected: close fails because the only replica's dir was deleted above
  }
  long containerID = omKeyLocationInfo.getContainerID();
  // Make sure the container is marked unhealthy
  Assert.assertEquals(ContainerProtos.ContainerDataProto.State.UNHEALTHY,
      dn.getDatanodeStateMachine().getContainer().getContainerSet()
          .getContainer(containerID).getContainerState());
  // restart the hdds datanode, container should not be in the regular set
  OzoneConfiguration config = dn.getConf();
  // Fresh Ratis storage dir before restart — presumably so the old Ratis log
  // is not replayed; TODO confirm intent.
  final String dir = config.get(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR)
      + UUID.randomUUID();
  config.set(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR, dir);
  int index = cluster.getHddsDatanodeIndex(dn.getDatanodeDetails());
  cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
  OzoneContainer ozoneContainer =
      cluster.getHddsDatanodes().get(index).getDatanodeStateMachine().getContainer();
  Assert.assertNull(ozoneContainer.getContainerSet().getContainer(containerID));
}
Usage example of org.apache.hadoop.ozone.HddsDatanodeService in the Apache Ozone project:
class TestContainerStateMachineFailures, method testWriteStateMachineDataIdempotencyWithClosedContainer.
// The test injects multiple write chunk requests along with a closed container
// request, thereby inducing a situation where a writeStateMachine call
// gets executed when the closed container apply completes, thereby
// failing the writeStateMachine call. In any case, our stateMachine should
// not be marked unhealthy and the pipeline should not fail if the container
// gets closed here.
@Test
public void testWriteStateMachineDataIdempotencyWithClosedContainer() throws Exception {
  OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName)
      .createKey("ratis-1", 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
  // First write and flush creates a container in the datanode
  key.write("ratis".getBytes(UTF_8));
  key.flush();
  key.write("ratis".getBytes(UTF_8));
  KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
  List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
  Assert.assertEquals(1, locationInfoList.size());
  OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
  HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
  ContainerData containerData = dn.getDatanodeStateMachine().getContainer()
      .getContainerSet().getContainer(omKeyLocationInfo.getContainerID()).getContainerData();
  Assert.assertTrue(containerData instanceof KeyValueContainerData);
  key.close();
  ContainerStateMachine stateMachine =
      (ContainerStateMachine) TestHelper.getStateMachine(dn, omKeyLocationInfo.getPipeline());
  SimpleStateMachineStorage storage =
      (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
  Path parentPath = storage.findLatestSnapshot().getFile().getPath();
  stateMachine.takeSnapshot();
  // Since the snapshot threshold is set to 1, since there are
  // applyTransactions, we should see snapshots
  Assert.assertTrue(parentPath.getParent().toFile().listFiles().length > 0);
  FileInfo snapshot = storage.findLatestSnapshot().getFile();
  Assert.assertNotNull(snapshot);
  long containerID = omKeyLocationInfo.getContainerID();
  Pipeline pipeline = cluster.getStorageContainerLocationClient()
      .getContainerWithPipeline(containerID).getPipeline();
  XceiverClientSpi xceiverClient = xceiverClientManager.acquireClient(pipeline);
  CountDownLatch latch = new CountDownLatch(100);
  AtomicInteger failCount = new AtomicInteger(0);
  // r1: closes the container, racing with the concurrent writers below.
  Runnable r1 = () -> {
    try {
      ContainerProtos.ContainerCommandRequestProto.Builder request =
          ContainerProtos.ContainerCommandRequestProto.newBuilder();
      request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
      request.setCmdType(ContainerProtos.Type.CloseContainer);
      request.setContainerID(containerID);
      request.setCloseContainer(ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
      xceiverClient.sendCommand(request.build());
    } catch (IOException e) {
      failCount.incrementAndGet();
    }
  };
  // r2: writes a chunk; a ContainerNotOpenException is tolerated because the
  // close may land first, but the chunk payload must never leak into the
  // error message (it must be redacted).
  Runnable r2 = () -> {
    try {
      ByteString data = ByteString.copyFromUtf8("hello");
      ContainerProtos.ContainerCommandRequestProto.Builder writeChunkRequest =
          ContainerTestHelper.newWriteChunkRequestBuilder(
              pipeline, omKeyLocationInfo.getBlockID(), data.size(), random.nextInt());
      writeChunkRequest.setWriteChunk(writeChunkRequest.getWriteChunkBuilder().setData(data));
      xceiverClient.sendCommand(writeChunkRequest.build());
      latch.countDown();
    } catch (IOException e) {
      latch.countDown();
      if (!(HddsClientUtils.checkForException(e) instanceof ContainerNotOpenException)) {
        failCount.incrementAndGet();
      }
      String message = e.getMessage();
      Assert.assertFalse(message, message.contains("hello"));
      Assert.assertTrue(message, message.contains(HddsUtils.REDACTED.toStringUtf8()));
    }
  };
  List<Thread> threadList = new ArrayList<>();
  for (int i = 0; i < 100; i++) {
    Thread writer = new Thread(r2);
    writer.start();
    threadList.add(writer);
  }
  Thread closeContainerThread = new Thread(r1);
  closeContainerThread.start();
  threadList.add(closeContainerThread);
  // Fail fast if the writers hang instead of silently proceeding.
  Assert.assertTrue("writer threads did not finish in time",
      latch.await(600, TimeUnit.SECONDS));
  for (Thread t : threadList) {
    t.join();
  }
  Assert.assertEquals(
      "testWriteStateMachineDataIdempotencyWithClosedContainer failed",
      0, failCount.get());
  // The container must end up CLOSED and the state machine must stay healthy.
  Assert.assertEquals(ContainerProtos.ContainerDataProto.State.CLOSED,
      TestHelper.getDatanodeService(omKeyLocationInfo, cluster).getDatanodeStateMachine()
          .getContainer().getContainerSet().getContainer(containerID).getContainerState());
  Assert.assertTrue(stateMachine.isStateMachineHealthy());
  try {
    stateMachine.takeSnapshot();
  } catch (IOException ioe) {
    Assert.fail("Exception should not be thrown");
  }
  // A new snapshot must have been taken since the one captured earlier.
  FileInfo latestSnapshot = storage.findLatestSnapshot().getFile();
  Assert.assertNotEquals(snapshot.getPath(), latestSnapshot.getPath());
  // One more synchronous write after close — presumably verifies the write
  // path still fails gracefully post-close; TODO confirm intent.
  r2.run();
}
End of aggregated HddsDatanodeService usage examples.