Use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.
From the class TestContainerReplicationEndToEnd, method testContainerReplication.
/**
 * The test simulates end-to-end container replication.
*/
@Test
public void testContainerReplication() throws Exception {
String keyName = "testContainerReplication";
OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey(keyName, 0, ReplicationType.RATIS, ReplicationFactor.THREE, new HashMap<>());
byte[] testData = "ratis".getBytes(UTF_8);
// First write and flush creates a container in the datanode
key.write(testData);
key.flush();
KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
Assert.assertEquals(1, locationInfoList.size());
OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
long containerID = omKeyLocationInfo.getContainerID();
PipelineID pipelineID = cluster.getStorageContainerManager().getContainerManager().getContainer(ContainerID.valueOf(containerID)).getPipelineID();
Pipeline pipeline = cluster.getStorageContainerManager().getPipelineManager().getPipeline(pipelineID);
key.close();
HddsProtos.LifeCycleState containerState = cluster.getStorageContainerManager().getContainerManager().getContainer(ContainerID.valueOf(containerID)).getState();
LoggerFactory.getLogger(TestContainerReplicationEndToEnd.class).info("Current Container State is {}", containerState);
if ((containerState != HddsProtos.LifeCycleState.CLOSING) && (containerState != HddsProtos.LifeCycleState.CLOSED)) {
cluster.getStorageContainerManager().getContainerManager().updateContainerState(ContainerID.valueOf(containerID), HddsProtos.LifeCycleEvent.FINALIZE);
}
// wait for the container to move to CLOSING state in SCM
Thread.sleep(2 * containerReportInterval);
DatanodeDetails oldReplicaNode = pipeline.getFirstNode();
// now move the container to the CLOSED state on the datanode.
XceiverClientSpi xceiverClient = xceiverClientManager.acquireClient(pipeline);
ContainerProtos.ContainerCommandRequestProto.Builder request = ContainerProtos.ContainerCommandRequestProto.newBuilder();
request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
request.setCmdType(ContainerProtos.Type.CloseContainer);
request.setContainerID(containerID);
request.setCloseContainer(ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
xceiverClient.sendCommand(request.build());
// wait for container to move to closed state in SCM
Thread.sleep(2 * containerReportInterval);
Assert.assertEquals(HddsProtos.LifeCycleState.CLOSED, cluster.getStorageContainerManager().getContainerInfo(containerID).getState());
// shutdown the replica node
cluster.shutdownHddsDatanode(oldReplicaNode);
// now the container is under-replicated and will be replicated to a different dn
HddsDatanodeService dnService = null;
for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
Predicate<DatanodeDetails> p = i -> i.getUuid().equals(dn.getDatanodeDetails().getUuid());
if (pipeline.getNodes().stream().noneMatch(p)) {
dnService = dn;
}
}
Assert.assertNotNull(dnService);
final HddsDatanodeService newReplicaNode = dnService;
// wait for the container to get replicated
GenericTestUtils.waitFor(() -> {
return newReplicaNode.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID) != null;
}, 500, 100000);
Assert.assertTrue(newReplicaNode.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerData().getBlockCommitSequenceId() > 0);
// wait for SCM to update the replica Map
Thread.sleep(5 * containerReportInterval);
// shut down all the pipeline nodes, then read the key again
for (DatanodeDetails dn : pipeline.getNodes()) {
cluster.shutdownHddsDatanode(dn);
}
// This will try to read the data from the dn to which the container got
// replicated after the container got closed.
TestHelper.validateData(keyName, testData, objectStore, volumeName, bucketName);
}
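
The CloseContainer request assembled above is built identically in the later tests in this section; a small helper could factor it out. A minimal sketch, noting that the helper name is hypothetical and not part of TestHelper:

// Hypothetical helper (not in the Ozone codebase): builds the same
// CloseContainer command request that the tests in this section assemble inline.
private static ContainerProtos.ContainerCommandRequestProto buildCloseContainerRequest(Pipeline pipeline, long containerID) {
  return ContainerProtos.ContainerCommandRequestProto.newBuilder()
      .setDatanodeUuid(pipeline.getFirstNode().getUuidString())
      .setCmdType(ContainerProtos.Type.CloseContainer)
      .setContainerID(containerID)
      .setCloseContainer(ContainerProtos.CloseContainerRequestProto.getDefaultInstance())
      .build();
}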
Use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.
From the class TestContainerStateMachineFailures, method testApplyTransactionFailure.
@Test
public void testApplyTransactionFailure() throws Exception {
OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey("ratis", 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
// First write and flush creates a container in the datanode
key.write("ratis".getBytes(UTF_8));
key.flush();
key.write("ratis".getBytes(UTF_8));
KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
Assert.assertEquals(1, locationInfoList.size());
OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
int index = cluster.getHddsDatanodeIndex(dn.getDatanodeDetails());
ContainerData containerData = dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(omKeyLocationInfo.getContainerID()).getContainerData();
Assert.assertTrue(containerData instanceof KeyValueContainerData);
KeyValueContainerData keyValueContainerData = (KeyValueContainerData) containerData;
key.close();
ContainerStateMachine stateMachine = (ContainerStateMachine) TestHelper.getStateMachine(cluster.getHddsDatanodes().get(index), omKeyLocationInfo.getPipeline());
SimpleStateMachineStorage storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
stateMachine.takeSnapshot();
Path parentPath = storage.findLatestSnapshot().getFile().getPath();
// Since the snapshot threshold is set to 1 and applyTransaction calls
// have occurred, we should see snapshots.
Assert.assertTrue(parentPath.getParent().toFile().listFiles().length > 0);
FileInfo snapshot = storage.findLatestSnapshot().getFile();
Assert.assertNotNull(snapshot);
long containerID = omKeyLocationInfo.getContainerID();
// delete the container db file
FileUtil.fullyDelete(new File(keyValueContainerData.getContainerPath()));
Pipeline pipeline = cluster.getStorageContainerLocationClient().getContainerWithPipeline(containerID).getPipeline();
XceiverClientSpi xceiverClient = xceiverClientManager.acquireClient(pipeline);
ContainerProtos.ContainerCommandRequestProto.Builder request = ContainerProtos.ContainerCommandRequestProto.newBuilder();
request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
request.setCmdType(ContainerProtos.Type.CloseContainer);
request.setContainerID(containerID);
request.setCloseContainer(ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
try {
xceiverClient.sendCommand(request.build());
Assert.fail("Expected exception not thrown");
} catch (IOException e) {
// expected: the container db file was deleted, so the close fails
}
// Make sure the container is marked unhealthy
Assert.assertEquals(ContainerProtos.ContainerDataProto.State.UNHEALTHY, dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerState());
try {
// try to take a new snapshot; it should fail because the state machine is unhealthy
stateMachine.takeSnapshot();
} catch (IOException ioe) {
Assert.assertTrue(ioe instanceof StateMachineException);
}
if (snapshot.getPath().toFile().exists()) {
// Make sure the latest snapshot is same as the previous one
try {
FileInfo latestSnapshot = storage.findLatestSnapshot().getFile();
Assert.assertEquals(snapshot.getPath(), latestSnapshot.getPath());
} catch (Throwable e) {
Assert.assertFalse(snapshot.getPath().toFile().exists());
}
}
// when the pipeline is removed, the group dir, including the snapshot, will be deleted
LambdaTestUtils.await(5000, 500, () -> (!snapshot.getPath().toFile().exists()));
}
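
The expected-failure block above (try, Assert.fail, catch IOException) can be written more compactly with LambdaTestUtils.intercept, the same utility class this test file already uses for awaiting conditions; a sketch:

// Compact form of the expected-failure check above, using LambdaTestUtils.
LambdaTestUtils.intercept(IOException.class,
    () -> xceiverClient.sendCommand(request.build()));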
Use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.
From the class TestContainerStateMachineFailures, method testApplyTransactionIdempotencyWithClosedContainer.
@Test
@Flaky("HDDS-6115")
public void testApplyTransactionIdempotencyWithClosedContainer() throws Exception {
OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey("ratis", 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
// First write and flush creates a container in the datanode
key.write("ratis".getBytes(UTF_8));
key.flush();
key.write("ratis".getBytes(UTF_8));
KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
Assert.assertEquals(1, locationInfoList.size());
OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
ContainerData containerData = dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(omKeyLocationInfo.getContainerID()).getContainerData();
Assert.assertTrue(containerData instanceof KeyValueContainerData);
key.close();
ContainerStateMachine stateMachine = (ContainerStateMachine) TestHelper.getStateMachine(dn, omKeyLocationInfo.getPipeline());
SimpleStateMachineStorage storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
Path parentPath = storage.findLatestSnapshot().getFile().getPath();
stateMachine.takeSnapshot();
Assert.assertTrue(parentPath.getParent().toFile().listFiles().length > 0);
FileInfo snapshot = storage.findLatestSnapshot().getFile();
Assert.assertNotNull(snapshot);
long containerID = omKeyLocationInfo.getContainerID();
Pipeline pipeline = cluster.getStorageContainerLocationClient().getContainerWithPipeline(containerID).getPipeline();
XceiverClientSpi xceiverClient = xceiverClientManager.acquireClient(pipeline);
ContainerProtos.ContainerCommandRequestProto.Builder request = ContainerProtos.ContainerCommandRequestProto.newBuilder();
request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
request.setCmdType(ContainerProtos.Type.CloseContainer);
request.setContainerID(containerID);
request.setCloseContainer(ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
try {
xceiverClient.sendCommand(request.build());
} catch (IOException e) {
Assert.fail("Exception should not be thrown");
}
Assert.assertEquals(ContainerProtos.ContainerDataProto.State.CLOSED, TestHelper.getDatanodeService(omKeyLocationInfo, cluster).getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerState());
Assert.assertTrue(stateMachine.isStateMachineHealthy());
try {
stateMachine.takeSnapshot();
} catch (IOException ioe) {
Assert.fail("Exception should not be thrown");
}
FileInfo latestSnapshot = storage.findLatestSnapshot().getFile();
Assert.assertNotEquals(snapshot.getPath(), latestSnapshot.getPath());
}
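
The final assertion compares snapshot file paths to show that a fresh snapshot was taken. Ratis snapshot info also carries a log index, so a stricter check is possible; the sketch below assumes the same storage variable and Ratis' SingleFileSnapshotInfo type:

// Sketch: assert that the snapshot taken after the container close advanced
// to a strictly higher log index than the snapshot captured earlier.
SingleFileSnapshotInfo earlier = storage.findLatestSnapshot(); // before the close
// ... close the container and call stateMachine.takeSnapshot() here ...
SingleFileSnapshotInfo later = storage.findLatestSnapshot();
Assert.assertTrue(later.getIndex() > earlier.getIndex());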
Use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.
From the class TestContainerStateMachineFailures, method testContainerStateMachineCloseOnMissingPipeline.
@Test
public void testContainerStateMachineCloseOnMissingPipeline() throws Exception {
// This integration test is a bit of a hack: it simulates the highly
// improbable event where the datanode no longer has the pipeline in its
// Ratis channel but still receives a close container command for a
// container that is open or in CLOSING state. Bugs in the code can lead
// to this sequence of events; to inject this state, the test removes the
// pipeline by directly calling the underlying method.
OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey("testQuasiClosed1", 1024, ReplicationType.RATIS, ReplicationFactor.THREE, new HashMap<>());
key.write("ratis".getBytes(UTF_8));
key.flush();
KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
Assert.assertEquals(1, locationInfoList.size());
OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
Set<HddsDatanodeService> datanodeSet = TestHelper.getDatanodeServices(cluster, omKeyLocationInfo.getPipeline());
long containerID = omKeyLocationInfo.getContainerID();
for (HddsDatanodeService dn : datanodeSet) {
XceiverServerRatis wc = (XceiverServerRatis) dn.getDatanodeStateMachine().getContainer().getWriteChannel();
if (wc == null) {
// Test applicable only for RATIS based channel.
return;
}
wc.notifyGroupRemove(RaftGroupId.valueOf(omKeyLocationInfo.getPipeline().getId().getId()));
SCMCommand<?> command = new CloseContainerCommand(containerID, omKeyLocationInfo.getPipeline().getId());
command.setTerm(cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
cluster.getStorageContainerManager().getScmNodeManager().addDatanodeCommand(dn.getDatanodeDetails().getUuid(), command);
}
for (HddsDatanodeService dn : datanodeSet) {
LambdaTestUtils.await(20000, 1000, () -> (dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerState().equals(QUASI_CLOSED)));
}
key.close();
}
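
The QUASI_CLOSED wait above could equally be written with GenericTestUtils.waitFor, which the replication test earlier in this section already uses; a sketch:

// Equivalent wait using GenericTestUtils.waitFor, polling every second for
// up to 20 seconds, instead of LambdaTestUtils.await.
for (HddsDatanodeService dn : datanodeSet) {
  GenericTestUtils.waitFor(() -> dn.getDatanodeStateMachine().getContainer()
      .getContainerSet().getContainer(containerID).getContainerState()
      .equals(QUASI_CLOSED), 1000, 20000);
}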
Use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.
From the class TestHelper, method getDatanodeServices.
public static Set<HddsDatanodeService> getDatanodeServices(MiniOzoneCluster cluster, Pipeline pipeline) {
Set<HddsDatanodeService> services = new HashSet<>();
Set<DatanodeDetails> pipelineNodes = pipeline.getNodeSet();
for (HddsDatanodeService service : cluster.getHddsDatanodes()) {
if (pipelineNodes.contains(service.getDatanodeDetails())) {
services.add(service);
}
}
Assert.assertEquals(pipelineNodes.size(), services.size());
return services;
}
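
The singular TestHelper.getDatanodeService used by the failure tests above resolves the one datanode hosting a key's block location. A hedged sketch of an equivalent lookup built on getDatanodeServices, assuming a ReplicationFactor.ONE pipeline as in those tests (illustrative only, not the actual TestHelper implementation):

// Illustrative only: resolves the single datanode of a factor-ONE pipeline
// by reusing getDatanodeServices above. Not the real TestHelper code.
public static HddsDatanodeService getDatanodeService(OmKeyLocationInfo info, MiniOzoneCluster cluster) {
  Set<HddsDatanodeService> services = getDatanodeServices(cluster, info.getPipeline());
  Assert.assertEquals(1, services.size());
  return services.iterator().next();
}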