use of org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine in project ozone by apache.
the class TestContainerStateMachine method testRatisSnapshotRetention.
@Test
public void testRatisSnapshotRetention() throws Exception {
ContainerStateMachine stateMachine = (ContainerStateMachine) TestHelper.getStateMachine(cluster);
SimpleStateMachineStorage storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
Assert.assertNull(storage.findLatestSnapshot());
// Write 10 keys. Num snapshots should be equal to config value.
for (int i = 1; i <= 10; i++) {
OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey(("ratis" + i), 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
// First write and flush creates a container in the datanode
key.write(("ratis" + i).getBytes(UTF_8));
key.flush();
key.write(("ratis" + i).getBytes(UTF_8));
key.close();
}
RatisServerConfiguration ratisServerConfiguration = conf.getObject(RatisServerConfiguration.class);
stateMachine = (ContainerStateMachine) TestHelper.getStateMachine(cluster);
storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
Path parentPath = storage.findLatestSnapshot().getFile().getPath();
int numSnapshots = parentPath.getParent().toFile().listFiles().length;
Assert.assertTrue(Math.abs(ratisServerConfiguration.getNumSnapshotsRetained() - numSnapshots) <= 1);
// Write 10 more keys. Num Snapshots should remain the same.
for (int i = 11; i <= 20; i++) {
OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey(("ratis" + i), 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
// First write and flush creates a container in the datanode
key.write(("ratis" + i).getBytes(UTF_8));
key.flush();
key.write(("ratis" + i).getBytes(UTF_8));
key.close();
}
stateMachine = (ContainerStateMachine) TestHelper.getStateMachine(cluster);
storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
parentPath = storage.findLatestSnapshot().getFile().getPath();
numSnapshots = parentPath.getParent().toFile().listFiles().length;
Assert.assertTrue(Math.abs(ratisServerConfiguration.getNumSnapshotsRetained() - numSnapshots) <= 1);
}
use of org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine in project ozone by apache.
the class TestContainerStateMachineFailures method testApplyTransactionIdempotencyWithClosedContainer.
@Test
public void testApplyTransactionIdempotencyWithClosedContainer() throws Exception {
OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey("ratis", 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
// First write and flush creates a container in the datanode
key.write("ratis".getBytes(UTF_8));
key.flush();
key.write("ratis".getBytes(UTF_8));
KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
Assert.assertEquals(1, locationInfoList.size());
OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
ContainerData containerData = dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(omKeyLocationInfo.getContainerID()).getContainerData();
Assert.assertTrue(containerData instanceof KeyValueContainerData);
key.close();
ContainerStateMachine stateMachine = (ContainerStateMachine) TestHelper.getStateMachine(dn, omKeyLocationInfo.getPipeline());
SimpleStateMachineStorage storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
Path parentPath = storage.findLatestSnapshot().getFile().getPath();
stateMachine.takeSnapshot();
Assert.assertTrue(parentPath.getParent().toFile().listFiles().length > 0);
FileInfo snapshot = storage.findLatestSnapshot().getFile();
Assert.assertNotNull(snapshot);
long containerID = omKeyLocationInfo.getContainerID();
Pipeline pipeline = cluster.getStorageContainerLocationClient().getContainerWithPipeline(containerID).getPipeline();
XceiverClientSpi xceiverClient = xceiverClientManager.acquireClient(pipeline);
ContainerProtos.ContainerCommandRequestProto.Builder request = ContainerProtos.ContainerCommandRequestProto.newBuilder();
request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
request.setCmdType(ContainerProtos.Type.CloseContainer);
request.setContainerID(containerID);
request.setCloseContainer(ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
try {
xceiverClient.sendCommand(request.build());
} catch (IOException e) {
Assert.fail("Exception should not be thrown");
}
Assert.assertTrue(TestHelper.getDatanodeService(omKeyLocationInfo, cluster).getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerState() == ContainerProtos.ContainerDataProto.State.CLOSED);
Assert.assertTrue(stateMachine.isStateMachineHealthy());
try {
stateMachine.takeSnapshot();
} catch (IOException ioe) {
Assert.fail("Exception should not be thrown");
}
FileInfo latestSnapshot = storage.findLatestSnapshot().getFile();
Assert.assertFalse(snapshot.getPath().equals(latestSnapshot.getPath()));
}
use of org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine in project ozone by apache.
the class TestContainerStateMachineFailures method testApplyTransactionFailure.
@Test
public void testApplyTransactionFailure() throws Exception {
OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey("ratis", 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
// First write and flush creates a container in the datanode
key.write("ratis".getBytes(UTF_8));
key.flush();
key.write("ratis".getBytes(UTF_8));
KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
Assert.assertEquals(1, locationInfoList.size());
OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
int index = cluster.getHddsDatanodeIndex(dn.getDatanodeDetails());
ContainerData containerData = dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(omKeyLocationInfo.getContainerID()).getContainerData();
Assert.assertTrue(containerData instanceof KeyValueContainerData);
KeyValueContainerData keyValueContainerData = (KeyValueContainerData) containerData;
key.close();
ContainerStateMachine stateMachine = (ContainerStateMachine) TestHelper.getStateMachine(cluster.getHddsDatanodes().get(index), omKeyLocationInfo.getPipeline());
SimpleStateMachineStorage storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
stateMachine.takeSnapshot();
Path parentPath = storage.findLatestSnapshot().getFile().getPath();
// Since the snapshot threshold is set to 1, since there are
// applyTransactions, we should see snapshots
Assert.assertTrue(parentPath.getParent().toFile().listFiles().length > 0);
FileInfo snapshot = storage.findLatestSnapshot().getFile();
Assert.assertNotNull(snapshot);
long containerID = omKeyLocationInfo.getContainerID();
// delete the container db file
FileUtil.fullyDelete(new File(keyValueContainerData.getContainerPath()));
Pipeline pipeline = cluster.getStorageContainerLocationClient().getContainerWithPipeline(containerID).getPipeline();
XceiverClientSpi xceiverClient = xceiverClientManager.acquireClient(pipeline);
ContainerProtos.ContainerCommandRequestProto.Builder request = ContainerProtos.ContainerCommandRequestProto.newBuilder();
request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
request.setCmdType(ContainerProtos.Type.CloseContainer);
request.setContainerID(containerID);
request.setCloseContainer(ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
try {
xceiverClient.sendCommand(request.build());
Assert.fail("Expected exception not thrown");
} catch (IOException e) {
// Exception should be thrown
}
// Make sure the container is marked unhealthy
Assert.assertTrue(dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerState() == ContainerProtos.ContainerDataProto.State.UNHEALTHY);
try {
// try to take a new snapshot, ideally it should just fail
stateMachine.takeSnapshot();
} catch (IOException ioe) {
Assert.assertTrue(ioe instanceof StateMachineException);
}
if (snapshot.getPath().toFile().exists()) {
// Make sure the latest snapshot is same as the previous one
try {
FileInfo latestSnapshot = storage.findLatestSnapshot().getFile();
Assert.assertTrue(snapshot.getPath().equals(latestSnapshot.getPath()));
} catch (Throwable e) {
Assert.assertFalse(snapshot.getPath().toFile().exists());
}
}
// when remove pipeline, group dir including snapshot will be deleted
LambdaTestUtils.await(5000, 500, () -> (!snapshot.getPath().toFile().exists()));
}
use of org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine in project ozone by apache.
the class TestValidateBCSIDOnRestart method testValidateBCSIDOnDnRestart.
@Test
public void testValidateBCSIDOnDnRestart() throws Exception {
OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey("ratis", 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
// First write and flush creates a container in the datanode
key.write("ratis".getBytes(UTF_8));
key.flush();
key.write("ratis".getBytes(UTF_8));
KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
Assert.assertEquals(1, locationInfoList.size());
OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
ContainerData containerData = TestHelper.getDatanodeService(omKeyLocationInfo, cluster).getDatanodeStateMachine().getContainer().getContainerSet().getContainer(omKeyLocationInfo.getContainerID()).getContainerData();
Assert.assertTrue(containerData instanceof KeyValueContainerData);
KeyValueContainerData keyValueContainerData = (KeyValueContainerData) containerData;
key.close();
long containerID = omKeyLocationInfo.getContainerID();
int index = cluster.getHddsDatanodeIndex(dn.getDatanodeDetails());
// delete the container db file
FileUtil.fullyDelete(new File(keyValueContainerData.getContainerPath()));
HddsDatanodeService dnService = cluster.getHddsDatanodes().get(index);
OzoneContainer ozoneContainer = dnService.getDatanodeStateMachine().getContainer();
ozoneContainer.getContainerSet().removeContainer(containerID);
ContainerStateMachine stateMachine = (ContainerStateMachine) TestHelper.getStateMachine(cluster.getHddsDatanodes().get(index), omKeyLocationInfo.getPipeline());
SimpleStateMachineStorage storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
stateMachine.takeSnapshot();
Path parentPath = storage.findLatestSnapshot().getFile().getPath();
stateMachine.buildMissingContainerSet(parentPath.toFile());
// Since the snapshot threshold is set to 1, since there are
// applyTransactions, we should see snapshots
Assert.assertTrue(parentPath.getParent().toFile().listFiles().length > 0);
// make sure the missing containerSet is not empty
HddsDispatcher dispatcher = (HddsDispatcher) ozoneContainer.getDispatcher();
Assert.assertTrue(!dispatcher.getMissingContainerSet().isEmpty());
Assert.assertTrue(dispatcher.getMissingContainerSet().contains(containerID));
// write a new key
key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey("ratis", 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
// First write and flush creates a container in the datanode
key.write("ratis1".getBytes(UTF_8));
key.flush();
groupOutputStream = (KeyOutputStream) key.getOutputStream();
locationInfoList = groupOutputStream.getLocationInfoList();
Assert.assertEquals(1, locationInfoList.size());
omKeyLocationInfo = locationInfoList.get(0);
key.close();
containerID = omKeyLocationInfo.getContainerID();
dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
containerData = dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(omKeyLocationInfo.getContainerID()).getContainerData();
Assert.assertTrue(containerData instanceof KeyValueContainerData);
keyValueContainerData = (KeyValueContainerData) containerData;
ReferenceCountedDB db = BlockUtils.getDB(keyValueContainerData, conf);
// modify the bcsid for the container in the ROCKS DB thereby inducing
// corruption
db.getStore().getMetadataTable().put(OzoneConsts.BLOCK_COMMIT_SEQUENCE_ID, 0L);
db.decrementReference();
// after the restart, there will be a mismatch in BCSID of what is recorded
// in the and what is there in RockSDB and hence the container would be
// marked unhealthy
index = cluster.getHddsDatanodeIndex(dn.getDatanodeDetails());
cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
// Make sure the container is marked unhealthy
Assert.assertTrue(cluster.getHddsDatanodes().get(index).getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerState() == ContainerProtos.ContainerDataProto.State.UNHEALTHY);
}
use of org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine in project ozone by apache.
the class TestContainerStateMachineFailures method testWriteStateMachineDataIdempotencyWithClosedContainer.
// The test injects multiple write chunk requests along with closed container
// request thereby inducing a situation where a writeStateMachine call
// gets executed when the closed container apply completes thereby
// failing writeStateMachine call. In any case, our stateMachine should
// not be marked unhealthy and pipeline should not fail if container gets
// closed here.
@Test
public void testWriteStateMachineDataIdempotencyWithClosedContainer() throws Exception {
OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName).createKey("ratis-1", 1024, ReplicationType.RATIS, ReplicationFactor.ONE, new HashMap<>());
// First write and flush creates a container in the datanode
key.write("ratis".getBytes(UTF_8));
key.flush();
key.write("ratis".getBytes(UTF_8));
KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
List<OmKeyLocationInfo> locationInfoList = groupOutputStream.getLocationInfoList();
Assert.assertEquals(1, locationInfoList.size());
OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
ContainerData containerData = dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainer(omKeyLocationInfo.getContainerID()).getContainerData();
Assert.assertTrue(containerData instanceof KeyValueContainerData);
key.close();
ContainerStateMachine stateMachine = (ContainerStateMachine) TestHelper.getStateMachine(dn, omKeyLocationInfo.getPipeline());
SimpleStateMachineStorage storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
Path parentPath = storage.findLatestSnapshot().getFile().getPath();
stateMachine.takeSnapshot();
// Since the snapshot threshold is set to 1, since there are
// applyTransactions, we should see snapshots
Assert.assertTrue(parentPath.getParent().toFile().listFiles().length > 0);
FileInfo snapshot = storage.findLatestSnapshot().getFile();
Assert.assertNotNull(snapshot);
long containerID = omKeyLocationInfo.getContainerID();
Pipeline pipeline = cluster.getStorageContainerLocationClient().getContainerWithPipeline(containerID).getPipeline();
XceiverClientSpi xceiverClient = xceiverClientManager.acquireClient(pipeline);
CountDownLatch latch = new CountDownLatch(100);
int count = 0;
AtomicInteger failCount = new AtomicInteger(0);
Runnable r1 = () -> {
try {
ContainerProtos.ContainerCommandRequestProto.Builder request = ContainerProtos.ContainerCommandRequestProto.newBuilder();
request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
request.setCmdType(ContainerProtos.Type.CloseContainer);
request.setContainerID(containerID);
request.setCloseContainer(ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
xceiverClient.sendCommand(request.build());
} catch (IOException e) {
failCount.incrementAndGet();
}
};
Runnable r2 = () -> {
try {
ByteString data = ByteString.copyFromUtf8("hello");
ContainerProtos.ContainerCommandRequestProto.Builder writeChunkRequest = ContainerTestHelper.newWriteChunkRequestBuilder(pipeline, omKeyLocationInfo.getBlockID(), data.size(), random.nextInt());
writeChunkRequest.setWriteChunk(writeChunkRequest.getWriteChunkBuilder().setData(data));
xceiverClient.sendCommand(writeChunkRequest.build());
latch.countDown();
} catch (IOException e) {
latch.countDown();
if (!(HddsClientUtils.checkForException(e) instanceof ContainerNotOpenException)) {
failCount.incrementAndGet();
}
String message = e.getMessage();
Assert.assertFalse(message, message.contains("hello"));
Assert.assertTrue(message, message.contains(HddsUtils.REDACTED.toStringUtf8()));
}
};
List<Thread> threadList = new ArrayList<>();
for (int i = 0; i < 100; i++) {
count++;
Thread r = new Thread(r2);
r.start();
threadList.add(r);
}
Thread closeContainerThread = new Thread(r1);
closeContainerThread.start();
threadList.add(closeContainerThread);
latch.await(600, TimeUnit.SECONDS);
for (int i = 0; i < 101; i++) {
threadList.get(i).join();
}
if (failCount.get() > 0) {
fail("testWriteStateMachineDataIdempotencyWithClosedContainer failed");
}
Assert.assertTrue(TestHelper.getDatanodeService(omKeyLocationInfo, cluster).getDatanodeStateMachine().getContainer().getContainerSet().getContainer(containerID).getContainerState() == ContainerProtos.ContainerDataProto.State.CLOSED);
Assert.assertTrue(stateMachine.isStateMachineHealthy());
try {
stateMachine.takeSnapshot();
} catch (IOException ioe) {
Assert.fail("Exception should not be thrown");
}
FileInfo latestSnapshot = storage.findLatestSnapshot().getFile();
Assert.assertFalse(snapshot.getPath().equals(latestSnapshot.getPath()));
r2.run();
}
Aggregations