Use of org.apache.hadoop.ozone.protocol.commands.CommandForDatanode in project ozone by apache.
The class PipelineReportHandler, method processPipelineReport.
protected void processPipelineReport(PipelineReport report, DatanodeDetails dn,
    EventPublisher publisher) throws IOException {
  PipelineID pipelineID = PipelineID.getFromProtobuf(report.getPipelineID());
  Pipeline pipeline;
  try {
    pipeline = pipelineManager.getPipeline(pipelineID);
  } catch (PipelineNotFoundException e) {
    // The reported pipeline is unknown to SCM; if this SCM is the leader,
    // tell the datanode to close it.
    if (scmContext.isLeader()) {
      LOGGER.info("Reported pipeline {} is not found", pipelineID);
      SCMCommand<?> command = new ClosePipelineCommand(pipelineID);
      command.setTerm(scmContext.getTermOfLeader());
      publisher.fireEvent(SCMEvents.DATANODE_COMMAND,
          new CommandForDatanode<>(dn.getUuid(), command));
    }
    return;
  }
  setReportedDatanode(pipeline, dn);
  setPipelineLeaderId(report, pipeline, dn);
  if (pipeline.getPipelineState() == Pipeline.PipelineState.ALLOCATED) {
    if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("Pipeline {} {} reported by {}",
          pipeline.getReplicationConfig(), pipeline.getId(), dn);
    }
    // An ALLOCATED pipeline that is now healthy can be opened.
    if (pipeline.isHealthy()) {
      pipelineManager.openPipeline(pipelineID);
    }
  }
  if (pipeline.isHealthy()) {
    if (pipelineAvailabilityCheck && scmSafeModeManager.getInSafeMode()) {
      publisher.fireEvent(SCMEvents.OPEN_PIPELINE, pipeline);
    }
  }
}
Use of org.apache.hadoop.ozone.protocol.commands.CommandForDatanode in project ozone by apache.
The class PipelineActionHandler, method processPipelineAction.
/**
* Process the given PipelineAction.
*
* @param datanode the datanode which has sent the PipelineAction
* @param pipelineAction the PipelineAction
* @param publisher EventPublisher to fire new events if required
*/
private void processPipelineAction(final DatanodeDetails datanode,
    final PipelineAction pipelineAction, final EventPublisher publisher) {
  final ClosePipelineInfo info = pipelineAction.getClosePipeline();
  final PipelineAction.Action action = pipelineAction.getAction();
  final PipelineID pid = PipelineID.getFromProtobuf(info.getPipelineID());
  try {
    LOG.info("Received pipeline action {} for {} from datanode {}. Reason : {}",
        action, pid, datanode.getUuidString(), info.getDetailedReason());
    if (action == PipelineAction.Action.CLOSE) {
      pipelineManager.closePipeline(pipelineManager.getPipeline(pid), false);
    } else {
      LOG.error("unknown pipeline action:{}", action);
    }
  } catch (PipelineNotFoundException e) {
    // SCM no longer knows this pipeline; ask the reporting datanode to close it.
    LOG.warn("Pipeline action {} received for unknown pipeline {}, "
        + "firing close pipeline event.", action, pid);
    SCMCommand<?> command = new ClosePipelineCommand(pid);
    try {
      command.setTerm(scmContext.getTermOfLeader());
    } catch (NotLeaderException nle) {
      LOG.warn("Skip sending ClosePipelineCommand for pipeline {}, "
          + "since not leader SCM.", pid);
      return;
    }
    publisher.fireEvent(SCMEvents.DATANODE_COMMAND,
        new CommandForDatanode<>(datanode.getUuid(), command));
  } catch (IOException ioe) {
    LOG.error("Could not execute pipeline action={} pipeline={}", action, pid, ioe);
  }
}
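Both handlers above follow the same pattern whenever SCM needs a datanode to act on a pipeline: build an SCMCommand, stamp it with the current leader term, wrap it in a CommandForDatanode keyed by the target datanode's UUID, and fire it on the DATANODE_COMMAND event queue. The following is a minimal sketch of that pattern distilled from the two snippets above; the helper name issueClosePipeline and the injected scmContext field are illustrative, not part of the Ozone API.

private void issueClosePipeline(PipelineID pipelineID, DatanodeDetails dn,
    EventPublisher publisher) {
  // Build the command the datanode should execute.
  SCMCommand<?> command = new ClosePipelineCommand(pipelineID);
  try {
    // Commands carry the leader term; only the leader SCM may issue them.
    command.setTerm(scmContext.getTermOfLeader());
  } catch (NotLeaderException nle) {
    return;
  }
  // Address the command to the target datanode and hand it to the event queue.
  publisher.fireEvent(SCMEvents.DATANODE_COMMAND,
      new CommandForDatanode<>(dn.getUuid(), command));
}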
Use of org.apache.hadoop.ozone.protocol.commands.CommandForDatanode in project ozone by apache.
The class TestSCMNodeManager, method testProcessLayoutVersionLowerMlv.
@Test
public void testProcessLayoutVersionLowerMlv() throws IOException {
  OzoneConfiguration conf = new OzoneConfiguration();
  SCMStorageConfig scmStorageConfig = mock(SCMStorageConfig.class);
  when(scmStorageConfig.getClusterID()).thenReturn("xyz111");
  EventPublisher eventPublisher = mock(EventPublisher.class);
  HDDSLayoutVersionManager lvm =
      new HDDSLayoutVersionManager(scmStorageConfig.getLayoutVersion());
  SCMNodeManager nodeManager = new SCMNodeManager(conf, scmStorageConfig,
      eventPublisher, new NetworkTopologyImpl(conf), SCMContext.emptyContext(), lvm);
  DatanodeDetails node1 = HddsTestUtils.createRandomDatanodeAndRegister(nodeManager);
  verify(eventPublisher, times(1)).fireEvent(NEW_NODE, node1);
  // Report a metadata layout version one below SCM's while the software
  // layout version matches.
  int scmMlv = nodeManager.getLayoutVersionManager().getMetadataLayoutVersion();
  nodeManager.processLayoutVersionReport(node1, LayoutVersionProto.newBuilder()
      .setMetadataLayoutVersion(scmMlv - 1)
      .setSoftwareLayoutVersion(scmMlv)
      .build());
  // SCM should respond with a finalizeNewLayoutVersionCommand addressed to that datanode.
  ArgumentCaptor<CommandForDatanode> captor =
      ArgumentCaptor.forClass(CommandForDatanode.class);
  verify(eventPublisher, times(1))
      .fireEvent(Mockito.eq(DATANODE_COMMAND), captor.capture());
  assertTrue(captor.getValue().getDatanodeId().equals(node1.getUuid()));
  assertTrue(captor.getValue().getCommand().getType()
      .equals(finalizeNewLayoutVersionCommand));
}
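The verification idiom used here, capturing the DATANODE_COMMAND payload with an ArgumentCaptor and then asserting on the wrapped command, can be pulled into a small helper when several tests need it. This is a sketch under the same Mockito/JUnit setup as the test above (same static imports of verify, times, and DATANODE_COMMAND, plus assertEquals); the helper name assertCommandSent is illustrative.

private static void assertCommandSent(EventPublisher eventPublisher,
    DatanodeDetails expectedTarget, SCMCommandProto.Type expectedType) {
  // Capture the CommandForDatanode fired on the DATANODE_COMMAND queue.
  ArgumentCaptor<CommandForDatanode> captor =
      ArgumentCaptor.forClass(CommandForDatanode.class);
  verify(eventPublisher, times(1))
      .fireEvent(Mockito.eq(DATANODE_COMMAND), captor.capture());
  // The command must be addressed to the expected datanode and have the expected type.
  assertEquals(expectedTarget.getUuid(), captor.getValue().getDatanodeId());
  assertEquals(expectedType, captor.getValue().getCommand().getType());
}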
Use of org.apache.hadoop.ozone.protocol.commands.CommandForDatanode in project ozone by apache.
The class TestReplicationManager, method testUnderReplicatedQuasiClosedContainerWithUnhealthyReplica.
/**
* When a QUASI_CLOSED container is under replicated, ReplicationManager
* should re-replicate it. If there is an unhealthy replica, it has to
* be deleted.
*
* In this test case, the container is QUASI_CLOSED and is under replicated
* and also has an unhealthy replica.
*
* In the first iteration of ReplicationManager, it should re-replicate
* the container so that it has enough replicas.
*
* In the second iteration, ReplicationManager should delete the unhealthy
* replica.
*
* In the third iteration, ReplicationManager will re-replicate as the
* container has again become under replicated after the unhealthy
* replica has been deleted.
*/
@Test
public void testUnderReplicatedQuasiClosedContainerWithUnhealthyReplica()
    throws IOException, InterruptedException, TimeoutException {
  final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED);
  container.setUsedBytes(99);
  final ContainerID id = container.containerID();
  final UUID originNodeId = UUID.randomUUID();
  final ContainerReplica replicaOne = getReplicas(
      id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails());
  final ContainerReplica replicaTwo = getReplicas(
      id, State.UNHEALTHY, 1000L, originNodeId, randomDatanodeDetails());
  containerStateManager.addContainer(container.getProtobuf());
  containerStateManager.updateContainerReplica(id, replicaOne);
  containerStateManager.updateContainerReplica(id, replicaTwo);
  final int currentReplicateCommandCount = datanodeCommandHandler
      .getInvocationCount(SCMCommandProto.Type.replicateContainerCommand);
  final int currentDeleteCommandCount = datanodeCommandHandler
      .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand);
  final long currentBytesToDelete =
      replicationManager.getMetrics().getNumDeletionBytesTotal();
  // First iteration: the container is under replicated, so a replicate
  // command should be sent.
  replicationManager.processAll();
  GenericTestUtils.waitFor(
      () -> (currentReplicateCommandCount + 1) == datanodeCommandHandler
          .getInvocationCount(SCMCommandProto.Type.replicateContainerCommand),
      50, 5000);
  Optional<CommandForDatanode> replicateCommand = datanodeCommandHandler
      .getReceivedCommands().stream()
      .filter(c -> c.getCommand().getType()
          .equals(SCMCommandProto.Type.replicateContainerCommand))
      .findFirst();
  Assert.assertTrue(replicateCommand.isPresent());
  DatanodeDetails newNode =
      createDatanodeDetails(replicateCommand.get().getDatanodeId());
  ContainerReplica newReplica =
      getReplicas(id, State.QUASI_CLOSED, 1000L, originNodeId, newNode);
  containerStateManager.updateContainerReplica(id, newReplica);
  ReplicationManagerReport report = replicationManager.getContainerReport();
  Assert.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED));
  Assert.assertEquals(1,
      report.getStat(ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK));
  Assert.assertEquals(1,
      report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED));
  Assert.assertEquals(0,
      report.getStat(ReplicationManagerReport.HealthState.UNHEALTHY));
  /*
   * The new replica has been reported to SCM; in the next ReplicationManager
   * iteration it should delete the unhealthy replica.
   */
  replicationManager.processAll();
  eventQueue.processAll(1000);
  Assert.assertEquals(currentDeleteCommandCount + 1, datanodeCommandHandler
      .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand));
  // ReplicaTwo, the unhealthy one, should be the replica selected for deletion.
  Assert.assertTrue(datanodeCommandHandler.received(
      SCMCommandProto.Type.deleteContainerCommand, replicaTwo.getDatanodeDetails()));
  Assert.assertEquals(currentDeleteCommandCount + 1,
      replicationManager.getMetrics().getNumDeletionCmdsSent());
  Assert.assertEquals(currentBytesToDelete + 99,
      replicationManager.getMetrics().getNumDeletionBytesTotal());
  Assert.assertEquals(1, replicationManager.getInflightDeletion().size());
  Assert.assertEquals(1, replicationManager.getMetrics().getInflightDeletion());
  containerStateManager.removeContainerReplica(id, replicaTwo);
  final long currentDeleteCommandCompleted =
      replicationManager.getMetrics().getNumDeletionCmdsCompleted();
  report = replicationManager.getContainerReport();
  Assert.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED));
  Assert.assertEquals(1,
      report.getStat(ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK));
  Assert.assertEquals(0,
      report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED));
  Assert.assertEquals(1,
      report.getStat(ReplicationManagerReport.HealthState.UNHEALTHY));
  /*
   * The unhealthy replica has now been removed; the next iteration of
   * ReplicationManager should re-replicate the container as it is
   * under replicated again.
   */
  replicationManager.processAll();
  eventQueue.processAll(1000);
  Assert.assertEquals(0, replicationManager.getInflightDeletion().size());
  Assert.assertEquals(0, replicationManager.getMetrics().getInflightDeletion());
  Assert.assertEquals(currentDeleteCommandCompleted + 1,
      replicationManager.getMetrics().getNumDeletionCmdsCompleted());
  Assert.assertEquals(currentReplicateCommandCount + 2, datanodeCommandHandler
      .getInvocationCount(SCMCommandProto.Type.replicateContainerCommand));
  Assert.assertEquals(currentReplicateCommandCount + 2,
      replicationManager.getMetrics().getNumReplicationCmdsSent());
  Assert.assertEquals(1, replicationManager.getInflightReplication().size());
  Assert.assertEquals(1, replicationManager.getMetrics().getInflightReplication());
  report = replicationManager.getContainerReport();
  Assert.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED));
  Assert.assertEquals(1,
      report.getStat(ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK));
  Assert.assertEquals(1,
      report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED));
  Assert.assertEquals(0,
      report.getStat(ReplicationManagerReport.HealthState.UNHEALTHY));
}
Use of org.apache.hadoop.ozone.protocol.commands.CommandForDatanode in project ozone by apache.
The class RatisPipelineProvider, method close.
/**
* Removes pipeline from SCM. Sends command to destroy pipeline on all
* the datanodes.
*
* @param pipeline - Pipeline to be destroyed
* @throws NotLeaderException if this SCM is not the leader and may not send datanode commands
*/
@Override
public void close(Pipeline pipeline) throws NotLeaderException {
  final ClosePipelineCommand closeCommand =
      new ClosePipelineCommand(pipeline.getId());
  closeCommand.setTerm(scmContext.getTermOfLeader());
  pipeline.getNodes().forEach(node -> {
    final CommandForDatanode<?> datanodeCommand =
        new CommandForDatanode<>(node.getUuid(), closeCommand);
    LOG.info("Send pipeline:{} close command to datanode {}",
        pipeline.getId(), datanodeCommand.getDatanodeId());
    eventPublisher.fireEvent(SCMEvents.DATANODE_COMMAND, datanodeCommand);
  });
}
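Since close() propagates the NotLeaderException raised by setTerm, a caller that is not already guarded by a leadership check has to handle it, just as PipelineActionHandler does above. A hedged sketch of such a caller; pipelineProvider and LOG are illustrative names, not part of the Ozone API.

try {
  pipelineProvider.close(pipeline);
} catch (NotLeaderException nle) {
  // Only the leader SCM may send the close command; a follower simply skips it.
  LOG.warn("Skip closing pipeline {} since this SCM is not the leader.",
      pipeline.getId());
}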