use of org.apache.hadoop.hdds.scm.container.ReplicationManager in project ozone by apache.
the class TestReplicationManagerMetrics method setup.
@Before
public void setup() {
  final ReplicationManagerReport containerReport = new ReplicationManagerReport();
  // Give every lifecycle state a distinct count: bump its counter once per
  // unit of the state's numeric value.
  for (HddsProtos.LifeCycleState lifeCycleState : HddsProtos.LifeCycleState.values()) {
    final int target = lifeCycleState.getNumber();
    for (int count = 0; count < target; count++) {
      containerReport.increment(lifeCycleState);
    }
  }
  // Ordinals begin at 0, so each health state ends up with a count equal to
  // its own ordinal.
  for (ReplicationManagerReport.HealthState healthState : ReplicationManagerReport.HealthState.values()) {
    final int target = healthState.ordinal();
    for (int count = 0; count < target; count++) {
      containerReport.increment(healthState);
    }
  }
  // The metrics object reads its values from the mocked manager's report.
  replicationManager = Mockito.mock(ReplicationManager.class);
  Mockito.when(replicationManager.getContainerReport()).thenReturn(containerReport);
  metrics = ReplicationManagerMetrics.create(replicationManager);
}
use of org.apache.hadoop.hdds.scm.container.ReplicationManager in project ozone by apache.
the class StorageContainerManager method initializeSystemManagers.
/**
 * Wires up the managers this SCM instance depends on. Wherever the
 * configurator supplies a non-null component it is used as-is; otherwise a
 * default implementation is constructed here.
 *
 * Components are set up in this order, since later ones are built from
 * earlier ones:
 *
 * Network Topology
 * SCM HA Manager
 * Sequence Id Generator
 * SCM Context
 * Node Manager
 * Container Placement Policy
 * Pipeline Manager
 * Container Manager
 * Pipeline Choose Policy
 * Writable Container Factory
 * Block Manager
 * Replication Manager
 * Safe Mode Manager
 * Node Decommission Manager
 *
 * @param conf - Ozone Configuration.
 * @param configurator - A customizer which allows different managers to be
 * used if needed.
 * @throws IOException - on Failure.
 */
private void initializeSystemManagers(OzoneConfiguration conf, SCMConfigurator configurator) throws IOException {
  clusterMap = configurator.getNetworkTopology() != null
      ? configurator.getNetworkTopology()
      : new NetworkTopologyImpl(conf);

  // Ratis metric reporters have to be registered before Ratis is initialized.
  RatisDropwizardExports.registerRatisMetricReporters(ratisMetricsMap);
  scmHAManager = configurator.getSCMHAManager() != null
      ? configurator.getSCMHAManager()
      : new SCMHAManagerImpl(conf, this);

  // Inline upgrade for SequenceIdGenerator, followed by the distributed
  // sequence id generator itself.
  SequenceIdGenerator.upgradeToSequenceId(scmMetadataStore);
  sequenceIdGen = new SequenceIdGenerator(conf, scmHAManager, scmMetadataStore.getSequenceIdTable());

  if (configurator.getScmContext() == null) {
    // With term == SCMContext.INVALID_TERM, the isLeader() check and
    // getTermOfLeader() will always pass.
    long term = SCMHAUtils.isSCMHAEnabled(conf) ? 0 : SCMContext.INVALID_TERM;
    // Start as a non-leader of that term, in safe mode, preCheck not done.
    scmContext = new SCMContext.Builder()
        .setLeader(false)
        .setTerm(term)
        .setIsInSafeMode(true)
        .setIsPreCheckComplete(false)
        .setSCM(this)
        .build();
  } else {
    scmContext = configurator.getScmContext();
  }

  if (configurator.getScmNodeManager() == null) {
    scmNodeManager = new SCMNodeManager(conf, scmStorageConfig, eventQueue, clusterMap, scmContext, scmLayoutVersionManager);
  } else {
    scmNodeManager = configurator.getScmNodeManager();
  }

  placementMetrics = SCMContainerPlacementMetrics.create();
  containerPlacementPolicy = ContainerPlacementPolicyFactory.getPolicy(conf, scmNodeManager, clusterMap, true, placementMetrics);

  if (configurator.getPipelineManager() == null) {
    pipelineManager = PipelineManagerImpl.newPipelineManager(conf, scmHAManager, scmNodeManager, scmMetadataStore.getPipelineTable(), eventQueue, scmContext, serviceManager);
  } else {
    pipelineManager = configurator.getPipelineManager();
  }

  if (configurator.getContainerManager() == null) {
    containerManager = new ContainerManagerImpl(conf, scmHAManager, sequenceIdGen, pipelineManager, scmMetadataStore.getContainerTable());
  } else {
    containerManager = configurator.getContainerManager();
  }

  pipelineChoosePolicy = PipelineChoosePolicyFactory.getPolicy(conf);
  writableContainerFactory = configurator.getWritableContainerFactory() != null
      ? configurator.getWritableContainerFactory()
      : new WritableContainerFactory(this);
  scmBlockManager = configurator.getScmBlockManager() != null
      ? configurator.getScmBlockManager()
      : new BlockManagerImpl(conf, this);

  if (configurator.getReplicationManager() == null) {
    replicationManager = new ReplicationManager(conf, containerManager, containerPlacementPolicy, eventQueue, scmContext, serviceManager, scmNodeManager, new MonotonicClock(ZoneOffset.UTC), scmHAManager, getScmMetadataStore().getMoveTable());
  } else {
    replicationManager = configurator.getReplicationManager();
  }

  if (configurator.getScmSafeModeManager() == null) {
    scmSafeModeManager = new SCMSafeModeManager(conf, containerManager.getContainers(), containerManager, pipelineManager, eventQueue, serviceManager, scmContext);
  } else {
    scmSafeModeManager = configurator.getScmSafeModeManager();
  }

  // The decommission manager is always constructed here; the configurator
  // is not consulted for it.
  scmDecommissionManager = new NodeDecommissionManager(conf, scmNodeManager, containerManager, scmContext, eventQueue, replicationManager);
}
use of org.apache.hadoop.hdds.scm.container.ReplicationManager in project ozone by apache.
the class TestContainerBalancer method setup.
/**
 * Prepares the balancer configuration, builds a mock cluster, stubs the SCM
 * collaborators and finally creates the ContainerBalancer under test.
 */
@Before
public void setup() throws SCMException, NodeNotFoundException {
  conf = new OzoneConfiguration();
  scm = mock(StorageContainerManager.class);
  containerManager = mock(ContainerManager.class);
  replicationManager = mock(ReplicationManager.class);

  // Balancer settings are written back into conf once they are all set.
  balancerConfiguration = conf.getObject(ContainerBalancerConfiguration.class);
  balancerConfiguration.setThreshold(10);
  balancerConfiguration.setIterations(1);
  balancerConfiguration.setMaxDatanodesPercentageToInvolvePerIteration(100);
  balancerConfiguration.setMaxSizeToMovePerIteration(50 * OzoneConsts.GB);
  balancerConfiguration.setMaxSizeEnteringTarget(50 * OzoneConsts.GB);
  conf.setFromObject(balancerConfiguration);
  GenericTestUtils.setLogLevel(ContainerBalancer.LOG, Level.DEBUG);

  // The mock cluster must exist before the node manager and the container
  // stubs below read the maps it is built from.
  averageUtilization = createCluster();
  mockNodeManager = new MockNodeManager(datanodeToContainersMap);
  placementPolicy = ContainerPlacementPolicyFactory.getPolicy(
      conf,
      mockNodeManager,
      mockNodeManager.getClusterNetworkTopologyMap(),
      true,
      SCMContainerPlacementMetrics.create());

  // Replication manager: nothing is replicating or deleting, and every move
  // is reported as already completed.
  when(replicationManager.isContainerReplicatingOrDeleting(Mockito.any(ContainerID.class)))
      .thenReturn(false);
  when(replicationManager.move(
      Mockito.any(ContainerID.class),
      Mockito.any(DatanodeDetails.class),
      Mockito.any(DatanodeDetails.class)))
      .thenReturn(CompletableFuture.completedFuture(ReplicationManager.MoveResult.COMPLETED));

  // Container manager: lookups are answered from the test's own maps.
  when(containerManager.getContainerReplicas(Mockito.any(ContainerID.class)))
      .thenAnswer(invocation -> cidToReplicasMap.get((ContainerID) invocation.getArguments()[0]));
  when(containerManager.getContainer(Mockito.any(ContainerID.class)))
      .thenAnswer(invocation -> cidToInfoMap.get((ContainerID) invocation.getArguments()[0]));
  when(containerManager.getContainers()).thenReturn(new ArrayList<>(cidToInfoMap.values()));

  // SCM facade handing out the collaborators prepared above.
  when(scm.getScmNodeManager()).thenReturn(mockNodeManager);
  when(scm.getContainerPlacementPolicy()).thenReturn(placementPolicy);
  when(scm.getContainerManager()).thenReturn(containerManager);
  when(scm.getReplicationManager()).thenReturn(replicationManager);
  when(scm.getScmContext()).thenReturn(SCMContext.emptyContext());
  when(scm.getClusterMap()).thenReturn(null);
  when(scm.getEventQueue()).thenReturn(mock(EventPublisher.class));
  when(scm.getConfiguration()).thenReturn(conf);

  containerBalancer = new ContainerBalancer(scm);
}
use of org.apache.hadoop.hdds.scm.container.ReplicationManager in project ozone by apache.
the class TestStorageContainerManager method testCloseContainerCommandOnRestart.
/**
 * Finalizes a container, restarts the SCM, and verifies that the restarted
 * ReplicationManager fires a close-container datanode command for that
 * container.
 *
 * @throws Exception if the cluster cannot be started or any wait times out.
 */
@Test
@SuppressWarnings("unchecked")
public void testCloseContainerCommandOnRestart() throws Exception {
  int numKeys = 15;
  OzoneConfiguration conf = new OzoneConfiguration();
  conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 1, TimeUnit.SECONDS);
  conf.setInt(ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 5);
  conf.setTimeDuration(OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS);
  conf.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, numKeys);
  conf.setBoolean(HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false);
  MiniOzoneCluster cluster = MiniOzoneCluster.newBuilder(conf).setHbInterval(1000).setHbProcessorInterval(3000).setTrace(false).setNumDatanodes(1).build();
  try {
    // Wait inside the try block so that the cluster is still shut down when
    // it never becomes ready.
    cluster.waitForClusterToBeReady();
    cluster.waitForPipelineTobeReady(HddsProtos.ReplicationFactor.ONE, 30000);
    TestStorageContainerManagerHelper helper = new TestStorageContainerManagerHelper(cluster, conf);
    helper.createKeys(10, 4096);
    GenericTestUtils.waitFor(() -> cluster.getStorageContainerManager().getContainerManager().getContainers() != null, 1000, 10000);
    StorageContainerManager scm = cluster.getStorageContainerManager();
    List<ContainerInfo> containers = cluster.getStorageContainerManager().getContainerManager().getContainers();
    Assert.assertNotNull(containers);
    // Fail with a clear message instead of a NoSuchElementException below.
    Assert.assertFalse("Expected at least one container after creating keys", containers.isEmpty());
    ContainerInfo selectedContainer = containers.iterator().next();
    // Stop processing HB
    scm.getDatanodeProtocolServer().stop();
    LOG.info("Current Container State is {}", selectedContainer.getState());
    try {
      scm.getContainerManager().updateContainerState(selectedContainer.containerID(), HddsProtos.LifeCycleEvent.FINALIZE);
    } catch (SCMException ex) {
      // The failure is tolerated only when the container is already CLOSING.
      if (selectedContainer.getState() != HddsProtos.LifeCycleState.CLOSING) {
        LOG.error("Unexpected failure while finalizing the container", ex);
        throw ex;
      }
    }
    cluster.restartStorageContainerManager(false);
    scm = cluster.getStorageContainerManager();
    // Swap the restarted ReplicationManager's event publisher for a mock so
    // the events it fires can be verified.
    // NOTE(review): clearing FINAL through Field's "modifiers" field is
    // rejected by newer JDKs (12+) - confirm the JDK range this test targets.
    EventPublisher publisher = mock(EventPublisher.class);
    ReplicationManager replicationManager = scm.getReplicationManager();
    Field f = ReplicationManager.class.getDeclaredField("eventPublisher");
    f.setAccessible(true);
    Field modifiersField = Field.class.getDeclaredField("modifiers");
    modifiersField.setAccessible(true);
    modifiersField.setInt(f, f.getModifiers() & ~Modifier.FINAL);
    f.set(replicationManager, publisher);
    UUID dnUuid = cluster.getHddsDatanodes().iterator().next().getDatanodeDetails().getUuid();
    CloseContainerCommand closeContainerCommand = new CloseContainerCommand(selectedContainer.getContainerID(), selectedContainer.getPipelineID(), false);
    CommandForDatanode commandForDatanode = new CommandForDatanode(dnUuid, closeContainerCommand);
    GenericTestUtils.waitFor(() -> {
      SCMContext scmContext = cluster.getStorageContainerManager().getScmContext();
      return !scmContext.isInSafeMode() && scmContext.isLeader();
    }, 1000, 25000);
    // After safe mode is off, ReplicationManager starts to run with a delay.
    Thread.sleep(5000);
    // Give ReplicationManager some time to process the containers.
    cluster.getStorageContainerManager().getReplicationManager().processAll();
    Thread.sleep(5000);
    verify(publisher).fireEvent(eq(SCMEvents.DATANODE_COMMAND), argThat(new CloseContainerCommandMatcher(dnUuid, commandForDatanode)));
  } finally {
    cluster.shutdown();
  }
}
use of org.apache.hadoop.hdds.scm.container.ReplicationManager in project ozone by apache.
the class TestSCMSafeModeWithPipelineRules method testScmSafeMode.
/**
 * Restarts OM and SCM, then brings datanodes back pipeline by pipeline and
 * checks that the safe mode pipeline rules are met, safe mode is left, and
 * the pipeline counts return to their original values.
 *
 * @throws Exception if cluster setup fails or any wait times out.
 */
@Test
public void testScmSafeMode() throws Exception {
  int datanodeCount = 6;
  setup(datanodeCount);
  waitForRatis3NodePipelines(datanodeCount / 3);
  waitForRatis1NodePipelines(datanodeCount);
  int totalPipelineCount = datanodeCount + (datanodeCount / 3);
  // The cluster started successfully; stop it and restart OM and SCM.
  cluster.stop();
  cluster.restartOzoneManager();
  cluster.restartStorageContainerManager(false);
  pipelineManager = cluster.getStorageContainerManager().getPipelineManager();
  List<Pipeline> pipelineList = pipelineManager.getPipelines(RatisReplicationConfig.getInstance(ReplicationFactor.THREE));
  // Restart every datanode of the first three-node pipeline.
  pipelineList.get(0).getNodes().forEach(this::restartDatanodeOrFail);
  SCMSafeModeManager scmSafeModeManager = cluster.getStorageContainerManager().getScmSafeModeManager();
  // Ceil(0.1 * 2) is 1, so with one healthy pipeline the healthy pipeline
  // rule is satisfied.
  GenericTestUtils.waitFor(() -> scmSafeModeManager.getHealthyPipelineSafeModeRule().validate(), 1000, 60000);
  // Ceil(0.9 * 2) is 2, and no datanode of the second pipeline has reported
  // yet, so the one-replica pipeline rule is not met.
  GenericTestUtils.waitFor(() -> !scmSafeModeManager.getOneReplicaPipelineSafeModeRule().validate(), 1000, 60000);
  Assert.assertTrue(cluster.getStorageContainerManager().isInSafeMode());
  DatanodeDetails restartedDatanode = pipelineList.get(1).getFirstNode();
  // Now restart one datanode from the 2nd pipeline
  restartDatanodeOrFail(restartedDatanode);
  GenericTestUtils.waitFor(() -> scmSafeModeManager.getOneReplicaPipelineSafeModeRule().validate(), 1000, 60000);
  GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), 1000, 60000);
  // The wait time after safe mode has not elapsed yet, so the total number
  // of pipelines should still be the original count:
  // 6 (1 node pipelines) + 2 (3 node pipelines).
  Assert.assertEquals(totalPipelineCount, pipelineManager.getPipelines().size());
  ReplicationManager replicationManager = cluster.getStorageContainerManager().getReplicationManager();
  GenericTestUtils.waitFor(() -> replicationManager.isRunning(), 1000, 60000);
  // As 4 datanodes have reported, expect 4 single node pipelines and one
  // 3 node pipeline.
  waitForRatis1NodePipelines(4);
  waitForRatis3NodePipelines(1);
  // Restart the other datanodes of the second pipeline; after some time the
  // counts should be back to the original values.
  pipelineList.get(1).getNodes().forEach(datanodeDetails -> {
    if (!restartedDatanode.equals(datanodeDetails)) {
      restartDatanodeOrFail(datanodeDetails);
    }
  });
  waitForRatis1NodePipelines(datanodeCount);
  waitForRatis3NodePipelines(datanodeCount / 3);
}

/**
 * Restarts the given datanode and fails the test if the restart throws,
 * including the thrown exception in the failure message.
 *
 * @param datanodeDetails the datanode to restart.
 */
private void restartDatanodeOrFail(DatanodeDetails datanodeDetails) {
  try {
    cluster.restartHddsDatanode(datanodeDetails, false);
  } catch (Exception ex) {
    fail("Datanode restart failed: " + ex);
  }
}
Aggregations