use of org.apache.hadoop.hdds.scm.ha.SCMContext in project ozone by apache.
the class TestHealthyPipelineSafeModeRule method testHealthyPipelineSafeModeRuleWithMixedPipelines.
/**
 * Exercises the healthy pipeline safe mode rule with a mix of open Ratis
 * pipelines: one of factor ONE and two of factor THREE. The test asserts that
 * the rule is still unsatisfied after an event is fired for the factor ONE
 * pipeline, and then waits for it to become satisfied once events for the two
 * factor THREE pipelines have been fired.
 *
 * @throws Exception if set-up fails or one of the waits times out.
 */
@Test
public void testHealthyPipelineSafeModeRuleWithMixedPipelines() throws Exception {
  String storageDir = GenericTestUtils.getTempPath(
      TestHealthyPipelineSafeModeRule.class.getName() + UUID.randomUUID());
  EventQueue eventQueue = new EventQueue();
  SCMServiceManager serviceManager = new SCMServiceManager();
  SCMContext scmContext = SCMContext.emptyContext();
  List<ContainerInfo> containers =
      new ArrayList<>(HddsTestUtils.getContainerInfo(1));
  OzoneConfiguration config = new OzoneConfiguration();
  // In Mock Node Manager, first 8 nodes are healthy, next 2 nodes are
  // stale and last one is dead, and this repeats. So for a 12 node, 9
  // healthy, 2 stale and one dead.
  MockNodeManager nodeManager = new MockNodeManager(true, 12);
  config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
  // enable pipeline check
  config.setBoolean(
      HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK, true);
  config.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false);
  SCMMetadataStore scmMetadataStore = new SCMMetadataStoreImpl(config);
  try {
    PipelineManagerImpl pipelineManager = PipelineManagerImpl.newPipelineManager(
        config, MockSCMHAManager.getInstance(true), nodeManager,
        scmMetadataStore.getPipelineTable(), eventQueue, scmContext,
        serviceManager);
    PipelineProvider mockRatisProvider = new MockRatisPipelineProvider(
        nodeManager, pipelineManager.getStateManager(), config);
    pipelineManager.setPipelineProvider(
        HddsProtos.ReplicationType.RATIS, mockRatisProvider);
    // Create 3 pipelines: one of factor ONE followed by two of factor THREE.
    Pipeline pipeline1 = pipelineManager.createPipeline(
        RatisReplicationConfig.getInstance(ReplicationFactor.ONE));
    pipelineManager.openPipeline(pipeline1.getId());
    Pipeline pipeline2 = pipelineManager.createPipeline(
        RatisReplicationConfig.getInstance(ReplicationFactor.THREE));
    pipelineManager.openPipeline(pipeline2.getId());
    Pipeline pipeline3 = pipelineManager.createPipeline(
        RatisReplicationConfig.getInstance(ReplicationFactor.THREE));
    pipelineManager.openPipeline(pipeline3.getId());
    // Re-read each pipeline from the manager and mark it healthy.
    pipeline1 = pipelineManager.getPipeline(pipeline1.getId());
    MockRatisPipelineProvider.markPipelineHealthy(pipeline1);
    pipeline2 = pipelineManager.getPipeline(pipeline2.getId());
    MockRatisPipelineProvider.markPipelineHealthy(pipeline2);
    pipeline3 = pipelineManager.getPipeline(pipeline3.getId());
    MockRatisPipelineProvider.markPipelineHealthy(pipeline3);
    SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
        config, containers, null, pipelineManager, eventQueue,
        serviceManager, scmContext);
    HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
        scmSafeModeManager.getHealthyPipelineSafeModeRule();
    // No pipeline event has been sent to SCMSafeModeManager yet.
    Assert.assertFalse(healthyPipelineSafeModeRule.validate());
    GenericTestUtils.LogCapturer logCapturer =
        GenericTestUtils.LogCapturer.captureLogs(
            LoggerFactory.getLogger(SCMSafeModeManager.class));
    try {
      // fire event with pipeline create status with ratis type and factor 1
      // pipeline, validate() should return false
      firePipelineEvent(pipeline1, eventQueue);
      GenericTestUtils.waitFor(
          () -> logCapturer.getOutput().contains("reported count is 1"),
          1000, 5000);
      Assert.assertFalse(healthyPipelineSafeModeRule.validate());
      firePipelineEvent(pipeline2, eventQueue);
      firePipelineEvent(pipeline3, eventQueue);
      GenericTestUtils.waitFor(
          () -> healthyPipelineSafeModeRule.validate(), 1000, 5000);
    } finally {
      // Detach the capturing appender so it does not outlive this test.
      logCapturer.stopCapturing();
    }
  } finally {
    scmMetadataStore.getStore().close();
    FileUtil.fullyDelete(new File(storageDir));
  }
}
use of org.apache.hadoop.hdds.scm.ha.SCMContext in project ozone by apache.
the class StorageContainerManager method initializeSystemManagers.
/**
 * Wires up the core managers of this SCM. For every manager that the
 * configurator can supply, a non-null instance from the configurator is used
 * as-is; otherwise a default implementation is constructed here.
 *
 * Managers covered:
 *   Node Manager
 *   Pipeline Manager
 *   Container Manager
 *   Block Manager
 *   Replication Manager
 *   Safe Mode Manager
 *
 * @param conf - Ozone Configuration.
 * @param configurator - A customizer which allows different managers to be
 * used if needed.
 * @throws IOException - on Failure.
 */
private void initializeSystemManagers(OzoneConfiguration conf, SCMConfigurator configurator) throws IOException {
  clusterMap = configurator.getNetworkTopology() != null
      ? configurator.getNetworkTopology()
      : new NetworkTopologyImpl(conf);

  // The Ratis metric reporters have to be registered before Ratis itself
  // is initialized.
  RatisDropwizardExports.registerRatisMetricReporters(ratisMetricsMap);

  scmHAManager = configurator.getSCMHAManager() != null
      ? configurator.getSCMHAManager()
      : new SCMHAManagerImpl(conf, this);

  // Inline upgrade for SequenceIdGenerator, followed by the distributed
  // sequence id generator itself.
  SequenceIdGenerator.upgradeToSequenceId(scmMetadataStore);
  sequenceIdGen = new SequenceIdGenerator(
      conf, scmHAManager, scmMetadataStore.getSequenceIdTable());

  if (configurator.getScmContext() == null) {
    // With a term of SCMContext.INVALID_TERM, the isLeader() check and
    // getTermOfLeader() will always pass.
    long initialTerm =
        SCMHAUtils.isSCMHAEnabled(conf) ? 0 : SCMContext.INVALID_TERM;
    // Start as a non-leader, in safe mode, with preCheck not completed.
    scmContext = new SCMContext.Builder()
        .setLeader(false)
        .setTerm(initialTerm)
        .setIsInSafeMode(true)
        .setIsPreCheckComplete(false)
        .setSCM(this)
        .build();
  } else {
    scmContext = configurator.getScmContext();
  }

  scmNodeManager = configurator.getScmNodeManager() != null
      ? configurator.getScmNodeManager()
      : new SCMNodeManager(conf, scmStorageConfig, eventQueue, clusterMap,
          scmContext, scmLayoutVersionManager);

  placementMetrics = SCMContainerPlacementMetrics.create();
  containerPlacementPolicy = ContainerPlacementPolicyFactory.getPolicy(
      conf, scmNodeManager, clusterMap, true, placementMetrics);

  pipelineManager = configurator.getPipelineManager() != null
      ? configurator.getPipelineManager()
      : PipelineManagerImpl.newPipelineManager(conf, scmHAManager,
          scmNodeManager, scmMetadataStore.getPipelineTable(), eventQueue,
          scmContext, serviceManager);

  containerManager = configurator.getContainerManager() != null
      ? configurator.getContainerManager()
      : new ContainerManagerImpl(conf, scmHAManager, sequenceIdGen,
          pipelineManager, scmMetadataStore.getContainerTable());

  pipelineChoosePolicy = PipelineChoosePolicyFactory.getPolicy(conf);

  writableContainerFactory = configurator.getWritableContainerFactory() != null
      ? configurator.getWritableContainerFactory()
      : new WritableContainerFactory(this);

  scmBlockManager = configurator.getScmBlockManager() != null
      ? configurator.getScmBlockManager()
      : new BlockManagerImpl(conf, this);

  replicationManager = configurator.getReplicationManager() != null
      ? configurator.getReplicationManager()
      : new ReplicationManager(conf, containerManager,
          containerPlacementPolicy, eventQueue, scmContext, serviceManager,
          scmNodeManager, new MonotonicClock(ZoneOffset.UTC), scmHAManager,
          getScmMetadataStore().getMoveTable());

  scmSafeModeManager = configurator.getScmSafeModeManager() != null
      ? configurator.getScmSafeModeManager()
      : new SCMSafeModeManager(conf, containerManager.getContainers(),
          containerManager, pipelineManager, eventQueue, serviceManager,
          scmContext);

  // The decommission manager is always built here; the configurator is not
  // consulted for it.
  scmDecommissionManager = new NodeDecommissionManager(conf, scmNodeManager,
      containerManager, scmContext, eventQueue, replicationManager);
}
use of org.apache.hadoop.hdds.scm.ha.SCMContext in project ozone by apache.
the class TestHealthyPipelineSafeModeRule method testHealthyPipelineSafeModeRuleWithNoPipelines.
/**
 * With a mock node manager created for zero nodes and no pipelines created,
 * the healthy pipeline safe mode rule is expected to validate right away.
 *
 * @throws Exception if set-up of the managers fails.
 */
@Test
public void testHealthyPipelineSafeModeRuleWithNoPipelines() throws Exception {
  final EventQueue queue = new EventQueue();
  final SCMServiceManager services = new SCMServiceManager();
  final SCMContext context = SCMContext.emptyContext();
  final List<ContainerInfo> containerInfos =
      new ArrayList<>(HddsTestUtils.getContainerInfo(1));
  final String metadataDir = GenericTestUtils.getTempPath(
      TestHealthyPipelineSafeModeRule.class.getName() + UUID.randomUUID());

  final OzoneConfiguration conf = new OzoneConfiguration();
  final MockNodeManager nodes = new MockNodeManager(true, 0);
  conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, metadataDir);
  // Turn the pipeline availability check on and pipeline creation in
  // safe mode off.
  conf.setBoolean(
      HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK, true);
  conf.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false);

  final SCMMetadataStore metadataStore = new SCMMetadataStoreImpl(conf);
  try {
    PipelineManagerImpl pipelines = PipelineManagerImpl.newPipelineManager(
        conf, MockSCMHAManager.getInstance(true), nodes,
        metadataStore.getPipelineTable(), queue, context, services);
    PipelineProvider ratisProvider = new MockRatisPipelineProvider(
        nodes, pipelines.getStateManager(), conf);
    pipelines.setPipelineProvider(
        HddsProtos.ReplicationType.RATIS, ratisProvider);

    SCMSafeModeManager safeModeManager = new SCMSafeModeManager(
        conf, containerInfos, null, pipelines, queue, services, context);

    // Nothing has been created yet, so the rule should already hold.
    Assert.assertTrue(
        safeModeManager.getHealthyPipelineSafeModeRule().validate());
  } finally {
    metadataStore.getStore().close();
    FileUtil.fullyDelete(new File(metadataDir));
  }
}
use of org.apache.hadoop.hdds.scm.ha.SCMContext in project ozone by apache.
the class TestHealthyPipelineSafeModeRule method testHealthyPipelineSafeModeRuleWithPipelines.
/**
 * Creates three open, healthy Ratis factor THREE pipelines and checks that
 * the healthy pipeline safe mode rule is unsatisfied before any pipeline
 * event is fired, then waits for it to become satisfied after an event for
 * the first pipeline has been fired.
 *
 * @throws Exception if set-up fails or the wait times out.
 */
@Test
public void testHealthyPipelineSafeModeRuleWithPipelines() throws Exception {
  final String metadataDir = GenericTestUtils.getTempPath(
      TestHealthyPipelineSafeModeRule.class.getName() + UUID.randomUUID());
  final EventQueue queue = new EventQueue();
  final SCMServiceManager services = new SCMServiceManager();
  final SCMContext context = SCMContext.emptyContext();
  final List<ContainerInfo> containerInfos =
      new ArrayList<>(HddsTestUtils.getContainerInfo(1));
  final OzoneConfiguration conf = new OzoneConfiguration();
  // In Mock Node Manager, first 8 nodes are healthy, next 2 nodes are
  // stale and last one is dead, and this repeats. So for a 12 node, 9
  // healthy, 2 stale and one dead.
  final MockNodeManager nodes = new MockNodeManager(true, 12);
  conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, metadataDir);
  // Turn the pipeline availability check on and pipeline creation in
  // safe mode off.
  conf.setBoolean(
      HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK, true);
  conf.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false);

  final SCMMetadataStore metadataStore = new SCMMetadataStoreImpl(conf);
  try {
    PipelineManagerImpl pipelines = PipelineManagerImpl.newPipelineManager(
        conf, MockSCMHAManager.getInstance(true), nodes,
        metadataStore.getPipelineTable(), queue, context, services);
    PipelineProvider ratisProvider = new MockRatisPipelineProvider(
        nodes, pipelines.getStateManager(), conf);
    pipelines.setPipelineProvider(
        HddsProtos.ReplicationType.RATIS, ratisProvider);

    // Create and open 3 pipelines, one after the other.
    List<Pipeline> opened = new ArrayList<>();
    for (int i = 0; i < 3; i++) {
      Pipeline created = pipelines.createPipeline(
          RatisReplicationConfig.getInstance(ReplicationFactor.THREE));
      pipelines.openPipeline(created.getId());
      opened.add(created);
    }

    // Re-read every pipeline from the manager and mark it healthy.
    List<Pipeline> healthy = new ArrayList<>();
    for (Pipeline candidate : opened) {
      Pipeline current = pipelines.getPipeline(candidate.getId());
      MockRatisPipelineProvider.markPipelineHealthy(current);
      healthy.add(current);
    }

    SCMSafeModeManager safeModeManager = new SCMSafeModeManager(
        conf, containerInfos, null, pipelines, queue, services, context);
    HealthyPipelineSafeModeRule rule =
        safeModeManager.getHealthyPipelineSafeModeRule();

    // No pipeline event has been fired yet, so the rule must not hold.
    Assert.assertFalse(rule.validate());

    // With 3 pipelines, 10% is 0.3 and its ceiling is 1, so one pipeline
    // event should be enough for validate() to return true.
    // This test runs without the pipeline report handler, which is what
    // normally moves pipelines to healthy; the pipelines are therefore left
    // in the open state in the pipeline manager to keep the test simple.
    firePipelineEvent(healthy.get(0), queue);
    GenericTestUtils.waitFor(rule::validate, 1000, 5000);
  } finally {
    metadataStore.getStore().close();
    FileUtil.fullyDelete(new File(metadataDir));
  }
}
use of org.apache.hadoop.hdds.scm.ha.SCMContext in project ozone by apache.
the class TestStorageContainerManager method testCloseContainerCommandOnRestart.
/**
 * Starts a single-datanode mini cluster, writes some keys, finalizes one
 * container while the SCM datanode protocol server is stopped, restarts the
 * SCM, and then verifies that the restarted SCM's ReplicationManager fires a
 * DATANODE_COMMAND event carrying a close container command for that
 * container and datanode.
 *
 * The ReplicationManager's "eventPublisher" field is swapped for a Mockito
 * mock via reflection so that the fired event can be verified.
 *
 * @throws Exception if the cluster cannot be started, a wait times out, or
 *         the reflective field replacement fails.
 */
@Test
@SuppressWarnings("unchecked")
public void testCloseContainerCommandOnRestart() throws Exception {
  int numKeys = 15;
  OzoneConfiguration conf = new OzoneConfiguration();
  conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 1, TimeUnit.SECONDS);
  conf.setInt(ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 5);
  conf.setTimeDuration(OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL,
      100, TimeUnit.MILLISECONDS);
  conf.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, numKeys);
  conf.setBoolean(HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false);
  MiniOzoneCluster cluster = MiniOzoneCluster.newBuilder(conf)
      .setHbInterval(1000)
      .setHbProcessorInterval(3000)
      .setTrace(false)
      .setNumDatanodes(1)
      .build();
  try {
    // The readiness waits run inside the try so that a timeout still shuts
    // the cluster down instead of leaking it.
    cluster.waitForClusterToBeReady();
    cluster.waitForPipelineTobeReady(HddsProtos.ReplicationFactor.ONE, 30000);

    TestStorageContainerManagerHelper helper =
        new TestStorageContainerManagerHelper(cluster, conf);
    helper.createKeys(10, 4096);
    // Wait until at least one container exists; a non-null but empty list
    // would make iterator().next() below throw.
    GenericTestUtils.waitFor(() -> {
      List<ContainerInfo> current = cluster.getStorageContainerManager()
          .getContainerManager().getContainers();
      return current != null && !current.isEmpty();
    }, 1000, 10000);
    StorageContainerManager scm = cluster.getStorageContainerManager();
    List<ContainerInfo> containers = cluster.getStorageContainerManager()
        .getContainerManager().getContainers();
    Assert.assertNotNull(containers);
    Assert.assertFalse(containers.isEmpty());
    ContainerInfo selectedContainer = containers.iterator().next();
    // Stop processing HB
    scm.getDatanodeProtocolServer().stop();
    LOG.info("Current Container State is {}", selectedContainer.getState());
    try {
      scm.getContainerManager().updateContainerState(
          selectedContainer.containerID(), HddsProtos.LifeCycleEvent.FINALIZE);
    } catch (SCMException ex) {
      // A failure is tolerated only when the container is already CLOSING;
      // anything else is logged and rethrown.
      if (selectedContainer.getState() != HddsProtos.LifeCycleState.CLOSING) {
        LOG.error("Failed to finalize container {}",
            selectedContainer.containerID(), ex);
        throw ex;
      }
    }
    cluster.restartStorageContainerManager(false);
    scm = cluster.getStorageContainerManager();
    EventPublisher publisher = mock(EventPublisher.class);
    ReplicationManager replicationManager = scm.getReplicationManager();
    // Replace the ReplicationManager's event publisher with the mock. The
    // FINAL modifier is cleared through Field's own "modifiers" field first.
    // NOTE(review): looking up Field's "modifiers" field is blocked on newer
    // JDKs (12+) - confirm which JDK versions this test must run on.
    Field f = ReplicationManager.class.getDeclaredField("eventPublisher");
    f.setAccessible(true);
    Field modifiersField = Field.class.getDeclaredField("modifiers");
    modifiersField.setAccessible(true);
    modifiersField.setInt(f, f.getModifiers() & ~Modifier.FINAL);
    f.set(replicationManager, publisher);
    UUID dnUuid = cluster.getHddsDatanodes().iterator().next()
        .getDatanodeDetails().getUuid();
    CloseContainerCommand closeContainerCommand = new CloseContainerCommand(
        selectedContainer.getContainerID(), selectedContainer.getPipelineID(),
        false);
    CommandForDatanode commandForDatanode =
        new CommandForDatanode(dnUuid, closeContainerCommand);
    // Wait until the restarted SCM has left safe mode and is the leader.
    GenericTestUtils.waitFor(() -> {
      SCMContext scmContext =
          cluster.getStorageContainerManager().getScmContext();
      return !scmContext.isInSafeMode() && scmContext.isLeader();
    }, 1000, 25000);
    // After safe mode is off, ReplicationManager starts to run with a delay.
    Thread.sleep(5000);
    // Give ReplicationManager some time to process the containers.
    cluster.getStorageContainerManager().getReplicationManager().processAll();
    Thread.sleep(5000);
    verify(publisher).fireEvent(eq(SCMEvents.DATANODE_COMMAND),
        argThat(new CloseContainerCommandMatcher(dnUuid, commandForDatanode)));
  } finally {
    cluster.shutdown();
  }
}
Aggregations