Search in sources :

Example 1 with ReplicationManager

use of org.apache.hadoop.hdds.scm.container.ReplicationManager in project ozone by apache.

the class TestReplicationManagerMetrics method setup.

/**
 * Seeds a ReplicationManagerReport with a distinct, predictable count for
 * every state and exposes it through a mocked ReplicationManager, which in
 * turn backs the metrics object under test.
 */
@Before
public void setup() {
    final ReplicationManagerReport seededReport = new ReplicationManagerReport();
    // Bump every lifecycle state's counter as many times as its getNumber()
    // value, so the final count of a state equals that number.
    for (HddsProtos.LifeCycleState lifeCycleState : HddsProtos.LifeCycleState.values()) {
        int remaining = lifeCycleState.getNumber();
        while (remaining > 0) {
            seededReport.increment(lifeCycleState);
            remaining--;
        }
    }
    // Ordinals begin at 0, hence each health state ends up with a count equal
    // to its own ordinal.
    for (ReplicationManagerReport.HealthState healthState : ReplicationManagerReport.HealthState.values()) {
        int remaining = healthState.ordinal();
        while (remaining > 0) {
            seededReport.increment(healthState);
            remaining--;
        }
    }
    replicationManager = Mockito.mock(ReplicationManager.class);
    Mockito.when(replicationManager.getContainerReport()).thenReturn(seededReport);
    metrics = ReplicationManagerMetrics.create(replicationManager);
}
Also used : ReplicationManager(org.apache.hadoop.hdds.scm.container.ReplicationManager) HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) ReplicationManagerReport(org.apache.hadoop.hdds.scm.container.ReplicationManagerReport) Before(org.junit.Before)

Example 2 with ReplicationManager

use of org.apache.hadoop.hdds.scm.container.ReplicationManager in project ozone by apache.

the class StorageContainerManager method initializeSystemManagers.

/**
 * Initializes the SCM system managers. For every manager that the
 * configurator can supply, a non-null configurator value is used as-is;
 * otherwise the default implementation is constructed here.
 *
 * Managers initialized (configurator-overridable unless noted):
 *  Network topology (cluster map)
 *  SCM HA Manager
 *  Sequence id generator (always constructed here)
 *  SCM Context
 *  Node Manager
 *  Container placement metrics and policy (always constructed here)
 *  Pipeline Manager
 *  Container Manager
 *  Pipeline choose policy (always obtained from the factory)
 *  Writable Container Factory
 *  Block Manager
 *  Replication Manager
 *  Safe Mode Manager
 *  Node Decommission Manager (always constructed here)
 *
 * The statement order matters: later managers are constructed from the
 * fields assigned by earlier steps.
 *
 * @param conf - Ozone Configuration.
 * @param configurator - A customizer which allows different managers to be
 *                    used if needed.
 * @throws IOException - on Failure.
 */
private void initializeSystemManagers(OzoneConfiguration conf, SCMConfigurator configurator) throws IOException {
    // Cluster network topology: configurator override or default implementation.
    if (configurator.getNetworkTopology() != null) {
        clusterMap = configurator.getNetworkTopology();
    } else {
        clusterMap = new NetworkTopologyImpl(conf);
    }
    // This needs to be done before initializing Ratis.
    RatisDropwizardExports.registerRatisMetricReporters(ratisMetricsMap);
    // HA manager: configurator override or default implementation bound to this SCM.
    if (configurator.getSCMHAManager() != null) {
        scmHAManager = configurator.getSCMHAManager();
    } else {
        scmHAManager = new SCMHAManagerImpl(conf, this);
    }
    // inline upgrade for SequenceIdGenerator
    SequenceIdGenerator.upgradeToSequenceId(scmMetadataStore);
    // Distributed sequence id generator
    sequenceIdGen = new SequenceIdGenerator(conf, scmHAManager, scmMetadataStore.getSequenceIdTable());
    // SCM context: configurator override or a freshly built initial context.
    if (configurator.getScmContext() != null) {
        scmContext = configurator.getScmContext();
    } else {
        // When term equals SCMContext.INVALID_TERM, the isLeader() check
        // and getTermOfLeader() will always pass.
        long term = SCMHAUtils.isSCMHAEnabled(conf) ? 0 : SCMContext.INVALID_TERM;
        // non-leader of term 0, in safe mode, preCheck not completed.
        scmContext = new SCMContext.Builder().setLeader(false).setTerm(term).setIsInSafeMode(true).setIsPreCheckComplete(false).setSCM(this).build();
    }
    // Node manager: needs the cluster map and SCM context assigned above.
    if (configurator.getScmNodeManager() != null) {
        scmNodeManager = configurator.getScmNodeManager();
    } else {
        scmNodeManager = new SCMNodeManager(conf, scmStorageConfig, eventQueue, clusterMap, scmContext, scmLayoutVersionManager);
    }
    // Placement metrics and policy are not overridable through the configurator.
    placementMetrics = SCMContainerPlacementMetrics.create();
    containerPlacementPolicy = ContainerPlacementPolicyFactory.getPolicy(conf, scmNodeManager, clusterMap, true, placementMetrics);
    // Pipeline manager: built from the HA manager, node manager and SCM context.
    if (configurator.getPipelineManager() != null) {
        pipelineManager = configurator.getPipelineManager();
    } else {
        pipelineManager = PipelineManagerImpl.newPipelineManager(conf, scmHAManager, scmNodeManager, scmMetadataStore.getPipelineTable(), eventQueue, scmContext, serviceManager);
    }
    // Container manager: built from the sequence id generator and pipeline manager.
    if (configurator.getContainerManager() != null) {
        containerManager = configurator.getContainerManager();
    } else {
        containerManager = new ContainerManagerImpl(conf, scmHAManager, sequenceIdGen, pipelineManager, scmMetadataStore.getContainerTable());
    }
    pipelineChoosePolicy = PipelineChoosePolicyFactory.getPolicy(conf);
    // Writable container factory: configurator override or default bound to this SCM.
    if (configurator.getWritableContainerFactory() != null) {
        writableContainerFactory = configurator.getWritableContainerFactory();
    } else {
        writableContainerFactory = new WritableContainerFactory(this);
    }
    // Block manager: configurator override or default bound to this SCM.
    if (configurator.getScmBlockManager() != null) {
        scmBlockManager = configurator.getScmBlockManager();
    } else {
        scmBlockManager = new BlockManagerImpl(conf, this);
    }
    // Replication manager: the default uses a UTC-based MonotonicClock and the move table.
    if (configurator.getReplicationManager() != null) {
        replicationManager = configurator.getReplicationManager();
    } else {
        replicationManager = new ReplicationManager(conf, containerManager, containerPlacementPolicy, eventQueue, scmContext, serviceManager, scmNodeManager, new MonotonicClock(ZoneOffset.UTC), scmHAManager, getScmMetadataStore().getMoveTable());
    }
    // Safe mode manager: the default is given the container list known at this point.
    if (configurator.getScmSafeModeManager() != null) {
        scmSafeModeManager = configurator.getScmSafeModeManager();
    } else {
        scmSafeModeManager = new SCMSafeModeManager(conf, containerManager.getContainers(), containerManager, pipelineManager, eventQueue, serviceManager, scmContext);
    }
    // Decommission manager is always constructed here; it takes the node,
    // container and replication managers assigned above.
    scmDecommissionManager = new NodeDecommissionManager(conf, scmNodeManager, containerManager, scmContext, eventQueue, replicationManager);
}
Also used : ReplicationManager(org.apache.hadoop.hdds.scm.container.ReplicationManager) SCMHAManagerImpl(org.apache.hadoop.hdds.scm.ha.SCMHAManagerImpl) SCMSafeModeManager(org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager) ContainerManagerImpl(org.apache.hadoop.hdds.scm.container.ContainerManagerImpl) NetworkTopologyImpl(org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl) SCMContext(org.apache.hadoop.hdds.scm.ha.SCMContext) SCMNodeManager(org.apache.hadoop.hdds.scm.node.SCMNodeManager) SequenceIdGenerator(org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator) WritableContainerFactory(org.apache.hadoop.hdds.scm.pipeline.WritableContainerFactory) NodeDecommissionManager(org.apache.hadoop.hdds.scm.node.NodeDecommissionManager) MonotonicClock(org.apache.hadoop.ozone.common.MonotonicClock) BlockManagerImpl(org.apache.hadoop.hdds.scm.block.BlockManagerImpl)

Example 3 with ReplicationManager

use of org.apache.hadoop.hdds.scm.container.ReplicationManager in project ozone by apache.

the class TestContainerBalancer method setup.

/**
 * Prepares the balancer configuration, builds the mock cluster and stubs
 * every SCM component handed to the ContainerBalancer under test.
 */
@Before
public void setup() throws SCMException, NodeNotFoundException {
    conf = new OzoneConfiguration();
    scm = Mockito.mock(StorageContainerManager.class);
    containerManager = Mockito.mock(ContainerManager.class);
    replicationManager = Mockito.mock(ReplicationManager.class);

    // Balancer settings are read from conf, adjusted, and written back.
    balancerConfiguration = conf.getObject(ContainerBalancerConfiguration.class);
    balancerConfiguration.setThreshold(10);
    balancerConfiguration.setIterations(1);
    balancerConfiguration.setMaxDatanodesPercentageToInvolvePerIteration(100);
    balancerConfiguration.setMaxSizeToMovePerIteration(50 * OzoneConsts.GB);
    balancerConfiguration.setMaxSizeEnteringTarget(50 * OzoneConsts.GB);
    conf.setFromObject(balancerConfiguration);
    GenericTestUtils.setLogLevel(ContainerBalancer.LOG, Level.DEBUG);

    // Build the mock cluster and the node manager / placement policy on top of it.
    averageUtilization = createCluster();
    mockNodeManager = new MockNodeManager(datanodeToContainersMap);
    placementPolicy = ContainerPlacementPolicyFactory.getPolicy(
        conf,
        mockNodeManager,
        mockNodeManager.getClusterNetworkTopologyMap(),
        true,
        SCMContainerPlacementMetrics.create());

    // Replication manager: nothing is in flight and every move completes.
    Mockito.when(replicationManager.isContainerReplicatingOrDeleting(Mockito.any(ContainerID.class)))
        .thenReturn(false);
    Mockito.when(replicationManager.move(
            Mockito.any(ContainerID.class),
            Mockito.any(DatanodeDetails.class),
            Mockito.any(DatanodeDetails.class)))
        .thenReturn(CompletableFuture.completedFuture(ReplicationManager.MoveResult.COMPLETED));

    // Container manager: lookups are answered from the maps of the mock cluster.
    Mockito.when(containerManager.getContainerReplicas(Mockito.any(ContainerID.class)))
        .thenAnswer(invocation -> cidToReplicasMap.get((ContainerID) invocation.getArguments()[0]));
    Mockito.when(containerManager.getContainer(Mockito.any(ContainerID.class)))
        .thenAnswer(invocation -> cidToInfoMap.get((ContainerID) invocation.getArguments()[0]));
    Mockito.when(containerManager.getContainers())
        .thenReturn(new ArrayList<>(cidToInfoMap.values()));

    // SCM facade returning the collaborators prepared above.
    Mockito.when(scm.getScmNodeManager()).thenReturn(mockNodeManager);
    Mockito.when(scm.getContainerPlacementPolicy()).thenReturn(placementPolicy);
    Mockito.when(scm.getContainerManager()).thenReturn(containerManager);
    Mockito.when(scm.getReplicationManager()).thenReturn(replicationManager);
    Mockito.when(scm.getScmContext()).thenReturn(SCMContext.emptyContext());
    Mockito.when(scm.getClusterMap()).thenReturn(null);
    Mockito.when(scm.getEventQueue()).thenReturn(Mockito.mock(EventPublisher.class));
    Mockito.when(scm.getConfiguration()).thenReturn(conf);

    containerBalancer = new ContainerBalancer(scm);
}
Also used : StorageContainerManager(org.apache.hadoop.hdds.scm.server.StorageContainerManager) ContainerManager(org.apache.hadoop.hdds.scm.container.ContainerManager) StorageContainerManager(org.apache.hadoop.hdds.scm.server.StorageContainerManager) ReplicationManager(org.apache.hadoop.hdds.scm.container.ReplicationManager) EventPublisher(org.apache.hadoop.hdds.server.events.EventPublisher) ContainerID(org.apache.hadoop.hdds.scm.container.ContainerID) MockDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) MockNodeManager(org.apache.hadoop.hdds.scm.container.MockNodeManager) Before(org.junit.Before)

Example 4 with ReplicationManager

use of org.apache.hadoop.hdds.scm.container.ReplicationManager in project ozone by apache.

the class TestStorageContainerManager method testCloseContainerCommandOnRestart.

/**
 * Checks that, after SCM is restarted, ReplicationManager fires a
 * close-container datanode command for a container that received the
 * FINALIZE event before the restart.
 *
 * @throws Exception if cluster setup, reflective access or any wait fails.
 */
@Test
@SuppressWarnings("unchecked")
public void testCloseContainerCommandOnRestart() throws Exception {
    int numKeys = 15;
    OzoneConfiguration conf = new OzoneConfiguration();
    conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 1, TimeUnit.SECONDS);
    conf.setInt(ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 5);
    conf.setTimeDuration(OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS);
    conf.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, numKeys);
    conf.setBoolean(HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false);
    MiniOzoneCluster cluster = MiniOzoneCluster.newBuilder(conf).setHbInterval(1000).setHbProcessorInterval(3000).setTrace(false).setNumDatanodes(1).build();
    try {
        // The readiness waits live inside the try block so that the cluster is
        // still shut down if it never becomes ready.
        cluster.waitForClusterToBeReady();
        cluster.waitForPipelineTobeReady(HddsProtos.ReplicationFactor.ONE, 30000);
        TestStorageContainerManagerHelper helper = new TestStorageContainerManagerHelper(cluster, conf);
        helper.createKeys(10, 4096);
        GenericTestUtils.waitFor(() -> cluster.getStorageContainerManager().getContainerManager().getContainers() != null, 1000, 10000);
        StorageContainerManager scm = cluster.getStorageContainerManager();
        List<ContainerInfo> containers = scm.getContainerManager().getContainers();
        Assert.assertNotNull(containers);
        ContainerInfo selectedContainer = containers.iterator().next();
        // Stop processing HB
        scm.getDatanodeProtocolServer().stop();
        LOG.info("Current Container State is {}", selectedContainer.getState());
        try {
            scm.getContainerManager().updateContainerState(selectedContainer.containerID(), HddsProtos.LifeCycleEvent.FINALIZE);
        } catch (SCMException ex) {
            // The failure is tolerated only when the container is already
            // CLOSING; otherwise propagate it (the test report carries the
            // stack trace, so no printStackTrace is needed).
            if (selectedContainer.getState() != HddsProtos.LifeCycleState.CLOSING) {
                throw ex;
            }
        }
        cluster.restartStorageContainerManager(false);
        scm = cluster.getStorageContainerManager();
        // Swap the ReplicationManager's event publisher for a mock so the
        // fired events can be verified.
        EventPublisher publisher = mock(EventPublisher.class);
        ReplicationManager replicationManager = scm.getReplicationManager();
        Field f = ReplicationManager.class.getDeclaredField("eventPublisher");
        // setAccessible(true) is enough to overwrite a non-static final field.
        // Patching Field's private "modifiers" field is unnecessary for that
        // and fails with NoSuchFieldException on JDK 12 and later.
        f.setAccessible(true);
        f.set(replicationManager, publisher);
        UUID dnUuid = cluster.getHddsDatanodes().iterator().next().getDatanodeDetails().getUuid();
        CloseContainerCommand closeContainerCommand = new CloseContainerCommand(selectedContainer.getContainerID(), selectedContainer.getPipelineID(), false);
        CommandForDatanode commandForDatanode = new CommandForDatanode(dnUuid, closeContainerCommand);
        GenericTestUtils.waitFor(() -> {
            SCMContext scmContext = cluster.getStorageContainerManager().getScmContext();
            return !scmContext.isInSafeMode() && scmContext.isLeader();
        }, 1000, 25000);
        // After safe mode is off, ReplicationManager starts to run with a delay.
        Thread.sleep(5000);
        cluster.getStorageContainerManager().getReplicationManager().processAll();
        // Give ReplicationManager some time to process the containers.
        Thread.sleep(5000);
        verify(publisher).fireEvent(eq(SCMEvents.DATANODE_COMMAND), argThat(new CloseContainerCommandMatcher(dnUuid, commandForDatanode)));
    } finally {
        cluster.shutdown();
    }
}
Also used : StorageContainerManager(org.apache.hadoop.hdds.scm.server.StorageContainerManager) ReplicationManager(org.apache.hadoop.hdds.scm.container.ReplicationManager) EventPublisher(org.apache.hadoop.hdds.server.events.EventPublisher) CloseContainerCommand(org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand) OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) Field(java.lang.reflect.Field) CommandForDatanode(org.apache.hadoop.ozone.protocol.commands.CommandForDatanode) SCMContext(org.apache.hadoop.hdds.scm.ha.SCMContext) ContainerInfo(org.apache.hadoop.hdds.scm.container.ContainerInfo) UUID(java.util.UUID) SCMException(org.apache.hadoop.hdds.scm.exceptions.SCMException) Test(org.junit.Test)

Example 5 with ReplicationManager

use of org.apache.hadoop.hdds.scm.container.ReplicationManager in project ozone by apache.

the class TestSCMSafeModeWithPipelineRules method testScmSafeMode.

/**
 * Restarts SCM and then brings datanodes back step by step, checking that the
 * pipeline-based safe mode rules are satisfied in the expected order and that
 * the pipeline counts return to their original values.
 */
@Test
public void testScmSafeMode() throws Exception {
    final int datanodeCount = 6;
    final int pollIntervalMs = 1000;
    final int waitTimeoutMs = 60000;

    setup(datanodeCount);
    waitForRatis3NodePipelines(datanodeCount / 3);
    waitForRatis1NodePipelines(datanodeCount);
    final int totalPipelineCount = datanodeCount + (datanodeCount / 3);

    // The cluster came up fine; now stop it and restart OM and SCM.
    cluster.stop();
    cluster.restartOzoneManager();
    cluster.restartStorageContainerManager(false);

    pipelineManager = cluster.getStorageContainerManager().getPipelineManager();
    List<Pipeline> ratisThreePipelines = pipelineManager.getPipelines(RatisReplicationConfig.getInstance(ReplicationFactor.THREE));

    // Bring back every datanode of the first three-node pipeline.
    for (DatanodeDetails node : ratisThreePipelines.get(0).getNodes()) {
        try {
            cluster.restartHddsDatanode(node, false);
        } catch (Exception ex) {
            fail("Datanode restart failed");
        }
    }

    SCMSafeModeManager scmSafeModeManager = cluster.getStorageContainerManager().getScmSafeModeManager();
    // Ceil(0.1 * 2) is 1: with one healthy pipeline, the healthy pipeline rule
    // is satisfied.
    GenericTestUtils.waitFor(
        () -> scmSafeModeManager.getHealthyPipelineSafeModeRule().validate(),
        pollIntervalMs, waitTimeoutMs);
    // Ceil(0.9 * 2) is 2, and no datanode of the second pipeline has reported
    // yet, so the one-replica rule is not met at this point.
    GenericTestUtils.waitFor(
        () -> !scmSafeModeManager.getOneReplicaPipelineSafeModeRule().validate(),
        pollIntervalMs, waitTimeoutMs);
    Assert.assertTrue(cluster.getStorageContainerManager().isInSafeMode());

    // Restart a single datanode belonging to the second pipeline.
    DatanodeDetails restartedDatanode = ratisThreePipelines.get(1).getFirstNode();
    try {
        cluster.restartHddsDatanode(restartedDatanode, false);
    } catch (Exception ex) {
        fail("Datanode restart failed");
    }
    GenericTestUtils.waitFor(
        () -> scmSafeModeManager.getOneReplicaPipelineSafeModeRule().validate(),
        pollIntervalMs, waitTimeoutMs);
    GenericTestUtils.waitFor(
        () -> !scmSafeModeManager.getInSafeMode(),
        pollIntervalMs, waitTimeoutMs);

    // The post-safe-mode wait time has not elapsed yet, so the total is still
    // the original 6 (one-node pipelines) + 2 (three-node pipelines).
    Assert.assertEquals(totalPipelineCount, pipelineManager.getPipelines().size());

    ReplicationManager replicationManager = cluster.getStorageContainerManager().getReplicationManager();
    GenericTestUtils.waitFor(() -> replicationManager.isRunning(), pollIntervalMs, waitTimeoutMs);

    // Four datanodes have reported: expect 4 single-node pipelines and
    // 1 three-node pipeline.
    waitForRatis1NodePipelines(4);
    waitForRatis3NodePipelines(1);

    // Restart the remaining datanodes of the second pipeline; eventually the
    // counts should match the original ones again.
    for (DatanodeDetails node : ratisThreePipelines.get(1).getNodes()) {
        try {
            if (restartedDatanode.equals(node)) {
                continue;
            }
            cluster.restartHddsDatanode(node, false);
        } catch (Exception ex) {
            fail("Datanode restart failed");
        }
    }
    waitForRatis1NodePipelines(datanodeCount);
    waitForRatis3NodePipelines(datanodeCount / 3);
}
Also used : ReplicationManager(org.apache.hadoop.hdds.scm.container.ReplicationManager) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) TimeoutException(java.util.concurrent.TimeoutException) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) Test(org.junit.Test)

Aggregations

ReplicationManager (org.apache.hadoop.hdds.scm.container.ReplicationManager)6 OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration)2 DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)2 ContainerID (org.apache.hadoop.hdds.scm.container.ContainerID)2 SCMContext (org.apache.hadoop.hdds.scm.ha.SCMContext)2 StorageContainerManager (org.apache.hadoop.hdds.scm.server.StorageContainerManager)2 EventPublisher (org.apache.hadoop.hdds.server.events.EventPublisher)2 Before (org.junit.Before)2 Test (org.junit.Test)2 Field (java.lang.reflect.Field)1 UUID (java.util.UUID)1 TimeoutException (java.util.concurrent.TimeoutException)1 MockDatanodeDetails (org.apache.hadoop.hdds.protocol.MockDatanodeDetails)1 HddsProtos (org.apache.hadoop.hdds.protocol.proto.HddsProtos)1 BlockManagerImpl (org.apache.hadoop.hdds.scm.block.BlockManagerImpl)1 ContainerInfo (org.apache.hadoop.hdds.scm.container.ContainerInfo)1 ContainerManager (org.apache.hadoop.hdds.scm.container.ContainerManager)1 ContainerManagerImpl (org.apache.hadoop.hdds.scm.container.ContainerManagerImpl)1 ContainerNotFoundException (org.apache.hadoop.hdds.scm.container.ContainerNotFoundException)1 MockNodeManager (org.apache.hadoop.hdds.scm.container.MockNodeManager)1