Search in sources :

Example 1 with EventQueue

use of org.apache.hadoop.hdds.server.events.EventQueue in project ozone by apache.

the class TestScmSafeMode method testSCMSafeMode.

/**
 * Exercises SCM safe mode across two cluster rebuilds.
 *
 * <p>Phase 1 rebuilds the cluster with no containers and checks that SCM
 * starts in safe mode and leaves it once the datanodes are up.
 *
 * <p>Phase 2 writes keys, moves all but two of the listed containers through
 * FINALIZE and CLOSE, rebuilds the cluster again and checks that SCM stays in
 * safe mode until the datanodes are started and the container threshold
 * reaches the configured cutoff.
 *
 * @throws Exception if any cluster operation or wait fails
 */
@Test(timeout = 300_000)
public void testSCMSafeMode() throws Exception {
    // Test1: Test safe mode when there are no containers in system.
    cluster.stop();
    try {
        cluster = builder.build();
    } catch (IOException e) {
        // Same failure message as before, but keep the cause so the reason
        // for the startup failure shows up in the test report.
        throw new AssertionError("Cluster startup failed.", e);
    }
    assertTrue(cluster.getStorageContainerManager().isInSafeMode());
    cluster.startHddsDatanodes();
    cluster.waitForClusterToBeReady();
    cluster.waitTobeOutOfSafeMode();
    assertFalse(cluster.getStorageContainerManager().isInSafeMode());

    // Test2: Test safe mode when containers are there in system.
    // Create {numKeys} random names keys. Only the side effect of creating
    // the keys matters here; the returned map is not used.
    TestStorageContainerManagerHelper helper = new TestStorageContainerManagerHelper(cluster, conf);
    helper.createKeys(100 * 2, 4096);
    final List<ContainerInfo> containers = cluster.getStorageContainerManager().getContainerManager().getContainers();
    GenericTestUtils.waitFor(() -> containers.size() >= 3, 100, 1000 * 30);
    // Drop the first two entries from the list so they are skipped by the
    // close loop below and stay open.
    containers.remove(0);
    containers.remove(0);
    // Close remaining containers
    ContainerManager mapping = cluster.getStorageContainerManager().getContainerManager();
    containers.forEach(c -> {
        try {
            mapping.updateContainerState(c.containerID(), HddsProtos.LifeCycleEvent.FINALIZE);
            mapping.updateContainerState(c.containerID(), LifeCycleEvent.CLOSE);
        } catch (IOException | InvalidStateTransitionException e) {
            // Best effort: a container that cannot be transitioned is logged and skipped.
            LOG.info("Failed to change state of open containers.", e);
        }
    });
    cluster.stop();

    GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer.captureLogs(SCMSafeModeManager.getLogger());
    logCapturer.clearOutput();
    try {
        cluster = builder.build();
    } catch (IOException ex) {
        // Preserve the cause instead of discarding it.
        throw new AssertionError("failed", ex);
    }
    final StorageContainerManager scm = cluster.getStorageContainerManager();
    // Before any datanode is started SCM must still be in safe mode.
    assertTrue(scm.isInSafeMode());
    assertFalse(logCapturer.getOutput().contains("SCM exiting safe mode."));
    assertTrue(scm.getCurrentContainerThreshold() == 0);
    for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
        dn.start();
    }
    GenericTestUtils.waitFor(() -> scm.getCurrentContainerThreshold() == 1.0, 100, 20000);
    // Drain pending events before checking the safe-mode exit conditions.
    EventQueue eventQueue = (EventQueue) cluster.getStorageContainerManager().getEventQueue();
    eventQueue.processAll(5000L);
    double safeModeCutoff = conf.getDouble(HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT, HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT_DEFAULT);
    assertTrue(scm.getCurrentContainerThreshold() >= safeModeCutoff);
    assertTrue(logCapturer.getOutput().contains("SCM exiting safe mode."));
    assertFalse(scm.isInSafeMode());
}
Also used : ContainerManager(org.apache.hadoop.hdds.scm.container.ContainerManager) StorageContainerManager(org.apache.hadoop.hdds.scm.server.StorageContainerManager) StorageContainerManager(org.apache.hadoop.hdds.scm.server.StorageContainerManager) InvalidStateTransitionException(org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException) GenericTestUtils(org.apache.ozone.test.GenericTestUtils) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) IOException(java.io.IOException) TestStorageContainerManagerHelper(org.apache.hadoop.ozone.TestStorageContainerManagerHelper) EventQueue(org.apache.hadoop.hdds.server.events.EventQueue) OmKeyInfo(org.apache.hadoop.ozone.om.helpers.OmKeyInfo) ContainerInfo(org.apache.hadoop.hdds.scm.container.ContainerInfo) Test(org.junit.Test)

Example 2 with EventQueue

use of org.apache.hadoop.hdds.server.events.EventQueue in project ozone by apache.

the class TestPipelineClose method testPipelineCloseWithLogFailure.

/**
 * Checks that notifying a datanode's Ratis server of a log failure leads to
 * a pipeline action report reaching SCM for the affected pipeline.
 *
 * <p>A mocked {@code PipelineActionHandler} is registered on the SCM event
 * queue to capture the report, which is then passed to
 * {@code verifyCloseForPipeline}.
 *
 * @throws IOException if container allocation or pipeline lookup fails
 */
@Test
@Ignore("HDDS-5604")
public void testPipelineCloseWithLogFailure() throws IOException {
    EventQueue eventQ = (EventQueue) scm.getEventQueue();
    PipelineActionHandler pipelineActionTest = Mockito.mock(PipelineActionHandler.class);
    eventQ.addHandler(SCMEvents.PIPELINE_ACTIONS, pipelineActionTest);
    ArgumentCaptor<PipelineActionsFromDatanode> actionCaptor = ArgumentCaptor.forClass(PipelineActionsFromDatanode.class);

    ContainerInfo containerInfo = containerManager.allocateContainer(RatisReplicationConfig.getInstance(ReplicationFactor.THREE), "testOwner");
    ContainerWithPipeline containerWithPipeline = new ContainerWithPipeline(containerInfo, pipelineManager.getPipeline(containerInfo.getPipelineID()));
    Pipeline openPipeline = containerWithPipeline.getPipeline();
    RaftGroupId groupId = RaftGroupId.valueOf(openPipeline.getId().getId());
    try {
        pipelineManager.getPipeline(openPipeline.getId());
    } catch (PipelineNotFoundException e) {
        // Fail with the same message as before, but keep the cause attached.
        throw new AssertionError("pipeline should exist", e);
    }

    DatanodeDetails datanodeDetails = openPipeline.getNodes().get(0);
    int index = cluster.getHddsDatanodeIndex(datanodeDetails);
    XceiverServerRatis xceiverRatis = (XceiverServerRatis) cluster.getHddsDatanodes().get(index).getDatanodeStateMachine().getContainer().getWriteChannel();
    /*
     * Notify Datanode Ratis Server endpoint of a Ratis log failure.
     * This is expected to trigger an immediate pipeline actions report to SCM
     */
    xceiverRatis.handleNodeLogFailure(groupId, null);
    // verify SCM receives a pipeline action report "immediately"
    // (the verification is bounded by Mockito.timeout(100))
    Mockito.verify(pipelineActionTest, Mockito.timeout(100)).onMessage(actionCaptor.capture(), Mockito.any(EventPublisher.class));
    PipelineActionsFromDatanode actionsFromDatanode = actionCaptor.getValue();
    // match the pipeline id
    verifyCloseForPipeline(openPipeline, actionsFromDatanode);
}
Also used : XceiverServerRatis(org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis) EventPublisher(org.apache.hadoop.hdds.server.events.EventPublisher) RaftGroupId(org.apache.ratis.protocol.RaftGroupId) ContainerWithPipeline(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline) EventQueue(org.apache.hadoop.hdds.server.events.EventQueue) ContainerWithPipeline(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline) PipelineActionsFromDatanode(org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.PipelineActionsFromDatanode) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) ContainerInfo(org.apache.hadoop.hdds.scm.container.ContainerInfo) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 3 with EventQueue

use of org.apache.hadoop.hdds.server.events.EventQueue in project ozone by apache.

the class TestPipelineClose method testPipelineCloseWithPipelineAction.

/**
 * Sends a pipeline action for the Ratis container's pipeline straight to a
 * {@code PipelineActionHandler} and checks that, after a fixed wait, the
 * pipeline is absent from the pipeline report of the datanode at index 0
 * and can no longer be looked up in the pipeline manager.
 *
 * @throws Exception if building the action, sleeping or querying the cluster fails
 */
@Test
public void testPipelineCloseWithPipelineAction() throws Exception {
    List<DatanodeDetails> dns = ratisContainer.getPipeline().getNodes();
    PipelineActionsFromDatanode pipelineActionsFromDatanode = HddsTestUtils.getPipelineActionFromDatanode(dns.get(0), ratisContainer.getPipeline().getId());
    // send closing action for pipeline
    PipelineActionHandler pipelineActionHandler = new PipelineActionHandler(pipelineManager, SCMContext.emptyContext(), conf);
    pipelineActionHandler.onMessage(pipelineActionsFromDatanode, new EventQueue());
    // Fixed wait for the action to take effect before inspecting the datanode.
    Thread.sleep(5000);

    OzoneContainer ozoneContainer = cluster.getHddsDatanodes().get(0).getDatanodeStateMachine().getContainer();
    List<PipelineReport> pipelineReports = ozoneContainer.getPipelineReport().getPipelineReportList();
    for (PipelineReport pipelineReport : pipelineReports) {
        // ensure the pipeline is no longer reported by this datanode;
        // assertNotEquals takes (unexpected, actual) in that order
        Assert.assertNotEquals(ratisContainer.getPipeline().getId(), PipelineID.getFromProtobuf(pipelineReport.getPipelineID()));
    }

    try {
        pipelineManager.getPipeline(ratisContainer.getPipeline().getId());
        Assert.fail("Pipeline should not exist in SCM");
    } catch (PipelineNotFoundException expected) {
        // Expected: the lookup must fail for the test to pass.
    }
}
Also used : PipelineReport(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReport) PipelineActionsFromDatanode(org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.PipelineActionsFromDatanode) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) OzoneContainer(org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer) EventQueue(org.apache.hadoop.hdds.server.events.EventQueue) Test(org.junit.Test)

Example 4 with EventQueue

use of org.apache.hadoop.hdds.server.events.EventQueue in project ozone by apache.

the class TestReconNodeManager method testUpdateNodeOperationalStateFromScm.

/**
 * Checks how {@code ReconNodeManager} applies an operational state received
 * from SCM: the update is rejected with {@code NodeNotFoundException} for an
 * unregistered datanode, and after registration a DECOMMISSIONING state
 * replaces the initial IN_SERVICE state and is visible through
 * {@code getNodes}.
 *
 * @throws Exception if setting up or querying the node manager fails
 */
@Test
public void testUpdateNodeOperationalStateFromScm() throws Exception {
    // Collaborators for the node manager under test.
    final ReconStorageConfig storageConfig = new ReconStorageConfig(conf, new ReconUtils());
    final EventQueue queue = new EventQueue();
    final NetworkTopology topology = new NetworkTopologyImpl(conf);
    final Table<UUID, DatanodeDetails> datanodeTable = ReconSCMDBDefinition.NODES.getTable(store);
    final ReconNodeManager nodeManager =
        new ReconNodeManager(conf, storageConfig, queue, topology, datanodeTable, versionManager);

    final DatanodeDetails datanode = randomDatanodeDetails();
    final HddsProtos.Node scmNode = mock(HddsProtos.Node.class);

    // Updating a datanode that was never registered must be rejected.
    LambdaTestUtils.intercept(NodeNotFoundException.class, () -> {
        nodeManager.updateNodeOperationalStateFromScm(scmNode, datanode);
    });

    // After registration the persisted state is IN_SERVICE.
    nodeManager.register(datanode, null, null);
    final String uuid = datanode.getUuidString();
    assertEquals(IN_SERVICE, nodeManager.getNodeByUuid(uuid).getPersistedOpState());

    // Have the mocked SCM node report DECOMMISSIONING, then apply the update.
    when(scmNode.getNodeOperationalStates(eq(0))).thenReturn(DECOMMISSIONING);
    nodeManager.updateNodeOperationalStateFromScm(scmNode, datanode);
    assertEquals(DECOMMISSIONING, nodeManager.getNodeByUuid(uuid).getPersistedOpState());

    // Exactly this datanode must be listed as DECOMMISSIONING.
    final List<DatanodeDetails> decommissioning = nodeManager.getNodes(DECOMMISSIONING, null);
    assertEquals(1, decommissioning.size());
    assertEquals(datanode.getUuid(), decommissioning.get(0).getUuid());
}
Also used : NetworkTopologyImpl(org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl) ReconUtils(org.apache.hadoop.ozone.recon.ReconUtils) HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) NetworkTopology(org.apache.hadoop.hdds.scm.net.NetworkTopology) UUID(java.util.UUID) EventQueue(org.apache.hadoop.hdds.server.events.EventQueue) Test(org.junit.Test)

Example 5 with EventQueue

use of org.apache.hadoop.hdds.server.events.EventQueue in project ozone by apache.

the class TestBlockDeletion method testContainerStatisticsAfterDelete.

/**
 * Writes a single key, checks the used-bytes and key-count statistics of the
 * containers known to SCM, deletes the key and checks that the statistics
 * drop to zero and no pending deletion blocks remain on the datanodes.
 *
 * <p>It then shuts down datanode 0, runs the replication manager and expects
 * the containers to be DELETING; after the datanode is restarted the
 * containers are expected to reach DELETED both in the container manager
 * and in the SCM metadata store.
 *
 * @throws Exception if any client, cluster or wait operation fails
 */
@Test
public void testContainerStatisticsAfterDelete() throws Exception {
    String volumeName = UUID.randomUUID().toString();
    String bucketName = UUID.randomUUID().toString();
    String value = RandomStringUtils.random(1024 * 1024);
    // Encode once; the same bytes provide the key size, the payload and the
    // expected used-bytes figure.
    final byte[] valueBytes = value.getBytes(UTF_8);
    store.createVolume(volumeName);
    OzoneVolume volume = store.getVolume(volumeName);
    volume.createBucket(bucketName);
    OzoneBucket bucket = volume.getBucket(bucketName);
    String keyName = UUID.randomUUID().toString();
    // try-with-resources closes the stream even when the write throws.
    try (OzoneOutputStream out = bucket.createKey(keyName, valueBytes.length, ReplicationType.RATIS, ReplicationFactor.THREE, new HashMap<>())) {
        out.write(valueBytes);
    }
    OmKeyArgs keyArgs = new OmKeyArgs.Builder().setVolumeName(volumeName).setBucketName(bucketName).setKeyName(keyName).setDataSize(0).setReplicationConfig(RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.THREE)).setRefreshPipeline(true).build();
    List<OmKeyLocationInfoGroup> omKeyLocationInfoGroupList = om.lookupKey(keyArgs).getKeyLocationVersions();
    // Fixed wait before reading container statistics from SCM.
    Thread.sleep(5000);
    List<ContainerInfo> containerInfos = scm.getContainerManager().getContainers();
    final int valueSize = valueBytes.length;
    final int keyCount = 1;
    containerInfos.forEach(container -> {
        Assert.assertEquals(valueSize, container.getUsedBytes());
        Assert.assertEquals(keyCount, container.getNumberOfKeys());
    });
    OzoneTestUtils.closeAllContainers(scm.getEventQueue(), scm);
    // Wait for container to close
    Thread.sleep(2000);
    // make sure the containers are closed on the dn
    omKeyLocationInfoGroupList.forEach((group) -> {
        List<OmKeyLocationInfo> locationInfo = group.getLocationList();
        locationInfo.forEach((info) -> cluster.getHddsDatanodes().get(0).getDatanodeStateMachine().getContainer().getContainerSet().getContainer(info.getContainerID()).getContainerData().setState(ContainerProtos.ContainerDataProto.State.CLOSED));
    });
    writeClient.deleteKey(keyArgs);
    // Wait for blocks to be deleted and container reports to be processed
    Thread.sleep(5000);
    containerInfos = scm.getContainerManager().getContainers();
    containerInfos.forEach(container -> {
        Assert.assertEquals(0, container.getUsedBytes());
        Assert.assertEquals(0, container.getNumberOfKeys());
    });
    // Verify that pending block delete num are as expected with resent cmds
    cluster.getHddsDatanodes().forEach(dn -> {
        Map<Long, Container<?>> containerMap = dn.getDatanodeStateMachine().getContainer().getContainerSet().getContainerMap();
        containerMap.values().forEach(container -> {
            KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData();
            Assert.assertEquals(0, containerData.getNumPendingDeletionBlocks());
        });
    });
    // With datanode 0 down, the containers are expected to be in DELETING.
    cluster.shutdownHddsDatanode(0);
    scm.getReplicationManager().processAll();
    ((EventQueue) scm.getEventQueue()).processAll(1000);
    containerInfos = scm.getContainerManager().getContainers();
    containerInfos.forEach(container -> Assert.assertEquals(HddsProtos.LifeCycleState.DELETING, container.getState()));
    // A second replication-manager pass is expected to log a resend of the delete command.
    LogCapturer logCapturer = LogCapturer.captureLogs(ReplicationManager.LOG);
    logCapturer.clearOutput();
    scm.getReplicationManager().processAll();
    ((EventQueue) scm.getEventQueue()).processAll(1000);
    GenericTestUtils.waitFor(() -> logCapturer.getOutput().contains("Resend delete Container"), 500, 3000);
    cluster.restartHddsDatanode(0, true);
    Thread.sleep(1000);
    scm.getReplicationManager().processAll();
    ((EventQueue) scm.getEventQueue()).processAll(1000);
    // Poll until every container is DELETED in both the container manager and
    // the SCM metadata store. Assertion failures inside the lambda are turned
    // into "not yet" so that waitFor keeps retrying until its timeout.
    GenericTestUtils.waitFor(() -> {
        List<ContainerInfo> infos = scm.getContainerManager().getContainers();
        try {
            infos.forEach(container -> {
                Assert.assertEquals(HddsProtos.LifeCycleState.DELETED, container.getState());
                try {
                    Assert.assertEquals(HddsProtos.LifeCycleState.DELETED, scm.getScmMetadataStore().getContainerTable().get(container.containerID()).getState());
                } catch (IOException e) {
                    Assert.fail("Container from SCM DB should be marked as DELETED");
                }
            });
        } catch (Throwable e) {
            return false;
        }
        return true;
    }, 500, 5000);
    LOG.info(metrics.toString());
}
Also used : OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) IOException(java.io.IOException) OmKeyArgs(org.apache.hadoop.ozone.om.helpers.OmKeyArgs) OmKeyLocationInfo(org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo) KeyValueContainerData(org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData) EventQueue(org.apache.hadoop.hdds.server.events.EventQueue) OzoneVolume(org.apache.hadoop.ozone.client.OzoneVolume) OzoneBucket(org.apache.hadoop.ozone.client.OzoneBucket) Container(org.apache.hadoop.ozone.container.common.interfaces.Container) OmKeyLocationInfoGroup(org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup) ContainerInfo(org.apache.hadoop.hdds.scm.container.ContainerInfo) LogCapturer(org.apache.ozone.test.GenericTestUtils.LogCapturer) Test(org.junit.Test)

Aggregations

EventQueue (org.apache.hadoop.hdds.server.events.EventQueue): 36, Test (org.junit.Test): 22, OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration): 19, DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails): 12, HDDSLayoutVersionManager (org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager): 11, NetworkTopologyImpl (org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl): 10, Before (org.junit.Before): 10, NetworkTopology (org.apache.hadoop.hdds.scm.net.NetworkTopology): 9, MockDatanodeDetails.randomDatanodeDetails (org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails): 8, SCMServiceManager (org.apache.hadoop.hdds.scm.ha.SCMServiceManager): 8, PipelineProvider (org.apache.hadoop.hdds.scm.pipeline.PipelineProvider): 8, ArrayList (java.util.ArrayList): 7, UUID (java.util.UUID): 7, ContainerInfo (org.apache.hadoop.hdds.scm.container.ContainerInfo): 7, MockRatisPipelineProvider (org.apache.hadoop.hdds.scm.pipeline.MockRatisPipelineProvider): 7, SCMMetadataStoreImpl (org.apache.hadoop.hdds.scm.metadata.SCMMetadataStoreImpl): 6, SCMNodeManager (org.apache.hadoop.hdds.scm.node.SCMNodeManager): 6, File (java.io.File): 5, MockNodeManager (org.apache.hadoop.hdds.scm.container.MockNodeManager): 5, NodeManager (org.apache.hadoop.hdds.scm.node.NodeManager): 5