Search in sources :

Example 6 with ReconStorageContainerManagerFacade

use of org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade in project ozone by apache.

The class TestContainerHealthTask, method testRun.

/**
 * Exercises {@link ContainerHealthTask} against a mocked container manager in two phases.
 *
 * <p>Phase one registers six containers. IDs 1-5 are stubbed with replica sets that the
 * assertions expect to be recorded as UNDER_REPLICATED (1 and 2), MISSING (3),
 * OVER_REPLICATED (4) and MIS_REPLICATED (5); ID 6 gets three CLOSED replicas and is
 * expected to leave no record, so the table should settle at five rows.
 *
 * <p>Phase two re-stubs the replica sets and waits for the table to shrink to three rows,
 * checking that rows are updated (1, 4), kept (3) or removed (2, 5).
 *
 * @throws Exception if a stubbed call, a DAO call, or one of the awaits fails or times out
 */
@SuppressWarnings("checkstyle:methodlength")
@Test
public void testRun() throws Exception {
    UnhealthyContainersDao unHealthyContainersTableHandle = getDao(UnhealthyContainersDao.class);
    ContainerHealthSchemaManager containerHealthSchemaManager = new ContainerHealthSchemaManager(
            getSchemaDefinition(ContainerSchemaDefinition.class), unHealthyContainersTableHandle);
    ReconStorageContainerManagerFacade scmMock = mock(ReconStorageContainerManagerFacade.class);
    MockPlacementPolicy placementMock = new MockPlacementPolicy();
    ContainerManager containerManagerMock = mock(ContainerManager.class);
    StorageContainerServiceProvider scmClientMock = mock(StorageContainerServiceProvider.class);

    // Create 6 containers. The first 5 will have various unhealthy states
    // defined below. The container with ID=6 will be healthy.
    List<ContainerInfo> mockContainers = getMockContainers(6);
    when(scmMock.getScmServiceProvider()).thenReturn(scmClientMock);
    when(scmMock.getContainerManager()).thenReturn(containerManagerMock);
    when(containerManagerMock.getContainers()).thenReturn(mockContainers);
    for (ContainerInfo c : mockContainers) {
        when(containerManagerMock.getContainer(c.containerID())).thenReturn(c);
        when(scmClientMock.getContainerWithPipeline(c.getContainerID()))
                .thenReturn(new ContainerWithPipeline(c, null));
    }

    // ID 1: one CLOSED and one UNHEALTHY replica -> UNDER_REPLICATED (delta 2 asserted below)
    when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(1L)))
            .thenReturn(getMockReplicas(1L, State.CLOSED, State.UNHEALTHY));
    // ID 2: only an UNHEALTHY replica -> UNDER_REPLICATED (delta 3 asserted below)
    when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(2L)))
            .thenReturn(getMockReplicas(2L, State.UNHEALTHY));
    // ID 3: no replicas at all -> MISSING
    when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(3L)))
            .thenReturn(Collections.emptySet());
    // ID 4: five CLOSED replicas -> OVER_REPLICATED
    when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(4L)))
            .thenReturn(getMockReplicas(4L, State.CLOSED, State.CLOSED, State.CLOSED, State.CLOSED, State.CLOSED));
    // ID 5: three CLOSED replicas, but the placement mock is told to report
    // mis-replication whenever the first replica's datanode is present -> MIS_REPLICATED
    Set<ContainerReplica> misReplicas = getMockReplicas(5L, State.CLOSED, State.CLOSED, State.CLOSED);
    placementMock.setMisRepWhenDnPresent(misReplicas.iterator().next().getDatanodeDetails().getUuid());
    when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(5L))).thenReturn(misReplicas);
    // ID 6: three CLOSED replicas -> healthy container, no record expected
    when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(6L)))
            .thenReturn(getMockReplicas(6L, State.CLOSED, State.CLOSED, State.CLOSED));

    // The table must start out empty so the row counts awaited below are meaningful.
    List<UnhealthyContainers> all = unHealthyContainersTableHandle.findAll();
    Assert.assertTrue(all.isEmpty());

    long currentTime = System.currentTimeMillis();
    ReconTaskStatusDao reconTaskStatusDao = getDao(ReconTaskStatusDao.class);
    ReconTaskConfig reconTaskConfig = new ReconTaskConfig();
    reconTaskConfig.setMissingContainerTaskInterval(Duration.ofSeconds(2));
    ContainerHealthTask containerHealthTask = new ContainerHealthTask(
            scmMock.getContainerManager(), scmMock.getScmServiceProvider(), reconTaskStatusDao,
            containerHealthSchemaManager, placementMock, reconTaskConfig);
    containerHealthTask.start();
    try {
        // ---- Phase one: five of the six containers should be recorded as unhealthy.
        LambdaTestUtils.await(6000, 1000, () -> (unHealthyContainersTableHandle.count() == 5));

        UnhealthyContainers rec = unHealthyContainersTableHandle.fetchByContainerId(1L).get(0);
        assertEquals("UNDER_REPLICATED", rec.getContainerState());
        assertEquals(2, rec.getReplicaDelta().intValue());

        rec = unHealthyContainersTableHandle.fetchByContainerId(2L).get(0);
        assertEquals("UNDER_REPLICATED", rec.getContainerState());
        assertEquals(3, rec.getReplicaDelta().intValue());

        // Container 2 is the only one expected under ALL_REPLICAS_UNHEALTHY.
        List<UnhealthyContainers> unhealthyContainers =
                containerHealthSchemaManager.getUnhealthyContainers(ALL_REPLICAS_UNHEALTHY, 0, Integer.MAX_VALUE);
        assertEquals(1, unhealthyContainers.size());
        assertEquals(2L, unhealthyContainers.get(0).getContainerId().longValue());
        assertEquals(0, unhealthyContainers.get(0).getActualReplicaCount().intValue());

        rec = unHealthyContainersTableHandle.fetchByContainerId(3L).get(0);
        assertEquals("MISSING", rec.getContainerState());
        assertEquals(3, rec.getReplicaDelta().intValue());

        rec = unHealthyContainersTableHandle.fetchByContainerId(4L).get(0);
        assertEquals("OVER_REPLICATED", rec.getContainerState());
        assertEquals(-2, rec.getReplicaDelta().intValue());

        rec = unHealthyContainersTableHandle.fetchByContainerId(5L).get(0);
        assertEquals("MIS_REPLICATED", rec.getContainerState());
        assertEquals(1, rec.getReplicaDelta().intValue());
        assertEquals(2, rec.getExpectedReplicaCount().intValue());
        assertEquals(1, rec.getActualReplicaCount().intValue());
        assertNotNull(rec.getReason());

        // The task should have recorded a status timestamp later than the pre-start time.
        ReconTaskStatus taskStatus = reconTaskStatusDao.findById(containerHealthTask.getTaskName());
        Assert.assertTrue(taskStatus.getLastUpdatedTimestamp() > currentTime);

        // ---- Phase two: re-stub the mocks and wait for the already-running task to
        // pick the changes up, checking that records are updated or removed as appropriate.
        // ID 1: still under replicated -> delta goes from 2 to 1
        when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(1L)))
                .thenReturn(getMockReplicas(1L, State.CLOSED, State.CLOSED));
        // ID 2: previously had only an UNHEALTHY replica (UNDER_REPLICATED) - make it healthy now
        when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(2L)))
                .thenReturn(getMockReplicas(2L, State.CLOSED, State.CLOSED, State.CLOSED));
        // ID 3: still no replicas -> still MISSING
        when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(3L)))
                .thenReturn(Collections.emptySet());
        // ID 4: four CLOSED replicas -> delta changes from -2 to -1
        when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(4L)))
                .thenReturn(getMockReplicas(4L, State.CLOSED, State.CLOSED, State.CLOSED, State.CLOSED));
        // ID 5: was mis-replicated - clear the placement trigger to make it healthy now
        placementMock.setMisRepWhenDnPresent(null);

        LambdaTestUtils.await(6000, 1000, () -> (unHealthyContainersTableHandle.count() == 3));

        rec = unHealthyContainersTableHandle.fetchByContainerId(1L).get(0);
        assertEquals("UNDER_REPLICATED", rec.getContainerState());
        assertEquals(1, rec.getReplicaDelta().intValue());

        // This container is now healthy, it should not be in the table any more
        assertEquals(0, unHealthyContainersTableHandle.fetchByContainerId(2L).size());

        rec = unHealthyContainersTableHandle.fetchByContainerId(3L).get(0);
        assertEquals("MISSING", rec.getContainerState());
        assertEquals(3, rec.getReplicaDelta().intValue());

        rec = unHealthyContainersTableHandle.fetchByContainerId(4L).get(0);
        assertEquals("OVER_REPLICATED", rec.getContainerState());
        assertEquals(-1, rec.getReplicaDelta().intValue());

        // This container is now healthy, it should not be in the table any more
        assertEquals(0, unHealthyContainersTableHandle.fetchByContainerId(5L).size());
    } finally {
        // Stop the started task on every exit path so it does not keep polling the
        // DAO and mocks after this test has passed or failed.
        containerHealthTask.stop();
    }
}
Also used : ReconStorageContainerManagerFacade(org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade) ContainerManager(org.apache.hadoop.hdds.scm.container.ContainerManager) StorageContainerServiceProvider(org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider) ReconTaskStatusDao(org.hadoop.ozone.recon.schema.tables.daos.ReconTaskStatusDao) ContainerWithPipeline(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline) ContainerHealthSchemaManager(org.apache.hadoop.ozone.recon.persistence.ContainerHealthSchemaManager) ContainerReplica(org.apache.hadoop.hdds.scm.container.ContainerReplica) UnhealthyContainers(org.hadoop.ozone.recon.schema.tables.pojos.UnhealthyContainers) ContainerInfo(org.apache.hadoop.hdds.scm.container.ContainerInfo) ContainerSchemaDefinition(org.hadoop.ozone.recon.schema.ContainerSchemaDefinition) ReconTaskConfig(org.apache.hadoop.ozone.recon.tasks.ReconTaskConfig) ReconTaskStatus(org.hadoop.ozone.recon.schema.tables.pojos.ReconTaskStatus) UnhealthyContainersDao(org.hadoop.ozone.recon.schema.tables.daos.UnhealthyContainersDao) AbstractReconSqlDBTest(org.apache.hadoop.ozone.recon.persistence.AbstractReconSqlDBTest) Test(org.junit.Test)

Example 7 with ReconStorageContainerManagerFacade

use of org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade in project ozone by apache.

The class TestReconTasks, method testMissingContainerDownNode.

/**
 * Checks that Recon records a container as MISSING while the datanode of its
 * pipeline is down, and clears that record once the datanode is restarted.
 *
 * <p>Steps: wait until Recon knows at least one pipeline, allocate a
 * single-replica Ratis container on SCM, drive it through
 * {@code runTestOzoneContainerViaDataNode}, compare the container ID sets of SCM
 * and Recon, then shut down and later restart the pipeline's first node while
 * polling Recon's unhealthy-container records.
 *
 * @throws Exception if any cluster call fails or an await times out
 */
@Test
public void testMissingContainerDownNode() throws Exception {
    ReconStorageContainerManagerFacade reconFacade =
            (ReconStorageContainerManagerFacade) cluster.getReconServer().getReconStorageContainerManager();
    StorageContainerManager scmServer = cluster.getStorageContainerManager();
    PipelineManager pipelinesOnRecon = reconFacade.getPipelineManager();
    PipelineManager pipelinesOnScm = scmServer.getPipelineManager();

    // Recon must have learned about at least one pipeline before we proceed.
    LambdaTestUtils.await(60000, 5000, () -> !pipelinesOnRecon.getPipelines().isEmpty());

    ContainerManager containersOnScm = scmServer.getContainerManager();
    ReconContainerManager containersOnRecon = (ReconContainerManager) reconFacade.getContainerManager();

    ContainerInfo allocated =
            containersOnScm.allocateContainer(RatisReplicationConfig.getInstance(ONE), "test");
    Pipeline containerPipeline = pipelinesOnScm.getPipeline(allocated.getPipelineID());
    XceiverClientGrpc grpcClient = new XceiverClientGrpc(containerPipeline, conf);
    runTestOzoneContainerViaDataNode(allocated.getContainerID(), grpcClient);

    // Recon should now report the same container IDs as SCM.
    Assert.assertEquals(containersOnScm.getContainerIDs(), containersOnRecon.getContainerIDs());

    final ContainerSchemaDefinition.UnHealthyContainerStates missingState =
            ContainerSchemaDefinition.UnHealthyContainerStates.MISSING;

    // Take down the datanode holding the replica; exactly one MISSING record is expected.
    cluster.shutdownHddsDatanode(containerPipeline.getFirstNode());
    LambdaTestUtils.await(120000, 10000, () ->
            containersOnRecon.getContainerSchemaManager()
                    .getUnhealthyContainers(missingState, 0, 1000)
                    .size() == 1);

    // Bring the datanode back; the MISSING record should disappear.
    cluster.restartHddsDatanode(containerPipeline.getFirstNode(), true);
    LambdaTestUtils.await(120000, 10000, () ->
            containersOnRecon.getContainerSchemaManager()
                    .getUnhealthyContainers(missingState, 0, 1000)
                    .isEmpty());
}
Also used : ReconStorageContainerManagerFacade(org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade) StorageContainerManager(org.apache.hadoop.hdds.scm.server.StorageContainerManager) ContainerManager(org.apache.hadoop.hdds.scm.container.ContainerManager) ReconContainerManager(org.apache.hadoop.ozone.recon.scm.ReconContainerManager) PipelineManager(org.apache.hadoop.hdds.scm.pipeline.PipelineManager) XceiverClientGrpc(org.apache.hadoop.hdds.scm.XceiverClientGrpc) UnhealthyContainers(org.hadoop.ozone.recon.schema.tables.pojos.UnhealthyContainers) ContainerInfo(org.apache.hadoop.hdds.scm.container.ContainerInfo) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) Test(org.junit.Test)

Aggregations

ReconStorageContainerManagerFacade (org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade)7 ContainerInfo (org.apache.hadoop.hdds.scm.container.ContainerInfo)6 ContainerManager (org.apache.hadoop.hdds.scm.container.ContainerManager)5 ContainerWithPipeline (org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline)4 OzoneStorageContainerManager (org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager)4 Test (org.junit.Test)4 ContainerHealthSchemaManager (org.apache.hadoop.ozone.recon.persistence.ContainerHealthSchemaManager)3 StorageContainerServiceProvider (org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider)3 UnhealthyContainers (org.hadoop.ozone.recon.schema.tables.pojos.UnhealthyContainers)3 HttpURLConnection (java.net.HttpURLConnection)2 OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration)2 XceiverClientGrpc (org.apache.hadoop.hdds.scm.XceiverClientGrpc)2 ContainerReplica (org.apache.hadoop.hdds.scm.container.ContainerReplica)2 Pipeline (org.apache.hadoop.hdds.scm.pipeline.Pipeline)2 PipelineManager (org.apache.hadoop.hdds.scm.pipeline.PipelineManager)2 StorageContainerLocationProtocol (org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol)2 StorageContainerManager (org.apache.hadoop.hdds.scm.server.StorageContainerManager)2 URLConnectionFactory (org.apache.hadoop.hdfs.web.URLConnectionFactory)2 ReconTestInjector (org.apache.hadoop.ozone.recon.ReconTestInjector)2 ReconUtils (org.apache.hadoop.ozone.recon.ReconUtils)2