Use of org.hadoop.ozone.recon.schema.tables.pojos.UnhealthyContainers in the Apache Ozone project.
Class TestContainerHealthTask, method testDeletedContainer.
/**
 * Runs the container health task against two containers that have no
 * replicas and expects exactly one unhealthy record to be written: container
 * 1 (stubbed as OPEN) must be recorded as MISSING with a replica delta of 3.
 * For container 2 (stubbed as CLOSED) the SCM client is stubbed to hand back
 * the container built by {@code getMockDeletedContainer(2)}, and no record is
 * expected for it.
 */
@Test
public void testDeletedContainer() throws Exception {
  UnhealthyContainersDao unhealthyDao = getDao(UnhealthyContainersDao.class);
  ContainerHealthSchemaManager schemaManager =
      new ContainerHealthSchemaManager(
          getSchemaDefinition(ContainerSchemaDefinition.class),
          unhealthyDao);
  ReconStorageContainerManagerFacade reconScm =
      mock(ReconStorageContainerManagerFacade.class);
  MockPlacementPolicy placement = new MockPlacementPolicy();
  ContainerManager containers = mock(ContainerManager.class);
  StorageContainerServiceProvider scmClient =
      mock(StorageContainerServiceProvider.class);

  // Two containers: the first is OPEN with no replicas, the second is
  // CLOSED with no replicas.
  List<ContainerInfo> containerInfos = getMockContainers(2);
  when(reconScm.getScmServiceProvider()).thenReturn(scmClient);
  when(reconScm.getContainerManager()).thenReturn(containers);
  when(containers.getContainers()).thenReturn(containerInfos);
  for (ContainerInfo info : containerInfos) {
    when(containers.getContainer(info.containerID())).thenReturn(info);
    when(scmClient.getContainerWithPipeline(info.getContainerID()))
        .thenReturn(new ContainerWithPipeline(info, null));
  }

  // Container 1: state OPEN, empty replica set.
  when(containers.getContainer(ContainerID.valueOf(1L)).getState())
      .thenReturn(HddsProtos.LifeCycleState.OPEN);
  when(containers.getContainerReplicas(ContainerID.valueOf(1L)))
      .thenReturn(Collections.emptySet());
  when(scmClient.getContainerWithPipeline(1))
      .thenReturn(new ContainerWithPipeline(containerInfos.get(0), null));

  // Container 2: state CLOSED, empty replica set. The SCM client answers
  // with the "deleted" mock container instead of the one created above.
  when(containers.getContainer(ContainerID.valueOf(2L)).getState())
      .thenReturn(HddsProtos.LifeCycleState.CLOSED);
  when(containers.getContainerReplicas(ContainerID.valueOf(2L)))
      .thenReturn(Collections.emptySet());
  ContainerInfo deletedContainer = getMockDeletedContainer(2);
  when(scmClient.getContainerWithPipeline(2))
      .thenReturn(new ContainerWithPipeline(deletedContainer, null));

  // The unhealthy table must start out empty.
  List<UnhealthyContainers> existingRecords = unhealthyDao.findAll();
  Assert.assertTrue(existingRecords.isEmpty());

  long startedAt = System.currentTimeMillis();
  ReconTaskStatusDao taskStatusDao = getDao(ReconTaskStatusDao.class);
  ReconTaskConfig taskConfig = new ReconTaskConfig();
  taskConfig.setMissingContainerTaskInterval(Duration.ofSeconds(2));
  ContainerHealthTask healthTask = new ContainerHealthTask(
      reconScm.getContainerManager(),
      reconScm.getScmServiceProvider(),
      taskStatusDao,
      schemaManager,
      placement,
      taskConfig);
  healthTask.start();

  // Poll until exactly one unhealthy record has been written.
  LambdaTestUtils.await(6000, 1000, () -> (unhealthyDao.count() == 1));

  UnhealthyContainers missing = unhealthyDao.fetchByContainerId(1L).get(0);
  assertEquals("MISSING", missing.getContainerState());
  assertEquals(3, missing.getReplicaDelta().intValue());

  // The task must have refreshed its status row after the test started.
  ReconTaskStatus status = taskStatusDao.findById(healthTask.getTaskName());
  Assert.assertTrue(status.getLastUpdatedTimestamp() > startedAt);
}
Use of org.hadoop.ozone.recon.schema.tables.pojos.UnhealthyContainers in the Apache Ozone project.
Class TestContainerHealthTask, method testRun.
/**
 * Runs the container health task over six mocked containers and checks the
 * records written to the unhealthy-containers table, then changes the mocked
 * replica sets and checks that the next run updates or removes the records.
 *
 * <p>First pass expectations (taken from the assertions below):
 * <ul>
 *   <li>ID 1: UNDER_REPLICATED, delta 2</li>
 *   <li>ID 2: UNDER_REPLICATED, delta 3, also returned for
 *       ALL_REPLICAS_UNHEALTHY</li>
 *   <li>ID 3: MISSING, delta 3</li>
 *   <li>ID 4: OVER_REPLICATED, delta -2</li>
 *   <li>ID 5: MIS_REPLICATED, delta 1</li>
 *   <li>ID 6: healthy, no record</li>
 * </ul>
 */
@SuppressWarnings("checkstyle:methodlength")
@Test
public void testRun() throws Exception {
  UnhealthyContainersDao unHealthyContainersTableHandle =
      getDao(UnhealthyContainersDao.class);
  ContainerHealthSchemaManager containerHealthSchemaManager =
      new ContainerHealthSchemaManager(
          getSchemaDefinition(ContainerSchemaDefinition.class),
          unHealthyContainersTableHandle);
  ReconStorageContainerManagerFacade scmMock =
      mock(ReconStorageContainerManagerFacade.class);
  MockPlacementPolicy placementMock = new MockPlacementPolicy();
  ContainerManager containerManagerMock = mock(ContainerManager.class);
  StorageContainerServiceProvider scmClientMock =
      mock(StorageContainerServiceProvider.class);

  // Create 6 containers. The first 5 will have various unhealthy states
  // defined below. The container with ID=6 will be healthy.
  List<ContainerInfo> mockContainers = getMockContainers(6);
  when(scmMock.getScmServiceProvider()).thenReturn(scmClientMock);
  when(scmMock.getContainerManager()).thenReturn(containerManagerMock);
  when(containerManagerMock.getContainers()).thenReturn(mockContainers);
  for (ContainerInfo c : mockContainers) {
    when(containerManagerMock.getContainer(c.containerID())).thenReturn(c);
    when(scmClientMock.getContainerWithPipeline(c.getContainerID()))
        .thenReturn(new ContainerWithPipeline(c, null));
  }

  // Under replicated: one CLOSED and one UNHEALTHY replica
  when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(1L)))
      .thenReturn(getMockReplicas(1L, State.CLOSED, State.UNHEALTHY));
  // return all UNHEALTHY replicas for container ID 2 -> UNDER_REPLICATED
  when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(2L)))
      .thenReturn(getMockReplicas(2L, State.UNHEALTHY));
  // return 0 replicas for container ID 3 -> Missing
  when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(3L)))
      .thenReturn(Collections.emptySet());
  // Return 5 Healthy -> Over replicated
  when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(4L)))
      .thenReturn(getMockReplicas(4L, State.CLOSED, State.CLOSED,
          State.CLOSED, State.CLOSED, State.CLOSED));
  // Mis-replicated: the placement mock flags any replica set containing the
  // datanode of the first replica.
  Set<ContainerReplica> misReplicas =
      getMockReplicas(5L, State.CLOSED, State.CLOSED, State.CLOSED);
  placementMock.setMisRepWhenDnPresent(
      misReplicas.iterator().next().getDatanodeDetails().getUuid());
  when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(5L)))
      .thenReturn(misReplicas);
  // Return 3 Healthy -> Healthy container
  when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(6L)))
      .thenReturn(getMockReplicas(6L, State.CLOSED, State.CLOSED,
          State.CLOSED));

  // The unhealthy table must start out empty.
  List<UnhealthyContainers> all = unHealthyContainersTableHandle.findAll();
  Assert.assertTrue(all.isEmpty());

  long currentTime = System.currentTimeMillis();
  ReconTaskStatusDao reconTaskStatusDao = getDao(ReconTaskStatusDao.class);
  ReconTaskConfig reconTaskConfig = new ReconTaskConfig();
  reconTaskConfig.setMissingContainerTaskInterval(Duration.ofSeconds(2));
  ContainerHealthTask containerHealthTask = new ContainerHealthTask(
      scmMock.getContainerManager(),
      scmMock.getScmServiceProvider(),
      reconTaskStatusDao,
      containerHealthSchemaManager,
      placementMock,
      reconTaskConfig);
  containerHealthTask.start();

  // Wait for the first run to record the five unhealthy containers.
  LambdaTestUtils.await(6000, 1000,
      () -> (unHealthyContainersTableHandle.count() == 5));

  UnhealthyContainers rec =
      unHealthyContainersTableHandle.fetchByContainerId(1L).get(0);
  assertEquals("UNDER_REPLICATED", rec.getContainerState());
  assertEquals(2, rec.getReplicaDelta().intValue());

  rec = unHealthyContainersTableHandle.fetchByContainerId(2L).get(0);
  assertEquals("UNDER_REPLICATED", rec.getContainerState());
  assertEquals(3, rec.getReplicaDelta().intValue());

  // Container 2 is the only one expected under ALL_REPLICAS_UNHEALTHY.
  List<UnhealthyContainers> unhealthyContainers =
      containerHealthSchemaManager.getUnhealthyContainers(
          ALL_REPLICAS_UNHEALTHY, 0, Integer.MAX_VALUE);
  assertEquals(1, unhealthyContainers.size());
  assertEquals(2L, unhealthyContainers.get(0).getContainerId().longValue());
  assertEquals(0,
      unhealthyContainers.get(0).getActualReplicaCount().intValue());

  rec = unHealthyContainersTableHandle.fetchByContainerId(3L).get(0);
  assertEquals("MISSING", rec.getContainerState());
  assertEquals(3, rec.getReplicaDelta().intValue());

  rec = unHealthyContainersTableHandle.fetchByContainerId(4L).get(0);
  assertEquals("OVER_REPLICATED", rec.getContainerState());
  assertEquals(-2, rec.getReplicaDelta().intValue());

  rec = unHealthyContainersTableHandle.fetchByContainerId(5L).get(0);
  assertEquals("MIS_REPLICATED", rec.getContainerState());
  assertEquals(1, rec.getReplicaDelta().intValue());
  assertEquals(2, rec.getExpectedReplicaCount().intValue());
  assertEquals(1, rec.getActualReplicaCount().intValue());
  assertNotNull(rec.getReason());

  ReconTaskStatus taskStatus =
      reconTaskStatusDao.findById(containerHealthTask.getTaskName());
  Assert.assertTrue(taskStatus.getLastUpdatedTimestamp() > currentTime);

  // Now run the job again, to check that relevant records are updated or
  // removed as appropriate. Need to adjust the return value for all the mocks
  // Under replicated -> Delta goes from 2 to 1
  when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(1L)))
      .thenReturn(getMockReplicas(1L, State.CLOSED, State.CLOSED));
  // ID 2 had only an UNHEALTHY replica - make it healthy now
  when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(2L)))
      .thenReturn(getMockReplicas(2L, State.CLOSED, State.CLOSED,
          State.CLOSED));
  // return 0 replicas for container ID 3 -> Still Missing
  when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(3L)))
      .thenReturn(Collections.emptySet());
  // Return 4 Healthy -> Delta changes from -2 to -1
  when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(4L)))
      .thenReturn(getMockReplicas(4L, State.CLOSED, State.CLOSED,
          State.CLOSED, State.CLOSED));
  // Was mis-replicated - make it healthy now
  placementMock.setMisRepWhenDnPresent(null);

  // Wait for a later run to drop the two containers that became healthy.
  LambdaTestUtils.await(6000, 1000,
      () -> (unHealthyContainersTableHandle.count() == 3));

  rec = unHealthyContainersTableHandle.fetchByContainerId(1L).get(0);
  assertEquals("UNDER_REPLICATED", rec.getContainerState());
  assertEquals(1, rec.getReplicaDelta().intValue());

  // This container is now healthy, it should not be in the table any more
  assertEquals(0,
      unHealthyContainersTableHandle.fetchByContainerId(2L).size());

  rec = unHealthyContainersTableHandle.fetchByContainerId(3L).get(0);
  assertEquals("MISSING", rec.getContainerState());
  assertEquals(3, rec.getReplicaDelta().intValue());

  rec = unHealthyContainersTableHandle.fetchByContainerId(4L).get(0);
  assertEquals("OVER_REPLICATED", rec.getContainerState());
  assertEquals(-1, rec.getReplicaDelta().intValue());

  // This container is now healthy, it should not be in the table any more
  assertEquals(0,
      unHealthyContainersTableHandle.fetchByContainerId(5L).size());
}
Use of org.hadoop.ozone.recon.schema.tables.pojos.UnhealthyContainers in the Apache Ozone project.
Class TestContainerHealthTaskRecordGenerator, method testCorrectRecordsGenerated.
/**
 * Feeds {@code generateUnhealthyRecords} a series of replica sets for the
 * same container and checks which unhealthy records come back: none for three
 * CLOSED replicas, one OVER_REPLICATED record for five, a MIS_REPLICATED plus
 * an UNDER_REPLICATED record for two replicas with a failing placement
 * status, and a single MISSING record for an empty replica set.
 */
@Test
public void testCorrectRecordsGenerated() {
  // Timestamp handed to every generateUnhealthyRecords call below.
  final long scanTime = 1234567L;

  // Three CLOSED replicas: the container is healthy, so nothing is produced.
  Set<ContainerReplica> replicaSet =
      generateReplicas(container, CLOSED, CLOSED, CLOSED);
  ContainerHealthStatus health =
      new ContainerHealthStatus(container, replicaSet, placementPolicy);
  List<UnhealthyContainers> generated =
      ContainerHealthTask.ContainerHealthRecords
          .generateUnhealthyRecords(health, scanTime);
  assertEquals(0, generated.size());

  // Five CLOSED replicas: exactly one over-replicated record.
  replicaSet =
      generateReplicas(container, CLOSED, CLOSED, CLOSED, CLOSED, CLOSED);
  health = new ContainerHealthStatus(container, replicaSet, placementPolicy);
  generated = ContainerHealthTask.ContainerHealthRecords
      .generateUnhealthyRecords(health, scanTime);
  assertEquals(1, generated.size());
  UnhealthyContainers entry = generated.get(0);
  assertEquals(UnHealthyContainerStates.OVER_REPLICATED.toString(),
      entry.getContainerState());
  assertEquals(3, entry.getExpectedReplicaCount().intValue());
  assertEquals(5, entry.getActualReplicaCount().intValue());
  assertEquals(-2, entry.getReplicaDelta().intValue());

  // Two CLOSED replicas with a stubbed placement status of (1, 2, 5):
  // expect two records, one mis-replicated and one under-replicated.
  replicaSet = generateReplicas(container, CLOSED, CLOSED);
  when(placementPolicy.validateContainerPlacement(
      Mockito.anyList(), Mockito.anyInt()))
      .thenReturn(new ContainerPlacementStatusDefault(1, 2, 5));
  health = new ContainerHealthStatus(container, replicaSet, placementPolicy);
  generated = ContainerHealthTask.ContainerHealthRecords
      .generateUnhealthyRecords(health, scanTime);
  assertEquals(2, generated.size());

  entry = findRecordForState(generated,
      UnHealthyContainerStates.MIS_REPLICATED);
  assertEquals(UnHealthyContainerStates.MIS_REPLICATED.toString(),
      entry.getContainerState());
  assertEquals(2, entry.getExpectedReplicaCount().intValue());
  assertEquals(1, entry.getActualReplicaCount().intValue());
  assertEquals(1, entry.getReplicaDelta().intValue());
  assertNotNull(entry.getReason());

  entry = findRecordForState(generated,
      UnHealthyContainerStates.UNDER_REPLICATED);
  assertEquals(UnHealthyContainerStates.UNDER_REPLICATED.toString(),
      entry.getContainerState());
  assertEquals(3, entry.getExpectedReplicaCount().intValue());
  assertEquals(2, entry.getActualReplicaCount().intValue());
  assertEquals(1, entry.getReplicaDelta().intValue());

  // No replicas at all: only a MISSING record is expected, even though the
  // stubbed placement status still reports a mis-replication.
  replicaSet.clear();
  when(placementPolicy.validateContainerPlacement(
      Mockito.anyList(), Mockito.anyInt()))
      .thenReturn(new ContainerPlacementStatusDefault(1, 2, 5));
  health = new ContainerHealthStatus(container, replicaSet, placementPolicy);
  generated = ContainerHealthTask.ContainerHealthRecords
      .generateUnhealthyRecords(health, scanTime);
  assertEquals(1, generated.size());
  entry = generated.get(0);
  assertEquals(UnHealthyContainerStates.MISSING.toString(),
      entry.getContainerState());
  assertEquals(3, entry.getExpectedReplicaCount().intValue());
  assertEquals(0, entry.getActualReplicaCount().intValue());
  assertEquals(3, entry.getReplicaDelta().intValue());
}
Use of org.hadoop.ozone.recon.schema.tables.pojos.UnhealthyContainers in the Apache Ozone project.
Class ContainerEndpoint, method getUnhealthyContainers.
/**
 * Return
 * {@link org.apache.hadoop.ozone.recon.api.types.UnhealthyContainerMetadata}
 * for all unhealthy containers.
 *
 * @param state Return only containers matching the given unhealthy state,
 *              eg UNDER_REPLICATED, MIS_REPLICATED, OVER_REPLICATED or
 *              MISSING. Passing null returns all containers.
 * @param limit The limit of unhealthy containers to return.
 * @param batchNum The batch number (like "page number") of results to return.
 *                 Passing 1, will return records 1 to limit. 2 will return
 *                 limit + 1 to 2 * limit, etc.
 * @return {@link Response}
 * @throws WebApplicationException with status 400 when {@code state} is not
 *         the name of an {@code UnHealthyContainerStates} constant, or with
 *         status 500 when an {@link IOException} is raised while loading the
 *         container details.
 */
@GET
@Path("/unhealthy/{state}")
public Response getUnhealthyContainers(
    @PathParam("state") String state,
    @DefaultValue(DEFAULT_FETCH_COUNT) @QueryParam(RECON_QUERY_LIMIT)
        int limit,
    @DefaultValue(DEFAULT_BATCH_NUMBER) @QueryParam(RECON_QUERY_BATCH_PARAM)
        int batchNum) {
  // Multiply in long arithmetic: (batchNum - 1) * limit can exceed the int
  // range for large client-supplied values and would otherwise wrap around
  // silently, possibly to a positive but wrong offset. The result is then
  // clamped to [0, Integer.MAX_VALUE].
  long requestedOffset = ((long) batchNum - 1L) * limit;
  int offset =
      (int) Math.min(Math.max(requestedOffset, 0L), Integer.MAX_VALUE);

  List<UnhealthyContainerMetadata> unhealthyMeta = new ArrayList<>();
  List<UnhealthyContainersSummary> summary;
  try {
    UnHealthyContainerStates internalState = null;
    if (state != null) {
      // If an invalid state is passed in, this will throw
      // illegalArgumentException and fail the request
      internalState = UnHealthyContainerStates.valueOf(state);
    }
    summary = containerHealthSchemaManager.getUnhealthyContainersSummary();
    List<UnhealthyContainers> containers = containerHealthSchemaManager
        .getUnhealthyContainers(internalState, offset, limit);
    // Enrich every unhealthy record with key count, pipeline ID and the
    // latest container history from the container manager.
    for (UnhealthyContainers c : containers) {
      long containerID = c.getContainerId();
      ContainerInfo containerInfo =
          containerManager.getContainer(ContainerID.valueOf(containerID));
      long keyCount = containerInfo.getNumberOfKeys();
      UUID pipelineID = containerInfo.getPipelineID().getId();
      List<ContainerHistory> datanodes =
          containerManager.getLatestContainerHistory(containerID,
              containerInfo.getReplicationConfig().getRequiredNodes());
      unhealthyMeta.add(new UnhealthyContainerMetadata(
          c, datanodes, pipelineID, keyCount));
    }
  } catch (IOException ex) {
    throw new WebApplicationException(ex,
        Response.Status.INTERNAL_SERVER_ERROR);
  } catch (IllegalArgumentException e) {
    throw new WebApplicationException(e, Response.Status.BAD_REQUEST);
  }

  UnhealthyContainersResponse response =
      new UnhealthyContainersResponse(unhealthyMeta);
  // Attach the per-state counts from the summary to the response.
  for (UnhealthyContainersSummary s : summary) {
    response.setSummaryCount(s.getContainerState(), s.getCount());
  }
  return Response.ok(response).build();
}
Use of org.hadoop.ozone.recon.schema.tables.pojos.UnhealthyContainers in the Apache Ozone project.
Class TestReconTasks, method testMissingContainerDownNode.
/**
 * Allocates a container, shuts down the datanode at the head of its
 * pipeline and waits until Recon lists exactly one MISSING container; then
 * restarts that datanode and waits until the MISSING list is empty again.
 */
@Test
public void testMissingContainerDownNode() throws Exception {
  ReconStorageContainerManagerFacade reconFacade =
      (ReconStorageContainerManagerFacade)
          cluster.getReconServer().getReconStorageContainerManager();
  StorageContainerManager scmServer = cluster.getStorageContainerManager();
  PipelineManager reconPipelines = reconFacade.getPipelineManager();
  PipelineManager scmPipelines = scmServer.getPipelineManager();

  // Wait until Recon knows about at least one pipeline.
  LambdaTestUtils.await(60000, 5000,
      () -> reconPipelines.getPipelines().size() >= 1);

  ContainerManager scmContainers = scmServer.getContainerManager();
  ReconContainerManager reconContainers =
      (ReconContainerManager) reconFacade.getContainerManager();

  ContainerInfo allocated = scmContainers.allocateContainer(
      RatisReplicationConfig.getInstance(ONE), "test");
  long allocatedId = allocated.getContainerID();
  Pipeline containerPipeline =
      scmPipelines.getPipeline(allocated.getPipelineID());
  XceiverClientGrpc datanodeClient =
      new XceiverClientGrpc(containerPipeline, conf);
  runTestOzoneContainerViaDataNode(allocatedId, datanodeClient);

  // Recon and SCM must agree on the set of container IDs.
  Assert.assertEquals(scmContainers.getContainerIDs(),
      reconContainers.getContainerIDs());

  // Stop the datanode holding the replica and wait for one MISSING record.
  cluster.shutdownHddsDatanode(containerPipeline.getFirstNode());
  LambdaTestUtils.await(120000, 10000,
      () -> reconContainers.getContainerSchemaManager()
          .getUnhealthyContainers(
              ContainerSchemaDefinition.UnHealthyContainerStates.MISSING,
              0, 1000)
          .size() == 1);

  // Bring the datanode back and wait for the MISSING record to disappear.
  cluster.restartHddsDatanode(containerPipeline.getFirstNode(), true);
  LambdaTestUtils.await(120000, 10000,
      () -> reconContainers.getContainerSchemaManager()
          .getUnhealthyContainers(
              ContainerSchemaDefinition.UnHealthyContainerStates.MISSING,
              0, 1000)
          .isEmpty());
}
Aggregations