Usage example of com.sequenceiq.cloudbreak.ha.domain.Node in the cloudbreak project by Hortonworks: class HeartbeatServiceTest, method getClusterNodes.
/**
 * Builds the three-node test cluster: this node (MY_ID) plus two peers.
 * Each node's heartbeat timestamp is staggered one minute apart starting
 * at BASE_DATE_TIME, expressed as epoch seconds in UTC.
 */
private List<Node> getClusterNodes() {
    String[] nodeIds = {MY_ID, NODE_1_ID, NODE_2_ID};
    List<Node> nodes = new ArrayList<>();
    for (int i = 0; i < nodeIds.length; i++) {
        Node node = new Node(nodeIds[i]);
        node.setLastUpdated(BASE_DATE_TIME.plusMinutes(i).toEpochSecond(ZoneOffset.UTC));
        nodes.add(node);
    }
    return nodes;
}
Usage example of com.sequenceiq.cloudbreak.ha.domain.Node in the cloudbreak project by Hortonworks: class HeartbeatServiceTest, method testOneNodeTakesAllFlowsWithTerminationFlowShouldBeDistributed.
@Test
public void testOneNodeTakesAllFlowsWithTerminationFlowShouldBeDistributed() {
    // Scenario: every peer node has a stale heartbeat, so the current node (MY_ID) is
    // assigned all suspended flows. Two of the flows' resources are under deletion, but
    // because the flows are of a termination type (HelloWorldFlowConfig below), they must
    // NOT be invalidated - all 5 flows are expected to be re-owned and restarted.
    List<Node> clusterNodes = getClusterNodes();
    // myself - heartbeat matches the stubbed clock time, i.e. alive
    clusterNodes.get(0).setLastUpdated(200_000L);
    // set all nodes to failed except myself (heartbeat far behind the clock)
    for (int i = 1; i < clusterNodes.size(); i++) {
        Node node = clusterNodes.get(i);
        node.setLastUpdated(50_000L);
    }
    when(nodeService.findAll()).thenReturn(clusterNodes);
    when(clock.getCurrentTimeMillis()).thenReturn(200_000L);
    // all flows that need to be re-distributed: 2 flows on node1, 3 flows on node2
    List<FlowLog> node1FlowLogs = getFlowLogs(2, 5000);
    // mark as termination-type flows so resource deletion does not invalidate them
    node1FlowLogs.forEach(fl -> fl.setFlowType(ClassValue.of(HelloWorldFlowConfig.class)));
    List<String> suspendedFlows = node1FlowLogs.stream().map(FlowLog::getFlowId).distinct().collect(Collectors.toList());
    when(flowLogService.findAllByCloudbreakNodeId(NODE_1_ID)).thenReturn(new HashSet<>(node1FlowLogs));
    Set<FlowLog> node2FlowLogs = new HashSet<>(getFlowLogs(3, 3000));
    node2FlowLogs.forEach(fl -> fl.setFlowType(ClassValue.of(HelloWorldFlowConfig.class)));
    suspendedFlows.addAll(node2FlowLogs.stream().map(FlowLog::getFlowId).distinct().collect(Collectors.toList()));
    when(flowLogService.findAllByCloudbreakNodeId(NODE_2_ID)).thenReturn(node2FlowLogs);
    // the distributor assigns every one of the 5 suspended flows to this node
    Map<Node, List<String>> distribution = new HashMap<>();
    distribution.computeIfAbsent(clusterNodes.get(0), v -> new ArrayList<>()).addAll(Arrays.asList(suspendedFlows.get(0), suspendedFlows.get(1), suspendedFlows.get(2), suspendedFlows.get(3), suspendedFlows.get(4)));
    when(flowDistributor.distribute(any(), any())).thenReturn(distribution);
    // after distribution this node owns the union of both failed nodes' flow logs
    Set<FlowLog> myNewFlowLogs = new HashSet<>();
    myNewFlowLogs.addAll(node1FlowLogs);
    myNewFlowLogs.addAll(node2FlowLogs);
    when(flowLogService.findAllByCloudbreakNodeId(MY_ID)).thenReturn(myNewFlowLogs);
    when(runningFlows.get(any())).thenReturn(null);
    // two of the underlying resources are being deleted...
    List<Long> stackIds = myNewFlowLogs.stream().map(FlowLog::getResourceId).distinct().collect(Collectors.toList());
    when(haApplication.getDeletingResources(anySet())).thenReturn(Set.of(stackIds.get(0), stackIds.get(2)));
    // ...but HelloWorldFlowConfig is registered as a termination flow, so those flows survive
    doReturn(Collections.singletonList(HelloWorldFlowConfig.class)).when(applicationFlowInformation).getTerminationFlow();
    heartbeatService.scheduledFlowDistribution();
    // every persisted flow log must now be owned by this node
    verify(flowLogService).saveAll(flowLogListCaptor.capture());
    List<FlowLog> updatedFlows = flowLogListCaptor.getValue();
    assertEquals(myNewFlowLogs.size(), updatedFlows.size());
    for (FlowLog updatedFlow : updatedFlows) {
        assertEquals(MY_ID, updatedFlow.getCloudbreakNodeId());
    }
    // all 5 suspended flows are restarted, including those on deleting resources
    verify(flow2Handler, times(5)).restartFlow(stringCaptor.capture());
    List<String> allFlowIds = stringCaptor.getAllValues();
    assertEquals(5L, allFlowIds.size());
    for (String flowId : suspendedFlows) {
        assertTrue(allFlowIds.contains(flowId));
    }
}
Usage example of com.sequenceiq.cloudbreak.ha.domain.Node in the cloudbreak project by Hortonworks: class HeartbeatServiceTest, method testDistributionConcurrency.
@Test
public void testDistributionConcurrency() {
    // Scenario: one peer failed while another is still active, so suspended flows are
    // split between the two live nodes. Persisting the new ownership then fails with an
    // OptimisticLockingFailureException (simulating another node distributing the same
    // flows concurrently); the service must still restart the flows assigned to this node.
    List<Node> clusterNodes = getClusterNodes();
    // myself - heartbeat matches the stubbed clock time, i.e. alive
    clusterNodes.get(0).setLastUpdated(200_000L);
    // failed node - heartbeat far behind the clock
    clusterNodes.get(1).setLastUpdated(50_000L);
    // active node - also alive
    clusterNodes.get(2).setLastUpdated(200_000L);
    when(nodeService.findAll()).thenReturn(clusterNodes);
    when(clock.getCurrentTimeMillis()).thenReturn(200_000L);
    // all flows that need to be re-distributed (3 flows from the failed node)
    List<FlowLog> node1FlowLogs = getFlowLogs(3, 5000);
    List<String> suspendedFlows = node1FlowLogs.stream().map(FlowLog::getFlowId).distinct().collect(Collectors.toList());
    when(flowLogService.findAllByCloudbreakNodeId(NODE_1_ID)).thenReturn(new HashSet<>(node1FlowLogs));
    // distributor assigns flows 0 and 2 to this node, flow 1 to the other active node
    Map<Node, List<String>> distribution = new HashMap<>();
    distribution.computeIfAbsent(clusterNodes.get(0), v -> new ArrayList<>()).addAll(Arrays.asList(suspendedFlows.get(0), suspendedFlows.get(2)));
    distribution.computeIfAbsent(clusterNodes.get(2), v -> new ArrayList<>()).addAll(Collections.singletonList(suspendedFlows.get(1)));
    when(flowDistributor.distribute(any(), any())).thenReturn(distribution);
    // this node ends up owning only the flow logs of flows 0 and 2
    Set<FlowLog> myNewFlowLogs = new HashSet<>();
    myNewFlowLogs.addAll(node1FlowLogs.stream().filter(fl -> fl.getFlowId().equalsIgnoreCase(suspendedFlows.get(0))).collect(Collectors.toList()));
    myNewFlowLogs.addAll(node1FlowLogs.stream().filter(fl -> fl.getFlowId().equalsIgnoreCase(suspendedFlows.get(2))).collect(Collectors.toList()));
    when(flowLogService.findAllByCloudbreakNodeId(MY_ID)).thenReturn(myNewFlowLogs);
    when(runningFlows.get(any())).thenReturn(null);
    // simulate a concurrent distribution by another node: the ownership save is rejected
    when(flowLogService.saveAll(anyCollection())).thenThrow(new OptimisticLockingFailureException("Someone already distributed the flows.."));
    heartbeatService.scheduledFlowDistribution();
    // only the two flows assigned to this node are restarted, despite the failed save
    verify(flow2Handler, times(2)).restartFlow(stringCaptor.capture());
    List<String> allFlowIds = stringCaptor.getAllValues();
    assertEquals(2L, allFlowIds.size());
    for (FlowLog flowLog : myNewFlowLogs) {
        assertTrue(allFlowIds.contains(flowLog.getFlowId()));
    }
}
Usage example of com.sequenceiq.cloudbreak.ha.domain.Node in the cloudbreak project by Hortonworks: class HeartbeatServiceTest, method testOneNodeTakesAllFlowsWithInvalidFlows.
@Test
public void testOneNodeTakesAllFlowsWithInvalidFlows() {
    // Scenario: every peer node has a stale heartbeat, so the current node (MY_ID) takes
    // over all suspended flows. Unlike the termination-flow test, these flow logs carry no
    // termination flow type, so the flows whose resources are under deletion are treated as
    // invalid: their state is finalized as SUCCESSFUL and their node ownership is cleared.
    List<Node> clusterNodes = getClusterNodes();
    // myself - heartbeat matches the stubbed clock time, i.e. alive
    clusterNodes.get(0).setLastUpdated(200_000L);
    // set all nodes to failed except myself (heartbeat far behind the clock)
    for (int i = 1; i < clusterNodes.size(); i++) {
        Node node = clusterNodes.get(i);
        node.setLastUpdated(50_000L);
    }
    when(nodeService.findAll()).thenReturn(clusterNodes);
    when(clock.getCurrentTimeMillis()).thenReturn(200_000L);
    // all flows that need to be re-distributed: 2 flows on node1, 3 flows on node2
    List<FlowLog> node1FlowLogs = getFlowLogs(2, 5000);
    List<String> suspendedFlows = node1FlowLogs.stream().map(FlowLog::getFlowId).distinct().collect(Collectors.toList());
    when(flowLogService.findAllByCloudbreakNodeId(NODE_1_ID)).thenReturn(new HashSet<>(node1FlowLogs));
    Set<FlowLog> node2FlowLogs = new HashSet<>(getFlowLogs(3, 3000));
    suspendedFlows.addAll(node2FlowLogs.stream().map(FlowLog::getFlowId).distinct().collect(Collectors.toList()));
    when(flowLogService.findAllByCloudbreakNodeId(NODE_2_ID)).thenReturn(node2FlowLogs);
    // the distributor assigns every one of the 5 suspended flows to this node
    Map<Node, List<String>> distribution = new HashMap<>();
    distribution.computeIfAbsent(clusterNodes.get(0), v -> new ArrayList<>()).addAll(Arrays.asList(suspendedFlows.get(0), suspendedFlows.get(1), suspendedFlows.get(2), suspendedFlows.get(3), suspendedFlows.get(4)));
    when(flowDistributor.distribute(any(), any())).thenReturn(distribution);
    // after distribution this node owns the union of both failed nodes' flow logs
    Set<FlowLog> myNewFlowLogs = new HashSet<>();
    myNewFlowLogs.addAll(node1FlowLogs);
    myNewFlowLogs.addAll(node2FlowLogs);
    when(flowLogService.findAllByCloudbreakNodeId(MY_ID)).thenReturn(myNewFlowLogs);
    when(runningFlows.get(any())).thenReturn(null);
    // two of the underlying resources are being deleted, and HelloWorldFlowConfig is the
    // only termination flow type - the flow logs here have no flow type set, so the flows
    // on those two resources count as invalid
    List<Long> stackIds = myNewFlowLogs.stream().map(FlowLog::getResourceId).distinct().collect(Collectors.toList());
    when(haApplication.getDeletingResources(anySet())).thenReturn(Set.of(stackIds.get(0), stackIds.get(2)));
    doReturn(Collections.singletonList(HelloWorldFlowConfig.class)).when(applicationFlowInformation).getTerminationFlow();
    // precompute which flow logs belong to the deleting resources, for the assertions below
    List<FlowLog> invalidFlowLogs = myNewFlowLogs.stream().filter(fl -> fl.getResourceId().equals(stackIds.get(0)) || fl.getResourceId().equals(stackIds.get(2))).collect(Collectors.toList());
    heartbeatService.scheduledFlowDistribution();
    verify(flowLogService).saveAll(flowLogListCaptor.capture());
    List<FlowLog> updatedFlows = flowLogListCaptor.getValue();
    assertEquals(myNewFlowLogs.size(), updatedFlows.size());
    for (FlowLog updatedFlow : updatedFlows) {
        if (invalidFlowLogs.contains(updatedFlow)) {
            // invalid flows are finalized and detached from any node
            assertEquals(StateStatus.SUCCESSFUL, updatedFlow.getStateStatus());
            assertNull(updatedFlow.getCloudbreakNodeId());
        } else {
            // valid flows are re-owned by this node
            assertEquals(MY_ID, updatedFlow.getCloudbreakNodeId());
        }
    }
    // all 5 distributed flow ids are still passed to restartFlow (the handler is
    // responsible for dealing with the already-finalized ones)
    verify(flow2Handler, times(5)).restartFlow(stringCaptor.capture());
    List<String> allFlowIds = stringCaptor.getAllValues();
    assertEquals(5L, allFlowIds.size());
    for (String flowId : suspendedFlows) {
        assertTrue(allFlowIds.contains(flowId));
    }
}
Usage example of com.sequenceiq.cloudbreak.ha.domain.Node in the cloudbreak project by Hortonworks: class HeartbeatService, method heartbeat.
/**
 * Periodically refreshes this node's heartbeat timestamp in the shared node registry.
 * Runs on the configured cron (default: every 30 seconds). The write is retried up to
 * 5 times with a 2 second delay; if all attempts fail, every in-memory flow is cancelled
 * without touching the database. Finishes by cancelling flows that became invalid.
 */
@Scheduled(cron = "${cb.ha.heartbeat.rate:0/30 * * * * *}")
public void heartbeat() {
    if (!shouldRun()) {
        return;
    }
    String nodeId = nodeConfig.getId();
    try {
        retryService.testWith2SecDelayMax5Times(() -> updateHeartbeatTimestamp(nodeId));
    } catch (Retry.ActionFailedException af) {
        // Could not persist the heartbeat even after retries: other nodes will consider
        // this node dead, so drop all locally running flows to avoid split ownership.
        LOGGER.error("Failed to update the heartbeat timestamp 5 times for node {}: {}", nodeId, af.getMessage());
        inMemoryCleanup.cancelEveryFlowWithoutDbUpdate();
    }
    cancelInvalidFlows();
}

/**
 * Single heartbeat-update attempt: loads (or creates) this node's registry entry,
 * stamps it with the current clock time and saves it. Returns {@code Boolean.TRUE}
 * on success; wraps any runtime failure in a Retry.ActionFailedException so the
 * surrounding retry template can try again.
 */
private Boolean updateHeartbeatTimestamp(String nodeId) {
    LOGGER.debug("Node {} is trying to update heartbeat timestamp", nodeId);
    try {
        Node self = nodeService.findById(nodeId).orElse(new Node(nodeId));
        long previousTimestamp = self.getLastUpdated();
        long currentTimestamp = clock.getCurrentTimeMillis();
        self.setLastUpdated(currentTimestamp);
        nodeService.save(self);
        LOGGER.debug("Node {} has updated heartbeat timestamp from {} to {}", nodeId, previousTimestamp, currentTimestamp);
        metricService.incrementMetricCounter(MetricType.HEARTBEAT_UPDATE_SUCCESS);
        return Boolean.TRUE;
    } catch (RuntimeException e) {
        LOGGER.error("Failed to update the heartbeat timestamp", e);
        metricService.incrementMetricCounter(MetricType.HEARTBEAT_UPDATE_FAILED);
        throw new Retry.ActionFailedException(e.getMessage());
    }
}
End of aggregated usage examples.