use of org.apache.asterix.common.dataflow.ICcApplicationContext in project asterixdb by apache.
the class VersionApiServlet method get.
@Override
protected void get(IServletRequest request, IServletResponse response) {
    response.setStatus(HttpResponseStatus.OK);
    ICcApplicationContext props = (ICcApplicationContext) ctx.get(ASTERIX_APP_CONTEXT_INFO_ATTR);
    Map<String, String> buildProperties = props.getBuildProperties().getAllProps();
    ObjectMapper om = new ObjectMapper();
    ObjectNode responseObject = om.createObjectNode();
    for (Map.Entry<String, String> e : buildProperties.entrySet()) {
        responseObject.put(e.getKey(), e.getValue());
    }
    try {
        HttpUtil.setContentType(response, HttpUtil.ContentType.TEXT_PLAIN, HttpUtil.Encoding.UTF8);
    } catch (IOException e) {
        LOGGER.log(Level.WARNING, "Failure handling request", e);
        response.setStatus(HttpResponseStatus.INTERNAL_SERVER_ERROR);
        return;
    }
    PrintWriter responseWriter = response.writer();
    responseWriter.write(responseObject.toString());
    responseWriter.flush();
}
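For context, a caller sees the build properties above as a flat JSON object. The snippet below is a minimal client-side sketch that fetches and prints that response; the host, port, and the /admin/version path are assumptions for illustration, not taken from the servlet itself.

// Minimal client sketch (not part of the servlet): fetch the build properties over HTTP.
// The URL is an assumption; the actual host, port, and path depend on how
// VersionApiServlet is mapped in the cluster controller's HTTP server.
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class VersionClientSketch {
    public static void main(String[] args) throws Exception {
        HttpClient client = HttpClient.newHttpClient();
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:19002/admin/version")) // assumed endpoint
                .GET()
                .build();
        HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.statusCode()); // 200 on success
        System.out.println(response.body());       // one JSON object with the build properties
    }
}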
use of org.apache.asterix.common.dataflow.ICcApplicationContext in project asterixdb by apache.
the class AutoFaultToleranceStrategy method processPendingFailbackPlans.
private synchronized void processPendingFailbackPlans() {
    ClusterState state = clusterManager.getState();
    /*
     * if the cluster state is not ACTIVE, then failbacks should not be processed
     * since some partitions are not active
     */
    if (state == ClusterState.ACTIVE) {
        while (!pendingProcessingFailbackPlans.isEmpty()) {
            //take the first pending failback plan
            NodeFailbackPlan plan = pendingProcessingFailbackPlans.pop();
            /*
             * A plan at this stage will be in one of two states:
             * 1. PREPARING -> the participants were selected but no requests have been sent yet.
             * 2. PENDING_ROLLBACK -> a participant failed before any requests were sent.
             */
            if (plan.getState() == FailbackPlanState.PREPARING) {
                //mark the partitions that will be failed back as inactive
                String failbackNode = plan.getNodeId();
                for (Integer partitionId : plan.getPartitionsToFailback()) {
                    //partition expected to be returned to the failing back node
                    clusterManager.updateClusterPartition(partitionId, failbackNode, false);
                }
                /*
                 * if the returning node is the original metadata node,
                 * then the metadata node will change after the failback completes
                 */
                ICcApplicationContext appCtx = (ICcApplicationContext) serviceCtx.getApplicationContext();
                String originalMetadataNode = appCtx.getMetadataProperties().getMetadataNodeName();
                if (originalMetadataNode.equals(failbackNode)) {
                    plan.setNodeToReleaseMetadataManager(currentMetadataNode);
                    currentMetadataNode = "";
                    metadataNodeActive = false;
                    clusterManager.updateMetadataNode(currentMetadataNode, metadataNodeActive);
                }
                //force new jobs to wait
                clusterManager.setState(ClusterState.REBALANCING);
                handleFailbackRequests(plan, messageBroker);
                /*
                 * wait until the current plan is completed before processing the next plan.
                 * when the current one completes or is reverted, the cluster state will be
                 * ACTIVE again, and the next failback plan (if any) will be processed.
                 */
                break;
            } else if (plan.getState() == FailbackPlanState.PENDING_ROLLBACK) {
                //this plan failed before sending any requests -> nothing to roll back
                planId2FailbackPlanMap.remove(plan.getPlanId());
            }
        }
    }
}
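The drain loop above handles exactly two plan states: PREPARING plans are started and processing stops until they complete, while PENDING_ROLLBACK plans are simply discarded. Below is a self-contained sketch of that pattern using simplified stand-in types; the real NodeFailbackPlan and FailbackPlanState carry considerably more state.

import java.util.ArrayDeque;
import java.util.Deque;

// Stand-alone sketch of the pending-plan drain loop above, using placeholder
// types instead of AsterixDB's NodeFailbackPlan/FailbackPlanState.
public class FailbackDrainSketch {
    enum PlanState { PREPARING, PENDING_ROLLBACK }

    static class Plan {
        final long id;
        final PlanState state;
        Plan(long id, PlanState state) { this.id = id; this.state = state; }
    }

    public static void main(String[] args) {
        Deque<Plan> pending = new ArrayDeque<>();
        pending.push(new Plan(1, PlanState.PREPARING));
        pending.push(new Plan(2, PlanState.PENDING_ROLLBACK));

        while (!pending.isEmpty()) {
            Plan plan = pending.pop();
            if (plan.state == PlanState.PREPARING) {
                System.out.println("starting failback plan " + plan.id);
                break; // process one plan at a time; resume when the cluster is ACTIVE again
            } else {
                // failed before any request was sent -> nothing to roll back, just drop it
                System.out.println("discarding plan " + plan.id);
            }
        }
    }
}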
use of org.apache.asterix.common.dataflow.ICcApplicationContext in project asterixdb by apache.
the class AutoFaultToleranceStrategy method requestPartitionsTakeover.
private synchronized void requestPartitionsTakeover(String failedNodeId) {
    //replica -> list of partitions to take over
    Map<String, List<Integer>> partitionRecoveryPlan = new HashMap<>();
    ICcApplicationContext appCtx = (ICcApplicationContext) serviceCtx.getApplicationContext();
    ReplicationProperties replicationProperties = appCtx.getReplicationProperties();
    //collect the partitions of the failed NC
    List<ClusterPartition> lostPartitions = getNodeAssignedPartitions(failedNodeId);
    if (!lostPartitions.isEmpty()) {
        for (ClusterPartition partition : lostPartitions) {
            //find replicas for this partition
            Set<String> partitionReplicas = replicationProperties.getNodeReplicasIds(partition.getNodeId());
            //find a replica that is still active
            for (String replica : partitionReplicas) {
                //this needs to be modified to consider load balancing
                if (addActiveReplica(replica, partition, partitionRecoveryPlan)) {
                    break;
                }
            }
        }
        if (partitionRecoveryPlan.size() == 0) {
            //no active replicas were found for the failed node
            LOGGER.severe("Could not find active replicas for the partitions " + lostPartitions);
            return;
        } else {
            LOGGER.info("Partitions to recover: " + lostPartitions);
        }
        //for each replica, send a request to take over the assigned partitions
        for (Entry<String, List<Integer>> entry : partitionRecoveryPlan.entrySet()) {
            String replica = entry.getKey();
            Integer[] partitionsToTakeover = entry.getValue().toArray(new Integer[entry.getValue().size()]);
            long requestId = clusterRequestId++;
            TakeoverPartitionsRequestMessage takeoverRequest =
                    new TakeoverPartitionsRequestMessage(requestId, replica, partitionsToTakeover);
            pendingTakeoverRequests.put(requestId, takeoverRequest);
            try {
                messageBroker.sendApplicationMessageToNC(takeoverRequest, replica);
            } catch (Exception e) {
                /*
                 * if we fail to send the request, it means the NC we tried to send it to
                 * has failed. When the failure notification arrives, any pending request
                 * that belongs to the failed NC will be sent to a different active replica.
                 */
                LOGGER.log(Level.WARNING, "Failed to send takeover request: " + takeoverRequest, e);
            }
        }
    }
}
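The addActiveReplica helper used above is not shown. As an illustration of the grouping it is expected to build (replica id -> partitions that replica should take over), here is a hedged, simplified sketch; the integer partition ids, the explicit set of live nodes, and the method body are assumptions for illustration only.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

// Simplified sketch of the replica -> partitions grouping built above. This is NOT the
// real addActiveReplica implementation; it only illustrates the assumed behavior of
// skipping dead replicas and appending partition ids to the recovery plan.
public class TakeoverPlanSketch {
    static boolean addActiveReplica(String replica, int partitionId, Set<String> liveNodes,
                                    Map<String, List<Integer>> plan) {
        if (!liveNodes.contains(replica)) {
            return false; // replica itself is down, try the next one
        }
        plan.computeIfAbsent(replica, k -> new ArrayList<>()).add(partitionId);
        return true;
    }

    public static void main(String[] args) {
        Map<String, List<Integer>> plan = new HashMap<>();
        Set<String> liveNodes = new HashSet<>(Arrays.asList("nc2", "nc3"));
        addActiveReplica("nc1", 0, liveNodes, plan); // nc1 is down, not added
        addActiveReplica("nc2", 0, liveNodes, plan);
        addActiveReplica("nc2", 1, liveNodes, plan);
        System.out.println(plan); // {nc2=[0, 1]}
    }
}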
use of org.apache.asterix.common.dataflow.ICcApplicationContext in project asterixdb by apache.
the class MetadataNodeFaultToleranceStrategy method getMetadataPartitionRecoveryPlan.
private RemoteRecoveryTask getMetadataPartitionRecoveryPlan() {
    if (hotStandbyMetadataReplica.isEmpty()) {
        throw new IllegalStateException("No metadata replicas to recover from");
    }
    // Construct recovery plan: Node => Set of partitions to recover from it
    Map<String, Set<Integer>> recoveryPlan = new HashMap<>();
    // Recover metadata partition from any metadata hot standby replica
    ICcApplicationContext appCtx = (ICcApplicationContext) serviceCtx.getApplicationContext();
    int metadataPartitionId = appCtx.getMetadataProperties().getMetadataPartition().getPartitionId();
    Set<Integer> metadataPartition = new HashSet<>(Arrays.asList(metadataPartitionId));
    recoveryPlan.put(hotStandbyMetadataReplica.iterator().next(), metadataPartition);
    return new RemoteRecoveryTask(recoveryPlan);
}
use of org.apache.asterix.common.dataflow.ICcApplicationContext in project asterixdb by apache.
the class MetadataNodeFaultToleranceStrategy method notifyNodeFailure.
@Override
public synchronized void notifyNodeFailure(String nodeId) throws HyracksDataException {
    failedNodes.add(nodeId);
    hotStandbyMetadataReplica.remove(nodeId);
    clusterManager.updateNodePartitions(nodeId, false);
    if (nodeId.equals(metadataNodeId)) {
        clusterManager.updateMetadataNode(metadataNodeId, false);
    }
    clusterManager.refreshState();
    if (replicationStrategy.isParticipant(nodeId)) {
        // Notify impacted replicas
        FaultToleranceUtil.notifyImpactedReplicas(nodeId, ClusterEventType.NODE_FAILURE, clusterManager,
                messageBroker, replicationStrategy);
    }
    // If the failed node is the metadata node, ask its replicas to replay any committed jobs
    if (nodeId.equals(metadataNodeId)) {
        ICcApplicationContext appCtx = (ICcApplicationContext) serviceCtx.getApplicationContext();
        int metadataPartitionId = appCtx.getMetadataProperties().getMetadataPartition().getPartitionId();
        Set<Integer> metadataPartition = new HashSet<>(Arrays.asList(metadataPartitionId));
        Set<Replica> activeRemoteReplicas = replicationStrategy.getRemoteReplicas(metadataNodeId).stream()
                .filter(replica -> !failedNodes.contains(replica.getId()))
                .collect(Collectors.toSet());
        //TODO: do an election to identify the node with the latest state
        for (Replica replica : activeRemoteReplicas) {
            ReplayPartitionLogsRequestMessage msg = new ReplayPartitionLogsRequestMessage(metadataPartition);
            try {
                messageBroker.sendApplicationMessageToNC(msg, replica.getId());
            } catch (Exception e) {
                LOGGER.log(Level.WARNING, "Failed sending an application message to an NC", e);
                continue;
            }
        }
    }
}
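The stream above simply excludes already-failed nodes from the metadata node's replica set before broadcasting the replay request. A tiny stand-alone sketch of that filter, with plain String ids standing in for AsterixDB's Replica objects (an assumption for brevity):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;

// Stand-alone sketch of the active-replica filter above, using String ids
// in place of AsterixDB's Replica objects.
public class ActiveReplicaFilterSketch {
    public static void main(String[] args) {
        Set<String> failedNodes = new HashSet<>(Arrays.asList("nc2"));
        Set<String> remoteReplicas = new HashSet<>(Arrays.asList("nc2", "nc3", "nc4"));

        Set<String> activeRemoteReplicas = remoteReplicas.stream()
                .filter(replica -> !failedNodes.contains(replica))
                .collect(Collectors.toSet());

        System.out.println(activeRemoteReplicas); // contains nc3 and nc4, but not nc2
    }
}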