use of org.apache.asterix.common.cluster.ClusterPartition in project asterixdb by apache.
the class AutoFaultToleranceStrategy method requestPartitionsTakeover.
/**
 * Builds a takeover plan for the partitions that were assigned to {@code failedNodeId} and sends one
 * takeover request per chosen replica.
 *
 * Nothing is sent when the failed node had no assigned partitions, or when no replica could be
 * selected for any of its partitions.
 *
 * @param failedNodeId
 *            the id of the node whose partitions must be taken over
 */
private synchronized void requestPartitionsTakeover(String failedNodeId) {
    ICcApplicationContext ccAppCtx = (ICcApplicationContext) serviceCtx.getApplicationContext();
    ReplicationProperties replicationProps = ccAppCtx.getReplicationProperties();
    // partitions that were assigned to the failed NC
    List<ClusterPartition> lostPartitions = getNodeAssignedPartitions(failedNodeId);
    if (lostPartitions.isEmpty()) {
        return;
    }

    // recovery plan: replica id -> ids of the partitions it is asked to take over
    Map<String, List<Integer>> takeoverPlan = new HashMap<>();
    for (ClusterPartition lost : lostPartitions) {
        Set<String> candidates = replicationProps.getNodeReplicasIds(lost.getNodeId());
        for (String candidate : candidates) {
            // Stop at the first candidate that addActiveReplica accepts.
            // NOTE: this selection does not consider load balancing yet.
            boolean accepted = addActiveReplica(candidate, lost, takeoverPlan);
            if (accepted) {
                break;
            }
        }
    }

    if (takeoverPlan.isEmpty()) {
        // not a single partition could be assigned to a replica
        LOGGER.severe("Could not find active replicas for the partitions " + lostPartitions);
        return;
    }
    LOGGER.info("Partitions to recover: " + lostPartitions);

    // one request per replica, carrying all partitions assigned to it
    for (Entry<String, List<Integer>> assignment : takeoverPlan.entrySet()) {
        String targetReplica = assignment.getKey();
        List<Integer> assignedPartitions = assignment.getValue();
        Integer[] partitionsToTakeover = assignedPartitions.toArray(new Integer[assignedPartitions.size()]);
        long requestId = clusterRequestId++;
        TakeoverPartitionsRequestMessage request =
                new TakeoverPartitionsRequestMessage(requestId, targetReplica, partitionsToTakeover);
        // register the request as pending before attempting to send it
        pendingTakeoverRequests.put(requestId, request);
        try {
            messageBroker.sendApplicationMessageToNC(request, targetReplica);
        } catch (Exception e) {
            /*
             * A send failure means the target NC itself has failed. The request stays pending;
             * once that NC's failure notification arrives, its pending requests are sent to a
             * different active replica.
             */
            LOGGER.log(Level.WARNING, "Failed to send takeover request: " + request, e);
        }
    }
}
use of org.apache.asterix.common.cluster.ClusterPartition in project asterixdb by apache.
the class SplitsAndConstraintsUtil method getDataverseSplits.
/**
 * Creates one file split per cluster partition, each pointing at the dataverse directory inside
 * that partition's storage path.
 *
 * @param dataverseName
 *            the dataverse name, used as the path relative to each storage partition
 * @return the file splits, in the order the cluster partitions are reported
 */
private static FileSplit[] getDataverseSplits(String dataverseName) {
    File relPathFile = new File(dataverseName);
    List<FileSplit> splits = new ArrayList<>();
    // every partition known to the cluster state manager
    ClusterPartition[] allPartitions = ClusterStateManager.INSTANCE.getClusterPartitons();
    String storageDirName = ClusterProperties.INSTANCE.getStorageDirectoryName();
    for (ClusterPartition partition : allPartitions) {
        // <storage partition path>/<dataverse>
        File dataverseDir = new File(
                StoragePathUtil.prepareStoragePartitionPath(storageDirName, partition.getPartitionId())
                        + File.separator + relPathFile);
        splits.add(StoragePathUtil.getFileSplitForClusterPartition(partition, dataverseDir.getPath()));
    }
    return splits.toArray(new FileSplit[0]);
}
use of org.apache.asterix.common.cluster.ClusterPartition in project asterixdb by apache.
the class SplitsAndConstraintsUtil method getIndexSplits.
/**
 * Creates the file splits of an index: one split per partition of every node in {@code nodes}.
 *
 * Each split path has the form
 * {@code <storage partition path>[/<temp datasets folder>]/<dataverse index path>}, where the temp
 * folder segment is present only when the dataset is temporary.
 *
 * @param dataset
 *            the dataset that owns the index
 * @param indexName
 *            the name of the index
 * @param nodes
 *            the ids of the nodes that host the index
 * @return the file splits, grouped by node in the order given by {@code nodes}
 */
public static FileSplit[] getIndexSplits(Dataset dataset, String indexName, List<String> nodes) {
    String indexRelPath = StoragePathUtil.prepareDataverseIndexName(dataset.getDataverseName(),
            dataset.getDatasetName(), indexName, dataset.getRebalanceCount());
    File relPathFile = new File(indexRelPath);
    String storageDirName = ClusterProperties.INSTANCE.getStorageDirectoryName();
    List<FileSplit> splits = new ArrayList<>();
    for (String nodeId : nodes) {
        int partitionCount = ClusterStateManager.INSTANCE.getNodePartitionsCount(nodeId);
        ClusterPartition[] nodePartitions = ClusterStateManager.INSTANCE.getNodePartitions(nodeId);
        // the metadata node group is limited to a single partition per node
        // (currently never triggered, since the metadata group doesn't exist)
        if (dataset.getNodeGroupName().equals(MetadataConstants.METADATA_NODEGROUP_NAME)) {
            partitionCount = 1;
        }
        for (int idx = 0; idx < partitionCount; idx++) {
            ClusterPartition partition = nodePartitions[idx];
            // format: 'storage dir name'/partition_#/dataverse/dataset_idx_index
            StringBuilder path = new StringBuilder();
            path.append(StoragePathUtil.prepareStoragePartitionPath(storageDirName, partition.getPartitionId()));
            if (dataset.isTemp()) {
                path.append(File.separator).append(StoragePathUtil.TEMP_DATASETS_STORAGE_FOLDER);
            }
            path.append(File.separator).append(relPathFile);
            File indexDir = new File(path.toString());
            splits.add(StoragePathUtil.getFileSplitForClusterPartition(partition, indexDir.getPath()));
        }
    }
    return splits.toArray(new FileSplit[0]);
}
use of org.apache.asterix.common.cluster.ClusterPartition in project asterixdb by apache.
the class ClusterStateManager method refreshState.
/**
 * Recomputes the cluster state from the current partition and metadata-node status.
 *
 * The state is set to UNUSABLE as soon as an inactive partition is found. Otherwise it is set to
 * PENDING and, if the metadata node is active, to ACTIVE after metadata bootstrap; waiters on this
 * object are then notified and global recovery is started.
 *
 * @throws HyracksDataException
 *             declared by the overridden method; presumably raised by one of the calls made here
 */
@Override
public synchronized void refreshState() throws HyracksDataException {
    resetClusterPartitionConstraint();
    // a single inactive storage partition is enough to mark the cluster unusable
    for (ClusterPartition partition : clusterPartitions.values()) {
        if (partition.isActive()) {
            continue;
        }
        setState(ClusterState.UNUSABLE);
        return;
    }

    // all storage partitions are active
    setState(ClusterState.PENDING);
    LOGGER.info("Cluster is now " + state);
    if (!metadataNodeActive) {
        // without an active metadata node no further transition happens here
        return;
    }

    // storage partitions and the metadata node are all active: the cluster is active
    appCtx.getMetadataBootstrap().init();
    setState(ClusterState.ACTIVE);
    LOGGER.info("Cluster is now " + state);
    notifyAll();
    // start global recovery
    appCtx.getGlobalRecoveryManager().startGlobalRecovery(appCtx);
}
use of org.apache.asterix.common.cluster.ClusterPartition in project asterixdb by apache.
the class ClusterStateManager method getClusterStateDescription.
/**
 * Builds a JSON description of the cluster: the overall state, the current metadata node, and one
 * entry per node controller (sorted by node id) listing its partitions and a derived node state.
 *
 * The node state is:
 * <ul>
 * <li>{@code FAILED} if the node is in {@code failedNodes};</li>
 * <li>{@code INACTIVE} if it has no active partition (including having no partitions at all);</li>
 * <li>{@code ACTIVE} if every one of its partitions is active;</li>
 * <li>{@code PARTIALLY_ACTIVE} if some, but not all, of its partitions are active.</li>
 * </ul>
 *
 * @return the JSON object describing the cluster state
 */
public synchronized ObjectNode getClusterStateDescription() {
    ObjectMapper om = new ObjectMapper();
    ObjectNode stateDescription = om.createObjectNode();
    stateDescription.put("state", state.name());
    stateDescription.put("metadata_node", currentMetadataNode);
    ArrayNode ncs = om.createArrayNode();
    stateDescription.set("ncs", ncs);
    // TreeSet gives a stable, sorted node order in the output
    for (String node : new TreeSet<>(((ClusterControllerService) appCtx.getServiceContext().getControllerService())
            .getNodeManager().getAllNodeIds())) {
        ObjectNode nodeJSON = om.createObjectNode();
        nodeJSON.put("node_id", node);
        boolean allActive = true;
        boolean anyActive = false;
        Set<Map<String, Object>> partitions = new HashSet<>();
        if (node2PartitionsMap.containsKey(node)) {
            for (ClusterPartition part : node2PartitionsMap.get(node)) {
                boolean active = part.isActive();
                Map<String, Object> partition = new HashMap<>();
                partition.put("partition_id", "partition_" + part.getPartitionId());
                partition.put("active", active);
                partitions.add(partition);
                allActive = allActive && active;
                // Track "any" independently of "all": previously anyActive was only set while
                // allActive still held, so an inactive partition seen first made a partially
                // active node report INACTIVE.
                anyActive = anyActive || active;
            }
        }
        String nodeState;
        if (failedNodes.contains(node)) {
            nodeState = "FAILED";
        } else if (!anyActive) {
            nodeState = "INACTIVE";
        } else if (allActive) {
            nodeState = "ACTIVE";
        } else {
            nodeState = "PARTIALLY_ACTIVE";
        }
        nodeJSON.put("state", nodeState);
        nodeJSON.putPOJO("partitions", partitions);
        ncs.add(nodeJSON);
    }
    return stateDescription;
}
Aggregations