Search in sources :

Example 16 with ClusterPartition

use of org.apache.asterix.common.cluster.ClusterPartition in project asterixdb by apache.

the class AutoFaultToleranceStrategy method requestPartitionsTakeover.

/**
 * Plans and requests the takeover of the partitions that were assigned to a failed node.
 * For every lost partition the replicas of the partition's node are tried in iteration order
 * until {@code addActiveReplica} accepts one; afterwards one takeover request is sent to each
 * replica that ended up in the plan.
 *
 * @param failedNodeId id of the node whose assigned partitions should be taken over
 */
private synchronized void requestPartitionsTakeover(String failedNodeId) {
    // replica -> partitions that replica is asked to take over
    Map<String, List<Integer>> partitionRecoveryPlan = new HashMap<>();
    ICcApplicationContext appCtx = (ICcApplicationContext) serviceCtx.getApplicationContext();
    ReplicationProperties replicationProperties = appCtx.getReplicationProperties();

    // partitions that were assigned to the failed NC
    List<ClusterPartition> lostPartitions = getNodeAssignedPartitions(failedNodeId);
    if (lostPartitions.isEmpty()) {
        // nothing was assigned to the failed node, so there is nothing to recover
        return;
    }

    for (ClusterPartition lost : lostPartitions) {
        // replicas of the node that owns this partition
        Set<String> candidates = replicationProperties.getNodeReplicasIds(lost.getNodeId());
        // stop at the first candidate that is accepted; load balancing is not considered yet
        for (String candidate : candidates) {
            if (addActiveReplica(candidate, lost, partitionRecoveryPlan)) {
                break;
            }
        }
    }

    if (partitionRecoveryPlan.isEmpty()) {
        // no active replicas were found for the failed node
        LOGGER.severe("Could not find active replicas for the partitions " + lostPartitions);
        return;
    }
    LOGGER.info("Partitions to recover: " + lostPartitions);

    // one takeover request per replica in the plan
    for (Entry<String, List<Integer>> planEntry : partitionRecoveryPlan.entrySet()) {
        String replica = planEntry.getKey();
        List<Integer> assigned = planEntry.getValue();
        Integer[] partitionsToTakeover = assigned.toArray(new Integer[assigned.size()]);
        long requestId = clusterRequestId++;
        TakeoverPartitionsRequestMessage takeoverRequest =
                new TakeoverPartitionsRequestMessage(requestId, replica, partitionsToTakeover);
        // register the request as pending before attempting to send it
        pendingTakeoverRequests.put(requestId, takeoverRequest);
        try {
            messageBroker.sendApplicationMessageToNC(takeoverRequest, replica);
        } catch (Exception e) {
            /*
             * A send failure means the target NC itself has failed. Once its failure
             * notification arrives, the pending requests that belong to it are sent to a
             * different active replica, so the failure is only logged here.
             */
            LOGGER.log(Level.WARNING, "Failed to send takeover request: " + takeoverRequest, e);
        }
    }
}
Also used : TakeoverPartitionsRequestMessage(org.apache.asterix.app.replication.message.TakeoverPartitionsRequestMessage) ICcApplicationContext(org.apache.asterix.common.dataflow.ICcApplicationContext) HashMap(java.util.HashMap) RuntimeDataException(org.apache.asterix.common.exceptions.RuntimeDataException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) ReplicationProperties(org.apache.asterix.common.config.ReplicationProperties) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) ClusterPartition(org.apache.asterix.common.cluster.ClusterPartition)

Example 17 with ClusterPartition

use of org.apache.asterix.common.cluster.ClusterPartition in project asterixdb by apache.

the class SplitsAndConstraintsUtil method getDataverseSplits.

/**
 * Creates one file split per cluster partition for the given dataverse. Each split points at
 * {@code <storage partition path>/<dataverseName>}.
 *
 * @param dataverseName name of the dataverse, used as the relative path inside each partition
 * @return the splits, in the order the partitions are returned by the cluster state manager
 */
private static FileSplit[] getDataverseSplits(String dataverseName) {
    File relPathFile = new File(dataverseName);
    List<FileSplit> splits = new ArrayList<>();
    // every partition in the cluster contributes exactly one split
    ClusterPartition[] allPartitions = ClusterStateManager.INSTANCE.getClusterPartitons();
    String storageDirName = ClusterProperties.INSTANCE.getStorageDirectoryName();
    for (ClusterPartition partition : allPartitions) {
        File dataverseDir = new File(
                StoragePathUtil.prepareStoragePartitionPath(storageDirName, partition.getPartitionId())
                        + File.separator + relPathFile);
        splits.add(StoragePathUtil.getFileSplitForClusterPartition(partition, dataverseDir.getPath()));
    }
    return splits.toArray(new FileSplit[0]);
}
Also used : ArrayList(java.util.ArrayList) FileSplit(org.apache.hyracks.api.io.FileSplit) File(java.io.File) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) ClusterPartition(org.apache.asterix.common.cluster.ClusterPartition)

Example 18 with ClusterPartition

use of org.apache.asterix.common.cluster.ClusterPartition in project asterixdb by apache.

the class SplitsAndConstraintsUtil method getIndexSplits.

/**
 * Creates the file splits of an index of {@code dataset} on the given nodes: one split per
 * partition of each node (only the first partition when the dataset belongs to the metadata
 * node group).
 *
 * @param dataset   dataset that owns the index
 * @param indexName name of the index
 * @param nodes     ids of the nodes to create splits on
 * @return the splits, grouped by node in the order of {@code nodes}
 */
public static FileSplit[] getIndexSplits(Dataset dataset, String indexName, List<String> nodes) {
    File relPathFile = new File(StoragePathUtil.prepareDataverseIndexName(dataset.getDataverseName(),
            dataset.getDatasetName(), indexName, dataset.getRebalanceCount()));
    String storageDirName = ClusterProperties.INSTANCE.getStorageDirectoryName();
    List<FileSplit> splits = new ArrayList<>();
    for (String nodeId : nodes) {
        int partitionCount = ClusterStateManager.INSTANCE.getNodePartitionsCount(nodeId);
        ClusterPartition[] nodePartitions = ClusterStateManager.INSTANCE.getNodePartitions(nodeId);
        // currently this case is never executed since the metadata group doesn't exist
        if (dataset.getNodeGroupName().compareTo(MetadataConstants.METADATA_NODEGROUP_NAME) == 0) {
            partitionCount = 1;
        }
        int idx = 0;
        while (idx < partitionCount) {
            ClusterPartition partition = nodePartitions[idx];
            // format: 'storage dir name'/partition_#/[temp folder/]dataverse/dataset_idx_index
            StringBuilder path = new StringBuilder();
            path.append(StoragePathUtil.prepareStoragePartitionPath(storageDirName, partition.getPartitionId()));
            if (dataset.isTemp()) {
                path.append(File.separator).append(StoragePathUtil.TEMP_DATASETS_STORAGE_FOLDER);
            }
            path.append(File.separator).append(relPathFile);
            File indexDir = new File(path.toString());
            splits.add(StoragePathUtil.getFileSplitForClusterPartition(partition, indexDir.getPath()));
            idx++;
        }
    }
    return splits.toArray(new FileSplit[0]);
}
Also used : ArrayList(java.util.ArrayList) FileSplit(org.apache.hyracks.api.io.FileSplit) File(java.io.File) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) ClusterPartition(org.apache.asterix.common.cluster.ClusterPartition)

Example 19 with ClusterPartition

use of org.apache.asterix.common.cluster.ClusterPartition in project asterixdb by apache.

the class ClusterStateManager method refreshState.

/**
 * Recomputes the cluster state from the current partition and metadata-node status.
 * <ul>
 * <li>any inactive partition: state becomes {@code UNUSABLE} and nothing else happens;</li>
 * <li>all partitions active: state becomes {@code PENDING};</li>
 * <li>additionally, if the metadata node is active: metadata bootstrap is initialized, state
 * becomes {@code ACTIVE}, threads waiting on this object are notified and global recovery is
 * started.</li>
 * </ul>
 *
 * @throws HyracksDataException declared by the method; which of the calls below raises it is
 *                              not visible from here
 */
@Override
public synchronized void refreshState() throws HyracksDataException {
    resetClusterPartitionConstraint();

    // look for the first partition that is not active
    boolean everyPartitionActive = true;
    for (ClusterPartition partition : clusterPartitions.values()) {
        if (!partition.isActive()) {
            everyPartitionActive = false;
            break;
        }
    }
    if (!everyPartitionActive) {
        setState(ClusterState.UNUSABLE);
        return;
    }

    setState(ClusterState.PENDING);
    LOGGER.info("Cluster is now " + state);

    // all storage partitions are active; the cluster is active only if the metadata node is too
    if (!metadataNodeActive) {
        return;
    }
    appCtx.getMetadataBootstrap().init();
    setState(ClusterState.ACTIVE);
    LOGGER.info("Cluster is now " + state);
    // wake up threads waiting on this manager's monitor
    notifyAll();
    // start global recovery
    appCtx.getGlobalRecoveryManager().startGlobalRecovery(appCtx);
}
Also used : ClusterPartition(org.apache.asterix.common.cluster.ClusterPartition)

Example 20 with ClusterPartition

use of org.apache.asterix.common.cluster.ClusterPartition in project asterixdb by apache.

the class ClusterStateManager method getClusterStateDescription.

/**
 * Builds a JSON description of the cluster: the overall state, the current metadata node and,
 * for every node id known to the node manager (sorted), its partitions and a derived node state.
 * <p>
 * The node state is {@code FAILED} if the node is in {@code failedNodes}; otherwise
 * {@code ACTIVE} if it has at least one partition and all are active, {@code PARTIALLY_ACTIVE}
 * if some but not all partitions are active, and {@code INACTIVE} if none are active (including
 * a node with no partitions).
 *
 * @return an object node with the fields {@code state}, {@code metadata_node} and {@code ncs}
 */
public synchronized ObjectNode getClusterStateDescription() {
    ObjectMapper om = new ObjectMapper();
    ObjectNode stateDescription = om.createObjectNode();
    stateDescription.put("state", state.name());
    stateDescription.put("metadata_node", currentMetadataNode);
    ArrayNode ncs = om.createArrayNode();
    stateDescription.set("ncs", ncs);
    // TreeSet gives a stable, sorted node order in the output
    for (String node : new TreeSet<>(((ClusterControllerService) appCtx.getServiceContext().getControllerService()).getNodeManager().getAllNodeIds())) {
        ObjectNode nodeJSON = om.createObjectNode();
        nodeJSON.put("node_id", node);
        boolean allActive = true;
        boolean anyActive = false;
        Set<Map<String, Object>> partitions = new HashSet<>();
        if (node2PartitionsMap.containsKey(node)) {
            for (ClusterPartition part : node2PartitionsMap.get(node)) {
                // read the flag once so the reported value and the aggregates agree
                boolean active = part.isActive();
                Map<String, Object> partition = new HashMap<>();
                partition.put("partition_id", "partition_" + part.getPartitionId());
                partition.put("active", active);
                partitions.add(partition);
                allActive = allActive && active;
                // tracked independently of allActive: previously an inactive partition seen
                // first prevented anyActive from ever being set, so a partially active node
                // could be reported as INACTIVE depending on iteration order
                anyActive = anyActive || active;
            }
        }
        String nodeState;
        if (failedNodes.contains(node)) {
            nodeState = "FAILED";
        } else if (allActive && anyActive) {
            nodeState = "ACTIVE";
        } else if (anyActive) {
            nodeState = "PARTIALLY_ACTIVE";
        } else {
            nodeState = "INACTIVE";
        }
        nodeJSON.put("state", nodeState);
        nodeJSON.putPOJO("partitions", partitions);
        ncs.add(nodeJSON);
    }
    return stateDescription;
}
Also used : ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) ClusterControllerService(org.apache.hyracks.control.cc.ClusterControllerService) HashMap(java.util.HashMap) Map(java.util.Map) SortedMap(java.util.SortedMap) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) HashSet(java.util.HashSet) ClusterPartition(org.apache.asterix.common.cluster.ClusterPartition)

Aggregations

ClusterPartition (org.apache.asterix.common.cluster.ClusterPartition)20 ArrayList (java.util.ArrayList)8 HashSet (java.util.HashSet)5 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)4 File (java.io.File)3 IOException (java.io.IOException)3 HashMap (java.util.HashMap)3 CheckpointTask (org.apache.asterix.app.nc.task.CheckpointTask)3 ExternalLibrarySetupTask (org.apache.asterix.app.nc.task.ExternalLibrarySetupTask)3 LocalRecoveryTask (org.apache.asterix.app.nc.task.LocalRecoveryTask)3 ReportMaxResourceIdTask (org.apache.asterix.app.nc.task.ReportMaxResourceIdTask)3 StartLifecycleComponentsTask (org.apache.asterix.app.nc.task.StartLifecycleComponentsTask)3 INCLifecycleTask (org.apache.asterix.common.api.INCLifecycleTask)3 ICcApplicationContext (org.apache.asterix.common.dataflow.ICcApplicationContext)3 Set (java.util.Set)2 BindMetadataNodeTask (org.apache.asterix.app.nc.task.BindMetadataNodeTask)2 MetadataBootstrapTask (org.apache.asterix.app.nc.task.MetadataBootstrapTask)2 TakeoverPartitionsRequestMessage (org.apache.asterix.app.replication.message.TakeoverPartitionsRequestMessage)2 IApplicationContext (org.apache.asterix.common.api.IApplicationContext)2 IDatasetLifecycleManager (org.apache.asterix.common.api.IDatasetLifecycleManager)2