Search in sources :

Example 16 with NodeControllerState

use of org.apache.hyracks.control.cc.NodeControllerState in project asterixdb by apache.

the class JobManager method prepareComplete.

/**
 * Starts the completion phase of a job run: records the pending final status, asks every
 * participating node controller to clean up its joblet, and calls {@code finalComplete}
 * right away when no node is left to wait for.
 *
 * @param run        the job run being completed
 * @param status     the status the job is completing with
 * @param exceptions the exceptions recorded together with the pending status
 * @throws HyracksException if at least one clean-up request failed; the first failure is
 *                          wrapped and any later failures are attached as suppressed
 */
@Override
public void prepareComplete(JobRun run, JobStatus status, List<Exception> exceptions) throws HyracksException {
    checkJob(run);
    if (status == JobStatus.FAILURE_BEFORE_EXECUTION) {
        // Recorded as a plain FAILURE and completed immediately, without contacting any node.
        run.setPendingStatus(JobStatus.FAILURE, exceptions);
        finalComplete(run);
        return;
    }
    JobId jobId = run.getJobId();
    HyracksException caughtException = null;
    if (run.getPendingStatus() != null && run.getCleanupPendingNodeIds().isEmpty()) {
        // A pending status is already set and no node clean-up is outstanding.
        finalComplete(run);
        return;
    }
    if (run.getPendingStatus() != null) {
        // A pending status is already set while clean-ups are still outstanding.
        LOGGER.warning("Ignoring duplicate cleanup for JobRun with id: " + jobId);
        return;
    }
    Set<String> targetNodes = run.getParticipatingNodeIds();
    // Guard the dereference: the branch below already treats a null set like an empty one,
    // so a null set must not blow up here before that fallback is reached.
    if (targetNodes != null) {
        run.getCleanupPendingNodeIds().addAll(targetNodes);
    }
    if (run.getPendingStatus() != JobStatus.FAILURE && run.getPendingStatus() != JobStatus.TERMINATED) {
        run.setPendingStatus(status, exceptions);
    }
    if (targetNodes != null && !targetNodes.isEmpty()) {
        INodeManager nodeManager = ccs.getNodeManager();
        // Nodes the node manager has no state for; they are dropped from the wait set below.
        Set<String> toDelete = new HashSet<>();
        for (String n : targetNodes) {
            NodeControllerState ncs = nodeManager.getNodeControllerState(n);
            try {
                if (ncs == null) {
                    toDelete.add(n);
                } else {
                    ncs.getNodeController().cleanUpJoblet(jobId, status);
                }
            } catch (Exception e) {
                // Keep going so the remaining nodes still receive their clean-up request.
                LOGGER.log(Level.SEVERE, e.getMessage(), e);
                if (caughtException == null) {
                    caughtException = new HyracksException(e);
                } else {
                    caughtException.addSuppressed(e);
                }
            }
        }
        targetNodes.removeAll(toDelete);
        run.getCleanupPendingNodeIds().removeAll(toDelete);
        if (run.getCleanupPendingNodeIds().isEmpty()) {
            finalComplete(run);
        }
    } else {
        finalComplete(run);
    }
    // throws caught exceptions if any
    if (caughtException != null) {
        throw caughtException;
    }
}
Also used : INodeManager(org.apache.hyracks.control.cc.cluster.INodeManager) HyracksException(org.apache.hyracks.api.exceptions.HyracksException) NodeControllerState(org.apache.hyracks.control.cc.NodeControllerState) JobId(org.apache.hyracks.api.job.JobId) InvocationTargetException(java.lang.reflect.InvocationTargetException) HashSet(java.util.HashSet)

Example 17 with NodeControllerState

use of org.apache.hyracks.control.cc.NodeControllerState in project asterixdb by apache.

the class DistributeJobWork method doRun.

/**
 * Builds the activity cluster graph for {@code jobId}, registers it in the pre-distributed
 * job store, and sends the serialized graph to every known node controller. The outcome is
 * reported through {@code callback}: the job id on success, the caught exception otherwise.
 */
@Override
protected void doRun() throws Exception {
    try {
        CCServiceContext serviceContext = ccs.getContext();
        ccs.getPreDistributedJobStore().checkForExistingDistributedJobDescriptor(jobId);

        // Rebuild the generator factory from its serialized form, then derive the graph.
        IActivityClusterGraphGeneratorFactory generatorFactory = (IActivityClusterGraphGeneratorFactory) DeploymentUtils.deserialize(acggfBytes, null, serviceContext);
        IActivityClusterGraphGenerator generator = generatorFactory.createActivityClusterGraphGenerator(jobId, serviceContext, EnumSet.noneOf(JobFlag.class));
        ActivityClusterGraph graph = generator.initialize();

        ccs.getPreDistributedJobStore().addDistributedJobDescriptor(jobId, graph, generatorFactory.getJobSpecification(), generator.getConstraints());
        serviceContext.notifyJobCreation(jobId, generatorFactory.getJobSpecification());

        // Serialize once and reuse the same bytes for every node controller.
        byte[] serializedGraph = JavaSerializationUtils.serialize(graph);
        for (NodeControllerState ncState : ccs.getNodeManager().getAllNodeControllerStates()) {
            ncState.getNodeController().distributeJob(jobId, serializedGraph);
        }
        callback.setValue(jobId);
    } catch (Exception failure) {
        // Failures are handed to the callback rather than propagated to the caller.
        callback.setException(failure);
    }
}
Also used : INodeManager(org.apache.hyracks.control.cc.cluster.INodeManager) JobFlag(org.apache.hyracks.api.job.JobFlag) ActivityClusterGraph(org.apache.hyracks.api.job.ActivityClusterGraph) IActivityClusterGraphGeneratorFactory(org.apache.hyracks.api.job.IActivityClusterGraphGeneratorFactory) IActivityClusterGraphGenerator(org.apache.hyracks.api.job.IActivityClusterGraphGenerator) NodeControllerState(org.apache.hyracks.control.cc.NodeControllerState) CCServiceContext(org.apache.hyracks.control.cc.application.CCServiceContext)

Example 18 with NodeControllerState

use of org.apache.hyracks.control.cc.NodeControllerState in project asterixdb by apache.

the class GatherStateDumpsWork method doRun.

/**
 * Registers the state dump run with the cluster controller, records the ids of all nodes
 * on the run, and asks every node controller to dump its state for this run's dump id.
 */
@Override
public void doRun() throws Exception {
    ccs.addStateDumpRun(sdr.stateDumpId, sdr);
    INodeManager manager = ccs.getNodeManager();

    // Hand the run its own copy of the node ids rather than the manager's collection.
    Collection<String> participants = new HashSet<>();
    participants.addAll(manager.getAllNodeIds());
    sdr.setNCs(participants);

    for (NodeControllerState nodeState : manager.getAllNodeControllerStates()) {
        nodeState.getNodeController().dumpState(sdr.stateDumpId);
    }
}
Also used : INodeManager(org.apache.hyracks.control.cc.cluster.INodeManager) NodeControllerState(org.apache.hyracks.control.cc.NodeControllerState) HashSet(java.util.HashSet)

Example 19 with NodeControllerState

use of org.apache.hyracks.control.cc.NodeControllerState in project asterixdb by apache.

the class GetNodeSummariesJSONWork method doRun.

/**
 * Fills {@code summaries} with a fresh JSON array holding one summary entry per node
 * controller state known to {@code nodeManager}.
 */
@Override
protected void doRun() throws Exception {
    // The mapper is only needed to create the empty array node.
    summaries = new ObjectMapper().createArrayNode();
    for (NodeControllerState nodeState : nodeManager.getAllNodeControllerStates()) {
        summaries.add(nodeState.toSummaryJSON());
    }
}
Also used : NodeControllerState(org.apache.hyracks.control.cc.NodeControllerState) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Example 20 with NodeControllerState

use of org.apache.hyracks.control.cc.NodeControllerState in project asterixdb by apache.

the class GetThreadDumpWork method run.

/**
 * Serves a thread dump request. A {@code null} node id is answered locally with a dump of
 * this JVM; otherwise the request is forwarded to the named node controller and a watcher
 * task is scheduled that fails the callback if the request is still registered after
 * {@code TIMEOUT_SECS} seconds. An unknown node id is answered with a {@code null} value.
 */
@Override
public void run() {
    if (nodeId == null) {
        // null nodeId means the request is for the cluster controller
        try {
            callback.setValue(ThreadDumpHelper.takeDumpJSON(ManagementFactory.getThreadMXBean()));
        } catch (Exception e) {
            LOGGER.log(Level.WARNING, "Exception taking CC thread dump", e);
            callback.setException(e);
        }
    } else {
        INodeManager nodeManager = ccs.getNodeManager();
        final NodeControllerState ncState = nodeManager.getNodeControllerState(nodeId);
        if (ncState == null) {
            // bad node id, reply with null immediately
            callback.setValue(null);
        } else {
            // Register before dispatching so the run is already tracked when the request goes out.
            ccs.addThreadDumpRun(run.getRequestId(), run);
            try {
                ncState.getNodeController().takeThreadDump(run.getRequestId());
            } catch (Exception e) {
                ccs.removeThreadDumpRun(run.getRequestId());
                callback.setException(e);
                // The request never went out and the callback has already failed, so there is
                // nothing left to time out; do not tie up an executor thread sleeping for it.
                return;
            }
            final long requestTime = System.currentTimeMillis();
            ccs.getExecutor().execute(() -> {
                try {
                    // Deduct the time spent queued in the executor from the timeout budget.
                    final long queueTime = System.currentTimeMillis() - requestTime;
                    final long sleepTime = TimeUnit.SECONDS.toMillis(TIMEOUT_SECS) - queueTime;
                    if (sleepTime > 0) {
                        Thread.sleep(sleepTime);
                    }
                    // Only report a timeout if the run is still registered at this point.
                    if (ccs.removeThreadDumpRun(run.getRequestId()) != null) {
                        LOGGER.log(Level.WARNING, "Timed out thread dump request " + run.getRequestId() + " for node " + nodeId);
                        callback.setException(new TimeoutException("Thread dump request for node " + nodeId + " timed out after " + TIMEOUT_SECS + " seconds."));
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
            });
        }
    }
}
Also used : INodeManager(org.apache.hyracks.control.cc.cluster.INodeManager) NodeControllerState(org.apache.hyracks.control.cc.NodeControllerState) TimeoutException(java.util.concurrent.TimeoutException)

Aggregations

NodeControllerState (org.apache.hyracks.control.cc.NodeControllerState)23 INodeManager (org.apache.hyracks.control.cc.cluster.INodeManager)15 HashMap (java.util.HashMap)5 HyracksException (org.apache.hyracks.api.exceptions.HyracksException)5 HashSet (java.util.HashSet)4 Map (java.util.Map)4 JobId (org.apache.hyracks.api.job.JobId)4 DeploymentId (org.apache.hyracks.api.deployment.DeploymentId)3 NodeCapacity (org.apache.hyracks.api.job.resource.NodeCapacity)3 INodeController (org.apache.hyracks.control.common.base.INodeController)3 ArrayList (java.util.ArrayList)2 LinkedHashMap (java.util.LinkedHashMap)2 List (java.util.List)2 NetworkAddress (org.apache.hyracks.api.comm.NetworkAddress)2 ActivityClusterGraph (org.apache.hyracks.api.job.ActivityClusterGraph)2 IResourceManager (org.apache.hyracks.control.cc.scheduler.IResourceManager)2 ResourceManager (org.apache.hyracks.control.cc.scheduler.ResourceManager)2 NCConfig (org.apache.hyracks.control.common.controllers.NCConfig)2 DeploymentRun (org.apache.hyracks.control.common.deployment.DeploymentRun)2 Test (org.junit.Test)2