Example usage of org.apache.hyracks.control.cc.NodeControllerState in the Apache AsterixDB project: the prepareComplete method of the JobManager class.
/**
 * Initiates (or completes) cleanup for a finished job run.
 * <p>
 * If the job failed before execution, the run is finalized immediately. Otherwise
 * cleanup requests are sent to every participating node controller; the run is
 * finalized once no nodes remain pending. Duplicate cleanup requests are ignored.
 *
 * @param run        the job run to complete; must be registered (checked via checkJob)
 * @param status     the terminal status to record for the run
 * @param exceptions exceptions associated with the terminal status (may be empty)
 * @throws HyracksException if notifying one or more node controllers failed;
 *                          additional failures are attached as suppressed exceptions
 */
@Override
public void prepareComplete(JobRun run, JobStatus status, List<Exception> exceptions) throws HyracksException {
    checkJob(run);
    if (status == JobStatus.FAILURE_BEFORE_EXECUTION) {
        // The job never started on any NC, so no per-node cleanup is required.
        run.setPendingStatus(JobStatus.FAILURE, exceptions);
        finalComplete(run);
        return;
    }
    JobId jobId = run.getJobId();
    HyracksException caughtException = null;
    if (run.getPendingStatus() != null && run.getCleanupPendingNodeIds().isEmpty()) {
        // Cleanup was already requested and every node has responded; finish now.
        finalComplete(run);
        return;
    }
    if (run.getPendingStatus() != null) {
        // Cleanup is already in flight on some nodes; do not issue it twice.
        LOGGER.warning("Ignoring duplicate cleanup for JobRun with id: " + jobId);
        return;
    }
    Set<String> targetNodes = run.getParticipatingNodeIds();
    // BUGFIX: guard this dereference. The original called addAll(targetNodes)
    // unconditionally, then null-checked targetNodes afterwards -- an NPE
    // whenever getParticipatingNodeIds() returns null.
    if (targetNodes != null) {
        run.getCleanupPendingNodeIds().addAll(targetNodes);
    }
    // Pending status is null at this point (both non-null cases returned above),
    // so this always records the requested status; kept defensive on purpose.
    if (run.getPendingStatus() != JobStatus.FAILURE && run.getPendingStatus() != JobStatus.TERMINATED) {
        run.setPendingStatus(status, exceptions);
    }
    if (targetNodes != null && !targetNodes.isEmpty()) {
        INodeManager nodeManager = ccs.getNodeManager();
        Set<String> toDelete = new HashSet<>();
        for (String n : targetNodes) {
            NodeControllerState ncs = nodeManager.getNodeControllerState(n);
            try {
                if (ncs == null) {
                    // Node has left the cluster; there is nothing to clean up there.
                    toDelete.add(n);
                } else {
                    ncs.getNodeController().cleanUpJoblet(jobId, status);
                }
            } catch (Exception e) {
                LOGGER.log(Level.SEVERE, e.getMessage(), e);
                // Aggregate all failures: the first becomes the primary exception,
                // the rest are attached as suppressed.
                if (caughtException == null) {
                    caughtException = new HyracksException(e);
                } else {
                    caughtException.addSuppressed(e);
                }
            }
        }
        // Departed nodes will never acknowledge cleanup; drop them from both sets.
        targetNodes.removeAll(toDelete);
        run.getCleanupPendingNodeIds().removeAll(toDelete);
        if (run.getCleanupPendingNodeIds().isEmpty()) {
            finalComplete(run);
        }
    } else {
        finalComplete(run);
    }
    // Rethrow any exception collected while notifying the NCs.
    if (caughtException != null) {
        throw caughtException;
    }
}
Example usage of org.apache.hyracks.control.cc.NodeControllerState in the Apache AsterixDB project: the doRun method of the DistributeJobWork class.
/**
 * Deserializes the job's activity-cluster-graph generator factory, registers the
 * resulting graph in the pre-distributed job store, and ships the serialized graph
 * to every node controller. Success delivers the job id through the callback;
 * any failure is reported through the callback rather than rethrown.
 */
@Override
protected void doRun() throws Exception {
    try {
        final CCServiceContext serviceCtx = ccs.getContext();
        // Fail fast if a descriptor for this job id has already been distributed.
        ccs.getPreDistributedJobStore().checkForExistingDistributedJobDescriptor(jobId);
        final IActivityClusterGraphGeneratorFactory factory =
                (IActivityClusterGraphGeneratorFactory) DeploymentUtils.deserialize(acggfBytes, null, serviceCtx);
        final IActivityClusterGraphGenerator generator =
                factory.createActivityClusterGraphGenerator(jobId, serviceCtx, EnumSet.noneOf(JobFlag.class));
        final ActivityClusterGraph graph = generator.initialize();
        ccs.getPreDistributedJobStore().addDistributedJobDescriptor(jobId, graph, factory.getJobSpecification(),
                generator.getConstraints());
        serviceCtx.notifyJobCreation(jobId, factory.getJobSpecification());
        final byte[] graphBytes = JavaSerializationUtils.serialize(graph);
        // Push the serialized graph to every registered node controller.
        for (NodeControllerState nodeState : ccs.getNodeManager().getAllNodeControllerStates()) {
            nodeState.getNodeController().distributeJob(jobId, graphBytes);
        }
        callback.setValue(jobId);
    } catch (Exception e) {
        callback.setException(e);
    }
}
Example usage of org.apache.hyracks.control.cc.NodeControllerState in the Apache AsterixDB project: the doRun method of the GatherStateDumpsWork class.
/**
 * Registers the state-dump run with the cluster controller, snapshots the current
 * set of node ids into the run, and asks every node controller to dump its state
 * under the run's dump id.
 */
@Override
public void doRun() throws Exception {
    ccs.addStateDumpRun(sdr.stateDumpId, sdr);
    INodeManager nodeManager = ccs.getNodeManager();
    // Snapshot the node ids so the run tracks exactly the nodes asked to dump.
    Collection<String> participatingNodes = new HashSet<>(nodeManager.getAllNodeIds());
    sdr.setNCs(participatingNodes);
    for (NodeControllerState controllerState : nodeManager.getAllNodeControllerStates()) {
        controllerState.getNodeController().dumpState(sdr.stateDumpId);
    }
}
Example usage of org.apache.hyracks.control.cc.NodeControllerState in the Apache AsterixDB project: the doRun method of the GetNodeSummariesJSONWork class.
/**
 * Populates {@code summaries} with a JSON array holding one summary object per
 * registered node controller.
 */
@Override
protected void doRun() throws Exception {
    final ObjectMapper mapper = new ObjectMapper();
    summaries = mapper.createArrayNode();
    for (NodeControllerState controllerState : nodeManager.getAllNodeControllerStates()) {
        summaries.add(controllerState.toSummaryJSON());
    }
}
Example usage of org.apache.hyracks.control.cc.NodeControllerState in the Apache AsterixDB project: the run method of the GetThreadDumpWork class.
/**
 * Services a thread-dump request. A null nodeId targets the cluster controller
 * itself and is answered synchronously; otherwise the request is forwarded to the
 * named node controller and a watchdog task is scheduled to time the request out
 * after TIMEOUT_SECS if no response arrives. Results and failures are delivered
 * through the callback.
 */
@Override
public void run() {
    if (nodeId == null) {
        // A null nodeId means the request targets the cluster controller itself.
        try {
            callback.setValue(ThreadDumpHelper.takeDumpJSON(ManagementFactory.getThreadMXBean()));
        } catch (Exception e) {
            LOGGER.log(Level.WARNING, "Exception taking CC thread dump", e);
            callback.setException(e);
        }
        return;
    }
    INodeManager nodeManager = ccs.getNodeManager();
    final NodeControllerState ncState = nodeManager.getNodeControllerState(nodeId);
    if (ncState == null) {
        // Unknown node id: reply with null immediately.
        callback.setValue(null);
        return;
    }
    ccs.addThreadDumpRun(run.getRequestId(), run);
    try {
        ncState.getNodeController().takeThreadDump(run.getRequestId());
    } catch (Exception e) {
        // The request never reached the NC; unregister the run and fail the callback.
        ccs.removeThreadDumpRun(run.getRequestId());
        callback.setException(e);
        // BUGFIX: the original fell through and still scheduled the timeout
        // watcher below, leaving an executor thread sleeping for up to
        // TIMEOUT_SECS with nothing left to time out.
        return;
    }
    final long requestTime = System.currentTimeMillis();
    // Watchdog: if the NC has not answered within TIMEOUT_SECS, expire the run.
    ccs.getExecutor().execute(() -> {
        try {
            final long queueTime = System.currentTimeMillis() - requestTime;
            final long sleepTime = TimeUnit.SECONDS.toMillis(TIMEOUT_SECS) - queueTime;
            if (sleepTime > 0) {
                Thread.sleep(sleepTime);
            }
            // A non-null removal means the NC never responded; fail with a timeout.
            if (ccs.removeThreadDumpRun(run.getRequestId()) != null) {
                LOGGER.log(Level.WARNING, "Timed out thread dump request " + run.getRequestId() + " for node " + nodeId);
                callback.setException(new TimeoutException("Thread dump request for node " + nodeId + " timed out after " + TIMEOUT_SECS + " seconds."));
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt status for the executor's thread.
            Thread.currentThread().interrupt();
        }
    });
}
Aggregations