Usage of org.apache.hyracks.control.cc.NodeControllerState in the Apache AsterixDB project: class CliDeployBinaryWork, method doRun.
@Override
public void doRun() {
    try {
        // Generate a fresh deployment id when the caller did not supply one.
        if (deploymentId == null) {
            deploymentId = new DeploymentId(UUID.randomUUID().toString());
        }
        // First install the binaries on the cluster controller itself.
        DeploymentUtils.deploy(deploymentId, binaryURLs, ccs.getContext().getJobSerializerDeserializerContainer(),
                ccs.getServerContext(), false);
        // Then push them out to every node controller.
        INodeManager nodeManager = ccs.getNodeManager();
        final DeploymentRun deploymentRun = new DeploymentRun(nodeManager.getAllNodeIds());
        // Registering the run guards against a concurrent deployment that reuses this id.
        ccs.addDeploymentRun(deploymentId, deploymentRun);
        for (NodeControllerState nodeState : nodeManager.getAllNodeControllerStates()) {
            nodeState.getNodeController().deployBinary(deploymentId, binaryURLs);
        }
        // Complete the callback asynchronously once every node has reported in.
        ccs.getExecutor().execute(() -> {
            try {
                deploymentRun.waitForCompletion();
                ccs.removeDeploymentRun(deploymentId);
                callback.setValue(deploymentId);
            } catch (Exception e) {
                callback.setException(e);
            }
        });
    } catch (Exception e) {
        callback.setException(e);
    }
}
Usage of org.apache.hyracks.control.cc.NodeControllerState in the Apache AsterixDB project: class CliUnDeployBinaryWork, method doRun.
@Override
public void doRun() {
    try {
        // Generate a fresh deployment id when the caller did not supply one.
        if (deploymentId == null) {
            deploymentId = new DeploymentId(UUID.randomUUID().toString());
        }
        // Undeploy from the cluster controller itself.
        DeploymentUtils.undeploy(deploymentId, ccs.getContext().getJobSerializerDeserializerContainer(),
                ccs.getServerContext());
        // Undeploy from every node controller.
        INodeManager nodeManager = ccs.getNodeManager();
        Collection<String> nodeIds = nodeManager.getAllNodeIds();
        final DeploymentRun dRun = new DeploymentRun(nodeIds);
        // Registering the run prevents a concurrent undeploy that reuses this deployment id.
        ccs.addDeploymentRun(deploymentId, dRun);
        for (NodeControllerState ncs : nodeManager.getAllNodeControllerStates()) {
            ncs.getNodeController().undeployBinary(deploymentId);
        }
        ccs.getExecutor().execute(new Runnable() {
            @Override
            public void run() {
                try {
                    // Wait until every node controller has acknowledged the undeploy,
                    // then release the run and complete the callback.
                    dRun.waitForCompletion();
                    ccs.removeDeploymentRun(deploymentId);
                    callback.setValue(null);
                } catch (Exception e) {
                    callback.setException(e);
                }
            }
        });
    } catch (Exception e) {
        // Report the failure through the callback, consistent with CliDeployBinaryWork.doRun;
        // throwing RuntimeException here would leave the caller's callback forever unset.
        callback.setException(e);
    }
}
Usage of org.apache.hyracks.control.cc.NodeControllerState in the Apache AsterixDB project: class DestroyJobWork, method doRun.
@Override
protected void doRun() throws Exception {
    try {
        // Drop the pre-distributed job descriptor from the cluster controller's store.
        ccs.getPreDistributedJobStore().removeDistributedJobDescriptor(jobId);
        // Tell each node controller to destroy its copy of the job.
        INodeManager nodeManager = ccs.getNodeManager();
        for (NodeControllerState ncState : nodeManager.getAllNodeControllerStates()) {
            ncState.getNodeController().destroyJob(jobId);
        }
        // Success: hand the job id back through the callback.
        callback.setValue(jobId);
    } catch (Exception e) {
        callback.setException(e);
    }
}
Usage of org.apache.hyracks.control.cc.NodeControllerState in the Apache AsterixDB project: class JobExecutor, method abortTaskCluster.
/**
 * Aborts one attempt of a task cluster: marks its RUNNING and COMPLETED task
 * attempts as ABORTED, asks the owning node controllers to abort the attempts
 * that were still RUNNING, cleans up partition bookkeeping for the cluster,
 * and records the given terminal status on the attempt.
 *
 * @param tcAttempt the task-cluster attempt to abort
 * @param failedOrAbortedStatus the terminal status (FAILED or ABORTED) to set on the attempt
 */
private void abortTaskCluster(TaskClusterAttempt tcAttempt, TaskClusterAttempt.TaskClusterStatus failedOrAbortedStatus) {
LOGGER.fine("Aborting task cluster: " + tcAttempt.getAttempt());
Set<TaskAttemptId> abortTaskIds = new HashSet<>();
// node id -> task attempts on that node that must be aborted remotely (only those still RUNNING)
Map<String, List<TaskAttemptId>> abortTaskAttemptMap = new HashMap<>();
for (TaskAttempt ta : tcAttempt.getTaskAttempts().values()) {
TaskAttemptId taId = ta.getTaskAttemptId();
TaskAttempt.TaskStatus status = ta.getStatus();
// Every attempt id in this cluster is collected for partition cleanup below,
// regardless of its status.
abortTaskIds.add(taId);
LOGGER.fine("Checking " + taId + ": " + ta.getStatus());
if (status == TaskAttempt.TaskStatus.RUNNING || status == TaskAttempt.TaskStatus.COMPLETED) {
ta.setStatus(TaskAttempt.TaskStatus.ABORTED, null);
ta.setEndTime(System.currentTimeMillis());
List<TaskAttemptId> abortTaskAttempts = abortTaskAttemptMap.get(ta.getNodeId());
// Lazily create the per-node list; only RUNNING attempts need a remote abort,
// COMPLETED ones are merely re-marked as ABORTED locally.
if (status == TaskAttempt.TaskStatus.RUNNING && abortTaskAttempts == null) {
abortTaskAttempts = new ArrayList<>();
abortTaskAttemptMap.put(ta.getNodeId(), abortTaskAttempts);
}
if (status == TaskAttempt.TaskStatus.RUNNING) {
abortTaskAttempts.add(taId);
}
}
}
final JobId jobId = jobRun.getJobId();
LOGGER.fine("Abort map for job: " + jobId + ": " + abortTaskAttemptMap);
INodeManager nodeManager = ccs.getNodeManager();
for (Map.Entry<String, List<TaskAttemptId>> entry : abortTaskAttemptMap.entrySet()) {
final NodeControllerState node = nodeManager.getNodeControllerState(entry.getKey());
final List<TaskAttemptId> abortTaskAttempts = entry.getValue();
// The node may have died since the attempts were scheduled; skip it silently.
if (node != null) {
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.fine("Aborting: " + abortTaskAttempts + " at " + entry.getKey());
}
try {
node.getNodeController().abortTasks(jobId, abortTaskAttempts);
} catch (Exception e) {
// A failed remote abort must not stop aborting the remaining nodes.
LOGGER.log(Level.SEVERE, e.getMessage(), e);
}
}
}
inProgressTaskClusters.remove(tcAttempt.getTaskCluster());
TaskCluster tc = tcAttempt.getTaskCluster();
PartitionMatchMaker pmm = jobRun.getPartitionMatchMaker();
// Discard uncommitted output partitions and pending partition requests tied to
// the aborted task attempts.
pmm.removeUncommittedPartitions(tc.getProducedPartitions(), abortTaskIds);
pmm.removePartitionRequests(tc.getRequiredPartitions(), abortTaskIds);
tcAttempt.setStatus(failedOrAbortedStatus);
tcAttempt.setEndTime(System.currentTimeMillis());
}
Usage of org.apache.hyracks.control.cc.NodeControllerState in the Apache AsterixDB project: class NodeManager, method removeDeadNodes.
/**
 * Sweeps the node registry and evicts every node whose missed-heartbeat count
 * has reached the configured maximum, updating the IP map and cluster capacity
 * as each dead node is removed.
 *
 * @return a pair of (ids of the nodes removed, ids of the jobs that had active
 *         work on those nodes and are therefore affected)
 * @throws HyracksException declared by the interface; not thrown directly here
 */
@Override
public Pair<Collection<String>, Collection<JobId>> removeDeadNodes() throws HyracksException {
Set<String> deadNodes = new HashSet<>();
Set<JobId> affectedJobIds = new HashSet<>();
// Explicit iterator so the entry can be removed in place during traversal.
Iterator<Map.Entry<String, NodeControllerState>> nodeIterator = nodeRegistry.entrySet().iterator();
while (nodeIterator.hasNext()) {
Map.Entry<String, NodeControllerState> entry = nodeIterator.next();
String nodeId = entry.getKey();
NodeControllerState state = entry.getValue();
// Each sweep counts as one more missed heartbeat for the node.
if (state.incrementLastHeartbeatDuration() >= ccConfig.getHeartbeatMaxMisses()) {
deadNodes.add(nodeId);
affectedJobIds.addAll(state.getActiveJobIds());
// Removes the node from node map.
nodeIterator.remove();
// Removes the node from IP map.
removeNodeFromIpAddressMap(nodeId, state);
// Updates the cluster capacity: the dead node contributes zero resources.
resourceManager.update(nodeId, new NodeCapacity(0L, 0));
LOGGER.info(entry.getKey() + " considered dead");
}
}
return Pair.of(deadNodes, affectedJobIds);
}
Aggregations