use of org.apache.hyracks.control.cc.job.TaskClusterAttempt in project asterixdb by apache.
the class JobExecutor method findRunnableTaskClusterRoots.
/**
 * Adds to {@code frontier} the task clusters of {@code candidate} that produce no partitions and
 * whose latest attempt is missing or not yet COMPLETED.
 * <p>
 * Before looking at {@code candidate} itself, every dependency is examined. A dependency that is
 * not planned yet, or that still has an unfinished task cluster without produced partitions, is
 * visited recursively instead, and {@code candidate} contributes nothing on this call. If
 * {@code candidate} has not been planned yet, it is planned here and the resulting plan and
 * partition-producer mapping are recorded.
 *
 * @param frontier  accumulator that receives the task clusters found
 * @param candidate activity cluster to inspect
 * @throws HyracksException propagated from planning or from the recursive calls
 */
private void findRunnableTaskClusterRoots(Set<TaskCluster> frontier, ActivityCluster candidate)
        throws HyracksException {
    boolean allDependenciesDone = true;
    for (ActivityCluster dependency : candidate.getDependencies()) {
        // A dependency counts as done only when it is planned AND each of its task clusters
        // without produced partitions has a COMPLETED latest attempt.
        boolean dependencyDone = isPlanned(dependency);
        if (dependencyDone) {
            for (TaskCluster depCluster : getActivityClusterPlan(dependency).getTaskClusters()) {
                if (depCluster.getProducedPartitions().isEmpty()) {
                    TaskClusterAttempt latest = findLastTaskClusterAttempt(depCluster);
                    if (latest == null
                            || latest.getStatus() != TaskClusterAttempt.TaskClusterStatus.COMPLETED) {
                        dependencyDone = false;
                        break;
                    }
                }
            }
        }
        if (!dependencyDone) {
            allDependenciesDone = false;
            findRunnableTaskClusterRoots(frontier, dependency);
        }
    }
    if (!allDependenciesDone) {
        // The unfinished dependencies have already contributed to the frontier.
        return;
    }

    if (!isPlanned(candidate)) {
        ActivityClusterPlanner planner = new ActivityClusterPlanner(this);
        ActivityClusterPlan plan = planner.planActivityCluster(candidate);
        jobRun.getActivityClusterPlanMap().put(candidate.getId(), plan);
        partitionProducingTaskClusterMap.putAll(planner.getPartitionProducingTaskClusterMap());
    }

    for (TaskCluster cluster : getActivityClusterPlan(candidate).getTaskClusters()) {
        if (cluster.getProducedPartitions().isEmpty()) {
            TaskClusterAttempt latest = findLastTaskClusterAttempt(cluster);
            boolean finished = latest != null
                    && latest.getStatus() == TaskClusterAttempt.TaskClusterStatus.COMPLETED;
            if (!finished) {
                frontier.add(cluster);
            }
        }
    }
}
use of org.apache.hyracks.control.cc.job.TaskClusterAttempt in project asterixdb by apache.
the class JobExecutor method assignRunnabilityRank.
/**
 * Computes the runnability of {@code goal}, memoizing the answer in {@code runnabilityMap}.
 * <p>
 * Runnability rank has the following semantics:
 * <ul>
 * <li>Runnability(Runnable TaskCluster depending on completed TaskClusters) = {RUNNABLE, 0}</li>
 * <li>Runnability(Runnable TaskCluster) = max(Rank(Dependent TaskClusters)) + 1</li>
 * <li>Runnability(Non-schedulable TaskCluster) = {NOT_RUNNABLE, _}</li>
 * </ul>
 * A task cluster whose latest attempt is COMPLETED or RUNNING is reported with that tag directly,
 * without inspecting its required partitions.
 *
 * @param goal           task cluster to rank
 * @param runnabilityMap memo of already-computed results; updated by this call
 * @return the runnability recorded for {@code goal}
 */
private Runnability assignRunnabilityRank(TaskCluster goal, Map<TaskCluster, Runnability> runnabilityMap) {
    if (LOGGER.isLoggable(Level.FINE)) {
        LOGGER.fine("Computing runnability: " + goal);
    }
    if (runnabilityMap.containsKey(goal)) {
        return runnabilityMap.get(goal);
    }

    TaskClusterAttempt latest = findLastTaskClusterAttempt(goal);
    if (latest != null) {
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("Last Attempt Status: " + latest.getStatus());
        }
        // COMPLETED and RUNNING attempts short-circuit with the matching tag.
        Runnability.Tag settledTag = null;
        if (latest.getStatus() == TaskClusterAttempt.TaskClusterStatus.COMPLETED) {
            settledTag = Runnability.Tag.COMPLETED;
        } else if (latest.getStatus() == TaskClusterAttempt.TaskClusterStatus.RUNNING) {
            settledTag = Runnability.Tag.RUNNING;
        }
        if (settledTag != null) {
            Runnability settled = new Runnability(settledTag, Integer.MIN_VALUE);
            runnabilityMap.put(goal, settled);
            return settled;
        }
    }

    Map<ConnectorDescriptorId, IConnectorPolicy> policies = jobRun.getConnectorPolicyMap();
    PartitionMatchMaker matchMaker = jobRun.getPartitionMatchMaker();
    Runnability worstSoFar = new Runnability(Runnability.Tag.RUNNABLE, 0);
    for (PartitionId required : goal.getRequiredPartitions()) {
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("Inspecting required partition: " + required);
        }
        IConnectorPolicy policy = policies.get(required.getConnectorDescriptorId());
        PartitionState bestState = matchMaker.getMaximumAvailableState(required);
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("Policy: " + policy + " maxState: " + bestState);
        }

        Runnability forPartition;
        if (PartitionState.COMMITTED.equals(bestState)) {
            forPartition = new Runnability(Runnability.Tag.RUNNABLE, 0);
        } else if (PartitionState.STARTED.equals(bestState) && !policy.consumerWaitsForProducerToFinish()) {
            forPartition = new Runnability(Runnability.Tag.RUNNABLE, 1);
        } else {
            // Partition is not available yet: the answer depends on the producing task cluster.
            Runnability producer =
                    assignRunnabilityRank(partitionProducingTaskClusterMap.get(required), runnabilityMap);
            Runnability.Tag producerTag = producer.getTag();
            switch (producerTag) {
                case RUNNABLE:
                case RUNNING:
                    if (policy.consumerWaitsForProducerToFinish()) {
                        forPartition = new Runnability(Runnability.Tag.NOT_RUNNABLE, Integer.MAX_VALUE);
                    } else if (producerTag == Runnability.Tag.RUNNABLE) {
                        // One rank behind a producer that is itself only runnable.
                        forPartition = new Runnability(Runnability.Tag.RUNNABLE, producer.getPriority() + 1);
                    } else {
                        forPartition = new Runnability(Runnability.Tag.RUNNABLE, 1);
                    }
                    break;
                default:
                    // NOT_RUNNABLE and any other tag are passed through unchanged.
                    forPartition = producer;
                    break;
            }
        }

        worstSoFar = Runnability.getWorstCase(worstSoFar, forPartition);
        if (worstSoFar.getTag() == Runnability.Tag.NOT_RUNNABLE) {
            // already not runnable -- cannot get better. bail.
            break;
        }
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("aggregateRunnability: " + worstSoFar);
        }
    }
    runnabilityMap.put(goal, worstSoFar);
    return worstSoFar;
}
use of org.apache.hyracks.control.cc.job.TaskClusterAttempt in project asterixdb by apache.
the class AbstractTaskLifecycleWork method runWork.
/**
 * Resolves the task attempt identified by {@code taId} within job {@code jobId} and hands it to
 * {@code performEvent}. The call is a no-op when the job run, the activity cluster, the task slot,
 * the task cluster attempt, or the task attempt itself cannot be found.
 */
@Override
public final void runWork() {
    JobRun run = ccs.getJobManager().get(jobId);
    if (run == null) {
        return;
    }

    TaskId taskId = taId.getTaskId();
    ActivityId activityId = taskId.getActivityId();
    ActivityCluster cluster = run.getActivityClusterGraph().getActivityMap().get(activityId);
    if (cluster == null) {
        return;
    }

    // NOTE(review): neither the activity cluster plan nor the activity plan lookup is null-checked;
    // presumably both exist whenever a task lifecycle event arrives -- confirm against callers.
    Task[] tasks = run.getActivityClusterPlanMap().get(cluster.getId()).getActivityPlanMap()
            .get(activityId).getTasks();
    int partition = taskId.getPartition();
    if (tasks == null || tasks.length <= partition) {
        return;
    }

    List<TaskClusterAttempt> clusterAttempts = tasks[partition].getTaskCluster().getAttempts();
    int attemptIndex = taId.getAttempt();
    if (clusterAttempts == null || clusterAttempts.size() <= attemptIndex) {
        return;
    }

    TaskAttempt attempt = clusterAttempts.get(attemptIndex).getTaskAttempts().get(taskId);
    if (attempt != null) {
        performEvent(attempt);
    }
}
use of org.apache.hyracks.control.cc.job.TaskClusterAttempt in project asterixdb by apache.
the class JobExecutor method assignTaskLocations.
/**
 * Creates a new attempt for {@code tc}, assigns every task of the cluster to a node, and records a
 * {@link TaskAttemptDescriptor} for each task under its node id in {@code taskAttemptMap}.
 * <p>
 * Steps, in order: build one {@code TaskAttempt} per task (status INITIALIZED) together with a
 * location expression; run the constraint solver over those expressions; pick a node per task, mark
 * the attempt RUNNING and register the operator location; append the new cluster attempt to the
 * cluster's attempt list; fill in input partition locations for every descriptor currently in
 * {@code taskAttemptMap}; finally mark the cluster attempt RUNNING and add {@code tc} to the
 * in-progress set.
 *
 * @param tc             task cluster to start a new attempt for
 * @param taskAttemptMap node id to descriptors of task attempts to start there; appended to
 * @throws HyracksException declared by the signature; presumably raised by location assignment
 */
private void assignTaskLocations(TaskCluster tc, Map<String, List<TaskAttemptDescriptor>> taskAttemptMap)
        throws HyracksException {
    ActivityClusterGraph acg = jobRun.getActivityClusterGraph();
    Task[] tasks = tc.getTasks();
    List<TaskClusterAttempt> clusterAttempts = tc.getAttempts();
    // The new attempt's number is the count of attempts made so far.
    int attemptNumber = clusterAttempts.size();
    TaskClusterAttempt newClusterAttempt = new TaskClusterAttempt(tc, attemptNumber);
    Map<TaskId, TaskAttempt> attemptsByTask = new HashMap<>();
    Map<TaskId, LValueConstraintExpression> locationsByTask = new HashMap<>();

    for (Task task : tasks) {
        TaskId tid = task.getTaskId();
        TaskAttemptId attemptId =
                new TaskAttemptId(new TaskId(tid.getActivityId(), tid.getPartition()), attemptNumber);
        TaskAttempt attempt = new TaskAttempt(newClusterAttempt, attemptId, task);
        attempt.setStatus(TaskAttempt.TaskStatus.INITIALIZED, null);
        locationsByTask.put(tid, new PartitionLocationExpression(
                tid.getActivityId().getOperatorDescriptorId(), tid.getPartition()));
        attemptsByTask.put(tid, attempt);
    }
    newClusterAttempt.setTaskAttempts(attemptsByTask);
    solver.solve(locationsByTask.values());

    for (Task task : tasks) {
        TaskId tid = task.getTaskId();
        TaskAttempt attempt = attemptsByTask.get(tid);
        String nodeId = assignLocation(acg, locationsByTask, tid, attempt);
        attempt.setNodeId(nodeId);
        attempt.setStatus(TaskAttempt.TaskStatus.RUNNING, null);
        attempt.setStartTime(System.currentTimeMillis());

        List<TaskAttemptDescriptor> nodeDescriptors = taskAttemptMap.get(nodeId);
        if (nodeDescriptors == null) {
            nodeDescriptors = new ArrayList<>();
            taskAttemptMap.put(nodeId, nodeDescriptors);
        }

        OperatorDescriptorId operatorId = tid.getActivityId().getOperatorDescriptorId();
        jobRun.registerOperatorLocation(operatorId, tid.getPartition(), nodeId);
        ActivityPartitionDetails details = task.getActivityPlan().getActivityPartitionDetails();
        nodeDescriptors.add(new TaskAttemptDescriptor(attempt.getTaskAttemptId(), details.getPartitionCount(),
                details.getInputPartitionCounts(), details.getOutputPartitionCounts()));
    }
    newClusterAttempt.initializePendingTaskCounter();
    clusterAttempts.add(newClusterAttempt);

    // Improvement for reducing master/slave message communications: for each TaskAttemptDescriptor
    // we set the NetworkAddress[][] partitionLocations, in which each row is for an incoming
    // connector descriptor and each column is for an input channel of the connector.
    INodeManager nodeManager = ccs.getNodeManager();
    for (List<TaskAttemptDescriptor> descriptors : taskAttemptMap.values()) {
        for (TaskAttemptDescriptor descriptor : descriptors) {
            TaskAttemptId attemptId = descriptor.getTaskAttemptId();
            int attempt = attemptId.getAttempt();
            ActivityId activityId = attemptId.getTaskId().getActivityId();
            List<IConnectorDescriptor> inputConnectors = acg.getActivityInputs(activityId);
            int[] inputCounts = descriptor.getInputPartitionCounts();
            if (inputCounts == null) {
                continue;
            }

            NetworkAddress[][] senderLocations = new NetworkAddress[inputCounts.length][];
            for (int input = 0; input < inputCounts.length; ++input) {
                ConnectorDescriptorId connectorId = inputConnectors.get(input).getConnectorId();
                IConnectorPolicy policy = jobRun.getConnectorPolicyMap().get(connectorId);
                // Sender locations are carried into the task unless this is a re-attempt whose
                // send side is materialized and blocking; that row is then left null.
                boolean blockingReattempt = attempt > 0 && policy.materializeOnSendSide()
                        && policy.consumerWaitsForProducerToFinish();
                if (blockingReattempt) {
                    continue;
                }
                ActivityId producerActivity = acg.getProducerActivity(connectorId);
                senderLocations[input] = new NetworkAddress[inputCounts[input]];
                for (int channel = 0; channel < inputCounts[input]; ++channel) {
                    String producerNode = findTaskLocation(new TaskId(producerActivity, channel));
                    senderLocations[input][channel] =
                            nodeManager.getNodeControllerState(producerNode).getDataPort();
                }
            }
            descriptor.setInputPartitionLocations(senderLocations);
        }
    }

    newClusterAttempt.setStatus(TaskClusterAttempt.TaskClusterStatus.RUNNING);
    newClusterAttempt.setStartTime(System.currentTimeMillis());
    inProgressTaskClusters.add(tc);
}
use of org.apache.hyracks.control.cc.job.TaskClusterAttempt in project asterixdb by apache.
the class JobExecutor method notifyTaskComplete.
/**
 * Records that task attempt {@code ta} has completed.
 * <p>
 * The notification is ignored with a warning when it does not belong to the task cluster's latest
 * attempt, or when the task attempt is not currently RUNNING. Otherwise the task attempt is marked
 * COMPLETED; when that was the last pending task of the cluster attempt, the cluster attempt is
 * marked COMPLETED as well, the task cluster leaves the in-progress set, and
 * {@code startRunnableActivityClusters()} is invoked.
 *
 * @param ta the task attempt reported as complete
 * @throws HyracksException declared by the signature; presumably raised while starting follow-up work
 */
public void notifyTaskComplete(TaskAttempt ta) throws HyracksException {
    TaskAttemptId attemptId = ta.getTaskAttemptId();
    TaskCluster cluster = ta.getTask().getTaskCluster();
    TaskClusterAttempt latest = findLastTaskClusterAttempt(cluster);

    boolean belongsToLatest = latest != null && attemptId.getAttempt() == latest.getAttempt();
    if (!belongsToLatest) {
        LOGGER.warning("Ignoring task complete notification: " + attemptId + " -- Current last attempt = " + latest);
        return;
    }

    TaskAttempt.TaskStatus currentStatus = ta.getStatus();
    if (currentStatus != TaskAttempt.TaskStatus.RUNNING) {
        LOGGER.warning("Spurious task complete notification: " + attemptId + " Current state = " + currentStatus);
        return;
    }

    ta.setStatus(TaskAttempt.TaskStatus.COMPLETED, null);
    ta.setEndTime(System.currentTimeMillis());

    if (latest.decrementPendingTasksCounter() != 0) {
        // Other tasks of this cluster attempt are still pending.
        return;
    }
    latest.setStatus(TaskClusterAttempt.TaskClusterStatus.COMPLETED);
    latest.setEndTime(System.currentTimeMillis());
    inProgressTaskClusters.remove(cluster);
    startRunnableActivityClusters();
}
Aggregations