Use of org.apache.hyracks.api.job.ActivityCluster in project asterixdb by apache.
In the class ActivityClusterGraphBuilder, method inferActivityClusters:
public ActivityClusterGraph inferActivityClusters(JobId jobId, JobActivityGraph jag) {
    /*
     * Build the initial equivalence-set map: for each activity id t, t -> { t }.
     */
    Map<ActivityId, Set<ActivityId>> stageMap = new HashMap<ActivityId, Set<ActivityId>>();
    Set<Set<ActivityId>> stages = new HashSet<Set<ActivityId>>();
    for (ActivityId taskId : jag.getActivityMap().keySet()) {
        Set<ActivityId> eqSet = new HashSet<ActivityId>();
        eqSet.add(taskId);
        stageMap.put(taskId, eqSet);
        stages.add(eqSet);
    }
    // Merge equivalence sets until a fixed point: no mergeable pair remains.
    boolean changed = true;
    while (changed) {
        changed = false;
        Pair<ActivityId, ActivityId> pair = findMergePair(jag, stages);
        if (pair != null) {
            merge(stageMap, stages, pair.getLeft(), pair.getRight());
            changed = true;
        }
    }
    ActivityClusterGraph acg = new ActivityClusterGraph();
    Map<ActivityId, ActivityCluster> acMap = new HashMap<ActivityId, ActivityCluster>();
    int acCounter = 0;
    Map<ActivityId, IActivity> activityNodeMap = jag.getActivityMap();
    List<ActivityCluster> acList = new ArrayList<ActivityCluster>();
    // Each surviving equivalence set becomes one ActivityCluster.
    for (Set<ActivityId> stage : stages) {
        ActivityCluster ac = new ActivityCluster(acg, new ActivityClusterId(jobId, acCounter++));
        acList.add(ac);
        for (ActivityId aid : stage) {
            IActivity activity = activityNodeMap.get(aid);
            ac.addActivity(activity);
            acMap.put(aid, ac);
        }
    }
    // Wire connectors inside each cluster; activities with no outputs are roots.
    for (Set<ActivityId> stage : stages) {
        for (ActivityId aid : stage) {
            IActivity activity = activityNodeMap.get(aid);
            ActivityCluster ac = acMap.get(aid);
            List<IConnectorDescriptor> aOutputs = jag.getActivityOutputMap().get(aid);
            if (aOutputs == null || aOutputs.isEmpty()) {
                ac.addRoot(activity);
            } else {
                int nActivityOutputs = aOutputs.size();
                for (int i = 0; i < nActivityOutputs; ++i) {
                    IConnectorDescriptor conn = aOutputs.get(i);
                    ac.addConnector(conn);
                    Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>> pcPair =
                            jag.getConnectorActivityMap().get(conn.getConnectorId());
                    ac.connect(conn, activity, i, pcPair.getRight().getLeft(), pcPair.getRight().getRight(),
                            jag.getConnectorRecordDescriptorMap().get(conn.getConnectorId()));
                }
            }
        }
    }
    // Translate activity-level blocking edges into cluster-level dependencies.
    Map<ActivityId, Set<ActivityId>> blocked2BlockerMap = jag.getBlocked2BlockerMap();
    for (ActivityCluster s : acList) {
        Map<ActivityId, Set<ActivityId>> acBlocked2BlockerMap = s.getBlocked2BlockerMap();
        Set<ActivityCluster> blockerStages = new HashSet<ActivityCluster>();
        for (ActivityId t : s.getActivityMap().keySet()) {
            Set<ActivityId> blockerTasks = blocked2BlockerMap.get(t);
            acBlocked2BlockerMap.put(t, blockerTasks);
            if (blockerTasks != null) {
                for (ActivityId bt : blockerTasks) {
                    blockerStages.add(acMap.get(bt));
                }
            }
        }
        for (ActivityCluster bs : blockerStages) {
            s.getDependencies().add(bs);
        }
    }
    acg.addActivityClusters(acList);
    if (LOGGER.isLoggable(Level.FINE)) {
        LOGGER.fine(acg.toJSON().asText());
    }
    return acg;
}
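The while loop above converges by repeatedly asking findMergePair for two activities that must share a cluster and folding their equivalence sets together. Below is a minimal, self-contained sketch of that merge step over plain integers; the helper here is a hypothetical stand-in, not the builder's private merge.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class MergeSketch {

    // Hypothetical analog of the builder's private merge(...) helper.
    static void merge(Map<Integer, Set<Integer>> setMap, Set<Set<Integer>> sets, int a, int b) {
        Set<Integer> sa = setMap.get(a);
        Set<Integer> sb = setMap.get(b);
        if (sa == sb) {
            return; // already equivalent
        }
        // Build a fresh merged set rather than mutating one in place: a HashSet
        // cannot re-find a member whose hashCode changed after insertion.
        Set<Integer> merged = new HashSet<>(sa);
        merged.addAll(sb);
        sets.remove(sa);
        sets.remove(sb);
        sets.add(merged);
        for (Integer x : merged) {
            setMap.put(x, merged);
        }
    }

    public static void main(String[] args) {
        // Start with singleton sets {0}, {1}, {2}, as the builder does.
        Map<Integer, Set<Integer>> setMap = new HashMap<>();
        Set<Set<Integer>> sets = new HashSet<>();
        for (int i = 0; i < 3; i++) {
            Set<Integer> s = new HashSet<>();
            s.add(i);
            setMap.put(i, s);
            sets.add(s);
        }
        merge(setMap, sets, 0, 1); // one "changed" iteration of the while loop
        merge(setMap, sets, 1, 2);
        System.out.println(sets); // [[0, 1, 2]]
    }
}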
Use of org.apache.hyracks.api.job.ActivityCluster in project asterixdb by apache.
In the class ActivityClusterPlanner, method getTaskCluster:
private TaskCluster getTaskCluster(TaskId tid) {
    JobRun run = executor.getJobRun();
    ActivityCluster ac = run.getActivityClusterGraph().getActivityMap().get(tid.getActivityId());
    ActivityClusterPlan acp = run.getActivityClusterPlanMap().get(ac.getId());
    Task[] tasks = acp.getActivityPlanMap().get(tid.getActivityId()).getTasks();
    Task task = tasks[tid.getPartition()];
    assert task.getTaskId().equals(tid);
    return task.getTaskCluster();
}
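For context, a TaskId is effectively a composite key: an activity id plus a partition index, so the lookup above is a chain of map resolutions followed by an array index. A rough sketch of the same shape with hypothetical stand-in types (records require Java 16+):

import java.util.HashMap;
import java.util.Map;

public class LookupSketch {

    // Hypothetical stand-ins for Hyracks' ActivityId / TaskId.
    record ActivityId(int id) {}
    record TaskId(ActivityId activityId, int partition) {}

    public static void main(String[] args) {
        // Mirrors the plan map: activity id -> per-partition task array.
        Map<ActivityId, String[]> planMap = new HashMap<>();
        ActivityId anId = new ActivityId(7);
        planMap.put(anId, new String[] { "task-7:0", "task-7:1" });

        TaskId tid = new TaskId(anId, 1);
        // One map lookup, then an array index by partition, as in getTaskCluster.
        String task = planMap.get(tid.activityId())[tid.partition()];
        System.out.println(task); // task-7:1
    }
}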
Use of org.apache.hyracks.api.job.ActivityCluster in project asterixdb by apache.
In the class ActivityClusterPlanner, method buildActivityPlanMap:
private Map<ActivityId, ActivityPlan> buildActivityPlanMap(ActivityCluster ac, JobRun jobRun,
        Map<ActivityId, ActivityPartitionDetails> pcMap) {
    Map<ActivityId, ActivityPlan> activityPlanMap = new HashMap<>();
    Set<ActivityId> depAnIds = new HashSet<>();
    for (ActivityId anId : ac.getActivityMap().keySet()) {
        depAnIds.clear();
        getDependencyActivityIds(depAnIds, anId, ac);
        ActivityPartitionDetails apd = pcMap.get(anId);
        Task[] tasks = new Task[apd.getPartitionCount()];
        ActivityPlan activityPlan = new ActivityPlan(apd);
        for (int i = 0; i < tasks.length; ++i) {
            TaskId tid = new TaskId(anId, i);
            tasks[i] = new Task(tid, activityPlan);
            for (ActivityId danId : depAnIds) {
                ActivityCluster dAC = ac.getActivityClusterGraph().getActivityMap().get(danId);
                ActivityClusterPlan dACP = jobRun.getActivityClusterPlanMap().get(dAC.getId());
                assert dACP != null : "IllegalStateEncountered: Dependent AC is being planned without a plan for "
                        + "dependency AC: Encountered no plan for ActivityID " + danId;
                Task[] dATasks = dACP.getActivityPlanMap().get(danId).getTasks();
                assert dATasks != null : "IllegalStateEncountered: Dependent AC is being planned without a plan for"
                        + " dependency AC: Encountered no plan for ActivityID " + danId;
                assert dATasks.length == tasks.length : "Dependency activity partitioned differently from "
                        + "dependent: " + dATasks.length + " != " + tasks.length;
                Task dTask = dATasks[i];
                TaskId dTaskId = dTask.getTaskId();
                tasks[i].getDependencies().add(dTaskId);
                dTask.getDependents().add(tid);
            }
        }
        activityPlan.setTasks(tasks);
        activityPlanMap.put(anId, activityPlan);
    }
    return activityPlanMap;
}
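The inner loop wires dependencies partition-by-partition: partition i of the dependent activity waits on partition i of each dependency, which is why the code asserts that both sides have the same partition count. A small, self-contained sketch of that bidirectional bookkeeping, using a hypothetical Task stand-in:

import java.util.HashSet;
import java.util.Set;

public class PlanWiringSketch {

    // Minimal stand-in for the scheduler's Task, just enough to show the
    // two-way dependency bookkeeping done in buildActivityPlanMap.
    static class Task {
        final String id;
        final Set<String> dependencies = new HashSet<>();
        final Set<String> dependents = new HashSet<>();
        Task(String id) { this.id = id; }
    }

    public static void main(String[] args) {
        int partitions = 2;
        Task[] dep = new Task[partitions]; // tasks of the dependency activity A
        Task[] cur = new Task[partitions]; // tasks of the activity B being planned
        for (int i = 0; i < partitions; i++) {
            dep[i] = new Task("A:" + i);
            cur[i] = new Task("B:" + i);
        }
        // Same partition index on both sides, as the asserts above require.
        for (int i = 0; i < partitions; i++) {
            cur[i].dependencies.add(dep[i].id);
            dep[i].dependents.add(cur[i].id);
        }
        System.out.println(cur[0].dependencies); // [A:0]
        System.out.println(dep[1].dependents);   // [B:1]
    }
}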
Use of org.apache.hyracks.api.job.ActivityCluster in project asterixdb by apache.
In the class JobExecutor, method findRunnableTaskClusterRoots:
private void findRunnableTaskClusterRoots(Set<TaskCluster> frontier, ActivityCluster candidate)
        throws HyracksException {
    boolean depsComplete = true;
    for (ActivityCluster depAC : candidate.getDependencies()) {
        if (!isPlanned(depAC)) {
            depsComplete = false;
            findRunnableTaskClusterRoots(frontier, depAC);
        } else {
            boolean tcRootsComplete = true;
            for (TaskCluster tc : getActivityClusterPlan(depAC).getTaskClusters()) {
                if (!tc.getProducedPartitions().isEmpty()) {
                    continue;
                }
                TaskClusterAttempt tca = findLastTaskClusterAttempt(tc);
                if (tca == null || tca.getStatus() != TaskClusterAttempt.TaskClusterStatus.COMPLETED) {
                    tcRootsComplete = false;
                    break;
                }
            }
            if (!tcRootsComplete) {
                depsComplete = false;
                findRunnableTaskClusterRoots(frontier, depAC);
            }
        }
    }
    if (!depsComplete) {
        return;
    }
    if (!isPlanned(candidate)) {
        ActivityClusterPlanner acp = new ActivityClusterPlanner(this);
        ActivityClusterPlan acPlan = acp.planActivityCluster(candidate);
        jobRun.getActivityClusterPlanMap().put(candidate.getId(), acPlan);
        partitionProducingTaskClusterMap.putAll(acp.getPartitionProducingTaskClusterMap());
    }
    for (TaskCluster tc : getActivityClusterPlan(candidate).getTaskClusters()) {
        if (!tc.getProducedPartitions().isEmpty()) {
            continue;
        }
        TaskClusterAttempt tca = findLastTaskClusterAttempt(tc);
        if (tca == null || tca.getStatus() != TaskClusterAttempt.TaskClusterStatus.COMPLETED) {
            frontier.add(tc);
        }
    }
}
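In effect, the method walks the dependency graph depth-first and only admits a cluster's root task clusters to the frontier once everything upstream is complete. A simplified, self-contained sketch of that recursion follows; Node and its complete flag are hypothetical stand-ins, and the real method additionally distinguishes planned from unplanned clusters and inspects task-cluster attempts.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class FrontierSketch {

    static class Node {
        final String name;
        final List<Node> dependencies = new ArrayList<>();
        boolean complete;
        Node(String name) { this.name = name; }
    }

    // Recurse into unfinished dependencies first; only a node whose
    // dependencies are all complete may join the runnable frontier.
    static void findRoots(Set<Node> frontier, Node candidate) {
        boolean depsComplete = true;
        for (Node dep : candidate.dependencies) {
            if (!dep.complete) {
                depsComplete = false;
                findRoots(frontier, dep);
            }
        }
        if (depsComplete && !candidate.complete) {
            frontier.add(candidate);
        }
    }

    public static void main(String[] args) {
        Node a = new Node("a");
        Node b = new Node("b");
        b.dependencies.add(a);
        Set<Node> frontier = new HashSet<>();
        findRoots(frontier, b);
        // 'a' is incomplete, so it (not 'b') lands on the frontier first.
        frontier.forEach(n -> System.out.println(n.name)); // a
    }
}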
Use of org.apache.hyracks.api.job.ActivityCluster in project asterixdb by apache.
In the class StartTasksWork, method run:
@Override
public void run() {
    Task task = null;
    try {
        NCServiceContext serviceCtx = ncs.getContext();
        Joblet joblet = getOrCreateLocalJoblet(deploymentId, jobId, serviceCtx, acgBytes);
        final ActivityClusterGraph acg = joblet.getActivityClusterGraph();
        IRecordDescriptorProvider rdp = new IRecordDescriptorProvider() {

            @Override
            public RecordDescriptor getOutputRecordDescriptor(ActivityId aid, int outputIndex) {
                ActivityCluster ac = acg.getActivityMap().get(aid);
                IConnectorDescriptor conn = ac.getActivityOutputMap().get(aid).get(outputIndex);
                return ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
            }

            @Override
            public RecordDescriptor getInputRecordDescriptor(ActivityId aid, int inputIndex) {
                ActivityCluster ac = acg.getActivityMap().get(aid);
                IConnectorDescriptor conn = ac.getActivityInputMap().get(aid).get(inputIndex);
                return ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
            }
        };
        for (TaskAttemptDescriptor td : taskDescriptors) {
            TaskAttemptId taId = td.getTaskAttemptId();
            TaskId tid = taId.getTaskId();
            ActivityId aid = tid.getActivityId();
            ActivityCluster ac = acg.getActivityMap().get(aid);
            IActivity han = ac.getActivityMap().get(aid);
            if (LOGGER.isLoggable(Level.INFO)) {
                LOGGER.info("Initializing " + taId + " -> " + han);
            }
            final int partition = tid.getPartition();
            List<IConnectorDescriptor> inputs = ac.getActivityInputMap().get(aid);
            task = new Task(joblet, taId, han.getClass().getName(), ncs.getExecutor(), ncs,
                    createInputChannels(td, inputs));
            IOperatorNodePushable operator = han.createPushRuntime(task, rdp, partition, td.getPartitionCount());
            List<IPartitionCollector> collectors = new ArrayList<>();
            if (inputs != null) {
                for (int i = 0; i < inputs.size(); ++i) {
                    IConnectorDescriptor conn = inputs.get(i);
                    IConnectorPolicy cPolicy = connectorPoliciesMap.get(conn.getConnectorId());
                    if (LOGGER.isLoggable(Level.INFO)) {
                        LOGGER.info("input: " + i + ": " + conn.getConnectorId());
                    }
                    RecordDescriptor recordDesc = ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
                    IPartitionCollector collector =
                            createPartitionCollector(td, partition, task, i, conn, recordDesc, cPolicy);
                    collectors.add(collector);
                }
            }
            List<IConnectorDescriptor> outputs = ac.getActivityOutputMap().get(aid);
            if (outputs != null) {
                for (int i = 0; i < outputs.size(); ++i) {
                    final IConnectorDescriptor conn = outputs.get(i);
                    RecordDescriptor recordDesc = ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
                    IConnectorPolicy cPolicy = connectorPoliciesMap.get(conn.getConnectorId());
                    IPartitionWriterFactory pwFactory =
                            createPartitionWriterFactory(task, cPolicy, jobId, conn, partition, taId, flags);
                    if (LOGGER.isLoggable(Level.INFO)) {
                        LOGGER.info("output: " + i + ": " + conn.getConnectorId());
                    }
                    IFrameWriter writer = conn.createPartitioner(task, recordDesc, pwFactory, partition,
                            td.getPartitionCount(), td.getOutputPartitionCounts()[i]);
                    operator.setOutputFrameWriter(i, writer, recordDesc);
                }
            }
            task.setTaskRuntime(collectors.toArray(new IPartitionCollector[collectors.size()]), operator);
            joblet.addTask(task);
            task.start();
        }
    } catch (Exception e) {
        LOGGER.log(Level.WARNING, "Failure starting a task", e);
        // Notify the CC of the task-start failure, including the caught exception.
        List<Exception> exceptions = new ArrayList<>();
        exceptions.add(e);
        ExceptionUtils.setNodeIds(exceptions, ncs.getId());
        ncs.getWorkQueue().schedule(new NotifyTaskFailureWork(ncs, task, exceptions));
    }
}
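The anonymous IRecordDescriptorProvider above closes over the final local acg, so record descriptors are resolved lazily, per call, while tasks are being set up. A minimal sketch of the same pattern with a hypothetical provider interface:

import java.util.Map;

public class ProviderSketch {

    // Hypothetical analog of IRecordDescriptorProvider.
    interface DescriptorProvider {
        String getOutputDescriptor(int activityId, int outputIndex);
    }

    public static void main(String[] args) {
        // Effectively final local captured by the anonymous class below,
        // just as the real code captures 'acg'.
        final Map<Integer, String[]> outputs = Map.of(1, new String[] { "desc-1-0" });
        DescriptorProvider rdp = new DescriptorProvider() {
            @Override
            public String getOutputDescriptor(int activityId, int outputIndex) {
                return outputs.get(activityId)[outputIndex]; // resolved lazily, per call
            }
        };
        System.out.println(rdp.getOutputDescriptor(1, 0)); // desc-1-0
    }
}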