Search in sources :

Example 1 with OperationMode

Use of edu.iu.dsc.tws.api.compute.graph.OperationMode in project twister2 by DSC-SPIDAL.

The class ExecutionPlanBuilder, method build.

/**
 * Builds the execution plan for the worker identified by {@code workerId}.
 *
 * <p>The method proceeds in these steps:
 * <ol>
 *   <li>Builds the logical plan and looks up this worker's schedule in
 *       {@code taskSchedule.getContainersMap()}.</li>
 *   <li>If checkpointing is enabled, calls {@code checkpointingClient.initFamily} and uses the
 *       version from the response for all instances created below. Only worker 0 passes a
 *       non-empty task id set: the ids of instances whose task is a {@code CheckpointableTask}
 *       but not a {@code CheckpointingSGatherSink}.</li>
 *   <li>For every task instance scheduled on this worker, registers a communication for each
 *       outgoing edge (tasks that are {@code ICompute} or {@code ISource}) and each incoming edge
 *       (tasks that are {@code ICompute}), then creates the node instance and adds it to the
 *       plan.</li>
 *   <li>For every communication in {@code parOpTable}, builds the parallel operation and
 *       registers it with the source and target instances that live on this worker, for
 *       {@code STREAMING} or {@code BATCH} operation mode.</li>
 * </ol>
 *
 * @param cfg the configuration handed to the operation factory and to instance creation
 * @param taskGraph the compute graph whose vertices and edges are wired up
 * @param taskSchedule the schedule that maps workers to task instances
 * @return the execution plan, or {@code null} when the schedule has no entry for this worker
 * @throws RuntimeException if registering with the checkpoint manager fails, if a scheduled task
 *     has no vertex in the graph, if a communication has no edges, or if a source or target
 *     task of this worker has no matching instance
 */
@Override
public ExecutionPlan build(Config cfg, ComputeGraph taskGraph, TaskSchedulePlan taskSchedule) {
    // we need to build the task plan
    LogicalPlan logicalPlan = TaskPlanBuilder.build(workerId, workerInfoList, taskSchedule, taskIdGenerator);
    ParallelOperationFactory opFactory = new ParallelOperationFactory(cfg, network, logicalPlan);
    Map<Integer, WorkerSchedulePlan> containersMap = taskSchedule.getContainersMap();
    WorkerSchedulePlan conPlan = containersMap.get(workerId);
    if (conPlan == null) {
        // callers receive null when this worker is absent from the schedule
        LOG.log(Level.INFO, "Cannot find worker in the task plan: " + workerId);
        return null;
    }
    ExecutionPlan execution = new ExecutionPlan();
    Set<TaskInstancePlan> instancePlan = conPlan.getTaskInstances();
    long tasksVersion = 0L;
    if (CheckpointingContext.isCheckpointingEnabled(cfg)) {
        // only worker 0 reports the set of checkpointable task ids; the others send an empty set
        Set<Integer> globalTasks = Collections.emptySet();
        if (workerId == 0) {
            globalTasks = containersMap.values().stream()
                .flatMap(containerPlan -> containerPlan.getTaskInstances().stream())
                .filter(ip -> taskGraph.vertex(ip.getTaskName()).getTask() instanceof CheckpointableTask
                    && !(taskGraph.vertex(ip.getTaskName()).getTask() instanceof CheckpointingSGatherSink))
                .map(TaskInstancePlan::getTaskId)
                .collect(Collectors.toSet());
        }
        try {
            Checkpoint.FamilyInitializeResponse familyInitializeResponse = this.checkpointingClient.initFamily(workerId, containersMap.size(), taskGraph.getGraphName(), globalTasks);
            tasksVersion = familyInitializeResponse.getVersion();
        } catch (BlockingSendException e) {
            throw new RuntimeException("Failed to register tasks with Checkpoint Manager", e);
        }
        LOG.info("Tasks will start with version " + tasksVersion);
    }
    // for each task we are going to create the communications
    for (TaskInstancePlan ip : instancePlan) {
        Vertex v = taskGraph.vertex(ip.getTaskName());
        Map<String, Set<String>> inEdges = new HashMap<>();
        Map<String, String> outEdges = new HashMap<>();
        if (v == null) {
            throw new RuntimeException("Non-existing task scheduled: " + ip.getTaskName());
        }
        INode node = v.getTask();
        if (node instanceof ICompute || node instanceof ISource) {
            // lets get the communication
            Set<Edge> edges = taskGraph.outEdges(v);
            // now lets create the communication object
            for (Edge e : edges) {
                Vertex child = taskGraph.childOfTask(v, e.getName());
                // this task is the source, the child is the target
                Set<Integer> srcTasks = taskIdGenerator.getTaskIds(v, ip.getTaskId());
                Set<Integer> tarTasks = taskIdGenerator.getTaskIds(child, getTaskIdOfTask(child.getName(), taskSchedule));
                Map<Integer, Integer> srcGlobalToIndex = taskIdGenerator.getGlobalTaskToIndex(v, ip.getTaskId());
                Map<Integer, Integer> tarGlobaToIndex = taskIdGenerator.getGlobalTaskToIndex(child, getTaskIdOfTask(child.getName(), taskSchedule));
                createCommunication(child, e, v, srcTasks, tarTasks, srcGlobalToIndex, tarGlobaToIndex);
                outEdges.put(e.getName(), child.getName());
            }
        }
        if (node instanceof ICompute) {
            // lets get the parent tasks
            Set<Edge> parentEdges = taskGraph.inEdges(v);
            for (Edge e : parentEdges) {
                Vertex parent = taskGraph.getParentOfTask(v, e.getName());
                // the parent is the source, this task is the target
                Set<Integer> srcTasks = taskIdGenerator.getTaskIds(parent, getTaskIdOfTask(parent.getName(), taskSchedule));
                Set<Integer> tarTasks = taskIdGenerator.getTaskIds(v, ip.getTaskId());
                Map<Integer, Integer> srcGlobalToIndex = taskIdGenerator.getGlobalTaskToIndex(parent, getTaskIdOfTask(parent.getName(), taskSchedule));
                Map<Integer, Integer> tarGlobalToIndex = taskIdGenerator.getGlobalTaskToIndex(v, ip.getTaskId());
                createCommunication(v, e, parent, srcTasks, tarTasks, srcGlobalToIndex, tarGlobalToIndex);
                // if we are a grouped edge, we have to use the group name
                String inEdge;
                if (e.getTargetEdge() == null) {
                    inEdge = e.getName();
                } else {
                    inEdge = e.getTargetEdge();
                }
                // NOTE(review): the set stored under the edge name receives the edge name itself,
                // not parent.getName() - kept as in the original; confirm this is intended
                inEdges.computeIfAbsent(inEdge, key -> new HashSet<>()).add(inEdge);
            }
        }
        // lets create the instance
        INodeInstance iNodeInstance = createInstances(cfg, taskGraph.getGraphName(), ip, v, taskGraph.getOperationMode(), inEdges, outEdges, taskSchedule, tasksVersion);
        // add to execution
        execution.addNodes(v.getName(), taskIdGenerator.generateGlobalTaskId(ip.getTaskId(), ip.getTaskIndex()), iNodeInstance);
    }
    // now lets create the queues and start the execution
    for (Table.Cell<String, String, Communication> cell : parOpTable.cellSet()) {
        Communication c = cell.getValue();
        // lets create the communication
        OperationMode operationMode = taskGraph.getOperationMode();
        IParallelOperation op;
        assert c != null;
        c.build();
        if (c.getEdge().size() == 1) {
            op = opFactory.build(c.getEdge(0), c.getSourceTasks(), c.getTargetTasks(), operationMode, c.srcGlobalToIndex, c.tarGlobalToIndex);
        } else if (c.getEdge().size() > 1) {
            // just join op for now. Could change in the future
            // here the sources should be separated out for left and right edge
            Set<Integer> sourceTasks = c.getSourceTasks();
            Set<Integer> leftSources = new HashSet<>();
            Set<Integer> rightSources = new HashSet<>();
            if (!sourceTasks.isEmpty()) {
                // the emptiness check above makes the .get() on the Optional safe;
                // sources falling into the lowest TASK_OFFSET-aligned bin go left, all others right
                int minBin = (sourceTasks.stream().min(Integer::compareTo).get() / TaskIdGenerator.TASK_OFFSET) * TaskIdGenerator.TASK_OFFSET;
                for (Integer source : sourceTasks) {
                    if ((source / TaskIdGenerator.TASK_OFFSET) * TaskIdGenerator.TASK_OFFSET == minBin) {
                        leftSources.add(source);
                    } else {
                        rightSources.add(source);
                    }
                }
            }
            // now determine, which task is connected to which edge
            Edge leftEdge = c.getEdge(0);
            Edge rightEdge = c.getEdge(1);
            op = opFactory.build(leftEdge, rightEdge, leftSources, rightSources, c.getTargetTasks(), operationMode, c.srcGlobalToIndex, c.tarGlobalToIndex);
        } else {
            throw new RuntimeException("Cannot have communication with 0 edges");
        }
        // now lets check the sources and targets that are in this executor
        Set<Integer> sourcesOfThisWorker = intersectionOfTasks(conPlan, c.getSourceTasks());
        Set<Integer> targetsOfThisWorker = intersectionOfTasks(conPlan, c.getTargetTasks());
        // we use the target edge as the group name
        String targetEdge;
        if (c.getEdge().size() > 1) {
            targetEdge = c.getEdge(0).getTargetEdge();
        } else {
            targetEdge = c.getEdge(0).getName();
        }
        // so along with the operation mode, the windowing mode must be tested
        if (operationMode == OperationMode.STREAMING) {
            for (Integer i : sourcesOfThisWorker) {
                boolean found = false;
                // we can have multiple source tasks for an operation
                for (int sIndex = 0; sIndex < c.getSourceTask().size(); sIndex++) {
                    String sourceTask = c.getSourceTask().get(sIndex);
                    if (streamingTaskInstances.contains(sourceTask, i)) {
                        TaskStreamingInstance taskStreamingInstance = streamingTaskInstances.get(sourceTask, i);
                        taskStreamingInstance.registerOutParallelOperation(c.getEdge(sIndex).getName(), op);
                        op.registerSync(i, taskStreamingInstance);
                        found = true;
                    } else if (streamingSourceInstances.contains(sourceTask, i)) {
                        SourceStreamingInstance sourceStreamingInstance = streamingSourceInstances.get(sourceTask, i);
                        sourceStreamingInstance.registerOutParallelOperation(c.getEdge(sIndex).getName(), op);
                        found = true;
                    }
                }
                // checked only after every source task was tried: a task id that belongs to a
                // later source task must not fail on the earlier ones (same as the batch branch)
                if (!found) {
                    throw new RuntimeException("Not found: " + c.getSourceTask());
                }
            }
            // we only have one target task always
            for (Integer i : targetsOfThisWorker) {
                if (streamingTaskInstances.contains(c.getTargetTask(), i)) {
                    TaskStreamingInstance taskStreamingInstance = streamingTaskInstances.get(c.getTargetTask(), i);
                    op.register(i, taskStreamingInstance.getInQueue());
                    taskStreamingInstance.registerInParallelOperation(targetEdge, op);
                    op.registerSync(i, taskStreamingInstance);
                } else {
                    throw new RuntimeException("Not found: " + c.getTargetTask());
                }
            }
            execution.addOps(op);
        }
        if (operationMode == OperationMode.BATCH) {
            for (Integer i : sourcesOfThisWorker) {
                boolean found = false;
                // we can have multiple source tasks for an operation
                for (int sIndex = 0; sIndex < c.getSourceTask().size(); sIndex++) {
                    String sourceTask = c.getSourceTask().get(sIndex);
                    if (batchTaskInstances.contains(sourceTask, i)) {
                        TaskBatchInstance taskBatchInstance = batchTaskInstances.get(sourceTask, i);
                        taskBatchInstance.registerOutParallelOperation(c.getEdge(sIndex).getName(), op);
                        found = true;
                    } else if (batchSourceInstances.contains(sourceTask, i)) {
                        SourceBatchInstance sourceBatchInstance = batchSourceInstances.get(sourceTask, i);
                        sourceBatchInstance.registerOutParallelOperation(c.getEdge(sIndex).getName(), op);
                        found = true;
                    }
                }
                if (!found) {
                    throw new RuntimeException("Not found: " + c.getSourceTask());
                }
            }
            for (Integer i : targetsOfThisWorker) {
                if (batchTaskInstances.contains(c.getTargetTask(), i)) {
                    TaskBatchInstance taskBatchInstance = batchTaskInstances.get(c.getTargetTask(), i);
                    op.register(i, taskBatchInstance.getInQueue());
                    taskBatchInstance.registerInParallelOperation(targetEdge, op);
                    op.registerSync(i, taskBatchInstance);
                } else {
                    throw new RuntimeException("Not found: " + c.getTargetTask());
                }
            }
            execution.addOps(op);
        }
    }
    return execution;
}
Also used : Checkpoint(edu.iu.dsc.tws.proto.checkpoint.Checkpoint) ComputeGraph(edu.iu.dsc.tws.api.compute.graph.ComputeGraph) LogicalPlan(edu.iu.dsc.tws.api.comms.LogicalPlan) IParallelOperation(edu.iu.dsc.tws.api.compute.executor.IParallelOperation) INode(edu.iu.dsc.tws.api.compute.nodes.INode) HashMap(java.util.HashMap) HashBasedTable(com.google.common.collect.HashBasedTable) Config(edu.iu.dsc.tws.api.config.Config) INodeInstance(edu.iu.dsc.tws.api.compute.executor.INodeInstance) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) HashSet(java.util.HashSet) WorkerSchedulePlan(edu.iu.dsc.tws.api.compute.schedule.elements.WorkerSchedulePlan) JobMasterAPI(edu.iu.dsc.tws.proto.jobmaster.JobMasterAPI) ExecutionPlan(edu.iu.dsc.tws.api.compute.executor.ExecutionPlan) TaskSchedulePlan(edu.iu.dsc.tws.api.compute.schedule.elements.TaskSchedulePlan) Map(java.util.Map) CheckpointableTask(edu.iu.dsc.tws.checkpointing.task.CheckpointableTask) TaskInstancePlan(edu.iu.dsc.tws.api.compute.schedule.elements.TaskInstancePlan) BlockingSendException(edu.iu.dsc.tws.api.exceptions.net.BlockingSendException) ISource(edu.iu.dsc.tws.api.compute.nodes.ISource) TaskStreamingInstance(edu.iu.dsc.tws.executor.core.streaming.TaskStreamingInstance) TaskBatchInstance(edu.iu.dsc.tws.executor.core.batch.TaskBatchInstance) CheckpointingContext(edu.iu.dsc.tws.checkpointing.util.CheckpointingContext) SourceStreamingInstance(edu.iu.dsc.tws.executor.core.streaming.SourceStreamingInstance) IExecutionPlanBuilder(edu.iu.dsc.tws.api.compute.executor.IExecutionPlanBuilder) ICompute(edu.iu.dsc.tws.api.compute.nodes.ICompute) Set(java.util.Set) CheckpointingSGatherSink(edu.iu.dsc.tws.checkpointing.task.CheckpointingSGatherSink) Logger(java.util.logging.Logger) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) SourceBatchInstance(edu.iu.dsc.tws.executor.core.batch.SourceBatchInstance) Vertex(edu.iu.dsc.tws.api.compute.graph.Vertex) 
ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) Communicator(edu.iu.dsc.tws.api.comms.Communicator) List(java.util.List) CheckpointingClient(edu.iu.dsc.tws.api.checkpointing.CheckpointingClient) OperationMode(edu.iu.dsc.tws.api.compute.graph.OperationMode) Utils(edu.iu.dsc.tws.executor.util.Utils) ExecutorContext(edu.iu.dsc.tws.api.compute.executor.ExecutorContext) Edge(edu.iu.dsc.tws.api.compute.graph.Edge) Collections(java.util.Collections) Table(com.google.common.collect.Table) Vertex(edu.iu.dsc.tws.api.compute.graph.Vertex) INode(edu.iu.dsc.tws.api.compute.nodes.INode) HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) CheckpointingSGatherSink(edu.iu.dsc.tws.checkpointing.task.CheckpointingSGatherSink) WorkerSchedulePlan(edu.iu.dsc.tws.api.compute.schedule.elements.WorkerSchedulePlan) ExecutionPlan(edu.iu.dsc.tws.api.compute.executor.ExecutionPlan) TaskInstancePlan(edu.iu.dsc.tws.api.compute.schedule.elements.TaskInstancePlan) ISource(edu.iu.dsc.tws.api.compute.nodes.ISource) OperationMode(edu.iu.dsc.tws.api.compute.graph.OperationMode) CheckpointableTask(edu.iu.dsc.tws.checkpointing.task.CheckpointableTask) SourceStreamingInstance(edu.iu.dsc.tws.executor.core.streaming.SourceStreamingInstance) HashBasedTable(com.google.common.collect.HashBasedTable) Table(com.google.common.collect.Table) INodeInstance(edu.iu.dsc.tws.api.compute.executor.INodeInstance) TaskBatchInstance(edu.iu.dsc.tws.executor.core.batch.TaskBatchInstance) Checkpoint(edu.iu.dsc.tws.proto.checkpoint.Checkpoint) Checkpoint(edu.iu.dsc.tws.proto.checkpoint.Checkpoint) BlockingSendException(edu.iu.dsc.tws.api.exceptions.net.BlockingSendException) TaskStreamingInstance(edu.iu.dsc.tws.executor.core.streaming.TaskStreamingInstance) IParallelOperation(edu.iu.dsc.tws.api.compute.executor.IParallelOperation) SourceBatchInstance(edu.iu.dsc.tws.executor.core.batch.SourceBatchInstance) LogicalPlan(edu.iu.dsc.tws.api.comms.LogicalPlan) 
ICompute(edu.iu.dsc.tws.api.compute.nodes.ICompute) Edge(edu.iu.dsc.tws.api.compute.graph.Edge)

Aggregations

HashBasedTable (com.google.common.collect.HashBasedTable)1 Table (com.google.common.collect.Table)1 CheckpointingClient (edu.iu.dsc.tws.api.checkpointing.CheckpointingClient)1 Communicator (edu.iu.dsc.tws.api.comms.Communicator)1 LogicalPlan (edu.iu.dsc.tws.api.comms.LogicalPlan)1 ExecutionPlan (edu.iu.dsc.tws.api.compute.executor.ExecutionPlan)1 ExecutorContext (edu.iu.dsc.tws.api.compute.executor.ExecutorContext)1 IExecutionPlanBuilder (edu.iu.dsc.tws.api.compute.executor.IExecutionPlanBuilder)1 INodeInstance (edu.iu.dsc.tws.api.compute.executor.INodeInstance)1 IParallelOperation (edu.iu.dsc.tws.api.compute.executor.IParallelOperation)1 ComputeGraph (edu.iu.dsc.tws.api.compute.graph.ComputeGraph)1 Edge (edu.iu.dsc.tws.api.compute.graph.Edge)1 OperationMode (edu.iu.dsc.tws.api.compute.graph.OperationMode)1 Vertex (edu.iu.dsc.tws.api.compute.graph.Vertex)1 ICompute (edu.iu.dsc.tws.api.compute.nodes.ICompute)1 INode (edu.iu.dsc.tws.api.compute.nodes.INode)1 ISource (edu.iu.dsc.tws.api.compute.nodes.ISource)1 TaskInstancePlan (edu.iu.dsc.tws.api.compute.schedule.elements.TaskInstancePlan)1 TaskSchedulePlan (edu.iu.dsc.tws.api.compute.schedule.elements.TaskSchedulePlan)1 WorkerSchedulePlan (edu.iu.dsc.tws.api.compute.schedule.elements.WorkerSchedulePlan)1