use of edu.iu.dsc.tws.api.compute.executor.ExecutionPlan in project twister2 by DSC-SPIDAL.
the class SvmSgdAdvancedRunner method executeTestingTaskGraph.
/**
 * Builds, plans and executes the testing task graph.
 *
 * <p>The graph consists of a prediction source task connected to a prediction reduce task
 * through a reduce edge that aggregates with a {@code PredictionAggregator}. The source task
 * runs with {@code dataStreamerParallelism} and the reduce task with {@code reduceParallelism}.
 * Before execution, the testing data set and the trained weight vector are registered as
 * inputs of the source task.
 *
 * @return the object returned by {@code retrieveTestingAccuracyObject} for the executed graph
 */
public DataObject<Object> executeTestingTaskGraph() {
  // tasks of the testing graph
  predictionSourceTask = new PredictionSourceTask(
      svmJobParameters.isDummy(), this.binaryBatchModel, operationMode);
  predictionReduceTask = new PredictionReduceTask(operationMode);

  // wire source -> reduce
  testingBuilder.addSource(
      Constants.SimpleGraphConfig.PREDICTION_SOURCE_TASK,
      predictionSourceTask,
      dataStreamerParallelism);
  ComputeConnection predictionReduceConnection = testingBuilder.addCompute(
      Constants.SimpleGraphConfig.PREDICTION_REDUCE_TASK,
      predictionReduceTask,
      reduceParallelism);
  predictionReduceConnection
      .reduce(Constants.SimpleGraphConfig.PREDICTION_SOURCE_TASK)
      .viaEdge(Constants.SimpleGraphConfig.PREDICTION_EDGE)
      .withReductionFunction(new PredictionAggregator())
      .withDataType(MessageTypes.OBJECT);
  testingBuilder.setMode(operationMode);

  // build and plan the graph
  ComputeGraph predictionGraph = testingBuilder.build();
  predictionGraph.setGraphName("testing-graph");
  ExecutionPlan predictionPlan = taskExecutor.plan(predictionGraph);

  // input 1: the test data set
  taskExecutor.addInput(
      predictionGraph,
      predictionPlan,
      Constants.SimpleGraphConfig.PREDICTION_SOURCE_TASK,
      Constants.SimpleGraphConfig.TEST_DATA,
      testingData);
  // input 2: the final weight vector
  taskExecutor.addInput(
      predictionGraph,
      predictionPlan,
      Constants.SimpleGraphConfig.PREDICTION_SOURCE_TASK,
      Constants.SimpleGraphConfig.FINAL_WEIGHT_VECTOR,
      trainedWeightVector);

  taskExecutor.execute(predictionGraph, predictionPlan);
  return retrieveTestingAccuracyObject(predictionGraph, predictionPlan);
}
use of edu.iu.dsc.tws.api.compute.executor.ExecutionPlan in project twister2 by DSC-SPIDAL.
the class ExecutionPlanBuilder method build.
/**
 * Builds the {@link ExecutionPlan} of this worker for the given task graph and schedule.
 *
 * <p>The method proceeds in these steps:
 * <ol>
 *   <li>Build the logical plan and the parallel operation factory.</li>
 *   <li>When checkpointing is enabled, initialise the checkpoint family for the graph and
 *       read the version the tasks start with. Only worker 0 sends the set of task ids
 *       (checkpointable tasks that are not {@code CheckpointingSGatherSink}); every other
 *       worker sends an empty set.</li>
 *   <li>For each task instance scheduled on this worker, create the communications for its
 *       out edges ({@code ICompute} and {@code ISource} nodes) and in edges ({@code ICompute}
 *       nodes), create the node instance and add it to the plan.</li>
 *   <li>For each entry of {@code parOpTable}, build the parallel operation and register it
 *       with the source and target instances that live on this worker.</li>
 * </ol>
 *
 * @param cfg the configuration
 * @param taskGraph the task graph to build the plan for
 * @param taskSchedule the schedule that assigns task instances to workers
 * @return the execution plan, or {@code null} when this worker has no entry in the schedule
 * @throws RuntimeException when the checkpoint family cannot be initialised, a scheduled task
 *     is not a vertex of the graph, a communication has no edges, or a source or target
 *     instance of a communication cannot be found on this worker
 */
@Override
public ExecutionPlan build(Config cfg, ComputeGraph taskGraph, TaskSchedulePlan taskSchedule) {
  // we need to build the task plan
  LogicalPlan logicalPlan = TaskPlanBuilder.build(workerId, workerInfoList, taskSchedule, taskIdGenerator);
  ParallelOperationFactory opFactory = new ParallelOperationFactory(cfg, network, logicalPlan);
  Map<Integer, WorkerSchedulePlan> containersMap = taskSchedule.getContainersMap();
  WorkerSchedulePlan conPlan = containersMap.get(workerId);
  if (conPlan == null) {
    LOG.log(Level.INFO, "Cannot find worker in the task plan: " + workerId);
    return null;
  }
  ExecutionPlan execution = new ExecutionPlan();
  Set<TaskInstancePlan> instancePlan = conPlan.getTaskInstances();
  long tasksVersion = 0L;
  if (CheckpointingContext.isCheckpointingEnabled(cfg)) {
    Set<Integer> globalTasks = Collections.emptySet();
    if (workerId == 0) {
      // only worker 0 reports the checkpointable task ids of all workers
      globalTasks = containersMap.values().stream()
          .flatMap(containerPlan -> containerPlan.getTaskInstances().stream())
          .filter(ip -> taskGraph.vertex(ip.getTaskName()).getTask() instanceof CheckpointableTask
              && !(taskGraph.vertex(ip.getTaskName()).getTask() instanceof CheckpointingSGatherSink))
          .map(TaskInstancePlan::getTaskId)
          .collect(Collectors.toSet());
    }
    try {
      Checkpoint.FamilyInitializeResponse familyInitializeResponse = this.checkpointingClient.initFamily(
          workerId, containersMap.size(), taskGraph.getGraphName(), globalTasks);
      tasksVersion = familyInitializeResponse.getVersion();
    } catch (BlockingSendException e) {
      throw new RuntimeException("Failed to register tasks with Checkpoint Manager", e);
    }
    LOG.info("Tasks will start with version " + tasksVersion);
  }
  // for each task we are going to create the communications
  for (TaskInstancePlan ip : instancePlan) {
    Vertex v = taskGraph.vertex(ip.getTaskName());
    Map<String, Set<String>> inEdges = new HashMap<>();
    Map<String, String> outEdges = new HashMap<>();
    if (v == null) {
      throw new RuntimeException("Non-existing task scheduled: " + ip.getTaskName());
    }
    INode node = v.getTask();
    if (node instanceof ICompute || node instanceof ISource) {
      // lets get the communication
      Set<Edge> edges = taskGraph.outEdges(v);
      // now lets create the communication object
      for (Edge e : edges) {
        Vertex child = taskGraph.childOfTask(v, e.getName());
        // this vertex is the source, the child is the target
        Set<Integer> srcTasks = taskIdGenerator.getTaskIds(v, ip.getTaskId());
        Set<Integer> tarTasks = taskIdGenerator.getTaskIds(child, getTaskIdOfTask(child.getName(), taskSchedule));
        Map<Integer, Integer> srcGlobalToIndex = taskIdGenerator.getGlobalTaskToIndex(v, ip.getTaskId());
        Map<Integer, Integer> tarGlobalToIndex = taskIdGenerator.getGlobalTaskToIndex(child, getTaskIdOfTask(child.getName(), taskSchedule));
        createCommunication(child, e, v, srcTasks, tarTasks, srcGlobalToIndex, tarGlobalToIndex);
        outEdges.put(e.getName(), child.getName());
      }
    }
    if (node instanceof ICompute) {
      // lets get the parent tasks
      Set<Edge> parentEdges = taskGraph.inEdges(v);
      for (Edge e : parentEdges) {
        Vertex parent = taskGraph.getParentOfTask(v, e.getName());
        // the parent is the source, this vertex is the target
        Set<Integer> srcTasks = taskIdGenerator.getTaskIds(parent, getTaskIdOfTask(parent.getName(), taskSchedule));
        Set<Integer> tarTasks = taskIdGenerator.getTaskIds(v, ip.getTaskId());
        Map<Integer, Integer> srcGlobalToIndex = taskIdGenerator.getGlobalTaskToIndex(parent, getTaskIdOfTask(parent.getName(), taskSchedule));
        Map<Integer, Integer> tarGlobalToIndex = taskIdGenerator.getGlobalTaskToIndex(v, ip.getTaskId());
        createCommunication(v, e, parent, srcTasks, tarTasks, srcGlobalToIndex, tarGlobalToIndex);
        // if we are a grouped edge, we have to use the group name
        String inEdge;
        if (e.getTargetEdge() == null) {
          inEdge = e.getName();
        } else {
          inEdge = e.getTargetEdge();
        }
        // NOTE(review): the value added to the set is the edge name itself, not the parent
        // task name - kept as in the original; confirm this is what consumers of inEdges expect
        inEdges.computeIfAbsent(inEdge, key -> new HashSet<>()).add(inEdge);
      }
    }
    // lets create the instance
    INodeInstance iNodeInstance = createInstances(cfg, taskGraph.getGraphName(), ip, v,
        taskGraph.getOperationMode(), inEdges, outEdges, taskSchedule, tasksVersion);
    // add to execution
    execution.addNodes(v.getName(),
        taskIdGenerator.generateGlobalTaskId(ip.getTaskId(), ip.getTaskIndex()), iNodeInstance);
  }
  // now lets create the queues and start the execution
  for (Table.Cell<String, String, Communication> cell : parOpTable.cellSet()) {
    Communication c = cell.getValue();
    // lets create the communication
    OperationMode operationMode = taskGraph.getOperationMode();
    IParallelOperation op;
    assert c != null;
    c.build();
    if (c.getEdge().size() == 1) {
      op = opFactory.build(c.getEdge(0), c.getSourceTasks(), c.getTargetTasks(), operationMode,
          c.srcGlobalToIndex, c.tarGlobalToIndex);
    } else if (c.getEdge().size() > 1) {
      // just join op for now. Could change in the future
      // here the sources should be separated out for left and right edge
      Set<Integer> sourceTasks = c.getSourceTasks();
      Set<Integer> leftSources = new HashSet<>();
      Set<Integer> rightSources = new HashSet<>();
      if (!sourceTasks.isEmpty()) {
        // the set is non-empty, so min() always yields a value and .get() is safe
        // sources falling in the same TASK_OFFSET bin as the smallest id go to the left side
        int minBin = (sourceTasks.stream().min(Integer::compareTo).get() / TaskIdGenerator.TASK_OFFSET)
            * TaskIdGenerator.TASK_OFFSET;
        for (Integer source : sourceTasks) {
          if ((source / TaskIdGenerator.TASK_OFFSET) * TaskIdGenerator.TASK_OFFSET == minBin) {
            leftSources.add(source);
          } else {
            rightSources.add(source);
          }
        }
      }
      // now determine, which task is connected to which edge
      Edge leftEdge = c.getEdge(0);
      Edge rightEdge = c.getEdge(1);
      op = opFactory.build(leftEdge, rightEdge, leftSources, rightSources, c.getTargetTasks(),
          operationMode, c.srcGlobalToIndex, c.tarGlobalToIndex);
    } else {
      throw new RuntimeException("Cannot have communication with 0 edges");
    }
    // now lets check the sources and targets that are in this executor
    Set<Integer> sourcesOfThisWorker = intersectionOfTasks(conPlan, c.getSourceTasks());
    Set<Integer> targetsOfThisWorker = intersectionOfTasks(conPlan, c.getTargetTasks());
    // we use the target edge as the group name
    String targetEdge;
    if (c.getEdge().size() > 1) {
      targetEdge = c.getEdge(0).getTargetEdge();
    } else {
      targetEdge = c.getEdge(0).getName();
    }
    // so along with the operation mode, the windowing mode must be tested
    if (operationMode == OperationMode.STREAMING) {
      for (Integer i : sourcesOfThisWorker) {
        boolean found = false;
        // we can have multiple source tasks for an operation
        for (int sIndex = 0; sIndex < c.getSourceTask().size(); sIndex++) {
          String sourceTask = c.getSourceTask().get(sIndex);
          if (streamingTaskInstances.contains(sourceTask, i)) {
            TaskStreamingInstance taskStreamingInstance = streamingTaskInstances.get(sourceTask, i);
            taskStreamingInstance.registerOutParallelOperation(c.getEdge(sIndex).getName(), op);
            op.registerSync(i, taskStreamingInstance);
            found = true;
          } else if (streamingSourceInstances.contains(sourceTask, i)) {
            SourceStreamingInstance sourceStreamingInstance = streamingSourceInstances.get(sourceTask, i);
            sourceStreamingInstance.registerOutParallelOperation(c.getEdge(sIndex).getName(), op);
            found = true;
          }
        }
        // check only after every source task name was tried (same as the batch branch);
        // checking inside the loop rejects ids that belong to a later source of a join
        if (!found) {
          throw new RuntimeException("Not found: " + c.getSourceTask());
        }
      }
      // we only have one target task always
      for (Integer i : targetsOfThisWorker) {
        if (streamingTaskInstances.contains(c.getTargetTask(), i)) {
          TaskStreamingInstance taskStreamingInstance = streamingTaskInstances.get(c.getTargetTask(), i);
          op.register(i, taskStreamingInstance.getInQueue());
          taskStreamingInstance.registerInParallelOperation(targetEdge, op);
          op.registerSync(i, taskStreamingInstance);
        } else {
          throw new RuntimeException("Not found: " + c.getTargetTask());
        }
      }
      execution.addOps(op);
    }
    if (operationMode == OperationMode.BATCH) {
      for (Integer i : sourcesOfThisWorker) {
        boolean found = false;
        // we can have multiple source tasks for an operation
        for (int sIndex = 0; sIndex < c.getSourceTask().size(); sIndex++) {
          String sourceTask = c.getSourceTask().get(sIndex);
          if (batchTaskInstances.contains(sourceTask, i)) {
            TaskBatchInstance taskBatchInstance = batchTaskInstances.get(sourceTask, i);
            taskBatchInstance.registerOutParallelOperation(c.getEdge(sIndex).getName(), op);
            found = true;
          } else if (batchSourceInstances.contains(sourceTask, i)) {
            SourceBatchInstance sourceBatchInstance = batchSourceInstances.get(sourceTask, i);
            sourceBatchInstance.registerOutParallelOperation(c.getEdge(sIndex).getName(), op);
            found = true;
          }
        }
        if (!found) {
          throw new RuntimeException("Not found: " + c.getSourceTask());
        }
      }
      for (Integer i : targetsOfThisWorker) {
        if (batchTaskInstances.contains(c.getTargetTask(), i)) {
          TaskBatchInstance taskBatchInstance = batchTaskInstances.get(c.getTargetTask(), i);
          op.register(i, taskBatchInstance.getInQueue());
          taskBatchInstance.registerInParallelOperation(targetEdge, op);
          op.registerSync(i, taskBatchInstance);
        } else {
          throw new RuntimeException("Not found: " + c.getTargetTask());
        }
      }
      execution.addOps(op);
    }
  }
  return execution;
}
use of edu.iu.dsc.tws.api.compute.executor.ExecutionPlan in project twister2 by DSC-SPIDAL.
the class BasicComputation method execute.
/**
 * Runs the three task graphs of this computation in sequence: graph partitioning, graph
 * initialization, and the iterative computation.
 *
 * <p>When the configured number of iterations is non-zero, the computation graph is executed
 * exactly that many times, passing {@code true} to the executor only on the last iteration
 * (presumably so the executor closes itself - confirm against {@code IExecutor}). Otherwise it
 * is executed repeatedly while {@code globaliterationStatus} is set, and the execution is
 * closed explicitly afterwards.
 *
 * <p>Worker 0 prints the number of iterations actually executed and the elapsed time, which is
 * measured from just before the first iteration until after the task executor is closed.
 */
@Override
public void execute() {
  LOG.log(Level.INFO, "Task worker starting: " + workerId);
  WorkerParameter workerParameter = WorkerParameter.build(config);
  parallelism = workerParameter.getParallelismValue();
  graphsize = workerParameter.getDsize();
  sourceVertexGlobal = workerParameter.getSourcevertex();
  dataDirectory = workerParameter.getDatapointDirectory();
  iterations = workerParameter.getIterations();

  // first task graph: partition the graph and read the partitioned data points
  ComputeGraph graphpartitionTaskGraph = graphpartition();
  ExecutionPlan executionPlan = taskExecutor.plan(graphpartitionTaskGraph);
  taskExecutor.execute(graphpartitionTaskGraph, executionPlan);

  // second task graph: graph initialization
  ComputeGraph graphInitializationTaskGraph = graphInitialization();
  ExecutionPlan executionPlan1 = taskExecutor.plan(graphInitializationTaskGraph);
  taskExecutor.execute(graphInitializationTaskGraph, executionPlan1);

  // third task graph: the iterative computation
  ComputeGraph computationTaskgraph = computation();
  IExecutor ex = taskExecutor.createExecution(computationTaskgraph);
  // counts every iteration that ran, in the bounded as well as the unbounded mode, so the
  // number reported below is correct for both
  int executedIterations = 0;
  long startime = System.currentTimeMillis();
  if (iterations != 0) {
    for (int i = 0; i < iterations; i++) {
      // true only for the last iteration
      ex.execute(i == iterations - 1);
      executedIterations++;
    }
  } else {
    while (globaliterationStatus) {
      ex.execute(false);
      executedIterations++;
    }
    // no iteration was flagged as the last one, so close the execution explicitly
    ex.closeExecution();
  }
  taskExecutor.close();
  long endTime = System.currentTimeMillis();
  if (workerId == 0) {
    System.out.println("total number of iterations : " + executedIterations);
    System.out.println("computation time: " + (endTime - startime));
  }
}
use of edu.iu.dsc.tws.api.compute.executor.ExecutionPlan in project twister2 by DSC-SPIDAL.
the class PageRankWorker method execute.
// private static double danglingNodeValues;
/**
 * Runs the three PageRank task graphs in sequence (data point partitioning, initial value
 * assignment, iterative computation) and, on worker 0, prints the resulting values.
 *
 * <p>The computation graph is executed {@code iterations} times; {@code true} is passed to the
 * executor only on the last iteration. Afterwards worker 0 reads the {@code "InitialValue"}
 * output, expects a map from vertex id to value that also carries the entry
 * {@code "danglingvalues"}, and prints for every vertex its value plus
 * {@code 0.85 * danglingvalues / graphsize}, followed by summary figures and the elapsed time.
 *
 * <p>The output map is only read; its entries are not removed while printing.
 */
@Override
public void execute() {
  LOG.log(Level.INFO, "Task worker starting: " + workerId);
  PageRankWorkerParameters pageRankWorkerParameters = PageRankWorkerParameters.build(config);
  int parallelismValue = pageRankWorkerParameters.getParallelismValue();
  int dsize = pageRankWorkerParameters.getDsize();
  String dataDirectory = pageRankWorkerParameters.getDatapointDirectory();
  int iterations = pageRankWorkerParameters.getIterations();
  graphsize = dsize;

  /* First Graph to partition and read the partitioned data points **/
  ComputeGraph datapointsTaskGraph = buildDataPointsTG(dataDirectory, dsize, parallelismValue, config);
  ExecutionPlan executionPlan = taskExecutor.plan(datapointsTaskGraph);
  taskExecutor.execute(datapointsTaskGraph, executionPlan);
  /* the output of the first graph would look like below
   * task Id: 0
   {1=[3, 4], 2=[3, 4, 5]}*/

  // the second task graph assigns the initial pagerank values to the vertices
  ComputeGraph graphInitialValueTaskGraph = buildGraphInitialValueTG(dataDirectory, dsize, parallelismValue, config);
  ExecutionPlan executionPlan1 = taskExecutor.plan(graphInitialValueTaskGraph);
  taskExecutor.execute(graphInitialValueTaskGraph, executionPlan1);
  /* the output of the second graph should look like below
   initiate the pagerank value
   * {1=0.25, 2=0.25}
   */

  // third task graph for computations
  ComputeGraph pageranktaskgraph = buildComputationTG(parallelismValue, config);
  IExecutor ex = taskExecutor.createExecution(pageranktaskgraph);
  // Perform the iterations from 0 to 'n' number of iterations
  long startime = System.currentTimeMillis();
  for (int i = 0; i < iterations; i++) {
    // true only for the last iteration
    ex.execute(i == iterations - 1);
  }
  taskExecutor.close();
  long endTime = System.currentTimeMillis();

  if (workerId == 0) {
    DataObject<Object> graphInitialPagerankValue = taskExecutor.getOutput("InitialValue");
    DataPartition<?> finaloutput = graphInitialPagerankValue.getPartition(workerId);
    // the partition is untyped here; the map shape is what this method relies on below
    @SuppressWarnings("unchecked")
    HashMap<String, Double> finalone = (HashMap<String, Double>) finaloutput.getConsumer().next();
    System.out.println(finalone);
    LOG.info("Final output After " + iterations + "iterations ");
    Double recivedFinalDanglingValue = finalone.get("danglingvalues");
    double cummulativepagerankvalue = 0.0;
    int num = 0;
    System.out.println(graphsize);
    // plain read-only traversal: nothing is modified, so no removal through the iterator is
    // needed and the result map stays intact
    for (Map.Entry<String, Double> entry : finalone.entrySet()) {
      if (!entry.getKey().equals("danglingvalues")) {
        double finalPagerankValue = entry.getValue() + ((0.85 * recivedFinalDanglingValue) / graphsize);
        System.out.print("Vertex Id: " + entry.getKey());
        System.out.printf(" and it's pagerank value: %.15f \n", finalPagerankValue);
        cummulativepagerankvalue += finalPagerankValue;
        num += 1;
      }
    }
    System.out.println(recivedFinalDanglingValue);
    System.out.println(num);
    System.out.println(cummulativepagerankvalue);
    // adds, for the (graphsize - num) vertices that are not in the map, the value
    // 0.15 / graphsize + 0.85 * danglingvalues / graphsize each
    System.out.println(cummulativepagerankvalue + ((graphsize - num) * ((((double) 1 / graphsize) * 0.15) + (0.85 * (recivedFinalDanglingValue / graphsize)))));
    System.out.println("computation time: " + (endTime - startime));
  }
}
use of edu.iu.dsc.tws.api.compute.executor.ExecutionPlan in project twister2 by DSC-SPIDAL.
the class Twister2StormWorker method execute.
/**
 * Builds the topology, obtains its Twister2 compute graph, plans it with the task executor
 * and executes the resulting plan.
 */
@Override
public void execute() {
  final ComputeGraph topologyGraph = this.buildTopology().getT2ComputeGraph();
  final ExecutionPlan topologyPlan = taskExecutor.plan(topologyGraph);
  taskExecutor.execute(topologyGraph, topologyPlan);
}
Aggregations