use of edu.iu.dsc.tws.api.dataset.DataObject in project twister2 by DSC-SPIDAL.
the class TaskExecutor method distributeData.
/**
* This method distributes collected {@link DataPartition}s to the
* intended {@link Receptor}s
*/
public static void distributeData(ExecutionPlan executionPlan, Map<String, DataObject> dataMap) {
  Map<Integer, INodeInstance> nodes = executionPlan.getNodes();
  if (nodes != null) {
    nodes.forEach((id, node) -> {
      INode task = node.getNode();
      if (task instanceof Receptor) {
        Set<String> receivableNames = ((Receptor) task).getReceivableNames();
        for (String receivableName : receivableNames) {
          DataObject dataObject = dataMap.get(receivableName);
          if (dataObject == null) {
            throw new Twister2RuntimeException("Couldn't find input data " + receivableName
                + " for task " + node.getId());
          }
          DataPartition partition = dataObject.getPartition(node.getIndex());
          if (partition == null) {
            throw new Twister2RuntimeException("Couldn't find input data " + receivableName
                + " for task index " + node.getIndex() + " of task " + node.getId());
          }
          ((Receptor) task).add(receivableName, dataObject);
          ((Receptor) task).add(receivableName, partition);
        }
      }
    });
  }
}
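For context, here is a minimal sketch (not from the twister2 source) of a task that can receive data from distributeData: it advertises a receivable name via getReceivableNames and stores whatever partition is pushed to it. The class name PointsReceptorTask and the input name "points" are hypothetical, and the Receptor import path is assumed; only the Receptor methods used in distributeData above are relied on.

import java.util.Collections;
import java.util.Set;
import edu.iu.dsc.tws.api.compute.modifiers.Receptor; // package path assumed
import edu.iu.dsc.tws.api.dataset.DataObject;
import edu.iu.dsc.tws.api.dataset.DataPartition;

public class PointsReceptorTask implements Receptor {
  private DataPartition<?> points;

  @Override
  public Set<String> getReceivableNames() {
    // distributeData looks each of these names up in its dataMap
    return Collections.singleton("points");
  }

  @Override
  public void add(String name, DataObject<?> data) {
    // whole-DataObject variant; this sketch only keeps the per-index partition
  }

  @Override
  public void add(String name, DataPartition<?> data) {
    if ("points".equals(name)) {
      this.points = data;
    }
  }
}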
use of edu.iu.dsc.tws.api.dataset.DataObject in project twister2 by DSC-SPIDAL.
the class TaskWorkerDataLoader method execute.
@Override
public void execute() {
  getParams();
  ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
  DataObjectSource sourceTask = new DataObjectSource(Context.TWISTER2_DIRECT_EDGE, dataSource);
  DataObjectSink sinkTask = new DataObjectSink();
  computeGraphBuilder.addSource("datapointsource", sourceTask, parallelism);
  ComputeConnection firstGraphComputeConnection =
      computeGraphBuilder.addCompute("datapointsink", sinkTask, parallelism);
  firstGraphComputeConnection.direct("datapointsource")
      .viaEdge(Context.TWISTER2_DIRECT_EDGE)
      .withDataType(MessageTypes.OBJECT);
  computeGraphBuilder.setMode(OperationMode.BATCH);
  ComputeGraph datapointsTaskGraph = computeGraphBuilder.build();
  ExecutionPlan firstGraphExecutionPlan = taskExecutor.plan(datapointsTaskGraph);
  taskExecutor.execute(datapointsTaskGraph, firstGraphExecutionPlan);
  DataObject<Object> dataPointsObject =
      taskExecutor.getOutput(datapointsTaskGraph, firstGraphExecutionPlan, "datapointsink");
  LOG.info("Total Partitions : " + dataPointsObject.getPartitions().length);
  showAllUnits(dataPointsObject);
}
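showAllUnits is not shown on this page; a plausible sketch follows, assuming only the getPartitions() and getConsumer().next() calls that appear elsewhere in these examples:

private void showAllUnits(DataObject<Object> dataPointsObject) {
  DataPartition<Object>[] partitions = dataPointsObject.getPartitions();
  for (int i = 0; i < partitions.length; i++) {
    // each partition exposes its contents through a consumer
    LOG.info("Partition " + i + " => " + partitions[i].getConsumer().next());
  }
}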
use of edu.iu.dsc.tws.api.dataset.DataObject in project twister2 by DSC-SPIDAL.
the class SvmSgdAdvancedRunner method executeTestingDataLoadingTaskGraph.
/**
 * This method loads the testing data.
 * The loaded test data is later used to evaluate the trained model.
 * Testing data is loaded in parallel according to the given parallelism parameter,
 * with one partition created per degree of parallelism.
 * These partitions are later consumed by the testing task graph, which also runs in parallel.
 *
 * @return twister2 DataObject containing the testing data
 */
public DataObject<Object> executeTestingDataLoadingTaskGraph() {
  DataObject<Object> data = null;
  final String TEST_DATA_LOAD_EDGE_DIRECT = "direct2";
  DataObjectSource sourceTask1 = new DataObjectSource(TEST_DATA_LOAD_EDGE_DIRECT,
      this.svmJobParameters.getTestingDataDir());
  DataObjectSink sinkTask1 = new DataObjectSink();
  testingBuilder.addSource(Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE_TESTING,
      sourceTask1, dataStreamerParallelism);
  ComputeConnection firstGraphComputeConnection1 = testingBuilder.addCompute(
      Constants.SimpleGraphConfig.DATA_OBJECT_SINK_TESTING, sinkTask1, dataStreamerParallelism);
  firstGraphComputeConnection1.direct(Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE_TESTING)
      .viaEdge(TEST_DATA_LOAD_EDGE_DIRECT)
      .withDataType(MessageTypes.OBJECT);
  testingBuilder.setMode(OperationMode.BATCH);
  ComputeGraph datapointsTaskGraph1 = testingBuilder.build();
  datapointsTaskGraph1.setGraphName("testing-data-loading-graph");
  ExecutionPlan firstGraphExecutionPlan1 = taskExecutor.plan(datapointsTaskGraph1);
  taskExecutor.execute(datapointsTaskGraph1, firstGraphExecutionPlan1);
  data = taskExecutor.getOutput(datapointsTaskGraph1, firstGraphExecutionPlan1,
      Constants.SimpleGraphConfig.DATA_OBJECT_SINK_TESTING);
  if (data == null) {
    throw new NullPointerException("Something went wrong while loading the testing data");
  } else {
    LOG.info("Testing Data Total Partitions : " + data.getPartitions().length);
  }
  return data;
}
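A hypothetical caller, showing how the returned DataObject could be inspected per worker; workerId is assumed to be available in the runner, and indexing partitions by worker id follows the pattern in the PageRankWorker example below:

DataObject<Object> testingData = executeTestingDataLoadingTaskGraph();
// one partition is created per degree of parallelism; pick this worker's share
DataPartition<Object> localPartition = testingData.getPartition(workerId);
if (localPartition != null) {
  LOG.info("Local testing partition: " + localPartition.getConsumer().next());
}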
use of edu.iu.dsc.tws.api.dataset.DataObject in project twister2 by DSC-SPIDAL.
the class SvmSgdAdvancedRunner method executeTestingTaskGraph.
/**
 * This method executes the testing task graph using the testing data loaded by the
 * data-loading task graph and the final weight vector obtained from the training task graph.
 * Testing is also done in parallel: the testing data is loaded in parallel with respect to
 * the given parallelism, and prediction runs in parallel over those partitions.
 * The test results from all partitions are then reduced to a single accuracy value.
 *
 * @return the DataObject containing the accuracy value obtained
 */
public DataObject<Object> executeTestingTaskGraph() {
  DataObject<Object> data = null;
  predictionSourceTask = new PredictionSourceTask(svmJobParameters.isDummy(),
      this.binaryBatchModel, operationMode);
  predictionReduceTask = new PredictionReduceTask(operationMode);
  testingBuilder.addSource(Constants.SimpleGraphConfig.PREDICTION_SOURCE_TASK,
      predictionSourceTask, dataStreamerParallelism);
  ComputeConnection predictionReduceConnection = testingBuilder.addCompute(
      Constants.SimpleGraphConfig.PREDICTION_REDUCE_TASK, predictionReduceTask, reduceParallelism);
  predictionReduceConnection.reduce(Constants.SimpleGraphConfig.PREDICTION_SOURCE_TASK)
      .viaEdge(Constants.SimpleGraphConfig.PREDICTION_EDGE)
      .withReductionFunction(new PredictionAggregator())
      .withDataType(MessageTypes.OBJECT);
  testingBuilder.setMode(operationMode);
  ComputeGraph predictionGraph = testingBuilder.build();
  predictionGraph.setGraphName("testing-graph");
  ExecutionPlan predictionPlan = taskExecutor.plan(predictionGraph);
  // adding test data set
  taskExecutor.addInput(predictionGraph, predictionPlan,
      Constants.SimpleGraphConfig.PREDICTION_SOURCE_TASK,
      Constants.SimpleGraphConfig.TEST_DATA, testingData);
  // adding final weight vector
  taskExecutor.addInput(predictionGraph, predictionPlan,
      Constants.SimpleGraphConfig.PREDICTION_SOURCE_TASK,
      Constants.SimpleGraphConfig.FINAL_WEIGHT_VECTOR, trainedWeightVector);
  taskExecutor.execute(predictionGraph, predictionPlan);
  data = retrieveTestingAccuracyObject(predictionGraph, predictionPlan);
  return data;
}
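retrieveTestingAccuracyObject is not shown here; a minimal sketch, assuming the reduced accuracy is read from the reduce task's output the same way the other sinks on this page are read:

public DataObject<Object> retrieveTestingAccuracyObject(ComputeGraph predictionGraph,
                                                        ExecutionPlan predictionPlan) {
  return taskExecutor.getOutput(predictionGraph, predictionPlan,
      Constants.SimpleGraphConfig.PREDICTION_REDUCE_TASK);
}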
use of edu.iu.dsc.tws.api.dataset.DataObject in project twister2 by DSC-SPIDAL.
the class PageRankWorker method execute.
// private static double danglingNodeValues;
@Override
public void execute() {
  LOG.log(Level.INFO, "Task worker starting: " + workerId);
  PageRankWorkerParameters pageRankWorkerParameters = PageRankWorkerParameters.build(config);
  int parallelismValue = pageRankWorkerParameters.getParallelismValue();
  int dsize = pageRankWorkerParameters.getDsize();
  String dataDirectory = pageRankWorkerParameters.getDatapointDirectory();
  int iterations = pageRankWorkerParameters.getIterations();
  graphsize = dsize;

  // First task graph: partition and read the data points
  ComputeGraph datapointsTaskGraph = buildDataPointsTG(dataDirectory, dsize,
      parallelismValue, config);
  // Get the execution plan for the first task graph
  ExecutionPlan executionPlan = taskExecutor.plan(datapointsTaskGraph);
  // Actual execution of the first task graph
  taskExecutor.execute(datapointsTaskGraph, executionPlan);
  /* The output of the first graph looks like:
   * task Id: 0
   * {1=[3, 4], 2=[3, 4, 5]}
   */

  // Second task graph: assign the initial PageRank values to the vertices
  ComputeGraph graphInitialValueTaskGraph = buildGraphInitialValueTG(dataDirectory, dsize,
      parallelismValue, config);
  // Get the execution plan for the second task graph
  ExecutionPlan executionPlan1 = taskExecutor.plan(graphInitialValueTaskGraph);
  // Actual execution of the second task graph
  taskExecutor.execute(graphInitialValueTaskGraph, executionPlan1);
  /* The output of the second graph (the initial PageRank values) looks like:
   * {1=0.25, 2=0.25}
   */

  // Third task graph: the PageRank computation
  ComputeGraph pageRankTaskGraph = buildComputationTG(parallelismValue, config);
  IExecutor ex = taskExecutor.createExecution(pageRankTaskGraph);
  // Perform the iterations from 0 to 'n'
  long startTime = System.currentTimeMillis();
  for (int i = 0; i < iterations; i++) {
    ex.execute(i == iterations - 1);
  }
  taskExecutor.close();
  long endTime = System.currentTimeMillis();

  if (workerId == 0) {
    DataObject<Object> graphInitialPagerankValue = taskExecutor.getOutput("InitialValue");
    DataPartition<?> finalOutput = graphInitialPagerankValue.getPartition(workerId);
    HashMap<String, Double> finalOne = (HashMap<String, Double>) finalOutput.getConsumer().next();
    System.out.println(finalOne);
    LOG.info("Final output after " + iterations + " iterations");
    Iterator<Map.Entry<String, Double>> it = finalOne.entrySet().iterator();
    Double receivedFinalDanglingValue = finalOne.get("danglingvalues");
    double cumulativePagerankValue = 0.0;
    int num = 0;
    System.out.println(graphsize);
    while (it.hasNext()) {
      Map.Entry<String, Double> pair = it.next();
      if (!pair.getKey().equals("danglingvalues")) {
        double finalPagerankValue = pair.getValue()
            + ((0.85 * receivedFinalDanglingValue) / graphsize);
        System.out.print("Vertex Id: " + pair.getKey());
        System.out.printf(" and its pagerank value: %.15f%n", finalPagerankValue);
        cumulativePagerankValue += finalPagerankValue;
        num += 1;
      }
      // avoids a ConcurrentModificationException
      it.remove();
    }
    System.out.println(receivedFinalDanglingValue);
    System.out.println(num);
    System.out.println(cumulativePagerankValue);
    // vertices missing from the map get the base teleport term plus the dangling-node share
    System.out.println(cumulativePagerankValue + ((graphsize - num)
        * ((((double) 1 / graphsize) * 0.15)
        + (0.85 * (receivedFinalDanglingValue / graphsize)))));
    System.out.println("computation time: " + (endTime - startTime));
  }
}
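The dangling-node correction applied in the loop above, isolated as standalone helpers for clarity (a sketch, not methods from the twister2 source; 0.85 is the usual PageRank damping factor and 0.15 the teleport term):

static double finalRank(double partialRank, double danglingValue, int graphSize) {
  // every vertex receives an equal, damped share of the rank mass
  // accumulated at dangling (out-degree zero) vertices
  return partialRank + (0.85 * danglingValue) / graphSize;
}

// vertices that never appear in the result map get only the base teleport
// term plus the dangling share, matching the cumulative total printed above
static double missingVertexRank(double danglingValue, int graphSize) {
  return (0.15 / graphSize) + (0.85 * danglingValue) / graphSize;
}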