use of edu.iu.dsc.tws.task.dataobjects.DataObjectSource in project twister2 by DSC-SPIDAL.
the class TaskWorkerDataLoader method execute.
@Override
public void execute() {
getParams();
ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
DataObjectSource sourceTask = new DataObjectSource(Context.TWISTER2_DIRECT_EDGE, dataSource);
DataObjectSink sinkTask = new DataObjectSink();
computeGraphBuilder.addSource("datapointsource", sourceTask, parallelism);
ComputeConnection firstGraphComputeConnection = computeGraphBuilder.addCompute("datapointsink", sinkTask, parallelism);
firstGraphComputeConnection.direct("datapointsource").viaEdge(Context.TWISTER2_DIRECT_EDGE).withDataType(MessageTypes.OBJECT);
computeGraphBuilder.setMode(OperationMode.BATCH);
ComputeGraph datapointsTaskGraph = computeGraphBuilder.build();
ExecutionPlan firstGraphExecutionPlan = taskExecutor.plan(datapointsTaskGraph);
taskExecutor.execute(datapointsTaskGraph, firstGraphExecutionPlan);
DataObject<Object> dataPointsObject = taskExecutor.getOutput(datapointsTaskGraph, firstGraphExecutionPlan, "datapointsink");
LOG.info("Total Partitions : " + dataPointsObject.getPartitions().length);
showAllUnits(dataPointsObject);
}
use of edu.iu.dsc.tws.task.dataobjects.DataObjectSource in project twister2 by DSC-SPIDAL.
the class SvmSgdAdvancedRunner method executeTestingDataLoadingTaskGraph.
/**
* This method loads the testing data
* The loaded test data is used to evaluate the trained data
* Testing data is loaded in parallel depending on the parallelism parameter given
* There are partitions created equal to the parallelism
* Later this will be used to do the testing in parallel in the testing task graph
*
* @return twister2 DataObject containing the testing data
*/
public DataObject<Object> executeTestingDataLoadingTaskGraph() {
DataObject<Object> data = null;
final String TEST_DATA_LOAD_EDGE_DIRECT = "direct2";
DataObjectSource sourceTask1 = new DataObjectSource(TEST_DATA_LOAD_EDGE_DIRECT, this.svmJobParameters.getTestingDataDir());
DataObjectSink sinkTask1 = new DataObjectSink();
testingBuilder.addSource(Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE_TESTING, sourceTask1, dataStreamerParallelism);
ComputeConnection firstGraphComputeConnection1 = testingBuilder.addCompute(Constants.SimpleGraphConfig.DATA_OBJECT_SINK_TESTING, sinkTask1, dataStreamerParallelism);
firstGraphComputeConnection1.direct(Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE_TESTING).viaEdge(TEST_DATA_LOAD_EDGE_DIRECT).withDataType(MessageTypes.OBJECT);
testingBuilder.setMode(OperationMode.BATCH);
ComputeGraph datapointsTaskGraph1 = testingBuilder.build();
datapointsTaskGraph1.setGraphName("testing-data-loading-graph");
ExecutionPlan firstGraphExecutionPlan1 = taskExecutor.plan(datapointsTaskGraph1);
taskExecutor.execute(datapointsTaskGraph1, firstGraphExecutionPlan1);
data = taskExecutor.getOutput(datapointsTaskGraph1, firstGraphExecutionPlan1, Constants.SimpleGraphConfig.DATA_OBJECT_SINK_TESTING);
if (data == null) {
throw new NullPointerException("Something Went Wrong in Loading Testing Data");
} else {
LOG.info("Testing Data Total Partitions : " + data.getPartitions().length);
}
return data;
}
use of edu.iu.dsc.tws.task.dataobjects.DataObjectSource in project twister2 by DSC-SPIDAL.
the class KMeansConnectedDataflowExample method generateFirstJob.
private static DataFlowGraph generateFirstJob(Config config, int parallelismValue, String dataDirectory, int dimension, int dsize, int instances, DataFlowJobConfig jobConfig) {
DataObjectSource dataObjectSource = new DataObjectSource(Context.TWISTER2_DIRECT_EDGE, dataDirectory);
KMeansDataObjectCompute dataObjectCompute = new KMeansDataObjectCompute(Context.TWISTER2_DIRECT_EDGE, dsize, parallelismValue, dimension);
KMeansDataObjectDirectSink dataObjectSink = new KMeansDataObjectDirectSink("points");
ComputeGraphBuilder datapointsComputeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
// Add source, compute, and sink tasks to the task graph builder for the first task graph
datapointsComputeGraphBuilder.addSource("datapointsource", dataObjectSource, parallelismValue);
ComputeConnection datapointComputeConnection = datapointsComputeGraphBuilder.addCompute("datapointcompute", dataObjectCompute, parallelismValue);
ComputeConnection firstGraphComputeConnection = datapointsComputeGraphBuilder.addCompute("datapointsink", dataObjectSink, parallelismValue);
// Creating the communication edges between the tasks for the second task graph
datapointComputeConnection.direct("datapointsource").viaEdge(Context.TWISTER2_DIRECT_EDGE).withDataType(MessageTypes.OBJECT);
firstGraphComputeConnection.direct("datapointcompute").viaEdge(Context.TWISTER2_DIRECT_EDGE).withDataType(MessageTypes.OBJECT);
datapointsComputeGraphBuilder.setMode(OperationMode.BATCH);
datapointsComputeGraphBuilder.setTaskGraphName("datapointsTG");
ComputeGraph firstGraph = datapointsComputeGraphBuilder.build();
DataFlowGraph job = DataFlowGraph.newSubGraphJob("datapointsTG", firstGraph).setWorkers(instances).addDataFlowJobConfig(jobConfig).setGraphType("non-iterative");
return job;
}
use of edu.iu.dsc.tws.task.dataobjects.DataObjectSource in project twister2 by DSC-SPIDAL.
the class SvmSgdAdvancedRunner method executeTrainingDataLoadingTaskGraph.
/**
* This method loads the training data in a distributed mode
* dataStreamerParallelism is the amount of parallelism used
* in loaded the data in parallel.
*
* @return twister2 DataObject containing the training data
*/
public DataObject<Object> executeTrainingDataLoadingTaskGraph() {
DataObject<Object> data = null;
DataObjectSource sourceTask = new DataObjectSource(Context.TWISTER2_DIRECT_EDGE, this.svmJobParameters.getTrainingDataDir());
DataObjectSink sinkTask = new DataObjectSink();
trainingBuilder.addSource(Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE, sourceTask, dataStreamerParallelism);
ComputeConnection firstGraphComputeConnection = trainingBuilder.addCompute(Constants.SimpleGraphConfig.DATA_OBJECT_SINK, sinkTask, dataStreamerParallelism);
firstGraphComputeConnection.direct(Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE).viaEdge(Context.TWISTER2_DIRECT_EDGE).withDataType(MessageTypes.OBJECT);
trainingBuilder.setMode(OperationMode.BATCH);
ComputeGraph datapointsTaskGraph = trainingBuilder.build();
datapointsTaskGraph.setGraphName("training-data-loading-graph");
ExecutionPlan firstGraphExecutionPlan = taskExecutor.plan(datapointsTaskGraph);
taskExecutor.execute(datapointsTaskGraph, firstGraphExecutionPlan);
data = taskExecutor.getOutput(datapointsTaskGraph, firstGraphExecutionPlan, Constants.SimpleGraphConfig.DATA_OBJECT_SINK);
if (data == null) {
throw new NullPointerException("Something Went Wrong in Loading Training Data");
} else {
LOG.info("Training Data Total Partitions : " + data.getPartitions().length);
}
return data;
}
use of edu.iu.dsc.tws.task.dataobjects.DataObjectSource in project twister2 by DSC-SPIDAL.
the class SvmSgdAdvancedRunner method executeWeightVectorLoadingTaskGraph.
/**
* This method loads the training data in a distributed mode
* dataStreamerParallelism is the amount of parallelism used
* in loaded the data in parallel.
*
* @return twister2 DataObject containing the training data
*/
public DataObject<Object> executeWeightVectorLoadingTaskGraph() {
DataObject<Object> data = null;
DataObjectSource sourceTask = new DataObjectSource(Context.TWISTER2_DIRECT_EDGE, this.svmJobParameters.getWeightVectorDataDir());
DataObjectSink sinkTask = new DataObjectSink();
trainingBuilder.addSource(Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE, sourceTask, dataStreamerParallelism);
ComputeConnection firstGraphComputeConnection = trainingBuilder.addCompute(Constants.SimpleGraphConfig.DATA_OBJECT_SINK, sinkTask, dataStreamerParallelism);
firstGraphComputeConnection.direct(Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE).viaEdge(Context.TWISTER2_DIRECT_EDGE).withDataType(MessageTypes.OBJECT);
trainingBuilder.setMode(OperationMode.BATCH);
ComputeGraph datapointsTaskGraph = trainingBuilder.build();
datapointsTaskGraph.setGraphName("weight-vector-loading-graph");
ExecutionPlan firstGraphExecutionPlan = taskExecutor.plan(datapointsTaskGraph);
taskExecutor.execute(datapointsTaskGraph, firstGraphExecutionPlan);
data = taskExecutor.getOutput(datapointsTaskGraph, firstGraphExecutionPlan, Constants.SimpleGraphConfig.DATA_OBJECT_SINK);
if (data == null) {
throw new NullPointerException("Something Went Wrong in Loading Weight Vector");
} else {
LOG.info("Training Data Total Partitions : " + data.getPartitions().length);
}
return data;
}
Aggregations