use of edu.iu.dsc.tws.api.compute.graph.ComputeGraph in project twister2 by DSC-SPIDAL.
The class SvmSgdAdvancedRunner, method executeTrainingDataLoadingTaskGraph.
/**
 * This method loads the training data in distributed mode.
 * dataStreamerParallelism is the degree of parallelism used
 * to load the data in parallel.
 *
 * @return twister2 DataObject containing the training data
 */
public DataObject<Object> executeTrainingDataLoadingTaskGraph() {
  DataObject<Object> data = null;
  // source reads the training files; sink collects the resulting partitions
  DataObjectSource sourceTask = new DataObjectSource(Context.TWISTER2_DIRECT_EDGE,
      this.svmJobParameters.getTrainingDataDir());
  DataObjectSink sinkTask = new DataObjectSink();
  trainingBuilder.addSource(Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE,
      sourceTask, dataStreamerParallelism);
  ComputeConnection firstGraphComputeConnection = trainingBuilder.addCompute(
      Constants.SimpleGraphConfig.DATA_OBJECT_SINK, sinkTask, dataStreamerParallelism);
  firstGraphComputeConnection.direct(Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE)
      .viaEdge(Context.TWISTER2_DIRECT_EDGE)
      .withDataType(MessageTypes.OBJECT);
  trainingBuilder.setMode(OperationMode.BATCH);
  ComputeGraph datapointsTaskGraph = trainingBuilder.build();
  datapointsTaskGraph.setGraphName("training-data-loading-graph");
  ExecutionPlan firstGraphExecutionPlan = taskExecutor.plan(datapointsTaskGraph);
  taskExecutor.execute(datapointsTaskGraph, firstGraphExecutionPlan);
  // pull the sink's collected partitions out of the finished execution
  data = taskExecutor.getOutput(datapointsTaskGraph, firstGraphExecutionPlan,
      Constants.SimpleGraphConfig.DATA_OBJECT_SINK);
  if (data == null) {
    throw new NullPointerException("Something Went Wrong in Loading Training Data");
  } else {
    LOG.info("Training Data Total Partitions : " + data.getPartitions().length);
  }
  return data;
}
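For orientation, here is a hedged sketch of how the DataObject returned above could be consumed on the worker side. The DataPartition/DataPartitionConsumer API is assumed from twister2's dataset package, and datapoints is a hypothetical local variable, not part of the original runner.

// Hypothetical usage sketch, assuming twister2's DataPartition/DataPartitionConsumer API.
DataObject<Object> datapoints = executeTrainingDataLoadingTaskGraph();
for (DataPartition<Object> partition : datapoints.getPartitions()) {
  DataPartitionConsumer<Object> consumer = partition.getConsumer();
  while (consumer.hasNext()) {
    Object record = consumer.next();
    // each record is one raw training sample emitted by DataObjectSource
  }
}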
use of edu.iu.dsc.tws.api.compute.graph.ComputeGraph in project twister2 by DSC-SPIDAL.
The class SvmSgdAdvancedRunner, method executeIterativeTrainingGraph.
/**
 * This method executes the iterative training graph.
 * Training is done in parallel depending on the given parallelism factor.
 * In this implementation the data loading parallelism and the computing
 * (training) parallelism are the same; keeping them equal is the general model.
 * You can increase the parallelism as you wish, but it is advised to keep these
 * values equal. Dynamic parallelism in training is not yet fully tested in the
 * Twister2 framework.
 *
 * @return Twister2 DataObject{@literal <double[]>} containing the reduced weight vector
 */
public DataObject<double[]> executeIterativeTrainingGraph() {
  DataObject<double[]> trainedWeight = null;
  dataStreamer = new InputDataStreamer(this.operationMode, svmJobParameters.isDummy(),
      this.binaryBatchModel);
  iterativeSVMCompute = new IterativeSVMCompute(this.binaryBatchModel, this.operationMode);
  svmReduce = new SVMReduce(this.operationMode);
  trainingBuilder.addSource(Constants.SimpleGraphConfig.DATASTREAMER_SOURCE, dataStreamer,
      dataStreamerParallelism);
  ComputeConnection svmComputeConnection = trainingBuilder.addCompute(
      Constants.SimpleGraphConfig.SVM_COMPUTE, iterativeSVMCompute, svmComputeParallelism);
  ComputeConnection svmReduceConnection = trainingBuilder.addCompute(
      Constants.SimpleGraphConfig.SVM_REDUCE, svmReduce, reduceParallelism);
  svmComputeConnection.direct(Constants.SimpleGraphConfig.DATASTREAMER_SOURCE)
      .viaEdge(Constants.SimpleGraphConfig.DATA_EDGE)
      .withDataType(MessageTypes.OBJECT);
  // svmReduceConnection
  //     .reduce(Constants.SimpleGraphConfig.SVM_COMPUTE, Constants.SimpleGraphConfig.REDUCE_EDGE,
  //         new ReduceAggregator(), DataType.OBJECT);
  svmReduceConnection.allreduce(Constants.SimpleGraphConfig.SVM_COMPUTE)
      .viaEdge(Constants.SimpleGraphConfig.REDUCE_EDGE)
      .withReductionFunction(new ReduceAggregator())
      .withDataType(MessageTypes.OBJECT);
  trainingBuilder.setMode(operationMode);
  ComputeGraph graph = trainingBuilder.build();
  graph.setGraphName("training-graph");
  ExecutionPlan plan = taskExecutor.plan(graph);
  IExecutor ex = taskExecutor.createExecution(graph, plan);
  // iteration is being decoupled from the computation task
  for (int i = 0; i < this.binaryBatchModel.getIterations(); i++) {
    taskExecutor.addInput(graph, plan, Constants.SimpleGraphConfig.DATASTREAMER_SOURCE,
        Constants.SimpleGraphConfig.INPUT_DATA, trainingData);
    taskExecutor.addInput(graph, plan, Constants.SimpleGraphConfig.DATASTREAMER_SOURCE,
        Constants.SimpleGraphConfig.INPUT_WEIGHT_VECTOR, inputWeightVector);
    inputWeightVector = taskExecutor.getOutput(graph, plan,
        Constants.SimpleGraphConfig.SVM_REDUCE);
    ex.execute();
  }
  ex.closeExecution();
  LOG.info("Task Graph Executed !!! ");
  if (workerId == 0) {
    trainedWeight = retrieveWeightVectorFromTaskGraph(graph, plan);
    this.trainedWeightVector = trainedWeight;
  }
  return trainedWeight;
}
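The allreduce above merges the per-task weight vectors with a ReduceAggregator. That class is not shown on this page; the following is a minimal sketch of what such a reduction function can look like, assuming the IFunction interface from the twister2 compute API. WeightVectorSum is a hypothetical name, and the real ReduceAggregator may combine vectors differently (for example, averaging instead of summing).

import edu.iu.dsc.tws.api.compute.IFunction;

// Minimal sketch of an allreduce reduction function: merges two partial
// weight vectors element-wise (the real ReduceAggregator may differ).
public class WeightVectorSum implements IFunction<double[]> {
  @Override
  public double[] onMessage(double[] left, double[] right) {
    double[] merged = new double[left.length];
    for (int i = 0; i < left.length; i++) {
      merged[i] = left[i] + right[i];
    }
    return merged;
  }
}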
use of edu.iu.dsc.tws.api.compute.graph.ComputeGraph in project twister2 by DSC-SPIDAL.
The class SvmSgdAdvancedRunner, method executeWeightVectorLoadingTaskGraph.
/**
 * This method loads the initial weight vector in distributed mode.
 * dataStreamerParallelism is the degree of parallelism used
 * to load the data in parallel.
 *
 * @return twister2 DataObject containing the weight vector data
 */
public DataObject<Object> executeWeightVectorLoadingTaskGraph() {
  DataObject<Object> data = null;
  DataObjectSource sourceTask = new DataObjectSource(Context.TWISTER2_DIRECT_EDGE,
      this.svmJobParameters.getWeightVectorDataDir());
  DataObjectSink sinkTask = new DataObjectSink();
  trainingBuilder.addSource(Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE,
      sourceTask, dataStreamerParallelism);
  ComputeConnection firstGraphComputeConnection = trainingBuilder.addCompute(
      Constants.SimpleGraphConfig.DATA_OBJECT_SINK, sinkTask, dataStreamerParallelism);
  firstGraphComputeConnection.direct(Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE)
      .viaEdge(Context.TWISTER2_DIRECT_EDGE)
      .withDataType(MessageTypes.OBJECT);
  trainingBuilder.setMode(OperationMode.BATCH);
  ComputeGraph datapointsTaskGraph = trainingBuilder.build();
  datapointsTaskGraph.setGraphName("weight-vector-loading-graph");
  ExecutionPlan firstGraphExecutionPlan = taskExecutor.plan(datapointsTaskGraph);
  taskExecutor.execute(datapointsTaskGraph, firstGraphExecutionPlan);
  data = taskExecutor.getOutput(datapointsTaskGraph, firstGraphExecutionPlan,
      Constants.SimpleGraphConfig.DATA_OBJECT_SINK);
  if (data == null) {
    throw new NullPointerException("Something Went Wrong in Loading Weight Vector");
  } else {
    LOG.info("Weight Vector Total Partitions : " + data.getPartitions().length);
  }
  return data;
}
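Taken together, the graphs in this runner form a simple pipeline. Below is a hedged sketch of the wiring, using only the method names from the snippets above; the conversion of the loaded weight-vector DataObject into inputWeightVector is elided because it depends on runner internals not shown here.

// Hypothetical end-to-end wiring of the runner's graphs (not part of the source).
trainingData = executeTrainingDataLoadingTaskGraph();               // graph 1: load samples
DataObject<Object> weights = executeWeightVectorLoadingTaskGraph(); // graph 2: load weights
// ... convert the weight partitions into inputWeightVector (runner internals) ...
DataObject<double[]> model = executeIterativeTrainingGraph();       // graph 3: train iteratively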
use of edu.iu.dsc.tws.api.compute.graph.ComputeGraph in project twister2 by DSC-SPIDAL.
The class MultiComputeTasksGraphExample, method execute.
@Override
public void execute() {
  LOG.log(Level.INFO, "Task worker starting: " + workerId);
  ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(config);
  int parallel = Integer.parseInt((String) config.get(DataObjectConstants.PARALLELISM_VALUE));
  SourceTask sourceTask = new SourceTask();
  FirstComputeTask firstComputeTask = new FirstComputeTask();
  SecondComputeTask secondComputeTask = new SecondComputeTask();
  ReduceTask reduceTask = new ReduceTask();
  String dataDirectory = (String) config.get(DataObjectConstants.DINPUT_DIRECTORY) + workerId;
  String centroidDirectory = (String) config.get(DataObjectConstants.CINPUT_DIRECTORY) + workerId;
  int dimension = Integer.parseInt((String) config.get(DataObjectConstants.DIMENSIONS));
  int numFiles = Integer.parseInt((String) config.get(DataObjectConstants.NUMBER_OF_FILES));
  int dsize = Integer.parseInt((String) config.get(DataObjectConstants.DSIZE));
  int csize = Integer.parseInt((String) config.get(DataObjectConstants.CSIZE));
  String type = config.getStringValue(DataObjectConstants.FILE_TYPE);
  LOG.info("Input Values: " + dataDirectory + " " + centroidDirectory + " "
      + dimension + " " + numFiles);
  KMeansUtils.generateDataPoints(config, dimension, numFiles, dsize, csize,
      dataDirectory, centroidDirectory, type);
  // Adding the user-defined constraints to the graph
  Map<String, String> sourceTaskConstraintsMap = new HashMap<>();
  // sourceTaskConstraintsMap.put(Context.TWISTER2_MAX_TASK_INSTANCES_PER_WORKER, "2");
  Map<String, String> computeTaskConstraintsMap = new HashMap<>();
  // computeTaskConstraintsMap.put(Context.TWISTER2_MAX_TASK_INSTANCES_PER_WORKER, "2");
  Map<String, String> sinkTaskConstraintsMap = new HashMap<>();
  // sinkTaskConstraintsMap.put(Context.TWISTER2_MAX_TASK_INSTANCES_PER_WORKER, "2");
  // One source fans out to two compute tasks over separate direct edges;
  // both compute tasks then allreduce into the same reduce task.
  builder.addSource("source", sourceTask, parallel);
  ComputeConnection firstComputeConnection =
      builder.addCompute("firstcompute", firstComputeTask, parallel);
  ComputeConnection secondComputeConnection =
      builder.addCompute("secondcompute", secondComputeTask, parallel);
  ComputeConnection reduceConnection = builder.addCompute("compute", reduceTask, parallel);
  firstComputeConnection.direct("source")
      .viaEdge("fdirect")
      .withDataType(MessageTypes.OBJECT);
  secondComputeConnection.direct("source")
      .viaEdge("sdirect")
      .withDataType(MessageTypes.OBJECT);
  reduceConnection.allreduce("firstcompute")
      .viaEdge("freduce")
      .withReductionFunction(new Aggregator())
      .withDataType(MessageTypes.OBJECT)
      .connect()
      .allreduce("secondcompute")
      .viaEdge("sreduce")
      .withReductionFunction(new Aggregator())
      .withDataType(MessageTypes.OBJECT);
  builder.setMode(OperationMode.BATCH);
  // Adding graph and node level constraints
  // builder.addNodeConstraints("source", sourceTaskConstraintsMap);
  // builder.addNodeConstraints("firstcompute", computeTaskConstraintsMap);
  // builder.addNodeConstraints("secondcompute", computeTaskConstraintsMap);
  // builder.addNodeConstraints("sink", sinkTaskConstraintsMap);
  builder.addGraphConstraints(Context.TWISTER2_MAX_TASK_INSTANCES_PER_WORKER, "4");
  ComputeGraph graph = builder.build();
  LOG.info("%%% Graph Constraints:%%%" + graph.getGraphConstraints());
  ExecutionPlan plan = taskExecutor.plan(graph);
  taskExecutor.execute(graph, plan);
}
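The FirstComputeTask and SecondComputeTask bodies are not shown on this page. As a rough guide, a twister2 compute task typically extends BaseCompute and writes results to a named outgoing edge; the following pass-through sketch assumes that API (BaseCompute, IMessage, context.write) and uses a hypothetical class name.

import edu.iu.dsc.tws.api.compute.IMessage;
import edu.iu.dsc.tws.api.compute.nodes.BaseCompute;

// Hypothetical pass-through compute task in the style of FirstComputeTask.
public class PassThroughCompute extends BaseCompute {
  @Override
  public boolean execute(IMessage message) {
    // forward whatever arrived on the "fdirect" edge to the "freduce" edge
    context.write("freduce", message.getContent());
    return true;
  }
}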
use of edu.iu.dsc.tws.api.compute.graph.ComputeGraph in project twister2 by DSC-SPIDAL.
The class SvmSgdAdvancedRunner, method executeTrainingGraph.
/**
 * This method executes the training graph.
 * Training is done in parallel depending on the given parallelism factor.
 * In this implementation the data loading parallelism and the computing
 * (training) parallelism are the same; keeping them equal is the general model.
 * You can increase the parallelism as you wish, but it is advised to keep these
 * values equal. Dynamic parallelism in training is not yet fully tested in the
 * Twister2 framework.
 *
 * @return Twister2 DataObject{@literal <double[]>} containing the reduced weight vector
 */
public DataObject<double[]> executeTrainingGraph() {
  DataObject<double[]> trainedWeight = null;
  dataStreamer = new InputDataStreamer(this.operationMode, svmJobParameters.isDummy(),
      this.binaryBatchModel);
  svmCompute = new SVMCompute(this.binaryBatchModel, this.operationMode);
  svmReduce = new SVMReduce(this.operationMode);
  trainingBuilder.addSource(Constants.SimpleGraphConfig.DATASTREAMER_SOURCE, dataStreamer,
      dataStreamerParallelism);
  ComputeConnection svmComputeConnection = trainingBuilder.addCompute(
      Constants.SimpleGraphConfig.SVM_COMPUTE, svmCompute, svmComputeParallelism);
  ComputeConnection svmReduceConnection = trainingBuilder.addCompute(
      Constants.SimpleGraphConfig.SVM_REDUCE, svmReduce, reduceParallelism);
  svmComputeConnection.direct(Constants.SimpleGraphConfig.DATASTREAMER_SOURCE)
      .viaEdge(Constants.SimpleGraphConfig.DATA_EDGE)
      .withDataType(MessageTypes.OBJECT);
  // svmReduceConnection
  //     .reduce(Constants.SimpleGraphConfig.SVM_COMPUTE, Constants.SimpleGraphConfig.REDUCE_EDGE,
  //         new ReduceAggregator(), DataType.OBJECT);
  svmReduceConnection.allreduce(Constants.SimpleGraphConfig.SVM_COMPUTE)
      .viaEdge(Constants.SimpleGraphConfig.REDUCE_EDGE)
      .withReductionFunction(new ReduceAggregator())
      .withDataType(MessageTypes.OBJECT);
  trainingBuilder.setMode(operationMode);
  ComputeGraph graph = trainingBuilder.build();
  graph.setGraphName("training-graph");
  ExecutionPlan plan = taskExecutor.plan(graph);
  taskExecutor.addInput(graph, plan, Constants.SimpleGraphConfig.DATASTREAMER_SOURCE,
      Constants.SimpleGraphConfig.INPUT_DATA, trainingData);
  taskExecutor.addInput(graph, plan, Constants.SimpleGraphConfig.DATASTREAMER_SOURCE,
      Constants.SimpleGraphConfig.INPUT_WEIGHT_VECTOR, inputWeightVector);
  taskExecutor.execute(graph, plan);
  LOG.info("Task Graph Executed !!! ");
  if (workerId == 0) {
    trainedWeight = retrieveWeightVectorFromTaskGraph(graph, plan);
    this.trainedWeightVector = trainedWeight;
  }
  return trainedWeight;
}
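The difference between this method and executeIterativeTrainingGraph above comes down to how the executor is driven. A condensed, non-runnable comparison, with the Constants.SimpleGraphConfig names shortened for readability:

// One-shot batch training: plan, feed inputs once, execute once.
ExecutionPlan plan = taskExecutor.plan(graph);
taskExecutor.addInput(graph, plan, SOURCE, INPUT_DATA, trainingData);
taskExecutor.execute(graph, plan);

// Iterative training: create a reusable execution and drive it per iteration.
IExecutor ex = taskExecutor.createExecution(graph, plan);
for (int i = 0; i < iterations; i++) {
  ex.execute();  // one training round per call
}
ex.closeExecution();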