Use of edu.iu.dsc.tws.task.impl.ComputeGraphBuilder in project twister2 by DSC-SPIDAL.
The class SvmSgdIterativeRunner, method generateGenericDataPointLoader.
private ComputeGraph generateGenericDataPointLoader(int samples, int parallelism,
    int numOfFeatures, String dataSourcePathStr, String dataObjectSourceStr,
    String dataObjectComputeStr, String dataObjectSinkStr, String graphName) {
  // Source task reads the raw data points from the given path
  SVMDataObjectSource<String, TextInputSplit> sourceTask =
      new SVMDataObjectSource<>(Context.TWISTER2_DIRECT_EDGE, dataSourcePathStr, samples);
  // Compute task parses the records into feature vectors
  IterativeSVMDataObjectCompute dataObjectCompute = new IterativeSVMDataObjectCompute(
      Context.TWISTER2_DIRECT_EDGE, parallelism, samples, numOfFeatures, DELIMITER);
  IterativeSVMDataObjectDirectSink iterativeSVMPrimaryDataObjectDirectSink =
      new IterativeSVMDataObjectDirectSink();
  ComputeGraphBuilder datapointsComputeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
  datapointsComputeGraphBuilder.addSource(dataObjectSourceStr, sourceTask, parallelism);
  ComputeConnection datapointComputeConnection = datapointsComputeGraphBuilder
      .addCompute(dataObjectComputeStr, dataObjectCompute, parallelism);
  ComputeConnection computeConnectionSink = datapointsComputeGraphBuilder
      .addCompute(dataObjectSinkStr, iterativeSVMPrimaryDataObjectDirectSink, parallelism);
  // Wire source -> compute -> sink with direct edges
  datapointComputeConnection.direct(dataObjectSourceStr)
      .viaEdge(Context.TWISTER2_DIRECT_EDGE)
      .withDataType(MessageTypes.OBJECT);
  computeConnectionSink.direct(dataObjectComputeStr)
      .viaEdge(Context.TWISTER2_DIRECT_EDGE)
      .withDataType(MessageTypes.OBJECT);
  datapointsComputeGraphBuilder.setMode(this.operationMode);
  datapointsComputeGraphBuilder.setTaskGraphName(graphName);
  // Build the first task graph
  return datapointsComputeGraphBuilder.build();
}
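Distilled to its essentials, the builder pattern above is: register a source, register compute tasks, wire named edges between them, set a mode, and build. The sketch below is a minimal illustration only; MySource and MyCompute are hypothetical task classes standing in for the SVM-specific ones, and config is assumed to be a twister2 Config supplied by the enclosing worker.

private ComputeGraph buildMinimalGraph(Config config) {
  // MySource and MyCompute are hypothetical stand-ins for real task implementations
  MySource source = new MySource();
  MyCompute compute = new MyCompute();
  ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(config);
  builder.addSource("source", source, 2);
  ComputeConnection connection = builder.addCompute("compute", compute, 2);
  // One direct edge carries each source partition to its matching compute task
  connection.direct("source").viaEdge("direct").withDataType(MessageTypes.OBJECT);
  builder.setMode(OperationMode.BATCH);
  builder.setTaskGraphName("minimal-graph");
  return builder.build();
}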
Use of edu.iu.dsc.tws.task.impl.ComputeGraphBuilder in project twister2 by DSC-SPIDAL.
The class MultiComputeTasksGraphExample, method execute.
@Override
public void execute() {
  LOG.log(Level.INFO, "Task worker starting: " + workerId);
  ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(config);
  int parallel = Integer.parseInt((String) config.get(DataObjectConstants.PARALLELISM_VALUE));
  SourceTask sourceTask = new SourceTask();
  FirstComputeTask firstComputeTask = new FirstComputeTask();
  SecondComputeTask secondComputeTask = new SecondComputeTask();
  ReduceTask reduceTask = new ReduceTask();
  String dataDirectory = (String) config.get(DataObjectConstants.DINPUT_DIRECTORY) + workerId;
  String centroidDirectory = (String) config.get(DataObjectConstants.CINPUT_DIRECTORY) + workerId;
  int dimension = Integer.parseInt((String) config.get(DataObjectConstants.DIMENSIONS));
  int numFiles = Integer.parseInt((String) config.get(DataObjectConstants.NUMBER_OF_FILES));
  int dsize = Integer.parseInt((String) config.get(DataObjectConstants.DSIZE));
  int csize = Integer.parseInt((String) config.get(DataObjectConstants.CSIZE));
  String type = config.getStringValue(DataObjectConstants.FILE_TYPE);
  LOG.info("Input Values:" + dataDirectory + centroidDirectory + dimension + numFiles);
  KMeansUtils.generateDataPoints(config, dimension, numFiles, dsize, csize,
      dataDirectory, centroidDirectory, type);
  // Adding the user-defined constraints to the graph
  Map<String, String> sourceTaskConstraintsMap = new HashMap<>();
  // sourceTaskConstraintsMap.put(Context.TWISTER2_MAX_TASK_INSTANCES_PER_WORKER, "2");
  Map<String, String> computeTaskConstraintsMap = new HashMap<>();
  // computeTaskConstraintsMap.put(Context.TWISTER2_MAX_TASK_INSTANCES_PER_WORKER, "2");
  Map<String, String> sinkTaskConstraintsMap = new HashMap<>();
  // sinkTaskConstraintsMap.put(Context.TWISTER2_MAX_TASK_INSTANCES_PER_WORKER, "2");
  builder.addSource("source", sourceTask, parallel);
  ComputeConnection firstComputeConnection = builder.addCompute("firstcompute", firstComputeTask, parallel);
  ComputeConnection secondComputeConnection = builder.addCompute("secondcompute", secondComputeTask, parallel);
  ComputeConnection reduceConnection = builder.addCompute("compute", reduceTask, parallel);
  // The source fans out to both compute tasks over separately named direct edges
  firstComputeConnection.direct("source").viaEdge("fdirect").withDataType(MessageTypes.OBJECT);
  secondComputeConnection.direct("source").viaEdge("sdirect").withDataType(MessageTypes.OBJECT);
  // Both compute branches feed the reducer; connect() chains the second inbound edge
  reduceConnection.allreduce("firstcompute").viaEdge("freduce")
      .withReductionFunction(new Aggregator())
      .withDataType(MessageTypes.OBJECT)
      .connect()
      .allreduce("secondcompute").viaEdge("sreduce")
      .withReductionFunction(new Aggregator())
      .withDataType(MessageTypes.OBJECT);
  builder.setMode(OperationMode.BATCH);
  // Adding graph and node level constraints
  // builder.addNodeConstraints("source", sourceTaskConstraintsMap);
  // builder.addNodeConstraints("firstcompute", computeTaskConstraintsMap);
  // builder.addNodeConstraints("secondcompute", computeTaskConstraintsMap);
  // builder.addNodeConstraints("sink", sinkTaskConstraintsMap);
  builder.addGraphConstraints(Context.TWISTER2_MAX_TASK_INSTANCES_PER_WORKER, "4");
  ComputeGraph graph = builder.build();
  LOG.info("%%% Graph Constraints:%%%" + graph.getGraphConstraints());
  ExecutionPlan plan = taskExecutor.plan(graph);
  taskExecutor.execute(graph, plan);
}
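The commented-out lines hint at per-node scheduling constraints alongside the active graph-level one. A sketch of what enabling one would look like, mirroring those commented lines (the value "2" is illustrative):

// Cap the "source" vertex at two task instances per worker,
// in addition to the graph-wide cap of four set above
Map<String, String> sourceConstraints = new HashMap<>();
sourceConstraints.put(Context.TWISTER2_MAX_TASK_INSTANCES_PER_WORKER, "2");
builder.addNodeConstraints("source", sourceConstraints);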
Use of edu.iu.dsc.tws.task.impl.ComputeGraphBuilder in project twister2 by DSC-SPIDAL.
The class KMeansDataGeneratorTest, method testUniqueSchedules3.
/**
 * The HDFS data generation test is commented out for the Travis build.
 */
/* @Test
public void testUniqueSchedules2() throws IOException {
  Config config = getConfig();
  String hostname = String.valueOf(config.get("twister2.data.hdfs.namenode"));
  String dinputDirectory = "hdfs://" + hostname + ":9000/tmp/testdinput";
  int numFiles = 1;
  int dsize = 20;
  int dimension = 2;
  int parallelismValue = 2;
  KMeansDataGenerator.generateData("txt", new Path(dinputDirectory),
      numFiles, dsize, 100, dimension, config);
  ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
  computeGraphBuilder.setTaskGraphName("kmeans");
  DataObjectSource sourceTask = new DataObjectSource("direct", dinputDirectory);
  DataObjectSink sinkTask = new DataObjectSink();
  computeGraphBuilder.addSource("source", sourceTask, parallelismValue);
  ComputeConnection computeConnection1 = computeGraphBuilder.addSink("sink", sinkTask,
      parallelismValue);
  computeConnection1.direct("source").viaEdge("direct").withDataType(MessageTypes.OBJECT);
  computeGraphBuilder.setMode(OperationMode.BATCH);
  LocalCompleteTextInputPartitioner localCompleteTextInputPartitioner
      = new LocalCompleteTextInputPartitioner(
          new Path(dinputDirectory), parallelismValue, config);
  DataSource<String, ?> source
      = new DataSource<>(config, localCompleteTextInputPartitioner, parallelismValue);
  InputSplit<String> inputSplit;
  for (int i = 0; i < parallelismValue; i++) {
    inputSplit = source.getNextSplit(i);
    Assert.assertNotNull(inputSplit);
  }
}*/
@Test
public void testUniqueSchedules3() throws IOException {
  Config config = getConfig();
  String cinputDirectory = "/tmp/testcinput";
  int numFiles = 1;
  int csize = 4;
  int dimension = 2;
  int parallelismValue = 2;
  // Generate the centroid files, then build a graph that replicates them to each task
  KMeansDataGenerator.generateData("txt", new Path(cinputDirectory), numFiles, csize,
      100, dimension, config);
  ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
  computeGraphBuilder.setTaskGraphName("kmeans");
  DataFileReplicatedReadSource task = new DataFileReplicatedReadSource(
      Context.TWISTER2_DIRECT_EDGE, cinputDirectory);
  computeGraphBuilder.addSource("map", task, parallelismValue);
  computeGraphBuilder.setMode(OperationMode.BATCH);
  // Verify the generated file exists and parses back into a centroid matrix
  Path path = new Path(cinputDirectory);
  final FileSystem fs = FileSystemUtils.get(path);
  final FileStatus pathFile = fs.getFileStatus(path);
  Assert.assertNotNull(pathFile);
  DataFileReader fileReader = new DataFileReader(config, "local");
  double[][] centroids = fileReader.readData(path, dimension, csize);
  Assert.assertNotNull(centroids);
}
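Beyond the null check, the matrix shape can also be verified. A small hedged extension, assuming readData returns one row per centroid and one column per dimension (an assumption, not confirmed against the twister2 sources):

// Hypothetical shape assertions: csize rows, each with `dimension` entries
Assert.assertEquals(csize, centroids.length);
for (double[] centroid : centroids) {
  Assert.assertEquals(dimension, centroid.length);
}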
Use of edu.iu.dsc.tws.task.impl.ComputeGraphBuilder in project twister2 by DSC-SPIDAL.
The class KMeansDataGeneratorTest, method testUniqueSchedules1.
@Test
public void testUniqueSchedules1() throws IOException {
  Config config = getConfig();
  String dinputDirectory = "/tmp/testdinput";
  int numFiles = 1;
  int dsize = 20;
  int dimension = 2;
  int parallelismValue = 2;
  // Generate the data-point files, then build a graph that reads them back
  KMeansDataGenerator.generateData("txt", new Path(dinputDirectory), numFiles, dsize,
      100, dimension, config);
  ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
  computeGraphBuilder.setTaskGraphName("kmeans");
  DataObjectSource sourceTask = new DataObjectSource("direct", dinputDirectory);
  DataObjectSink sinkTask = new DataObjectSink();
  computeGraphBuilder.addSource("source", sourceTask, parallelismValue);
  ComputeConnection computeConnection1 =
      computeGraphBuilder.addCompute("sink", sinkTask, parallelismValue);
  computeConnection1.direct("source").viaEdge("direct").withDataType(MessageTypes.OBJECT);
  computeGraphBuilder.setMode(OperationMode.BATCH);
  // Each task index should receive a non-null input split from the partitioner
  LocalTextInputPartitioner localTextInputPartitioner =
      new LocalTextInputPartitioner(new Path(dinputDirectory), parallelismValue, config);
  DataSource<String, ?> source =
      new DataSource<>(config, localTextInputPartitioner, parallelismValue);
  InputSplit<String> inputSplit;
  for (int i = 0; i < parallelismValue; i++) {
    inputSplit = source.getNextSplit(i);
    Assert.assertNotNull(inputSplit);
  }
}
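The same wiring also works for a streaming graph; assuming OperationMode.STREAMING is the other enum constant (as in standard twister2 usage), the only change is the mode:

// Identical graph, but executed as a continuous streaming job
computeGraphBuilder.setMode(OperationMode.STREAMING);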
Use of edu.iu.dsc.tws.task.impl.ComputeGraphBuilder in project twister2 by DSC-SPIDAL.
The class TaskGraphBuildTest, method createGraph.
private ComputeGraph createGraph() {
  TestSource testSource = new TestSource();
  TestSink1 testCompute = new TestSink1();
  TestSink2 testSink = new TestSink2();
  ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(getConfig());
  computeGraphBuilder.addSource("source", testSource, 4);
  // Partition the source output across the compute tasks on the default edge
  ComputeConnection computeConnection = computeGraphBuilder.addCompute("compute", testCompute, 4);
  computeConnection.partition("source")
      .viaEdge(TaskConfigurations.DEFAULT_EDGE)
      .withDataType(MessageTypes.OBJECT);
  // Allreduce the compute results into a single sink task
  ComputeConnection rc = computeGraphBuilder.addCompute("sink", testSink, 1);
  rc.allreduce("compute")
      .viaEdge(TaskConfigurations.DEFAULT_EDGE)
      .withReductionFunction(new Aggregator())
      .withDataType(MessageTypes.OBJECT);
  return computeGraphBuilder.build();
}
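A graph built this way is typically handed to the task executor, exactly as the MultiComputeTasksGraphExample above does; a usage sketch assuming a taskExecutor field from the enclosing TaskWorker:

// Plan, then run the graph; both calls mirror the worker example above
ComputeGraph graph = createGraph();
ExecutionPlan plan = taskExecutor.plan(graph);
taskExecutor.execute(graph, plan);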