Search in sources :

Example 36 with ComputeGraphBuilder

use of edu.iu.dsc.tws.task.impl.ComputeGraphBuilder in project twister2 by DSC-SPIDAL.

the class SvmSgdIterativeRunner method generateGenericDataPointLoader.

/**
 * Builds the compute graph that loads data points: a source task feeds a compute task, which
 * in turn feeds a sink task. Both links use {@code Context.TWISTER2_DIRECT_EDGE} as a direct
 * connection carrying {@code MessageTypes.OBJECT}.
 *
 * @param samples sample count handed to the source and compute tasks
 * @param parallelism parallelism used for the source, compute and sink tasks
 * @param numOfFeatures feature count handed to the compute task
 * @param dataSourcePathStr data source path handed to the source task
 * @param dataObjectSourceStr name under which the source task is registered
 * @param dataObjectComputeStr name under which the compute task is registered
 * @param dataObjectSinkStr name under which the sink task is registered
 * @param graphName name assigned to the resulting task graph
 * @return the built compute graph
 */
private ComputeGraph generateGenericDataPointLoader(int samples, int parallelism, int numOfFeatures, String dataSourcePathStr, String dataObjectSourceStr, String dataObjectComputeStr, String dataObjectSinkStr, String graphName) {
    // Diamond operator instead of a raw type: keeps the instantiation type-checked against
    // the declared <String, TextInputSplit> parameters and avoids an unchecked conversion.
    SVMDataObjectSource<String, TextInputSplit> sourceTask = new SVMDataObjectSource<>(Context.TWISTER2_DIRECT_EDGE, dataSourcePathStr, samples);
    IterativeSVMDataObjectCompute dataObjectCompute = new IterativeSVMDataObjectCompute(Context.TWISTER2_DIRECT_EDGE, parallelism, samples, numOfFeatures, DELIMITER);
    IterativeSVMDataObjectDirectSink iterativeSVMPrimaryDataObjectDirectSink = new IterativeSVMDataObjectDirectSink();
    ComputeGraphBuilder datapointsComputeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
    datapointsComputeGraphBuilder.addSource(dataObjectSourceStr, sourceTask, parallelism);
    ComputeConnection datapointComputeConnection = datapointsComputeGraphBuilder.addCompute(dataObjectComputeStr, dataObjectCompute, parallelism);
    ComputeConnection computeConnectionSink = datapointsComputeGraphBuilder.addCompute(dataObjectSinkStr, iterativeSVMPrimaryDataObjectDirectSink, parallelism);
    // source -> compute
    datapointComputeConnection.direct(dataObjectSourceStr).viaEdge(Context.TWISTER2_DIRECT_EDGE).withDataType(MessageTypes.OBJECT);
    // compute -> sink
    computeConnectionSink.direct(dataObjectComputeStr).viaEdge(Context.TWISTER2_DIRECT_EDGE).withDataType(MessageTypes.OBJECT);
    datapointsComputeGraphBuilder.setMode(this.operationMode);
    datapointsComputeGraphBuilder.setTaskGraphName(graphName);
    // Build the first taskgraph
    return datapointsComputeGraphBuilder.build();
}
Also used : TextInputSplit(edu.iu.dsc.tws.data.api.splits.TextInputSplit) IterativeSVMDataObjectDirectSink(edu.iu.dsc.tws.examples.ml.svm.data.IterativeSVMDataObjectDirectSink) IterativeSVMDataObjectCompute(edu.iu.dsc.tws.examples.ml.svm.data.IterativeSVMDataObjectCompute) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) SVMDataObjectSource(edu.iu.dsc.tws.examples.ml.svm.data.SVMDataObjectSource) ComputeConnection(edu.iu.dsc.tws.task.impl.ComputeConnection)

Example 37 with ComputeGraphBuilder

use of edu.iu.dsc.tws.task.impl.ComputeGraphBuilder in project twister2 by DSC-SPIDAL.

the class MultiComputeTasksGraphExample method execute.

/**
 * Reads the job parameters from {@code config}, generates the input data points through
 * {@code KMeansUtils.generateDataPoints}, then builds and executes a batch graph in which
 * "source" feeds "firstcompute" and "secondcompute" over direct edges and "compute"
 * all-reduces the output of both.
 */
@Override
public void execute() {
    LOG.log(Level.INFO, "Task worker starting: " + workerId);
    ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(config);
    int parallel = Integer.parseInt((String) config.get(DataObjectConstants.PARALLELISM_VALUE));
    SourceTask sourceTask = new SourceTask();
    FirstComputeTask firstComputeTask = new FirstComputeTask();
    SecondComputeTask secondComputeTask = new SecondComputeTask();
    ReduceTask reduceTask = new ReduceTask();
    // The worker id is appended to both configured directories.
    String dataDirectory = (String) config.get(DataObjectConstants.DINPUT_DIRECTORY) + workerId;
    String centroidDirectory = (String) config.get(DataObjectConstants.CINPUT_DIRECTORY) + workerId;
    int dimension = Integer.parseInt((String) config.get(DataObjectConstants.DIMENSIONS));
    int numFiles = Integer.parseInt((String) config.get(DataObjectConstants.NUMBER_OF_FILES));
    int dsize = Integer.parseInt((String) config.get(DataObjectConstants.DSIZE));
    int csize = Integer.parseInt((String) config.get(DataObjectConstants.CSIZE));
    String type = config.getStringValue(DataObjectConstants.FILE_TYPE);
    // Label and separate each value; the previous message ran them together unreadably.
    LOG.info("Input Values: dataDirectory=" + dataDirectory
        + ", centroidDirectory=" + centroidDirectory
        + ", dimension=" + dimension
        + ", numFiles=" + numFiles);
    KMeansUtils.generateDataPoints(config, dimension, numFiles, dsize, csize, dataDirectory, centroidDirectory, type);
    builder.addSource("source", sourceTask, parallel);
    ComputeConnection firstComputeConnection = builder.addCompute("firstcompute", firstComputeTask, parallel);
    ComputeConnection secondComputeConnection = builder.addCompute("secondcompute", secondComputeTask, parallel);
    ComputeConnection reduceConnection = builder.addCompute("compute", reduceTask, parallel);
    firstComputeConnection.direct("source").viaEdge("fdirect").withDataType(MessageTypes.OBJECT);
    secondComputeConnection.direct("source").viaEdge("sdirect").withDataType(MessageTypes.OBJECT);
    // The reduce task receives one all-reduce edge from each compute task.
    reduceConnection
        .allreduce("firstcompute").viaEdge("freduce").withReductionFunction(new Aggregator()).withDataType(MessageTypes.OBJECT)
        .connect()
        .allreduce("secondcompute").viaEdge("sreduce").withReductionFunction(new Aggregator()).withDataType(MessageTypes.OBJECT);
    builder.setMode(OperationMode.BATCH);
    // Only a graph-level constraint is applied. Node-level constraints are disabled; to add
    // them, pass a Map<String, String> (e.g. Context.TWISTER2_MAX_TASK_INSTANCES_PER_WORKER
    // -> "2") to builder.addNodeConstraints(<node name>, map).
    builder.addGraphConstraints(Context.TWISTER2_MAX_TASK_INSTANCES_PER_WORKER, "4");
    ComputeGraph graph = builder.build();
    LOG.info("%%% Graph Constraints:%%%" + graph.getGraphConstraints());
    ExecutionPlan plan = taskExecutor.plan(graph);
    taskExecutor.execute(graph, plan);
}
Also used : HashMap(java.util.HashMap) ComputeGraph(edu.iu.dsc.tws.api.compute.graph.ComputeGraph) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) ComputeConnection(edu.iu.dsc.tws.task.impl.ComputeConnection) ExecutionPlan(edu.iu.dsc.tws.api.compute.executor.ExecutionPlan)

Example 38 with ComputeGraphBuilder

use of edu.iu.dsc.tws.task.impl.ComputeGraphBuilder in project twister2 by DSC-SPIDAL.

the class KMeansDataGeneratorTest method testUniqueSchedules3.

/**
 * The HDFS data generation test below is commented out so that it does not run in the Travis build.
 */
/* @Test
  public void testUniqueSchedules2() throws IOException {
    Config config = getConfig();

    String hostname = String.valueOf(config.get("twister2.data.hdfs.namenode"));
    String dinputDirectory = "hdfs://" + hostname + ":9000/tmp/testdinput";

    int numFiles = 1;
    int dsize = 20;
    int dimension = 2;
    int parallelismValue = 2;

    KMeansDataGenerator.generateData("txt", new Path(dinputDirectory),
        numFiles, dsize, 100, dimension, config);
    ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
    computeGraphBuilder.setTaskGraphName("kmeans");
    DataObjectSource sourceTask = new DataObjectSource("direct", dinputDirectory);
    DataObjectSink sinkTask = new DataObjectSink();
    computeGraphBuilder.addSource("source", sourceTask, parallelismValue);
    ComputeConnection computeConnection1 = computeGraphBuilder.addSink("sink", sinkTask,
        parallelismValue);
    computeConnection1.direct("source").viaEdge("direct").withDataType(MessageTypes.OBJECT);
    computeGraphBuilder.setMode(OperationMode.BATCH);

    LocalCompleteTextInputPartitioner localCompleteTextInputPartitioner
        = new LocalCompleteTextInputPartitioner(
        new Path(dinputDirectory), parallelismValue, config);

    DataSource<String, ?> source
        = new DataSource<>(config, localCompleteTextInputPartitioner, parallelismValue);
    InputSplit<String> inputSplit;
    for (int i = 0; i < parallelismValue; i++) {
      inputSplit = source.getNextSplit(i);
      Assert.assertNotNull(inputSplit);
    }
  }*/
/**
 * Generates a data file under {@code /tmp/testcinput}, registers a replicated-read source on a
 * batch graph builder, and asserts that the path has a file status and that
 * {@code DataFileReader.readData} returns a non-null array for it.
 */
@Test
public void testUniqueSchedules3() throws IOException {
    Config config = getConfig();
    final String centroidDir = "/tmp/testcinput";
    final int fileCount = 1;
    final int centroidCount = 4;
    final int dims = 2;
    final int parallelism = 2;

    // Produce the input file read back below.
    KMeansDataGenerator.generateData(
        "txt", new Path(centroidDir), fileCount, centroidCount, 100, dims, config);

    // Configure (but do not build) a graph with a single replicated-read source.
    ComputeGraphBuilder graphBuilder = ComputeGraphBuilder.newBuilder(config);
    graphBuilder.setTaskGraphName("kmeans");
    DataFileReplicatedReadSource readSource =
        new DataFileReplicatedReadSource(Context.TWISTER2_DIRECT_EDGE, centroidDir);
    graphBuilder.addSource("map", readSource, parallelism);
    graphBuilder.setMode(OperationMode.BATCH);

    // The generated path must be visible through the file system abstraction.
    Path centroidPath = new Path(centroidDir);
    final FileSystem fileSystem = FileSystemUtils.get(centroidPath);
    final FileStatus status = fileSystem.getFileStatus(centroidPath);
    Assert.assertNotNull(status);

    // Reading the data back must yield a result.
    DataFileReader reader = new DataFileReader(config, "local");
    double[][] readCentroids = reader.readData(centroidPath, dims, centroidCount);
    Assert.assertNotNull(readCentroids);
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FileStatus(edu.iu.dsc.tws.api.data.FileStatus) DataFileReader(edu.iu.dsc.tws.data.utils.DataFileReader) DataFileReplicatedReadSource(edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource) Config(edu.iu.dsc.tws.api.config.Config) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) Test(org.junit.Test)

Example 39 with ComputeGraphBuilder

use of edu.iu.dsc.tws.task.impl.ComputeGraphBuilder in project twister2 by DSC-SPIDAL.

the class KMeansDataGeneratorTest method testUniqueSchedules1.

/**
 * Generates a data file under {@code /tmp/testdinput}, wires a source to a sink over a direct
 * edge on a batch graph builder, and asserts that a {@code DataSource} backed by a
 * {@code LocalTextInputPartitioner} returns a non-null split for every index below the
 * parallelism value.
 */
@Test
public void testUniqueSchedules1() throws IOException {
    Config config = getConfig();
    final String dataDir = "/tmp/testdinput";
    final int fileCount = 1;
    final int pointCount = 20;
    final int dims = 2;
    final int parallelism = 2;

    // Produce the input file that is partitioned below.
    KMeansDataGenerator.generateData(
        "txt", new Path(dataDir), fileCount, pointCount, 100, dims, config);

    // Configure (but do not build) a source -> sink graph.
    ComputeGraphBuilder graphBuilder = ComputeGraphBuilder.newBuilder(config);
    graphBuilder.setTaskGraphName("kmeans");
    DataObjectSource objectSource = new DataObjectSource("direct", dataDir);
    DataObjectSink objectSink = new DataObjectSink();
    graphBuilder.addSource("source", objectSource, parallelism);
    ComputeConnection sinkConnection = graphBuilder.addCompute("sink", objectSink, parallelism);
    sinkConnection.direct("source").viaEdge("direct").withDataType(MessageTypes.OBJECT);
    graphBuilder.setMode(OperationMode.BATCH);

    LocalTextInputPartitioner partitioner =
        new LocalTextInputPartitioner(new Path(dataDir), parallelism, config);
    DataSource<String, ?> source = new DataSource<>(config, partitioner, parallelism);

    // Each index in [0, parallelism) must be handed a split.
    int index = 0;
    while (index < parallelism) {
        InputSplit<String> split = source.getNextSplit(index);
        Assert.assertNotNull(split);
        index++;
    }
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) LocalTextInputPartitioner(edu.iu.dsc.tws.data.api.formatters.LocalTextInputPartitioner) DataObjectSink(edu.iu.dsc.tws.task.dataobjects.DataObjectSink) Config(edu.iu.dsc.tws.api.config.Config) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) DataObjectSource(edu.iu.dsc.tws.task.dataobjects.DataObjectSource) ComputeConnection(edu.iu.dsc.tws.task.impl.ComputeConnection) DataSource(edu.iu.dsc.tws.dataset.DataSource) Test(org.junit.Test)

Example 40 with ComputeGraphBuilder

use of edu.iu.dsc.tws.task.impl.ComputeGraphBuilder in project twister2 by DSC-SPIDAL.

the class TaskGraphBuildTest method createGraph.

/**
 * Assembles the test graph: "source" (parallelism 4) is partitioned into "compute"
 * (parallelism 4), whose output is all-reduced with an {@code Aggregator} into "sink"
 * (parallelism 1). Both edges use {@code TaskConfigurations.DEFAULT_EDGE}.
 *
 * @return the built compute graph
 */
private ComputeGraph createGraph() {
    TestSource source = new TestSource();
    TestSink1 compute = new TestSink1();
    TestSink2 sink = new TestSink2();

    ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(getConfig());
    builder.addSource("source", source, 4);

    // source --partition--> compute
    ComputeConnection toCompute = builder.addCompute("compute", compute, 4);
    toCompute
        .partition("source")
        .viaEdge(TaskConfigurations.DEFAULT_EDGE)
        .withDataType(MessageTypes.OBJECT);

    // compute --allreduce--> sink
    ComputeConnection toSink = builder.addCompute("sink", sink, 1);
    toSink
        .allreduce("compute")
        .viaEdge(TaskConfigurations.DEFAULT_EDGE)
        .withReductionFunction(new Aggregator())
        .withDataType(MessageTypes.OBJECT);

    return builder.build();
}
Also used : ComputeGraph(edu.iu.dsc.tws.api.compute.graph.ComputeGraph) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) ComputeConnection(edu.iu.dsc.tws.task.impl.ComputeConnection)

Aggregations

ComputeGraphBuilder (edu.iu.dsc.tws.task.impl.ComputeGraphBuilder)66 ComputeConnection (edu.iu.dsc.tws.task.impl.ComputeConnection)55 ComputeGraph (edu.iu.dsc.tws.api.compute.graph.ComputeGraph)39 TaskSchedulerClassTest (edu.iu.dsc.tws.tsched.utils.TaskSchedulerClassTest)16 ExecutionPlan (edu.iu.dsc.tws.api.compute.executor.ExecutionPlan)10 DataFlowGraph (edu.iu.dsc.tws.task.cdfw.DataFlowGraph)8 ComputeEnvironment (edu.iu.dsc.tws.task.ComputeEnvironment)7 GraphDataSource (edu.iu.dsc.tws.graphapi.partition.GraphDataSource)6 Config (edu.iu.dsc.tws.api.config.Config)4 ConnectedSink (edu.iu.dsc.tws.task.cdfw.task.ConnectedSink)4 DataFileReplicatedReadSource (edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource)4 Path (edu.iu.dsc.tws.api.data.Path)3 DataObjectSource (edu.iu.dsc.tws.task.dataobjects.DataObjectSource)3 ReduceFn (edu.iu.dsc.tws.task.impl.function.ReduceFn)3 JobConfig (edu.iu.dsc.tws.api.JobConfig)2 DataObject (edu.iu.dsc.tws.api.dataset.DataObject)2 TextInputSplit (edu.iu.dsc.tws.data.api.splits.TextInputSplit)2 IterativeSVMDataObjectCompute (edu.iu.dsc.tws.examples.ml.svm.data.IterativeSVMDataObjectCompute)2 IterativeSVMDataObjectDirectSink (edu.iu.dsc.tws.examples.ml.svm.data.IterativeSVMDataObjectDirectSink)2 IterativeSVMWeightVectorObjectCompute (edu.iu.dsc.tws.examples.ml.svm.data.IterativeSVMWeightVectorObjectCompute)2