Search in sources :

Example 1 with DataFileReplicatedReadSource

use of edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource in project twister2 by DSC-SPIDAL.

the class SvmSgdIterativeRunner method buildWeightVectorTG.

/**
 * Assembles the compute graph that loads the weight vector.
 *
 * <p>The graph has three tasks, all registered with {@code dataStreamerParallelism}: a
 * {@code DataFileReplicatedReadSource} reading from the configured weight-vector directory, an
 * {@code IterativeSVMWeightVectorObjectCompute}, and an
 * {@code IterativeSVMWeightVectorObjectDirectSink} (which is also added through
 * {@code addCompute}). Source to compute is a direct edge typed {@code MessageTypes.OBJECT};
 * compute to sink is a direct edge typed {@code MessageTypes.DOUBLE_ARRAY}.
 *
 * @return the built graph, run in {@code operationMode} and named
 *     {@code IterativeSVMConstants.WEIGHT_VECTOR_LOADING_TASK_GRAPH}
 */
private ComputeGraph buildWeightVectorTG() {
    final String directEdge = Context.TWISTER2_DIRECT_EDGE;

    // Task instances.
    DataFileReplicatedReadSource readSource = new DataFileReplicatedReadSource(
        directEdge, svmJobParameters.getWeightVectorDataDir(), 1);
    IterativeSVMWeightVectorObjectCompute computeTask = new IterativeSVMWeightVectorObjectCompute(
        directEdge, 1, svmJobParameters.getFeatures());
    IterativeSVMWeightVectorObjectDirectSink sinkTask =
        new IterativeSVMWeightVectorObjectDirectSink();

    // Register the tasks with the builder.
    ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(config);
    builder.addSource(
        Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_SOURCE,
        readSource,
        dataStreamerParallelism);
    ComputeConnection computeConnection = builder.addCompute(
        Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_COMPUTE,
        computeTask,
        dataStreamerParallelism);
    ComputeConnection sinkConnection = builder.addCompute(
        Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_SINK,
        sinkTask,
        dataStreamerParallelism);

    // Wire source -> compute -> sink over direct edges.
    computeConnection
        .direct(Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_SOURCE)
        .viaEdge(directEdge)
        .withDataType(MessageTypes.OBJECT);
    sinkConnection
        .direct(Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_COMPUTE)
        .viaEdge(directEdge)
        .withDataType(MessageTypes.DOUBLE_ARRAY);

    builder.setMode(operationMode);
    builder.setTaskGraphName(IterativeSVMConstants.WEIGHT_VECTOR_LOADING_TASK_GRAPH);
    return builder.build();
}
Also used : DataFileReplicatedReadSource(edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource) IterativeSVMWeightVectorObjectCompute(edu.iu.dsc.tws.examples.ml.svm.data.IterativeSVMWeightVectorObjectCompute) IterativeSVMWeightVectorObjectDirectSink(edu.iu.dsc.tws.examples.ml.svm.data.IterativeSVMWeightVectorObjectDirectSink) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) ComputeConnection(edu.iu.dsc.tws.task.impl.ComputeConnection)

Example 2 with DataFileReplicatedReadSource

use of edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource in project twister2 by DSC-SPIDAL.

the class KMeansDataGeneratorTest method testUniqueSchedules3.

/**
 * The HDFS data generation test below is commented out because it cannot run in the Travis build.
 */
/* @Test
  public void testUniqueSchedules2() throws IOException {
    Config config = getConfig();

    String hostname = String.valueOf(config.get("twister2.data.hdfs.namenode"));
    String dinputDirectory = "hdfs://" + hostname + ":9000/tmp/testdinput";

    int numFiles = 1;
    int dsize = 20;
    int dimension = 2;
    int parallelismValue = 2;

    KMeansDataGenerator.generateData("txt", new Path(dinputDirectory),
        numFiles, dsize, 100, dimension, config);
    ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
    computeGraphBuilder.setTaskGraphName("kmeans");
    DataObjectSource sourceTask = new DataObjectSource("direct", dinputDirectory);
    DataObjectSink sinkTask = new DataObjectSink();
    computeGraphBuilder.addSource("source", sourceTask, parallelismValue);
    ComputeConnection computeConnection1 = computeGraphBuilder.addSink("sink", sinkTask,
        parallelismValue);
    computeConnection1.direct("source").viaEdge("direct").withDataType(MessageTypes.OBJECT);
    computeGraphBuilder.setMode(OperationMode.BATCH);

    LocalCompleteTextInputPartitioner localCompleteTextInputPartitioner
        = new LocalCompleteTextInputPartitioner(
        new Path(dinputDirectory), parallelismValue, config);

    DataSource<String, ?> source
        = new DataSource<>(config, localCompleteTextInputPartitioner, parallelismValue);
    InputSplit<String> inputSplit;
    for (int i = 0; i < parallelismValue; i++) {
      inputSplit = source.getNextSplit(i);
      Assert.assertNotNull(inputSplit);
    }
  }*/
/**
 * Generates a small text data set under {@code /tmp/testcinput}, registers a
 * {@code DataFileReplicatedReadSource} over it as a batch-mode source task, and then checks
 * that the directory's file status and the data read back through a {@code DataFileReader}
 * are both non-null.
 *
 * @throws IOException declared by the test signature for the file-system and data calls
 */
@Test
public void testUniqueSchedules3() throws IOException {
    Config config = getConfig();

    String cinputDirectory = "/tmp/testcinput";
    int numFiles = 1;
    int csize = 4;
    int dimension = 2;
    int parallelismValue = 2;

    KMeansDataGenerator.generateData(
        "txt", new Path(cinputDirectory), numFiles, csize, 100, dimension, config);

    // Register the replicated read source on a batch-mode graph builder.
    ComputeGraphBuilder graphBuilder = ComputeGraphBuilder.newBuilder(config);
    graphBuilder.setTaskGraphName("kmeans");
    DataFileReplicatedReadSource replicatedSource =
        new DataFileReplicatedReadSource(Context.TWISTER2_DIRECT_EDGE, cinputDirectory);
    graphBuilder.addSource("map", replicatedSource, parallelismValue);
    graphBuilder.setMode(OperationMode.BATCH);

    // The generated input location must be resolvable through the file system.
    Path inputPath = new Path(cinputDirectory);
    final FileSystem fileSystem = FileSystemUtils.get(inputPath);
    final FileStatus inputStatus = fileSystem.getFileStatus(inputPath);
    Assert.assertNotNull(inputStatus);

    // Reading the generated data back must yield a result.
    DataFileReader dataReader = new DataFileReader(config, "local");
    double[][] centroids = dataReader.readData(inputPath, dimension, csize);
    Assert.assertNotNull(centroids);
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FileStatus(edu.iu.dsc.tws.api.data.FileStatus) DataFileReader(edu.iu.dsc.tws.data.utils.DataFileReader) DataFileReplicatedReadSource(edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource) Config(edu.iu.dsc.tws.api.config.Config) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) Test(org.junit.Test)

Example 3 with DataFileReplicatedReadSource

use of edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource in project twister2 by DSC-SPIDAL.

the class TGUtils method buildWeightVectorTG.

/**
 * Builds the weight-vector loading compute graph from explicitly supplied settings.
 *
 * <p>A {@code DataFileReplicatedReadSource} over the weight-vector data directory is connected
 * by a direct edge (typed {@code MessageTypes.OBJECT}) to an
 * {@code IterativeSVMWeightVectorObjectCompute}, which is connected by a direct edge (typed
 * {@code MessageTypes.DOUBLE_ARRAY}) to an {@code IterativeSVMWeightVectorObjectDirectSink}.
 * The sink is registered through {@code addCompute}.
 *
 * @param config configuration handed to {@code ComputeGraphBuilder.newBuilder}
 * @param dataStreamerParallelism parallelism applied to the source, compute and sink tasks
 * @param svmJobParameters supplies the weight-vector data directory and the feature count
 * @param opMode operation mode set on the graph builder
 * @return the built graph, named {@code IterativeSVMConstants.WEIGHT_VECTOR_LOADING_TASK_GRAPH}
 */
public static ComputeGraph buildWeightVectorTG(Config config, int dataStreamerParallelism, SVMJobParameters svmJobParameters, OperationMode opMode) {
    final String edge = Context.TWISTER2_DIRECT_EDGE;

    DataFileReplicatedReadSource source =
        new DataFileReplicatedReadSource(edge, svmJobParameters.getWeightVectorDataDir());
    IterativeSVMWeightVectorObjectCompute compute =
        new IterativeSVMWeightVectorObjectCompute(edge, 1, svmJobParameters.getFeatures());
    IterativeSVMWeightVectorObjectDirectSink sink =
        new IterativeSVMWeightVectorObjectDirectSink();

    ComputeGraphBuilder graphBuilder = ComputeGraphBuilder.newBuilder(config);

    // Register the three tasks under their configured names.
    graphBuilder.addSource(
        Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_SOURCE, source, dataStreamerParallelism);
    ComputeConnection toCompute = graphBuilder.addCompute(
        Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_COMPUTE, compute, dataStreamerParallelism);
    ComputeConnection toSink = graphBuilder.addCompute(
        Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_SINK, sink, dataStreamerParallelism);

    // Direct edges: source -> compute, then compute -> sink.
    toCompute
        .direct(Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_SOURCE)
        .viaEdge(edge)
        .withDataType(MessageTypes.OBJECT);
    toSink
        .direct(Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_COMPUTE)
        .viaEdge(edge)
        .withDataType(MessageTypes.DOUBLE_ARRAY);

    graphBuilder.setMode(opMode);
    graphBuilder.setTaskGraphName(IterativeSVMConstants.WEIGHT_VECTOR_LOADING_TASK_GRAPH);
    return graphBuilder.build();
}
Also used : DataFileReplicatedReadSource(edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource) IterativeSVMWeightVectorObjectCompute(edu.iu.dsc.tws.examples.ml.svm.data.IterativeSVMWeightVectorObjectCompute) IterativeSVMWeightVectorObjectDirectSink(edu.iu.dsc.tws.examples.ml.svm.data.IterativeSVMWeightVectorObjectDirectSink) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) ComputeConnection(edu.iu.dsc.tws.task.impl.ComputeConnection)

Example 4 with DataFileReplicatedReadSource

use of edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource in project twister2 by DSC-SPIDAL.

the class KMeansConnectedDataflowExample method generateSecondJob.

/**
 * Creates the second (centroid) sub-graph job.
 *
 * <p>The task graph, named {@code "centroidTG"} and run in batch mode, chains a
 * {@code DataFileReplicatedReadSource} over {@code centroidDirectory}, a
 * {@code KMeansDataObjectCompute} and a {@code KMeansDataObjectDirectSink} via direct edges
 * typed {@code MessageTypes.OBJECT}. The built graph is wrapped in a {@code DataFlowGraph}
 * sub-graph job whose graph type is set to {@code "non-iterative"}.
 *
 * @param config configuration handed to {@code ComputeGraphBuilder.newBuilder}
 * @param parallelismValue parallelism applied to the source, compute and sink tasks
 * @param centroidDirectory directory passed to the replicated read source
 * @param dimension passed to {@code KMeansDataObjectCompute} as its third constructor argument
 * @param csize passed to {@code KMeansDataObjectCompute} as its second constructor argument
 * @param instances worker count set on the resulting job
 * @param jobConfig data flow job configuration added to the resulting job
 * @return the configured sub-graph job
 */
private static DataFlowGraph generateSecondJob(Config config, int parallelismValue, String centroidDirectory, int dimension, int csize, int instances, DataFlowJobConfig jobConfig) {
    final String graphName = "centroidTG";
    final String sourceName = "centroidsource";
    final String computeName = "centroidcompute";
    final String sinkName = "centroidsink";

    DataFileReplicatedReadSource centroidSource =
        new DataFileReplicatedReadSource(Context.TWISTER2_DIRECT_EDGE, centroidDirectory);
    KMeansDataObjectCompute centroidCompute =
        new KMeansDataObjectCompute(Context.TWISTER2_DIRECT_EDGE, csize, dimension);
    KMeansDataObjectDirectSink centroidSink = new KMeansDataObjectDirectSink("centroids");

    ComputeGraphBuilder graphBuilder = ComputeGraphBuilder.newBuilder(config);

    // Register the source, compute, and sink tasks of the second task graph.
    graphBuilder.addSource(sourceName, centroidSource, parallelismValue);
    ComputeConnection computeConnection =
        graphBuilder.addCompute(computeName, centroidCompute, parallelismValue);
    ComputeConnection sinkConnection =
        graphBuilder.addCompute(sinkName, centroidSink, parallelismValue);

    // Connect the tasks of the second task graph with direct edges.
    computeConnection
        .direct(sourceName)
        .viaEdge(Context.TWISTER2_DIRECT_EDGE)
        .withDataType(MessageTypes.OBJECT);
    sinkConnection
        .direct(computeName)
        .viaEdge(Context.TWISTER2_DIRECT_EDGE)
        .withDataType(MessageTypes.OBJECT);

    graphBuilder.setMode(OperationMode.BATCH);
    graphBuilder.setTaskGraphName(graphName);

    // Build the second task graph and wrap it as a sub-graph job.
    ComputeGraph centroidGraph = graphBuilder.build();
    return DataFlowGraph.newSubGraphJob(graphName, centroidGraph)
        .setWorkers(instances)
        .addDataFlowJobConfig(jobConfig)
        .setGraphType("non-iterative");
}
Also used : DataFileReplicatedReadSource(edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource) ComputeGraph(edu.iu.dsc.tws.api.compute.graph.ComputeGraph) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) ComputeConnection(edu.iu.dsc.tws.task.impl.ComputeConnection) DataFlowGraph(edu.iu.dsc.tws.task.cdfw.DataFlowGraph)

Aggregations

DataFileReplicatedReadSource (edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource)4 ComputeGraphBuilder (edu.iu.dsc.tws.task.impl.ComputeGraphBuilder)4 ComputeConnection (edu.iu.dsc.tws.task.impl.ComputeConnection)3 IterativeSVMWeightVectorObjectCompute (edu.iu.dsc.tws.examples.ml.svm.data.IterativeSVMWeightVectorObjectCompute)2 IterativeSVMWeightVectorObjectDirectSink (edu.iu.dsc.tws.examples.ml.svm.data.IterativeSVMWeightVectorObjectDirectSink)2 ComputeGraph (edu.iu.dsc.tws.api.compute.graph.ComputeGraph)1 Config (edu.iu.dsc.tws.api.config.Config)1 FileStatus (edu.iu.dsc.tws.api.data.FileStatus)1 FileSystem (edu.iu.dsc.tws.api.data.FileSystem)1 Path (edu.iu.dsc.tws.api.data.Path)1 DataFileReader (edu.iu.dsc.tws.data.utils.DataFileReader)1 DataFlowGraph (edu.iu.dsc.tws.task.cdfw.DataFlowGraph)1 Test (org.junit.Test)1