Search in sources :

Example 6 with DataFlowGraph

use of edu.iu.dsc.tws.task.cdfw.DataFlowGraph in project twister2 by DSC-SPIDAL.

In the class KMeansConnectedDataflowExample, method generateData.

/**
 * Assembles the data-generation sub-graph job.
 *
 * <p>A {@code DataGeneratorSource} and a {@code DataGeneratorSink} are registered with the same
 * parallelism and linked by a direct edge carrying {@code MessageTypes.OBJECT}. The graph is built
 * in {@code OperationMode.BATCH} and wrapped in a "non-iterative" {@code DataFlowGraph}.
 *
 * @param config            configuration handed to {@code ComputeGraphBuilder.newBuilder}
 * @param dataDirectory     forwarded to the {@code DataGeneratorSource} constructor
 * @param centroidDirectory forwarded to the {@code DataGeneratorSource} constructor
 * @param dimension         forwarded to the {@code DataGeneratorSource} constructor
 * @param dsize             forwarded to the {@code DataGeneratorSource} constructor
 * @param csize             forwarded to the {@code DataGeneratorSource} constructor
 * @param workers           worker count set on the returned job
 * @param parallel          parallelism used for both the source and the sink task
 * @param jobConfig         job configuration attached to the returned job
 * @return the sub-graph job registered under the name "datageneratorTG"
 */
private static DataFlowGraph generateData(Config config, String dataDirectory, String centroidDirectory, int dimension, int dsize, int csize, int workers, int parallel, DataFlowJobConfig jobConfig) {
    final String sourceName = "datageneratorsource";
    final String sinkName = "datageneratorsink";
    final String jobName = "datageneratorTG";

    DataGeneratorSource generatorSource = new DataGeneratorSource(
            Context.TWISTER2_DIRECT_EDGE, dsize, csize, dimension, dataDirectory, centroidDirectory);
    DataGeneratorSink generatorSink = new DataGeneratorSink();

    ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(config);
    builder.setTaskGraphName("DataGenerator");
    builder.addSource(sourceName, generatorSource, parallel);

    // The sink consumes the source's output one-to-one over the direct edge.
    ComputeConnection sinkConnection = builder.addCompute(sinkName, generatorSink, parallel);
    sinkConnection
            .direct(sourceName)
            .viaEdge(Context.TWISTER2_DIRECT_EDGE)
            .withDataType(MessageTypes.OBJECT);

    builder.setMode(OperationMode.BATCH);
    ComputeGraph generatorGraph = builder.build();

    // NOTE(review): the builder is renamed only after build() has already produced the graph, so
    // the built graph presumably keeps the name "DataGenerator" - confirm whether this call was
    // meant to run before build(), as generateSecondJob does.
    builder.setTaskGraphName(jobName);

    return DataFlowGraph.newSubGraphJob(jobName, generatorGraph)
            .setWorkers(workers)
            .addDataFlowJobConfig(jobConfig)
            .setGraphType("non-iterative");
}
Also used : ComputeGraph(edu.iu.dsc.tws.api.compute.graph.ComputeGraph) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) ComputeConnection(edu.iu.dsc.tws.task.impl.ComputeConnection) DataFlowGraph(edu.iu.dsc.tws.task.cdfw.DataFlowGraph)

Example 7 with DataFlowGraph

use of edu.iu.dsc.tws.task.cdfw.DataFlowGraph in project twister2 by DSC-SPIDAL.

In the class KMeansConnectedDataflowExample, method generateSecondJob.

/**
 * Assembles the centroid sub-graph job: source -> compute -> sink.
 *
 * <p>All three tasks run with {@code parallelismValue} and are chained with direct edges carrying
 * {@code MessageTypes.OBJECT}. The graph is named "centroidTG", built in
 * {@code OperationMode.BATCH} and wrapped in a "non-iterative" {@code DataFlowGraph}.
 *
 * @param config            configuration handed to {@code ComputeGraphBuilder.newBuilder}
 * @param parallelismValue  parallelism applied to the source, compute and sink tasks
 * @param centroidDirectory forwarded to the {@code DataFileReplicatedReadSource} constructor
 * @param dimension         forwarded to the {@code KMeansDataObjectCompute} constructor
 * @param csize             forwarded to the {@code KMeansDataObjectCompute} constructor
 * @param instances         worker count set on the returned job
 * @param jobConfig         job configuration attached to the returned job
 * @return the sub-graph job registered under the name "centroidTG"
 */
private static DataFlowGraph generateSecondJob(Config config, int parallelismValue, String centroidDirectory, int dimension, int csize, int instances, DataFlowJobConfig jobConfig) {
    final String sourceName = "centroidsource";
    final String computeName = "centroidcompute";
    final String sinkName = "centroidsink";
    final String graphName = "centroidTG";

    DataFileReplicatedReadSource centroidSource =
            new DataFileReplicatedReadSource(Context.TWISTER2_DIRECT_EDGE, centroidDirectory);
    KMeansDataObjectCompute centroidCompute =
            new KMeansDataObjectCompute(Context.TWISTER2_DIRECT_EDGE, csize, dimension);
    KMeansDataObjectDirectSink centroidSink = new KMeansDataObjectDirectSink("centroids");

    ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(config);

    // Register the three tasks of the pipeline.
    builder.addSource(sourceName, centroidSource, parallelismValue);
    ComputeConnection computeConnection =
            builder.addCompute(computeName, centroidCompute, parallelismValue);
    ComputeConnection sinkConnection =
            builder.addCompute(sinkName, centroidSink, parallelismValue);

    // Wire source -> compute and compute -> sink over direct edges.
    computeConnection
            .direct(sourceName)
            .viaEdge(Context.TWISTER2_DIRECT_EDGE)
            .withDataType(MessageTypes.OBJECT);
    sinkConnection
            .direct(computeName)
            .viaEdge(Context.TWISTER2_DIRECT_EDGE)
            .withDataType(MessageTypes.OBJECT);

    builder.setMode(OperationMode.BATCH);
    builder.setTaskGraphName(graphName);
    ComputeGraph centroidGraph = builder.build();

    return DataFlowGraph.newSubGraphJob(graphName, centroidGraph)
            .setWorkers(instances)
            .addDataFlowJobConfig(jobConfig)
            .setGraphType("non-iterative");
}
Also used : DataFileReplicatedReadSource(edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource) ComputeGraph(edu.iu.dsc.tws.api.compute.graph.ComputeGraph) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) ComputeConnection(edu.iu.dsc.tws.task.impl.ComputeConnection) DataFlowGraph(edu.iu.dsc.tws.task.cdfw.DataFlowGraph)

Example 8 with DataFlowGraph

use of edu.iu.dsc.tws.task.cdfw.DataFlowGraph in project twister2 by DSC-SPIDAL.

In the class TwoDataFlowsExample, method runFirstJob.

/**
 * Builds and executes the first dataflow graph ("first_graph").
 *
 * <p>A {@code FirstSourceTask} feeds a {@code ConnectedSink} constructed with "first_out" through
 * a partition edge named "partition" carrying {@code MessageTypes.OBJECT}. The graph is built in
 * {@code OperationMode.BATCH}, configured for 4 workers and submitted through {@code cdfwEnv}.
 *
 * @param config      configuration handed to {@code ComputeGraphBuilder.newBuilder}
 * @param cdfwEnv     environment on which the resulting job is executed
 * @param parallelism parallelism applied to both the source and the sink task
 * @param jobConfig   job configuration attached to the submitted job
 */
private static void runFirstJob(Config config, CDFWEnv cdfwEnv, int parallelism, DataFlowJobConfig jobConfig) {
    final String sourceName = "source1";
    final String sinkName = "sink1";

    FirstSourceTask source = new FirstSourceTask();
    ConnectedSink sink = new ConnectedSink("first_out");

    ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(config);
    builder.addSource(sourceName, source, parallelism);
    ComputeConnection sinkConnection = builder.addCompute(sinkName, sink, parallelism);
    sinkConnection
            .partition(sourceName)
            .viaEdge("partition")
            .withDataType(MessageTypes.OBJECT);
    builder.setMode(OperationMode.BATCH);

    ComputeGraph firstGraph = builder.build();
    DataFlowGraph firstJob = DataFlowGraph.newSubGraphJob("first_graph", firstGraph)
            .setWorkers(4)
            .addDataFlowJobConfig(jobConfig)
            .setGraphType("non-iterative");
    cdfwEnv.executeDataFlowGraph(firstJob);
}
Also used : ConnectedSink(edu.iu.dsc.tws.task.cdfw.task.ConnectedSink) ComputeGraph(edu.iu.dsc.tws.api.compute.graph.ComputeGraph) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) ComputeConnection(edu.iu.dsc.tws.task.impl.ComputeConnection) DataFlowGraph(edu.iu.dsc.tws.task.cdfw.DataFlowGraph)

Example 9 with DataFlowGraph

use of edu.iu.dsc.tws.task.cdfw.DataFlowGraph in project twister2 by DSC-SPIDAL.

In the class TwoDataFlowsExample, method runSecondJob.

/**
 * Builds and executes the second dataflow graph ("second_graph").
 *
 * <p>A {@code ConnectedSource} constructed with ("reduce", "first_out") feeds a single-instance
 * {@code ConnectedSink} through a reduce edge named "reduce" that applies an {@code Aggregator}
 * and carries {@code MessageTypes.OBJECT}. The graph is built in {@code OperationMode.BATCH},
 * configured for 4 workers and submitted through {@code cdfwEnv}.
 *
 * @param config      configuration handed to {@code ComputeGraphBuilder.newBuilder}
 * @param cdfwEnv     environment on which the resulting job is executed
 * @param parallelism parallelism applied to the source task (the sink always uses 1)
 * @param jobConfig   job configuration attached to the submitted job
 */
private static void runSecondJob(Config config, CDFWEnv cdfwEnv, int parallelism, DataFlowJobConfig jobConfig) {
    final String sourceName = "source2";
    final String sinkName = "sink2";

    ConnectedSource source = new ConnectedSource("reduce", "first_out");
    ConnectedSink sink = new ConnectedSink();

    ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(config);
    builder.addSource(sourceName, source, parallelism);

    // The sink is registered with parallelism 1, so the reduce edge targets a single instance.
    ComputeConnection sinkConnection = builder.addCompute(sinkName, sink, 1);
    sinkConnection
            .reduce(sourceName)
            .viaEdge("reduce")
            .withReductionFunction(new Aggregator())
            .withDataType(MessageTypes.OBJECT);
    builder.setMode(OperationMode.BATCH);

    ComputeGraph secondGraph = builder.build();
    DataFlowGraph secondJob = DataFlowGraph.newSubGraphJob("second_graph", secondGraph)
            .setWorkers(4)
            .addDataFlowJobConfig(jobConfig)
            .setGraphType("non-iterative");
    cdfwEnv.executeDataFlowGraph(secondJob);
}
Also used : ConnectedSink(edu.iu.dsc.tws.task.cdfw.task.ConnectedSink) ConnectedSource(edu.iu.dsc.tws.task.cdfw.task.ConnectedSource) ComputeGraph(edu.iu.dsc.tws.api.compute.graph.ComputeGraph) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) ComputeConnection(edu.iu.dsc.tws.task.impl.ComputeConnection) DataFlowGraph(edu.iu.dsc.tws.task.cdfw.DataFlowGraph)

Aggregations

DataFlowGraph (edu.iu.dsc.tws.task.cdfw.DataFlowGraph): 9, ComputeGraph (edu.iu.dsc.tws.api.compute.graph.ComputeGraph): 8, ComputeConnection (edu.iu.dsc.tws.task.impl.ComputeConnection): 8, ComputeGraphBuilder (edu.iu.dsc.tws.task.impl.ComputeGraphBuilder): 8, ConnectedSink (edu.iu.dsc.tws.task.cdfw.task.ConnectedSink): 4, ConnectedSource (edu.iu.dsc.tws.task.cdfw.task.ConnectedSource): 2, DataFlowJobConfig (edu.iu.dsc.tws.task.cdfw.DataFlowJobConfig): 1, DataFileReplicatedReadSource (edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource): 1, DataObjectSource (edu.iu.dsc.tws.task.dataobjects.DataObjectSource): 1