Use of edu.iu.dsc.tws.task.cdfw.DataFlowGraph in project twister2 by DSC-SPIDAL.
The class KMeansConnectedDataflowExample, method generateData.
/**
 * Builds the data-generation sub-graph job: a {@code DataGeneratorSource} connected to a
 * {@code DataGeneratorSink} over a direct edge, run in batch mode.
 *
 * @param config            configuration handed to the {@code ComputeGraphBuilder}
 * @param dataDirectory     forwarded to the {@code DataGeneratorSource} constructor
 * @param centroidDirectory forwarded to the {@code DataGeneratorSource} constructor
 * @param dimension         forwarded to the {@code DataGeneratorSource} constructor
 * @param dsize             forwarded to the {@code DataGeneratorSource} constructor
 * @param csize             forwarded to the {@code DataGeneratorSource} constructor
 * @param workers           worker count set on the returned job
 * @param parallel          parallelism used for both the source and the sink task
 * @param jobConfig         job configuration attached to the returned job
 * @return a non-iterative {@code DataFlowGraph} sub-graph job named {@code "datageneratorTG"}
 */
private static DataFlowGraph generateData(Config config, String dataDirectory, String centroidDirectory, int dimension, int dsize, int csize, int workers, int parallel, DataFlowJobConfig jobConfig) {
    // Single name shared by the compute graph and the sub-graph job.
    final String graphName = "datageneratorTG";

    DataGeneratorSource dataGeneratorSource = new DataGeneratorSource(
        Context.TWISTER2_DIRECT_EDGE, dsize, csize, dimension, dataDirectory, centroidDirectory);
    DataGeneratorSink dataGeneratorSink = new DataGeneratorSink();

    ComputeGraphBuilder dataGenerationGraphBuilder = ComputeGraphBuilder.newBuilder(config);
    // The name must be set BEFORE build(): renaming the builder afterwards is not expected to
    // reach the graph that has already been built (generateSecondJob follows the same order).
    dataGenerationGraphBuilder.setTaskGraphName(graphName);
    dataGenerationGraphBuilder.addSource("datageneratorsource", dataGeneratorSource, parallel);
    ComputeConnection dataObjectComputeConnection =
        dataGenerationGraphBuilder.addCompute("datageneratorsink", dataGeneratorSink, parallel);
    dataObjectComputeConnection.direct("datageneratorsource")
        .viaEdge(Context.TWISTER2_DIRECT_EDGE)
        .withDataType(MessageTypes.OBJECT);
    dataGenerationGraphBuilder.setMode(OperationMode.BATCH);

    ComputeGraph dataObjectTaskGraph = dataGenerationGraphBuilder.build();

    return DataFlowGraph.newSubGraphJob(graphName, dataObjectTaskGraph)
        .setWorkers(workers)
        .addDataFlowJobConfig(jobConfig)
        .setGraphType("non-iterative");
}
Use of edu.iu.dsc.tws.task.cdfw.DataFlowGraph in project twister2 by DSC-SPIDAL.
The class KMeansConnectedDataflowExample, method generateSecondJob.
/**
 * Assembles the "centroidTG" sub-graph job: a replicated file-read source feeding a compute
 * task, which in turn feeds a sink, with both hops using direct edges in batch mode.
 *
 * @param config            configuration handed to the {@code ComputeGraphBuilder}
 * @param parallelismValue  parallelism applied to the source, compute and sink tasks
 * @param centroidDirectory forwarded to the {@code DataFileReplicatedReadSource} constructor
 * @param dimension         forwarded to the {@code KMeansDataObjectCompute} constructor
 * @param csize             forwarded to the {@code KMeansDataObjectCompute} constructor
 * @param instances         worker count set on the returned job
 * @param jobConfig         job configuration attached to the returned job
 * @return a non-iterative {@code DataFlowGraph} sub-graph job named {@code "centroidTG"}
 */
private static DataFlowGraph generateSecondJob(Config config, int parallelismValue, String centroidDirectory, int dimension, int csize, int instances, DataFlowJobConfig jobConfig) {
    // Task instances for the three vertices of the graph.
    final DataFileReplicatedReadSource source =
        new DataFileReplicatedReadSource(Context.TWISTER2_DIRECT_EDGE, centroidDirectory);
    final KMeansDataObjectCompute compute =
        new KMeansDataObjectCompute(Context.TWISTER2_DIRECT_EDGE, csize, dimension);
    final KMeansDataObjectDirectSink sink = new KMeansDataObjectDirectSink("centroids");

    final ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(config);

    // Register the vertices: source -> compute -> sink, all with the same parallelism.
    builder.addSource("centroidsource", source, parallelismValue);
    final ComputeConnection toCompute =
        builder.addCompute("centroidcompute", compute, parallelismValue);
    final ComputeConnection toSink =
        builder.addCompute("centroidsink", sink, parallelismValue);

    // Wire the vertices together with direct edges carrying OBJECT messages.
    toCompute
        .direct("centroidsource")
        .viaEdge(Context.TWISTER2_DIRECT_EDGE)
        .withDataType(MessageTypes.OBJECT);
    toSink
        .direct("centroidcompute")
        .viaEdge(Context.TWISTER2_DIRECT_EDGE)
        .withDataType(MessageTypes.OBJECT);

    builder.setMode(OperationMode.BATCH);
    builder.setTaskGraphName("centroidTG");

    // Materialise the graph and wrap it in a sub-graph job.
    final ComputeGraph centroidGraph = builder.build();
    return DataFlowGraph.newSubGraphJob("centroidTG", centroidGraph)
        .setWorkers(instances)
        .addDataFlowJobConfig(jobConfig)
        .setGraphType("non-iterative");
}
Use of edu.iu.dsc.tws.task.cdfw.DataFlowGraph in project twister2 by DSC-SPIDAL.
The class TwoDataFlowsExample, method runFirstJob.
/**
 * Builds the batch graph "source1" -> "sink1" (partition edge) and executes it through the
 * given environment as the sub-graph job "first_graph" on 4 workers.
 *
 * @param config      configuration handed to the {@code ComputeGraphBuilder}
 * @param cdfwEnv     environment used to execute the resulting dataflow graph
 * @param parallelism parallelism applied to both the source and the sink task
 * @param jobConfig   job configuration attached to the submitted job
 */
private static void runFirstJob(Config config, CDFWEnv cdfwEnv, int parallelism, DataFlowJobConfig jobConfig) {
    final FirstSourceTask source = new FirstSourceTask();
    final ConnectedSink sink = new ConnectedSink("first_out");

    final ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(config);
    builder.addSource("source1", source, parallelism);

    // The sink receives the source's output over a partition edge named "partition".
    final ComputeConnection sinkLink = builder.addCompute("sink1", sink, parallelism);
    sinkLink
        .partition("source1")
        .viaEdge("partition")
        .withDataType(MessageTypes.OBJECT);

    builder.setMode(OperationMode.BATCH);
    final ComputeGraph graph = builder.build();

    final DataFlowGraph firstJob = DataFlowGraph.newSubGraphJob("first_graph", graph)
        .setWorkers(4)
        .addDataFlowJobConfig(jobConfig)
        .setGraphType("non-iterative");
    cdfwEnv.executeDataFlowGraph(firstJob);
}
Use of edu.iu.dsc.tws.task.cdfw.DataFlowGraph in project twister2 by DSC-SPIDAL.
The class TwoDataFlowsExample, method runSecondJob.
/**
 * Builds the batch graph "source2" -> "sink2" (reduce edge with an {@code Aggregator}
 * reduction function) and executes it through the given environment as the sub-graph job
 * "second_graph" on 4 workers.
 *
 * @param config      configuration handed to the {@code ComputeGraphBuilder}
 * @param cdfwEnv     environment used to execute the resulting dataflow graph
 * @param parallelism parallelism applied to the source task; the sink always runs with 1
 * @param jobConfig   job configuration attached to the submitted job
 */
private static void runSecondJob(Config config, CDFWEnv cdfwEnv, int parallelism, DataFlowJobConfig jobConfig) {
    final ConnectedSource source = new ConnectedSource("reduce", "first_out");
    final ConnectedSink sink = new ConnectedSink();

    final ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(config);
    builder.addSource("source2", source, parallelism);

    // The sink is added with parallelism 1 and fed by a reduce edge named "reduce".
    final ComputeConnection sinkLink = builder.addCompute("sink2", sink, 1);
    sinkLink
        .reduce("source2")
        .viaEdge("reduce")
        .withReductionFunction(new Aggregator())
        .withDataType(MessageTypes.OBJECT);

    builder.setMode(OperationMode.BATCH);
    final ComputeGraph graph = builder.build();

    final DataFlowGraph secondJob = DataFlowGraph.newSubGraphJob("second_graph", graph)
        .setWorkers(4)
        .addDataFlowJobConfig(jobConfig)
        .setGraphType("non-iterative");
    cdfwEnv.executeDataFlowGraph(secondJob);
}
Aggregations