Use of edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource in project twister2 by DSC-SPIDAL.
In the class SvmSgdIterativeRunner, the method buildWeightVectorTG:
private ComputeGraph buildWeightVectorTG() {
  // Replicated source that reads the stored weight vector file on every worker
  DataFileReplicatedReadSource dataFileReplicatedReadSource = new DataFileReplicatedReadSource(
      Context.TWISTER2_DIRECT_EDGE, this.svmJobParameters.getWeightVectorDataDir(), 1);
  // Compute task that turns the raw file input into the weight vector array
  IterativeSVMWeightVectorObjectCompute weightVectorObjectCompute =
      new IterativeSVMWeightVectorObjectCompute(Context.TWISTER2_DIRECT_EDGE, 1,
          this.svmJobParameters.getFeatures());
  // Sink that exposes the weight vector as the output of this graph
  IterativeSVMWeightVectorObjectDirectSink weightVectorObjectSink =
      new IterativeSVMWeightVectorObjectDirectSink();

  ComputeGraphBuilder weightVectorComputeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
  weightVectorComputeGraphBuilder.addSource(
      Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_SOURCE,
      dataFileReplicatedReadSource, dataStreamerParallelism);
  ComputeConnection weightVectorComputeConnection = weightVectorComputeGraphBuilder.addCompute(
      Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_COMPUTE,
      weightVectorObjectCompute, dataStreamerParallelism);
  ComputeConnection weightVectorSinkConnection = weightVectorComputeGraphBuilder.addCompute(
      Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_SINK,
      weightVectorObjectSink, dataStreamerParallelism);

  // Wire source -> compute -> sink with direct edges
  weightVectorComputeConnection.direct(Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_SOURCE)
      .viaEdge(Context.TWISTER2_DIRECT_EDGE)
      .withDataType(MessageTypes.OBJECT);
  weightVectorSinkConnection.direct(Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_COMPUTE)
      .viaEdge(Context.TWISTER2_DIRECT_EDGE)
      .withDataType(MessageTypes.DOUBLE_ARRAY);

  weightVectorComputeGraphBuilder.setMode(operationMode);
  weightVectorComputeGraphBuilder.setTaskGraphName(
      IterativeSVMConstants.WEIGHT_VECTOR_LOADING_TASK_GRAPH);
  return weightVectorComputeGraphBuilder.build();
}
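For context, a minimal sketch of how the graph returned here is typically consumed inside a Twister2 TaskWorker. The taskExecutor field, the variable names, and the exact executor calls (plan, execute, getOutput) are assumptions for illustration and may differ between Twister2 versions; this is not the runner's actual driver code.

// Hypothetical driver snippet (assumes a TaskWorker-style taskExecutor field).
ComputeGraph weightVectorTG = buildWeightVectorTG();
ExecutionPlan weightVectorPlan = taskExecutor.plan(weightVectorTG);
taskExecutor.execute(weightVectorTG, weightVectorPlan);
// Retrieve the sink output (the loaded weight vector) for the training iterations.
DataObject<Object> weightVectorObject = taskExecutor.getOutput(
    weightVectorTG, weightVectorPlan, Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_SINK);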
Use of edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource in project twister2 by DSC-SPIDAL.
In the class KMeansDataGeneratorTest, the method testUniqueSchedules3:
/**
 * The HDFS data generation test below is commented out for the Travis build.
 */
/*
@Test
public void testUniqueSchedules2() throws IOException {
  Config config = getConfig();
  String hostname = String.valueOf(config.get("twister2.data.hdfs.namenode"));
  String dinputDirectory = "hdfs://" + hostname + ":9000/tmp/testdinput";
  int numFiles = 1;
  int dsize = 20;
  int dimension = 2;
  int parallelismValue = 2;
  KMeansDataGenerator.generateData("txt", new Path(dinputDirectory),
      numFiles, dsize, 100, dimension, config);

  ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
  computeGraphBuilder.setTaskGraphName("kmeans");
  DataObjectSource sourceTask = new DataObjectSource("direct", dinputDirectory);
  DataObjectSink sinkTask = new DataObjectSink();
  computeGraphBuilder.addSource("source", sourceTask, parallelismValue);
  ComputeConnection computeConnection1 = computeGraphBuilder.addSink("sink", sinkTask,
      parallelismValue);
  computeConnection1.direct("source").viaEdge("direct").withDataType(MessageTypes.OBJECT);
  computeGraphBuilder.setMode(OperationMode.BATCH);

  LocalCompleteTextInputPartitioner localCompleteTextInputPartitioner
      = new LocalCompleteTextInputPartitioner(
          new Path(dinputDirectory), parallelismValue, config);
  DataSource<String, ?> source
      = new DataSource<>(config, localCompleteTextInputPartitioner, parallelismValue);
  InputSplit<String> inputSplit;
  for (int i = 0; i < parallelismValue; i++) {
    inputSplit = source.getNextSplit(i);
    Assert.assertNotNull(inputSplit);
  }
}
*/
@Test
public void testUniqueSchedules3() throws IOException {
  Config config = getConfig();
  String cinputDirectory = "/tmp/testcinput";
  int numFiles = 1;
  int csize = 4;
  int dimension = 2;
  int parallelismValue = 2;

  // Generate a small centroid file in text format
  KMeansDataGenerator.generateData("txt", new Path(cinputDirectory), numFiles, csize,
      100, dimension, config);

  // Set up a graph with a replicated read source over the centroid directory
  // (the graph is not executed in this test)
  ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
  computeGraphBuilder.setTaskGraphName("kmeans");
  DataFileReplicatedReadSource task = new DataFileReplicatedReadSource(
      Context.TWISTER2_DIRECT_EDGE, cinputDirectory);
  computeGraphBuilder.addSource("map", task, parallelismValue);
  computeGraphBuilder.setMode(OperationMode.BATCH);

  // Verify the generated file exists and can be read back with the DataFileReader
  Path path = new Path(cinputDirectory);
  final FileSystem fs = FileSystemUtils.get(path);
  final FileStatus pathFile = fs.getFileStatus(path);
  Assert.assertNotNull(pathFile);
  DataFileReader fileReader = new DataFileReader(config, "local");
  double[][] centroids = fileReader.readData(path, dimension, csize);
  Assert.assertNotNull(centroids);
}
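As a possible extension of this test (not part of the original), the matrix returned by readData could also be checked for its expected shape. The assertions below, which would sit just before the closing brace above, assume the reader returns one row per centroid with dimension values each.

// Hypothetical extra assertions on the shape of the returned centroid matrix.
Assert.assertEquals(csize, centroids.length);
Assert.assertEquals(dimension, centroids[0].length);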
Use of edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource in project twister2 by DSC-SPIDAL.
In the class TGUtils, the method buildWeightVectorTG:
public static ComputeGraph buildWeightVectorTG(Config config, int dataStreamerParallelism,
    SVMJobParameters svmJobParameters, OperationMode opMode) {
  DataFileReplicatedReadSource dataFileReplicatedReadSource = new DataFileReplicatedReadSource(
      Context.TWISTER2_DIRECT_EDGE, svmJobParameters.getWeightVectorDataDir());
  IterativeSVMWeightVectorObjectCompute weightVectorObjectCompute =
      new IterativeSVMWeightVectorObjectCompute(Context.TWISTER2_DIRECT_EDGE, 1,
          svmJobParameters.getFeatures());
  IterativeSVMWeightVectorObjectDirectSink weightVectorObjectSink =
      new IterativeSVMWeightVectorObjectDirectSink();
  ComputeGraphBuilder weightVectorComputeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
  weightVectorComputeGraphBuilder.addSource(
      Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_SOURCE,
      dataFileReplicatedReadSource, dataStreamerParallelism);
  ComputeConnection weightVectorComputeConnection = weightVectorComputeGraphBuilder.addCompute(
      Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_COMPUTE,
      weightVectorObjectCompute, dataStreamerParallelism);
  ComputeConnection weightVectorSinkConnection = weightVectorComputeGraphBuilder.addCompute(
      Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_SINK,
      weightVectorObjectSink, dataStreamerParallelism);
  weightVectorComputeConnection.direct(Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_SOURCE)
      .viaEdge(Context.TWISTER2_DIRECT_EDGE)
      .withDataType(MessageTypes.OBJECT);
  weightVectorSinkConnection.direct(Constants.SimpleGraphConfig.WEIGHT_VECTOR_OBJECT_COMPUTE)
      .viaEdge(Context.TWISTER2_DIRECT_EDGE)
      .withDataType(MessageTypes.DOUBLE_ARRAY);
  weightVectorComputeGraphBuilder.setMode(opMode);
  weightVectorComputeGraphBuilder.setTaskGraphName(
      IterativeSVMConstants.WEIGHT_VECTOR_LOADING_TASK_GRAPH);
  return weightVectorComputeGraphBuilder.build();
}
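A short illustration of how this static helper might be invoked from a job class; the variable names are assumed for the sketch and the signature matches the method shown above.

// Hypothetical call site: build the weight-vector loading graph in batch mode.
ComputeGraph weightVectorTG = TGUtils.buildWeightVectorTG(
    config, dataStreamerParallelism, svmJobParameters, OperationMode.BATCH);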
Use of edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource in project twister2 by DSC-SPIDAL.
In the class KMeansConnectedDataflowExample, the method generateSecondJob:
private static DataFlowGraph generateSecondJob(Config config, int parallelismValue,
    String centroidDirectory, int dimension, int csize, int instances,
    DataFlowJobConfig jobConfig) {
  DataFileReplicatedReadSource dataFileReplicatedReadSource =
      new DataFileReplicatedReadSource(Context.TWISTER2_DIRECT_EDGE, centroidDirectory);
  KMeansDataObjectCompute centroidObjectCompute =
      new KMeansDataObjectCompute(Context.TWISTER2_DIRECT_EDGE, csize, dimension);
  KMeansDataObjectDirectSink centroidObjectSink = new KMeansDataObjectDirectSink("centroids");
  ComputeGraphBuilder centroidsComputeGraphBuilder = ComputeGraphBuilder.newBuilder(config);

  // Add source, compute, and sink tasks to the task graph builder for the second task graph
  centroidsComputeGraphBuilder.addSource("centroidsource", dataFileReplicatedReadSource,
      parallelismValue);
  ComputeConnection centroidComputeConnection = centroidsComputeGraphBuilder.addCompute(
      "centroidcompute", centroidObjectCompute, parallelismValue);
  ComputeConnection secondGraphComputeConnection = centroidsComputeGraphBuilder.addCompute(
      "centroidsink", centroidObjectSink, parallelismValue);

  // Creating the communication edges between the tasks for the second task graph
  centroidComputeConnection.direct("centroidsource")
      .viaEdge(Context.TWISTER2_DIRECT_EDGE)
      .withDataType(MessageTypes.OBJECT);
  secondGraphComputeConnection.direct("centroidcompute")
      .viaEdge(Context.TWISTER2_DIRECT_EDGE)
      .withDataType(MessageTypes.OBJECT);
  centroidsComputeGraphBuilder.setMode(OperationMode.BATCH);
  centroidsComputeGraphBuilder.setTaskGraphName("centroidTG");

  // Build the second taskgraph
  ComputeGraph secondGraph = centroidsComputeGraphBuilder.build();
  DataFlowGraph job = DataFlowGraph.newSubGraphJob("centroidTG", secondGraph)
      .setWorkers(instances)
      .addDataFlowJobConfig(jobConfig)
      .setGraphType("non-iterative");
  return job;
}
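For orientation, a sketch of how a sub-graph job like this one could be submitted from a connected-dataflow driver. The cdfwEnv handle and the executeDataFlowGraph call reflect the CDFW driver environment used in Twister2's connected-dataflow examples, but treat the exact names as assumptions rather than the example's verified driver code.

// Hypothetical driver snippet (assumes a CDFWEnv handle named cdfwEnv).
DataFlowGraph centroidJob = generateSecondJob(config, parallelismValue,
    centroidDirectory, dimension, csize, instances, jobConfig);
cdfwEnv.executeDataFlowGraph(centroidJob);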