Use of edu.iu.dsc.tws.dataset.DataSource in project twister2 by DSC-SPIDAL.
Class DataLoadingTask, method prepare.
/**
 * Reads the configuration and parallelism from the context, then sets up the local point buffer and
 * the {@code DataSource} for either the training or the testing data set, selected by {@code dataType}
 * (compared case-insensitively against "train" and "test").
 * Any other {@code dataType} value leaves {@code dataSize}, {@code localPoints} and {@code source} untouched.
 *
 * @param context the TSet context supplying the config, the task index and the parallelism
 */
@Override
public void prepare(TSetContext context) {
  super.prepare(context);
  this.config = context.getConfig();
  this.parallelism = context.getParallelism();
  LOG.info(String.format("%d, %d, %d", context.getIndex(), this.svmJobParameters.getParallelism(), context.getParallelism()));
  // Each input row carries its label in addition to the features, hence one extra column.
  this.dimension = this.binaryBatchModel.getFeatures() + 1;

  final boolean training = "train".equalsIgnoreCase(this.dataType);
  final boolean testing = "test".equalsIgnoreCase(this.dataType);

  if (training) {
    this.dataSize = this.binaryBatchModel.getSamples();
    // Integer division: the buffer holds dataSize / parallelism rows, any remainder is not allocated.
    final int rowsPerTask = this.dataSize / parallelism;
    this.localPoints = new double[rowsPerTask][this.dimension];
    LOG.info(String.format("Data Size : %d, Array Shape [%d,%d]", this.dataSize, this.localPoints.length, this.dimension));
    final Path trainingPath = new Path(this.svmJobParameters.getTrainingDataDir());
    this.source = new DataSource(config, new LocalFixedInputPartitioner(trainingPath, this.parallelism, config, dataSize), this.parallelism);
  } else if (testing) {
    this.dataSize = this.svmJobParameters.getTestingSamples();
    // Same sizing rule as the training branch.
    final int rowsPerTask = this.dataSize / parallelism;
    this.localPoints = new double[rowsPerTask][this.dimension];
    final Path testingPath = new Path(this.svmJobParameters.getTestingDataDir());
    this.source = new DataSource(config, new LocalFixedInputPartitioner(testingPath, this.parallelism, config, dataSize), this.parallelism);
  }
}
Use of edu.iu.dsc.tws.dataset.DataSource in project twister2 by DSC-SPIDAL.
Class KMeansDataGeneratorTest, method testUniqueSchedules1.
/**
 * Generates one "txt" data file under a temporary directory, configures a source/sink compute graph
 * builder in batch mode, and then checks that a {@code DataSource} backed by a
 * {@code LocalTextInputPartitioner} over that directory returns a non-null split for every task index
 * below the parallelism.
 *
 * <p>The compute graph builder is only configured here; it is never built within this method.
 *
 * @throws IOException declared by the test; presumably raised by data generation — confirm against
 *     {@code KMeansDataGenerator.generateData}
 */
@Test
public void testUniqueSchedules1() throws IOException {
  final Config config = getConfig();

  final String inputDir = "/tmp/testdinput";
  final int fileCount = 1;
  final int dataSize = 20;
  final int dim = 2;
  final int parallelism = 2;

  KMeansDataGenerator.generateData("txt", new Path(inputDir), fileCount, dataSize, 100, dim, config);

  final ComputeGraphBuilder graphBuilder = ComputeGraphBuilder.newBuilder(config);
  graphBuilder.setTaskGraphName("kmeans");
  final DataObjectSource sourceTask = new DataObjectSource("direct", inputDir);
  final DataObjectSink sinkTask = new DataObjectSink();
  graphBuilder.addSource("source", sourceTask, parallelism);
  // Connect the sink to the source over a direct edge carrying generic objects.
  graphBuilder.addCompute("sink", sinkTask, parallelism)
      .direct("source")
      .viaEdge("direct")
      .withDataType(MessageTypes.OBJECT);
  graphBuilder.setMode(OperationMode.BATCH);

  final LocalTextInputPartitioner partitioner =
      new LocalTextInputPartitioner(new Path(inputDir), parallelism, config);
  final DataSource<String, ?> source = new DataSource<>(config, partitioner, parallelism);

  for (int taskIndex = 0; taskIndex < parallelism; taskIndex++) {
    final InputSplit<String> split = source.getNextSplit(taskIndex);
    Assert.assertNotNull(split);
  }
}
Use of edu.iu.dsc.tws.dataset.DataSource in project twister2 by DSC-SPIDAL.
Class TextBasedSourceFunction, method prepare.
/**
 * Stores the context, creates the {@code DataSource} over {@code datainputDirectory} and fetches the
 * split belonging to this task's index.
 *
 * <p>When {@code partitionerType} equals "complete" a {@code LocalCompleteCSVInputPartitioner} is used;
 * every other value (including {@code null}) falls back to a {@code LocalCSVInputPartitioner}.
 *
 * <p>NOTE(review): the "complete" branch passes {@code context.getParallelism()} to the partitioner but
 * the {@code parallel} field to the {@code DataSource}; confirm the two are meant to differ.
 *
 * @param context the TSet context supplying the config, the parallelism and the task index
 */
@Override
public void prepare(TSetContext context) {
  super.prepare(context);
  this.ctx = context;

  final Config cfg = this.ctx.getConfig();
  final Path inputPath = new Path(datainputDirectory);

  if (!"complete".equals(partitionerType)) {
    this.dataSource = new DataSource(cfg, new LocalCSVInputPartitioner(inputPath, parallel, dataSize, cfg), parallel);
  } else {
    this.dataSource = new DataSource(cfg, new LocalCompleteCSVInputPartitioner(inputPath, context.getParallelism(), dataSize, cfg), parallel);
  }

  final int taskIndex = context.getIndex();
  this.dataSplit = this.dataSource.getNextSplit(taskIndex);
}
Aggregations