Use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL: class Twister2ArrowFileReader, method initInputFile.
/**
 * Opens the arrow input file and initializes the reader state: the file system
 * handle, the input streams, the {@code ArrowFileReader}, its schema root, and
 * the list of record blocks to iterate over.
 *
 * @throws Twister2RuntimeException if the file is missing or any other error
 *         occurs while opening/reading it (original cause is preserved)
 */
public void initInputFile() {
  try {
    LOG.info("arrow schema:" + Schema.fromJSON(arrowSchema));
    Path path = new Path(arrowInputFile);
    this.fileSystem = FileSystemUtils.get(path);
    // NOTE(review): fsDataInputStream is opened here but never used in this
    // method — presumably consumed (and closed) elsewhere in the class; confirm.
    this.fsDataInputStream = fileSystem.open(path);
    this.fileInputStream = new FileInputStream(arrowInputFile);
    this.arrowFileReader = new ArrowFileReader(
        new SeekableReadChannel(fileInputStream.getChannel()), rootAllocator);
    this.root = arrowFileReader.getVectorSchemaRoot();
    arrowBlocks = arrowFileReader.getRecordBlocks();
    // Bug fix: arrowInputFile is a String, so arrowInputFile.length() logged the
    // length of the path string, not the file size. Report the real size instead.
    LOG.info("\nReading the arrow file : " + arrowInputFile
        + "\tFile size:" + new java.io.File(arrowInputFile).length()
        + "\tschema:" + root.getSchema().toString()
        + "\tArrow Blocks Size: " + arrowBlocks.size());
  } catch (FileNotFoundException e) {
    throw new Twister2RuntimeException("File Not Found", e);
  } catch (Exception ioe) {
    throw new Twister2RuntimeException("IOException Occured", ioe);
  }
}
Use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL: class CSVInputFormatTest, method testUniqueSchedules.
/**
 * Tests the CSV input format: writes a sample CSV file under /tmp/dinput/,
 * partitions it into input splits, and reads every record from the first
 * split assigned to "localhost".
 */
@Test
public void testUniqueSchedules() throws IOException {
  Config config = getConfig();
  Path path = new Path("/tmp/dinput/");
  createOutputFile(path, config);
  LocalCSVInputPartitioner csvInputPartitioner = new LocalCSVInputPartitioner(path, 4, config);
  csvInputPartitioner.configure(config);
  FileInputSplit[] inputSplits = csvInputPartitioner.createInputSplits(2);
  LOG.info("input split values are:" + Arrays.toString(inputSplits));
  InputSplitAssigner inputSplitAssigner = csvInputPartitioner.getInputSplitAssigner(inputSplits);
  InputSplit inputSplit = inputSplitAssigner.getNextInputSplit("localhost", 0);
  inputSplit.open(config);
  // Bug fix: the original do/while called nextRecord(null) before checking
  // reachedEnd(), reading past the end of an empty split. Check first.
  while (!inputSplit.reachedEnd()) {
    inputSplit.nextRecord(null);
  }
}
Use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL: class DataObjectSource, method prepare.
/**
 * Prepares this task by creating a local text input source over the configured
 * data directory, with one partition per parallel task instance.
 */
@Override
public void prepare(Config cfg, TaskContext context) {
  super.prepare(cfg, context);
  ExecutionRuntime runtime =
      (ExecutionRuntime) cfg.get(ExecutorContext.TWISTER2_RUNTIME_OBJECT);
  Path dataPath = new Path(getDataDirectory());
  LocalTextInputPartitioner partitioner =
      new LocalTextInputPartitioner(dataPath, context.getParallelism(), cfg);
  this.source = runtime.createInput(cfg, context, partitioner);
}
Use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL: class DataParallelTask, method prepare.
/**
 * Prepares the input source and output sink for the data-parallel task.
 * Chooses a local or shared text input partitioner based on the
 * shared-file-system flag, and writes output in OVERWRITE mode.
 */
@Override
public void prepare(Config cfg, TaskContext context) {
  super.prepare(cfg, context);
  String directory = cfg.getStringValue(Constants.ARGS_INPUT_DIRECTORY);
  // Consistency fix: read the runtime from the cfg parameter like every other
  // value in this method, instead of the config field (which may differ if the
  // field was not set from this cfg).
  ExecutionRuntime runtime =
      (ExecutionRuntime) cfg.get(ExecutorContext.TWISTER2_RUNTIME_OBJECT);
  String outDir = cfg.getStringValue(Constants.ARGS_OUTPUT_DIRECTORY);
  boolean shared = cfg.getBooleanValue(Constants.ARGS_SHARED_FILE_SYSTEM);
  if (!shared) {
    this.source = runtime.createInput(cfg, context,
        new LocalTextInputPartitioner(new Path(directory), context.getParallelism()));
  } else {
    this.source = runtime.createInput(cfg, context,
        new SharedTextInputPartitioner(new Path(directory)));
  }
  this.sink = new DataSink<String>(cfg,
      new TextOutputWriter(FileSystem.WriteMode.OVERWRITE, new Path(outDir)));
}
Use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL: class DataLoadingTask, method prepare.
/**
 * Prepares the data-loading task: sizes the local feature matrix and creates a
 * fixed input partitioner over either the training or the testing data set,
 * depending on {@code dataType}.
 */
@Override
public void prepare(TSetContext context) {
  super.prepare(context);
  this.config = context.getConfig();
  this.parallelism = context.getParallelism();
  LOG.info(String.format("%d, %d, %d", context.getIndex(),
      this.svmJobParameters.getParallelism(), context.getParallelism()));
  // dimension is +1 features as the input data comes along with the label
  this.dimension = this.binaryBatchModel.getFeatures() + 1;
  if ("train".equalsIgnoreCase(this.dataType)) {
    this.dataSize = this.binaryBatchModel.getSamples();
    LOG.info(String.format("Data Size : %d, Array Shape [%d,%d]",
        this.dataSize, this.dataSize / parallelism, this.dimension));
    initLocalSource(this.svmJobParameters.getTrainingDataDir());
  }
  if ("test".equalsIgnoreCase(this.dataType)) {
    this.dataSize = this.svmJobParameters.getTestingSamples();
    initLocalSource(this.svmJobParameters.getTestingDataDir());
  }
}

/**
 * Allocates the per-worker points buffer and creates the fixed-partition data
 * source over the given directory. Shared by the train and test branches.
 */
private void initLocalSource(String dataDir) {
  // NOTE(review): integer division drops the remainder, so up to
  // (dataSize % parallelism) samples are not represented locally when the data
  // size is not divisible by the parallelism — confirm this is intended.
  this.localPoints = new double[this.dataSize / parallelism][this.dimension];
  this.source = new DataSource(config,
      new LocalFixedInputPartitioner(new Path(dataDir), this.parallelism, config, dataSize),
      this.parallelism);
}
Aggregations