Search in sources :

Example 21 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class Twister2ArrowFileReader method initInputFile.

/**
 * Opens the configured Arrow input file and prepares this reader for reading it.
 *
 * <p>Logs the schema parsed from {@code arrowSchema}, resolves the file system for
 * {@code arrowInputFile} and opens a data input stream on it, then builds an
 * {@code ArrowFileReader} over the channel of a {@code FileInputStream} on the same file.
 * The vector schema root and the record blocks reported by that reader are stored on
 * this instance.
 *
 * @throws Twister2RuntimeException if the input file cannot be found, or if any other
 *     exception is raised while the reader is being set up (the original exception is
 *     kept as the cause)
 */
public void initInputFile() {
    try {
        LOG.info("arrow schema:" + Schema.fromJSON(arrowSchema));
        Path path = new Path(arrowInputFile);
        this.fileSystem = FileSystemUtils.get(path);
        this.fsDataInputStream = fileSystem.open(path);
        this.fileInputStream = new FileInputStream(arrowInputFile);
        this.arrowFileReader = new ArrowFileReader(new SeekableReadChannel(fileInputStream.getChannel()), rootAllocator);
        this.root = arrowFileReader.getVectorSchemaRoot();
        arrowBlocks = arrowFileReader.getRecordBlocks();
        // Take the size from the open channel so the log shows the file's byte size.
        // NOTE(review): arrowInputFile looks like a path String here, in which case
        // arrowInputFile.length() would have logged the length of the path text instead.
        LOG.info("\nReading the arrow file : " + arrowInputFile + "\tFile size:" + fileInputStream.getChannel().size() + "\tschema:" + root.getSchema().toString() + "\tArrow Blocks Size: " + arrowBlocks.size());
    } catch (FileNotFoundException e) {
        throw new Twister2RuntimeException("File Not Found", e);
    } catch (Exception ioe) {
        throw new Twister2RuntimeException("IOException Occurred", ioe);
    }
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) ArrowFileReader(org.apache.arrow.vector.ipc.ArrowFileReader) FileNotFoundException(java.io.FileNotFoundException) SeekableReadChannel(org.apache.arrow.vector.ipc.SeekableReadChannel) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)

Example 22 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class CSVInputFormatTest method testUniqueSchedules.

/**
 * Smoke test for the CSV input format: writes the input via {@code createOutputFile},
 * partitions {@code /tmp/dinput/} with a {@code LocalCSVInputPartitioner}, opens the
 * first split assigned to {@code localhost} and reads records until the split reports
 * that its end has been reached.
 *
 * @throws IOException if preparing or reading the input fails
 */
@Test
public void testUniqueSchedules() throws IOException {
    final Config cfg = getConfig();
    final Path inputDir = new Path("/tmp/dinput/");
    createOutputFile(inputDir, cfg);

    final LocalCSVInputPartitioner partitioner = new LocalCSVInputPartitioner(inputDir, 4, cfg);
    partitioner.configure(cfg);

    final FileInputSplit[] splits = partitioner.createInputSplits(2);
    LOG.info("input split values are:" + Arrays.toString(splits));

    final InputSplitAssigner assigner = partitioner.getInputSplitAssigner(splits);
    final InputSplit split = assigner.getNextInputSplit("localhost", 0);
    split.open(cfg);

    // At least one record is always requested before the end-of-split check.
    split.nextRecord(null);
    while (!split.reachedEnd()) {
        split.nextRecord(null);
    }
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FileInputSplit(edu.iu.dsc.tws.data.api.splits.FileInputSplit) InputSplitAssigner(edu.iu.dsc.tws.data.fs.io.InputSplitAssigner) Config(edu.iu.dsc.tws.api.config.Config) LocalCSVInputPartitioner(edu.iu.dsc.tws.data.api.formatters.LocalCSVInputPartitioner) FileInputSplit(edu.iu.dsc.tws.data.api.splits.FileInputSplit) InputSplit(edu.iu.dsc.tws.data.fs.io.InputSplit) Test(org.junit.Test)

Example 23 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class DataObjectSource method prepare.

/**
 * Prepares this source: after the superclass preparation, looks up the execution
 * runtime stored in {@code cfg} and asks it to create the input over a
 * {@code LocalTextInputPartitioner} rooted at {@code getDataDirectory()}.
 *
 * @param cfg the configuration, also holding the runtime object under
 *     {@code ExecutorContext.TWISTER2_RUNTIME_OBJECT}
 * @param context the task context, used for the parallelism passed to the partitioner
 */
@Override
public void prepare(Config cfg, TaskContext context) {
    super.prepare(cfg, context);

    final ExecutionRuntime executionRuntime =
        (ExecutionRuntime) cfg.get(ExecutorContext.TWISTER2_RUNTIME_OBJECT);
    final LocalTextInputPartitioner partitioner =
        new LocalTextInputPartitioner(new Path(getDataDirectory()), context.getParallelism(), cfg);

    this.source = executionRuntime.createInput(cfg, context, partitioner);
}
Also used : LocalTextInputPartitioner(edu.iu.dsc.tws.data.api.formatters.LocalTextInputPartitioner) Path(edu.iu.dsc.tws.api.data.Path) ExecutionRuntime(edu.iu.dsc.tws.executor.core.ExecutionRuntime)

Example 24 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class DataParallelTask method prepare.

/**
 * Prepares this task by wiring up its input source and output sink.
 *
 * <p>The input directory, output directory and shared-file-system flag are read from
 * {@code cfg}. With a shared file system the source is created over a
 * {@code SharedTextInputPartitioner}; otherwise over a {@code LocalTextInputPartitioner}
 * that also receives the task parallelism. The sink writes text to the output directory
 * in {@code OVERWRITE} mode.
 *
 * @param cfg the configuration, also holding the runtime object under
 *     {@code ExecutorContext.TWISTER2_RUNTIME_OBJECT}
 * @param context the task context, used for the parallelism of the local partitioner
 */
@Override
public void prepare(Config cfg, TaskContext context) {
    super.prepare(cfg, context);
    String directory = cfg.getStringValue(Constants.ARGS_INPUT_DIRECTORY);
    // Look the runtime up on the cfg argument, like every other value read in this
    // method, rather than on a field that only holds it if super.prepare stored it.
    ExecutionRuntime runtime = (ExecutionRuntime) cfg.get(ExecutorContext.TWISTER2_RUNTIME_OBJECT);
    String outDir = cfg.getStringValue(Constants.ARGS_OUTPUT_DIRECTORY);
    boolean shared = cfg.getBooleanValue(Constants.ARGS_SHARED_FILE_SYSTEM);
    if (shared) {
        this.source = runtime.createInput(cfg, context, new SharedTextInputPartitioner(new Path(directory)));
    } else {
        this.source = runtime.createInput(cfg, context, new LocalTextInputPartitioner(new Path(directory), context.getParallelism()));
    }
    this.sink = new DataSink<String>(cfg, new TextOutputWriter(FileSystem.WriteMode.OVERWRITE, new Path(outDir)));
}
Also used : LocalTextInputPartitioner(edu.iu.dsc.tws.data.api.formatters.LocalTextInputPartitioner) Path(edu.iu.dsc.tws.api.data.Path) SharedTextInputPartitioner(edu.iu.dsc.tws.data.api.formatters.SharedTextInputPartitioner) TextOutputWriter(edu.iu.dsc.tws.data.api.out.TextOutputWriter) ExecutionRuntime(edu.iu.dsc.tws.executor.core.ExecutionRuntime)

Example 25 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class DataLoadingTask method prepare.

/**
 * Prepares this data loading task for the configured data type.
 *
 * <p>Stores the config and parallelism from {@code context}, derives the point
 * dimension from the model's feature count, and, for the {@code "train"} or
 * {@code "test"} data type (case-insensitive), sizes the local point buffer and creates
 * a {@code DataSource} over a {@code LocalFixedInputPartitioner} on the matching data
 * directory. Any other data type leaves the buffer and source untouched.
 *
 * @param context the TSet context supplying the config, parallelism and task index
 */
@Override
public void prepare(TSetContext context) {
    super.prepare(context);
    this.config = context.getConfig();
    this.parallelism = context.getParallelism();
    LOG.info(String.format("%d, %d, %d",
        context.getIndex(),
        this.svmJobParameters.getParallelism(),
        context.getParallelism()));

    // Each input row carries its label next to the features, hence one extra column.
    this.dimension = this.binaryBatchModel.getFeatures() + 1;

    if ("train".equalsIgnoreCase(this.dataType)) {
        this.dataSize = this.binaryBatchModel.getSamples();
        // Integer division: every task gets the same whole number of rows.
        final int rowsPerTask = this.dataSize / this.parallelism;
        this.localPoints = new double[rowsPerTask][this.dimension];
        LOG.info(String.format("Data Size : %d, Array Shape [%d,%d]", this.dataSize, this.localPoints.length, this.dimension));
        final LocalFixedInputPartitioner trainingPartitioner = new LocalFixedInputPartitioner(
            new Path(this.svmJobParameters.getTrainingDataDir()), this.parallelism, this.config, this.dataSize);
        this.source = new DataSource(this.config, trainingPartitioner, this.parallelism);
    }

    if ("test".equalsIgnoreCase(this.dataType)) {
        this.dataSize = this.svmJobParameters.getTestingSamples();
        final int rowsPerTask = this.dataSize / this.parallelism;
        this.localPoints = new double[rowsPerTask][this.dimension];
        final LocalFixedInputPartitioner testingPartitioner = new LocalFixedInputPartitioner(
            new Path(this.svmJobParameters.getTestingDataDir()), this.parallelism, this.config, this.dataSize);
        this.source = new DataSource(this.config, testingPartitioner, this.parallelism);
    }
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) LocalFixedInputPartitioner(edu.iu.dsc.tws.data.api.formatters.LocalFixedInputPartitioner) DataSource(edu.iu.dsc.tws.dataset.DataSource)

Aggregations

Path (edu.iu.dsc.tws.api.data.Path)61 IOException (java.io.IOException)23 FileSystem (edu.iu.dsc.tws.api.data.FileSystem)19 FileStatus (edu.iu.dsc.tws.api.data.FileStatus)14 ArrayList (java.util.ArrayList)12 Config (edu.iu.dsc.tws.api.config.Config)11 Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)8 FileInputSplit (edu.iu.dsc.tws.data.api.splits.FileInputSplit)8 ExecutionRuntime (edu.iu.dsc.tws.executor.core.ExecutionRuntime)8 BlockLocation (edu.iu.dsc.tws.api.data.BlockLocation)7 FSDataOutputStream (edu.iu.dsc.tws.api.data.FSDataOutputStream)7 PrintWriter (java.io.PrintWriter)7 File (java.io.File)6 LocalTextInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalTextInputPartitioner)5 Test (org.junit.Test)5 ComputeGraph (edu.iu.dsc.tws.api.compute.graph.ComputeGraph)4 LocalCSVInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalCSVInputPartitioner)4 LocalFixedInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalFixedInputPartitioner)4 DataGenerator (edu.iu.dsc.tws.tsched.utils.DataGenerator)4 CSVInputSplit (edu.iu.dsc.tws.data.api.splits.CSVInputSplit)3