Search in sources :

Example 51 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class FileOutputWriter method write.

/**
 * Writes a record to the output of the given partition.
 *
 * <p>If no stream is registered for the partition yet, a file named
 * {@code part-<partition>} is created under {@code outPath}, handed to
 * {@code createOutput} and then registered in {@code openStreams}.
 *
 * @param partition the partition the record belongs to
 * @param out the record passed on to {@code writeRecord}
 * @throws RuntimeException if creating the partition's output fails with an {@link IOException}
 */
public void write(int partition, T out) {
    boolean streamAlreadyOpen = openStreams.containsKey(partition);
    if (!streamAlreadyOpen) {
        Path partitionFile = new Path(outPath, "part-" + partition);
        try {
            FSDataOutputStream stream = fs.create(partitionFile);
            // the user-supplied hook sets up its own output on top of the raw stream
            createOutput(partition, stream);
            openStreams.put(partition, stream);
        } catch (IOException e) {
            throw new RuntimeException("Failed to create output stream for file: " + partitionFile, e);
        }
    }
    writeRecord(partition, out);
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FSDataOutputStream(edu.iu.dsc.tws.api.data.FSDataOutputStream) IOException(java.io.IOException)

Example 52 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class FileOutputWriter method write.

/**
 * Writes a single record to a freshly created CSV file.
 *
 * <p>On every call: if {@code outPath} exists it is deleted (the {@code true}
 * flag is presumably "recursive" — confirm against the FileSystem API), a new
 * file named {@code <random>.csv} is created under {@code outPath}, the
 * {@code pw} field is pointed at it, and the record is passed to
 * {@code writeRecord}.
 *
 * @param out the record passed on to {@code writeRecord}
 * @throws RuntimeException if preparing the output file fails; the underlying
 *     {@link IOException} is attached as the cause
 */
public void write(T out) {
    try {
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }
        FSDataOutputStream fsOut = fs.create(new Path(outPath, generateRandom(10) + ".csv"));
        pw = new PrintWriter(fsOut);
    } catch (IOException e) {
        // keep the original exception as the cause so the real failure is not lost
        throw new RuntimeException("IOException occurred while creating output file under: " + outPath, e);
    }
    writeRecord(out);
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FSDataOutputStream(edu.iu.dsc.tws.api.data.FSDataOutputStream) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter)

Example 53 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class Twister2ArrowFileWriter method setUpTwister2ArrowWrite.

/**
 * Prepares this writer for producing an Arrow file.
 *
 * <p>Builds the vector schema root from {@code arrowSchema}, opens an output
 * stream for {@code arrowFile}, creates the {@code ArrowFileWriter} and
 * registers a value generator for every field of the schema.
 *
 * @param workerId the worker id; only used in the log message here
 * @return always {@code true}
 * @throws RuntimeException if a field has a minor type other than INT, BIGINT or FLOAT4
 * @throws Exception propagated from schema parsing or file system calls
 */
public boolean setUpTwister2ArrowWrite(int workerId) throws Exception {
    LOG.fine("%%%%%%%%% worker id details:" + workerId + "\t" + arrowFile);
    this.root = VectorSchemaRoot.create(Schema.fromJSON(arrowSchema), this.rootAllocator);

    // open the target file and wrap its stream
    Path path = new Path(arrowFile);
    this.fileSystem = FileSystemUtils.get(path);
    this.fsDataOutputStream = fileSystem.create(path);
    this.twister2ArrowOutputStream = new Twister2ArrowOutputStream(this.fsDataOutputStream);

    DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
    // 'flag' selects which sink the Arrow writer is attached to
    if (flag) {
        this.arrowFileWriter = new ArrowFileWriter(root, provider, this.twister2ArrowOutputStream);
    } else {
        this.arrowFileWriter = new ArrowFileWriter(root, provider, this.fsDataOutputStream.getChannel());
    }

    LOG.info("root schema fields:" + root.getSchema().getFields());
    for (Field field : root.getSchema().getFields()) {
        FieldVector vector = root.getVector(field.getName());
        switch (vector.getMinorType()) {
            case INT:
                this.generatorMap.put(vector, new IntVectorGenerator());
                break;
            case BIGINT:
                this.generatorMap.put(vector, new BigIntVectorGenerator());
                break;
            case FLOAT4:
                this.generatorMap.put(vector, new FloatVectorGenerator());
                break;
            default:
                throw new RuntimeException("unsupported arrow write type");
        }
    }
    return true;
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) Field(org.apache.arrow.vector.types.pojo.Field) DictionaryProvider(org.apache.arrow.vector.dictionary.DictionaryProvider) ArrowFileWriter(org.apache.arrow.vector.ipc.ArrowFileWriter) FieldVector(org.apache.arrow.vector.FieldVector)

Example 54 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class BufferedCollectionPartition method flush.

/**
 * Writes the currently buffered entries to a new file and resets the buffer.
 *
 * <p>Does nothing when there are no buffered entries. Otherwise the file is
 * written as: a {@code long} entry count, followed by each entry as an
 * {@code int} length and its raw bytes. On success the file is appended to
 * {@code filesList} and the in-memory buffer state is cleared.
 *
 * @throws Twister2RuntimeException if writing the file fails with an {@link IOException}
 */
public void flush() {
    if (!this.buffers.isEmpty()) {
        // the counter is advanced before the write, so a failed write still consumes a number
        String fileName = this.fileCounter + EXTENSION;
        this.fileCounter++;
        Path target = new Path(this.rootPath, fileName);

        try (DataOutputStream out = new DataOutputStream(this.fileSystem.create(target))) {
            out.writeLong(this.buffers.size());
            for (byte[] entry : this.buffers) {
                out.writeInt(entry.length);
                out.write(entry);
            }
        } catch (IOException e) {
            throw new Twister2RuntimeException("Couldn't flush partitions to the disk", e);
        }

        this.filesList.add(target);
        this.buffers.clear();
        this.bufferedBytes = 0;
    }
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) DataOutputStream(java.io.DataOutputStream) IOException(java.io.IOException)

Example 55 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class BufferedCollectionPartition method loadFromFS.

/**
 * Rebuilds the in-memory file list from the frames already present under
 * {@code rootPath}.
 *
 * <p>Files whose name contains {@code EXTENSION} are kept and ordered by the
 * number left in the name once {@code EXTENSION} is removed. The file counter
 * is set to the total number of entries listed, not only the matching ones.
 *
 * @throws Twister2RuntimeException if listing the directory fails with an {@link IOException}
 */
private void loadFromFS() {
    try {
        FileStatus[] listing = this.fileSystem.listFiles(this.rootPath);

        // order frames by the numeric part of their file name
        Comparator<Path> byFrameNumber = Comparator.comparingLong(
            framePath -> Long.parseLong(framePath.getName().replace(EXTENSION, "")));

        this.filesList = Arrays.stream(listing)
            .map(FileStatus::getPath)
            .filter(candidate -> candidate.getName().contains(EXTENSION))
            .sorted(byFrameNumber)
            .collect(Collectors.toList());

        this.fileCounter = listing.length;
    } catch (IOException e) {
        throw new Twister2RuntimeException("Failed to load frames from file system", e);
    }
}
Also used : DataInputStream(java.io.DataInputStream) Arrays(java.util.Arrays) Iterator(java.util.Iterator) FileStatus(edu.iu.dsc.tws.api.data.FileStatus) DataPartitionConsumer(edu.iu.dsc.tws.api.dataset.DataPartitionConsumer) MessageType(edu.iu.dsc.tws.api.comms.messaging.types.MessageType) Collection(java.util.Collection) IOException(java.io.IOException) Config(edu.iu.dsc.tws.api.config.Config) UUID(java.util.UUID) MessageTypes(edu.iu.dsc.tws.api.comms.messaging.types.MessageTypes) Logger(java.util.logging.Logger) Collectors(java.util.stream.Collectors) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) ArrayList(java.util.ArrayList) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) List(java.util.List) DataOutputStream(java.io.DataOutputStream) Closeable(java.io.Closeable) Path(edu.iu.dsc.tws.api.data.Path) Queue(java.util.Queue) Comparator(java.util.Comparator) LinkedList(java.util.LinkedList) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) FileStatus(edu.iu.dsc.tws.api.data.FileStatus) IOException(java.io.IOException)

Aggregations

Path (edu.iu.dsc.tws.api.data.Path)61 IOException (java.io.IOException)23 FileSystem (edu.iu.dsc.tws.api.data.FileSystem)19 FileStatus (edu.iu.dsc.tws.api.data.FileStatus)14 ArrayList (java.util.ArrayList)12 Config (edu.iu.dsc.tws.api.config.Config)11 Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)8 FileInputSplit (edu.iu.dsc.tws.data.api.splits.FileInputSplit)8 ExecutionRuntime (edu.iu.dsc.tws.executor.core.ExecutionRuntime)8 BlockLocation (edu.iu.dsc.tws.api.data.BlockLocation)7 FSDataOutputStream (edu.iu.dsc.tws.api.data.FSDataOutputStream)7 PrintWriter (java.io.PrintWriter)7 File (java.io.File)6 LocalTextInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalTextInputPartitioner)5 Test (org.junit.Test)5 ComputeGraph (edu.iu.dsc.tws.api.compute.graph.ComputeGraph)4 LocalCSVInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalCSVInputPartitioner)4 LocalFixedInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalFixedInputPartitioner)4 DataGenerator (edu.iu.dsc.tws.tsched.utils.DataGenerator)4 CSVInputSplit (edu.iu.dsc.tws.data.api.splits.CSVInputSplit)3