Search in sources :

Example 6 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class FrameReaderTextCSV method readFrameFromInputStream.

/**
 * Reads a CSV frame from an input stream into a newly allocated frame block.
 *
 * @param is input stream to read from
 * @param schema column value types, as supplied by the caller
 * @param names column names, as supplied by the caller
 * @param rlen number of rows, used to allocate the output frame
 * @param clen number of columns, used to derive the output schema and names
 * @return the frame block filled by the core CSV read
 * @throws IOException declared by this method; raised by the underlying read
 * @throws DMLRuntimeException declared by this method; raised by the underlying read
 */
@Override
public FrameBlock readFrameFromInputStream(InputStream is, ValueType[] schema, String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
    //allocate output frame block
    ValueType[] lschema = createOutputSchema(schema, clen);
    String[] lnames = createOutputNames(names, clen);
    FrameBlock ret = createOutputFrameBlock(lschema, lnames, rlen);
    //core read: wrap the stream in an input format and take its first split
    InputStreamInputFormat informat = new InputStreamInputFormat(is);
    InputSplit split = informat.getSplits(null, 1)[0];
    //hand the core reader the derived schema/names that 'ret' was allocated
    //with (not the raw caller arguments), so reader and frame stay in agreement
    //NOTE(review): the meaning of the trailing arguments (null, 0, true) is
    //defined by readCSVFrameFromInputSplit, which is not visible here
    readCSVFrameFromInputSplit(split, informat, null, ret, lschema, lnames, rlen, clen, 0, true);
    return ret;
}
Also used : InputStreamInputFormat(org.apache.sysml.runtime.util.InputStreamInputFormat) ValueType(org.apache.sysml.parser.Expression.ValueType) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) InputSplit(org.apache.hadoop.mapred.InputSplit)

Example 7 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class FrameReaderTextCell method readFrameFromInputStream.

/**
 * Reads a text-cell frame from an input stream into a newly allocated frame block.
 *
 * @param is input stream to read from
 * @param schema column value types, as supplied by the caller
 * @param names column names, as supplied by the caller
 * @param rlen number of rows
 * @param clen number of columns
 * @return the frame block filled by the raw text-cell read
 * @throws IOException declared by this method; raised by the underlying read
 * @throws DMLRuntimeException declared by this method; raised by the underlying read
 */
@Override
public final FrameBlock readFrameFromInputStream(InputStream is, ValueType[] schema, String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
    // derive the schema and column names the output frame is built with
    final ValueType[] outSchema = createOutputSchema(schema, clen);
    final String[] outNames = createOutputNames(names, clen);
    // allocate the target frame, then fill it from the stream
    final FrameBlock frame = createOutputFrameBlock(outSchema, outNames, rlen);
    readRawTextCellFrameFromInputStream(is, frame, outSchema, outNames, rlen, clen);
    return frame;
}
Also used : ValueType(org.apache.sysml.parser.Expression.ValueType) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock)

Example 8 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class FrameReaderTextCell method readFrameFromHDFS.

/**
 * Reads a text-cell frame from the given file into a newly allocated frame block.
 *
 * @param fname name of the input file
 * @param schema column value types, as supplied by the caller
 * @param names column names, as supplied by the caller
 * @param rlen number of rows
 * @param clen number of columns
 * @return the frame block filled by the text-cell read
 * @throws IOException declared by this method; raised by file access or the read
 * @throws DMLRuntimeException declared by this method; raised by the underlying read
 */
@Override
public final FrameBlock readFrameFromHDFS(String fname, ValueType[] schema, String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
    // derive output schema/names and allocate the target frame
    final ValueType[] outSchema = createOutputSchema(schema, clen);
    final String[] outNames = createOutputNames(names, clen);
    final FrameBlock frame = createOutputFrameBlock(outSchema, outNames, rlen);

    // set up job configuration, path and file system for the input file
    final JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    final Path inputPath = new Path(fname);
    final FileSystem fileSystem = IOUtilFunctions.getFileSystem(inputPath, conf);

    // validate the input file before reading
    checkValidInputFile(fileSystem, inputPath);

    // core read into the allocated frame
    readTextCellFrameFromHDFS(inputPath, conf, fileSystem, frame, outSchema, outNames, rlen, clen);
    return frame;
}
Also used : Path(org.apache.hadoop.fs.Path) ValueType(org.apache.sysml.parser.Expression.ValueType) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf)

Example 9 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class FrameReaderBinaryBlock method readBinaryBlockFrameFromSequenceFile.

/**
 * Reads every frame block stored in one sequence file and copies it into the
 * target frame at the row position given by the block's key.
 *
 * Blocks with zero rows or zero columns are skipped. Each remaining block is
 * checked against the row range of {@code dest} before it is copied; the column
 * metadata of the block that starts at the first row is applied to {@code dest}.
 *
 * @param path sequence file to read
 * @param job job configuration handed to the sequence file reader
 * @param fs file system handed to the sequence file reader
 * @param dest target frame; its row/column counts define the valid range
 * @throws IOException if reading fails or a block lies outside the row range of {@code dest}
 * @throws DMLRuntimeException declared by this method; raised by frame operations
 */
@SuppressWarnings({ "deprecation" })
protected final void readBinaryBlockFrameFromSequenceFile(Path path, JobConf job, FileSystem fs, FrameBlock dest) throws IOException, DMLRuntimeException {
    int rlen = dest.getNumRows();
    int clen = dest.getNumColumns();
    //directly read from sequence files (individual partfiles)
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
    LongWritable key = new LongWritable(-1L);
    FrameBlock value = new FrameBlock();
    try {
        while (reader.next(key, value)) {
            int rows = value.getNumRows();
            int cols = value.getNumColumns();
            //empty block, ignore it
            if (rows == 0 || cols == 0)
                continue;
            //bound check per block; done on the long key before narrowing to int,
            //so keys below 1 or beyond the int range cannot wrap into a
            //seemingly valid offset
            long firstRow = key.get(); //1-based start row of this block
            if (firstRow < 1 || firstRow - 1 > (long) rlen - rows) {
                throw new IOException("Frame block [" + firstRow + ":" + (firstRow - 1 + rows) + "," + ":" + "] " + "out of overall frame range [1:" + rlen + ",1:" + clen + "].");
            }
            //narrowing is safe here: 0 <= firstRow-1 <= rlen-rows, and rlen is an int
            int row_offset = (int) (firstRow - 1);
            //copy block into target frame, incl meta on first
            dest.copy(row_offset, row_offset + rows - 1, 0, cols - 1, value);
            if (row_offset == 0)
                dest.setColumnMetadata(value.getColumnMetadata());
        }
    } finally {
        //always release the reader, even if a block was rejected
        IOUtilFunctions.closeSilently(reader);
    }
}
Also used : SequenceFile(org.apache.hadoop.io.SequenceFile) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) LongWritable(org.apache.hadoop.io.LongWritable) IOException(java.io.IOException)

Example 10 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class Connection method readStringFrame.

/**
 * Loads an input frame from HDFS and returns its content as a dense
 * two-dimensional string array. The frame reader is chosen from the
 * given input info.
 * NOTE: this call currently only supports default configurations for CSV.
 *
 * @param fname the filename of the input frame
 * @param iinfo InputInfo object used to select the frame reader
 * @param rows number of rows in the frame
 * @param cols number of columns in the frame
 * @return frame as a two-dimensional string array
 * @throws IOException if any exception occurs while reading or converting;
 *         the original exception is attached as the cause
 */
public String[][] readStringFrame(String fname, InputInfo iinfo, long rows, long cols) throws IOException {
    try {
        // create the reader, read the frame, and convert it in one pass
        return DataConverter.convertToStringFrame(
            FrameReaderFactory.createFrameReader(iinfo).readFrameFromHDFS(fname, rows, cols));
    } catch (Exception cause) {
        // surface every failure to the caller as an IOException
        throw new IOException(cause);
    }
}
Also used : FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DMLException(org.apache.sysml.api.DMLException) IOException(java.io.IOException) ParseException(org.apache.sysml.parser.ParseException)

Aggregations

FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)82 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)31 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)23 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)22 ValueType (org.apache.sysml.parser.Expression.ValueType)21 IOException (java.io.IOException)17 FrameReader (org.apache.sysml.runtime.io.FrameReader)17 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)14 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)12 LongWritable (org.apache.hadoop.io.LongWritable)10 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)10 CSVFileFormatProperties (org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties)10 FrameWriter (org.apache.sysml.runtime.io.FrameWriter)9 TestConfiguration (org.apache.sysml.test.integration.TestConfiguration)8 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)7 MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData)6 Text (org.apache.hadoop.io.Text)5 ArrayList (java.util.ArrayList)4 FileSystem (org.apache.hadoop.fs.FileSystem)4 Path (org.apache.hadoop.fs.Path)4